From b841711cdbf7357ff4deee89bbb2dd86d21082d7 Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Mon, 13 Jan 2025 12:18:20 +0100
Subject: [PATCH] deposit: Synchronize HTTP read timeout with deposit server

When the deposit loader downloads a deposited tarball, it calls the raw
endpoint of the deposit private HTTP API. In production, the
implementation of that endpoint downloads a set of tarballs from an Azure
blob storage and returns an aggregated version of them.

Some HTTP read timeouts have been observed while downloading tarballs
from Azure. Those have been fixed in swh/devel/swh-deposit!453 by
increasing the read timeout used to read data from Azure.

However, there are still some deposit loader tasks that end with an
error, as the read timeout used to query the raw endpoint of the deposit
private API is half the read timeout used to download from Azure.
So ensure those HTTP read timeouts have the same value of 120 seconds.
---
 swh/loader/core/utils.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/swh/loader/core/utils.py b/swh/loader/core/utils.py
index 96d08513..618d284a 100644
--- a/swh/loader/core/utils.py
+++ b/swh/loader/core/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 The Software Heritage developers
+# Copyright (C) 2018-2025 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -253,6 +253,7 @@ def download(
     filename: Optional[str] = None,
     auth: Optional[Tuple[str, str]] = None,
     extra_request_headers: Optional[Dict[str, str]] = None,
+    timeout: int = 120,
 ) -> Tuple[str, Dict]:
     """Download a remote file from url, and compute swh hashes on it.
 
@@ -264,6 +265,10 @@ def download(
             algorithms are defined in the :data:`swh.model.hashutil.ALGORITHMS` set.
         auth: Optional tuple of login/password (for http authentication
             service, e.g. deposit)
+        extra_request_headers: Optional dict holding extra HTTP headers to be
+            sent with the request
+        timeout: Value in seconds so the connection does not hang indefinitely
+            (read/connection timeout)
 
     Raises:
         ValueError in case of any error when fetching/computing (length,
@@ -278,8 +283,6 @@ def download(
         params["auth"] = auth
     if extra_request_headers is not None:
         params["headers"].update(extra_request_headers)
-    # so the connection does not hang indefinitely (read/connection timeout)
-    timeout = params.get("timeout", 60)
 
     parsed_url = urlparse(url)
     if parsed_url.scheme == "ftp":
-- 
GitLab