From 344088108625e243e46505fffe7cd5b91e28b8b3 Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Wed, 22 Jan 2025 11:49:28 +0100
Subject: [PATCH] bitbucket: Fix request to get next page of buggy page

The Bitbucket Web API to list repositories has buggy pages that
need to be skipped to continue the listing.

Previously, the request to get the next page when a buggy page
was detected was missing the "after" query parameter, so the
request always returned the second page of the repositories
listing endpoint.

Also refine buggy page detection by considering all HTTP status
codes >= 500.
---
 swh/lister/bitbucket/lister.py            | 6 ++++--
 swh/lister/bitbucket/tests/test_lister.py | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py
index 3461ebe6..5c6287e0 100644
--- a/swh/lister/bitbucket/lister.py
+++ b/swh/lister/bitbucket/lister.py
@@ -124,16 +124,18 @@ class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]):
                 body = self.http_request(self.url, params=self.url_params).json()
                 yield body["values"]
             except HTTPError as e:
-                if e.response is not None and e.response.status_code == 500:
+                if e.response is not None and e.response.status_code >= 500:
                     logger.warning(
-                        "URL %s is buggy (error 500), skip it and get next page.",
+                        "URL %s is buggy (error %s), skip it and get next page.",
                         e.response.url,
+                        e.response.status_code,
                     )
                     body = self.http_request(
                         self.url,
                         params={
                             "pagelen": self.url_params["pagelen"],
                             "fields": "next",
+                            "after": last_repo_cdate,
                         },
                     ).json()
 
diff --git a/swh/lister/bitbucket/tests/test_lister.py b/swh/lister/bitbucket/tests/test_lister.py
index 7882d31f..9583c951 100644
--- a/swh/lister/bitbucket/tests/test_lister.py
+++ b/swh/lister/bitbucket/tests/test_lister.py
@@ -190,7 +190,8 @@ def test_bitbucket_lister_buggy_page(
         BitbucketLister.API_URL,
         [
             {"json": bb_api_repositories_page1, "status_code": 200},
-            *[{"json": None, "status_code": 500}] * MAX_NUMBER_ATTEMPTS,
+            *[{"json": None, "status_code": 500}] * (MAX_NUMBER_ATTEMPTS - 1),
+            {"json": None, "status_code": 504},
             {"json": {"next": bb_api_repositories_page1["next"]}, "status_code": 200},
             {"json": bb_api_repositories_page2, "status_code": 200},
         ],
-- 
GitLab