From 344088108625e243e46505fffe7cd5b91e28b8b3 Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Wed, 22 Jan 2025 11:49:28 +0100 Subject: [PATCH] bitbucket: Fix request to get next page of buggy page The Bitbucket Web API to list repositories has buggy pages that need to be skipped to continue the listing. Previously the request to get the next page when a buggy page is detected was missing the `after` query parameter, so the request was always returning the second page of the repositories listing endpoint. Also refine buggy page detection by considering all HTTP status codes >= 500. --- swh/lister/bitbucket/lister.py | 6 ++++-- swh/lister/bitbucket/tests/test_lister.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py index 3461ebe6..5c6287e0 100644 --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -124,16 +124,18 @@ class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]): body = self.http_request(self.url, params=self.url_params).json() yield body["values"] except HTTPError as e: - if e.response is not None and e.response.status_code == 500: + if e.response is not None and e.response.status_code >= 500: logger.warning( - "URL %s is buggy (error 500), skip it and get next page.", + "URL %s is buggy (error %s), skip it and get next page.", e.response.url, + e.response.status_code, ) body = self.http_request( self.url, params={ "pagelen": self.url_params["pagelen"], "fields": "next", + "after": last_repo_cdate, }, ).json() diff --git a/swh/lister/bitbucket/tests/test_lister.py b/swh/lister/bitbucket/tests/test_lister.py index 7882d31f..9583c951 100644 --- a/swh/lister/bitbucket/tests/test_lister.py +++ b/swh/lister/bitbucket/tests/test_lister.py @@ -190,7 +190,8 @@ def test_bitbucket_lister_buggy_page( BitbucketLister.API_URL, [ {"json": bb_api_repositories_page1, "status_code": 200}, - *[{"json": None, 
"status_code": 500}] * MAX_NUMBER_ATTEMPTS, + *[{"json": None, "status_code": 500}] * (MAX_NUMBER_ATTEMPTS - 1), + {"json": None, "status_code": 504}, {"json": {"next": bb_api_repositories_page1["next"]}, "status_code": 200}, {"json": bb_api_repositories_page2, "status_code": 200}, ], -- GitLab