From 88a715d0c1e12205bcfaf428c1605e202cfdb064 Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Fri, 25 Oct 2024 16:48:22 +0200 Subject: [PATCH] github: Ensure range listers do not override shared lister state Recent changes in the base Lister class implementation made the call to self.scheduler.update_lister mandatory in order to update the last termination date of a lister. This has side effects on the GitHub lister, as one incremental instance runs in parallel with multiple range instances that relist previously discovered repos. Range GitHub listers must not override the shared incremental lister state, as StaleData exceptions might be raised otherwise, so override the set_state_in_scheduler Lister method to ensure that. --- swh/lister/github/lister.py | 7 +++++++ swh/lister/github/tests/test_lister.py | 1 + 2 files changed, 8 insertions(+) diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py index 011ff3c0..986f3d6d 100644 --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -213,3 +213,10 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]): # the current run is higher than that stored in the database. if self.state.last_seen_id > scheduler_state.last_seen_id: self.updated = True + + def set_state_in_scheduler( + self, with_listing_finished_date: bool = False, force_state: bool = False + ) -> None: + # github range lister should not override shared incremental lister state + if not self.relisting: + super().set_state_in_scheduler(with_listing_finished_date, force_state) diff --git a/swh/lister/github/tests/test_lister.py b/swh/lister/github/tests/test_lister.py index ba7e37ea..20717fe6 100644 --- a/swh/lister/github/tests/test_lister.py +++ b/swh/lister/github/tests/test_lister.py @@ -135,6 +135,7 @@ def test_relister(swh_scheduler, caplog, requests_mocker) -> None: # Check that the relisting mode hasn't touched the stored state. 
lister_data = get_lister_data(swh_scheduler) assert lister_data.current_state == {"last_seen_id": 123} + assert lister_data.last_listing_finished_at is None def test_anonymous_ratelimit( -- GitLab