From 88a715d0c1e12205bcfaf428c1605e202cfdb064 Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Fri, 25 Oct 2024 16:48:22 +0200
Subject: [PATCH] github: Ensure range listers do not override shared lister
 state

Recent changes in the base Lister class implementation make the call to
self.scheduler.update_lister mandatory in order to update the last
termination date of a lister.

This has side effects on the GitHub lister, as one incremental instance
and multiple range instances (relisting previously discovered repos) are
executed in parallel.

Range GitHub listers should not override the shared incremental lister
state as StaleData exceptions might be raised otherwise, so override
the set_state_in_scheduler Lister method to ensure that.
---
 swh/lister/github/lister.py            | 7 +++++++
 swh/lister/github/tests/test_lister.py | 1 +
 2 files changed, 8 insertions(+)

diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py
index 011ff3c0..986f3d6d 100644
--- a/swh/lister/github/lister.py
+++ b/swh/lister/github/lister.py
@@ -213,3 +213,10 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
         # the current run is higher than that stored in the database.
         if self.state.last_seen_id > scheduler_state.last_seen_id:
             self.updated = True
+
+    def set_state_in_scheduler(
+        self, with_listing_finished_date: bool = False, force_state: bool = False
+    ) -> None:
+        # github range lister should not override shared incremental lister state
+        if not self.relisting:
+            super().set_state_in_scheduler(with_listing_finished_date, force_state)
diff --git a/swh/lister/github/tests/test_lister.py b/swh/lister/github/tests/test_lister.py
index ba7e37ea..20717fe6 100644
--- a/swh/lister/github/tests/test_lister.py
+++ b/swh/lister/github/tests/test_lister.py
@@ -135,6 +135,7 @@ def test_relister(swh_scheduler, caplog, requests_mocker) -> None:
     # Check that the relisting mode hasn't touched the stored state.
     lister_data = get_lister_data(swh_scheduler)
     assert lister_data.current_state == {"last_seen_id": 123}
+    assert lister_data.last_listing_finished_at is None
 
 
 def test_anonymous_ratelimit(
-- 
GitLab