From 0e1093e308dd0599bb99a1628f9a1c0796a8bf26 Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Wed, 9 Oct 2024 19:00:57 +0200
Subject: [PATCH] pattern: Add first_visits_queue_prefix parameter to Lister
 constructor

This makes it possible to declare a lister whose listed origins must have
their first visits scheduled with high priority.

Related to swh/devel/swh-scheduler#4687.
---
 swh/lister/pattern.py            | 3 +++
 swh/lister/tests/test_pattern.py | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/swh/lister/pattern.py b/swh/lister/pattern.py
index 1b149d55..61ed8d7c 100644
--- a/swh/lister/pattern.py
+++ b/swh/lister/pattern.py
@@ -115,6 +115,7 @@ class Lister(Generic[StateType, PageType]):
         enable_origins: bool = True,
         with_github_session: bool = False,
         record_batch_size: int = 1000,
+        first_visits_queue_prefix: Optional[str] = None,
     ):
         if not self.LISTER_NAME:
             raise ValueError("Must set the LISTER_NAME attribute on Lister classes")
@@ -138,6 +139,7 @@ class Lister(Generic[StateType, PageType]):
             self.instance = instance
         else:
             self.instance = urlparse(self.url).netloc
+        self.first_visits_queue_prefix = first_visits_queue_prefix
 
         self.scheduler = scheduler
 
@@ -265,6 +267,7 @@ class Lister(Generic[StateType, PageType]):
         self.lister_obj = self.scheduler.get_or_create_lister(
             name=self.LISTER_NAME,
             instance_name=self.instance,
+            first_visits_queue_prefix=self.first_visits_queue_prefix,
         )
         return self.state_from_dict(copy.deepcopy(self.lister_obj.current_state))
 
diff --git a/swh/lister/tests/test_pattern.py b/swh/lister/tests/test_pattern.py
index 9abb8f70..f6ce91aa 100644
--- a/swh/lister/tests/test_pattern.py
+++ b/swh/lister/tests/test_pattern.py
@@ -189,6 +189,7 @@ def test_stateless_instantiation(swh_scheduler):
         scheduler=swh_scheduler,
         url="https://example.com",
         instance="example.com",
+        first_visits_queue_prefix="test",
     )
 
     # check the lister was registered in the scheduler backend
@@ -197,6 +198,7 @@ def test_stateless_instantiation(swh_scheduler):
     )
     assert stored_lister == lister.lister_obj
     assert stored_lister.current_state == {}
+    assert stored_lister.first_visits_queue_prefix == "test"
     assert lister.state is None
 
     with pytest.raises(NotImplementedError):
-- 
GitLab