From 0e1093e308dd0599bb99a1628f9a1c0796a8bf26 Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Wed, 9 Oct 2024 19:00:57 +0200 Subject: [PATCH] pattern: Add first_visits_queue_prefix parameter to Lister constructor It enables declaring a lister whose first visits of listed origins must be scheduled with high priority. Related to swh/devel/swh-scheduler#4687. --- swh/lister/pattern.py | 3 +++ swh/lister/tests/test_pattern.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/swh/lister/pattern.py b/swh/lister/pattern.py index 1b149d55..61ed8d7c 100644 --- a/swh/lister/pattern.py +++ b/swh/lister/pattern.py @@ -115,6 +115,7 @@ class Lister(Generic[StateType, PageType]): enable_origins: bool = True, with_github_session: bool = False, record_batch_size: int = 1000, + first_visits_queue_prefix: Optional[str] = None, ): if not self.LISTER_NAME: raise ValueError("Must set the LISTER_NAME attribute on Lister classes") @@ -138,6 +139,7 @@ class Lister(Generic[StateType, PageType]): self.instance = instance else: self.instance = urlparse(self.url).netloc + self.first_visits_queue_prefix = first_visits_queue_prefix self.scheduler = scheduler @@ -265,6 +267,7 @@ class Lister(Generic[StateType, PageType]): self.lister_obj = self.scheduler.get_or_create_lister( name=self.LISTER_NAME, instance_name=self.instance, + first_visits_queue_prefix=self.first_visits_queue_prefix, ) return self.state_from_dict(copy.deepcopy(self.lister_obj.current_state)) diff --git a/swh/lister/tests/test_pattern.py b/swh/lister/tests/test_pattern.py index 9abb8f70..f6ce91aa 100644 --- a/swh/lister/tests/test_pattern.py +++ b/swh/lister/tests/test_pattern.py @@ -189,6 +189,7 @@ def test_stateless_instantiation(swh_scheduler): scheduler=swh_scheduler, url="https://example.com", instance="example.com", + first_visits_queue_prefix="test", ) # check the lister was registered in the scheduler backend @@ -197,6 +198,7 @@ def test_stateless_instantiation(swh_scheduler): ) 
assert stored_lister == lister.lister_obj assert stored_lister.current_state == {} + assert stored_lister.first_visits_queue_prefix == "test" assert lister.state is None with pytest.raises(NotImplementedError): -- GitLab