From 6e7bc49ec74e89519c3f76ce7b87a71fc74d1fef Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Wed, 6 Sep 2023 11:55:28 +0200
Subject: [PATCH] Harmonize listers parameters and add test to check mandatory
 ones

Ensure that all lister classes have the same set of mandatory parameters
in their constructors, notably: scheduler, url, instance and credentials.

Add a new test checking that lister classes declare the mandatory parameters
in their constructors. The purpose is to avoid deployment issues in staging
or production environments, as celery tasks can fail to execute if mandatory
parameters are not handled by listers.

Related to swh/infra/sysadm-environment#5030.
---
 swh/lister/arch/lister.py                |  9 ++--
 swh/lister/aur/lister.py                 |  8 +--
 swh/lister/bitbucket/lister.py           |  6 ++-
 swh/lister/bower/lister.py               |  8 +--
 swh/lister/conda/lister.py               |  5 +-
 swh/lister/cpan/lister.py                |  8 +--
 swh/lister/cran/lister.py                |  7 ++-
 swh/lister/crates/lister.py              |  8 +--
 swh/lister/debian/lister.py              | 14 +++---
 swh/lister/debian/tasks.py               |  5 ++
 swh/lister/debian/tests/test_lister.py   |  2 +-
 swh/lister/debian/tests/test_tasks.py    | 32 ++++++++++++
 swh/lister/github/lister.py              |  9 ++--
 swh/lister/gnu/lister.py                 |  9 ++--
 swh/lister/golang/lister.py              |  8 +--
 swh/lister/hackage/lister.py             |  9 ++--
 swh/lister/hex/lister.py                 | 15 ++++--
 swh/lister/launchpad/lister.py           |  7 ++-
 swh/lister/npm/lister.py                 | 12 +++--
 swh/lister/nuget/lister.py               |  8 +--
 swh/lister/packagist/lister.py           |  7 ++-
 swh/lister/pubdev/lister.py              |  8 +--
 swh/lister/puppet/lister.py              |  8 +--
 swh/lister/pypi/lister.py                |  6 ++-
 swh/lister/rubygems/lister.py            |  8 +--
 swh/lister/sourceforge/lister.py         | 10 ++--
 swh/lister/tests/test_lister_packages.py | 64 ++++++++++++++++++++++++
 27 files changed, 229 insertions(+), 71 deletions(-)
 create mode 100644 swh/lister/tests/test_lister_packages.py

diff --git a/swh/lister/arch/lister.py b/swh/lister/arch/lister.py
index c281f222..cdab7285 100644
--- a/swh/lister/arch/lister.py
+++ b/swh/lister/arch/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2022-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -80,6 +80,7 @@ class ArchLister(StatelessLister[ArchListerPage]):
     VISIT_TYPE = "arch"
     INSTANCE = "arch"
 
+    BASE_URL = "https://archlinux.org"
     ARCH_PACKAGE_URL_PATTERN = "{base_url}/packages/{repo}/{arch}/{pkgname}"
     ARCH_PACKAGE_VERSIONS_URL_PATTERN = "{base_url}/packages/{pkgname[0]}/{pkgname}"
     ARCH_PACKAGE_DOWNLOAD_URL_PATTERN = (
@@ -93,6 +94,8 @@ class ArchLister(StatelessLister[ArchListerPage]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = BASE_URL,
+        instance: str = INSTANCE,
         credentials: Optional[CredentialsType] = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -119,8 +122,8 @@ class ArchLister(StatelessLister[ArchListerPage]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            url=flavours["official"]["base_info_url"],
-            instance=self.INSTANCE,
+            url=url,
+            instance=instance,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
             enable_origins=enable_origins,
diff --git a/swh/lister/aur/lister.py b/swh/lister/aur/lister.py
index dc43d7d4..82a5c406 100644
--- a/swh/lister/aur/lister.py
+++ b/swh/lister/aur/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2022-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -46,6 +46,8 @@ class AurLister(StatelessLister[AurListerPage]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = BASE_URL,
+        instance: str = INSTANCE,
         credentials: Optional[CredentialsType] = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -54,8 +56,8 @@ class AurLister(StatelessLister[AurListerPage]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            instance=self.INSTANCE,
-            url=self.BASE_URL,
+            instance=instance,
+            url=url,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
             enable_origins=enable_origins,
diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py
index d65d0c26..00d8abff 100644
--- a/swh/lister/bitbucket/lister.py
+++ b/swh/lister/bitbucket/lister.py
@@ -51,6 +51,8 @@ class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = API_URL,
+        instance: str = INSTANCE,
         page_size: int = 1000,
         incremental: bool = True,
         credentials: CredentialsType = None,
@@ -61,8 +63,8 @@ class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            url=self.API_URL,
-            instance=self.INSTANCE,
+            url=url,
+            instance=instance,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
             enable_origins=enable_origins,
diff --git a/swh/lister/bower/lister.py b/swh/lister/bower/lister.py
index cc440dc5..71473db1 100644
--- a/swh/lister/bower/lister.py
+++ b/swh/lister/bower/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2022-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -29,6 +29,8 @@ class BowerLister(StatelessLister[BowerListerPage]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = API_URL,
+        instance: str = INSTANCE,
         credentials: Optional[CredentialsType] = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -37,8 +39,8 @@ class BowerLister(StatelessLister[BowerListerPage]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            instance=self.INSTANCE,
-            url=self.API_URL,
+            instance=instance,
+            url=url,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
             enable_origins=enable_origins,
diff --git a/swh/lister/conda/lister.py b/swh/lister/conda/lister.py
index 4f5cb402..d18ac870 100644
--- a/swh/lister/conda/lister.py
+++ b/swh/lister/conda/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2022-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -39,6 +39,7 @@ class CondaLister(StatelessLister[CondaListerPage]):
         scheduler: SchedulerInterface,
         credentials: Optional[CredentialsType] = None,
         url: str = BASE_REPO_URL,
+        instance: str = INSTANCE,
         channel: str = "",
         archs: List = [],
         max_origins_per_page: Optional[int] = None,
@@ -48,7 +49,7 @@ class CondaLister(StatelessLister[CondaListerPage]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            instance=self.INSTANCE,
+            instance=instance,
             url=url,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
diff --git a/swh/lister/cpan/lister.py b/swh/lister/cpan/lister.py
index 80669ebd..0aee8a85 100644
--- a/swh/lister/cpan/lister.py
+++ b/swh/lister/cpan/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2022-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -80,6 +80,8 @@ class CpanLister(StatelessLister[CpanListerPage]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = API_BASE_URL,
+        instance: str = INSTANCE,
         credentials: Optional[CredentialsType] = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -88,8 +90,8 @@ class CpanLister(StatelessLister[CpanListerPage]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            instance=self.INSTANCE,
-            url=self.API_BASE_URL,
+            instance=instance,
+            url=url,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
             enable_origins=enable_origins,
diff --git a/swh/lister/cran/lister.py b/swh/lister/cran/lister.py
index e0dbd32f..26db72e6 100644
--- a/swh/lister/cran/lister.py
+++ b/swh/lister/cran/lister.py
@@ -34,10 +34,13 @@ class CRANLister(StatelessLister[PageType]):
     """
 
     LISTER_NAME = "cran"
+    INSTANCE = "cran"
 
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = CRAN_MIRROR_URL,
+        instance: str = INSTANCE,
         credentials: Optional[CredentialsType] = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -45,8 +48,8 @@ class CRANLister(StatelessLister[PageType]):
     ):
         super().__init__(
             scheduler,
-            url=CRAN_MIRROR_URL,
-            instance="cran",
+            url=url,
+            instance=instance,
             credentials=credentials,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
diff --git a/swh/lister/crates/lister.py b/swh/lister/crates/lister.py
index b0b08832..41890eae 100644
--- a/swh/lister/crates/lister.py
+++ b/swh/lister/crates/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2022-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -65,6 +65,8 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = BASE_URL,
+        instance: str = INSTANCE,
         credentials: CredentialsType = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -73,8 +75,8 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            url=self.BASE_URL,
-            instance=self.INSTANCE,
+            url=url,
+            instance=instance,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
             enable_origins=enable_origins,
diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py
index 4a6271e3..0b300f3a 100644
--- a/swh/lister/debian/lister.py
+++ b/swh/lister/debian/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2022 The Software Heritage developers
+# Copyright (C) 2017-2023 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -68,12 +68,14 @@ class DebianLister(Lister[DebianListerState, DebianPageType]):
     """
 
     LISTER_NAME = "debian"
+    MIRROR_URL = "http://deb.debian.org/debian/"
+    INSTANCE = "Debian"
 
     def __init__(
         self,
         scheduler: SchedulerInterface,
-        distribution: str = "Debian",
-        mirror_url: str = "http://deb.debian.org/debian/",
+        url: str = MIRROR_URL,
+        instance: str = INSTANCE,
         suites: List[Suite] = ["stretch", "buster", "bullseye"],
         components: List[Component] = ["main", "contrib", "non-free"],
         credentials: Optional[CredentialsType] = None,
@@ -83,8 +85,8 @@ class DebianLister(Lister[DebianListerState, DebianPageType]):
     ):
         super().__init__(
             scheduler=scheduler,
-            url=mirror_url,
-            instance=distribution,
+            url=url,
+            instance=instance,
             credentials=credentials,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
@@ -95,7 +97,7 @@ class DebianLister(Lister[DebianListerState, DebianPageType]):
         if not self.url.endswith("/"):
             self.url += "/"
 
-        self.distribution = distribution
+        self.distribution = instance
         self.suites = suites
         self.components = components
 
diff --git a/swh/lister/debian/tasks.py b/swh/lister/debian/tasks.py
index fe62a784..89b21fb4 100644
--- a/swh/lister/debian/tasks.py
+++ b/swh/lister/debian/tasks.py
@@ -10,6 +10,11 @@ from .lister import DebianLister
 @shared_task(name=__name__ + ".DebianListerTask")
 def list_debian_distribution(**lister_args):
     """List a Debian distribution"""
+    # for backward compatibility with previous parameter names
+    if "mirror_url" in lister_args:
+        lister_args["url"] = lister_args.pop("mirror_url")
+    if "distribution" in lister_args:
+        lister_args["instance"] = lister_args.pop("distribution")
     return DebianLister.from_configfile(**lister_args).run().dict()
 
 
diff --git a/swh/lister/debian/tests/test_lister.py b/swh/lister/debian/tests/test_lister.py
index 6f2711d4..fcaed468 100644
--- a/swh/lister/debian/tests/test_lister.py
+++ b/swh/lister/debian/tests/test_lister.py
@@ -65,7 +65,7 @@ def _init_test(
 
     lister = DebianLister(
         scheduler=swh_scheduler,
-        mirror_url=_mirror_url,
+        url=_mirror_url,
         suites=list(debian_sources.keys()),
         components=_components,
     )
diff --git a/swh/lister/debian/tests/test_tasks.py b/swh/lister/debian/tests/test_tasks.py
index 0a1d30d4..78688c90 100644
--- a/swh/lister/debian/tests/test_tasks.py
+++ b/swh/lister/debian/tests/test_tasks.py
@@ -23,6 +23,35 @@ def test_lister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
     stats = ListerStats(pages=12, origins=35618)
     lister.run.return_value = stats
 
+    kwargs = dict(
+        url="http://www-ftp.lip6.fr/pub/linux/distributions/Ubuntu/archive/",
+        instance="Ubuntu",
+        suites=["xenial", "bionic", "focal"],
+        components=["main", "multiverse", "restricted", "universe"],
+    )
+
+    res = swh_scheduler_celery_app.send_task(
+        "swh.lister.debian.tasks.DebianListerTask", kwargs=kwargs
+    )
+    assert res
+    res.wait()
+    assert res.successful()
+
+    lister.from_configfile.assert_called_once_with(**kwargs)
+    lister.run.assert_called_once_with()
+
+    assert res.result == stats.dict()
+
+
+@patch("swh.lister.debian.tasks.DebianLister")
+def test_lister_old_parameter_names(
+    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
+):
+    # setup the mocked DebianLister
+    lister.from_configfile.return_value = lister
+    stats = ListerStats(pages=12, origins=35618)
+    lister.run.return_value = stats
+
     kwargs = dict(
         mirror_url="http://www-ftp.lip6.fr/pub/linux/distributions/Ubuntu/archive/",
         distribution="Ubuntu",
@@ -37,6 +66,9 @@ def test_lister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
     res.wait()
     assert res.successful()
 
+    kwargs["url"] = kwargs.pop("mirror_url")
+    kwargs["instance"] = kwargs.pop("distribution")
+
     lister.from_configfile.assert_called_once_with(**kwargs)
     lister.run.assert_called_once_with()
 
diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py
index 738c516f..7e63d16a 100644
--- a/swh/lister/github/lister.py
+++ b/swh/lister/github/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2022  The Software Heritage developers
+# Copyright (C) 2020-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -62,6 +62,7 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
     """  # noqa: B950
 
     LISTER_NAME = "github"
+    INSTANCE = "github"
 
     API_URL = "https://api.github.com/repositories"
     PAGE_SIZE = 1000
@@ -69,6 +70,8 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = API_URL,
+        instance: str = INSTANCE,
         credentials: CredentialsType = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -79,8 +82,8 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            url=self.API_URL,
-            instance="github",
+            url=url,
+            instance=instance,
             with_github_session=True,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
diff --git a/swh/lister/gnu/lister.py b/swh/lister/gnu/lister.py
index 721bdc23..2af66428 100644
--- a/swh/lister/gnu/lister.py
+++ b/swh/lister/gnu/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2021  The Software Heritage developers
+# Copyright (C) 2019-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -25,11 +25,14 @@ class GNULister(StatelessLister[GNUPageType]):
     """
 
     LISTER_NAME = "GNU"
+    INSTANCE = "GNU"
     GNU_FTP_URL = "https://ftp.gnu.org"
 
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = GNU_FTP_URL,
+        instance: str = INSTANCE,
         credentials: CredentialsType = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -37,8 +40,8 @@ class GNULister(StatelessLister[GNUPageType]):
     ):
         super().__init__(
             scheduler=scheduler,
-            url=self.GNU_FTP_URL,
-            instance="GNU",
+            url=url,
+            instance=instance,
             credentials=credentials,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
diff --git a/swh/lister/golang/lister.py b/swh/lister/golang/lister.py
index 36a247b1..368c1d08 100644
--- a/swh/lister/golang/lister.py
+++ b/swh/lister/golang/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2022-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -45,6 +45,8 @@ class GolangLister(Lister[GolangStateType, GolangPageType]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = GOLANG_MODULES_INDEX_URL,
+        instance: str = LISTER_NAME,
         incremental: bool = False,
         credentials: CredentialsType = None,
         max_origins_per_page: Optional[int] = None,
@@ -53,8 +55,8 @@ class GolangLister(Lister[GolangStateType, GolangPageType]):
     ):
         super().__init__(
             scheduler=scheduler,
-            url=self.GOLANG_MODULES_INDEX_URL,
-            instance=self.LISTER_NAME,
+            url=url,
+            instance=instance,
             credentials=credentials,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
diff --git a/swh/lister/hackage/lister.py b/swh/lister/hackage/lister.py
index a86ff67c..1872bc68 100644
--- a/swh/lister/hackage/lister.py
+++ b/swh/lister/hackage/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2022-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -43,17 +43,18 @@ class HackageLister(Lister[HackageListerState, HackageListerPage]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = BASE_URL,
+        instance: str = INSTANCE,
         credentials: Optional[CredentialsType] = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
         enable_origins: bool = True,
-        url: Optional[str] = None,
     ):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            instance=self.INSTANCE,
-            url=url if url else self.BASE_URL,
+            instance=instance,
+            url=url,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
             enable_origins=enable_origins,
diff --git a/swh/lister/hex/lister.py b/swh/lister/hex/lister.py
index b264b602..1ff3a8b1 100644
--- a/swh/lister/hex/lister.py
+++ b/swh/lister/hex/lister.py
@@ -1,11 +1,11 @@
-# Copyright (C) 2021-2022  The Software Heritage developers
+# Copyright (C) 2021-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from dataclasses import asdict, dataclass
 import logging
-from typing import Any, Dict, Iterator, List
+from typing import Any, Dict, Iterator, List, Optional
 from urllib.parse import urljoin
 
 import iso8601
@@ -46,15 +46,22 @@ class HexLister(Lister[HexListerState, HexListerPage]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
-        instance: str = "hex",
+        url: str = HEX_API_URL,
+        instance: str = LISTER_NAME,
         page_size: int = 100,
         credentials: CredentialsType = None,
+        max_origins_per_page: Optional[int] = None,
+        max_pages: Optional[int] = None,
+        enable_origins: bool = True,
     ):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            url=self.HEX_API_URL,
+            url=url,
             instance=instance,
+            max_origins_per_page=max_origins_per_page,
+            max_pages=max_pages,
+            enable_origins=enable_origins,
         )
         # TODO: Add authentication support
         self.page_size = page_size
diff --git a/swh/lister/launchpad/lister.py b/swh/lister/launchpad/lister.py
index 987154c0..1545693e 100644
--- a/swh/lister/launchpad/lister.py
+++ b/swh/lister/launchpad/lister.py
@@ -59,11 +59,14 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
             will be returned
     """
 
+    LAUNCHPAD_URL = "https://launchpad.net/"
     LISTER_NAME = "launchpad"
 
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = LAUNCHPAD_URL,
+        instance: str = LISTER_NAME,
         incremental: bool = False,
         credentials: CredentialsType = None,
         max_origins_per_page: Optional[int] = None,
@@ -72,8 +75,8 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
     ):
         super().__init__(
             scheduler=scheduler,
-            url="https://launchpad.net/",
-            instance="launchpad",
+            url=url,
+            instance=instance,
             credentials=credentials,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py
index f10c02d1..b1276c6d 100644
--- a/swh/lister/npm/lister.py
+++ b/swh/lister/npm/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2022 the Software Heritage developers
+# Copyright (C) 2018-2023 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
@@ -50,6 +50,8 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = API_FULL_LISTING_URL,
+        instance: str = INSTANCE,
         page_size: int = 1000,
         incremental: bool = False,
         credentials: CredentialsType = None,
@@ -60,10 +62,8 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            url=self.API_INCREMENTAL_LISTING_URL
-            if incremental
-            else self.API_FULL_LISTING_URL,
-            instance=self.INSTANCE,
+            url=url,
+            instance=instance,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
             enable_origins=enable_origins,
@@ -75,6 +75,8 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]):
             # provided as the startkey query parameter value, so we increment the page
             # size by one to avoid double package processing
             self.page_size += 1
+        else:
+            self.url = self.API_INCREMENTAL_LISTING_URL
         self.incremental = incremental
 
         self.session.headers.update({"Accept": "application/json"})
diff --git a/swh/lister/nuget/lister.py b/swh/lister/nuget/lister.py
index 98f9fc9b..1d04f7d0 100644
--- a/swh/lister/nuget/lister.py
+++ b/swh/lister/nuget/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2022-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -43,6 +43,8 @@ class NugetLister(Lister[NugetListerState, NugetListerPage]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = API_INDEX_URL,
+        instance: str = INSTANCE,
         credentials: Optional[CredentialsType] = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -51,8 +53,8 @@ class NugetLister(Lister[NugetListerState, NugetListerPage]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            instance=self.INSTANCE,
-            url=self.API_INDEX_URL,
+            instance=instance,
+            url=url,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
             enable_origins=enable_origins,
diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py
index 99dd9863..ba7ac12f 100644
--- a/swh/lister/packagist/lister.py
+++ b/swh/lister/packagist/lister.py
@@ -55,6 +55,7 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
     """
 
     LISTER_NAME = "Packagist"
+    INSTANCE = "packagist"
     PACKAGIST_PACKAGES_LIST_URL = "https://packagist.org/packages/list.json"
     PACKAGIST_PACKAGE_URL_FORMATS = [
         # preferred, static, efficient on their side as it can be cached
@@ -72,6 +73,8 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = PACKAGIST_PACKAGES_LIST_URL,
+        instance: str = INSTANCE,
         credentials: CredentialsType = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -80,8 +83,8 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
     ):
         super().__init__(
             scheduler=scheduler,
-            url=self.PACKAGIST_PACKAGES_LIST_URL,
-            instance="packagist",
+            url=url,
+            instance=instance,
             credentials=credentials,
             with_github_session=True,
             max_origins_per_page=max_origins_per_page,
diff --git a/swh/lister/pubdev/lister.py b/swh/lister/pubdev/lister.py
index 50e4f15d..601bdefd 100644
--- a/swh/lister/pubdev/lister.py
+++ b/swh/lister/pubdev/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2022-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -35,6 +35,8 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = BASE_URL,
+        instance: str = INSTANCE,
         credentials: Optional[CredentialsType] = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -43,8 +45,8 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            instance=self.INSTANCE,
-            url=self.BASE_URL,
+            instance=instance,
+            url=url,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
             enable_origins=enable_origins,
diff --git a/swh/lister/puppet/lister.py b/swh/lister/puppet/lister.py
index 6e84b27c..26c7a4c3 100644
--- a/swh/lister/puppet/lister.py
+++ b/swh/lister/puppet/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2022-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -42,6 +42,8 @@ class PuppetLister(Lister[PuppetListerState, PuppetListerPage]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = BASE_URL,
+        instance: str = INSTANCE,
         credentials: Optional[CredentialsType] = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -50,8 +52,8 @@ class PuppetLister(Lister[PuppetListerState, PuppetListerPage]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            instance=self.INSTANCE,
-            url=self.BASE_URL,
+            instance=instance,
+            url=url,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
             enable_origins=enable_origins,
diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py
index f5141c1d..5ba08eb7 100644
--- a/swh/lister/pypi/lister.py
+++ b/swh/lister/pypi/lister.py
@@ -69,6 +69,8 @@ class PyPILister(Lister[PyPIListerState, PackageListPage]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = PACKAGE_LIST_URL,
+        instance: str = INSTANCE,
         credentials: Optional[CredentialsType] = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -76,8 +78,8 @@ class PyPILister(Lister[PyPIListerState, PackageListPage]):
     ):
         super().__init__(
             scheduler=scheduler,
-            url=self.PACKAGE_LIST_URL,
-            instance=self.INSTANCE,
+            url=url,
+            instance=instance,
             credentials=credentials,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
diff --git a/swh/lister/rubygems/lister.py b/swh/lister/rubygems/lister.py
index bb317eab..4e59b901 100644
--- a/swh/lister/rubygems/lister.py
+++ b/swh/lister/rubygems/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022  The Software Heritage developers
+# Copyright (C) 2022-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -62,6 +62,8 @@ class RubyGemsLister(StatelessLister[RubyGemsListerPage]):
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = RUBY_GEMS_POSTGRES_DUMP_BASE_URL,
+        instance: str = INSTANCE,
         credentials: Optional[CredentialsType] = None,
         max_origins_per_page: Optional[int] = None,
         max_pages: Optional[int] = None,
@@ -70,8 +72,8 @@ class RubyGemsLister(StatelessLister[RubyGemsListerPage]):
         super().__init__(
             scheduler=scheduler,
             credentials=credentials,
-            instance=self.INSTANCE,
-            url=self.RUBY_GEMS_POSTGRES_DUMP_BASE_URL,
+            instance=instance,
+            url=url,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
             enable_origins=enable_origins,
diff --git a/swh/lister/sourceforge/lister.py b/swh/lister/sourceforge/lister.py
index 234e198f..518a7ece 100644
--- a/swh/lister/sourceforge/lister.py
+++ b/swh/lister/sourceforge/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2022  The Software Heritage developers
+# Copyright (C) 2021-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -105,12 +105,16 @@ ProjectsLastModifiedCache = Dict[Tuple[str, str], LastModifiedT]
 class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
     """List origins from the "SourceForge" forge."""
 
+    SOURCEFORGE_URL = "https://sourceforge.net"
     # Part of the lister API, that identifies this lister
     LISTER_NAME = "sourceforge"
+    INSTANCE = "main"
 
     def __init__(
         self,
         scheduler: SchedulerInterface,
+        url: str = SOURCEFORGE_URL,
+        instance: str = INSTANCE,
         incremental: bool = False,
         credentials: Optional[CredentialsType] = None,
         max_origins_per_page: Optional[int] = None,
@@ -119,8 +123,8 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
     ):
         super().__init__(
             scheduler=scheduler,
-            url="https://sourceforge.net",
-            instance="main",
+            url=url,
+            instance=instance,
             credentials=credentials,
             max_origins_per_page=max_origins_per_page,
             max_pages=max_pages,
diff --git a/swh/lister/tests/test_lister_packages.py b/swh/lister/tests/test_lister_packages.py
new file mode 100644
index 00000000..f0c6bef4
--- /dev/null
+++ b/swh/lister/tests/test_lister_packages.py
@@ -0,0 +1,64 @@
+# Copyright (C) 2023  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import importlib
+import inspect
+import pkgutil
+
+import pytest
+
+
+def lister_packages():
+    """Return the names of the sub-packages of swh.lister, tests excluded."""
+    import swh.lister
+
+    # keep packages only, plain modules found under swh.lister are ignored
+    packages = (m for m in pkgutil.iter_modules(swh.lister.__path__) if m.ispkg)
+    # the tests package is filtered out by name
+    return [m.name for m in packages if m.name != "tests"]
+
+
+@pytest.mark.parametrize("lister_package", lister_packages())
+def test_lister_has_mandatory_parameters(lister_package):
+    """Check every lister class of a package declares the mandatory parameters."""
+    from swh.lister.pattern import Lister, StatelessLister
+
+    lister_mandatory_params = {
+        "scheduler",
+        "url",
+        "instance",
+        "credentials",
+        "max_origins_per_page",
+        "max_pages",
+        "enable_origins",
+    }
+
+    lister_module = importlib.import_module(f"swh.lister.{lister_package}.lister")
+    for name, obj in inspect.getmembers(lister_module, inspect.isclass):
+        # the base classes themselves and unrelated classes are not checked
+        if obj in (Lister, StatelessLister) or not issubclass(obj, Lister):
+            continue
+        # keyword-only parameters are declared parameters too, so count them
+        argspec = inspect.getfullargspec(obj.__init__)
+        lister_params = set(argspec.args) | set(argspec.kwonlyargs)
+        # sorted so that the failure message is stable from one run to another
+        missing_params = sorted(lister_mandatory_params - lister_params)
+
+        assert not missing_params, (
+            f"swh.lister.{lister_package}.{name} class is missing the following "
+            f"parameters in its constructor: {', '.join(missing_params)}.\n"
+            "Please add them and transmit them to the base lister class constructor "
+            f"to avoid bad surprises when deploying\nthe {lister_package} lister in "
+            "staging or production environment."
+        )
+
+
+@pytest.mark.parametrize("lister_package", lister_packages())
+def test_lister_package_has_register_function(lister_package):
+    """Check that the lister package exposes a ``register`` attribute."""
+    package = importlib.import_module(f"swh.lister.{lister_package}")
+    error = f"swh.lister.{lister_package} module is missing the register function "
+    error += "required to register its celery tasks in scheduler database."
+    assert hasattr(package, "register"), error
-- 
GitLab