From b81621274b78f379d630047ba5320bb5700162ee Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Wed, 29 May 2019 12:05:51 +0200
Subject: [PATCH] lister: Unify credentials structure between listers

This becomes a dictionary of key <lister-name>, value a dict of key
<instance-name>, value list of dict username/password.

Related T1772
---
 swh/lister/bitbucket/lister.py              |  3 ++-
 swh/lister/core/lister_base.py              |  9 +++----
 swh/lister/core/lister_transports.py        | 29 ++++++++++++++++++++-
 swh/lister/debian/lister.py                 |  1 +
 swh/lister/github/lister.py                 |  3 ++-
 swh/lister/gitlab/lister.py                 | 15 +----------
 swh/lister/npm/lister.py                    |  3 ++-
 swh/lister/phabricator/lister.py            |  7 ++++-
 swh/lister/phabricator/tasks.py             |  7 ++---
 swh/lister/phabricator/tests/test_lister.py |  4 ++-
 swh/lister/phabricator/tests/test_tasks.py  |  3 ++-
 swh/lister/pypi/lister.py                   |  3 ++-
 12 files changed, 56 insertions(+), 31 deletions(-)

diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py
index ae5866ca..04a20aa1 100644
--- a/swh/lister/bitbucket/lister.py
+++ b/swh/lister/bitbucket/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 the Software Heritage developers
+# Copyright (C) 2017-2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
@@ -17,6 +17,7 @@ class BitBucketLister(SWHIndexingHttpLister):
     PATH_TEMPLATE = '/repositories?after=%s'
     MODEL = BitBucketModel
     LISTER_NAME = 'bitbucket'
+    instance = 'bitbucket'
 
     def get_model_from_repo(self, repo):
         return {
diff --git a/swh/lister/core/lister_base.py b/swh/lister/core/lister_base.py
index b215810f..a742453e 100644
--- a/swh/lister/core/lister_base.py
+++ b/swh/lister/core/lister_base.py
@@ -239,12 +239,9 @@ class SWHListerBase(abc.ABC, config.SWHConfig):
     @property
     def ADDITIONAL_CONFIG(self):  # noqa: N802
         return {
-            'credentials':
-                ('list[dict]', []),
-            'cache_responses':
-                ('bool', False),
-            'cache_dir':
-                ('str', '~/.cache/swh/lister/%s' % self.LISTER_NAME),
+            'credentials': ('dict', {}),
+            'cache_responses': ('bool', False),
+            'cache_dir': ('str', '~/.cache/swh/lister/%s' % self.LISTER_NAME),
         }
 
     INITIAL_BACKOFF = 10
diff --git a/swh/lister/core/lister_transports.py b/swh/lister/core/lister_transports.py
index 20f841f6..75bc6ad4 100644
--- a/swh/lister/core/lister_transports.py
+++ b/swh/lister/core/lister_transports.py
@@ -62,13 +62,40 @@ class SWHListerHttpTransport(abc.ABC):
         """Get the full parameters passed to requests given the
         transport_request identifier.
 
+        This uses credentials if any are provided. The 'credentials'
+        configuration is expected to be a dict of multiple levels. The first
+        level is the lister's name, the second is the lister's instance name.
+
+        For example:
+
+        credentials:
+          github:  # github lister
+            github:  # has only one instance (so far)
+            - username: some
+              password: somekey
+            - username: one
+              password: onekey
+            - ...
+          gitlab:  # gitlab lister
+            riseup:  # has many instances
+            - username: someone
+              password: ...
+            - ...
+            gitlab:
+            - username: someone
+              password: ...
+            - ...
+
+
         MAY BE OVERRIDDEN if something more complex than the request headers
         is needed.
 
         """
         params = {}
         params['headers'] = self.request_headers() or {}
-        creds = self.config['credentials']
+        all_creds = self.config['credentials']
+        lister_creds = all_creds.get(self.LISTER_NAME, {})
+        creds = lister_creds.get(self.instance, {})
         auth = random.choice(creds) if creds else None
         if auth:
             params['auth'] = (auth['username'], auth['password'])
diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py
index d23e693c..5b72c6f2 100644
--- a/swh/lister/debian/lister.py
+++ b/swh/lister/debian/lister.py
@@ -32,6 +32,7 @@ class DebianLister(SWHListerHttpTransport, SWHListerBase):
     MODEL = Package
     PATH_TEMPLATE = None
     LISTER_NAME = 'debian'
+    instance = 'debian'
 
     def __init__(self, override_config=None):
         SWHListerHttpTransport.__init__(self, api_baseurl="bogus")
diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py
index c0e49c41..245afde9 100644
--- a/swh/lister/github/lister.py
+++ b/swh/lister/github/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 the Software Heritage developers
+# Copyright (C) 2017-2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
@@ -14,6 +14,7 @@ class GitHubLister(SWHIndexingHttpLister):
     MODEL = GitHubModel
     API_URL_INDEX_RE = re.compile(r'^.*/repositories\?since=(\d+)')
     LISTER_NAME = 'github'
+    instance = 'github'  # There is only 1 instance of such lister
 
     def get_model_from_repo(self, repo):
         return {
diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
index 8e09e23b..41cb7c67 100644
--- a/swh/lister/gitlab/lister.py
+++ b/swh/lister/gitlab/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 the Software Heritage developers
+# Copyright (C) 2018-2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
@@ -28,19 +28,6 @@ class GitLabLister(PageByPageHttpLister):
             self.PATH_TEMPLATE = '%s&per_page=%s' % (
                 self.PATH_TEMPLATE, per_page)
 
-    @property
-    def ADDITIONAL_CONFIG(self):
-        """Override additional config as the 'credentials' structure change
-           between the ancestor classes and this class.
-
-           cf. request_params method below
-
-        """
-        default_config = super().ADDITIONAL_CONFIG
-        # 'credentials' is a dict of (instance, {username, password}) dict
-        default_config['credentials'] = ('dict', {})
-        return default_config
-
     def request_params(self, identifier):
         """Get the full parameters passed to requests given the
         transport_request identifier.
diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py
index 3c64de3f..d686851b 100644
--- a/swh/lister/npm/lister.py
+++ b/swh/lister/npm/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 the Software Heritage developers
+# Copyright (C) 2018-2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
@@ -14,6 +14,7 @@ class NpmListerBase(SWHIndexingHttpLister):
     """
     MODEL = NpmModel
     LISTER_NAME = 'npm'
+    instance = 'npm'
 
     def __init__(self, api_baseurl='https://replicate.npmjs.com',
                  per_page=1000, override_config=None):
diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py
index dfb77e3a..d6e062e4 100644
--- a/swh/lister/phabricator/lister.py
+++ b/swh/lister/phabricator/lister.py
@@ -2,6 +2,7 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import urllib.parse
 
 from swh.lister.core.indexing_lister import SWHIndexingHttpLister
 from swh.lister.phabricator.models import PhabricatorModel
@@ -14,13 +15,17 @@ class PhabricatorLister(SWHIndexingHttpLister):
     MODEL = PhabricatorModel
     LISTER_NAME = 'phabricator'
 
-    def __init__(self, forge_url, api_token, override_config=None):
+    def __init__(self, forge_url, api_token, instance=None,
+                 override_config=None):
         if forge_url.endswith("/"):
             forge_url = forge_url[:-1]
         self.forge_url = forge_url
         api_endpoint = ('api/diffusion.repository.'
                         'search?api.token=%s') % api_token
         api_baseurl = '%s/%s' % (forge_url, api_endpoint)
+        if not instance:
+            instance = urllib.parse.urlparse(forge_url).hostname
+        self.instance = instance
         super().__init__(api_baseurl=api_baseurl,
                          override_config=override_config)
 
diff --git a/swh/lister/phabricator/tasks.py b/swh/lister/phabricator/tasks.py
index 83ecd67c..3ebb9810 100644
--- a/swh/lister/phabricator/tasks.py
+++ b/swh/lister/phabricator/tasks.py
@@ -6,9 +6,10 @@ from swh.scheduler.celery_backend.config import app
 from swh.lister.phabricator.lister import PhabricatorLister
 
 
-def new_lister(
-        forge_url='https://forge.softwareheritage.org', api_token='', **kw):
-    return PhabricatorLister(forge_url=forge_url, api_token=api_token, **kw)
+def new_lister(forge_url='https://forge.softwareheritage.org', api_token='',
+               instance='swh', **kw):
+    return PhabricatorLister(forge_url=forge_url, api_token=api_token,
+                             instance=instance, **kw)
 
 
 @app.task(name=__name__ + '.IncrementalPhabricatorLister')
diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py
index 29fbcecd..6f565abf 100644
--- a/swh/lister/phabricator/tests/test_lister.py
+++ b/swh/lister/phabricator/tests/test_lister.py
@@ -24,9 +24,11 @@ class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
 
     def get_fl(self, override_config=None):
         """(Override) Retrieve an instance of fake lister (fl).
+
         """
         if override_config or self.fl is None:
-            self.fl = self.Lister(forge_url='https://fakeurl', api_token='a-1',
+            self.fl = self.Lister(forge_url='https://fakeurl', instance='fake',
+                                  api_token='a-1',
                                   override_config=override_config)
             self.fl.INITIAL_BACKOFF = 1
 
diff --git a/swh/lister/phabricator/tests/test_tasks.py b/swh/lister/phabricator/tests/test_tasks.py
index 160efccb..a97196f4 100644
--- a/swh/lister/phabricator/tests/test_tasks.py
+++ b/swh/lister/phabricator/tests/test_tasks.py
@@ -24,6 +24,7 @@ def test_incremental(lister, swh_app, celery_session_worker):
     assert res.successful()
 
     lister.assert_called_once_with(
-        api_token='', forge_url='https://forge.softwareheritage.org')
+        api_token='', forge_url='https://forge.softwareheritage.org',
+        instance='swh')
     lister.db_last_index.assert_called_once_with()
     lister.run.assert_called_once_with(min_bound=42)
diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py
index a488850d..fedffd07 100644
--- a/swh/lister/pypi/lister.py
+++ b/swh/lister/pypi/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 the Software Heritage developers
+# Copyright (C) 2018-2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
@@ -16,6 +16,7 @@ class PyPILister(ListerOnePageApiTransport, SimpleLister):
     MODEL = PyPIModel
     LISTER_NAME = 'pypi'
     PAGE = 'https://pypi.org/simple/'
+    instance = 'pypi'  # As of today only the main pypi.org is used
 
     def __init__(self, override_config=None):
         ListerOnePageApiTransport .__init__(self)
-- 
GitLab