From b81621274b78f379d630047ba5320bb5700162ee Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com> Date: Wed, 29 May 2019 12:05:51 +0200 Subject: [PATCH] lister: Unify credentials structure between listers This becomes a dictionary of key <lister-name>, value a dict of key <instance-name>, value list of dict username/password. Related T1772 --- swh/lister/bitbucket/lister.py | 3 ++- swh/lister/core/lister_base.py | 9 +++---- swh/lister/core/lister_transports.py | 29 ++++++++++++++++++++- swh/lister/debian/lister.py | 1 + swh/lister/github/lister.py | 3 ++- swh/lister/gitlab/lister.py | 15 +---------- swh/lister/npm/lister.py | 3 ++- swh/lister/phabricator/lister.py | 7 ++++- swh/lister/phabricator/tasks.py | 7 ++--- swh/lister/phabricator/tests/test_lister.py | 4 ++- swh/lister/phabricator/tests/test_tasks.py | 3 ++- swh/lister/pypi/lister.py | 3 ++- 12 files changed, 56 insertions(+), 31 deletions(-) diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py index ae5866ca..04a20aa1 100644 --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 the Software Heritage developers +# Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -17,6 +17,7 @@ class BitBucketLister(SWHIndexingHttpLister): PATH_TEMPLATE = '/repositories?after=%s' MODEL = BitBucketModel LISTER_NAME = 'bitbucket' + instance = 'bitbucket' def get_model_from_repo(self, repo): return { diff --git a/swh/lister/core/lister_base.py b/swh/lister/core/lister_base.py index b215810f..a742453e 100644 --- a/swh/lister/core/lister_base.py +++ b/swh/lister/core/lister_base.py @@ -239,12 +239,9 @@ class SWHListerBase(abc.ABC, config.SWHConfig): @property def ADDITIONAL_CONFIG(self): # noqa: N802 return { - 'credentials': - ('list[dict]', []), - 'cache_responses': - ('bool', False), - 'cache_dir': - ('str', '~/.cache/swh/lister/%s' % self.LISTER_NAME), + 'credentials': ('dict', {}), + 'cache_responses': ('bool', False), + 'cache_dir': ('str', '~/.cache/swh/lister/%s' % self.LISTER_NAME), } INITIAL_BACKOFF = 10 diff --git a/swh/lister/core/lister_transports.py b/swh/lister/core/lister_transports.py index 20f841f6..75bc6ad4 100644 --- a/swh/lister/core/lister_transports.py +++ b/swh/lister/core/lister_transports.py @@ -62,13 +62,40 @@ class SWHListerHttpTransport(abc.ABC): """Get the full parameters passed to requests given the transport_request identifier. + This uses credentials if any are provided. The 'credentials' + configuration is expected to be a dict of multiple levels. The first + level is the lister's name, the second is the lister's instance name. + + For example: + + credentials: + github: # github lister + github: # has only one instance (so far) + - username: some + password: somekey + - username: one + password: onekey + - ... + gitlab: # gitlab lister + riseup: # has many instances + - username: someone + password: ... + - ... + gitlab: + - username: someone + password: ... + - ... + + MAY BE OVERRIDDEN if something more complex than the request headers is needed. """ params = {} params['headers'] = self.request_headers() or {} - creds = self.config['credentials'] + all_creds = self.config['credentials'] + lister_creds = all_creds.get(self.LISTER_NAME, {}) + creds = lister_creds.get(self.instance, {}) auth = random.choice(creds) if creds else None if auth: params['auth'] = (auth['username'], auth['password']) diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py index d23e693c..5b72c6f2 100644 --- a/swh/lister/debian/lister.py +++ b/swh/lister/debian/lister.py @@ -32,6 +32,7 @@ class DebianLister(SWHListerHttpTransport, SWHListerBase): MODEL = Package PATH_TEMPLATE = None LISTER_NAME = 'debian' + instance = 'debian' def __init__(self, override_config=None): SWHListerHttpTransport.__init__(self, api_baseurl="bogus") diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py index c0e49c41..245afde9 100644 --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 the Software Heritage developers +# Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -14,6 +14,7 @@ class GitHubLister(SWHIndexingHttpLister): MODEL = GitHubModel API_URL_INDEX_RE = re.compile(r'^.*/repositories\?since=(\d+)') LISTER_NAME = 'github' + instance = 'github' # There is only 1 instance of such lister def get_model_from_repo(self, repo): return { diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py index 8e09e23b..41cb7c67 100644 --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 the Software Heritage developers +# Copyright (C) 2018-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -28,19 +28,6 @@ class GitLabLister(PageByPageHttpLister): self.PATH_TEMPLATE = '%s&per_page=%s' % ( self.PATH_TEMPLATE, per_page) - @property - def ADDITIONAL_CONFIG(self): - """Override additional config as the 'credentials' structure change - between the ancestor classes and this class. - - cf. request_params method below - - """ - default_config = super().ADDITIONAL_CONFIG - # 'credentials' is a dict of (instance, {username, password}) dict - default_config['credentials'] = ('dict', {}) - return default_config - def request_params(self, identifier): """Get the full parameters passed to requests given the transport_request identifier. diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py index 3c64de3f..d686851b 100644 --- a/swh/lister/npm/lister.py +++ b/swh/lister/npm/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 the Software Heritage developers +# Copyright (C) 2018-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -14,6 +14,7 @@ class NpmListerBase(SWHIndexingHttpLister): """ MODEL = NpmModel LISTER_NAME = 'npm' + instance = 'npm' def __init__(self, api_baseurl='https://replicate.npmjs.com', per_page=1000, override_config=None): diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py index dfb77e3a..d6e062e4 100644 --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -2,6 +2,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import urllib.parse from swh.lister.core.indexing_lister import SWHIndexingHttpLister from swh.lister.phabricator.models import PhabricatorModel @@ -14,13 +15,17 @@ class PhabricatorLister(SWHIndexingHttpLister): MODEL = PhabricatorModel LISTER_NAME = 'phabricator' - def __init__(self, forge_url, api_token, override_config=None): + def __init__(self, forge_url, api_token, instance=None, + override_config=None): if forge_url.endswith("/"): forge_url = forge_url[:-1] self.forge_url = forge_url api_endpoint = ('api/diffusion.repository.' 'search?api.token=%s') % api_token api_baseurl = '%s/%s' % (forge_url, api_endpoint) + if not instance: + instance = urllib.parse.urlparse(forge_url).hostname + self.instance = instance super().__init__(api_baseurl=api_baseurl, override_config=override_config) diff --git a/swh/lister/phabricator/tasks.py b/swh/lister/phabricator/tasks.py index 83ecd67c..3ebb9810 100644 --- a/swh/lister/phabricator/tasks.py +++ b/swh/lister/phabricator/tasks.py @@ -6,9 +6,10 @@ from swh.scheduler.celery_backend.config import app from swh.lister.phabricator.lister import PhabricatorLister -def new_lister( - forge_url='https://forge.softwareheritage.org', api_token='', **kw): - return PhabricatorLister(forge_url=forge_url, api_token=api_token, **kw) +def new_lister(forge_url='https://forge.softwareheritage.org', api_token='', + instance='swh', **kw): + return PhabricatorLister(forge_url=forge_url, api_token=api_token, + instance=instance, **kw) @app.task(name=__name__ + '.IncrementalPhabricatorLister') diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py index 29fbcecd..6f565abf 100644 --- a/swh/lister/phabricator/tests/test_lister.py +++ b/swh/lister/phabricator/tests/test_lister.py @@ -24,9 +24,11 @@ class PhabricatorListerTester(HttpListerTester, unittest.TestCase): def get_fl(self, override_config=None): """(Override) Retrieve an instance of fake lister (fl). + """ if override_config or self.fl is None: - self.fl = self.Lister(forge_url='https://fakeurl', api_token='a-1', + self.fl = self.Lister(forge_url='https://fakeurl', instance='fake', + api_token='a-1', override_config=override_config) self.fl.INITIAL_BACKOFF = 1 diff --git a/swh/lister/phabricator/tests/test_tasks.py b/swh/lister/phabricator/tests/test_tasks.py index 160efccb..a97196f4 100644 --- a/swh/lister/phabricator/tests/test_tasks.py +++ b/swh/lister/phabricator/tests/test_tasks.py @@ -24,6 +24,7 @@ def test_incremental(lister, swh_app, celery_session_worker): assert res.successful() lister.assert_called_once_with( - api_token='', forge_url='https://forge.softwareheritage.org') + api_token='', forge_url='https://forge.softwareheritage.org', + instance='swh') lister.db_last_index.assert_called_once_with() lister.run.assert_called_once_with(min_bound=42) diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py index a488850d..fedffd07 100644 --- a/swh/lister/pypi/lister.py +++ b/swh/lister/pypi/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 the Software Heritage developers +# Copyright (C) 2018-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -16,6 +16,7 @@ class PyPILister(ListerOnePageApiTransport, SimpleLister): MODEL = PyPIModel LISTER_NAME = 'pypi' PAGE = 'https://pypi.org/simple/' + instance = 'pypi' # As of today only the main pypi.org is used def __init__(self, override_config=None): ListerOnePageApiTransport .__init__(self) -- GitLab