From 9aa8a6f7ae920ac95342830ab97373e9a6ff0fbc Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com> Date: Fri, 21 Jun 2019 16:06:48 +0200 Subject: [PATCH] bitbucket: Allow specifying the number of repos per API request This change is independent, but it still somehow fixes the issue occurring on T1826. Related T1826 --- swh/lister/bitbucket/lister.py | 19 ++++++++++++++----- swh/lister/bitbucket/tasks.py | 6 +++--- swh/lister/bitbucket/tests/test_tasks.py | 9 ++++++--- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py index 2c00c68c..c2e02924 100644 --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -15,12 +15,22 @@ from swh.lister.core.indexing_lister import IndexingHttpLister logger = logging.getLogger(__name__) +DEFAULT_BITBUCKET_PAGE = 10 + + class BitBucketLister(IndexingHttpLister): PATH_TEMPLATE = '/repositories?after=%s' MODEL = BitBucketModel LISTER_NAME = 'bitbucket' instance = 'bitbucket' + def __init__(self, api_baseurl, override_config=None, per_page=100): + super().__init__( + api_baseurl=api_baseurl, override_config=override_config) + if per_page != DEFAULT_BITBUCKET_PAGE: + self.PATH_TEMPLATE = '%s&pagelen=%s' % ( + self.PATH_TEMPLATE, per_page) + def get_model_from_repo(self, repo): return { 'uid': repo['uuid'], @@ -76,11 +86,10 @@ class BitBucketLister(IndexingHttpLister): else: ret = lower <= inner <= upper except Exception as e: - logger.error(str(e) + ': %s, %s, %s' % - (('inner=%s%s' % (type(inner), inner)), - ('lower=%s%s' % (type(lower), lower)), - ('upper=%s%s' % (type(upper), upper))) - ) + logger.error(str(e) + ': %s, %s, %s', + ('inner=%s%s' % (type(inner), inner)), + ('lower=%s%s' % (type(lower), lower)), + ('upper=%s%s' % (type(upper), upper))) raise return ret diff --git a/swh/lister/bitbucket/tasks.py b/swh/lister/bitbucket/tasks.py index 9985b488..5477b948 100644 --- 
a/swh/lister/bitbucket/tasks.py +++ b/swh/lister/bitbucket/tasks.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 the Software Heritage developers +# Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -12,8 +12,8 @@ from .lister import BitBucketLister GROUP_SPLIT = 10000 -def new_lister(api_baseurl='https://api.bitbucket.org/2.0'): - return BitBucketLister(api_baseurl=api_baseurl) +def new_lister(api_baseurl='https://api.bitbucket.org/2.0', per_page=100): + return BitBucketLister(api_baseurl=api_baseurl, per_page=per_page) @app.task(name=__name__ + '.IncrementalBitBucketLister') diff --git a/swh/lister/bitbucket/tests/test_tasks.py b/swh/lister/bitbucket/tests/test_tasks.py index 76746279..1e02b6ff 100644 --- a/swh/lister/bitbucket/tests/test_tasks.py +++ b/swh/lister/bitbucket/tests/test_tasks.py @@ -26,7 +26,8 @@ def test_incremental(lister, swh_app, celery_session_worker): res.wait() assert res.successful() - lister.assert_called_once_with(api_baseurl='https://api.bitbucket.org/2.0') + lister.assert_called_once_with( + api_baseurl='https://api.bitbucket.org/2.0', per_page=100) lister.db_last_index.assert_called_once_with() lister.run.assert_called_once_with(min_bound=42, max_bound=None) @@ -44,7 +45,8 @@ def test_range(lister, swh_app, celery_session_worker): res.wait() assert res.successful() - lister.assert_called_once_with(api_baseurl='https://api.bitbucket.org/2.0') + lister.assert_called_once_with( + api_baseurl='https://api.bitbucket.org/2.0', per_page=100) lister.db_last_index.assert_not_called() lister.run.assert_called_once_with(min_bound=12, max_bound=42) @@ -74,7 +76,8 @@ def test_relister(lister, swh_app, celery_session_worker): break sleep(1) - lister.assert_called_with(api_baseurl='https://api.bitbucket.org/2.0') + lister.assert_called_with( + api_baseurl='https://api.bitbucket.org/2.0', per_page=100) # one by the 
FullBitbucketRelister task # + 5 for the RangeBitbucketLister subtasks -- GitLab