From 9aa8a6f7ae920ac95342830ab97373e9a6ff0fbc Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Fri, 21 Jun 2019 16:06:48 +0200
Subject: [PATCH] bitbucket: Allow specifying the number of repos per API
 request

This change is independent of T1826, but it still appears to fix the issue occurring there.

Related T1826
---
 swh/lister/bitbucket/lister.py           | 19 ++++++++++++++-----
 swh/lister/bitbucket/tasks.py            |  6 +++---
 swh/lister/bitbucket/tests/test_tasks.py |  9 ++++++---
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py
index 2c00c68c..c2e02924 100644
--- a/swh/lister/bitbucket/lister.py
+++ b/swh/lister/bitbucket/lister.py
@@ -15,12 +15,22 @@ from swh.lister.core.indexing_lister import IndexingHttpLister
 logger = logging.getLogger(__name__)
 
 
+DEFAULT_BITBUCKET_PAGE = 10
+
+
 class BitBucketLister(IndexingHttpLister):
     PATH_TEMPLATE = '/repositories?after=%s'
     MODEL = BitBucketModel
     LISTER_NAME = 'bitbucket'
     instance = 'bitbucket'
 
+    def __init__(self, api_baseurl, override_config=None, per_page=100):
+        super().__init__(
+            api_baseurl=api_baseurl, override_config=override_config)
+        if per_page != DEFAULT_BITBUCKET_PAGE:
+            self.PATH_TEMPLATE = '%s&pagelen=%s' % (
+                self.PATH_TEMPLATE, per_page)
+
     def get_model_from_repo(self, repo):
         return {
             'uid': repo['uuid'],
@@ -76,11 +86,10 @@ class BitBucketLister(IndexingHttpLister):
             else:
                 ret = lower <= inner <= upper
         except Exception as e:
-            logger.error(str(e) + ': %s, %s, %s' %
-                         (('inner=%s%s' % (type(inner), inner)),
-                          ('lower=%s%s' % (type(lower), lower)),
-                          ('upper=%s%s' % (type(upper), upper)))
-                         )
+            logger.error(str(e) + ': %s, %s, %s',
+                         ('inner=%s%s' % (type(inner), inner)),
+                         ('lower=%s%s' % (type(lower), lower)),
+                         ('upper=%s%s' % (type(upper), upper)))
             raise
 
         return ret
diff --git a/swh/lister/bitbucket/tasks.py b/swh/lister/bitbucket/tasks.py
index 9985b488..5477b948 100644
--- a/swh/lister/bitbucket/tasks.py
+++ b/swh/lister/bitbucket/tasks.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 the Software Heritage developers
+# Copyright (C) 2017-2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
@@ -12,8 +12,8 @@ from .lister import BitBucketLister
 GROUP_SPLIT = 10000
 
 
-def new_lister(api_baseurl='https://api.bitbucket.org/2.0'):
-    return BitBucketLister(api_baseurl=api_baseurl)
+def new_lister(api_baseurl='https://api.bitbucket.org/2.0', per_page=100):
+    return BitBucketLister(api_baseurl=api_baseurl, per_page=per_page)
 
 
 @app.task(name=__name__ + '.IncrementalBitBucketLister')
diff --git a/swh/lister/bitbucket/tests/test_tasks.py b/swh/lister/bitbucket/tests/test_tasks.py
index 76746279..1e02b6ff 100644
--- a/swh/lister/bitbucket/tests/test_tasks.py
+++ b/swh/lister/bitbucket/tests/test_tasks.py
@@ -26,7 +26,8 @@ def test_incremental(lister, swh_app, celery_session_worker):
     res.wait()
     assert res.successful()
 
-    lister.assert_called_once_with(api_baseurl='https://api.bitbucket.org/2.0')
+    lister.assert_called_once_with(
+        api_baseurl='https://api.bitbucket.org/2.0', per_page=100)
     lister.db_last_index.assert_called_once_with()
     lister.run.assert_called_once_with(min_bound=42, max_bound=None)
 
@@ -44,7 +45,8 @@ def test_range(lister, swh_app, celery_session_worker):
     res.wait()
     assert res.successful()
 
-    lister.assert_called_once_with(api_baseurl='https://api.bitbucket.org/2.0')
+    lister.assert_called_once_with(
+        api_baseurl='https://api.bitbucket.org/2.0', per_page=100)
     lister.db_last_index.assert_not_called()
     lister.run.assert_called_once_with(min_bound=12, max_bound=42)
 
@@ -74,7 +76,8 @@ def test_relister(lister, swh_app, celery_session_worker):
             break
         sleep(1)
 
-    lister.assert_called_with(api_baseurl='https://api.bitbucket.org/2.0')
+    lister.assert_called_with(
+        api_baseurl='https://api.bitbucket.org/2.0', per_page=100)
 
     # one by the FullBitbucketRelister task
     # + 5 for the RangeBitbucketLister subtasks
-- 
GitLab