Skip to content
Snippets Groups Projects
Verified Commit 935b9cd2 authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

swh.lister.core: Make gitlab lister a paging lister instance

Related T989
parent db36c499
No related branches found
No related tags found
No related merge requests found
# Copyright (C) 2015-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import abc
import logging
from .lister_transports import SWHListerHttpTransport
from .lister_base import SWHListerBase
class SWHPagingLister(SWHListerBase):
"""Lister* intermediate class for any service that follows the simple
pagination page pattern.
- Client sends a request to list repositories starting from a
given page identifier.
- Client receives structured (json/xml/etc) response with
information about a sequential series of repositories (per page)
starting from a given index. And, if available, some indication
of the next page index for fetching the remaining repository
data.
See :class:`swh.lister.core.lister_base.SWHListerBase` for more
details.
This class cannot be instantiated. To create a new Lister for a
source code listing service that follows the model described
above, you must subclass this class. Then provide the required
overrides in addition to any unmet implementation/override
requirements of this class's base (see parent class and member
docstrings for details).
Required Overrides::
def get_next_target_from_response
"""
@abc.abstractmethod
def get_next_target_from_response(self, response):
"""Find the next server endpoint page given the entire response.
Implementation of this method depends on the server API spec
and the shape of the network response object returned by the
transport_request method.
For example, some api can use the headers links to provide the
next page.
Args:
response (transport response): response page from the server
Returns:
index of next page, possibly extracted from a next href url
"""
pass
# You probably don't need to override anything below this line.
def run(self, min_index=None, max_index=None):
"""Main entry function. Sequentially fetches repository data from the
service according to the basic outline in the class
docstring. Continually fetching sublists until either there
is no next index reference given or the given next index is
greater than the desired max_index.
Args:
min_index (indexable type): optional index to start from
max_index (indexable type): optional index to stop at
Returns:
nothing
"""
index = min_index or ''
loop_count = 0
self.min_index = min_index
self.max_index = max_index
while self.is_within_bounds(index, self.min_index, self.max_index):
logging.info('listing repos starting at %s' % index)
response, injected_repos = self.ingest_data(index)
next_index = self.get_next_target_from_response(response)
# termination condition
if (next_index is None) or (next_index == index):
logging.info('stopping after index %s, no next link found' %
index)
break
else:
index = next_index
loop_count += 1
if loop_count == 20:
logging.info('flushing updates')
loop_count = 0
self.db_session.commit()
self.db_session = self.mk_session()
self.db_session.commit()
self.db_session = self.mk_session()
class SWHPagingHttpLister(SWHListerHttpTransport, SWHPagingLister):
"""Convenience class for ensuring right lookup and init order when
combining SWHPagingLister and SWHListerHttpTransport.
"""
def __init__(self, lister_name=None, api_baseurl=None,
override_config=None):
SWHListerHttpTransport.__init__(self, api_baseurl=api_baseurl)
SWHPagingLister.__init__(self, lister_name=lister_name,
override_config=override_config)
......@@ -6,20 +6,14 @@ import random
import re
import time
from ..core.indexing_lister import SWHIndexingHttpLister
from ..core.paging_lister import SWHPagingHttpLister
from .models import GitLabModel
class GitLabLister(SWHIndexingHttpLister):
# Path to give and mentioning the last id for the next page
class GitLabLister(SWHPagingHttpLister):
# Template path expecting an integer that represents the page id
PATH_TEMPLATE = '/projects?page=%d&order_by=id&sort=asc&simple=true'
# gitlab api do not have an indexable identifier so using the page
# id
API_URL_INDEX_RE = re.compile(r'^.*/projects.*\&page=(\d+).*')
# The indexable field, the one we are supposed to use in the api
# query is not part of the lookup query. So, we cannot filter
# (method filter_before_inject), nor detect and disable origins
# (method disable_deleted_repo_tasks)
MODEL = GitLabModel
@property
......@@ -79,12 +73,6 @@ class GitLabLister(SWHIndexingHttpLister):
params['auth'] = (auth['username'], auth['password'])
return params
def filter_before_inject(self, models_list):
"""We cannot filter so returns the models_list as is.
"""
return models_list
def get_model_from_repo(self, repo):
return {
'instance': self.lister_name,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment