Skip to content
Snippets Groups Projects
Verified Commit 0f9ba7bd authored by Antoine R. Dumont
Browse files

swh.lister.gitlab: Bootstrap gitlab lister

Related T989
parent c3504341
Branches iter_masked_swhids
Tags v2.3.1
No related merge requests found
# Copyright (C) 2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
import time
from ..core.indexing_lister import SWHIndexingHttpLister
from .models import GitlabModel
class GitlabLister(SWHIndexingHttpLister):
    """Indexing HTTP lister for the projects exposed by a GitLab API.

    Each project returned by the API is converted into a dict of model
    fields by :meth:`get_model_from_repo`; paging and rate limiting are
    handled by :meth:`get_next_target_from_response` and
    :meth:`transport_quota_check`.
    """
    # Path to give and mentioning the last id for the next page
    PATH_TEMPLATE = '/projects?since=%d&visibility=public'
    # base orm model
    MODEL = GitlabModel
    # Extracts the `since` index out of a URL shaped like PATH_TEMPLATE
    API_URL_INDEX_RE = re.compile(
        r'^.*/projects\?since=(\d+)&visibility=public')

    def get_model_from_repo(self, repo):
        """Map one project entry of the API response to model fields.

        Args:
            repo (dict): a project as decoded from the API's JSON; must
                provide the keys 'id', 'name', 'path_with_namespace',
                'web_url' and 'description'.

        Returns:
            dict: field values for the model, keyed by field name.

        Raises:
            KeyError: if one of the expected keys is missing from `repo`.

        """
        return {
            'uid': repo['id'],
            'indexable': repo['id'],
            'name': repo['name'],
            'full_name': repo['path_with_namespace'],
            'html_url': repo['web_url'],
            'origin_url': repo['web_url'],
            'origin_type': 'git',
            'description': repo['description'],
            # FIXME: How to determine the fork nature?
            'fork': False,
        }

    def transport_quota_check(self, response):
        """Deal with ratelimit

        A response is considered rate limited when its status code is
        403 and its 'RateLimit-Remaining' header is 0.

        Args:
            response: the HTTP response to inspect (`status_code` and
                `headers` are read).

        Returns:
            tuple: `(True, delay)` when rate limited, with `delay` the
            number of seconds until 'RateLimit-Reset' (read as a Unix
            timestamp), bounded to [0, 3600]; `(False, 0)` otherwise.

        Raises:
            KeyError: if a rate-limited response has no
                'RateLimit-Reset' header.
            ValueError: if a rate-limit header is not an integer.

        """
        # Look at the status first: responses other than 403 are never
        # rate limited, so their headers do not need to be present.
        if response.status_code != 403:
            return False, 0

        # A 403 lacking the header is a plain "forbidden", not a quota
        # issue; do not fail with KeyError on it.
        remaining = response.headers.get('RateLimit-Remaining')
        if remaining is None or int(remaining) != 0:
            return False, 0

        reset_at = int(response.headers['RateLimit-Reset'])
        delay = min(reset_at - time.time(), 3600)
        # The reset time may already be in the past (e.g. clock skew);
        # never report a negative waiting time.
        return True, max(delay, 0)

    def get_next_target_from_response(self, response):
        """Deal with pagination

        Args:
            response: the HTTP response whose `links` mapping is read.

        Returns:
            int or None: the index captured by API_URL_INDEX_RE in the
            'next' link URL, or None when there is no 'next' link.

        """
        if 'next' not in response.links:
            return None
        next_url = response.links['next']['url']
        # NOTE(review): match() returns None when the URL does not have
        # the PATH_TEMPLATE shape, which makes .group() raise
        # AttributeError -- confirm the format of the API's 'next' links.
        return int(self.API_URL_INDEX_RE.match(next_url).group(1))

    def transport_response_simplified(self, response):
        """Convert the JSON body of a response into a list of model dicts.

        Args:
            response: the HTTP response; its JSON body is iterated as a
                sequence of projects.

        Returns:
            list: one dict per project, as built by get_model_from_repo.

        """
        repos = response.json()
        return [self.get_model_from_repo(repo) for repo in repos]
# Copyright (C) 2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from sqlalchemy import Column, Boolean, Integer
from ..core.models import ModelBase
class GitlabModel(ModelBase):
    """ORM model for one Gitlab repository.

    Rows are stored in the 'main_gitlab_repos' table.
    """
    __tablename__ = 'main_gitlab_repos'

    # integer primary key of the row
    uid = Column(Integer, primary_key=True)
    # integer column with a database index
    indexable = Column(Integer, index=True)
    # boolean flag, defaults to False at construction time (see __init__)
    fork = Column(Boolean)

    def __init__(self, *args, **kwargs):
        """Initialize the row, handling the optional `fork` keyword here.

        `fork` is removed from `kwargs` (False when absent) and set on
        the instance; everything else is forwarded to the parent
        constructor.
        """
        is_fork = kwargs.pop('fork', False)
        self.fork = is_fork
        super().__init__(*args, **kwargs)
# Copyright (C) 2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
IndexingRangeListerTask,
IndexingRefreshListerTask, ListerTaskBase)
from .lister import GitlabLister
class GitlabDotComListerTask(ListerTaskBase):
    """Lister task whose lister targets the gitlab.com API."""

    def new_lister(self):
        """Build a GitlabLister named 'gitlab.com' using the v4 API url."""
        lister_options = {
            'lister_name': 'gitlab.com',
            'api_baseurl': 'https://gitlab.com/api/v4',
        }
        return GitlabLister(**lister_options)
class IncrementalGitlabDotComLister(
        GitlabDotComListerTask, IndexingDiscoveryListerTask):
    """Discovery lister task for gitlab.com.

    Combines the gitlab.com lister factory with
    IndexingDiscoveryListerTask; routed to the
    'swh_lister_gitlab_discover' queue.
    """
    task_queue = 'swh_lister_gitlab_discover'
class RangeGitlabLister(
        GitlabDotComListerTask, IndexingRangeListerTask):
    """Range lister task for gitlab.com.

    Combines the gitlab.com lister factory with IndexingRangeListerTask;
    routed to the 'swh_lister_gitlab_refresh' queue.
    """
    task_queue = 'swh_lister_gitlab_refresh'
class FullGitlabRelister(
        GitlabDotComListerTask, IndexingRefreshListerTask):
    """Refresh (full relisting) task for gitlab.com.

    Combines the gitlab.com lister factory with
    IndexingRefreshListerTask; routed to the
    'swh_lister_gitlab_refresh' queue.
    """
    task_queue = 'swh_lister_gitlab_refresh'
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment