Skip to content
Snippets Groups Projects
Commit fedfd73c authored by Archit Agrawal's avatar Archit Agrawal Committed by Archit Agrawal
Browse files

swh.lister.phabricator

Add a lister of all hosted repositories on a Phabricator instance
Closes T808
parent 4efb2ce6
No related branches found
No related tags found
No related merge requests found
Showing
with 2894 additions and 1 deletion
......@@ -15,6 +15,7 @@ following Python modules:
- `swh.lister.gitlab`
- `swh.lister.pypi`
- `swh.lister.npm`
- `swh.lister.phabricator`
Dependencies
------------
......@@ -164,6 +165,18 @@ logging.basicConfig(level=logging.DEBUG)
npm_lister()
```
## lister-phabricator
Once configured, you can execute a Phabricator lister using the following instructions in a `python3` script:
```lang=python
import logging
from swh.lister.phabricator.tasks import incremental_phabricator_lister
logging.basicConfig(level=logging.DEBUG)
incremental_phabricator_lister(forge_url='https://forge.softwareheritage.org', api_token='XXXX')
```
Licensing
---------
......
......@@ -9,7 +9,8 @@ import click
logger = logging.getLogger(__name__)
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', 'npm']
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
'npm', 'phabricator']
@click.command()
......@@ -96,6 +97,14 @@ def cli(db_url, listers, drop_tables):
NpmVisitModel.metadata.drop_all(_lister.db_engine)
NpmVisitModel.metadata.create_all(_lister.db_engine)
elif lister == 'phabricator':
from .phabricator.models import IndexingModelBase as ModelBase
from .phabricator.lister import PhabricatorLister
_lister = PhabricatorLister(
forge_url='https://forge.softwareheritage.org',
api_token='',
override_config=override_conf)
else:
raise ValueError(
'Invalid lister %s: only supported listers are %s' %
......
......@@ -11,4 +11,5 @@ def celery_includes():
'swh.lister.gitlab.tasks',
'swh.lister.npm.tasks',
'swh.lister.pypi.tasks',
'swh.lister.phabricator.tasks',
]
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.core.indexing_lister import SWHIndexingHttpLister
from swh.lister.phabricator.models import PhabricatorModel
from collections import defaultdict
class PhabricatorLister(SWHIndexingHttpLister):
    """Lister of the repositories hosted on a Phabricator instance.

    Repositories are queried through the ``diffusion.repository.search``
    API endpoint, ordered from oldest to newest, and the ``after`` cursor
    found in each response is used as the index of the next request.
    """
    # Query string appended to the API base url; '%s' receives the index
    # (cursor) after which the listing must continue.
    PATH_TEMPLATE = '&order=oldest&attachments[uris]=1&after=%s'
    MODEL = PhabricatorModel
    LISTER_NAME = 'phabricator'

    def __init__(self, forge_url, api_token, override_config=None):
        """
        Args:
            forge_url (str): Root url of the Phabricator instance; a single
                trailing slash is removed
            api_token (str): API token sent with every request
            override_config (dict): Optional configuration forwarded to the
                parent lister
        """
        if forge_url.endswith("/"):
            forge_url = forge_url[:-1]
        self.forge_url = forge_url
        api_endpoint = ('api/diffusion.repository.'
                        'search?api.token=%s') % api_token
        api_baseurl = '%s/%s' % (forge_url, api_endpoint)
        super().__init__(api_baseurl=api_baseurl,
                         override_config=override_config)

    def request_headers(self):
        """
        (Override) Set requests headers to send when querying the
        Phabricator API
        """
        return {'User-Agent': 'Software Heritage phabricator lister',
                'Accept': 'application/json'}

    def get_model_from_repo(self, repo):
        """
        Convert one repository entry of the API response into the dict
        of values stored by the lister.

        Args:
            repo (dict): One item of the ``result.data`` list

        Returns:
            dict: The model values, or None when the repository exposes no
            usable http(s) url
        """
        url = get_repo_url(repo['attachments']['uris']['uris'])
        if url is None:
            return None
        return {
            # NOTE(review): forge url and id are concatenated without any
            # separator (e.g. '<forge_url>8'); kept as-is so that stored
            # uids do not change.
            'uid': self.forge_url + str(repo['id']),
            'indexable': repo['id'],
            'name': repo['fields']['shortName'],
            'full_name': repo['fields']['name'],
            'html_url': url,
            'origin_url': url,
            'description': None,
            'origin_type': repo['fields']['vcs']
        }

    def get_next_target_from_response(self, response):
        """
        Extract the index of the next page from an API response.

        Returns:
            The ``after`` cursor of the response, or None when there is no
            further page
        """
        after = response.json()['result']['cursor']['after']
        # A JSON null is decoded to None, not to the string 'null'; the
        # string form is still accepted for backward compatibility.
        if after is None or after == 'null':
            return None
        return after

    def transport_response_simplified(self, response):
        """
        Turn an API response into a list of models.

        Returns:
            list: One entry per repository of the response; an entry is
            None when no url could be determined for the repository

        Raises:
            ValueError: when the API response holds no result
        """
        repos = response.json()
        if repos['result'] is None:
            raise ValueError(
                'Problem during information fetch: %s' % repos['error_code'])
        repos = repos['result']['data']
        return [self.get_model_from_repo(repo) for repo in repos]

    def filter_before_inject(self, models_list):
        """
        (Overrides) SWHIndexingLister.filter_before_inject

        Drop the repositories for which no model could be built (None
        entries) before delegating to the parent implementation.
        """
        models_list = [m for m in models_list if m is not None]
        return super().filter_before_inject(models_list)

    def _bootstrap_repositories_listing(self):
        """
        Method called when no min_bound value has been provided
        to the lister. Its purpose is to:

            1. get the first repository data hosted on the Phabricator
               instance

            2. inject them into the lister database

            3. return the first repository index to start the listing
               after that value

        Returns:
            int: The first repository index, or None when the instance
            hosts no repository
        """
        params = '&order=oldest&limit=1'
        response = self.safely_issue_request(params)
        # Also validates the response (raises ValueError on API error).
        models_list = self.transport_response_simplified(response)
        if not models_list:
            # Empty instance: there is no first index to start from.
            return None
        # Read the index from the raw response: the matching model is None
        # when the first repository has no usable http(s) url.
        self.max_index = response.json()['result']['data'][0]['id']
        models_list = self.filter_before_inject(models_list)
        injected = self.inject_repo_data_into_db(models_list)
        self.create_missing_origins_and_tasks(models_list, injected)
        return self.max_index

    def run(self, min_bound=None, max_bound=None):
        """
        (Override) Run the lister on the specified Phabricator instance

        Args:
            min_bound (int): Optional repository index to start the listing
                after it
            max_bound (int): Optional repository index to stop the listing
                after it
        """
        # initial call to the lister, we need to bootstrap it in that case
        if min_bound is None:
            min_bound = self._bootstrap_repositories_listing()
            if min_bound is None:
                # nothing is hosted on the instance, so nothing to list
                return
        super().run(min_bound, max_bound)
def get_repo_url(attachments):
    """
    Return url for a hosted repository from its uris attachments according
    to the following priority lists:

        * protocol: https > http
        * identifier: shortname > callsign > id

    Uris without a builtin protocol are still considered when their
    effective url uses the ``https://`` or ``http://`` scheme; they are
    ranked after every builtin identifier of the same protocol.

    Args:
        attachments (list): The ``uris`` attachment items of a repository

    Returns:
        str: The selected url, or None when no http(s) url is available
    """
    processed_urls = defaultdict(dict)
    for uri in attachments:
        builtin = uri['fields']['builtin']
        protocol = builtin['protocol']
        url = uri['fields']['uri']['effective']
        if protocol in ('http', 'https'):
            processed_urls[protocol][builtin['identifier']] = url
        elif protocol is None and url:
            # Match the full scheme so that look-alike prefixes such as
            # 'httpx://' are not mistaken for http(s) urls.
            for scheme in ('https', 'http'):
                if url.startswith(scheme + '://'):
                    processed_urls[scheme]['undefined'] = url
                    break
    for protocol in ('https', 'http'):
        # .get() avoids creating empty entries in the defaultdict
        candidates = processed_urls.get(protocol, {})
        for identifier in ('shortname', 'callsign', 'id', 'undefined'):
            if identifier in candidates:
                return candidates[identifier]
    return None
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from sqlalchemy import Column, String, Integer
from swh.lister.core.models import IndexingModelBase
class PhabricatorModel(IndexingModelBase):
    """Table row describing one repository listed from Phabricator."""

    __tablename__ = 'phabricator_repos'

    # Primary key of the row (string identifier of the repository).
    uid = Column(String, primary_key=True)

    # Integer value the listing is indexed on; the column is indexed.
    indexable = Column(Integer, index=True)
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.scheduler.celery_backend.config import app
from swh.lister.phabricator.lister import PhabricatorLister
def new_lister(
        forge_url='https://forge.softwareheritage.org', api_token='', **kw):
    """Instantiate a PhabricatorLister.

    Args:
        forge_url (str): Root url of the Phabricator instance to list
        api_token (str): API token used to query the instance
        **kw: Extra keyword arguments handed over to the lister

    Returns:
        PhabricatorLister: the newly created lister
    """
    lister_kwargs = dict(kw, forge_url=forge_url, api_token=api_token)
    return PhabricatorLister(**lister_kwargs)
@app.task(name=__name__ + '.IncrementalPhabricatorLister')
def incremental_phabricator_lister(**lister_args):
    """Celery task: resume the listing after the last index in the database.

    Args:
        **lister_args: Keyword arguments forwarded to ``new_lister``
    """
    phab_lister = new_lister(**lister_args)
    last_known_index = phab_lister.db_last_index()
    phab_lister.run(min_bound=last_known_index)
@app.task(name=__name__ + '.FullPhabricatorLister')
def full_phabricator_lister(**lister_args):
    """Celery task: run the lister without any bound.

    Args:
        **lister_args: Keyword arguments forwarded to ``new_lister``
    """
    phab_lister = new_lister(**lister_args)
    phab_lister.run()
@app.task(name=__name__ + '.ping')
def ping():
    """Celery task used as a liveness check; it only answers 'OK'."""
    answer = 'OK'
    return answer
{
"result": {
"data": [],
"cursor": {
"after": null
}
}
}
This diff is collapsed.
{
"id": 8,
"type": "REPO",
"phid": "PHID-REPO-ge2icigfu5ijk2whqfbl",
"fields": {
"name": "Blender Libraries",
"vcs": "svn",
"callsign": "BL",
"shortName": null,
"status": "active",
"isImporting": false,
"almanacServicePHID": null,
"spacePHID": null,
"dateCreated": 1385564674,
"dateModified": 1468574079,
"policy": {
"view": "public",
"edit": "admin",
"diffusion.push": "PHID-PROJ-hclk7tvd6pmpjmqastjl"
}
},
"attachments": {
"uris": {
"uris": [
{
"id": "70",
"type": "RURI",
"phid": "PHID-RURI-h7zdbkud6why4xrb2s2e",
"fields": {
"repositoryPHID": "PHID-REPO-ge2icigfu5ijk2whqfbl",
"uri": {
"raw": "https://svn.blender.org/svnroot/bf-blender/",
"display": "https://svn.blender.org/svnroot/bf-blender/",
"effective": "https://svn.blender.org/svnroot/bf-blender/",
"normalized": "svn.blender.org/svnroot/bf-blender"
},
"io": {
"raw": "observe",
"default": "none",
"effective": "observe"
},
"display": {
"raw": "always",
"default": "never",
"effective": "always"
},
"credentialPHID": null,
"disabled": false,
"builtin": {
"protocol": null,
"identifier": null
},
"dateCreated": "1467894515",
"dateModified": "1468574079"
}
}
]
}
}
}
\ No newline at end of file
from swh.lister.core.tests.conftest import * # noqa
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
import json
import unittest
from swh.lister.core.tests.test_lister import HttpListerTester
from swh.lister.phabricator.lister import PhabricatorLister
from swh.lister.phabricator.lister import get_repo_url
class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
    """Run the generic http lister tests against the Phabricator lister."""
    Lister = PhabricatorLister
    # extracts the requested index from the 'after' query parameter
    test_re = re.compile(r'\&after=([^?&]+)')
    lister_subdir = 'phabricator'
    good_api_response_file = 'api_response.json'
    good_api_response_undefined_protocol = ('api_response_undefined_'
                                            'protocol.json')
    bad_api_response_file = 'api_empty_response.json'
    first_index = 1
    last_index = 12
    entries_per_page = 10

    def get_fl(self, override_config=None):
        """(Override) Retrieve an instance of fake lister (fl).

        """
        if override_config or self.fl is None:
            self.fl = self.Lister(forge_url='https://fakeurl', api_token='a-1',
                                  override_config=override_config)
            self.fl.INITIAL_BACKOFF = 1

        self.fl.reset_backoff()
        return self.fl

    def _load_fixture(self, filename):
        """Parse a json file of the lister tests directory.

        The file is opened in a ``with`` block so that it is closed as soon
        as it has been parsed.
        """
        path = 'swh/lister/%s/tests/%s' % (self.lister_subdir, filename)
        with open(path) as f:
            return json.load(f)

    def test_get_repo_url(self):
        """get_repo_url picks the expected url from the uris attachments"""
        api_response = self._load_fixture(self.good_api_response_file)
        repos = api_response['result']['data']
        for repo in repos:
            self.assertEqual(
                'https://forge.softwareheritage.org/source/%s.git' %
                (repo['fields']['shortName']),
                get_repo_url(repo['attachments']['uris']['uris']))

        # repository whose only uri has no builtin protocol
        repo = self._load_fixture(self.good_api_response_undefined_protocol)
        self.assertEqual(
            'https://svn.blender.org/svnroot/bf-blender/',
            get_repo_url(repo['attachments']['uris']['uris']))
from unittest.mock import patch
def test_ping(swh_app, celery_session_worker):
    """The ping task of the phabricator lister answers 'OK'."""
    task_name = 'swh.lister.phabricator.tasks.ping'
    result = swh_app.send_task(task_name)
    assert result

    result.wait()
    assert result.successful()
    assert result.result == 'OK'
@patch('swh.lister.phabricator.tasks.PhabricatorLister')
def test_incremental(lister, swh_app, celery_session_worker):
    """The incremental task resumes the listing from the last db index."""
    # setup the mocked PhabricatorLister: instantiating the mock gives back
    # the mock itself, so calls made on the instance are recorded on it
    lister.return_value = lister
    lister.run.return_value = None
    lister.db_last_index.return_value = 42

    task_name = 'swh.lister.phabricator.tasks.IncrementalPhabricatorLister'
    result = swh_app.send_task(task_name)
    assert result
    result.wait()
    assert result.successful()

    # the lister is built with the default arguments of the task...
    lister.assert_called_once_with(
        api_token='', forge_url='https://forge.softwareheritage.org')
    # ...and run from the index reported by the database
    lister.db_last_index.assert_called_once_with()
    lister.run.assert_called_once_with(min_bound=42)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment