Skip to content
Snippets Groups Projects
Commit 2793ef9a authored by Franck Bret's avatar Franck Bret
Browse files

D lang lister

Add a dlang module that retrieves origins from an HTTP API endpoint.
Each origin is a git based project url on github.com, gitlab.com or
bitbucket.com.
parent 1c964ccc
No related branches found
No related tags found
1 merge request!488D lang lister
......@@ -65,6 +65,8 @@ setup(
lister.cran=swh.lister.cran:register
lister.crates=swh.lister.crates:register
lister.debian=swh.lister.debian:register
lister.dlang=swh.lister.dlang:register
lister.fedora=swh.lister.fedora:register
lister.gitea=swh.lister.gitea:register
lister.github=swh.lister.github:register
lister.gitiles=swh.lister.gitiles:register
......
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""
Dlang lister
=============
D is a general-purpose programming language with static typing, systems-level access,
and C-like syntax.
The `Dlang`_ lister lists origins from its package manager registry, `DUB`_.
The registry provides an `http api endpoint`_ that helps in getting the packages index
with name, url, versions and dates.
As of July 2023, `DUB`_ lists 2364 package names.
Origins retrieving strategy
---------------------------
To build a list of origins we make a GET request to an `http api endpoint`_ that returns
a Json array of objects.
The origin url for each package is constructed with the information of corresponding
`repository` entry which represents Git based projects hosted on Github, GitLab or
Bitbucket.
Page listing
------------
There is only one page, which lists all origin URLs.
Origins from page
-----------------
The lister is stateless and yields all origin URLs from a single page: a list of
package URLs with their last update information.
Running tests
-------------
Activate the virtualenv and run from within swh-lister directory::
pytest -s -vv --log-cli-level=DEBUG swh/lister/dlang/tests
Testing with Docker
-------------------
Change directory to swh/docker then launch the docker environment::
docker compose up -d
Then schedule a dlang listing task::
docker compose exec swh-scheduler swh scheduler task add -p oneshot list-dlang
You can follow lister execution by displaying logs of swh-lister service::
docker compose logs -f swh-lister
.. _Dlang: https://dlang.org/
.. _DUB: https://code.dlang.org/
.. _http api endpoint: https://code.dlang.org/api/packages/dump
"""
def register():
    """Describe the D lang lister plugin.

    Returns:
        A mapping with the lister class under ``"lister"`` and, under
        ``"task_modules"``, a one-element list naming this package's
        ``tasks`` module.
    """
    # Imported here rather than at module level, as in the original code.
    from .lister import DlangLister

    tasks_module = "%s.tasks" % __name__
    registration = {
        "lister": DlangLister,
        "task_modules": [tasks_module],
    }
    return registration
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
from typing import Any, Dict, Iterator, List, Optional
import iso8601
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
from ..pattern import CredentialsType, StatelessLister
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Aliasing the page results returned by `get_pages` method from the lister.
# A page is a list of dictionaries; `get_origins_from_page` reads the
# "repository" and "stats" keys of each one.
DlangListerPage = List[Dict[str, Any]]
class DlangLister(StatelessLister[DlangListerPage]):
    """List D lang origins.

    Origins are built from the single JSON document served at
    ``PACKAGES_DUMP_URL``. Each entry's ``repository`` object (``kind``,
    ``owner``, ``project``) is turned into a Git repository url on one of the
    hosts declared in ``KINDS``.
    """

    LISTER_NAME = "dlang"
    VISIT_TYPE = "git"  # D lang origins url are Git repositories
    INSTANCE = "dlang"

    BASE_URL = "https://code.dlang.org"
    PACKAGES_DUMP_URL = BASE_URL + "/api/packages/dump"

    # Maps the "kind" of a repository entry to the base url of its host.
    # NOTE(review): Bitbucket's canonical host is presumably bitbucket.org;
    # confirm before changing, since it alters the emitted origin urls.
    KINDS = {
        "github": "https://github.com",
        "gitlab": "https://gitlab.com",
        "bitbucket": "https://bitbucket.com",
    }

    KIND_URL_PATTERN = "{url}/{owner}/{project}"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        credentials: Optional[CredentialsType] = None,
        max_origins_per_page: Optional[int] = None,
        max_pages: Optional[int] = None,
        enable_origins: bool = True,
    ):
        """Set up the lister against the packages dump endpoint.

        All arguments are forwarded unchanged to the parent lister; the
        instance name and url are fixed to ``INSTANCE`` and
        ``PACKAGES_DUMP_URL``.
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            instance=self.INSTANCE,
            url=self.PACKAGES_DUMP_URL,
            max_origins_per_page=max_origins_per_page,
            max_pages=max_pages,
            enable_origins=enable_origins,
        )
        # Ask the API for a JSON response.
        self.session.headers.update({"Accept": "application/json"})

    def get_pages(self) -> Iterator[DlangListerPage]:
        """Yield the single page listing all packages.

        It requests ``self.url`` (set to ``PACKAGES_DUMP_URL`` in
        ``__init__``) and yields the decoded JSON body. There is only one
        page that lists all origin urls.
        """
        response = self.http_request(self.url)
        yield response.json()

    def get_origins_from_page(self, page: DlangListerPage) -> Iterator[ListedOrigin]:
        """Iterate on the entries of a page and yield ListedOrigin instances.

        Entries whose repository ``kind`` is not declared in ``KINDS`` are
        logged and skipped. An entry without a ``stats.updatedAt`` value is
        still listed, with ``last_update`` left to ``None``.
        """
        assert self.lister_obj.id is not None

        for entry in page:
            repo: Dict[str, Any] = entry["repository"]
            kind: str = repo["kind"]

            if kind not in self.KINDS:
                # Use the module logger (not the root logger) with lazy
                # arguments so the record is attributed to this module.
                logger.error("Can not build a repository url with %r", repo)
                continue

            repo_url = self.KIND_URL_PATTERN.format(
                url=self.KINDS[kind], owner=repo["owner"], project=repo["project"]
            )

            # A missing update date must not abort the whole listing.
            updated_at = (entry.get("stats") or {}).get("updatedAt")
            last_update = iso8601.parse_date(updated_at) if updated_at else None

            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type=self.VISIT_TYPE,
                url=repo_url,
                last_update=last_update,
            )
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from celery import shared_task
from swh.lister.dlang.lister import DlangLister
@shared_task(name=__name__ + ".DlangListerTask")
def list_dlang(**lister_args):
    """Lister task for D lang packages registry.

    Builds a ``DlangLister`` from the configuration file with the given
    keyword arguments, runs it and returns the run statistics as a dict.
    """
    lister = DlangLister.from_configfile(**lister_args)
    stats = lister.run()
    return stats.dict()
@shared_task(name=__name__ + ".ping")
def _ping():
    """Trivial task returning the string ``"OK"``."""
    reply = "OK"
    return reply
This diff is collapsed.
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import iso8601
from swh.lister.dlang.lister import DlangLister
# Origins (url and ISO 8601 last update date) that `test_dlang_lister`
# expects the lister to have recorded in the scheduler.
expected_origins = [
{
"url": "https://github.com/katyukha/TheProcess",
"last_update": "2023-07-12T14:42:46.231Z",
},
{
"url": "https://gitlab.com/AntonMeep/silly",
"last_update": "2023-07-12T01:32:31.235Z",
},
]
def test_dlang_lister(datadir, requests_mock_datadir, swh_scheduler):
    """Run the lister and compare the recorded origins with the expected ones."""
    lister = DlangLister(scheduler=swh_scheduler)
    stats = lister.run()

    assert stats.pages == 1
    assert stats.origins == 2

    recorded = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(recorded) == len(expected_origins)

    # Compare as sets: the order of the recorded origins is not checked.
    actual = {
        (origin.visit_type, origin.url, origin.last_update) for origin in recorded
    }
    wanted = {
        ("git", item["url"], iso8601.parse_date(item["last_update"]))
        for item in expected_origins
    }
    assert actual == wanted
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.pattern import ListerStats
def test_dlang_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """Send the ping task and check that it succeeds with ``"OK"``."""
    task_name = "swh.lister.dlang.tasks.ping"
    async_result = swh_scheduler_celery_app.send_task(task_name)
    assert async_result

    async_result.wait()
    assert async_result.successful()
    assert async_result.result == "OK"
def test_dlang_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker):
    """Send the lister task with a mocked DlangLister and check its result."""
    # Replace the lister used by the task module with a mock that returns
    # itself from `from_configfile` and fixed statistics from `run`.
    expected_stats = ListerStats(pages=42, origins=42)
    mocked_lister = mocker.patch("swh.lister.dlang.tasks.DlangLister")
    mocked_lister.from_configfile.return_value = mocked_lister
    mocked_lister.run.return_value = expected_stats

    async_result = swh_scheduler_celery_app.send_task(
        "swh.lister.dlang.tasks.DlangListerTask"
    )
    assert async_result

    async_result.wait()
    assert async_result.successful()
    assert async_result.result == expected_stats.dict()

    mocked_lister.from_configfile.assert_called_once_with()
    mocked_lister.run.assert_called_once_with()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment