Skip to content
Snippets Groups Projects
Commit 3a1beae3 authored by Franck Bret's avatar Franck Bret
Browse files

Elm Lister

The Elm Lister lists Elm packages origins from the Elm
lang registry.
It uses an http api endpoint to list packages origins.
Origins are Github repositories, releases take advantage
of the Github release Api.
parent f814e117
No related branches found
No related tags found
1 merge request!490Elm Lister
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""
Elm lister
==========
`Elm`_ is a functional language that compiles to JavaScript.
Additional packages for the language can be searched from the `Packages`_ website
and installed with `elm install`_ command. The Elm packages website also provides a
`Http Api endpoint`_ listing all available packages.
Elm origins are Git repositories hosted on Github. Each repository must provide its
packaged releases using the Github release system.
As of July 2023 `Packages`_ lists 1746 packages.
Origins retrieving strategy
---------------------------
To build a list of origins we make a GET request to the `Http Api endpoint`_ that returns
a Json array of objects.
The origin url for each package is constructed with the information of corresponding
`name` entry which represents the suffix of Github repositories (org/project_name).
Page listing
------------
There is only one page listing all origin urls.
Origins from page
-----------------
The lister is stateless and yields all origin urls from one page. It is a list of package
repository urls.
Running tests
-------------
Activate the virtualenv and run from within swh-lister directory::
pytest -s -vv --log-cli-level=DEBUG swh/lister/elm/tests
Testing with Docker
-------------------
Change directory to swh/docker then launch the docker environment::
docker compose up -d
Then schedule an Elm listing task::
docker compose exec swh-scheduler swh scheduler task add -p oneshot list-elm
You can follow lister execution by displaying logs of swh-lister service::
docker compose logs -f swh-lister
.. _Elm: https://elm-lang.org/
.. _Packages: https://package.elm-lang.org/
.. _elm install: https://guide.elm-lang.org/install/elm.html#elm-install
.. _Http Api endpoint: https://package.elm-lang.org/search.json
"""
def register():
    """Return the Elm lister class together with its task module names.

    Returns:
        A dict with the lister class under ``"lister"`` and a one-element
        list holding ``"<this package>.tasks"`` under ``"task_modules"``.
    """
    # Function-scope import: the lister module is only loaded when
    # register() is actually called.
    from .lister import ElmLister

    tasks_module = f"{__name__}.tasks"
    return {"lister": ElmLister, "task_modules": [tasks_module]}
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
from typing import Any, Dict, Iterator, List, Optional
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
from ..pattern import CredentialsType, StatelessLister
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Type of one page yielded by the lister's `get_pages` method: a list of
# dict entries (string keys, arbitrary values).
ElmListerPage = List[Dict[str, Any]]
class ElmLister(StatelessLister[ElmListerPage]):
    """Stateless lister for Elm package origins.

    A single JSON document is fetched from ``SEARCH_URL``; the ``name`` of each
    entry is substituted into ``REPO_URL_PATTERN`` to build the origin url.
    """

    LISTER_NAME = "elm"
    VISIT_TYPE = "git"  # Elm origins url are Git repositories
    INSTANCE = "elm"

    # Http api endpoint returning the list of packages
    SEARCH_URL = "https://package.elm-lang.org/search.json"

    # Template turning a package name (org/project_name) into a repository url
    REPO_URL_PATTERN = "https://github.com/{name}"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        credentials: Optional[CredentialsType] = None,
        max_origins_per_page: Optional[int] = None,
        max_pages: Optional[int] = None,
        enable_origins: bool = True,
    ):
        """Set up the lister against the fixed Elm instance and search url.

        Args:
            scheduler: scheduler interface handed to the parent lister.
            credentials: optional credentials handed to the parent lister.
            max_origins_per_page: forwarded unchanged to the parent lister.
            max_pages: forwarded unchanged to the parent lister.
            enable_origins: forwarded unchanged to the parent lister.
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            url=self.SEARCH_URL,
            instance=self.INSTANCE,
            max_origins_per_page=max_origins_per_page,
            max_pages=max_pages,
            enable_origins=enable_origins,
        )
        # Ask the endpoint for a JSON payload.
        self.session.headers.update({"Accept": "application/json"})

    def get_pages(self) -> Iterator[ElmListerPage]:
        """Yield the one and only page of results.

        A single request is made to ``self.url`` and the decoded JSON body is
        yielded as the page; no further pages are requested.
        """
        search_response = self.http_request(self.url)
        packages = search_response.json()
        yield packages

    def get_origins_from_page(self, page: ElmListerPage) -> Iterator[ListedOrigin]:
        """Yield one ListedOrigin per entry of ``page``.

        The ``name`` value of each entry is inserted into ``REPO_URL_PATTERN``
        to form the origin url; ``last_update`` is always ``None``.
        """
        assert self.lister_obj.id is not None

        for package in page:
            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type=self.VISIT_TYPE,
                url=self.REPO_URL_PATTERN.format(name=package["name"]),
                last_update=None,
            )
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from celery import shared_task
from swh.lister.elm.lister import ElmLister
@shared_task(name=__name__ + ".ElmListerTask")
def list_elm(**lister_args):
    """Lister task for Elm lang packages.

    Args:
        **lister_args: keyword arguments passed to
            ``ElmLister.from_configfile``.

    Returns:
        The ``dict()`` form of the value returned by the lister's ``run()``.
    """
    lister = ElmLister.from_configfile(**lister_args)
    stats = lister.run()
    return stats.dict()
@shared_task(name=__name__ + ".ping")
def _ping():
    """Trivial task that always answers with the string ``"OK"``."""
    return "OK"
[
{
"name": "elm/bytes",
"summary": "Work with sequences of bytes (a.k.a. ArrayBuffer, typed arrays, DataView)",
"license": "BSD-3-Clause",
"version": "1.0.8"
},
{
"name": "STTR13/ziplist",
"summary": "List with a selected element that makes impossible state impossible.",
"license": "BSD-3-Clause",
"version": "1.4.2"
},
{
"name": "cuducos/elm-format-number",
"summary": "Format numbers as pretty strings",
"license": "BSD-3-Clause",
"version": "9.0.1"
}
]
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.elm.lister import ElmLister
# Origin urls the lister is expected to record, one per package of the test
# data. The test compares them as a set, so the order here is irrelevant.
expected_origins = [
    "https://github.com/STTR13/ziplist",
    "https://github.com/elm/bytes",
    "https://github.com/cuducos/elm-format-number",
]
def test_elm_lister(datadir, requests_mock_datadir, swh_scheduler):
    """Run the lister once and check what ends up in the scheduler."""
    elm_lister = ElmLister(scheduler=swh_scheduler)

    stats = elm_lister.run()
    assert stats.pages == 1
    assert stats.origins == 1 + 1 + 1

    recorded = swh_scheduler.get_listed_origins(elm_lister.lister_obj.id).results
    assert len(recorded) == len(expected_origins)

    # Compare as sets of (visit_type, url, last_update) so order is ignored.
    recorded_triples = {
        (origin.visit_type, origin.url, origin.last_update) for origin in recorded
    }
    expected_triples = {("git", url, None) for url in expected_origins}
    assert recorded_triples == expected_triples
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.pattern import ListerStats
def test_elm_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """Send the ping task and check that it completes with ``"OK"``."""
    task = swh_scheduler_celery_app.send_task("swh.lister.elm.tasks.ping")
    assert task

    # Block until the task has finished before inspecting its outcome.
    task.wait()
    assert task.successful()
    assert task.result == "OK"
def test_elm_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker):
    """Send the lister task against a mocked ElmLister and check its result."""
    # Patch the ElmLister name in the tasks module; from_configfile() hands
    # back the same mock so that run() can be configured on it.
    mocked_lister = mocker.patch("swh.lister.elm.tasks.ElmLister")
    mocked_lister.from_configfile.return_value = mocked_lister
    expected_stats = ListerStats(pages=42, origins=42)
    mocked_lister.run.return_value = expected_stats

    task = swh_scheduler_celery_app.send_task("swh.lister.elm.tasks.ElmListerTask")
    assert task
    task.wait()
    assert task.successful()
    assert task.result == expected_stats.dict()

    # The task was sent without arguments, so none must reach the lister.
    mocked_lister.from_configfile.assert_called_once_with()
    mocked_lister.run.assert_called_once_with()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment