diff --git a/swh/lister/elm/__init__.py b/swh/lister/elm/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..614d8efda1e5554fa17937f75b1ece99548a234a
--- /dev/null
+++ b/swh/lister/elm/__init__.py
@@ -0,0 +1,76 @@
+# Copyright (C) 2023 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+"""
+Elm lister
+==========
+
+`Elm`_ is a functional language that compiles to JavaScript.
+
+Additional packages for the language can be searched on the `Packages`_ website
+and installed with the `elm install`_ command. The Elm packages website also
+provides an `HTTP API endpoint`_ listing all available packages.
+
+Elm origins are Git repositories hosted on GitHub. Each repository must provide its
+packaged releases using the GitHub release system.
+
+As of July 2023 `Packages`_ lists 1746 packages.
+
+Origins retrieving strategy
+---------------------------
+
+To build a list of origins we make a GET request to the `HTTP API endpoint`_, which
+returns a JSON array of objects.
+The origin URL for each package is built from the `name` entry of its object, which
+is the suffix of the GitHub repository URL (org/project_name).
+
+Page listing
+------------
+
+There is only one page, listing all origin URLs.
+
+Origins from page
+-----------------
+
+The lister is stateless and yields all origin URLs from that single page, which is a
+list of package repository URLs.
+
+Running tests
+-------------
+
+Activate the virtualenv and run from within the swh-lister directory::
+
+   pytest -s -vv --log-cli-level=DEBUG swh/lister/elm/tests
+
+Testing with Docker
+-------------------
+
+Change directory to swh/docker then launch the docker environment::
+
+   docker compose up -d
+
+Then schedule an Elm listing task::
+
+   docker compose exec swh-scheduler swh scheduler task add -p oneshot list-elm
+
+You can follow the lister execution by displaying the logs of the swh-lister service::
+
+   docker compose logs -f swh-lister
+
+.. _Elm: https://elm-lang.org/
+.. _Packages: https://package.elm-lang.org/
+.. _elm install: https://guide.elm-lang.org/install/elm.html#elm-install
+.. _HTTP API endpoint: https://package.elm-lang.org/search.json
+"""
+
+
+def register():
+    """Return the lister class and the names of its task modules."""
+    from .lister import ElmLister
+
+    task_modules = [f"{__name__}.tasks"]
+
+    return {"lister": ElmLister, "task_modules": task_modules}
diff --git a/swh/lister/elm/lister.py b/swh/lister/elm/lister.py
new file mode 100644
index 0000000000000000000000000000000000000000..83a1b84e3d9d6b292294fe98f1e2159072104127
--- /dev/null
+++ b/swh/lister/elm/lister.py
@@ -0,0 +1,74 @@
+# Copyright (C) 2023 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+from typing import Any, Dict, Iterator, List, Optional
+
+from swh.scheduler.interface import SchedulerInterface
+from swh.scheduler.model import ListedOrigin
+
+from ..pattern import CredentialsType, StatelessLister
+
+logger = logging.getLogger(__name__)
+
+# Alias for the type of the pages yielded by the lister's `get_pages` method.
+ElmListerPage = List[Dict[str, Any]]
+
+
+class ElmLister(StatelessLister[ElmListerPage]):
+    """List the origins of Elm packages."""
+
+    LISTER_NAME = "elm"
+    VISIT_TYPE = "git"  # Elm origin URLs are Git repositories
+    INSTANCE = "elm"
+
+    SEARCH_URL = "https://package.elm-lang.org/search.json"
+    REPO_URL_PATTERN = "https://github.com/{name}"
+
+    def __init__(
+        self,
+        scheduler: SchedulerInterface,
+        credentials: Optional[CredentialsType] = None,
+        max_origins_per_page: Optional[int] = None,
+        max_pages: Optional[int] = None,
+        enable_origins: bool = True,
+    ):
+        super().__init__(
+            scheduler=scheduler,
+            credentials=credentials,
+            instance=self.INSTANCE,
+            url=self.SEARCH_URL,
+            max_origins_per_page=max_origins_per_page,
+            max_pages=max_pages,
+            enable_origins=enable_origins,
+        )
+        # Request a JSON response from the endpoint.
+        self.session.headers.update({"Accept": "application/json"})
+
+    def get_pages(self) -> Iterator[ElmListerPage]:
+        """Yield the single page that lists every Elm package.
+
+        The page is the decoded JSON body of the HTTP API endpoint
+        `https://package.elm-lang.org/search.json`; the `name` of each of its
+        entries is the suffix of a GitHub repository URL.
+        """
+        search_response = self.http_request(self.url)
+        packages: ElmListerPage = search_response.json()
+        yield packages
+
+    def get_origins_from_page(self, page: ElmListerPage) -> Iterator[ListedOrigin]:
+        """Yield one ListedOrigin per package entry of the given page."""
+        lister_id = self.lister_obj.id
+        assert lister_id is not None
+
+        for package in page:
+            # A package name such as "elm/bytes" is the GitHub "org/project" suffix.
+            origin_url = self.REPO_URL_PATTERN.format(name=package["name"])
+            yield ListedOrigin(
+                lister_id=lister_id,
+                visit_type=self.VISIT_TYPE,
+                url=origin_url,
+                last_update=None,
+            )
diff --git a/swh/lister/elm/tasks.py b/swh/lister/elm/tasks.py
new file mode 100644
index 0000000000000000000000000000000000000000..de9ff97549a2d4472267ccaf0cab1e09d32b78bc
--- /dev/null
+++ b/swh/lister/elm/tasks.py
@@ -0,0 +1,21 @@
+# Copyright (C) 2023 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from celery import shared_task
+
+from swh.lister.elm.lister import ElmLister
+
+
+@shared_task(name=f"{__name__}.ElmListerTask")
+def list_elm(**lister_args):
+    """Celery task running the Elm lister; returns the run stats as a dict."""
+    lister = ElmLister.from_configfile(**lister_args)
+    return lister.run().dict()
+
+
+@shared_task(name=f"{__name__}.ping")
+def _ping():
+    """Trivial task that only returns "OK"."""
+    return "OK"
diff --git a/swh/lister/elm/tests/__init__.py b/swh/lister/elm/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/swh/lister/elm/tests/data/https_package.elm-lang.org/search.json b/swh/lister/elm/tests/data/https_package.elm-lang.org/search.json
new file mode 100644
index 0000000000000000000000000000000000000000..cdb20cecc06452dcc75bcbe816abf9da6033c3a5
--- /dev/null
+++ b/swh/lister/elm/tests/data/https_package.elm-lang.org/search.json
@@ -0,0 +1,20 @@
+[
+  {
+    "name": "elm/bytes",
+    "summary": "Work with sequences of bytes (a.k.a. 
ArrayBuffer, typed arrays, DataView)",
+    "license": "BSD-3-Clause",
+    "version": "1.0.8"
+  },
+  {
+    "name": "STTR13/ziplist",
+    "summary": "List with a selected element that makes impossible state impossible.",
+    "license": "BSD-3-Clause",
+    "version": "1.4.2"
+  },
+  {
+    "name": "cuducos/elm-format-number",
+    "summary": "Format numbers as pretty strings",
+    "license": "BSD-3-Clause",
+    "version": "9.0.1"
+  }
+]
diff --git a/swh/lister/elm/tests/test_lister.py b/swh/lister/elm/tests/test_lister.py
new file mode 100644
index 0000000000000000000000000000000000000000..88511bbbae33a17ce855396d975dc202929044c1
--- /dev/null
+++ b/swh/lister/elm/tests/test_lister.py
@@ -0,0 +1,27 @@
+# Copyright (C) 2023 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.lister.elm.lister import ElmLister
+
+expected_origins = [
+    "https://github.com/STTR13/ziplist",
+    "https://github.com/elm/bytes",
+    "https://github.com/cuducos/elm-format-number",
+]
+
+
+def test_elm_lister(datadir, requests_mock_datadir, swh_scheduler):
+    """Listing the mocked search page yields one git origin per package."""
+    lister = ElmLister(scheduler=swh_scheduler)
+    stats = lister.run()
+
+    assert stats.pages == 1
+    assert stats.origins == len(expected_origins)
+
+    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+    assert len(listed) == len(expected_origins)
+
+    actual = {(origin.visit_type, origin.url, origin.last_update) for origin in listed}
+    assert actual == {("git", url, None) for url in expected_origins}
diff --git a/swh/lister/elm/tests/test_tasks.py b/swh/lister/elm/tests/test_tasks.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbea3220f94b5c17ee28f7f5d6daec889b4cc4d8
--- /dev/null
+++ b/swh/lister/elm/tests/test_tasks.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2023 The Software Heritage developers
+# See the 
AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.lister.pattern import ListerStats
+
+
+def test_elm_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
+    """The ping task can be sent and returns "OK"."""
+    ping = swh_scheduler_celery_app.send_task("swh.lister.elm.tasks.ping")
+    assert ping
+    ping.wait()
+    assert ping.successful()
+    assert ping.result == "OK"
+
+
+def test_elm_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker):
+    # Replace ElmLister in the tasks module by a mock returning canned stats.
+    expected_stats = ListerStats(pages=42, origins=42)
+    mocked_lister = mocker.patch("swh.lister.elm.tasks.ElmLister")
+    mocked_lister.from_configfile.return_value = mocked_lister
+    mocked_lister.run.return_value = expected_stats
+
+    task = swh_scheduler_celery_app.send_task("swh.lister.elm.tasks.ElmListerTask")
+    assert task
+    task.wait()
+    assert task.successful()
+    assert task.result == expected_stats.dict()
+    mocked_lister.from_configfile.assert_called_once_with()
+    mocked_lister.run.assert_called_once_with()