Skip to content
Snippets Groups Projects
Commit f8cfa05f authored by Franck Bret's avatar Franck Bret
Browse files

Add Julia Lister for listing Julia Packages

This module introduce Julia Lister.
It retrieves Julia packages origins from the Julia General Registry, a Git
repository made of per package directory with Toml definition files.
parent 7b932f46
No related branches found
No related tags found
No related merge requests found
......@@ -5,3 +5,4 @@ requests_mock
types-click
types-pyyaml
types-requests
types-toml
......@@ -12,3 +12,4 @@ rpy2
setuptools
tenacity >= 6.2
testing.postgresql
toml
......@@ -76,6 +76,7 @@ setup(
lister.gogs=swh.lister.gogs:register
lister.hackage=swh.lister.hackage:register
lister.hex=swh.lister.hex:register
lister.julia=swh.lister.julia:register
lister.launchpad=swh.lister.launchpad:register
lister.nixguix=swh.lister.nixguix:register
lister.npm=swh.lister.npm:register
......
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""
Julia lister
=============
`Julia`_ is a dynamic language for scientific computing applications. It comes with
an ecosystem of packages managed with its internal package manager `Pkg`_.
A list of all officially registered packages can be found in the `Julia General Registry`_
on GitHub, but it's easier to search for packages using the `JuliaHub`_ and
`Julia Packages`_ sites.
The `Julia`_ lister lists origins from a Git repository, the `Julia General registry`_.
The main `Registry.toml`_ file lists available Julia packages. Each directory
matches a package name and has Toml files to describe the package and its versions.
Julia origins are Git repositories hosted on Github. Each repository must provide its
packaged releases using the Github release system.
As of July 2023 `Julia General registry`_ lists 9714 package names.
Origins retrieving strategy
---------------------------
To build a list of origins we clone the `Julia General registry`_ Git repository, then
read the `Registry.toml`_ file to get the path to packages directories.
Each directory has a `Package.toml` file from which we get the Git repository url for
a package.
Page listing
------------
There is only one page listing all origins url.
Origins from page
-----------------
The lister is stateless and yields all origins url from one page.
Each url corresponds to the Git url of the package repository.
Running tests
-------------
Activate the virtualenv and run from within swh-lister directory::
pytest -s -vv --log-cli-level=DEBUG swh/lister/julia/tests
Testing with Docker
-------------------
Change directory to swh/docker then launch the docker environment::
docker compose up -d
Then schedule a julia listing task::
docker compose exec swh-scheduler swh scheduler task add -p oneshot list-julia
You can follow lister execution by displaying logs of swh-lister service::
docker compose logs -f swh-lister
.. _Julia: https://julialang.org/
.. _Pkg: https://docs.julialang.org/en/v1/stdlib/Pkg/
.. _Julia General registry: https://github.com/JuliaRegistries/General
.. _JuliaHub: https://juliahub.com/
.. _Julia Packages: https://julialang.org/packages/
.. _Registry.toml: https://github.com/JuliaRegistries/General/blob/master/Registry.toml
""" # noqa: B950
def register():
    """Describe the Julia lister plugin.

    Returns:
        A dict with the lister class under ``"lister"`` and, under
        ``"task_modules"``, the list of modules holding its tasks (the
        ``tasks`` submodule of this package).
    """
    # Imported lazily so that importing the package does not load the lister.
    from .lister import JuliaLister

    task_modules = ["%s.tasks" % __name__]
    return {"lister": JuliaLister, "task_modules": task_modules}
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
from pathlib import Path
from typing import Any, Iterator, List, Optional, Tuple
from dulwich import porcelain
import toml
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
from ..pattern import CredentialsType, StatelessLister
logger = logging.getLogger(__name__)
# Aliasing the page results returned by `get_pages` method from the lister.
# A page is a list of (package uuid, registry entry) pairs; the registry entry
# is a mapping whose "path" key is read to locate the package directory.
JuliaListerPage = List[Tuple[str, Any]]
class JuliaLister(StatelessLister[JuliaListerPage]):
    """List Julia packages origins.

    Origins are read from a local clone of the Julia General Registry Git
    repository: the top-level ``Registry.toml`` file maps every package to a
    directory, and the ``Package.toml`` file of that directory holds the url of
    the package Git repository.
    """

    LISTER_NAME = "julia"
    VISIT_TYPE = "git"  # Julia origins url are Git repositories
    INSTANCE = "julia"

    REPO_URL = (
        "https://github.com/JuliaRegistries/General.git"  # Julia General Registry
    )
    # Location of the registry clone on disk, and of its main index file.
    REPO_PATH = Path("/tmp/General")
    REGISTRY_PATH = REPO_PATH / "Registry.toml"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        credentials: Optional[CredentialsType] = None,
        url: Optional[str] = None,
        max_origins_per_page: Optional[int] = None,
        max_pages: Optional[int] = None,
        enable_origins: bool = True,
    ):
        """Set up the lister.

        Args:
            scheduler: scheduler instance forwarded to the base lister
            credentials: optional credentials forwarded to the base lister
            url: url of the registry Git repository; ``REPO_URL`` is used when
                it is not provided
            max_origins_per_page: forwarded to the base lister
            max_pages: forwarded to the base lister
            enable_origins: forwarded to the base lister
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            instance=self.INSTANCE,
            url=url or self.REPO_URL,
            max_origins_per_page=max_origins_per_page,
            max_pages=max_pages,
            enable_origins=enable_origins,
        )

    def get_registry_repository(self) -> None:
        """Get Julia General Registry Git repository up to date on disk.

        Pull from ``self.url`` into ``REPO_PATH`` when that path already
        exists, otherwise clone ``self.url`` into it.
        """
        if self.REPO_PATH.exists():
            porcelain.pull(self.REPO_PATH, remote_location=self.url)
        else:
            porcelain.clone(source=self.url, target=self.REPO_PATH)

    def get_pages(self) -> Iterator[JuliaListerPage]:
        """Yield the single page listing all registered packages.

        The registry Git repository is first cloned or updated on disk, then
        the ``packages`` table of its ``Registry.toml`` file is read. The page
        is the list of ``(uuid, entry)`` pairs of that table.

        Raises:
            FileNotFoundError: if ``Registry.toml`` is missing from the
                registry repository
        """
        self.get_registry_repository()
        # Explicit check rather than ``assert``, which is stripped with -O.
        if not self.REGISTRY_PATH.exists():
            raise FileNotFoundError(
                f"Julia registry file not found: {self.REGISTRY_PATH}"
            )
        registry = toml.load(self.REGISTRY_PATH)
        # Materialize the items view so the page matches JuliaListerPage.
        yield list(registry["packages"].items())

    def get_origins_from_page(self, page: JuliaListerPage) -> Iterator[ListedOrigin]:
        """Yield one ListedOrigin per package of the page.

        For each entry, the ``Package.toml`` file found under the entry
        ``path`` is loaded and its ``repo`` value is used as origin url.

        Raises:
            FileNotFoundError: if the registry repository is missing from disk
        """
        assert self.lister_obj.id is not None
        if not self.REPO_PATH.exists():
            raise FileNotFoundError(
                f"Julia registry repository not found: {self.REPO_PATH}"
            )

        for _uuid, info in page:
            package_info_path = self.REPO_PATH / info["path"] / "Package.toml"
            package_info = toml.load(package_info_path)
            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type=self.VISIT_TYPE,
                url=package_info["repo"],
                last_update=None,
            )
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from celery import shared_task
from swh.lister.julia.lister import JuliaLister
@shared_task(name=__name__ + ".JuliaListerTask")
def list_julia(**lister_args):
    """Lister task for Julia General Registry.

    Builds a JuliaLister from the configuration file and the given keyword
    arguments, runs it and returns the run statistics as a dict.
    """
    lister = JuliaLister.from_configfile(**lister_args)
    stats = lister.run()
    return stats.dict()
@shared_task(name=__name__ + ".ping")
def _ping():
    """Trivial task that always answers "OK"."""
    answer = "OK"
    return answer
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from pathlib import PosixPath
import subprocess
from typing import Optional, Union
def prepare_repository_from_archive(
    archive_path: Union[str, "os.PathLike[str]"],
    filename: Optional[str] = None,
    tmp_path: Union[str, "os.PathLike[str]"] = "/tmp",
) -> str:
    """Given an existing archive_path, uncompress it.

    Both paths may be given as plain strings or as path objects.

    Args:
        archive_path: path to the tar archive to extract
        filename: last component of the returned url; defaults to the base
            name of ``archive_path``
        tmp_path: directory in which the archive is extracted

    Returns:
        A file repo url (``file://<tmp_path>/<filename>``) which can be used
        as origin url.

    Raises:
        subprocess.CalledProcessError: if ``tar`` exits with a non-zero
            status. The case where the archive does not exist is not handled
            otherwise.
    """
    archive = os.fspath(archive_path)
    destination = os.fspath(tmp_path)

    # uncompress folder/repositories/dump for the loader to ingest
    subprocess.run(["tar", "xf", archive, "-C", destination], check=True)

    # build the origin url (or some derivative form)
    name = filename if filename else os.path.basename(archive)
    return f"file://{destination}/{name}"
File added
#!/usr/bin/env bash
# Script to generate fake-julia-registry-repository.tar.gz
# Creates a git repository like https://github.com/JuliaRegistries/General.git
# for tests purposes
# Abort on the first failing command, unset variable or failing pipeline stage
set -euo pipefail
# files and directories for Julia registry repository
mkdir -p tmp_dir/General/
cd tmp_dir/General/
touch Registry.toml
# Registry header; it ends with the [packages] table header so that package
# entries can be appended to the file afterwards
echo -e '''name = "General"
uuid = "23338594-aafe-5451-b93e-139f81909106"
repo = "https://github.com/JuliaRegistries/General.git"
description = """
Official general Julia package registry where people can
register any package they want without too much debate about
naming and without enforced standards on documentation or
testing. We nevertheless encourage documentation, testing and
some amount of consideration when choosing package names.
"""
[packages]''' > Registry.toml
# Init as a git repository
git init
git add .
git commit -m "Init fake Julia registry repository for tests purpose"
# Add a first package, Fable: its directory, Package.toml and Versions.toml
mkdir -p F/Fable
touch F/Fable/Package.toml
touch F/Fable/Versions.toml
# Package.toml carries the url of the package Git repository ("repo")
echo -e '''name = "Fable"
uuid = "a3ea4736-0a3b-4c29-ac8a-20364318a635"
repo = "https://github.com/leios/Fable.jl.git"
''' > F/Fable/Package.toml
# Versions.toml has one table per released version
echo -e '''["0.0.1"]
git-tree-sha1 = "d98ef9a5309f0ec8caaf34bf4cefaf1f1ca525e8"
["0.0.2"]
git-tree-sha1 = "65301af3ab06b04cf8a52cd43b06222bab5249c2"
''' > F/Fable/Versions.toml
# Register the package in the [packages] table of Registry.toml
echo 'a3ea4736-0a3b-4c29-ac8a-20364318a635 = { name = "Fable", path = "F/Fable" }' >> Registry.toml
git add .
git commit -m "New package: Fable v0.0.2"
# Add a second package, Oscar: its directory, Package.toml and Versions.toml
mkdir -p O/Oscar
touch O/Oscar/Package.toml
touch O/Oscar/Versions.toml
# Package.toml carries the url of the package Git repository ("repo")
echo -e '''name = "Oscar"
uuid = "f1435218-dba5-11e9-1e4d-f1a5fab5fc13"
repo = "https://github.com/oscar-system/Oscar.jl.git"
''' > O/Oscar/Package.toml
# Versions.toml has one table per released version
echo -e '''["0.2.0"]
git-tree-sha1 = "cda489ed50fbd625d245655ce6e5858c3c21ce12"
["0.3.0"]
git-tree-sha1 = "d62e911d06affb6450a0d059c3432df284a8e3c1"
["0.4.0"]
git-tree-sha1 = "91a9c623da588d5fcfc1f0ce0b3d57a0e35c65d2"
["0.5.0"]
git-tree-sha1 = "5d595e843a71df04da0e8027c4773a158be0c4f4"
["0.5.1"]
git-tree-sha1 = "501602b8c0efc9b4fc6a68d0cb53b9103f736313"
["0.5.2"]
git-tree-sha1 = "aa42d7bc3282e72b1b5c41d518661634cc454de0"
["0.6.0"]
git-tree-sha1 = "a3ca062f1e9ab1728de6af6812c1a09bb527e5ce"
["0.7.0"]
git-tree-sha1 = "185ce4c7b082bf3530940af4954642292da25ff9"
["0.7.1"]
git-tree-sha1 = "26815d2504820400189b2ba822bea2b4c81555d9"
["0.8.0"]
git-tree-sha1 = "25c9620ab9ee15e72b1fea5a903de51088185a7e"
["0.8.1"]
git-tree-sha1 = "53a5c754fbf891bc279040cfb9a2b85c03489f38"
["0.8.2"]
git-tree-sha1 = "cd7595c13e95d810bfd2dd3a96558fb8fd545470"
["0.9.0"]
git-tree-sha1 = "738574ad4cb14da838e3fa5a2bae0c84cca324ed"
["0.10.0"]
git-tree-sha1 = "79e850c5e047754e985c8e0a4220d6f7b1715999"
["0.10.1"]
git-tree-sha1 = "45a146665c899f358c5d24a1551fee8e710285a1"
["0.10.2"]
git-tree-sha1 = "0b127546fd5068de5d161c9ace299cbeb5b8c8b3"
["0.11.0"]
git-tree-sha1 = "001842c060d17eecae8070f8ba8e8163f760722f"
["0.11.1"]
git-tree-sha1 = "3309b97c9327617cd063cc1de5850dc13aad6007"
["0.11.2"]
git-tree-sha1 = "9c2873412042edb336c5347ffa7a9daf29264da8"
["0.11.3"]
git-tree-sha1 = "0c452a18943144989213e2042766371d49505b22"
["0.12.0"]
git-tree-sha1 = "7618e3ba2e9b2ea43ad5d2c809e726a8a9e6e7b1"
["0.12.1"]
git-tree-sha1 = "59619a31c56c9e61b5dabdbd339e30c227c5d13d"
''' > O/Oscar/Versions.toml
# Register the package in the [packages] table of Registry.toml
echo 'f1435218-dba5-11e9-1e4d-f1a5fab5fc13 = { name = "Oscar", path = "O/Oscar" }' >> Registry.toml
git add .
git commit -m "New package: Oscar v0.12.1"
# Save some space
# -f: do not fail (and abort the script, because of `set -e`) when the git
# template in use did not install any sample hook
rm -f .git/hooks/*.sample
# Archive
# Move up to tmp_dir, archive the General directory, then put the archive in
# the directory the script was started from
cd ../
tar -czf fake-julia-registry-repository.tar.gz General
mv fake-julia-registry-repository.tar.gz ../
# Clean up tmp_dir
cd ../
rm -rf tmp_dir
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from pathlib import Path
from swh.lister.julia.lister import JuliaLister
from swh.lister.julia.tests import prepare_repository_from_archive
expected_origins = [
    "https://github.com/leios/Fable.jl.git",
    "https://github.com/oscar-system/Oscar.jl.git",
]


def test_julia_lister(datadir, tmp_path, swh_scheduler):
    """Run the lister on the fake registry archive and check listed origins."""
    archive = Path(datadir, "fake-julia-registry-repository.tar.gz")
    registry_url = prepare_repository_from_archive(archive, "General", tmp_path)

    lister = JuliaLister(url=registry_url, scheduler=swh_scheduler)
    # Make the lister work on the repository extracted under tmp_path
    registry_clone = Path(tmp_path, "General")
    lister.REPO_PATH = registry_clone
    lister.REGISTRY_PATH = registry_clone / "Registry.toml"

    stats = lister.run()
    assert stats.origins == 1 + 1

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(listed) == len(expected_origins)

    found = {(origin.visit_type, origin.url, origin.last_update) for origin in listed}
    wanted = {("git", url, None) for url in expected_origins}
    assert found == wanted
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.pattern import ListerStats
def test_julia_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """Send the ping task and check that it succeeds with "OK"."""
    pending = swh_scheduler_celery_app.send_task("swh.lister.julia.tasks.ping")
    assert pending

    pending.wait()
    assert pending.successful()
    assert pending.result == "OK"
def test_julia_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker):
    """Send the lister task with a mocked JuliaLister and check its result."""
    expected_stats = ListerStats(pages=42, origins=42)

    # setup the mocked JuliaLister
    mocked_lister = mocker.patch("swh.lister.julia.tasks.JuliaLister")
    mocked_lister.from_configfile.return_value = mocked_lister
    mocked_lister.run.return_value = expected_stats

    pending = swh_scheduler_celery_app.send_task(
        "swh.lister.julia.tasks.JuliaListerTask"
    )
    assert pending
    pending.wait()
    assert pending.successful()
    assert pending.result == expected_stats.dict()

    mocked_lister.from_configfile.assert_called_once_with()
    mocked_lister.run.assert_called_once_with()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment