From f8cfa05f3f878717d1116ce3147c40a6ec79e2af Mon Sep 17 00:00:00 2001 From: Franck Bret <franck.bret@octobus.net> Date: Thu, 20 Jul 2023 17:18:26 +0200 Subject: [PATCH] Add Julia Lister for listing Julia Packages This module introduce Julia Lister. It retrieves Julia packages origins from the Julia General Registry, a Git repository made of per package directory with Toml definition files. --- requirements-test.txt | 1 + requirements.txt | 1 + setup.py | 1 + swh/lister/julia/__init__.py | 83 ++++++++++ swh/lister/julia/lister.py | 90 +++++++++++ swh/lister/julia/tasks.py | 19 +++ swh/lister/julia/tests/__init__.py | 30 ++++ .../fake-julia-registry-repository.tar.gz | Bin 0 -> 7479 bytes .../data/fake_julia_registry_repository.sh | 148 ++++++++++++++++++ swh/lister/julia/tests/test_lister.py | 36 +++++ swh/lister/julia/tests/test_tasks.py | 31 ++++ 11 files changed, 440 insertions(+) create mode 100644 swh/lister/julia/__init__.py create mode 100644 swh/lister/julia/lister.py create mode 100644 swh/lister/julia/tasks.py create mode 100644 swh/lister/julia/tests/__init__.py create mode 100644 swh/lister/julia/tests/data/fake-julia-registry-repository.tar.gz create mode 100644 swh/lister/julia/tests/data/fake_julia_registry_repository.sh create mode 100644 swh/lister/julia/tests/test_lister.py create mode 100644 swh/lister/julia/tests/test_tasks.py diff --git a/requirements-test.txt b/requirements-test.txt index 977c91ea..8e1fd60c 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -5,3 +5,4 @@ requests_mock types-click types-pyyaml types-requests +types-toml diff --git a/requirements.txt b/requirements.txt index bf2beb6e..bc64858a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ rpy2 setuptools tenacity >= 6.2 testing.postgresql +toml diff --git a/setup.py b/setup.py index 49bcfdfd..2a9c51f6 100755 --- a/setup.py +++ b/setup.py @@ -76,6 +76,7 @@ setup( lister.gogs=swh.lister.gogs:register lister.hackage=swh.lister.hackage:register 
lister.hex=swh.lister.hex:register + lister.julia=swh.lister.julia:register lister.launchpad=swh.lister.launchpad:register lister.nixguix=swh.lister.nixguix:register lister.npm=swh.lister.npm:register diff --git a/swh/lister/julia/__init__.py b/swh/lister/julia/__init__.py new file mode 100644 index 00000000..ed9c8584 --- /dev/null +++ b/swh/lister/julia/__init__.py @@ -0,0 +1,83 @@ +# Copyright (C) 2023 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +""" +Julia lister +============= + +`Julia`_ is a dynamic language for scientific computing applications. It comes with +an ecosystem of packages managed with its internal package manager `Pkg`_. + +A list of all officially registered packages can be found in the `Julia General Registry`_ +on GitHub, but it's easier to search for packages using the `JuliaHub`_ and +`Julia Packages`_ sites. + +The `Julia`_ lister lists origins from a Git repository, the `Julia General registry`_. +The main `Registry.toml`_ file lists the available Julia packages. Each directory +matches a package name and has TOML files to describe the package and its versions. + +Julia origins are Git repositories hosted on GitHub. Each repository must provide its +packaged releases using the GitHub release system. + +As of July 2023 `Julia General registry`_ lists 9714 package names. + +Origins retrieving strategy +--------------------------- + +To build a list of origins we clone the `Julia General registry`_ Git repository, then +read the `Registry.toml`_ file to get the paths to the package directories. +Each directory has a `Package.toml` file from which we get the Git repository url for +a package. + +Page listing +------------ + +There is only one page listing all origin urls.
def register():
    """Describe the Julia lister for the ``lister.julia`` entry point.

    Returns a mapping holding the lister class under ``"lister"`` and the
    dotted names of its task modules under ``"task_modules"``.
    """
    # The lister module is imported at call time rather than at package import.
    from .lister import JuliaLister

    tasks_module = "%s.tasks" % __name__
    description = {
        "lister": JuliaLister,
        "task_modules": [tasks_module],
    }
    return description
# Aliasing the page results returned by `get_pages` method from the lister.
JuliaListerPage = List[Tuple[str, Any]]


class JuliaLister(StatelessLister[JuliaListerPage]):
    """List Julia packages origins.

    Origins are read from a local checkout of the Julia General Registry, a Git
    repository: ``Registry.toml`` maps each package to a directory, and the
    ``Package.toml`` file of that directory gives the package Git repository url.
    """

    LISTER_NAME = "julia"
    VISIT_TYPE = "git"  # Julia origins url are Git repositories
    INSTANCE = "julia"

    REPO_URL = (
        "https://github.com/JuliaRegistries/General.git"  # Julia General Registry
    )
    # NOTE(review): fixed checkout location under /tmp, reused from one run to the
    # next (see `get_registry_repository`); both paths can be overridden on the
    # instance, e.g. to point at a test fixture.
    REPO_PATH = Path("/tmp/General")
    REGISTRY_PATH = REPO_PATH / "Registry.toml"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        credentials: Optional[CredentialsType] = None,
        url: Optional[str] = None,
        max_origins_per_page: Optional[int] = None,
        max_pages: Optional[int] = None,
        enable_origins: bool = True,
    ):
        """Set up the lister.

        ``url`` is the registry repository to clone and defaults to ``REPO_URL``;
        the instance name is always ``INSTANCE``. Every other argument is
        forwarded unchanged to the parent lister.
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            instance=self.INSTANCE,
            url=url or self.REPO_URL,
            max_origins_per_page=max_origins_per_page,
            max_pages=max_pages,
            enable_origins=enable_origins,
        )

    def get_registry_repository(self) -> None:
        """Get Julia General Registry Git repository up to date on disk

        Pull from ``self.url`` when ``REPO_PATH`` already exists, clone it there
        otherwise.
        """
        if self.REPO_PATH.exists():
            porcelain.pull(self.REPO_PATH, remote_location=self.url)
        else:
            porcelain.clone(source=self.url, target=self.REPO_PATH)

    def get_pages(self) -> Iterator[JuliaListerPage]:
        """Yield the single page listing every package of the registry.

        The registry repository is first cloned or updated on disk, then its
        ``Registry.toml`` file is parsed. The page is the list of
        ``(uuid, info)`` pairs of the ``packages`` table, where ``info["path"]``
        is the package directory relative to the repository root.

        Raises:
            FileNotFoundError: if the checkout has no ``Registry.toml`` file.
        """
        self.get_registry_repository()
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if not self.REGISTRY_PATH.exists():
            raise FileNotFoundError(
                f"Julia registry file not found: {self.REGISTRY_PATH}"
            )
        registry = toml.load(self.REGISTRY_PATH)
        # Materialize the dict view so the page matches JuliaListerPage.
        yield list(registry["packages"].items())

    def get_origins_from_page(self, page: JuliaListerPage) -> Iterator[ListedOrigin]:
        """Iterate on the packages of one page and yield ListedOrigin instances

        For each package, the ``repo`` entry of its ``Package.toml`` file is used
        as origin url.

        Raises:
            FileNotFoundError: if the registry checkout is missing from disk.
        """
        assert self.lister_obj.id is not None
        if not self.REPO_PATH.exists():
            raise FileNotFoundError(
                f"Julia registry repository not found: {self.REPO_PATH}"
            )

        # The registry key (package uuid) is not needed to build the origin.
        for _uuid, info in page:
            package_info_path = self.REPO_PATH / info["path"] / "Package.toml"
            package_info = toml.load(package_info_path)
            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type=self.VISIT_TYPE,
                url=package_info["repo"],
                last_update=None,
            )
def prepare_repository_from_archive(
    archive_path: Union[PosixPath, str],
    filename: Optional[str] = None,
    tmp_path: Union[PosixPath, str] = "/tmp",
) -> str:
    """Given an existing archive_path, uncompress it.
    Returns a file repo url which can be used as origin url.

    This does not deal with the case where the archive passed along does not exist.

    Args:
        archive_path: tar archive to extract, as a string or a path object.
        filename: last component of the returned url; defaults to the base name
            of ``archive_path`` (extension included).
        tmp_path: directory handed to ``tar -C``, the archive is extracted there.

    Returns:
        The url ``file://{tmp_path}/{filename}``.

    Raises:
        subprocess.CalledProcessError: if ``tar`` exits with a non-zero status.
    """
    # Normalize both arguments the same way: callers pass str or path objects.
    archive = os.fspath(archive_path)
    target_dir = os.fspath(tmp_path)
    # uncompress folder/repositories/dump for the loader to ingest
    subprocess.run(["tar", "xf", archive, "-C", target_dir], check=True)
    # build the origin url (or some derivative form)
    _fname = filename if filename else os.path.basename(archive)
    return f"file://{target_dir}/{_fname}"
zf0OaT+V;We9v4jaEO=qB?&)w&{?<~kz>)<Bg0KK;u2T<iJ6x7(sKSTmP4dgPBS61c z6Bj^@x2}hA0*PTUpN6Wl>}1-W;S6tu<qTF_TCFAEiq}$>m?P;JgdSQ~2${YsPAqCf zX{w<mW^m}A(qpS@PWDsi!cZ{;@wfuI;B;Z@c2nvhLDOgIlg@^xp+dCOlNzvc?Dir( zf;yA|yU}E&aVVuQ%QJIuKT*Prlj;WPnlWOB#`X(UHFkl|ln2i)3F!llfM=GI9TPW= zI7Kt<>e!_}+hP(SVh;%nVlZPAoZOXRe)oWmT0rnp#W=tO`nnGkbz<0Wfgx47Mwv_} z>|iF<(pkd&8y)N<!~G8RO6-dgEHR9WvDp5n1H-dgzZIycR~G#Nl>FqTD{oU);Rdid zg&6>%ptT*@u+jz8(+7+xz~;Xvp`PuBQhesGoKH?%+J=I^NdTMA;?&N&n7YHI)OrXD z!az6?;^y+CVjt6pV#8268DJK{!tz%xApm3}7?{^vzof?t3dPtJ82~RpYwO6dnbl2} zg$hH!rsVDY!-E~D8Xzm9l<kMTmVeFG1VDQvF>ugSyHNX@+pH-dKY4u&fc4Px&p@_# zFQ}LsZu~f-sb*hiyo}O!wX7Y-cA!KF>1K5AzX@_iNN79CA8@zVVChtfZ187-wJQxu zVs-}`GO}jcSF4Nfo)`PJZvr^g{FCz#f?eDVS?b8L8#uTk3yR*T52VL~eu5J9mZ4zs zy1c+;JMnMD8+P4TsA7>RMzFc~po{wXUYEQsIY1HrmuE;&Wb_%HqAkT>5rusHGXiqF z=wiHq2Ir@^KGHqA&=}x#yE>XG$_A9V0abi9A!gmfglrEn5+0Uh!5N8kl@NV;o!M&> zcDH4|Nw8^`)lit9&edto{|lbM?xC}BSJkd;Aygyg*Y-E<+TvS~#u>)9jlqIN6e-<) zH23vv$bui@nd_HTp%EU)vpP32$X*__C+=tnt8my6@ebdZ%RWJ>zSQcxI;Vh%-_~gu z2*XJLrcxKLBA)Tl!?|jsvrl7zV+`wRrD0)xzGcU;qI&2Q;U0|psI{u{y5XaHFN2-F z&%zQzAr3VbZL>|^$2i;?%Jv{_*G^sCqf5pw+}s4$URne3=`!Pjy-cn>HSUQL@IuJi z>>M}Tq0jK{$)kxf$cL*niPb^-lx2j-CD(F<!+Z@hi5Kxt{FY(bw^!p6+yMOw>)J(% zcab7{q?P&@@#lCoU$dkw#8m7WoBToCwAqQw+Op$d+|FYyW9d>;fPE@z4r*Wuxc-Br z?86zt2t`hc&o&Hfo_O-FvifNXz%3uCHG2i{nLjkV&c}o+W$tuYgR%+%A)<{0i0}0k z0TvUbl05k1eZK!4#iiGAC&nL+R*tU7WlGgQp+`L@p-Y`UZRq4=@fELaO`RIvy=&sV z61~XPs30*o0pk7@C(77>h8I1@kka=0Q!9zbM45og1*6Kq^iNt8Sh>IKfRt<nEJW#= z_GA;lhL?;?a%#Zik7Ao!YR7Y}p`TP^9DRw48$YP8EThzGqOHyMWTh(H9BL0OAWxh5 zgI5S$JfYrH=@Spr?XNz@wjMd68z8Ye!><*8B1hX&e_r{1o=I*d=6qNe%l5pS4~BO> z9cooXW#cDFk=Doc()e2HtxfRD_f4y#mZA-Od>ZYO2h$H4@|U^dlIh=`{pg)Ho%2&# zLJRF<ZA0n4-UO+5ks$Do3i7j#c1QviCbM@ulK0V!@0`)91pnF&%eyION@LLtQxa#` z;(Rb>2U1RvB09DGqw`k`{+B3eeq6;3m0awMK#|0Br(x)DJO{BcHNEvA%`$CX&dC*J z+||b~m%HQ(Umonsz?FThH{}{~`t+~do1rvjJ|V4AZfE^<BsbkR5pkR8FP8H!_x#{2 zm2jQdwY&5sjtsiE5<Z^1MP~8+g^{$rRB2}Ttp#%3=+s#suI!r4lJ2PWy){L~eL12h 
zpljvn<qEg7Y>-+;Cl-a9P3PzYq;PfRmztS}``VFD`>WOfq{Cu3%%R7~CY&KBI$sl^ zb$@SQ3$qGp5Z0vQpQmhWeH&DBL;hxmkELl`VJ5HEt-fgdWwW8{7DH_WT))G^Nu`DB z+r^>6JLXA@u$GbE+doavkMKnt_+e^LRB*}6l*(drT)Cpck?L5&qotdx#NhREL3XKX zRn1LXs6G+HcU!p1SdQ|`o~}&)ob!wiMyDUr*|D?rPLh9meyudr9`kISSFqE_lctTb zk}M>9P5pz}MYYWLOa`fWQ)sp&%OGxi{f-9GYN3ARt+Ep!XkR}i*T9=dm$wN-yhXzl zXib63v}b!=*#Fdt6{m+j!dqmp0fp$HPap@4OoRHo%feLNM`4>(xW9Zm1FhIUhm~MT z_3k~u{WN|#kZzQB_nh-|Q=yiv<gu}cyUp(TEmN2H3%3^=(__!xSTR`Iw|2d^{G#Pk zTh|xdPRXu1Od`G%t~0LXu^1>VDcyCH8S%8PIk+*A*5f3g)_%TT)o!%>Vi7DPRkpez z7sQ!cKqY~aUw>hfTD^<{-$@t|u~8EuP_j$EFBuJf0v`Y2Gv8Z*htpCz3gC$iB6VAy zAt6Eo16NwMCmafjV8efuI*_$&!h{$9uW8k%E3CpFtH>%2f~Cj45TQ_a@-QRc)_VkE zn^$Y~L<;G1s`7a}GNFV*kO-8D{*gL?>bz06*4u8lGs&74nSiTH5>uxy9B!z=_@tRd z2O*K4i7kWO=JTQ@Np|SFErLJUYiNZr)aYCInMip5nR9&kZ4;K$IO0c(ob1T@ONqGq z=Tm5_J+u|Ryw*K8g}^v72k|;#(&p>1Vx~w1`v;C<Y0o)mm1)3R2acsi=vov;hq{-P z6Z6krSAbOz#uJ!#buK>9?IN#r2k@_Mt1={@LhPgU`lge&ZaN?peKEHQfu~Iq!qC(7 zo1o?Xq;Q9qPxz0B8GefTot2sUA+Wz4oJY}hB<u<OEQUNU=Bt<kY&wfC8T(k#qM)}M zuqZIz7p5dt0Z^G3tvq$iI;Yad->hTttyl?O1ttGQ0r*=WKoupocMA>weS~5xz-MA1 zZ}`>Wnw9vjz&7|mPDJ{4LcEK~>`1)<x*Xih+B3cznf^gV5mplssB(E}r7#VAy~#&G zweI!H!b$|AtWBb#Oz?XA1sUZ$A1HZB#iy2_!bQw=P$WHu2eJZ6uTG%CptXH~@+uQU zo(O|4`x$lAzczAi(+>7(PAB=Y;xl~vCw?&T)_v9f_+XTB$L~_JaN$yo(glITf7p7w z-f*{<JcFxF4<qXITpu_Ir=tn@g4_Cdm7Cy$^5uc-_y3p!aNku1FfzV4h3iTs+dWfm z_#TF(mi7kBGalNG^WX_lvNuP>cY2fmN;FSW;@4{^!5*3)|6>Sx2Yg^YIl9?!;Jfn_ zhfp}^-5p$n&P(#wlz0<RfFao8eV!A-BK7CB!(3!SmN)OrkUy(Pq4d$k(!~6=?;|e8 z48QHvyWUnOd<#KoI}``PGJWCao7?M3-U$~Ed>bnyAx()S^;_Y$DPVtA=?l7at2@^$ zbNc4zqT7DgR{2ImCLXyB8|M6<Zi(q$oo<EF7@H;k;Z#DM)Kqp}m*<v`bFD~?uNT4x z0-oC9Yro`E3NjP97(mgLm;ys1QS;FMjPai_xfB|Pij4ln>t~kY)W!jUUULk+mx|WS z2{W|bi2Au$p#_|NJ%Lj8LCph!Wcea1V2_0d*UjV8Nbz-A@o%cLRMGDb_Th7%eaw-b zR>}vA<#7F{-lo8Xf<4iC?Nlv5&*2BKY3xxlzN>2kSdyPyP4LFs^;PSqN1MsYO83~k zy80B`Rq#H~L9Oru3^MEz`y45=uV+lknemw5)SGW<Pp|pZeyU;2Z~0a%J451=zm#47 z*HrIi|F*-FlXhoeaqF1^Sg`D4;$>0+-c{qvzF7^1<}i;z3xB2T8EvjH7u=zrjUIMR 
z_wD!k^ChXW8O)3E?Ug09_^i9yOx4j)SHY^Sl0QykZzg04S8btnymN!xt~#!;y}snU z<Ix{+5m)#1SlW*)H;wlobqT{Bg)e@!S?y-mJk4%X_G!OKXBO2R9Khd+Y<7FMrx);H zH(i?DmzVnk|9-pU2hX~Z+C5dL51^%4PRgJO36n#fpV05CC5v^E5-FkyoAHlCl5Pps zo%9?#R`d(jMyzC}5yuKJ?E!aof*Gp!I@!Xx7Xu#+Ufb^DjL{k8K2P7Sg463;ljqgr z2YOnxX#f3rJ$ui%Ls&%@1MI^9fPWJDAHn!XW)|_B9&5^SP*|V*19?G?o+41>IV&M4 z#aJAT!c#!)RrMw8dJgCYbN$;-!A>jdMVMRu<q%AHmKJiM^0!FG2csJcZtibxNJt_` zQcp_aW&n|9r-8o=^JB;-TM%`#9g`Ha7{a^c&kJDVVL2WPiF|(Lu5xPBtU8fFNmKv? zoa;nZomHjNMIDc6dom?meK5cIwGG2a(=%4_@SDmu!3OZs50E~=Q~y&(l<f5H1K_p! z^#3p!qau)pLrPKXLTM+S@I)B<0sK87D2hT#=ewo_pk~1PPa%dOmyCO85`2Y%I`=;l zW!Al;PV>g<ABL)C{1~bWgC6Gu5Hl87+iQ0Dj5Yn%nRvv<_1MxAJ<yN|AOA7a;qAMX ztFv}~m?Us{tqLDw)X_ppZ!OkA9tsT>`sLg!hQ3=oi?qmi;M<Vpb2QbACW6*VhFROS zKrZ`m|E|$E%zLDLIm$oz<StXPIi+E@^d_(|wuvs~_GqpCB&&iy-?u;<t$U684QDS- zNw%#DVcGC7J2DYH7i2wv)gU;j7C4y-Ugq{jAJ&f@lA9_iU72-GA^dhn*_!1&k2H<8 z`OSZCTcHNFI<?EI+CC>PO4!68bg#E~>386Vht{pVd-68ei=AfCmL%Kb@-rW}9hWns zwCYy`j=KrV&&u|k%9|G>Bv(hr9=l$&Aryv#K6Z_T)Z1pEls0B~U(Z|-<+~RMxbHO= zsus-u3=wTYV00W|R1#)9FIGF`jf67K$Cupc-35bJ7S7Is_*-B?`G+faml1?OJFyw` zBEQ2y(POJw`l<Vtw8Oj)tu!*EF`%WE*vCb!+FV%VOS{`%ZfqaCCcvlb_tlEgo~$La zi(`t%TsWu3Jxj8d7^S!GX?NPgYR9S-@*g#@?7OAVkFckbQqq;VkoBwv{ln%mdr<`5 zU*X!IZ<W@}<G%M2u%v2{a}AkA*(SrSSgWq#GxynJt0M~nAHxBf5cvvPjR(qc27hUi ze#oVnoP1I<QF=z}s!2y${c?20q&`Ra+>$H5NcnY@D!fvoB;#kOk#1e+kER&<&wTzR zMh_@rUT`Te*^*pfRp(5MDG-l5A|SJTmwQr>o!_$Jkp;~KV3OApy-r{C(n}yAzSAZR z<rfg^%Z=G~nfyX=PV@8^LR6om_wJXJM<i7~*Cl5c=!^cnHn{loW=~7T^|Jkvt?!uU z7X|^D?_BZ=#&}&$kyxVYcb=58^I8hr_cFqM1nV4AZId`97>`-4-L%kKB}g3Qm%V}U zFG=71;H-t?`NfuyB*7$H1fg|j$uohxtLp5>txi@WlL6Or)&9=MW;0p*!4j?W&9PKg zn0rJM{OH?mhf4sm^#KBR^4`_gkbj}>bbJF5EVIz+el8pRt3s!@772v-(G9&0{eeMJ ztx99s4gdlL$eAd<x3yS0m^>`_v&hhJ{OWBvj#Yg35G`Gow}Vd(&}0I{SM~lll_eFs zK_QpRPgc*A_WC<+YLpe7FY{<<$VC(H=VRliqxhj=>r-$d|BKZTnA_U4>+X`vS@AdS z1C?5cZ7{lXKef1ayP)-Fo7wgxO@9*<afCrC9^XO<<RNDdr$M~9`wy{ki~~j=cJA06 zYN$jKQEcLj)tCayR~V1owmX0<%;*4m;3^B{b+{@)-F5ZI2BuVh%e~8B_nYguSl@4Z 
z!$7`ovF2ptH$PwON;;1*X|V!4;eZBsDZOj5bIRO#mnq`ApgL`lRE%_S(^=C~0;wp6 zozYzZOu1(b@Z-{oK;HXa=ZvHo(?S9hzxztc?HUKG32+Ov7;Mb0fv;Wtjtl4-rxAEB zhrl&-x(+%#y~kvt`R;~-VbE5Zidv(ZugkeDzADEskIw6KDkts317Lf$Y^A^(;^*i3 z)Rd(Lo$n6CFtQz^bQC5F?<HHf-~B5;43xgS1{OUB#{pKny2~!txWI%xa@yO8t+un8 z9El%+!@R*Nn$KbFZR==x6lx40yk0YIt#t~ljq^UPSkb*c{c4u&K6Y#pqB7Uor(S~4 zmkBx`4&2%^c@*V{XzxUqCO$&0+zh)IlFMjt1#ul!t#7i#NBu$u&EQ^?$aVygt~7O= z)fqlQLhk;|Y^z)crAXhQ?tE8Jj3WCNZ=z2mi*JsEJ=$E>xx*hyqg+PnU(ls7U&Dje z{}fpCX#;^fWhJ#9`G$h=UpxX?cdm|)YXC`lYrxLCzM<i{xp0X2T*RRU|8Bhyt=@h3 z7C_yS3wRy?<l!0@kJlG`J(#VWBD|TWuLf%6X_Z_i-H`qwzp?K4`40B0yXfBUo-%%! zl?Hg3oVkG-Bwjl%K!WW?4u~ECJNH29!(G<if)DN*G4CA}RU_g2^HYCg1}!xn9p;~p z+Bfo4#FS<3?^Xz0P&-9n!J?zF@w!jztIEm*Xl2^IHwu_?#KA{EPK!XViut!bDnkmY zxc99S{0T~9<q>9AT*UeHXYUPEOH%)Sc(Qe+|7m3}&}*p`LQhgFPqU7zvY<&yaj*b- zG_C7Z?rJT!GhN45{q+%k5`N<)u(jml3I1!b4SnXj31yy4x_{KDYw_hlG#~VCeig_C z(;Hyen8^@Z(H4WnLr|GeceJ0v?+sZhp!MctcK{pH_1$1^#KKBN6@&b8SG@z3I;{Bz zj?H?np(RC<yZz6W3uakf#|=;ynI(A0^m+aH!w3aYeo$o6;`9w+=C7`uG98WVlYI0m zHyNbB#`0H(f!cPe{p{Se-z(G5wOzUC5(l<{M2E&c$Xo6;Nq(YUs8wr=hnlC6<tp9I zCA9hNVcN%H{k#D7{j*m#CN5=>VuPage^Jr-$y)7u)&8YjE2eD{O}kO!Lt4RZsD+{H zq}-o-)gFpyuW33dbIHrZzDLA=(DEMb_KSuj>9CsNJKNQwwr?IXo5!nkt_tNM=f2II zb~3UlEkl`+-1D&ar^oW8P(M0qf3?dNnm1KlBAm05EiN~%>-i~?A??z?2&M$o3mg2m z=LXRFQL`TRfkz|CR@HZ8R0+dG#R8hzR$<YP*KXtgxcMU_&z+uxq`iTRWJU2cA;bQ7 zwva$%8vqMQK4anESqD)b!I%-x+CP6p3a(E{fR5nX#?M1l7Jlz!3PTR`xwn5oON(6| z?`T!;up3n~+j&&AIyoci9}5>I1T~voEy(!(O4_ae#huJsQH@Nq(DmK%CYZE|K1p%} zU;%jB=zP!JTgI@3aey6<_srv>UbQ4aF<UmOzZ%d1nT~ZGIs4!2KEuRoT{jj2J)IDY zs)8<;x{trT*k^eZ=g5;Mkz7|s*1R#v9R|<dr>L`TR4NFi#)XcT3EKDeiLi-h#SDd5 zfD<jLMqcT(h%3n}r#Q}xT-aI|Q@YGnHkND{`=+KHrVXDknl3|^D=z;|rJ`A0nJ#y! znq#uT=9y&q@nM_+BXdgnrCX-uDuj3mCQTcfbbK86D_Fd_6us7D)XYt!__Q1klnsPk z7ZQ0m7~-iQ+C|c@@#-K6F(SVT*`D-VNh@%Gv6Uk<{;Vjr6fI)BJDpCu;sLU7%3IM1 zQ~Fq;PQ`mhI<kU$^3b`nldK!0t1yOq;|fp%{rif*-)rdy@X0hu?JK#!B2N^?^^2c# zA>`x}pVlWv+!UQcO%nKD2(ghN*b={PO^2Sa{JV%~Kk!oKT{>_#d<5ix`<eYR@Hdo? 
#!/usr/bin/env bash

# Script to generate fake-julia-registry-repository.tar.gz
# Creates a git repository like https://github.com/JuliaRegistries/General.git
# for tests purposes
#
# Run it from the directory that must receive the archive: everything is built
# in ./tmp_dir, which is removed at the end.

set -euo pipefail

# files and directories for Julia registry repository
mkdir -p tmp_dir/General/
cd tmp_dir/General/

# Quoted here-documents write the TOML content verbatim (no escape processing).
cat > Registry.toml <<'EOF'
name = "General"
uuid = "23338594-aafe-5451-b93e-139f81909106"
repo = "https://github.com/JuliaRegistries/General.git"

description = """
Official general Julia package registry where people can
register any package they want without too much debate about
naming and without enforced standards on documentation or
testing. We nevertheless encourage documentation, testing and
some amount of consideration when choosing package names.
"""

[packages]
EOF

# Init as a git repository
git init
git add .
git commit -m "Init fake Julia registry repository for tests purpose"

# First package: Fable
mkdir -p F/Fable

# The trailing blank line of the package files below is kept on purpose, so the
# generated files stay identical to the ones of the previous script version.
cat > F/Fable/Package.toml <<'EOF'
name = "Fable"
uuid = "a3ea4736-0a3b-4c29-ac8a-20364318a635"
repo = "https://github.com/leios/Fable.jl.git"

EOF

cat > F/Fable/Versions.toml <<'EOF'
["0.0.1"]
git-tree-sha1 = "d98ef9a5309f0ec8caaf34bf4cefaf1f1ca525e8"

["0.0.2"]
git-tree-sha1 = "65301af3ab06b04cf8a52cd43b06222bab5249c2"

EOF

# Register the package in the [packages] table, last table of Registry.toml
echo 'a3ea4736-0a3b-4c29-ac8a-20364318a635 = { name = "Fable", path = "F/Fable" }' >> Registry.toml

git add .
git commit -m "New package: Fable v0.0.2"

# Second package: Oscar
mkdir -p O/Oscar

cat > O/Oscar/Package.toml <<'EOF'
name = "Oscar"
uuid = "f1435218-dba5-11e9-1e4d-f1a5fab5fc13"
repo = "https://github.com/oscar-system/Oscar.jl.git"

EOF

cat > O/Oscar/Versions.toml <<'EOF'
["0.2.0"]
git-tree-sha1 = "cda489ed50fbd625d245655ce6e5858c3c21ce12"

["0.3.0"]
git-tree-sha1 = "d62e911d06affb6450a0d059c3432df284a8e3c1"

["0.4.0"]
git-tree-sha1 = "91a9c623da588d5fcfc1f0ce0b3d57a0e35c65d2"

["0.5.0"]
git-tree-sha1 = "5d595e843a71df04da0e8027c4773a158be0c4f4"

["0.5.1"]
git-tree-sha1 = "501602b8c0efc9b4fc6a68d0cb53b9103f736313"

["0.5.2"]
git-tree-sha1 = "aa42d7bc3282e72b1b5c41d518661634cc454de0"

["0.6.0"]
git-tree-sha1 = "a3ca062f1e9ab1728de6af6812c1a09bb527e5ce"

["0.7.0"]
git-tree-sha1 = "185ce4c7b082bf3530940af4954642292da25ff9"

["0.7.1"]
git-tree-sha1 = "26815d2504820400189b2ba822bea2b4c81555d9"

["0.8.0"]
git-tree-sha1 = "25c9620ab9ee15e72b1fea5a903de51088185a7e"

["0.8.1"]
git-tree-sha1 = "53a5c754fbf891bc279040cfb9a2b85c03489f38"

["0.8.2"]
git-tree-sha1 = "cd7595c13e95d810bfd2dd3a96558fb8fd545470"

["0.9.0"]
git-tree-sha1 = "738574ad4cb14da838e3fa5a2bae0c84cca324ed"

["0.10.0"]
git-tree-sha1 = "79e850c5e047754e985c8e0a4220d6f7b1715999"

["0.10.1"]
git-tree-sha1 = "45a146665c899f358c5d24a1551fee8e710285a1"

["0.10.2"]
git-tree-sha1 = "0b127546fd5068de5d161c9ace299cbeb5b8c8b3"

["0.11.0"]
git-tree-sha1 = "001842c060d17eecae8070f8ba8e8163f760722f"

["0.11.1"]
git-tree-sha1 = "3309b97c9327617cd063cc1de5850dc13aad6007"

["0.11.2"]
git-tree-sha1 = "9c2873412042edb336c5347ffa7a9daf29264da8"

["0.11.3"]
git-tree-sha1 = "0c452a18943144989213e2042766371d49505b22"

["0.12.0"]
git-tree-sha1 = "7618e3ba2e9b2ea43ad5d2c809e726a8a9e6e7b1"

["0.12.1"]
git-tree-sha1 = "59619a31c56c9e61b5dabdbd339e30c227c5d13d"

EOF

echo 'f1435218-dba5-11e9-1e4d-f1a5fab5fc13 = { name = "Oscar", path = "O/Oscar" }' >> Registry.toml

git add .
git commit -m "New package: Oscar v0.12.1"

# Save some space
# -f: under `set -e` a plain rm would abort the script when git installed no
# sample hooks (the glob then stays unexpanded and matches nothing).
rm -f .git/hooks/*.sample

# Archive
cd ../
tar -czf fake-julia-registry-repository.tar.gz General
mv fake-julia-registry-repository.tar.gz ../

# Clean up tmp_dir
cd ../
rm -rf tmp_dir
def test_julia_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """The ping task of the julia lister can be sent and answers "OK"."""
    async_result = swh_scheduler_celery_app.send_task("swh.lister.julia.tasks.ping")
    assert async_result

    async_result.wait()
    assert async_result.successful()
    assert async_result.result == "OK"


def test_julia_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker):
    """The lister task builds the lister from the config file, runs it once and
    returns its stats as a dict."""
    expected_stats = ListerStats(pages=42, origins=42)

    # setup the mocked JuliaLister: the class mock stands for its own instance,
    # so calls to `from_configfile` and `run` are both recorded on it
    mocked_lister = mocker.patch("swh.lister.julia.tasks.JuliaLister")
    mocked_lister.from_configfile.return_value = mocked_lister
    mocked_lister.run.return_value = expected_stats

    async_result = swh_scheduler_celery_app.send_task(
        "swh.lister.julia.tasks.JuliaListerTask"
    )
    assert async_result

    async_result.wait()
    assert async_result.successful()
    assert async_result.result == expected_stats.dict()

    mocked_lister.from_configfile.assert_called_once_with()
    mocked_lister.run.assert_called_once_with()