diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py index f20c90207a473ac6bb164b6f4ec9f6c8d0bbeb8b..89bf4a84929386c8be583b765370ca104699f636 100644 --- a/swh/lister/nixguix/lister.py +++ b/swh/lister/nixguix/lister.py @@ -113,6 +113,9 @@ class Artifact: """Optional reference on the artifact (git commit, branch, svn commit, tag, ...)""" submodules: bool """Indicates if submodules should be retrieved for a git-checkout visit type""" + svn_paths: Optional[List[str]] + """Optional list of paths for the svn-export loader, only those will be exported + and loaded into the archive""" @dataclass @@ -472,14 +475,23 @@ class NixGuixLister(StatelessLister[PageResult]): if not checksums: continue + origin_url = plain_url + svn_paths = artifact.get("svn_files") + if svn_paths: + # as multiple svn-export visit types can use the same base svn URL + # we modify the origin URL to ensure it is unique by appending the + # NAR hash value as a query parameter + origin_url += f"?nar={integrity}" + yield ArtifactType.ARTIFACT, Artifact( - origin=plain_url, + origin=origin_url, fallback_urls=[], checksums=checksums, checksum_layout=MAPPING_CHECKSUM_LAYOUT[outputHashMode], visit_type=VCS_ARTIFACT_TYPE_TO_VISIT_TYPE[artifact_type], ref=plain_ref, submodules=artifact.get("submodule", False), + svn_paths=svn_paths, ) elif artifact_type == "url": @@ -614,6 +626,7 @@ class NixGuixLister(StatelessLister[PageResult]): visit_type="tarball-directory" if is_tar else "content", ref=None, submodules=False, + svn_paths=None, ) else: logger.warning( @@ -644,6 +657,10 @@ class NixGuixLister(StatelessLister[PageResult]): loader_arguments["ref"] = artifact.ref if artifact.submodules: loader_arguments["submodules"] = artifact.submodules + if artifact.svn_paths: + # extract the base svn url from the modified origin URL (see get_pages method) + loader_arguments["svn_url"] = artifact.origin.rsplit("?", maxsplit=1)[0] + loader_arguments["svn_paths"] = artifact.svn_paths yield ListedOrigin( 
lister_id=self.lister_obj.id, url=artifact.origin, diff --git a/swh/lister/nixguix/tests/data/sources-texlive.json b/swh/lister/nixguix/tests/data/sources-texlive.json new file mode 100644 index 0000000000000000000000000000000000000000..4894db784312a9b478a18d931787a620a201e655 --- /dev/null +++ b/swh/lister/nixguix/tests/data/sources-texlive.json @@ -0,0 +1,44 @@ +{ + "sources": [ + { + "type": "svn", + "svn_url": "svn://www.tug.org/texlive/tags/texlive-2023.0/Master/texmf-dist/", + "svn_files": [ + "bibtex/bib/oberdiek/", + "doc/latex/oberdiek/", + "source/latex/oberdiek/", + "tex/generic/oberdiek/", + "tex/latex/oberdiek/" + ], + "integrity": "sha256-n9ZrKjR0JYOsbFtKby7UWykYjVY0f1hgInyR3DNbpro=", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "svn_revision": 66594 + }, + { + "type": "svn", + "svn_url": "svn://www.tug.org/texlive/tags/texlive-2023.0/Master/texmf-dist/", + "svn_files": [ + "fonts/source/public/knuth-lib/", + "fonts/tfm/public/knuth-lib/", + "tex/generic/knuth-lib/", + "tex/plain/knuth-lib/" + ], + "integrity": "sha256-it3vOYZ4VrsXcBY6QSeIuHsNcoQkZsWP7aYaC8j4iDY=", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "svn_revision": 66594 + }, + { + "type": "svn", + "svn_url": "svn://www.tug.org/texlive/tags/texlive-2023.0/Master/texmf-dist/", + "svn_files": [ + "doc/latex/etdipa/" + ], + "integrity": "sha256-H25frh/nt438g8lsUCcfNuytvPrBrkYIpEEpqq4q48o=", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "svn_revision": 66594 + } + ] +} \ No newline at end of file diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py index 8dab20f217f9590f21a7a3a9108ed4c637abe4ee..180fe4fe54b2c6b7ab454e2fe044e4f15e8ffced 100644 --- a/swh/lister/nixguix/tests/test_lister.py +++ b/swh/lister/nixguix/tests/test_lister.py @@ -457,3 +457,37 @@ def test_lister_nixguix_fail(datadir, swh_scheduler, requests_mock): scheduler_origins = 
swh_scheduler.get_listed_origins(lister.lister_obj.id).results
assert len(scheduler_origins) == 0


def test_lister_nixguix_svn_export_sub_trees(datadir, swh_scheduler, requests_mock):
    """Check the listing of svn-export origins restricted to repository sub-trees.

    The ``texlive`` manifest fixture (e.g. TeX Live packages for Guix) is served
    in place of the real manifest. Each of its sources must then be recorded
    under the URL ``<svn_url>?nar=<integrity>``, with both ``svn_url`` and
    ``svn_paths`` present in the extra loader arguments.
    """
    manifest_url = SOURCES["guix"]["manifest"]
    upstream_repo = SOURCES["guix"]["repo"]
    lister = NixGuixLister(
        swh_scheduler, url=manifest_url, origin_upstream=upstream_repo
    )

    manifest = page_response(datadir, "texlive")
    requests_mock.get(manifest_url, [{"json": manifest}])

    stats = lister.run()
    assert stats == ListerStats(pages=7, origins=5)

    # index what ended up in the scheduler by origin URL
    listed = lister.scheduler.get_listed_origins(lister.lister_obj.id).results
    origins_by_url = {}
    for listed_origin in listed:
        origins_by_url[listed_origin.url] = listed_origin

    for source in manifest["sources"]:
        base_url = source["svn_url"]
        expected_url = f"{base_url}?nar={source['integrity']}"
        assert expected_url in origins_by_url

        loader_args = origins_by_url[expected_url].extra_loader_arguments
        # the loader must receive the unmodified svn URL ...
        assert "svn_url" in loader_args
        assert loader_args["svn_url"] == base_url
        # ... and the list of paths to export, as given in the manifest
        assert "svn_paths" in loader_args
        assert loader_args["svn_paths"] == source["svn_files"]