From fdeb086f7743498744ca2aa95a5c1c1e658c811e Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Tue, 12 Mar 2024 17:53:15 +0100 Subject: [PATCH] nixguix: Handle creation of svn-export visit types on svn sub-trees Some Guix packages correspond to subset exports of a subversion source tree at a given revision, typically the Tex Live ones. In that case, we must pass an extra parameter to the svn-export loader to specify the sub-paths to export but also use a unique origin URL for each package to archive as otherwise the same one would be used and only a single package would be archived. Related to swh/infra/sysadm-environment#5263. --- swh/lister/nixguix/lister.py | 19 +++++++- .../nixguix/tests/data/sources-texlive.json | 44 +++++++++++++++++++ swh/lister/nixguix/tests/test_lister.py | 34 ++++++++++++++ 3 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 swh/lister/nixguix/tests/data/sources-texlive.json diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py index f20c9020..89bf4a84 100644 --- a/swh/lister/nixguix/lister.py +++ b/swh/lister/nixguix/lister.py @@ -113,6 +113,9 @@ class Artifact: """Optional reference on the artifact (git commit, branch, svn commit, tag, ...)""" submodules: bool """Indicates if submodules should be retrieved for a git-checkout visit type""" + svn_paths: Optional[List[str]] + """Optional list of paths for the svn-export loader, only those will be exported + and loaded into the archive""" @dataclass @@ -472,14 +475,23 @@ class NixGuixLister(StatelessLister[PageResult]): if not checksums: continue + origin_url = plain_url + svn_paths = artifact.get("svn_files") + if svn_paths: + # as multiple svn-export visit types can use the same base svn URL + # we modify the origin URL to ensure it is unique by appending the + # NAR hash value as a query parameter + origin_url += f"?nar={integrity}" + yield ArtifactType.ARTIFACT, Artifact( - origin=plain_url, + origin=origin_url, fallback_urls=[], checksums=checksums, checksum_layout=MAPPING_CHECKSUM_LAYOUT[outputHashMode], visit_type=VCS_ARTIFACT_TYPE_TO_VISIT_TYPE[artifact_type], ref=plain_ref, submodules=artifact.get("submodule", False), + svn_paths=svn_paths, ) elif artifact_type == "url": @@ -614,6 +626,7 @@ class NixGuixLister(StatelessLister[PageResult]): visit_type="tarball-directory" if is_tar else "content", ref=None, submodules=False, + svn_paths=None, ) else: logger.warning( @@ -644,6 +657,10 @@ class NixGuixLister(StatelessLister[PageResult]): loader_arguments["ref"] = artifact.ref if artifact.submodules: loader_arguments["submodules"] = artifact.submodules + if artifact.svn_paths: + # extract the base svn url from the modified origin URL (see get_pages method) + loader_arguments["svn_url"] = artifact.origin.rsplit("?", maxsplit=1)[0] + loader_arguments["svn_paths"] = artifact.svn_paths yield ListedOrigin( lister_id=self.lister_obj.id, url=artifact.origin, diff --git a/swh/lister/nixguix/tests/data/sources-texlive.json b/swh/lister/nixguix/tests/data/sources-texlive.json new file mode 100644 index 00000000..4894db78 --- /dev/null +++ b/swh/lister/nixguix/tests/data/sources-texlive.json @@ -0,0 +1,44 @@ +{ + "sources": [ + { + "type": "svn", + "svn_url": "svn://www.tug.org/texlive/tags/texlive-2023.0/Master/texmf-dist/", + "svn_files": [ + "bibtex/bib/oberdiek/", + "doc/latex/oberdiek/", + "source/latex/oberdiek/", + "tex/generic/oberdiek/", + "tex/latex/oberdiek/" + ], + "integrity": "sha256-n9ZrKjR0JYOsbFtKby7UWykYjVY0f1hgInyR3DNbpro=", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "svn_revision": 66594 + }, + { + "type": "svn", + "svn_url": "svn://www.tug.org/texlive/tags/texlive-2023.0/Master/texmf-dist/", + "svn_files": [ + "fonts/source/public/knuth-lib/", + "fonts/tfm/public/knuth-lib/", + "tex/generic/knuth-lib/", + "tex/plain/knuth-lib/" + ], + "integrity": "sha256-it3vOYZ4VrsXcBY6QSeIuHsNcoQkZsWP7aYaC8j4iDY=", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "svn_revision": 66594 + }, + { + "type": "svn", + "svn_url": "svn://www.tug.org/texlive/tags/texlive-2023.0/Master/texmf-dist/", + "svn_files": [ + "doc/latex/etdipa/" + ], + "integrity": "sha256-H25frh/nt438g8lsUCcfNuytvPrBrkYIpEEpqq4q48o=", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "svn_revision": 66594 + } + ] +} \ No newline at end of file diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py index 8dab20f2..180fe4fe 100644 --- a/swh/lister/nixguix/tests/test_lister.py +++ b/swh/lister/nixguix/tests/test_lister.py @@ -457,3 +457,37 @@ def test_lister_nixguix_fail(datadir, swh_scheduler, requests_mock): scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == 0 + + +def test_lister_nixguix_svn_export_sub_trees(datadir, swh_scheduler, requests_mock): + """NixGuixLister should handle svn-export visit types exporting a subset of + a subversion source tree (e.g. Tex Live packages for Guix)""" + url = SOURCES["guix"]["manifest"] + origin_upstream = SOURCES["guix"]["repo"] + lister = NixGuixLister(swh_scheduler, url=url, origin_upstream=origin_upstream) + + response = page_response(datadir, "texlive") + requests_mock.get(url, [{"json": response}]) + + listed_result = lister.run() + + assert listed_result == ListerStats(pages=7, origins=5) + + scheduler_origins = { + origin.url: origin + for origin in lister.scheduler.get_listed_origins(lister.lister_obj.id).results + } + + for source in response["sources"]: + svn_url = source["svn_url"] + origin_url = f"{source['svn_url']}?nar={source['integrity']}" + assert origin_url in scheduler_origins + assert "svn_url" in scheduler_origins[origin_url].extra_loader_arguments + assert ( + scheduler_origins[origin_url].extra_loader_arguments["svn_url"] == svn_url + ) + assert "svn_paths" in scheduler_origins[origin_url].extra_loader_arguments + assert ( + scheduler_origins[origin_url].extra_loader_arguments["svn_paths"] + == source["svn_files"] + ) -- GitLab