From e8699422d757eb968779a9e44fc5017fb5a6dd97 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <vlorentz@softwareheritage.org> Date: Fri, 4 Nov 2022 13:50:25 +0100 Subject: [PATCH] nixguix: Reject Git SSH URLs and pseudo-URLs For consistency with Maven and Packagist listers --- swh/lister/nixguix/lister.py | 2 +- swh/lister/nixguix/tests/data/sources-failure.json | 10 ++++++++++ swh/lister/nixguix/tests/test_lister.py | 13 ++++++++++--- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py index 9ebe82ea..3e410aa2 100644 --- a/swh/lister/nixguix/lister.py +++ b/swh/lister/nixguix/lister.py @@ -402,7 +402,7 @@ class NixGuixLister(StatelessLister[PageResult]): urls = [] for url in origin_urls: urlparsed = urlparse(url) - if urlparsed.scheme == "": + if urlparsed.scheme == "" and not re.match(r"^\w+@[^/]+:", url): logger.warning("Missing scheme for <%s>: fallback to http", url) fixed_url = f"http://{url}" else: diff --git a/swh/lister/nixguix/tests/data/sources-failure.json b/swh/lister/nixguix/tests/data/sources-failure.json index 237a0186..86b34a8d 100644 --- a/swh/lister/nixguix/tests/data/sources-failure.json +++ b/swh/lister/nixguix/tests/data/sources-failure.json @@ -53,6 +53,16 @@ "urls": [ "unknown://example.org/wrong-scheme-so-skipped.txt" ], "integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI=" }, + { + "type": "url", + "urls": [ "ssh://git@example.org:wrong-scheme-so-skipped.txt" ], + "integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI=" + }, + { + "type": "url", + "urls": [ "git@example.org:git-pseudourl/so-skipped" ], + "integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI=" + }, { "type": "url", "urls": [ "https://code.9front.org/hg/plan9front" ], diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py index fdb7210e..a00a5f61 100644 --- a/swh/lister/nixguix/tests/test_lister.py +++ b/swh/lister/nixguix/tests/test_lister.py @@ -353,13 +353,20 @@ def test_lister_nixguix_mostly_noop(datadir, swh_scheduler, requests_mock): ) listed_result = lister.run() - # only the origin upstream is listed, every other entries are unsupported or incomplete - assert listed_result == ListerStats(pages=1, origins=1) + expected_origins = ["https://github.com/NixOS/nixpkgs"] scheduler_origins = lister.scheduler.get_listed_origins( lister.lister_obj.id ).results - assert len(scheduler_origins) == 1 + scheduler_origin_urls = [orig.url for orig in scheduler_origins] + + assert scheduler_origin_urls == expected_origins + + # only the origin upstream is listed, every other entries are unsupported or incomplete + assert listed_result == ListerStats(pages=1, origins=1), ( + f"Expected origins: {' '.join(expected_origins)}, got: " + f"{' '.join(scheduler_origin_urls)}" + ) assert scheduler_origins[0].visit_type == "git" -- GitLab