diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py index 3fb6f927a3c75d66e0efc9efe4fd317ca5e34490..e27a99bad003feb08bf6b04b6dd7d5496a5990ac 100644 --- a/swh/lister/nixguix/lister.py +++ b/swh/lister/nixguix/lister.py @@ -129,8 +129,7 @@ def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, st url = urls[index] try: - is_tar = _is_tarball(url) - return is_tar, urls[0] + return _is_tarball(url), urls[0] except IndexError: if request is None: raise ArtifactNatureUndetected( @@ -285,15 +284,25 @@ class NixGuixLister(StatelessLister[PageResult]): ) elif artifact_type == "url": # It's either a tarball or a file - urls = artifact.get("urls") - if not urls: + origin_urls = artifact.get("urls") + if not origin_urls: # Nothing to fetch logger.warning("Skipping url <%s>: empty artifact", artifact) continue - assert urls is not None + assert origin_urls is not None + + # Deal with urls with empty scheme (basic fallback to http) + urls = [] + for url in origin_urls: + urlparsed = urlparse(url) + if urlparsed.scheme == "": + logger.warning("Missing scheme for <%s>, fallback to http", url) + fixed_url = f"http://{url}" + else: + fixed_url = url + urls.append(fixed_url) - # FIXME: T3294: Fix missing scheme in urls origin, *fallback_urls = urls integrity = artifact.get("integrity") diff --git a/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json b/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json index cde21852a0df39420000d77401db8da651994ed1..57e32f52227e5a587a878de90308cbe1ae61ecb1 100644 --- a/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json +++ b/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json @@ -29,6 +29,13 @@ ], "integrity": "sha256-bss09x9yOnuW+Q5BHHjf8nNcCNxCKMdl9/2/jKSFcrQ=" }, + { + "type": "url", + "urls": [ + "www.roudoudou.com/export/cpc/rasm/rasm_v0117_src.zip" + ], + "integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI=" + }, { "type": "url", "urls": [ diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py index 34ba62f0af4b1f2996b275cc1fde1f5d32cd34ea..6ed4d1d2ee0504c4eb781be77d97cb7695ca13f5 100644 --- a/swh/lister/nixguix/tests/test_lister.py +++ b/swh/lister/nixguix/tests/test_lister.py @@ -146,7 +146,7 @@ def test_is_tarball_complex_with_content_type_result( assert origin == url -def test_lister_nixguix(datadir, swh_scheduler, requests_mock): +def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock): """NixGuixLister should list all origins per visit type""" url = "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json" origin_upstream = "https://github.com/NixOS/nixpkgs"