Skip to content
Snippets Groups Projects
Verified Commit 81688ca1 authored by Antoine R. Dumont
Browse files

nixguix: Use Content-Disposition from HTTP HEAD request if provided

The Content-Disposition header is used as a last fallback after the Content-Type check, instead of raising immediately.

Related to T3781
parent 026fea21
No related branches found
Tags v4.1.0
No related merge requests found
...@@ -242,12 +242,33 @@ def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, st ...@@ -242,12 +242,33 @@ def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, st
url, url,
) )
origin = urls[0]
content_type = response.headers.get("Content-Type") content_type = response.headers.get("Content-Type")
if content_type: if content_type:
logger.debug("Content-Type: %s", content_type) logger.debug("Content-Type: %s", content_type)
if content_type == "application/json": if content_type == "application/json":
return False, urls[0] return False, origin
return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), urls[0] return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), origin
content_disposition = response.headers.get("Content-Disposition")
if content_disposition:
logger.debug("Content-Disposition: %s", content_disposition)
if "filename=" in content_disposition:
fields = content_disposition.split("; ")
for field in fields:
if "filename=" in field:
_, filename = field.split("filename=")
break
return (
url_endswith(
urlparse(filename),
TARBALL_EXTENSIONS,
raise_when_no_extension=False,
),
origin,
)
raise ArtifactNatureUndetected( raise ArtifactNatureUndetected(
f"Cannot determine artifact type from url <{url}>" f"Cannot determine artifact type from url <{url}>"
......
...@@ -272,6 +272,20 @@ ...@@ -272,6 +272,20 @@
"https://codeload.github.com/fifengine/fifechan/tar.gz/0.1.5" "https://codeload.github.com/fifengine/fifechan/tar.gz/0.1.5"
], ],
"integrity": "sha256-Kb5f9LN54vxPiO99i8FyNCEw3T53owYfZMinXv5OunM=" "integrity": "sha256-Kb5f9LN54vxPiO99i8FyNCEw3T53owYfZMinXv5OunM="
},
{
"type": "url",
"urls": [
"https://codeload.github.com/unknown-horizons/unknown-horizons/tar.gz/2019.1"
],
"integrity": "sha256-pBf9PTQiEv0ZDk8hvoLvE8EOHtfCiPu+RuRiAM895Ng="
},
{
"type": "url",
"urls": [
"https://codeload.github.com/fifengine/fifengine/tar.gz/0.4.2"
],
"integrity": "sha256-6IK1W++jauLxqJraFq8PgUobePfL5gIexbFgVgTPj/g="
} }
], ],
"version": "1", "version": "1",
......
...@@ -240,6 +240,19 @@ def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock): ...@@ -240,6 +240,19 @@ def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock):
"Content-Type": "application/x-gzip", "Content-Type": "application/x-gzip",
}, },
) )
requests_mock.head(
"https://codeload.github.com/unknown-horizons/unknown-horizons/tar.gz/2019.1",
headers={
"Content-Disposition": "attachment; filename=unknown-horizons-2019.1.tar.gz",
},
)
requests_mock.head(
"https://codeload.github.com/fifengine/fifengine/tar.gz/0.4.2",
headers={
"Content-Disposition": "attachment; name=fieldName; "
"filename=fifengine-0.4.2.tar.gz; other=stuff",
},
)
expected_visit_types = defaultdict(int) expected_visit_types = defaultdict(int)
# origin upstream is added as origin # origin upstream is added as origin
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment