Escape ';' in the origin qualifier when serializing a QualifiedSWHID.

QualifiedSWHID.qualifiers() now replaces ';' with '%3B' in the origin
value and asserts that the percent-decoded bytes are unchanged by that
replacement. The test changes add a parse/serialize round-trip case for an
origin that already contains '%3B' and '%25', and a serialization test for
an origin containing a raw ';'.

NOTE(review): the line structure below was restored from a
whitespace-collapsed copy and agrees with every hunk header's line counts;
leading whitespace assumes 4-space (black) formatting and should be checked
against the target files before applying.

diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
index a009316e1804f923fa42cf7818d680bbd70544f4..e4598eb3efde98f835c6672c564de70f76994a53 100644
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -985,8 +985,18 @@ class QualifiedSWHID(_BaseSWHID[ObjectType]):
         )
 
     def qualifiers(self) -> Dict[str, str]:
+        origin = self.origin
+        if origin:
+            unescaped_origin = origin
+            origin = origin.replace(";", "%3B")
+            assert urllib.parse.unquote_to_bytes(
+                origin
+            ) == urllib.parse.unquote_to_bytes(
+                unescaped_origin
+            ), "Escaping ';' in the origin qualifier corrupted the origin URL."
+
         d: Dict[str, Optional[str]] = {
-            "origin": self.origin,
+            "origin": origin,
             "visit": str(self.visit) if self.visit else None,
             "anchor": str(self.anchor) if self.anchor else None,
             "path": (
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
index 6d1a7540d522fc1292a422b53b3b41d8e0f71ce7..49d6833a05d28c4a4af0763791925c8b3fa98354 100644
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -1452,6 +1452,14 @@ QUALIFIED_SWHIDS = [
             origin="https://github.com/python/cpython",
         ),
     ),
+    (
+        f"swh:1:cnt:{HASH};origin=https://example.org/foo%3Bbar%25baz",
+        QualifiedSWHID(
+            object_type=ObjectType.CONTENT,
+            object_id=_x(HASH),
+            origin="https://example.org/foo%3Bbar%25baz",
+        ),
+    ),
     # visit:
     (
         f"swh:1:cnt:{HASH};visit=swh:1:snp:{HASH}",
@@ -1526,7 +1534,7 @@ QUALIFIED_SWHIDS = [
 
 
 @pytest.mark.parametrize("string,parsed", QUALIFIED_SWHIDS)
-def test_QualifiedSWHID_parse_qualifiers(string, parsed):
+def test_QualifiedSWHID_parse_serialize_qualifiers(string, parsed):
     """Tests parsing and serializing valid SWHIDs with the various SWHID classes."""
     if parsed is None:
         with pytest.raises(ValidationError):
@@ -1536,6 +1544,17 @@ def test_QualifiedSWHID_parse_qualifiers(string, parsed):
         assert str(parsed) == string
 
 
+def test_QualifiedSWHID_serialize_origin():
+    """Checks that semicolons in origins are escaped."""
+    string = f"swh:1:cnt:{HASH};origin=https://example.org/foo%3Bbar%25baz"
+    swhid = QualifiedSWHID(
+        object_type=ObjectType.CONTENT,
+        object_id=_x(HASH),
+        origin="https://example.org/foo;bar%25baz",
+    )
+    assert str(swhid) == string
+
+
 def test_QualifiedSWHID_attributes():
     """Checks the set of QualifiedSWHID attributes match the SWHID_QUALIFIERS
     constant."""