diff --git a/PKG-INFO b/PKG-INFO index 94a9185df5647d74c1f5238c6806e78f4ef7c63c..6922571d85582fde7a703d8af0ed5a82512e17f8 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.13.0 +Version: 1.0.0 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index 94a9185df5647d74c1f5238c6806e78f4ef7c63c..6922571d85582fde7a703d8af0ed5a82512e17f8 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.13.0 +Version: 1.0.0 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index e4598eb3efde98f835c6672c564de70f76994a53..44daee52915162b633962335652caf79d803182f 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -857,6 +857,17 @@ class CoreSWHID(_BaseSWHID[ObjectType]): ) """the type of object the identifier points to""" + def to_extended(self) -> ExtendedSWHID: + """Converts this CoreSWHID into an ExtendedSWHID. + + As ExtendedSWHID is a superset of CoreSWHID, this is lossless.""" + return ExtendedSWHID( + namespace=self.namespace, + scheme_version=self.scheme_version, + object_type=ExtendedObjectType(self.object_type.value), + object_id=self.object_id, + ) + def _parse_core_swhid(swhid: Union[str, CoreSWHID, None]) -> Optional[CoreSWHID]: if swhid is None or isinstance(swhid, CoreSWHID): diff --git a/swh/model/model.py b/swh/model/model.py index 10b39f208c37f278b68bca42d746b15f9d89c096..d111194799169c2b9a17712094436c0670ad130a 100644 --- a/swh/model/model.py +++ b/swh/model/model.py @@ -18,14 +18,17 @@ from typing_extensions import Final from .collections import ImmutableDict from .hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytes from .identifiers import ( - SWHID, directory_identifier, normalize_timestamp, - parse_swhid, + origin_identifier, release_identifier, revision_identifier, snapshot_identifier, ) +from .identifiers import CoreSWHID +from .identifiers import ExtendedObjectType as SwhidExtendedObjectType +from .identifiers import ExtendedSWHID +from .identifiers import ObjectType as SwhidObjectType class MissingData(Exception): @@ -63,7 +66,7 @@ def dictify(value): "Helper function used by BaseModel.to_dict()" if isinstance(value, BaseModel): return value.to_dict() - elif isinstance(value, SWHID): + elif isinstance(value, (CoreSWHID, ExtendedSWHID)): return str(value) elif isinstance(value, Enum): return value.value @@ -274,6 +277,13 @@ class Origin(BaseModel): def unique_key(self) -> KeyType: return {"url": self.url} + def swhid(self) -> ExtendedSWHID: + """Returns a SWHID representing this origin.""" + return ExtendedSWHID( + object_type=SwhidExtendedObjectType.ORIGIN, + object_id=hash_to_bytes(origin_identifier(self.unique_key())), + ) + @attr.s(frozen=True, slots=True) class OriginVisit(BaseModel): @@ -415,6 +425,10 @@ class Snapshot(HashableObject, BaseModel): **d, ) + def swhid(self) -> CoreSWHID: + """Returns a SWHID representing this object.""" + return CoreSWHID(object_type=SwhidObjectType.SNAPSHOT, object_id=self.id) + @attr.s(frozen=True, slots=True) class Release(HashableObject, BaseModel): @@ -461,6 +475,10 @@ class Release(HashableObject, BaseModel): d["date"] = TimestampWithTimezone.from_dict(d["date"]) return cls(target_type=ObjectType(d.pop("target_type")), **d) + def swhid(self) -> CoreSWHID: + """Returns a SWHID representing this object.""" + return CoreSWHID(object_type=SwhidObjectType.RELEASE, object_id=self.id) + def anonymize(self) -> "Release": """Returns an anonymized version of the Release object. @@ -549,6 +567,10 @@ class Revision(HashableObject, BaseModel): **d, ) + def swhid(self) -> CoreSWHID: + """Returns a SWHID representing this object.""" + return CoreSWHID(object_type=SwhidObjectType.REVISION, object_id=self.id) + def anonymize(self) -> "Revision": """Returns an anonymized version of the Revision object. @@ -591,6 +613,10 @@ class Directory(HashableObject, BaseModel): **d, ) + def swhid(self) -> CoreSWHID: + """Returns a SWHID representing this object.""" + return CoreSWHID(object_type=SwhidObjectType.DIRECTORY, object_id=self.id) + @attr.s(frozen=True, slots=True) class BaseContent(BaseModel): @@ -706,6 +732,10 @@ class Content(BaseContent): def unique_key(self) -> KeyType: return self.sha1 # TODO: use a dict of hashes + def swhid(self) -> CoreSWHID: + """Returns a SWHID representing this object.""" + return CoreSWHID(object_type=SwhidObjectType.CONTENT, object_id=self.sha1_git) + @attr.s(frozen=True, slots=True) class SkippedContent(BaseContent): @@ -850,25 +880,12 @@ class MetadataFetcher(BaseModel): return {"name": self.name, "version": self.version} -class MetadataTargetType(Enum): - """The type of object extrinsic metadata refer to.""" - - CONTENT = "content" - DIRECTORY = "directory" - REVISION = "revision" - RELEASE = "release" - SNAPSHOT = "snapshot" - ORIGIN = "origin" - - @attr.s(frozen=True, slots=True) class RawExtrinsicMetadata(BaseModel): object_type: Final = "raw_extrinsic_metadata" # target object - type = attr.ib(type=MetadataTargetType, validator=type_validator()) - target = attr.ib(type=Union[str, SWHID], validator=type_validator()) - """URL if type=MetadataTargetType.ORIGIN, else core SWHID""" + target = attr.ib(type=ExtendedSWHID, validator=type_validator()) # source discovery_date = attr.ib(type=datetime.datetime, validator=type_validator()) @@ -882,21 +899,19 @@ class RawExtrinsicMetadata(BaseModel): # context origin = attr.ib(type=Optional[str], default=None, validator=type_validator()) visit = attr.ib(type=Optional[int], default=None, validator=type_validator()) - snapshot = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) - release = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) - revision = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) + snapshot = attr.ib( + type=Optional[CoreSWHID], default=None, validator=type_validator() + ) + release = attr.ib( + type=Optional[CoreSWHID], default=None, validator=type_validator() + ) + revision = attr.ib( + type=Optional[CoreSWHID], default=None, validator=type_validator() + ) path = attr.ib(type=Optional[bytes], default=None, validator=type_validator()) - directory = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) - - @target.validator - def check_target(self, attribute, value): - if self.type == MetadataTargetType.ORIGIN: - if isinstance(value, SWHID) or value.startswith("swh:"): - raise ValueError( - "Got SWHID as target for origin metadata (expected an URL)." - ) - else: - self._check_swhid(self.type.value, value) + directory = attr.ib( + type=Optional[CoreSWHID], default=None, validator=type_validator() + ) @discovery_date.validator def check_discovery_date(self, attribute, value): @@ -909,15 +924,16 @@ class RawExtrinsicMetadata(BaseModel): if value is None: return - if self.type not in ( - MetadataTargetType.SNAPSHOT, - MetadataTargetType.RELEASE, - MetadataTargetType.REVISION, - MetadataTargetType.DIRECTORY, - MetadataTargetType.CONTENT, + if self.target.object_type not in ( + SwhidExtendedObjectType.SNAPSHOT, + SwhidExtendedObjectType.RELEASE, + SwhidExtendedObjectType.REVISION, + SwhidExtendedObjectType.DIRECTORY, + SwhidExtendedObjectType.CONTENT, ): raise ValueError( - f"Unexpected 'origin' context for {self.type.value} object: {value}" + f"Unexpected 'origin' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) if value.startswith("swh:"): @@ -933,15 +949,16 @@ class RawExtrinsicMetadata(BaseModel): if value is None: return - if self.type not in ( - MetadataTargetType.SNAPSHOT, - MetadataTargetType.RELEASE, - MetadataTargetType.REVISION, - MetadataTargetType.DIRECTORY, - MetadataTargetType.CONTENT, + if self.target.object_type not in ( + SwhidExtendedObjectType.SNAPSHOT, + SwhidExtendedObjectType.RELEASE, + SwhidExtendedObjectType.REVISION, + SwhidExtendedObjectType.DIRECTORY, + SwhidExtendedObjectType.CONTENT, ): raise ValueError( - f"Unexpected 'visit' context for {self.type.value} object: {value}" + f"Unexpected 'visit' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) if self.origin is None: @@ -955,54 +972,64 @@ class RawExtrinsicMetadata(BaseModel): if value is None: return - if self.type not in ( - MetadataTargetType.RELEASE, - MetadataTargetType.REVISION, - MetadataTargetType.DIRECTORY, - MetadataTargetType.CONTENT, + if self.target.object_type not in ( + SwhidExtendedObjectType.RELEASE, + SwhidExtendedObjectType.REVISION, + SwhidExtendedObjectType.DIRECTORY, + SwhidExtendedObjectType.CONTENT, ): raise ValueError( - f"Unexpected 'snapshot' context for {self.type.value} object: {value}" + f"Unexpected 'snapshot' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) - self._check_swhid("snapshot", value) + self._check_swhid(SwhidObjectType.SNAPSHOT, value) @release.validator def check_release(self, attribute, value): if value is None: return - if self.type not in ( - MetadataTargetType.REVISION, - MetadataTargetType.DIRECTORY, - MetadataTargetType.CONTENT, + if self.target.object_type not in ( + SwhidExtendedObjectType.REVISION, + SwhidExtendedObjectType.DIRECTORY, + SwhidExtendedObjectType.CONTENT, ): raise ValueError( - f"Unexpected 'release' context for {self.type.value} object: {value}" + f"Unexpected 'release' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) - self._check_swhid("release", value) + self._check_swhid(SwhidObjectType.RELEASE, value) @revision.validator def check_revision(self, attribute, value): if value is None: return - if self.type not in (MetadataTargetType.DIRECTORY, MetadataTargetType.CONTENT,): + if self.target.object_type not in ( + SwhidExtendedObjectType.DIRECTORY, + SwhidExtendedObjectType.CONTENT, + ): raise ValueError( - f"Unexpected 'revision' context for {self.type.value} object: {value}" + f"Unexpected 'revision' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) - self._check_swhid("revision", value) + self._check_swhid(SwhidObjectType.REVISION, value) @path.validator def check_path(self, attribute, value): if value is None: return - if self.type not in (MetadataTargetType.DIRECTORY, MetadataTargetType.CONTENT,): + if self.target.object_type not in ( + SwhidExtendedObjectType.DIRECTORY, + SwhidExtendedObjectType.CONTENT, + ): raise ValueError( - f"Unexpected 'path' context for {self.type.value} object: {value}" + f"Unexpected 'path' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) @directory.validator @@ -1010,12 +1037,13 @@ class RawExtrinsicMetadata(BaseModel): if value is None: return - if self.type not in (MetadataTargetType.CONTENT,): + if self.target.object_type not in (SwhidExtendedObjectType.CONTENT,): raise ValueError( - f"Unexpected 'directory' context for {self.type.value} object: {value}" + f"Unexpected 'directory' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) - self._check_swhid("directory", value) + self._check_swhid(SwhidObjectType.DIRECTORY, value) def _check_swhid(self, expected_object_type, swhid): if isinstance(swhid, str): @@ -1023,13 +1051,10 @@ class RawExtrinsicMetadata(BaseModel): if swhid.object_type != expected_object_type: raise ValueError( - f"Expected SWHID type '{expected_object_type}', " - f"got '{swhid.object_type}' in {swhid}" + f"Expected SWHID type '{expected_object_type.name.lower()}', " + f"got '{swhid.object_type.name.lower()}' in {swhid}" ) - if swhid.metadata: - raise ValueError(f"Expected core SWHID, but got: {swhid}") - def to_dict(self): d = super().to_dict() @@ -1051,24 +1076,20 @@ class RawExtrinsicMetadata(BaseModel): def from_dict(cls, d): d = { **d, - "type": MetadataTargetType(d["type"]), + "target": ExtendedSWHID.from_string(d["target"]), "authority": MetadataAuthority.from_dict(d["authority"]), "fetcher": MetadataFetcher.from_dict(d["fetcher"]), } - if d["type"] != MetadataTargetType.ORIGIN: - d["target"] = parse_swhid(d["target"]) - swhid_keys = ("snapshot", "release", "revision", "directory") for swhid_key in swhid_keys: if d.get(swhid_key): - d[swhid_key] = parse_swhid(d[swhid_key]) + d[swhid_key] = CoreSWHID.from_string(d[swhid_key]) return super().from_dict(d) def unique_key(self) -> KeyType: return { - "type": self.type.value, "target": str(self.target), "authority_type": self.authority.type.value, "authority_url": self.authority.url, diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py index 8f49709606de28f66abad0225ea1f3d9b7db595f..ae6f9635e8013ead488c5d489772751dabc5e458 100644 --- a/swh/model/tests/swh_model_data.py +++ b/swh/model/tests/swh_model_data.py @@ -8,8 +8,8 @@ from typing import Dict, Sequence import attr -from swh.model.hashutil import MultiHash, hash_to_bytes, hash_to_hex -from swh.model.identifiers import SWHID +from swh.model.hashutil import MultiHash, hash_to_bytes +from swh.model.identifiers import ExtendedSWHID from swh.model.model import ( BaseModel, Content, @@ -18,7 +18,6 @@ from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, ObjectType, Origin, OriginVisit, @@ -310,8 +309,7 @@ METADATA_FETCHERS = [ RAW_EXTRINSIC_METADATA = [ RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target="http://example.org/foo.git", + target=Origin("http://example.org/foo.git").swhid(), discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC), authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None), fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None), @@ -319,10 +317,7 @@ RAW_EXTRINSIC_METADATA = [ metadata=b'{"foo": "bar"}', ), RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=SWHID( - object_type="content", object_id=hash_to_hex(CONTENTS[0].sha1_git) - ), + target=ExtendedSWHID.from_string(str(CONTENTS[0].swhid())), discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC), authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None), fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None), diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 93d075c60fc2d3168225444af3e62351080a1948..a3a5295f5bca6bdd6c70675cb9b9add40f542ea3 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -108,21 +108,22 @@ class UtilityFunctionsDateOffset(unittest.TestCase): self.assertEqual(identifiers.format_offset(offset), res) +content_example = { + "status": "visible", + "length": 5, + "data": b"1984\n", + "ctime": datetime.datetime(2015, 11, 22, 16, 33, 56, tzinfo=datetime.timezone.utc), +} + + class ContentIdentifier(unittest.TestCase): def setUp(self): - self.content = { - "status": "visible", - "length": 5, - "data": b"1984\n", - "ctime": datetime.datetime( - 2015, 11, 22, 16, 33, 56, tzinfo=datetime.timezone.utc - ), - } - - self.content_id = hashutil.MultiHash.from_data(self.content["data"]).digest() + self.content_id = hashutil.MultiHash.from_data(content_example["data"]).digest() def test_content_identifier(self): - self.assertEqual(identifiers.content_identifier(self.content), self.content_id) + self.assertEqual( + identifiers.content_identifier(content_example), self.content_id + ) directory_example = { @@ -772,15 +773,15 @@ class SnapshotIdentifier(unittest.TestCase): ) -class OriginIdentifier(unittest.TestCase): - def setUp(self): - self.origin = { - "url": "https://github.com/torvalds/linux", - } +origin_example = { + "url": "https://github.com/torvalds/linux", +} + +class OriginIdentifier(unittest.TestCase): def test_content_identifier(self): self.assertEqual( - identifiers.origin_identifier(self.origin), + identifiers.origin_identifier(origin_example), "b63a575fe3faab7692c9f38fb09d4bb45651bb0f", ) @@ -1367,6 +1368,18 @@ def test_parse_unparse_swhids(string, core, qualified, extended): assert string == str(parsed_swhid) +@pytest.mark.parametrize( + "core,extended", + [ + pytest.param(core, extended, id=string) + for (string, core, qualified, extended) in VALID_SWHIDS + if core is not None + ], +) +def test_core_to_extended(core, extended): + assert core.to_extended() == extended + + @pytest.mark.parametrize( "ns,version,type,id,qualifiers", [ diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py index 393dcfd8c8a4243f39c6989875c68f32092065bb..bb554636808e158bdee35fdc9ef2095b8fa7b174 100644 --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -12,12 +12,15 @@ from hypothesis import given from hypothesis.strategies import binary import pytest -from swh.model.hashutil import MultiHash, hash_to_bytes +from swh.model.hashutil import MultiHash, hash_to_bytes, hash_to_hex import swh.model.hypothesis_strategies as strategies from swh.model.identifiers import ( - SWHID, + CoreSWHID, + ExtendedSWHID, + ObjectType, + content_identifier, directory_identifier, - parse_swhid, + origin_identifier, release_identifier, revision_identifier, snapshot_identifier, @@ -29,7 +32,6 @@ from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, MissingData, Origin, OriginVisit, @@ -44,12 +46,16 @@ from swh.model.model import ( TimestampWithTimezone, ) from swh.model.tests.test_identifiers import ( + content_example, directory_example, + origin_example, release_example, revision_example, snapshot_example, ) +EXAMPLE_HASH = hash_to_bytes("94a9ed024d3859793618152ea559a168bbcbb5e2") + @given(strategies.objects()) def test_todict_inverse_fromdict(objtype_and_obj): @@ -702,22 +708,34 @@ def test_revision_extra_headers_as_lists_from_dict(): # ID computation +def test_content_model_id_computation(): + cnt_dict = content_example.copy() + + cnt_id_str = hash_to_hex(content_identifier(cnt_dict)["sha1_git"]) + cnt_model = Content.from_data(cnt_dict["data"]) + assert str(cnt_model.swhid()) == "swh:1:cnt:" + cnt_id_str + + def test_directory_model_id_computation(): dir_dict = directory_example.copy() del dir_dict["id"] - dir_id = hash_to_bytes(directory_identifier(dir_dict)) + dir_id_str = directory_identifier(dir_dict) + dir_id = hash_to_bytes(dir_id_str) dir_model = Directory.from_dict(dir_dict) assert dir_model.id == dir_id + assert str(dir_model.swhid()) == "swh:1:dir:" + dir_id_str def test_revision_model_id_computation(): rev_dict = revision_example.copy() del rev_dict["id"] - rev_id = hash_to_bytes(revision_identifier(rev_dict)) + rev_id_str = revision_identifier(rev_dict) + rev_id = hash_to_bytes(rev_id_str) rev_model = Revision.from_dict(rev_dict) assert rev_model.id == rev_id + assert str(rev_model.swhid()) == "swh:1:rev:" + rev_id_str def test_revision_model_id_computation_with_no_date(): @@ -740,19 +758,31 @@ def test_release_model_id_computation(): rel_dict = release_example.copy() del rel_dict["id"] - rel_id = hash_to_bytes(release_identifier(rel_dict)) + rel_id_str = release_identifier(rel_dict) + rel_id = hash_to_bytes(rel_id_str) rel_model = Release.from_dict(rel_dict) assert isinstance(rel_model.date, TimestampWithTimezone) assert rel_model.id == hash_to_bytes(rel_id) + assert str(rel_model.swhid()) == "swh:1:rel:" + rel_id_str def test_snapshot_model_id_computation(): snp_dict = snapshot_example.copy() del snp_dict["id"] - snp_id = hash_to_bytes(snapshot_identifier(snp_dict)) + snp_id_str = snapshot_identifier(snp_dict) + snp_id = hash_to_bytes(snp_id_str) snp_model = Snapshot.from_dict(snp_dict) assert snp_model.id == snp_id + assert str(snp_model.swhid()) == "swh:1:snp:" + snp_id_str + + +def test_origin_model_id_computation(): + ori_dict = origin_example.copy() + + ori_id_str = origin_identifier(ori_dict) + ori_model = Origin.from_dict(ori_dict) + assert str(ori_model.swhid()) == "swh:1:ori:" + ori_id_str @given(strategies.objects(split_content=True)) @@ -780,8 +810,13 @@ _metadata_authority = MetadataAuthority( type=MetadataAuthorityType.FORGE, url="https://forge.softwareheritage.org", ) _metadata_fetcher = MetadataFetcher(name="test-fetcher", version="0.0.1",) -_content_swhid = parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2") +_content_swhid = ExtendedSWHID.from_string( + "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2" +) _origin_url = "https://forge.softwareheritage.org/source/swh-model.git" +_origin_swhid = ExtendedSWHID.from_string( + "swh:1:ori:94a9ed024d3859793618152ea559a168bbcbb5e2" +) _dummy_qualifiers = {"origin": "https://example.com", "lines": "42"} _common_metadata_fields = dict( discovery_date=datetime.datetime.now(tz=datetime.timezone.utc), @@ -796,15 +831,11 @@ def test_metadata_valid(): """Checks valid RawExtrinsicMetadata objects don't raise an error.""" # Simplest case - RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, target=_origin_url, **_common_metadata_fields - ) + RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields) # Object with an SWHID RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - **_common_metadata_fields, + target=_content_swhid, **_common_metadata_fields, ) @@ -819,92 +850,59 @@ def test_metadata_to_dict(): "metadata": b'{"origin": "https://example.com", "lines": "42"}', } - m = RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, target=_origin_url, **_common_metadata_fields, - ) + m = RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields,) + assert m.to_dict() == { + "target": str(_origin_swhid), + **common_fields, + } + assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m + + m = RawExtrinsicMetadata(target=_content_swhid, **_common_metadata_fields,) assert m.to_dict() == { - "type": "origin", - "target": _origin_url, + "target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", **common_fields, } assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m + hash_hex = "6162" * 10 + hash_bin = b"ab" * 10 m = RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, **_common_metadata_fields, + origin="https://example.org/", + snapshot=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=hash_bin), + release=CoreSWHID(object_type=ObjectType.RELEASE, object_id=hash_bin), + revision=CoreSWHID(object_type=ObjectType.REVISION, object_id=hash_bin), + path=b"/foo/bar", + directory=CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=hash_bin), ) assert m.to_dict() == { - "type": "content", "target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", **common_fields, + "origin": "https://example.org/", + "snapshot": f"swh:1:snp:{hash_hex}", + "release": f"swh:1:rel:{hash_hex}", + "revision": f"swh:1:rev:{hash_hex}", + "path": b"/foo/bar", + "directory": f"swh:1:dir:{hash_hex}", } assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m def test_metadata_invalid_target(): """Checks various invalid values for the 'target' field.""" - - # SWHID for an origin - with pytest.raises(ValueError, match="expected an URL"): - RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_content_swhid, - **_common_metadata_fields, - ) - - # SWHID for an origin (even when passed as string) - with pytest.raises(ValueError, match="expected an URL"): - RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", - **_common_metadata_fields, - ) - - # URL for a non-origin - with pytest.raises(ValueError, match="Expected SWHID, got a string"): - RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_origin_url, - **_common_metadata_fields, - ) - # SWHID passed as string instead of SWHID - with pytest.raises(ValueError, match="Expected SWHID, got a string"): + with pytest.raises(ValueError, match="target must be.*ExtendedSWHID"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", **_common_metadata_fields, ) - # Object type does not match the SWHID - with pytest.raises( - ValueError, match="Expected SWHID type 'revision', got 'content'" - ): - RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - target=_content_swhid, - **_common_metadata_fields, - ) - - # Non-core SWHID - with pytest.raises(ValueError, match="Expected core SWHID"): - RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=SWHID( - object_type="content", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - metadata=_dummy_qualifiers, - ), - **_common_metadata_fields, - ) - def test_metadata_naive_datetime(): with pytest.raises(ValueError, match="must be a timezone-aware datetime"): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, + target=_origin_swhid, **{**_common_metadata_fields, "discovery_date": datetime.datetime.now()}, ) @@ -917,24 +915,17 @@ def test_metadata_validate_context_origin(): ValueError, match="Unexpected 'origin' context for origin object" ): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - origin=_origin_url, - **_common_metadata_fields, + target=_origin_swhid, origin=_origin_url, **_common_metadata_fields, ) # but all other types can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - origin=_origin_url, - **_common_metadata_fields, + target=_content_swhid, origin=_origin_url, **_common_metadata_fields, ) # SWHIDs aren't valid origin URLs with pytest.raises(ValueError, match="SWHID used as context origin URL"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, origin="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", **_common_metadata_fields, @@ -949,34 +940,23 @@ def test_metadata_validate_context_visit(): ValueError, match="Unexpected 'visit' context for origin object" ): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - visit=42, - **_common_metadata_fields, + target=_origin_swhid, visit=42, **_common_metadata_fields, ) # but all other types can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - origin=_origin_url, - visit=42, - **_common_metadata_fields, + target=_content_swhid, origin=_origin_url, visit=42, **_common_metadata_fields, ) # Missing 'origin' with pytest.raises(ValueError, match="'origin' context must be set if 'visit' is"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - visit=42, - **_common_metadata_fields, + target=_content_swhid, visit=42, **_common_metadata_fields, ) # visit id must be positive with pytest.raises(ValueError, match="Nonpositive visit id"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, origin=_origin_url, visit=-42, @@ -992,49 +972,27 @@ def test_metadata_validate_context_snapshot(): ValueError, match="Unexpected 'snapshot' context for origin object" ): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - snapshot=SWHID( - object_type="snapshot", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", + target=_origin_swhid, + snapshot=CoreSWHID( + object_type=ObjectType.SNAPSHOT, object_id=EXAMPLE_HASH, ), **_common_metadata_fields, ) # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - snapshot=SWHID( - object_type="snapshot", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2" - ), + snapshot=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=EXAMPLE_HASH), **_common_metadata_fields, ) - # Non-core SWHID - with pytest.raises(ValueError, match="Expected core SWHID"): - RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - snapshot=SWHID( - object_type="snapshot", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - metadata=_dummy_qualifiers, - ), - **_common_metadata_fields, - ) - # SWHID type doesn't match the expected type of this context key with pytest.raises( ValueError, match="Expected SWHID type 'snapshot', got 'content'" ): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - snapshot=SWHID( - object_type="content", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + snapshot=CoreSWHID(object_type=ObjectType.CONTENT, object_id=EXAMPLE_HASH,), **_common_metadata_fields, ) @@ -1047,49 +1005,25 @@ def test_metadata_validate_context_release(): ValueError, match="Unexpected 'release' context for origin object" ): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - release=SWHID( - object_type="release", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + target=_origin_swhid, + release=CoreSWHID(object_type=ObjectType.RELEASE, object_id=EXAMPLE_HASH,), **_common_metadata_fields, ) # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - release=SWHID( - object_type="release", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2" - ), + release=CoreSWHID(object_type=ObjectType.RELEASE, object_id=EXAMPLE_HASH), **_common_metadata_fields, ) - # Non-core SWHID - with pytest.raises(ValueError, match="Expected core SWHID"): - RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - release=SWHID( - object_type="release", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - metadata=_dummy_qualifiers, - ), - **_common_metadata_fields, - ) - # SWHID type doesn't match the expected type of this context key with pytest.raises( ValueError, match="Expected SWHID type 'release', got 'content'" ): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - release=SWHID( - object_type="content", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + release=CoreSWHID(object_type=ObjectType.CONTENT, object_id=EXAMPLE_HASH,), **_common_metadata_fields, ) @@ -1102,49 +1036,27 @@ def test_metadata_validate_context_revision(): ValueError, match="Unexpected 'revision' context for origin object" ): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - revision=SWHID( - object_type="revision", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", + target=_origin_swhid, + revision=CoreSWHID( + object_type=ObjectType.REVISION, object_id=EXAMPLE_HASH, ), **_common_metadata_fields, ) # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - revision=SWHID( - object_type="revision", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2" - ), + revision=CoreSWHID(object_type=ObjectType.REVISION, object_id=EXAMPLE_HASH), **_common_metadata_fields, ) - # Non-core SWHID - with pytest.raises(ValueError, match="Expected core SWHID"): - RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - revision=SWHID( - object_type="revision", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - metadata=_dummy_qualifiers, - ), - **_common_metadata_fields, - ) - # SWHID type doesn't match the expected type of this context key with pytest.raises( ValueError, match="Expected SWHID type 'revision', got 'content'" ): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - revision=SWHID( - object_type="content", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + revision=CoreSWHID(object_type=ObjectType.CONTENT, object_id=EXAMPLE_HASH,), **_common_metadata_fields, ) @@ -1155,18 +1067,12 @@ def test_metadata_validate_context_path(): # Origins can't have a 'path' context with pytest.raises(ValueError, match="Unexpected 'path' context for origin object"): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - path=b"/foo/bar", - **_common_metadata_fields, + target=_origin_swhid, path=b"/foo/bar", **_common_metadata_fields, ) # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - path=b"/foo/bar", - **_common_metadata_fields, + target=_content_swhid, path=b"/foo/bar", **_common_metadata_fields, ) @@ -1178,49 +1084,28 @@ def test_metadata_validate_context_directory(): ValueError, match="Unexpected 'directory' context for origin object" ): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - directory=SWHID( - object_type="directory", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", + target=_origin_swhid, + directory=CoreSWHID( + object_type=ObjectType.DIRECTORY, object_id=EXAMPLE_HASH, ), **_common_metadata_fields, ) # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - directory=SWHID( - object_type="directory", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + directory=CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=EXAMPLE_HASH,), **_common_metadata_fields, ) - # Non-core SWHID - with pytest.raises(ValueError, match="Expected core SWHID"): - RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - directory=SWHID( - object_type="directory", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - metadata=_dummy_qualifiers, - ), - **_common_metadata_fields, - ) - # SWHID type doesn't match the expected type of this context key with pytest.raises( ValueError, match="Expected SWHID type 'directory', got 'content'" ): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - directory=SWHID( - object_type="content", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", + directory=CoreSWHID( + object_type=ObjectType.CONTENT, object_id=EXAMPLE_HASH, ), **_common_metadata_fields, )