diff --git a/PKG-INFO b/PKG-INFO index f75cd2d814b892d329431a068cc4b69bb83c5d34..3f04287e2c1bb13d17d1ec6385cde12aa43133bd 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 2.0.0 +Version: 2.1.0 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index f75cd2d814b892d329431a068cc4b69bb83c5d34..3f04287e2c1bb13d17d1ec6385cde12aa43133bd 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 2.0.0 +Version: 2.1.0 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py index 908b736bd1d09b842c951ca9f0240774a0814a52..e332c23bc885e887c104387418cbc4af7e0b296a 100644 --- a/swh/model/hashutil.py +++ b/swh/model/hashutil.py @@ -296,6 +296,7 @@ def hash_git_data(data, git_type, base_algo="sha1"): "tag", "snapshot", "raw_extrinsic_metadata", + "extid", } if git_type not in git_object_types: diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index 434f3c815becb09e19e57f1e2e95f8398bbc330a..c9f20dcb7b52b83dbd13f42a633822f87c961222 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -823,6 +823,37 @@ def raw_extrinsic_metadata_identifier(metadata: Dict[str, Any]) -> str: ) +def extid_identifier(extid: Dict[str, Any]) -> str: + """Return the intrinsic identifier for an ExtID object. + + An ExtID identifier is a salted sha1 (using the git hashing algorithm with + the ``extid`` object type) of a manifest following the format: + + ``` + extid_type $StrWithoutSpaces + extid $Bytes + target $CoreSwhid + ``` + + $StrWithoutSpaces is an ASCII string, and may not contain spaces. + + Newlines in $Bytes are escaped as with other git fields, ie. by adding a + space after them. + + Returns: + str: the intrinsic identifier for `extid` + + """ + + headers = [ + (b"extid_type", extid["extid_type"].encode("ascii")), + (b"extid", extid["extid"]), + (b"target", str(extid["target"]).encode("ascii")), + ] + + return identifier_to_str(hash_manifest("extid", headers)) + + # type of the "object_type" attribute of the SWHID class; either # ObjectType or ExtendedObjectType _TObjectType = TypeVar("_TObjectType", ObjectType, ExtendedObjectType) diff --git a/swh/model/model.py b/swh/model/model.py index 6df310a0f35ce769ccf21305814691c514b662f9..da0547186bc54cbeb30df7570e8bb1c3a44cb3f8 100644 --- a/swh/model/model.py +++ b/swh/model/model.py @@ -19,6 +19,7 @@ from .collections import ImmutableDict from .hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytes from .identifiers import ( directory_identifier, + extid_identifier, normalize_timestamp, origin_identifier, raw_extrinsic_metadata_identifier, @@ -848,7 +849,10 @@ class MetadataAuthority(BaseModel): @classmethod def from_dict(cls, d): - d["type"] = MetadataAuthorityType(d["type"]) + d = { + **d, + "type": MetadataAuthorityType(d["type"]), + } return super().from_dict(d) def unique_key(self) -> KeyType: @@ -1093,3 +1097,25 @@ class RawExtrinsicMetadata(HashableObject, BaseModel): d[swhid_key] = CoreSWHID.from_string(d[swhid_key]) return super().from_dict(d) + + +@attr.s(frozen=True, slots=True) +class ExtID(HashableObject, BaseModel): + object_type: Final = "extid" + + extid_type = attr.ib(type=str, validator=type_validator()) + extid = attr.ib(type=bytes, validator=type_validator()) + target = attr.ib(type=CoreSWHID, validator=type_validator()) + + id = attr.ib(type=Sha1Git, validator=type_validator(), default=b"") + + @classmethod + def from_dict(cls, d): + return cls( + extid=d["extid"], + extid_type=d["extid_type"], + target=CoreSWHID.from_string(d["target"]), + ) + + def compute_hash(self) -> bytes: + return hash_to_bytes(extid_identifier(self.to_dict())) diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py index ae6f9635e8013ead488c5d489772751dabc5e458..c4700cba3475382deffb45160cbdf8a88dfb0176 100644 --- a/swh/model/tests/swh_model_data.py +++ b/swh/model/tests/swh_model_data.py @@ -15,6 +15,7 @@ from swh.model.model import ( Content, Directory, DirectoryEntry, + ExtID, MetadataAuthority, MetadataAuthorityType, MetadataFetcher, @@ -131,6 +132,11 @@ REVISIONS = [ ), ] +EXTIDS = [ + ExtID(extid_type="git256", extid=b"\x03" * 32, target=REVISIONS[0].swhid(),), + ExtID(extid_type="hg", extid=b"\x04" * 20, target=REVISIONS[1].swhid(),), +] + RELEASES = [ Release( id=hash_to_bytes("8059dc4e17fcd0e51ca3bcd6b80f4577d281fd08"), @@ -330,6 +336,7 @@ RAW_EXTRINSIC_METADATA = [ TEST_OBJECTS: Dict[str, Sequence[BaseModel]] = { "content": CONTENTS, "directory": DIRECTORIES, + "extid": EXTIDS, "metadata_authority": METADATA_AUTHORITIES, "metadata_fetcher": METADATA_FETCHERS, "origin": ORIGINS, diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py index 2c84d714e882fc76e024d78fdf1e5443a33f190c..bf9725c628c337b8d701ad73ee7b39dba25d060e 100644 --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -45,6 +45,7 @@ from swh.model.model import ( Timestamp, TimestampWithTimezone, ) +from swh.model.tests.swh_model_data import TEST_OBJECTS from swh.model.tests.test_identifiers import ( content_example, directory_example, @@ -78,6 +79,17 @@ def test_todict_inverse_fromdict(objtype_and_obj): assert obj_as_dict == type(obj).from_dict(obj_as_dict).to_dict() +@pytest.mark.parametrize("object_type, objects", TEST_OBJECTS.items()) +def test_swh_model_todict_fromdict(object_type, objects): + """checks model objects in swh_model_data are in correct shape""" + assert objects + for obj in objects: + # Check the composition of from_dict and to_dict is the identity + obj_as_dict = obj.to_dict() + assert obj == type(obj).from_dict(obj_as_dict) + assert obj_as_dict == type(obj).from_dict(obj_as_dict).to_dict() + + def test_unique_key(): url = "http://example.org/" date = datetime.datetime.now(tz=datetime.timezone.utc)