From fc808e1fc9e59ffae1e82ea483d529025d7d1436 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Fri, 29 Jan 2021 15:08:49 +0100
Subject: [PATCH] model: Add 'id' field to RawExtrinsicMetadata

RawExtrinsicMetadata objects now carry an 'id' field, so that they can
be properly deduplicated and referenced.
---
 swh/model/model.py            | 8 +++++++-
 swh/model/tests/test_model.py | 7 ++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/swh/model/model.py b/swh/model/model.py
index d1111947..baa51a7e 100644
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -21,6 +21,7 @@ from .identifiers import (
     directory_identifier,
     normalize_timestamp,
     origin_identifier,
+    raw_extrinsic_metadata_identifier,
     release_identifier,
     revision_identifier,
     snapshot_identifier,
@@ -881,7 +882,7 @@ class MetadataFetcher(BaseModel):
 
 
 @attr.s(frozen=True, slots=True)
-class RawExtrinsicMetadata(BaseModel):
+class RawExtrinsicMetadata(HashableObject, BaseModel):
     object_type: Final = "raw_extrinsic_metadata"
 
     # target object
@@ -913,6 +914,11 @@ class RawExtrinsicMetadata(BaseModel):
         type=Optional[CoreSWHID], default=None, validator=type_validator()
     )
 
+    id = attr.ib(type=Sha1Git, validator=type_validator(), default=b"")
+
+    def compute_hash(self) -> bytes:
+        return hash_to_bytes(raw_extrinsic_metadata_identifier(self.to_dict()))
+
     @discovery_date.validator
     def check_discovery_date(self, attribute, value):
         """Checks the discovery_date has a timezone."""
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
index bb554636..bdc741a4 100644
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -819,7 +819,9 @@ _origin_swhid = ExtendedSWHID.from_string(
 )
 _dummy_qualifiers = {"origin": "https://example.com", "lines": "42"}
 _common_metadata_fields = dict(
-    discovery_date=datetime.datetime.now(tz=datetime.timezone.utc),
+    discovery_date=datetime.datetime(
+        2021, 1, 29, 13, 57, 9, tzinfo=datetime.timezone.utc
+    ),
     authority=_metadata_authority,
     fetcher=_metadata_fetcher,
     format="json",
@@ -853,6 +855,7 @@ def test_metadata_to_dict():
     m = RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields,)
     assert m.to_dict() == {
         "target": str(_origin_swhid),
+        "id": b"\xeck\x9cQ\xf1\x1f\xeb\xde\x85{\x7f\xf0\x83\x9c\x8a\xd5\xfb\x8e2\xef",
         **common_fields,
     }
     assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m
@@ -860,6 +863,7 @@ def test_metadata_to_dict():
     m = RawExtrinsicMetadata(target=_content_swhid, **_common_metadata_fields,)
     assert m.to_dict() == {
         "target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
+        "id": b"\x8e:_;eb\xe6\xf1Y\xd9\xa5aG[\rt\x89\xa1\x0b\xe4",
         **common_fields,
     }
     assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m
@@ -878,6 +882,7 @@ def test_metadata_to_dict():
     )
     assert m.to_dict() == {
         "target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
+        "id": b"\x07\xf1aS\xbe\xda\xd2\xf2\xd7\xaf:\xc7\xb7\x91C\x87W\x85R\x19",
         **common_fields,
         "origin": "https://example.org/",
         "snapshot": f"swh:1:snp:{hash_hex}",
-- 
GitLab