From 1545ef77e36d4539d372bdb569e02edf1f4b860d Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Fri, 23 Jul 2021 12:09:41 +0200
Subject: [PATCH] Add an extid_version field to ExtIDs

This allows distinguishing multiple potential versions of the mapping
between external objects and their counterparts archived in Software
Heritage, for instance when a loader has a backwards-incompatible change
that should result in objects being loaded again.

The field defaults to zero. In that case no extid_version line is added
to the hashed manifest, so the computed identifier is identical to the
one produced by the previous implementation.
---
 swh/model/identifiers.py            | 12 ++++++++++--
 swh/model/model.py                  |  2 ++
 swh/model/tests/swh_model_data.py   |  6 ++++++
 swh/model/tests/test_identifiers.py | 23 +++++++++++++++++++++++
 4 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
index 04bd4078..421d2e77 100644
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -857,6 +857,7 @@ def extid_identifier(extid: Dict[str, Any]) -> str:
 
     ```
     extid_type $StrWithoutSpaces
+    [extid_version $Str]
     extid $Bytes
     target $CoreSwhid
     ```
@@ -866,6 +867,8 @@ def extid_identifier(extid: Dict[str, Any]) -> str:
     Newlines in $Bytes are escaped as with other git fields, ie. by adding a
     space after them.
 
+    The extid_version line is only generated if the version is non-zero.
+
     Returns:
       str: the intrinsic identifier for `extid`
 
@@ -873,9 +876,14 @@ def extid_identifier(extid: Dict[str, Any]) -> str:
 
     headers = [
         (b"extid_type", extid["extid_type"].encode("ascii")),
-        (b"extid", extid["extid"]),
-        (b"target", str(extid["target"]).encode("ascii")),
     ]
+    extid_version = extid.get("extid_version", 0)
+    if extid_version != 0:
+        headers.append((b"extid_version", str(extid_version).encode("ascii")))
+
+    headers.extend(
+        [(b"extid", extid["extid"]), (b"target", str(extid["target"]).encode("ascii")),]
+    )
 
     git_object = format_git_object_from_headers("extid", headers)
     return hashlib.new("sha1", git_object).hexdigest()
diff --git a/swh/model/model.py b/swh/model/model.py
index 1d1c1ce5..e34eea10 100644
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -1133,6 +1133,7 @@ class ExtID(HashableObject, BaseModel):
     extid_type = attr.ib(type=str, validator=type_validator())
     extid = attr.ib(type=bytes, validator=type_validator())
     target = attr.ib(type=CoreSWHID, validator=type_validator())
+    extid_version = attr.ib(type=int, validator=type_validator(), default=0)
 
     id = attr.ib(type=Sha1Git, validator=type_validator(), default=b"")
 
@@ -1142,6 +1143,7 @@ class ExtID(HashableObject, BaseModel):
             extid=d["extid"],
             extid_type=d["extid_type"],
             target=CoreSWHID.from_string(d["target"]),
+            extid_version=d.get("extid_version", 0),
         )
 
     def compute_hash(self) -> bytes:
diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py
index 8db2a285..1f5dded9 100644
--- a/swh/model/tests/swh_model_data.py
+++ b/swh/model/tests/swh_model_data.py
@@ -138,6 +138,12 @@ REVISIONS = [
 EXTIDS = [
     ExtID(extid_type="git256", extid=b"\x03" * 32, target=REVISIONS[0].swhid(),),
     ExtID(extid_type="hg", extid=b"\x04" * 20, target=REVISIONS[1].swhid(),),
+    ExtID(
+        extid_type="hg-nodeid",
+        extid=b"\x05" * 20,
+        target=REVISIONS[1].swhid(),
+        extid_version=1,
+    ),
 ]
 
 RELEASES = [
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
index 7a236ac9..01d2b70b 100644
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -1811,6 +1811,29 @@ def test_ExtendedSWHID_eq():
     ) == ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
 
 
+def test_extid_identifier_bwcompat():
+    extid_dict = {
+        "extid_type": "test-type",
+        "extid": b"extid",
+        "target": ExtendedSWHID(
+            object_type=ExtendedObjectType.DIRECTORY, object_id=b"\x00" * 20
+        ),
+    }
+
+    assert (
+        identifiers.extid_identifier(extid_dict)
+        == "b9295e1931c31e40a7e3e1e967decd1c89426455"
+    )
+
+    assert identifiers.extid_identifier(
+        {**extid_dict, "extid_version": 0}
+    ) == identifiers.extid_identifier(extid_dict)
+
+    assert identifiers.extid_identifier(
+        {**extid_dict, "extid_version": 1}
+    ) != identifiers.extid_identifier(extid_dict)
+
+
 def test_object_types():
     """Checks ExtendedObjectType is a superset of ObjectType"""
     for member in ObjectType:
-- 
GitLab