From eba8d84de660e2b3d7df304fa32da7404fb9f6bb Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Thu, 18 Feb 2021 13:09:21 +0100
Subject: [PATCH] Add new class CoreSWHID as an alternative to
 SWHID/QualifiedSWHID

Following the discussion on T3034, we decided to replace SWHID with
two or three classes:

* QualifiedSWHID to replace the existing SWHID (standard types + qualifiers)
* CoreSWHID, for "core SWHID" only (standard types + no qualifiers)
* ExtendedSWHID for internal use in Software Heritage (extra types + no qualifiers)

This commit adds the second one, CoreSWHID.
---
 swh/model/identifiers.py            |  91 ++++++++++++++++++++++
 swh/model/tests/test_identifiers.py | 116 ++++++++++++++++++++++++++++
 2 files changed, 207 insertions(+)

diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
index 37f36fb4..62359258 100644
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -718,6 +718,97 @@ _swhid_type_map = {
 }
 
 
+@attr.s(frozen=True, kw_only=True)
+class CoreSWHID:
+    """
+    Dataclass holding the relevant info associated to a SoftWare Heritage
+    persistent IDentifier (SWHID).
+
+    Unlike `QualifiedSWHID`, it is restricted to core SWHIDs, i.e. SWHIDs
+    with no qualifiers.
+
+    Raises:
+        swh.model.exceptions.ValidationError: In case of invalid object type or id
+
+    To get the raw SWHID string from an instance of this class,
+    use the :func:`str` function:
+
+    >>> swhid = CoreSWHID(
+    ...     object_type=ObjectType.CONTENT,
+    ...     object_id=bytes.fromhex('8ff44f081d43176474b267de5451f2c2e88089d0'),
+    ... )
+    >>> str(swhid)
+    'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0'
+
+    And vice-versa with :meth:`CoreSWHID.from_string`:
+
+    >>> swhid == CoreSWHID.from_string(
+    ...     "swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0"
+    ... )
+    True
+    """
+
+    namespace = attr.ib(type=str, default=SWHID_NAMESPACE)
+    """the namespace of the identifier, defaults to ``swh``"""
+
+    scheme_version = attr.ib(type=int, default=SWHID_VERSION)
+    """the scheme version of the identifier, defaults to 1"""
+
+    object_type = attr.ib(type=ObjectType, validator=type_validator())
+    """the type of object the identifier points to"""
+
+    object_id = attr.ib(type=bytes, validator=type_validator())
+    """object's identifier"""
+
+    @namespace.validator
+    def check_namespace(self, attribute, value):
+        if value != SWHID_NAMESPACE:  # only the default namespace is accepted
+            raise ValidationError(
+                "Invalid SWHID: invalid namespace: %(namespace)s",
+                params={"namespace": value},
+            )
+
+    @scheme_version.validator
+    def check_scheme_version(self, attribute, value):
+        if value != SWHID_VERSION:  # only the default scheme version is accepted
+            raise ValidationError(
+                "Invalid SWHID: invalid version: %(version)s", params={"version": value}
+            )
+
+    @object_id.validator
+    def check_object_id(self, attribute, value):
+        if len(value) != 20:  # 20 raw bytes, i.e. 40 hexadecimal digits
+            raise ValidationError(
+                "Invalid SWHID: invalid checksum: %(object_id)s",
+                params={"object_id": hash_to_hex(value)},
+            )
+
+    def __str__(self) -> str:
+        fields = [
+            self.namespace,
+            str(self.scheme_version),
+            self.object_type.value,
+            hash_to_hex(self.object_id),
+        ]
+        return SWHID_SEP.join(fields)
+
+    @classmethod
+    def from_string(cls, s: str) -> CoreSWHID:
+        """Parse a core SWHID string; raise ValidationError if invalid or qualified."""
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")  # silence warnings from the old parser
+            old_swhid = parse_swhid(s)
+        if old_swhid.metadata:
+            raise ValidationError("CoreSWHID does not support qualifiers.")
+        object_type = ObjectType(_object_type_map[old_swhid.object_type]["short_name"])
+        return cls(  # cls rather than CoreSWHID, so subclasses build their own type
+            namespace=old_swhid.namespace,
+            scheme_version=old_swhid.scheme_version,
+            object_type=object_type,
+            object_id=hash_to_bytes(old_swhid.object_id),
+        )
+
+
 @attr.s(frozen=True, kw_only=True)
 class QualifiedSWHID:
     """
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
index 29bfb539..443f1ae5 100644
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -20,6 +20,7 @@ from swh.model.identifiers import (
     REVISION,
     SNAPSHOT,
     SWHID,
+    CoreSWHID,
     ObjectType,
     QualifiedSWHID,
     normalize_timestamp,
@@ -1405,3 +1406,118 @@ def test_QualifiedSWHID_eq():
         object_id=object_id,
         qualifiers=dummy_qualifiers,
     )
+
+
+def test_parse_serialize_core_swhid():
+    """Check that core SWHIDs survive a parse/serialize round trip.
+
+    Every row gives a SWHID string, its object type and its hex object id.
+    """
+    cases = [
+        (
+            "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
+            ObjectType.CONTENT,
+            "94a9ed024d3859793618152ea559a168bbcbb5e2",
+        ),
+        (
+            "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505",
+            ObjectType.DIRECTORY,
+            "d198bc9d7a6bcf6db04f476d29314f157507d505",
+        ),
+        (
+            "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d",
+            ObjectType.REVISION,
+            "309cf2674ee7a0749978cf8265ab91a60aea0f7d",
+        ),
+        (
+            "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f",
+            ObjectType.RELEASE,
+            "22ece559cc7cc2364edc5e5593d63ae8bd229f9f",
+        ),
+        (
+            "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453",
+            ObjectType.SNAPSHOT,
+            "c7c108084bc0bf3d81436bf980b46e98bd338453",
+        ),
+    ]
+    for text, object_type, hex_id in cases:
+        built = CoreSWHID(
+            namespace="swh",
+            scheme_version=1,
+            object_type=object_type,
+            object_id=_x(hex_id),
+        )
+        parsed = CoreSWHID.from_string(text)
+        assert parsed == built
+        assert str(built) == str(parsed) == text
+
+
+@pytest.mark.parametrize(
+    "invalid_swhid",
+    [
+        "swh:1:cnt",
+        "swh:1:",
+        "swh:",
+        "swh:1:cnt:",
+        "foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505",
+        "swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505",
+        "swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505",
+        "swh:1:cnt:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=1-5",  # qualified
+        "swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d",
+        "swh:1:snp:foo",
+        "swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d",
+    ],
+)
+def test_parse_core_swhid_parsing_error(invalid_swhid):
+    with pytest.raises(ValidationError):
+        CoreSWHID.from_string(invalid_swhid)
+
+
+@pytest.mark.parametrize(
+    "ns,version,otype,oid",
+    [
+        ("foo", 1, ObjectType.CONTENT, "abc8bc9d7a6bcf6db04f476d29314f157507d505"),
+        ("swh", 2, ObjectType.CONTENT, "def8bc9d7a6bcf6db04f476d29314f157507d505"),
+        ("swh", 1, ObjectType.DIRECTORY, "aaaa"),
+    ],
+)
+def test_CoreSWHID_validation_error(ns, version, otype, oid):
+    """A wrong namespace, version or object id length must fail validation."""
+    with pytest.raises(ValidationError):
+        CoreSWHID(
+            namespace=ns, scheme_version=version, object_type=otype, object_id=_x(oid),
+        )
+
+
+def test_CoreSWHID_hash():
+    """Equal CoreSWHIDs must be interchangeable as set members and dict keys."""
+    object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")
+    swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id)
+    same = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id)
+
+    # Two independently built, equal instances hash identically...
+    assert hash(swhid) == hash(same)
+
+    # ...and so does an instance parsed back from the string form.
+    assert hash(swhid) == hash(CoreSWHID.from_string(str(swhid)))
+
+    # Hash-based containers therefore treat equal SWHIDs as one element
+    # (set) or one key (dict).
+    assert {swhid, same} == {swhid}
+    assert {swhid: "value"}[same] == "value"
+
+
+def test_CoreSWHID_eq():
+    """CoreSWHIDs compare by value: same fields are equal, others are not."""
+    object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")
+    swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id)
+
+    # Independently built instances with the same fields are equal.
+    assert swhid == CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id)
+
+    # A different object type or a different object id breaks equality.
+    assert swhid != CoreSWHID(object_type=ObjectType.CONTENT, object_id=object_id)
+    assert swhid != CoreSWHID(
+        object_type=ObjectType.DIRECTORY,
+        object_id=_x("d198bc9d7a6bcf6db04f476d29314f157507d505"),
+    )
-- 
GitLab