From bdf26f5314ee5c45834691a179bb9fe355ccd877 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Wed, 17 Jan 2018 10:22:55 +0100
Subject: [PATCH] swh.model.identifiers: persistent_identifier takes object as
 input

Related T335
---
 swh/model/identifiers.py            | 55 +++++++++++++++++++----------
 swh/model/tests/test_identifiers.py | 54 ++++++++++++++++------------
 2 files changed, 68 insertions(+), 41 deletions(-)

diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
index 15fa0e6f..51d2d2e9 100644
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -594,33 +594,55 @@ def snapshot_identifier(snapshot, *, ignore_unresolved=False):
     return identifier_to_str(hash_git_data(b''.join(lines), 'snapshot'))
 
 
-def persistent_identifier(type, hash):
-    """Compute persistent identifier as per the documentation.
+def persistent_identifier(type, object, version=1):
+    """Compute persistent identifier (stable over time) as per
+       documentation.
 
-    Source: https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html  # noqa
+    Documentation:
+        https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html  # noqa
 
     Args:
-        type (str): Object type
-        hash (str): Object hash
+        type (str): Object's type
+        object (dict): Object's dict representation
+        version (int): persistent identifier version (defaults to 1)
 
     Returns:
         Persistent identifier as string.
 
     """
     _map = {
-        SNAPSHOT: 'snp',
-        RELEASE: 'rel',
-        REVISION: 'rev',
-        DIRECTORY: 'dir',
-        CONTENT: 'cnt',
+        SNAPSHOT: {
+            'short_name': 'snp',
+            'key_id': 'id'
+        },
+        RELEASE: {
+            'short_name': 'rel',
+            'key_id': 'id'
+        },
+        REVISION: {
+            'short_name': 'rev',
+            'key_id': 'id'
+        },
+        DIRECTORY: {
+            'short_name': 'dir',
+            'key_id': 'id'
+        },
+        CONTENT: {
+            'short_name': 'cnt',
+            'key_id': 'sha1_git'
+        },
     }
-    _hash = hash_to_hex(hash)
+    o = _map[type]
+    _hash = hash_to_hex(object[o['key_id']])
+    return 'swh:%s:%s:%s' % (version, o['short_name'], _hash)
+
 
-    return 'swh:1:%s:%s' % (_map[type], _hash)
+PERSISTENT_IDENTIFIER_KEYS = [
+    'namespace', 'scheme_version', 'object_type', 'object_id']
 
 
 def parse_persistent_identifier(persistent_id):
-    """Parse swh's persistent identifier.
+    """Parse swh's persistent identifier scheme.
 
     Args:
         persistent_id (str): A persistent identifier
@@ -630,9 +652,4 @@ def parse_persistent_identifier(persistent_id):
 
     """
     data = persistent_id.split(':')
-    return {
-        'namespace': data[0],  # should be 'swh'
-        'scheme_version': data[1],
-        'object_type': data[2],
-        'object_id': data[3],
-    }
+    return dict(zip(PERSISTENT_IDENTIFIER_KEYS, data))
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
index 5a5661f2..26dc01e9 100644
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -773,39 +773,49 @@ class SnapshotIdentifier(unittest.TestCase):
         )
 
     def test_persistent_identifier(self):
-        for full_type, _hash, expected_persistent_id in [
-                (SNAPSHOT, hashutil.hash_to_bytes(
-                    'c7c108084bc0bf3d81436bf980b46e98bd338453'),
-                 'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453'),
-                (RELEASE, '22ece559cc7cc2364edc5e5593d63ae8bd229f9f',
-                 'swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f'),
-                (REVISION, '309cf2674ee7a0749978cf8265ab91a60aea0f7d',
-                 'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d'),
-                (DIRECTORY, 'd198bc9d7a6bcf6db04f476d29314f157507d505',
-                 'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505'),
-                (CONTENT, '94a9ed024d3859793618152ea559a168bbcbb5e2',
-                 'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2')
+        _snapshot = {'id': hashutil.hash_to_bytes(
+                    'c7c108084bc0bf3d81436bf980b46e98bd338453')}
+        _release = {'id': '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'}
+        _revision = {'id': '309cf2674ee7a0749978cf8265ab91a60aea0f7d'}
+        _directory = {'id': 'd198bc9d7a6bcf6db04f476d29314f157507d505'}
+        _content = {'sha1_git': '94a9ed024d3859793618152ea559a168bbcbb5e2'}
+        for full_type, _hash, expected_persistent_id, version in [
+                (SNAPSHOT, _snapshot,
+                 'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', None),
+                (RELEASE, _release,
+                 'swh:2:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', 2),
+                (REVISION, _revision,
+                 'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', None),
+                (DIRECTORY, _directory,
+                 'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', None),
+                (CONTENT, _content,
+                 'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', 1)
         ]:
-            actual_value = identifiers.persistent_identifier(
-                full_type, _hash)
+            if version:
+                actual_value = identifiers.persistent_identifier(
+                    full_type, _hash, version)
+            else:
+                actual_value = identifiers.persistent_identifier(
+                    full_type, _hash)
+
             self.assertEquals(actual_value, expected_persistent_id)
 
     def test_parse_persistent_identifier(self):
-        for pid, _type, _hash in [
+        for pid, _type, _version, _hash in [
                 ('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', 'cnt',
-                 '94a9ed024d3859793618152ea559a168bbcbb5e2'),
-                ('swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir',
-                 'd198bc9d7a6bcf6db04f476d29314f157507d505'),
+                 '1', '94a9ed024d3859793618152ea559a168bbcbb5e2'),
+                ('swh:2:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir',
+                 '2', 'd198bc9d7a6bcf6db04f476d29314f157507d505'),
                 ('swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', 'rev',
-                 '309cf2674ee7a0749978cf8265ab91a60aea0f7d'),
+                 '1', '309cf2674ee7a0749978cf8265ab91a60aea0f7d'),
                 ('swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', 'rel',
-                 '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'),
+                 '1', '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'),
                 ('swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', 'snp',
-                 'c7c108084bc0bf3d81436bf980b46e98bd338453'),
+                 '1', 'c7c108084bc0bf3d81436bf980b46e98bd338453'),
         ]:
             expected_result = {
                 'namespace': 'swh',
-                'scheme_version': '1',
+                'scheme_version': _version,
                 'object_type': _type,
                 'object_id': _hash,
             }
-- 
GitLab