From bdf26f5314ee5c45834691a179bb9fe355ccd877 Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com> Date: Wed, 17 Jan 2018 10:22:55 +0100 Subject: [PATCH] swh.model.identifiers: persistent_identifier takes object as input Related T335 --- swh/model/identifiers.py | 55 +++++++++++++++++++---------- swh/model/tests/test_identifiers.py | 54 ++++++++++++++++------------ 2 files changed, 68 insertions(+), 41 deletions(-) diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index 15fa0e6f..51d2d2e9 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -594,33 +594,55 @@ def snapshot_identifier(snapshot, *, ignore_unresolved=False): return identifier_to_str(hash_git_data(b''.join(lines), 'snapshot')) -def persistent_identifier(type, hash): - """Compute persistent identifier as per the documentation. +def persistent_identifier(type, object, version=1): + """Compute persistent identifier (stable over time) as per + documentation. - Source: https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html # noqa + Documentation: + https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html # noqa Args: - type (str): Object type - hash (str): Object hash + type (str): Object's type + object (dict): Object's dict representation + version (int): persistent identifier version (defaults to 1) Returns: Persistent identifier as string. 
""" _map = { - SNAPSHOT: 'snp', - RELEASE: 'rel', - REVISION: 'rev', - DIRECTORY: 'dir', - CONTENT: 'cnt', + SNAPSHOT: { + 'short_name': 'snp', + 'key_id': 'id' + }, + RELEASE: { + 'short_name': 'rel', + 'key_id': 'id' + }, + REVISION: { + 'short_name': 'rev', + 'key_id': 'id' + }, + DIRECTORY: { + 'short_name': 'dir', + 'key_id': 'id' + }, + CONTENT: { + 'short_name': 'cnt', + 'key_id': 'sha1_git' + }, } - _hash = hash_to_hex(hash) + o = _map[type] + _hash = hash_to_hex(object[o['key_id']]) + return 'swh:%s:%s:%s' % (version, o['short_name'], _hash) + - return 'swh:1:%s:%s' % (_map[type], _hash) +PERSISTENT_IDENTIFIER_KEYS = [ + 'namespace', 'scheme_version', 'object_type', 'object_id'] def parse_persistent_identifier(persistent_id): - """Parse swh's persistent identifier. + """Parse swh's persistent identifier scheme. Args: persistent_id (str): A persistent identifier @@ -630,9 +652,4 @@ def parse_persistent_identifier(persistent_id): """ data = persistent_id.split(':') - return { - 'namespace': data[0], # should be 'swh' - 'scheme_version': data[1], - 'object_type': data[2], - 'object_id': data[3], - } + return dict(zip(PERSISTENT_IDENTIFIER_KEYS, data)) diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 5a5661f2..26dc01e9 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -773,39 +773,49 @@ class SnapshotIdentifier(unittest.TestCase): ) def test_persistent_identifier(self): - for full_type, _hash, expected_persistent_id in [ - (SNAPSHOT, hashutil.hash_to_bytes( - 'c7c108084bc0bf3d81436bf980b46e98bd338453'), - 'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453'), - (RELEASE, '22ece559cc7cc2364edc5e5593d63ae8bd229f9f', - 'swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f'), - (REVISION, '309cf2674ee7a0749978cf8265ab91a60aea0f7d', - 'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d'), - (DIRECTORY, 'd198bc9d7a6bcf6db04f476d29314f157507d505', - 
'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505'), - (CONTENT, '94a9ed024d3859793618152ea559a168bbcbb5e2', - 'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2') + _snapshot = {'id': hashutil.hash_to_bytes( + 'c7c108084bc0bf3d81436bf980b46e98bd338453')} + _release = {'id': '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'} + _revision = {'id': '309cf2674ee7a0749978cf8265ab91a60aea0f7d'} + _directory = {'id': 'd198bc9d7a6bcf6db04f476d29314f157507d505'} + _content = {'sha1_git': '94a9ed024d3859793618152ea559a168bbcbb5e2'} + for full_type, _hash, expected_persistent_id, version in [ + (SNAPSHOT, _snapshot, + 'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', None), + (RELEASE, _release, + 'swh:2:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', 2), + (REVISION, _revision, + 'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', None), + (DIRECTORY, _directory, + 'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', None), + (CONTENT, _content, + 'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', 1) ]: - actual_value = identifiers.persistent_identifier( - full_type, _hash) + if version: + actual_value = identifiers.persistent_identifier( + full_type, _hash, version) + else: + actual_value = identifiers.persistent_identifier( + full_type, _hash) + self.assertEquals(actual_value, expected_persistent_id) def test_parse_persistent_identifier(self): - for pid, _type, _hash in [ + for pid, _type, _version, _hash in [ ('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', 'cnt', - '94a9ed024d3859793618152ea559a168bbcbb5e2'), - ('swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir', - 'd198bc9d7a6bcf6db04f476d29314f157507d505'), + '1', '94a9ed024d3859793618152ea559a168bbcbb5e2'), + ('swh:2:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir', + '2', 'd198bc9d7a6bcf6db04f476d29314f157507d505'), ('swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', 'rev', - '309cf2674ee7a0749978cf8265ab91a60aea0f7d'), + '1', '309cf2674ee7a0749978cf8265ab91a60aea0f7d'), 
('swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', 'rel', - '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'), + '1', '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'), ('swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', 'snp', - 'c7c108084bc0bf3d81436bf980b46e98bd338453'), + '1', 'c7c108084bc0bf3d81436bf980b46e98bd338453'), ]: expected_result = { 'namespace': 'swh', - 'scheme_version': '1', + 'scheme_version': _version, 'object_type': _type, 'object_id': _hash, } -- GitLab