diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index 15fa0e6f6a82901729a64b572ba461f38f9447ae..51d2d2e9b6f94467b526ef6291bc1f76d79b8202 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -594,33 +594,55 @@ def snapshot_identifier(snapshot, *, ignore_unresolved=False): return identifier_to_str(hash_git_data(b''.join(lines), 'snapshot')) -def persistent_identifier(type, hash): - """Compute persistent identifier as per the documentation. +def persistent_identifier(type, object, version=1): + """Compute persistent identifier (stable over time) as per + documentation. - Source: https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html # noqa + Documentation: + https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html # noqa Args: - type (str): Object type - hash (str): Object hash + type (str): Object's type + object (dict): Object's dict representation + version (int): persistent identifier version (defaults to 1) Returns: Persistent identifier as string. """ _map = { - SNAPSHOT: 'snp', - RELEASE: 'rel', - REVISION: 'rev', - DIRECTORY: 'dir', - CONTENT: 'cnt', + SNAPSHOT: { + 'short_name': 'snp', + 'key_id': 'id' + }, + RELEASE: { + 'short_name': 'rel', + 'key_id': 'id' + }, + REVISION: { + 'short_name': 'rev', + 'key_id': 'id' + }, + DIRECTORY: { + 'short_name': 'dir', + 'key_id': 'id' + }, + CONTENT: { + 'short_name': 'cnt', + 'key_id': 'sha1_git' + }, } - _hash = hash_to_hex(hash) + o = _map[type] + _hash = hash_to_hex(object[o['key_id']]) + return 'swh:%s:%s:%s' % (version, o['short_name'], _hash) + - return 'swh:1:%s:%s' % (_map[type], _hash) +PERSISTENT_IDENTIFIER_KEYS = [ + 'namespace', 'scheme_version', 'object_type', 'object_id'] def parse_persistent_identifier(persistent_id): - """Parse swh's persistent identifier. + """Parse swh's persistent identifier scheme. 
Args: persistent_id (str): A persistent identifier @@ -630,9 +652,4 @@ def parse_persistent_identifier(persistent_id): """ data = persistent_id.split(':') - return { - 'namespace': data[0], # should be 'swh' - 'scheme_version': data[1], - 'object_type': data[2], - 'object_id': data[3], - } + return dict(zip(PERSISTENT_IDENTIFIER_KEYS, data)) diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 5a5661f22e7e75e341e0d202e2eb0eeb7fd735b4..26dc01e98543557197926ac41da6733d8de786ff 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -773,39 +773,49 @@ class SnapshotIdentifier(unittest.TestCase): ) def test_persistent_identifier(self): - for full_type, _hash, expected_persistent_id in [ - (SNAPSHOT, hashutil.hash_to_bytes( - 'c7c108084bc0bf3d81436bf980b46e98bd338453'), - 'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453'), - (RELEASE, '22ece559cc7cc2364edc5e5593d63ae8bd229f9f', - 'swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f'), - (REVISION, '309cf2674ee7a0749978cf8265ab91a60aea0f7d', - 'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d'), - (DIRECTORY, 'd198bc9d7a6bcf6db04f476d29314f157507d505', - 'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505'), - (CONTENT, '94a9ed024d3859793618152ea559a168bbcbb5e2', - 'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2') + _snapshot = {'id': hashutil.hash_to_bytes( + 'c7c108084bc0bf3d81436bf980b46e98bd338453')} + _release = {'id': '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'} + _revision = {'id': '309cf2674ee7a0749978cf8265ab91a60aea0f7d'} + _directory = {'id': 'd198bc9d7a6bcf6db04f476d29314f157507d505'} + _content = {'sha1_git': '94a9ed024d3859793618152ea559a168bbcbb5e2'} + for full_type, _hash, expected_persistent_id, version in [ + (SNAPSHOT, _snapshot, + 'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', None), + (RELEASE, _release, + 'swh:2:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', 2), + (REVISION, _revision, + 
'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', None), + (DIRECTORY, _directory, + 'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', None), + (CONTENT, _content, + 'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', 1) ]: - actual_value = identifiers.persistent_identifier( - full_type, _hash) + if version: + actual_value = identifiers.persistent_identifier( + full_type, _hash, version) + else: + actual_value = identifiers.persistent_identifier( + full_type, _hash) + self.assertEquals(actual_value, expected_persistent_id) def test_parse_persistent_identifier(self): - for pid, _type, _hash in [ + for pid, _type, _version, _hash in [ ('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', 'cnt', - '94a9ed024d3859793618152ea559a168bbcbb5e2'), - ('swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir', - 'd198bc9d7a6bcf6db04f476d29314f157507d505'), + '1', '94a9ed024d3859793618152ea559a168bbcbb5e2'), + ('swh:2:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir', + '2', 'd198bc9d7a6bcf6db04f476d29314f157507d505'), ('swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', 'rev', - '309cf2674ee7a0749978cf8265ab91a60aea0f7d'), + '1', '309cf2674ee7a0749978cf8265ab91a60aea0f7d'), ('swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', 'rel', - '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'), + '1', '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'), ('swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', 'snp', - 'c7c108084bc0bf3d81436bf980b46e98bd338453'), + '1', 'c7c108084bc0bf3d81436bf980b46e98bd338453'), ]: expected_result = { 'namespace': 'swh', - 'scheme_version': '1', + 'scheme_version': _version, 'object_type': _type, 'object_id': _hash, }