diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
index b4ec15dd4ee13320e7d56b03e6244efc428a2bba..15fa0e6f6a82901729a64b572ba461f38f9447ae 100644
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015 The Software Heritage developers
+# Copyright (C) 2015-2018 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -8,6 +8,14 @@ import datetime
 from functools import lru_cache
 
 from .hashutil import hash_data, hash_git_data, DEFAULT_ALGORITHMS
+from .hashutil import hash_to_hex
+
+
+SNAPSHOT = 'snapshot'
+REVISION = 'revision'
+RELEASE = 'release'
+DIRECTORY = 'directory'
+CONTENT = 'content'
 
 
 @lru_cache()
@@ -584,3 +592,60 @@ def snapshot_identifier(snapshot, *, ignore_unresolved=False):
                                   for name, target in unresolved))
 
     return identifier_to_str(hash_git_data(b''.join(lines), 'snapshot'))
+
+
+def persistent_identifier(type, hash):
+    """Compute persistent identifier as per the documentation.
+
+    Source: https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html # noqa
+
+    Args:
+        type (str): Object type, one of SNAPSHOT, RELEASE, REVISION,
+            DIRECTORY or CONTENT
+        hash (bytes or str): Object hash, as raw bytes or hex string
+
+    Returns:
+        Persistent identifier as string.
+
+    Raises:
+        KeyError: if type is not a known object type
+
+    """
+    _map = {
+        SNAPSHOT: 'snp',
+        RELEASE: 'rel',
+        REVISION: 'rev',
+        DIRECTORY: 'dir',
+        CONTENT: 'cnt',
+    }
+    _hash = hash_to_hex(hash)
+
+    return 'swh:1:%s:%s' % (_map[type], _hash)
+
+
+def parse_persistent_identifier(persistent_id):
+    """Parse swh's persistent identifier.
+
+    Args:
+        persistent_id (str): A persistent identifier of the form
+            ``swh:<scheme_version>:<object_type>:<object_id>``
+
+    Returns:
+        dict with keys namespace, scheme_version, object_type, object_id
+
+    Raises:
+        ValueError: if persistent_id is not made of exactly four
+            colon-separated fields
+
+    """
+    data = persistent_id.split(':')
+    if len(data) != 4:
+        raise ValueError(
+            'Invalid persistent identifier %r: expected 4 colon-separated '
+            'fields, got %d' % (persistent_id, len(data)))
+    return {
+        'namespace': data[0],  # should be 'swh'
+        'scheme_version': data[1],
+        'object_type': data[2],
+        'object_id': data[3],
+    }
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
index 4a56b0c26434d86c5a263d20b042a15fc3c09baf..5a5661f22e7e75e341e0d202e2eb0eeb7fd735b4 100644
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2017 The Software Heritage developers
+# Copyright (C) 2015-2018 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -11,6 +11,9 @@ from nose.tools import istest
 
 from swh.model import hashutil, identifiers
 
+from swh.model.identifiers import SNAPSHOT, RELEASE, REVISION, DIRECTORY
+from swh.model.identifiers import CONTENT
+
 
 class UtilityFunctionsIdentifier(unittest.TestCase):
     def setUp(self):
@@ -768,3 +771,52 @@ class SnapshotIdentifier(unittest.TestCase):
             identifiers.snapshot_identifier(self.all_types),
             identifiers.identifier_to_str(self.all_types['id']),
         )
+
+    def test_persistent_identifier(self):
+        for full_type, _hash, expected_persistent_id in [
+                (SNAPSHOT, hashutil.hash_to_bytes(
+                    'c7c108084bc0bf3d81436bf980b46e98bd338453'),
+                 'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453'),
+                (RELEASE, '22ece559cc7cc2364edc5e5593d63ae8bd229f9f',
+                 'swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f'),
+                (REVISION, '309cf2674ee7a0749978cf8265ab91a60aea0f7d',
+                 'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d'),
+                (DIRECTORY, 'd198bc9d7a6bcf6db04f476d29314f157507d505',
+                 'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505'),
+                (CONTENT, '94a9ed024d3859793618152ea559a168bbcbb5e2',
+                 'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2')
+        ]:
+            actual_value = identifiers.persistent_identifier(
+                full_type, _hash)
+            self.assertEqual(actual_value, expected_persistent_id)
+
+    def test_parse_persistent_identifier(self):
+        for pid, _type, _hash in [
+                ('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', 'cnt',
+                 '94a9ed024d3859793618152ea559a168bbcbb5e2'),
+                ('swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir',
+                 'd198bc9d7a6bcf6db04f476d29314f157507d505'),
+                ('swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', 'rev',
+                 '309cf2674ee7a0749978cf8265ab91a60aea0f7d'),
+                ('swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', 'rel',
+                 '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'),
+                ('swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', 'snp',
+                 'c7c108084bc0bf3d81436bf980b46e98bd338453'),
+        ]:
+            expected_result = {
+                'namespace': 'swh',
+                'scheme_version': '1',
+                'object_type': _type,
+                'object_id': _hash,
+            }
+            actual_result = identifiers.parse_persistent_identifier(pid)
+            self.assertEqual(actual_result, expected_result)
+
+    def test_parse_persistent_identifier_malformed(self):
+        for pid in [
+                '',
+                'swh:1:cnt',
+                'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2:extra',
+        ]:
+            with self.assertRaises(ValueError):
+                identifiers.parse_persistent_identifier(pid)