Skip to content
Snippets Groups Projects
Verified Commit 122326dd authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

swh.models.hashutil: Add persistent identifier function

Related T335
Related T933
parent b61c6665
No related branches found
No related tags found
No related merge requests found
# Copyright (C) 2015 The Software Heritage developers # Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution # See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version # License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information # See top-level LICENSE file for more information
...@@ -8,6 +8,14 @@ import datetime ...@@ -8,6 +8,14 @@ import datetime
from functools import lru_cache from functools import lru_cache
from .hashutil import hash_data, hash_git_data, DEFAULT_ALGORITHMS from .hashutil import hash_data, hash_git_data, DEFAULT_ALGORITHMS
from .hashutil import hash_to_hex
# Full object type names; persistent_identifier() maps each of them to the
# three-letter code used inside a persistent identifier.
SNAPSHOT = 'snapshot'
REVISION = 'revision'
RELEASE = 'release'
DIRECTORY = 'directory'
CONTENT = 'content'
@lru_cache() @lru_cache()
...@@ -584,3 +592,47 @@ def snapshot_identifier(snapshot, *, ignore_unresolved=False): ...@@ -584,3 +592,47 @@ def snapshot_identifier(snapshot, *, ignore_unresolved=False):
for name, target in unresolved)) for name, target in unresolved))
return identifier_to_str(hash_git_data(b''.join(lines), 'snapshot')) return identifier_to_str(hash_git_data(b''.join(lines), 'snapshot'))
def persistent_identifier(type, hash):
    """Build the persistent identifier of an object.

    The layout of the identifier follows the scheme documented at
    https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html # noqa

    Args:
        type (str): Object type, one of the SNAPSHOT, RELEASE, REVISION,
            DIRECTORY or CONTENT constants
        hash: Object hash; it is normalized through hash_to_hex before
            being embedded in the identifier

    Returns:
        str: The identifier, shaped as 'swh:1:<type code>:<hash>'.

    Raises:
        KeyError: if type is not one of the known object types.

    """
    # Full type name -> short code embedded in the identifier.
    type_codes = dict([
        (SNAPSHOT, 'snp'),
        (RELEASE, 'rel'),
        (REVISION, 'rev'),
        (DIRECTORY, 'dir'),
        (CONTENT, 'cnt'),
    ])
    # Normalize the hash first, then resolve the type code.
    hex_hash = hash_to_hex(hash)
    type_code = type_codes[type]
    return 'swh:1:{}:{}'.format(type_code, hex_hash)
def parse_persistent_identifier(persistent_id):
    """Split a swh persistent identifier into its named fields.

    The identifier is cut on ':' and the first four pieces are returned;
    any additional piece is ignored.

    Args:
        persistent_id (str): A persistent identifier

    Returns:
        dict with keys namespace, scheme_version, object_type, object_id

    Raises:
        IndexError: if the identifier holds fewer than four
            ':'-separated pieces.

    """
    pieces = persistent_id.split(':')
    namespace = pieces[0]  # should be 'swh'
    scheme_version = pieces[1]
    object_type = pieces[2]
    object_id = pieces[3]
    return dict(
        namespace=namespace,
        scheme_version=scheme_version,
        object_type=object_type,
        object_id=object_id,
    )
# Copyright (C) 2015-2017 The Software Heritage developers # Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution # See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version # License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information # See top-level LICENSE file for more information
...@@ -11,6 +11,9 @@ from nose.tools import istest ...@@ -11,6 +11,9 @@ from nose.tools import istest
from swh.model import hashutil, identifiers from swh.model import hashutil, identifiers
from swh.model.identifiers import SNAPSHOT, RELEASE, REVISION, DIRECTORY
from swh.model.identifiers import CONTENT
class UtilityFunctionsIdentifier(unittest.TestCase): class UtilityFunctionsIdentifier(unittest.TestCase):
def setUp(self): def setUp(self):
...@@ -768,3 +771,43 @@ class SnapshotIdentifier(unittest.TestCase): ...@@ -768,3 +771,43 @@ class SnapshotIdentifier(unittest.TestCase):
identifiers.snapshot_identifier(self.all_types), identifiers.snapshot_identifier(self.all_types),
identifiers.identifier_to_str(self.all_types['id']), identifiers.identifier_to_str(self.all_types['id']),
) )
def test_persistent_identifier(self):
    """Check persistent_identifier() for every supported object type.

    The snapshot case passes the hash as bytes (built with
    hashutil.hash_to_bytes); the other cases pass a hexadecimal string.
    """
    cases = [
        (SNAPSHOT, hashutil.hash_to_bytes(
            'c7c108084bc0bf3d81436bf980b46e98bd338453'),
         'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453'),
        (RELEASE, '22ece559cc7cc2364edc5e5593d63ae8bd229f9f',
         'swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f'),
        (REVISION, '309cf2674ee7a0749978cf8265ab91a60aea0f7d',
         'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d'),
        (DIRECTORY, 'd198bc9d7a6bcf6db04f476d29314f157507d505',
         'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505'),
        (CONTENT, '94a9ed024d3859793618152ea559a168bbcbb5e2',
         'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2'),
    ]
    for full_type, _hash, expected_persistent_id in cases:
        actual_value = identifiers.persistent_identifier(
            full_type, _hash)

        # assertEqual, not assertEquals: the latter is a deprecated alias
        # that was removed from unittest in Python 3.12.
        self.assertEqual(actual_value, expected_persistent_id)
def test_parse_persistent_identifier(self):
    """Check parse_persistent_identifier() for every object type code.

    Each identifier must be split into namespace 'swh', scheme version
    '1', the three-letter object type and the hexadecimal object id.
    """
    cases = [
        ('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', 'cnt',
         '94a9ed024d3859793618152ea559a168bbcbb5e2'),
        ('swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir',
         'd198bc9d7a6bcf6db04f476d29314f157507d505'),
        ('swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', 'rev',
         '309cf2674ee7a0749978cf8265ab91a60aea0f7d'),
        ('swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', 'rel',
         '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'),
        ('swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', 'snp',
         'c7c108084bc0bf3d81436bf980b46e98bd338453'),
    ]
    for pid, _type, _hash in cases:
        expected_result = {
            'namespace': 'swh',
            'scheme_version': '1',
            'object_type': _type,
            'object_id': _hash,
        }
        actual_result = identifiers.parse_persistent_identifier(pid)

        # assertEqual, not assertEquals: the latter is a deprecated alias
        # that was removed from unittest in Python 3.12.
        self.assertEqual(actual_result, expected_result)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment