Skip to content
Snippets Groups Projects
Verified Commit bdf26f53 authored by Antoine R. Dumont
Browse files

swh.model.identifiers: persistent_identifier takes object as input

Related T335
parent 122326dd
No related branches found
Tags v0.0.21
No related merge requests found
......@@ -594,33 +594,55 @@ def snapshot_identifier(snapshot, *, ignore_unresolved=False):
return identifier_to_str(hash_git_data(b''.join(lines), 'snapshot'))
def persistent_identifier(type, hash):
"""Compute persistent identifier as per the documentation.
def persistent_identifier(type, object, version=1):
"""Compute persistent identifier (stable over time) as per
documentation.
Source: https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html # noqa
Documentation:
https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html # noqa
Args:
type (str): Object type
hash (str): Object hash
type (str): Object's type
object (dict): Object's dict representation
version (int): persistent identifier version (defaults to 1)
Returns:
Persistent identifier as string.
"""
_map = {
SNAPSHOT: 'snp',
RELEASE: 'rel',
REVISION: 'rev',
DIRECTORY: 'dir',
CONTENT: 'cnt',
SNAPSHOT: {
'short_name': 'snp',
'key_id': 'id'
},
RELEASE: {
'short_name': 'rel',
'key_id': 'id'
},
REVISION: {
'short_name': 'rev',
'key_id': 'id'
},
DIRECTORY: {
'short_name': 'dir',
'key_id': 'id'
},
CONTENT: {
'short_name': 'cnt',
'key_id': 'sha1_git'
},
}
_hash = hash_to_hex(hash)
o = _map[type]
_hash = hash_to_hex(object[o['key_id']])
return 'swh:%s:%s:%s' % (version, o['short_name'], _hash)
return 'swh:1:%s:%s' % (_map[type], _hash)
PERSISTENT_IDENTIFIER_KEYS = [
'namespace', 'scheme_version', 'object_type', 'object_id']
def parse_persistent_identifier(persistent_id):
"""Parse swh's persistent identifier.
"""Parse swh's persistent identifier scheme.
Args:
persistent_id (str): A persistent identifier
......@@ -630,9 +652,4 @@ def parse_persistent_identifier(persistent_id):
"""
data = persistent_id.split(':')
return {
'namespace': data[0], # should be 'swh'
'scheme_version': data[1],
'object_type': data[2],
'object_id': data[3],
}
return dict(zip(PERSISTENT_IDENTIFIER_KEYS, data))
......@@ -773,39 +773,49 @@ class SnapshotIdentifier(unittest.TestCase):
)
def test_persistent_identifier(self):
for full_type, _hash, expected_persistent_id in [
(SNAPSHOT, hashutil.hash_to_bytes(
'c7c108084bc0bf3d81436bf980b46e98bd338453'),
'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453'),
(RELEASE, '22ece559cc7cc2364edc5e5593d63ae8bd229f9f',
'swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f'),
(REVISION, '309cf2674ee7a0749978cf8265ab91a60aea0f7d',
'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d'),
(DIRECTORY, 'd198bc9d7a6bcf6db04f476d29314f157507d505',
'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505'),
(CONTENT, '94a9ed024d3859793618152ea559a168bbcbb5e2',
'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2')
_snapshot = {'id': hashutil.hash_to_bytes(
'c7c108084bc0bf3d81436bf980b46e98bd338453')}
_release = {'id': '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'}
_revision = {'id': '309cf2674ee7a0749978cf8265ab91a60aea0f7d'}
_directory = {'id': 'd198bc9d7a6bcf6db04f476d29314f157507d505'}
_content = {'sha1_git': '94a9ed024d3859793618152ea559a168bbcbb5e2'}
for full_type, _hash, expected_persistent_id, version in [
(SNAPSHOT, _snapshot,
'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', None),
(RELEASE, _release,
'swh:2:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', 2),
(REVISION, _revision,
'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', None),
(DIRECTORY, _directory,
'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', None),
(CONTENT, _content,
'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', 1)
]:
actual_value = identifiers.persistent_identifier(
full_type, _hash)
if version:
actual_value = identifiers.persistent_identifier(
full_type, _hash, version)
else:
actual_value = identifiers.persistent_identifier(
full_type, _hash)
self.assertEquals(actual_value, expected_persistent_id)
def test_parse_persistent_identifier(self):
for pid, _type, _hash in [
for pid, _type, _version, _hash in [
('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', 'cnt',
'94a9ed024d3859793618152ea559a168bbcbb5e2'),
('swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir',
'd198bc9d7a6bcf6db04f476d29314f157507d505'),
'1', '94a9ed024d3859793618152ea559a168bbcbb5e2'),
('swh:2:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir',
'2', 'd198bc9d7a6bcf6db04f476d29314f157507d505'),
('swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', 'rev',
'309cf2674ee7a0749978cf8265ab91a60aea0f7d'),
'1', '309cf2674ee7a0749978cf8265ab91a60aea0f7d'),
('swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', 'rel',
'22ece559cc7cc2364edc5e5593d63ae8bd229f9f'),
'1', '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'),
('swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', 'snp',
'c7c108084bc0bf3d81436bf980b46e98bd338453'),
'1', 'c7c108084bc0bf3d81436bf980b46e98bd338453'),
]:
expected_result = {
'namespace': 'swh',
'scheme_version': '1',
'scheme_version': _version,
'object_type': _type,
'object_id': _hash,
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment