diff --git a/PKG-INFO b/PKG-INFO index b2ef8133528bcc8e62d761a1c4bcdd2d05f493b0..f131e26ae3d7aedd856580e86947e7649f822121 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: swh.model -Version: 0.0.24 +Version: 0.0.25 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index b2ef8133528bcc8e62d761a1c4bcdd2d05f493b0..f131e26ae3d7aedd856580e86947e7649f822121 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: swh.model -Version: 0.0.24 +Version: 0.0.25 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index 00471f354424c34584d234c1be756f0d71e6a203..f775426d7216706b0e561e799f1c00d8cfbc2af2 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -5,6 +5,8 @@ import binascii import datetime + +from collections import namedtuple from functools import lru_cache from .exceptions import ValidationError @@ -64,7 +66,7 @@ def identifier_to_str(identifier): The length 40 string corresponding to the given identifier, hex encoded Raises: - ValueError if the identifier is of an unexpected type or length. + ValueError: if the identifier is of an unexpected type or length. """ if isinstance(identifier, str): @@ -596,71 +598,121 @@ def snapshot_identifier(snapshot, *, ignore_unresolved=False): return identifier_to_str(hash_git_data(b''.join(lines), 'snapshot')) -def persistent_identifier(type, object, version=1): - """Compute persistent identifier (stable over time) as per - documentation. - - Documentation: - https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html # noqa +_object_type_map = { + SNAPSHOT: { + 'short_name': 'snp', + 'key_id': 'id' + }, + RELEASE: { + 'short_name': 'rel', + 'key_id': 'id' + }, + REVISION: { + 'short_name': 'rev', + 'key_id': 'id' + }, + DIRECTORY: { + 'short_name': 'dir', + 'key_id': 'id' + }, + CONTENT: { + 'short_name': 'cnt', + 'key_id': 'sha1_git' + } +} - Args: - type (str): Object's type - object (dict/bytes/str): Object's dict representation or object - identifier - version (int): persistent identifier version (default to 1) +PERSISTENT_IDENTIFIER_TYPES = ['snp', 'rel', 'rev', 'dir', 'cnt'] - Raises: - ValidationError (class) in case of: +PERSISTENT_IDENTIFIER_KEYS = [ + 'namespace', 'scheme_version', 'object_type', 'object_id', 'metadata'] - invalid type - invalid hash object +PERSISTENT_IDENTIFIER_PARTS_SEP = ';' - Returns: - Persistent identifier as string. +class PersistentId(namedtuple('PersistentId', PERSISTENT_IDENTIFIER_KEYS)): """ - _map = { - SNAPSHOT: { - 'short_name': 'snp', - 'key_id': 'id' - }, - RELEASE: { - 'short_name': 'rel', - 'key_id': 'id' - }, - REVISION: { - 'short_name': 'rev', - 'key_id': 'id' - }, - DIRECTORY: { - 'short_name': 'dir', - 'key_id': 'id' - }, - CONTENT: { - 'short_name': 'cnt', - 'key_id': 'sha1_git' - }, - } - o = _map.get(type) - if not o: - raise ValidationError('Wrong input: Supported types are %s' % ( - list(_map.keys()))) + Named tuple holding the relevant info associated to a Software Heritage + persistent identifier. - if isinstance(object, dict): # internal swh representation resolution - _hash = object[o['key_id']] - else: # client passed direct identifier (bytes/str) - _hash = object - validate_sha1(_hash) # can raise if invalid hash - _hash = hash_to_hex(_hash) - return 'swh:%s:%s:%s' % (version, o['short_name'], _hash) + Args: + namespace (str): the namespace of the identifier, defaults to 'swh' + scheme_version (int): the scheme version of the identifier, + defaults to 1 + object_type (str): the type of object the identifier points to, + either 'content', 'directory', 'release', 'revision' or 'snapshot' + object_id (dict/bytes/str): object's dict representation or + object identifier + metadata (dict): optional dict filled with metadata related to + pointed object + Raises: + swh.model.exceptions.ValidationError: In case of invalid object type or id + + Once created, it contains the following attributes: + + Attributes: + namespace (str): the namespace of the identifier + scheme_version (int): the scheme version of the identifier + object_type (str): the type of object the identifier points to + object_id (str): hexadecimal representation of the object hash + metadata (dict): metadata related to the pointed object + + To get the raw persistent identifier string from an instance of + this named tuple, use the :func:`str` function:: + + pid = PersistentId(object_type='content', object_id='8ff44f081d43176474b267de5451f2c2e88089d0') + pid_str = str(pid) # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0' + """ # noqa + __slots__ = () + + def __new__(cls, namespace='swh', scheme_version=1, + object_type='', object_id='', metadata={}): + o = _object_type_map.get(object_type) + if not o: + raise ValidationError('Wrong input: Supported types are %s' % ( + list(_object_type_map.keys()))) + # internal swh representation resolution + if isinstance(object_id, dict): + object_id = object_id[o['key_id']] + validate_sha1(object_id) # can raise if invalid hash + object_id = hash_to_hex(object_id) + return super(cls, PersistentId).__new__( + cls, namespace, scheme_version, object_type, object_id, metadata) + + def __str__(self): + o = _object_type_map.get(self.object_type) + pid = '%s:%s:%s:%s' % (self.namespace, self.scheme_version, + o['short_name'], self.object_id) + if self.metadata: + for k, v in self.metadata.items(): + pid += '%s%s=%s' % (PERSISTENT_IDENTIFIER_PARTS_SEP, k, v) + return pid + + +def persistent_identifier(object_type, object_id, scheme_version=1): + """Compute persistent identifier (stable over time) as per + documentation. -PERSISTENT_IDENTIFIER_TYPES = ['snp', 'rel', 'rev', 'dir', 'cnt'] + Documentation: + https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html -PERSISTENT_IDENTIFIER_KEYS = [ - 'namespace', 'scheme_version', 'object_type', 'object_id', 'metadata'] + Args: + object_type (str): object's type, either 'content', 'directory', 'release', + 'revision' or 'snapshot' + object_id (dict/bytes/str): object's dict representation or object + identifier + scheme_version (int): persistent identifier scheme version, defaults to 1 -PERSISTENT_IDENTIFIER_PARTS_SEP = ';' + Raises: + swh.model.exceptions.ValidationError: In case of invalid object type or id + + Returns: + str: the persistent identifier + + """ # noqa + pid = PersistentId(scheme_version=scheme_version, object_type=object_type, + object_id=object_id) + return str(pid) def parse_persistent_identifier(persistent_id): @@ -670,23 +722,17 @@ def parse_persistent_identifier(persistent_id): persistent_id (str): A persistent identifier Raises: - ValidationError (class) in case of: + swh.model.exceptions.ValidationError: in case of: - missing mandatory values (4) - invalid namespace supplied - invalid version supplied - invalid type supplied - missing hash - invalid hash identifier supplied + * missing mandatory values (4) + * invalid namespace supplied + * invalid version supplied + * invalid type supplied + * missing hash + * invalid hash identifier supplied Returns: - dict: dict with keys : - - * namespace, holding str value - * scheme_version, holding str value - * object_type, holding str value - * object_id, holding str value - * metadata, holding dict value + PersistentId: a named tuple holding the parsing result """ # <pid>;<contextual-information> @@ -695,7 +741,7 @@ def parse_persistent_identifier(persistent_id): if len(pid_data) != 4: raise ValidationError( - 'Wrong format: There should be 4 mandatory parameters') + 'Wrong format: There should be 4 mandatory values') # Checking for parsing errors _ns, _version, _type, _id = pid_data @@ -707,12 +753,19 @@ def parse_persistent_identifier(persistent_id): raise ValidationError( 'Wrong format: Supported version is 1') + pid_data[1] = int(pid_data[1]) + expected_types = PERSISTENT_IDENTIFIER_TYPES if _type not in expected_types: raise ValidationError( 'Wrong format: Supported types are %s' % ( ', '.join(expected_types))) + for otype, data in _object_type_map.items(): + if _type == data['short_name']: + pid_data[2] = otype + break + if not _id: raise ValidationError( 'Wrong format: Identifier should be present') @@ -732,4 +785,4 @@ def parse_persistent_identifier(persistent_id): msg = 'Contextual data is badly formatted, form key=val expected' raise ValidationError(msg) pid_data.append(persistent_id_metadata) - return dict(zip(PERSISTENT_IDENTIFIER_KEYS, pid_data)) + return PersistentId._make(pid_data) diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 7daf8e40ac25b68d4cf05cb0ebc455e63bee26b2..01c1153c6ac40e689a71aa2432af13e5267e6c1d 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -14,6 +14,7 @@ from swh.model import hashutil, identifiers from swh.model.exceptions import ValidationError from swh.model.identifiers import SNAPSHOT, RELEASE, REVISION, DIRECTORY from swh.model.identifiers import CONTENT, PERSISTENT_IDENTIFIER_TYPES +from swh.model.identifiers import PersistentId class UtilityFunctionsIdentifier(unittest.TestCase): @@ -831,58 +832,58 @@ class SnapshotIdentifier(unittest.TestCase): def test_parse_persistent_identifier(self): for pid, _type, _version, _hash in [ - ('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', 'cnt', - '1', '94a9ed024d3859793618152ea559a168bbcbb5e2'), - ('swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir', - '1', 'd198bc9d7a6bcf6db04f476d29314f157507d505'), - ('swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', 'rev', - '1', '309cf2674ee7a0749978cf8265ab91a60aea0f7d'), - ('swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', 'rel', - '1', '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'), - ('swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', 'snp', - '1', 'c7c108084bc0bf3d81436bf980b46e98bd338453'), + ('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', + CONTENT, 1, '94a9ed024d3859793618152ea559a168bbcbb5e2'), + ('swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', + DIRECTORY, 1, 'd198bc9d7a6bcf6db04f476d29314f157507d505'), + ('swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', + REVISION, 1, '309cf2674ee7a0749978cf8265ab91a60aea0f7d'), + ('swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', + RELEASE, 1, '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'), + ('swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', + SNAPSHOT, 1, 'c7c108084bc0bf3d81436bf980b46e98bd338453'), ]: - expected_result = { - 'namespace': 'swh', - 'scheme_version': _version, - 'object_type': _type, - 'object_id': _hash, - 'metadata': {} - } + expected_result = PersistentId( + namespace='swh', + scheme_version=_version, + object_type=_type, + object_id=_hash, + metadata={} + ) actual_result = identifiers.parse_persistent_identifier(pid) self.assertEquals(actual_result, expected_result) for pid, _type, _version, _hash, _metadata in [ ('swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;lines=1-18;origin=https://github.com/python/cpython', # noqa - 'cnt', '1', '9c95815d9e9d91b8dae8e05d8bbc696fe19f796b', + CONTENT, 1, '9c95815d9e9d91b8dae8e05d8bbc696fe19f796b', { 'lines': '1-18', 'origin': 'https://github.com/python/cpython' }), ('swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=deb://Debian/packages/linuxdoc-tools', # noqa - 'dir', '1', '0b6959356d30f1a4e9b7f6bca59b9a336464c03d', + DIRECTORY, 1, '0b6959356d30f1a4e9b7f6bca59b9a336464c03d', { 'origin': 'deb://Debian/packages/linuxdoc-tools' }) ]: - expected_result = { - 'namespace': 'swh', - 'scheme_version': _version, - 'object_type': _type, - 'object_id': _hash, - 'metadata': _metadata - } + expected_result = PersistentId( + namespace='swh', + scheme_version=_version, + object_type=_type, + object_id=_hash, + metadata=_metadata + ) actual_result = identifiers.parse_persistent_identifier(pid) self.assertEquals(actual_result, expected_result) def test_parse_persistent_identifier_parsing_error(self): for pid, _error in [ ('swh:1:cnt', - 'Wrong format: There should be 4 mandatory parameters'), + 'Wrong format: There should be 4 mandatory values'), ('swh:1:', - 'Wrong format: There should be 4 mandatory parameters'), + 'Wrong format: There should be 4 mandatory values'), ('swh:', - 'Wrong format: There should be 4 mandatory parameters'), + 'Wrong format: There should be 4 mandatory values'), ('swh:1:cnt:', 'Wrong format: Identifier should be present'), ('foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505', diff --git a/version.txt b/version.txt index 8af3930e8c604e6de47bdf78d405936146a71cf7..23c319733ec7a3769d649c9bf9a787778e450346 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.24-0-g5eb055d \ No newline at end of file +v0.0.25-0-gedcd365 \ No newline at end of file