diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index eef7710698e0ec499a62e85e1a507a9371f8fcf5..138ca33a1949dd9d56f6492e1ec987eccd1762fb 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -637,12 +637,21 @@ def persistent_identifier(type, object, version=1): return 'swh:%s:%s:%s' % (version, o['short_name'], _hash) +PERSISTENT_IDENTIFIER_TYPES = ['snp', 'rel', 'rev', 'dir', 'cnt'] + PERSISTENT_IDENTIFIER_KEYS = [ 'namespace', 'scheme_version', 'object_type', 'object_id', 'metadata'] PERSISTENT_IDENTIFIER_PARTS_SEP = ';' +class SWHMalformedIdentifierException(ValueError): + """Exception when a string representing an identifier is badly formatted. + + """ + pass + + def parse_persistent_identifier(persistent_id): """Parse swh's :ref:`persistent-identifiers` scheme. @@ -659,8 +668,34 @@ def parse_persistent_identifier(persistent_id): * metadata, holding dict value """ + # <pid>;<contextual-information> persistent_id_parts = persistent_id.split(PERSISTENT_IDENTIFIER_PARTS_SEP) - data = persistent_id_parts.pop(0).split(':') + pid_data = persistent_id_parts.pop(0).split(':') + + if len(pid_data) != 4: + raise SWHMalformedIdentifierException( + 'Wrong format: There should be 4 mandatory parameters') + + # Checking for parsing errors + _ns, _version, _type, _id = pid_data + if _ns != 'swh': + raise SWHMalformedIdentifierException( + 'Wrong format: Supported namespace is \'swh\'') + + if _version != '1': + raise SWHMalformedIdentifierException( + 'Wrong format: Supported version is 1') + + expected_types = PERSISTENT_IDENTIFIER_TYPES + if _type not in expected_types: + raise SWHMalformedIdentifierException( + 'Wrong format: Supported types are %s' % ( + ', '.join(expected_types))) + + if not _id: + raise SWHMalformedIdentifierException( + 'Wrong format: Identifier should be present') + persistent_id_metadata = {} for part in persistent_id_parts: try: @@ -668,5 +703,5 @@ def parse_persistent_identifier(persistent_id): persistent_id_metadata[key] = val except Exception: pass - data.append(persistent_id_metadata) - return dict(zip(PERSISTENT_IDENTIFIER_KEYS, data)) + pid_data.append(persistent_id_metadata) + return dict(zip(PERSISTENT_IDENTIFIER_KEYS, pid_data)) diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index afe943b6de8699ffb1775c8df135253c3a1d1a42..8282aebb0c4adc0655dcfdf1c727ed2409d08bb9 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -804,8 +804,8 @@ class SnapshotIdentifier(unittest.TestCase): for pid, _type, _version, _hash in [ ('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', 'cnt', '1', '94a9ed024d3859793618152ea559a168bbcbb5e2'), - ('swh:2:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir', - '2', 'd198bc9d7a6bcf6db04f476d29314f157507d505'), + ('swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', 'dir', + '1', 'd198bc9d7a6bcf6db04f476d29314f157507d505'), ('swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', 'rev', '1', '309cf2674ee7a0749978cf8265ab91a60aea0f7d'), ('swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', 'rel', @@ -847,3 +847,27 @@ class SnapshotIdentifier(unittest.TestCase): } actual_result = identifiers.parse_persistent_identifier(pid) self.assertEquals(actual_result, expected_result) + + def test_parse_persistent_identifier_parsing_error(self): + from swh.model.identifiers import (SWHMalformedIdentifierException, + PERSISTENT_IDENTIFIER_TYPES) + for pid, _error in [ + ('swh:1:cnt', + 'Wrong format: There should be 4 mandatory parameters'), + ('swh:1:', + 'Wrong format: There should be 4 mandatory parameters'), + ('swh:', + 'Wrong format: There should be 4 mandatory parameters'), + ('foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505', + 'Wrong format: Supported namespace is \'swh\''), + ('swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505', + 'Wrong format: Supported version is 1'), + ('swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505', + 'Wrong format: Supported types are %s' % ( + ', '.join(PERSISTENT_IDENTIFIER_TYPES))), + ('swh:1:cnt:', + 'Wrong format: Identifier should be present'), + ]: + with self.assertRaisesRegex( + SWHMalformedIdentifierException, _error): + identifiers.parse_persistent_identifier(pid)