diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index 934ace85b2bc1a77a0d1fd93ee9e0db5892a40ef..f8a87a1351b9ad5692c1b8ecd448e9e0e13ad8b1 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -7,6 +7,8 @@ import binascii import datetime from functools import lru_cache +from .exceptions import ValidationError +from .fields.hashes import validate_sha1 from .hashutil import hash_data, hash_git_data, DEFAULT_ALGORITHMS from .hashutil import hash_to_hex @@ -696,6 +698,12 @@ def parse_persistent_identifier(persistent_id): raise SWHMalformedIdentifierException( 'Wrong format: Identifier should be present') + try: + validate_sha1(_id) + except ValidationError: + raise SWHMalformedIdentifierException( + 'Wrong format: Identifier should be a valid hash') + persistent_id_metadata = {} for part in persistent_id_parts: try: diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 0cab5003516531d590453bc9dfda4d48a2ec65cb..b191676a92f30b837eabfabd1460cb217f426854 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -867,7 +867,11 @@ class SnapshotIdentifier(unittest.TestCase): ', '.join(PERSISTENT_IDENTIFIER_TYPES))), ('swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;' 'malformed', - 'Contextual data is badly formatted, form key=val expected') + 'Contextual data is badly formatted, form key=val expected'), + ('swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d', + 'Wrong format: Identifier should be a valid hash'), + ('swh:1:snp:foo', + 'Wrong format: Identifier should be a valid hash') ]: with self.assertRaisesRegex( SWHMalformedIdentifierException, _error):