diff --git a/PKG-INFO b/PKG-INFO index 6922571d85582fde7a703d8af0ed5a82512e17f8..413f13191826c23a944249c1a139f8467cf1d229 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 1.0.0 +Version: 1.0.1 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index 6922571d85582fde7a703d8af0ed5a82512e17f8..413f13191826c23a944249c1a139f8467cf1d229 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 1.0.0 +Version: 1.0.1 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh/model/cli.py b/swh/model/cli.py index 6e69d1d861fd68eca64730b9a0b3fb83061c1bd7..8ac925079d9442cfa0b75afb13eae4bb43b019fe 100644 --- a/swh/model/cli.py +++ b/swh/model/cli.py @@ -5,14 +5,14 @@ import os import sys -from typing import List +from typing import Dict, List, Optional # WARNING: do not import unnecessary things here to keep cli startup time under # control import click from swh.core.cli import swh as swh_cli_group -from swh.model.identifiers import SWHID +from swh.model.identifiers import CoreSWHID, ObjectType CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) @@ -26,45 +26,48 @@ _DULWICH_TYPES = { } -class SWHIDParamType(click.ParamType): - """Click argument that accepts SWHID and return them as - :class:`swh.model.identifiers.SWHID` instances """ +class CoreSWHIDParamType(click.ParamType): + """Click argument that accepts a core SWHID and returns them as + :class:`swh.model.identifiers.CoreSWHID` instances """ name = "SWHID" - def convert(self, value, param, ctx) -> SWHID: + def convert(self, value, param, ctx) -> CoreSWHID: from swh.model.exceptions import ValidationError - from swh.model.identifiers import parse_swhid try: - return parse_swhid(value) + return CoreSWHID.from_string(value) except ValidationError as e: - self.fail(f'"{value}" is not a valid SWHID: {e}', param, ctx) + self.fail(f'"{value}" is not a valid core SWHID: {e}', param, ctx) -def swhid_of_file(path): +def swhid_of_file(path) -> CoreSWHID: from swh.model.from_disk import Content - from swh.model.identifiers import CONTENT, swhid + from swh.model.hashutil import hash_to_bytes object = Content.from_file(path=path).get_data() - return swhid(CONTENT, object) + return CoreSWHID( + object_type=ObjectType.CONTENT, object_id=hash_to_bytes(object["sha1_git"]) + ) -def swhid_of_file_content(data): +def swhid_of_file_content(data) -> CoreSWHID: from swh.model.from_disk import Content - from swh.model.identifiers import CONTENT, swhid + from swh.model.hashutil import hash_to_bytes object = Content.from_bytes(mode=644, data=data).get_data() - return swhid(CONTENT, object) + return CoreSWHID( + object_type=ObjectType.CONTENT, object_id=hash_to_bytes(object["sha1_git"]) + ) -def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> str: +def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> CoreSWHID: from swh.model.from_disk import ( Directory, accept_all_directories, ignore_directories_patterns, ) - from swh.model.identifiers import DIRECTORY, swhid + from swh.model.hashutil import hash_to_bytes dir_filter = ( ignore_directories_patterns(path, exclude_patterns) @@ -73,24 +76,34 @@ def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> str: ) object = Directory.from_disk(path=path, dir_filter=dir_filter).get_data() - return swhid(DIRECTORY, object) + return CoreSWHID( + object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(object["id"]) + ) def swhid_of_origin(url): - from swh.model.identifiers import SWHID, origin_identifier + from swh.model.hashutil import hash_to_bytes + from swh.model.identifiers import ( + ExtendedObjectType, + ExtendedSWHID, + origin_identifier, + ) - return str(SWHID(object_type="origin", object_id=origin_identifier({"url": url}))) + return ExtendedSWHID( + object_type=ExtendedObjectType.ORIGIN, + object_id=hash_to_bytes(origin_identifier({"url": url})), + ) -def swhid_of_git_repo(path): +def swhid_of_git_repo(path) -> CoreSWHID: import dulwich.repo from swh.model import hashutil - from swh.model.identifiers import SWHID, snapshot_identifier + from swh.model.identifiers import snapshot_identifier repo = dulwich.repo.Repo(path) - branches = {} + branches: Dict[bytes, Optional[Dict]] = {} for ref, target in repo.refs.as_dict().items(): obj = repo[target] if obj: @@ -109,10 +122,13 @@ def swhid_of_git_repo(path): snapshot = {"branches": branches} - return str(SWHID(object_type="snapshot", object_id=snapshot_identifier(snapshot))) + return CoreSWHID( + object_type=ObjectType.SNAPSHOT, + object_id=hashutil.hash_to_bytes(snapshot_identifier(snapshot)), + ) -def identify_object(obj_type, follow_symlinks, exclude_patterns, obj): +def identify_object(obj_type, follow_symlinks, exclude_patterns, obj) -> str: from urllib.parse import urlparse if obj_type == "auto": @@ -129,31 +145,29 @@ def identify_object(obj_type, follow_symlinks, exclude_patterns, obj): except ValueError: raise click.BadParameter("cannot detect object type for %s" % obj) - swhid = None - if obj == "-": content = sys.stdin.buffer.read() - swhid = swhid_of_file_content(content) + swhid = str(swhid_of_file_content(content)) elif obj_type in ["content", "directory"]: path = obj.encode(sys.getfilesystemencoding()) if follow_symlinks and os.path.islink(obj): path = os.path.realpath(obj) if obj_type == "content": - swhid = swhid_of_file(path) + swhid = str(swhid_of_file(path)) elif obj_type == "directory": - swhid = swhid_of_dir( - path, [pattern.encode() for pattern in exclude_patterns] + swhid = str( + swhid_of_dir(path, [pattern.encode() for pattern in exclude_patterns]) ) elif obj_type == "origin": - swhid = swhid_of_origin(obj) + swhid = str(swhid_of_origin(obj)) elif obj_type == "snapshot": - swhid = swhid_of_git_repo(obj) + swhid = str(swhid_of_git_repo(obj)) else: # shouldn't happen, due to option validation raise click.BadParameter("invalid object type: " + obj_type) # note: we return original obj instead of path here, to preserve user-given # file name in output - return (obj, swhid) + return swhid @swh_cli_group.command(context_settings=CONTEXT_SETTINGS) @@ -191,7 +205,7 @@ def identify_object(obj_type, follow_symlinks, exclude_patterns, obj): "--verify", "-v", metavar="SWHID", - type=SWHIDParamType(), + type=CoreSWHIDParamType(), help="reference identifier to be compared with computed one", ) @click.argument("objects", nargs=-1, required=True) @@ -232,8 +246,12 @@ def identify( if verify and len(objects) != 1: raise click.BadParameter("verification requires a single object") - results = map( - partial(identify_object, obj_type, follow_symlinks, exclude_patterns), objects, + results = zip( + objects, + map( + partial(identify_object, obj_type, follow_symlinks, exclude_patterns), + objects, + ), ) if verify: diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index 44daee52915162b633962335652caf79d803182f..a07b047e23649fbb4fa248935dd5d83d1ce0af7f 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -24,14 +24,11 @@ from typing import ( Union, ) import urllib.parse -import warnings import attr from attrs_strict import type_validator -from .collections import ImmutableDict from .exceptions import ValidationError -from .fields.hashes import validate_sha1 from .hashutil import MultiHash, hash_git_data, hash_to_bytes, hash_to_hex @@ -71,6 +68,7 @@ DIRECTORY = "directory" CONTENT = "content" RAW_EXTRINSIC_METADATA = "raw_extrinsic_metadata" + SWHID_NAMESPACE = "swh" SWHID_VERSION = 1 SWHID_TYPES = ["snp", "rel", "rev", "dir", "cnt"] @@ -726,27 +724,6 @@ def origin_identifier(origin): return hashlib.sha1(origin["url"].encode("utf-8")).hexdigest() -_object_type_map = { - ORIGIN: {"short_name": "ori", "key_id": "id"}, - SNAPSHOT: {"short_name": "snp", "key_id": "id"}, - RELEASE: {"short_name": "rel", "key_id": "id"}, - REVISION: {"short_name": "rev", "key_id": "id"}, - DIRECTORY: {"short_name": "dir", "key_id": "id"}, - CONTENT: {"short_name": "cnt", "key_id": "sha1_git"}, - RAW_EXTRINSIC_METADATA: {"short_name": "emd", "key_id": "id"}, -} - -_swhid_type_map = { - "ori": ORIGIN, - "snp": SNAPSHOT, - "rel": RELEASE, - "rev": REVISION, - "dir": DIRECTORY, - "cnt": CONTENT, - "emd": RAW_EXTRINSIC_METADATA, -} - - # type of the "object_type" attribute of the SWHID class; either # ObjectType or ExtendedObjectType _TObjectType = TypeVar("_TObjectType", ObjectType, ExtendedObjectType) @@ -1094,150 +1071,6 @@ class ExtendedSWHID(_BaseSWHID[ExtendedObjectType]): """the type of object the identifier points to""" -@attr.s(frozen=True) -class SWHID: - """ - Deprecated alternative to QualifiedSWHID. - - Args: - namespace (str): the namespace of the identifier, defaults to ``swh`` - scheme_version (int): the scheme version of the identifier, - defaults to 1 - object_type (str): the type of object the identifier points to, - either ``content``, ``directory``, ``release``, ``revision`` or ``snapshot`` - object_id (str): object's identifier - metadata (dict): optional dict filled with metadata related to - pointed object - - Raises: - swh.model.exceptions.ValidationError: In case of invalid object type or id - - Once created, it contains the following attributes: - - Attributes: - namespace (str): the namespace of the identifier - scheme_version (int): the scheme version of the identifier - object_type (str): the type of object the identifier points to - object_id (str): hexadecimal representation of the object hash - metadata (dict): metadata related to the pointed object - - To get the raw SWHID string from an instance of this named tuple, - use the :func:`str` function:: - - swhid = SWHID( - object_type='content', - object_id='8ff44f081d43176474b267de5451f2c2e88089d0' - ) - swhid_str = str(swhid) - # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0' - """ - - namespace = attr.ib(type=str, default=SWHID_NAMESPACE) - scheme_version = attr.ib(type=int, default=SWHID_VERSION) - object_type = attr.ib(type=str, default="") - object_id = attr.ib(type=str, converter=hash_to_hex, default="") # type: ignore - metadata = attr.ib( - type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict() - ) - - def __attrs_post_init__(self): - warnings.warn( - "swh.model.identifiers.SWHID is deprecated; " - "use swh.model.identifiers.QualifiedSWHID instead.", - DeprecationWarning, - ) - - @namespace.validator - def check_namespace(self, attribute, value): - if value != SWHID_NAMESPACE: - raise ValidationError( - "Invalid SWHID: invalid namespace: %(namespace)s", - params={"namespace": value}, - ) - - @scheme_version.validator - def check_scheme_version(self, attribute, value): - if value != SWHID_VERSION: - raise ValidationError( - "Invalid SWHID: invalid version: %(version)s", params={"version": value} - ) - - @object_type.validator - def check_object_type(self, attribute, value): - if value not in _object_type_map: - raise ValidationError( - "Invalid SWHID: invalid type: %(object_type)s)", - params={"object_type": value}, - ) - - @object_id.validator - def check_object_id(self, attribute, value): - try: - validate_sha1(value) # can raise if invalid hash - except ValidationError: - raise ValidationError( - "Invalid SWHID: invalid checksum: %(object_id)s", - params={"object_id": value}, - ) from None - - @metadata.validator - def check_qualifiers(self, attribute, value): - for k in value: - if k not in SWHID_QUALIFIERS: - raise ValidationError( - "Invalid SWHID: unknown qualifier: %(qualifier)s", - params={"qualifier": k}, - ) - - def to_dict(self) -> Dict[str, Any]: - return attr.asdict(self) - - def __str__(self) -> str: - o = _object_type_map.get(self.object_type) - assert o - swhid = SWHID_SEP.join( - [self.namespace, str(self.scheme_version), o["short_name"], self.object_id] - ) - if self.metadata: - for k, v in self.metadata.items(): - swhid += "%s%s=%s" % (SWHID_CTXT_SEP, k, v) - return swhid - - -def swhid( - object_type: str, - object_id: Union[str, Dict[str, Any]], - scheme_version: int = 1, - metadata: Union[ImmutableDict[str, Any], Dict[str, Any]] = ImmutableDict(), -) -> str: - """Compute :ref:`persistent-identifiers` - - Args: - object_type: object's type, either ``content``, ``directory``, - ``release``, ``revision`` or ``snapshot`` - object_id: object's identifier - scheme_version: SWHID scheme version, defaults to 1 - metadata: metadata related to the pointed object - - Raises: - swh.model.exceptions.ValidationError: In case of invalid object type or id - - Returns: - the SWHID of the object - - """ - if isinstance(object_id, dict): - o = _object_type_map[object_type] - object_id = object_id[o["key_id"]] - swhid = SWHID( - scheme_version=scheme_version, - object_type=object_type, - object_id=object_id, - metadata=metadata, # type: ignore # mypy can't properly unify types - ) - return str(swhid) - - def _parse_swhid(swhid: str) -> Dict[str, Any]: """Parse a Software Heritage identifier (SWHID) from string (see: :ref:`persistent-identifiers`.) @@ -1276,24 +1109,3 @@ def _parse_swhid(swhid: str) -> Dict[str, Any]: parts["scheme_version"] = int(parts["scheme_version"]) parts["object_id"] = hash_to_bytes(parts["object_id"]) return parts - - -def parse_swhid(swhid: str) -> SWHID: - """Parse a Software Heritage identifier (SWHID) from string (see: - :ref:`persistent-identifiers`.) - - Args: - swhid (str): A persistent identifier - - Raises: - swh.model.exceptions.ValidationError: if passed string is not a valid SWHID - - """ - parts = _parse_swhid(swhid) - return SWHID( - parts["namespace"], - parts["scheme_version"], - _swhid_type_map[parts["object_type"]], - hash_to_hex(parts["object_id"]), - parts["qualifiers"], - ) diff --git a/swh/model/tests/test_cli.py b/swh/model/tests/test_cli.py index 3d86ede6ba648957aca108cc310d7ffeaf897ac5..9a006607f15d96be1917ca8a2cbc12aeb763f5f8 100644 --- a/swh/model/tests/test_cli.py +++ b/swh/model/tests/test_cli.py @@ -23,7 +23,7 @@ class TestIdentify(DataMixin, unittest.TestCase): self.runner = CliRunner() def assertSWHID(self, result, swhid): - self.assertEqual(result.exit_code, 0) + self.assertEqual(result.exit_code, 0, result.output) self.assertEqual(result.output.split()[0], swhid) def test_no_args(self): @@ -127,7 +127,7 @@ class TestIdentify(DataMixin, unittest.TestCase): def test_auto_origin(self): """automatic object type detection: origin""" result = self.runner.invoke(cli.identify, ["https://github.com/torvalds/linux"]) - self.assertEqual(result.exit_code, 0) + self.assertEqual(result.exit_code, 0, result.output) self.assertRegex(result.output, r"^swh:\d+:ori:") def test_verify_content(self): @@ -139,7 +139,7 @@ class TestIdentify(DataMixin, unittest.TestCase): # match path = os.path.join(self.tmpdir_name, filename) result = self.runner.invoke(cli.identify, ["--verify", expected_id, path]) - self.assertEqual(result.exit_code, 0) + self.assertEqual(result.exit_code, 0, result.output) # mismatch with open(path, "a") as f: diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index a3a5295f5bca6bdd6c70675cb9b9add40f542ea3..38d7e357d2fd4260568fbe4d7c00e564e8bff519 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -16,12 +16,6 @@ from swh.model import hashutil, identifiers from swh.model.exceptions import ValidationError from swh.model.hashutil import hash_to_bytes as _x from swh.model.identifiers import ( - CONTENT, - DIRECTORY, - RELEASE, - REVISION, - SNAPSHOT, - SWHID, SWHID_QUALIFIERS, CoreSWHID, ExtendedObjectType, @@ -891,317 +885,6 @@ def test_normalize_timestamp_dict_invalid_timestamp(dict_input): normalize_timestamp(dict_input) -class TestSwhid(unittest.TestCase): - @pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") - def test_swhid(self): - _snapshot_id = _x("c7c108084bc0bf3d81436bf980b46e98bd338453") - _release_id = "22ece559cc7cc2364edc5e5593d63ae8bd229f9f" - _revision_id = "309cf2674ee7a0749978cf8265ab91a60aea0f7d" - _directory_id = "d198bc9d7a6bcf6db04f476d29314f157507d505" - _content_id = "94a9ed024d3859793618152ea559a168bbcbb5e2" - _snapshot = {"id": _snapshot_id} - _release = {"id": _release_id} - _revision = {"id": _revision_id} - _directory = {"id": _directory_id} - _content = {"sha1_git": _content_id} - - for full_type, _hash, expected_swhid, version, _meta in [ - ( - SNAPSHOT, - _snapshot_id, - "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", - None, - {}, - ), - ( - RELEASE, - _release_id, - "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", - 1, - {}, - ), - ( - REVISION, - _revision_id, - "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", - None, - {}, - ), - ( - DIRECTORY, - _directory_id, - "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", - None, - {}, - ), - ( - CONTENT, - _content_id, - "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", - 1, - {}, - ), - ( - SNAPSHOT, - _snapshot, - "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", - None, - {}, - ), - ( - RELEASE, - _release, - "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", - 1, - {}, - ), - ( - REVISION, - _revision, - "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", - None, - {}, - ), - ( - DIRECTORY, - _directory, - "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", - None, - {}, - ), - ( - CONTENT, - _content, - "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", - 1, - {}, - ), - ( - CONTENT, - _content, - "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2;origin=1", - 1, - {"origin": "1"}, - ), - ]: - if version: - actual_value = identifiers.swhid( - full_type, _hash, version, metadata=_meta - ) - else: - actual_value = identifiers.swhid(full_type, _hash, metadata=_meta) - - self.assertEqual(actual_value, expected_swhid) - - def test_swhid_wrong_input(self): - _snapshot_id = "notahash4bc0bf3d81436bf980b46e98bd338453" - _snapshot = {"id": _snapshot_id} - - for _type, _hash in [ - (SNAPSHOT, _snapshot_id), - (SNAPSHOT, _snapshot), - ("lines", "42"), - ]: - with self.assertRaises(ValidationError): - identifiers.swhid(_type, _hash) - - @pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") - def test_parse_swhid(self): - for swhid, _type, _version, _hash in [ - ( - "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", - CONTENT, - 1, - "94a9ed024d3859793618152ea559a168bbcbb5e2", - ), - ( - "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", - DIRECTORY, - 1, - "d198bc9d7a6bcf6db04f476d29314f157507d505", - ), - ( - "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", - REVISION, - 1, - "309cf2674ee7a0749978cf8265ab91a60aea0f7d", - ), - ( - "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", - RELEASE, - 1, - "22ece559cc7cc2364edc5e5593d63ae8bd229f9f", - ), - ( - "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", - SNAPSHOT, - 1, - "c7c108084bc0bf3d81436bf980b46e98bd338453", - ), - ]: - with pytest.warns(DeprecationWarning): - expected_result = SWHID( - namespace="swh", - scheme_version=_version, - object_type=_type, - object_id=_hash, - metadata={}, - ) - actual_result = identifiers.parse_swhid(swhid) - self.assertEqual(actual_result, expected_result) - self.assertEqual(str(expected_result), swhid) - - for swhid, _type, _version, _hash, _metadata in [ - ( - "swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;lines=1-18;origin=https://github.com/python/cpython", # noqa - CONTENT, - 1, - "9c95815d9e9d91b8dae8e05d8bbc696fe19f796b", - {"lines": "1-18", "origin": "https://github.com/python/cpython"}, - ), - ( - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=deb://Debian/packages/linuxdoc-tools", # noqa - DIRECTORY, - 1, - "0b6959356d30f1a4e9b7f6bca59b9a336464c03d", - {"origin": "deb://Debian/packages/linuxdoc-tools"}, - ), - ]: - with pytest.warns(DeprecationWarning): - expected_result = SWHID( - namespace="swh", - scheme_version=_version, - object_type=_type, - object_id=_hash, - metadata=_metadata, - ) - actual_result = identifiers.parse_swhid(swhid) - self.assertEqual(actual_result, expected_result) - self.assertEqual( - expected_result.to_dict(), - { - "namespace": "swh", - "scheme_version": _version, - "object_type": _type, - "object_id": _hash, - "metadata": _metadata, - }, - ) - self.assertEqual(str(expected_result), swhid) - - -@pytest.mark.parametrize( - "invalid_swhid", - [ - "swh:1:cnt", - "swh:1:", - "swh:", - "swh:1:cnt:", - "foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505", - "swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505", - "swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505", - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed", - "swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d", - "swh:1:snp:foo", - # wrong qualifier: ori should be origin - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa - # wrong qualifier: anc should be anchor - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anc=1;visit=1;path=/", # noqa - # wrong qualifier: vis should be visit - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=1;vis=1;path=/", # noqa - # wrong qualifier: pa should be path - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=1;visit=1;pa=/", # noqa - # wrong qualifier: line should be lines - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;line=10;origin=something;anchor=1;visit=1;path=/", # noqa - # wrong qualifier value: it contains space before of after - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin= https://some-url", # noqa - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=some-anchor ", # noqa - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=some-anchor ;visit=1", # noqa - # invalid swhid: whitespaces - "swh :1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa - "swh: 1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa - "swh: 1: dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa - "swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d", - "swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d; origin=blah", - "swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12", - # other whitespaces - "swh\t:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12", - "swh:1\n:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12", - "swh:1:\rdir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12", - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d\f;lines=12", - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12\v", - ], -) -def test_parse_swhid_parsing_error(invalid_swhid): - with pytest.raises(ValidationError): - identifiers.parse_swhid(invalid_swhid) - - -@pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") -@pytest.mark.parametrize( - "ns,version,type,id", - [ - ("foo", 1, CONTENT, "abc8bc9d7a6bcf6db04f476d29314f157507d505",), - ("swh", 2, DIRECTORY, "def8bc9d7a6bcf6db04f476d29314f157507d505",), - ("swh", 1, "foo", "fed8bc9d7a6bcf6db04f476d29314f157507d505",), - ("swh", 1, SNAPSHOT, "gh6959356d30f1a4e9b7f6bca59b9a336464c03d",), - ], -) -def test_SWHID_class_validation_error(ns, version, type, id): - with pytest.raises(ValidationError): - SWHID( - namespace=ns, scheme_version=version, object_type=type, object_id=id, - ) - - -@pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") -def test_SWHID_hash(): - object_id = "94a9ed024d3859793618152ea559a168bbcbb5e2" - - assert hash(SWHID(object_type="directory", object_id=object_id)) == hash( - SWHID(object_type="directory", object_id=object_id) - ) - - assert hash( - SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) - ) == hash( - SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) - ) - - # Different order of the dictionary, so the underlying order of the tuple in - # ImmutableDict is different. - assert hash( - SWHID( - object_type="directory", - object_id=object_id, - metadata={"origin": "https://example.com", "lines": "42"}, - ) - ) == hash( - SWHID( - object_type="directory", - object_id=object_id, - metadata={"lines": "42", "origin": "https://example.com"}, - ) - ) - - -@pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") -def test_SWHID_eq(): - object_id = "94a9ed024d3859793618152ea559a168bbcbb5e2" - - assert SWHID(object_type="directory", object_id=object_id) == SWHID( - object_type="directory", object_id=object_id - ) - - assert SWHID( - object_type="directory", object_id=object_id, metadata=dummy_qualifiers, - ) == SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) - - assert SWHID( - object_type="directory", object_id=object_id, metadata=dummy_qualifiers, - ) == SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) - - # SWHIDs that are outright invalid, no matter the context INVALID_SWHIDS = [ "swh:1:cnt",