diff --git a/PKG-INFO b/PKG-INFO index 2b46f3e1f28938822858cc690385b9ea7132e04d..94a9185df5647d74c1f5238c6806e78f4ef7c63c 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.12.0 +Version: 0.13.0 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/docs/persistent-identifiers.rst b/docs/persistent-identifiers.rst index ea1781dd3cf1745d798811f7a10d958f74aa40cc..bb8078437632f3f0bb9b958aa0dfaf08a49c491c 100644 --- a/docs/persistent-identifiers.rst +++ b/docs/persistent-identifiers.rst @@ -299,7 +299,10 @@ it can be *computed from the object itself*, without having to rely on any third party. An implementation of SWHID that allows to do so locally is the `swh identify <https://docs.softwareheritage.org/devel/swh-model/cli.html>`_ tool, available from the `swh.model <https://pypi.org/project/swh.model/>`_ -Python package under the GPL license. +Python package under the GPL license. This package can be installed via the ``pip`` +package manager with the one liner ``pip3 install swh.model`` on any machine with +Python (at least version 3.7) and ``pip`` installed (on a Debian or Ubuntu system a simple ``apt install python3 python3-pip`` +will suffice, see `the general instructions <https://packaging.python.org/tutorials/installing-packages/>`_ for other platforms). 
SWHIDs are also automatically computed by Software Heritage for all archived objects as part of its archival activity, and can be looked up via the project diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index 2b46f3e1f28938822858cc690385b9ea7132e04d..94a9185df5647d74c1f5238c6806e78f4ef7c63c 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.12.0 +Version: 0.13.0 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index 98843a5a92ed2abc10d48e6330d0b0f3919c86f3..e4598eb3efde98f835c6672c564de70f76994a53 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -1,40 +1,88 @@ -# Copyright (C) 2015-2020 The Software Heritage developers +# Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from __future__ import annotations + import binascii import datetime +import enum from functools import lru_cache import hashlib import re -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union +from typing import ( + Any, + Dict, + Generic, + Iterable, + List, + Optional, + Tuple, + Type, + TypeVar, + Union, +) +import urllib.parse +import warnings import attr +from attrs_strict import type_validator from .collections import ImmutableDict from .exceptions import ValidationError from .fields.hashes import validate_sha1 -from .hashutil import MultiHash, hash_git_data, hash_to_hex +from .hashutil import MultiHash, hash_git_data, hash_to_bytes, hash_to_hex + + +class ObjectType(enum.Enum): + """Possible object types of a QualifiedSWHID or CoreSWHID. 
+ + The value of each variant is what is used in the SWHID's string representation.""" + + SNAPSHOT = "snp" + REVISION = "rev" + RELEASE = "rel" + DIRECTORY = "dir" + CONTENT = "cnt" + + +class ExtendedObjectType(enum.Enum): + """Possible object types of an ExtendedSWHID. + + The variants are a superset of :class:`ObjectType`'s""" + + SNAPSHOT = "snp" + REVISION = "rev" + RELEASE = "rel" + DIRECTORY = "dir" + CONTENT = "cnt" + ORIGIN = "ori" + RAW_EXTRINSIC_METADATA = "emd" + +# The following are deprecated aliases of the variants defined in ObjectType +# while transitioning from SWHID to QualifiedSWHID ORIGIN = "origin" SNAPSHOT = "snapshot" REVISION = "revision" RELEASE = "release" DIRECTORY = "directory" CONTENT = "content" +RAW_EXTRINSIC_METADATA = "raw_extrinsic_metadata" SWHID_NAMESPACE = "swh" SWHID_VERSION = 1 -SWHID_TYPES = ["ori", "snp", "rel", "rev", "dir", "cnt"] +SWHID_TYPES = ["snp", "rel", "rev", "dir", "cnt"] +EXTENDED_SWHID_TYPES = SWHID_TYPES + ["ori", "emd"] SWHID_SEP = ":" SWHID_CTXT_SEP = ";" SWHID_QUALIFIERS = {"origin", "anchor", "visit", "path", "lines"} SWHID_RE_RAW = ( - f"(?P<scheme>{SWHID_NAMESPACE})" - f"{SWHID_SEP}(?P<version>{SWHID_VERSION})" - f"{SWHID_SEP}(?P<object_type>{'|'.join(SWHID_TYPES)})" + f"(?P<namespace>{SWHID_NAMESPACE})" + f"{SWHID_SEP}(?P<scheme_version>{SWHID_VERSION})" + f"{SWHID_SEP}(?P<object_type>{'|'.join(EXTENDED_SWHID_TYPES)})" f"{SWHID_SEP}(?P<object_id>[0-9a-f]{{40}})" f"({SWHID_CTXT_SEP}(?P<qualifiers>\\S+))?" 
) @@ -685,6 +733,7 @@ _object_type_map = { REVISION: {"short_name": "rev", "key_id": "id"}, DIRECTORY: {"short_name": "dir", "key_id": "id"}, CONTENT: {"short_name": "cnt", "key_id": "sha1_git"}, + RAW_EXTRINSIC_METADATA: {"short_name": "emd", "key_id": "id"}, } _swhid_type_map = { @@ -694,14 +743,350 @@ _swhid_type_map = { "rev": REVISION, "dir": DIRECTORY, "cnt": CONTENT, + "emd": RAW_EXTRINSIC_METADATA, } +# type of the "object_type" attribute of the SWHID class; either +# ObjectType or ExtendedObjectType +_TObjectType = TypeVar("_TObjectType", ObjectType, ExtendedObjectType) + +# the SWHID class itself (this is used so that X.from_string() can return X +# for all X subclass of _BaseSWHID) +_TSWHID = TypeVar("_TSWHID", bound="_BaseSWHID") + + +@attr.s(frozen=True, kw_only=True) +class _BaseSWHID(Generic[_TObjectType]): + """Common base class for CoreSWHID, QualifiedSWHID, and ExtendedSWHID. + + This is an "abstract" class and should not be instantiated directly; + it only exists to deduplicate code between these three SWHID classes.""" + + namespace = attr.ib(type=str, default=SWHID_NAMESPACE) + """the namespace of the identifier, defaults to ``swh``""" + + scheme_version = attr.ib(type=int, default=SWHID_VERSION) + """the scheme version of the identifier, defaults to 1""" + + # overridden by subclasses + object_type: _TObjectType + """the type of object the identifier points to""" + + object_id = attr.ib(type=bytes, validator=type_validator()) + """object's identifier""" + + @namespace.validator + def check_namespace(self, attribute, value): + if value != SWHID_NAMESPACE: + raise ValidationError( + "Invalid SWHID: invalid namespace: %(namespace)s", + params={"namespace": value}, + ) + + @scheme_version.validator + def check_scheme_version(self, attribute, value): + if value != SWHID_VERSION: + raise ValidationError( + "Invalid SWHID: invalid version: %(version)s", params={"version": value} + ) + + @object_id.validator + def check_object_id(self, attribute, 
value): + if len(value) != 20: + raise ValidationError( + "Invalid SWHID: invalid checksum: %(object_id)s", + params={"object_id": hash_to_hex(value)}, + ) + + def __str__(self) -> str: + return SWHID_SEP.join( + [ + self.namespace, + str(self.scheme_version), + self.object_type.value, + hash_to_hex(self.object_id), + ] + ) + + @classmethod + def from_string(cls: Type[_TSWHID], s: str) -> _TSWHID: + parts = _parse_swhid(s) + if parts.pop("qualifiers"): + raise ValidationError(f"{cls.__name__} does not support qualifiers.") + try: + return cls(**parts) + except ValueError as e: + raise ValidationError( + "ValueError: %(args)s", params={"args": e.args} + ) from None + + +@attr.s(frozen=True, kw_only=True) +class CoreSWHID(_BaseSWHID[ObjectType]): + """ + Dataclass holding the relevant info associated to a SoftWare Heritage + persistent IDentifier (SWHID). + + Unlike `QualifiedSWHID`, it is restricted to core SWHIDs, ie. SWHIDs + with no qualifiers. + + Raises: + swh.model.exceptions.ValidationError: In case of invalid object type or id + + To get the raw SWHID string from an instance of this class, + use the :func:`str` function: + + >>> swhid = CoreSWHID( + ... object_type=ObjectType.CONTENT, + ... object_id=bytes.fromhex('8ff44f081d43176474b267de5451f2c2e88089d0'), + ... ) + >>> str(swhid) + 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0' + + And vice-versa with :meth:`CoreSWHID.from_string`: + + >>> swhid == CoreSWHID.from_string( + ... "swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0" + ... 
+ ) + True + """ + + object_type = attr.ib( + type=ObjectType, validator=type_validator(), converter=ObjectType + ) + """the type of object the identifier points to""" + + +def _parse_core_swhid(swhid: Union[str, CoreSWHID, None]) -> Optional[CoreSWHID]: + if swhid is None or isinstance(swhid, CoreSWHID): + return swhid + else: + return CoreSWHID.from_string(swhid) + + +def _parse_lines_qualifier( + lines: Union[str, Tuple[int, Optional[int]], None] +) -> Optional[Tuple[int, Optional[int]]]: + try: + if lines is None or isinstance(lines, tuple): + return lines + elif "-" in lines: + (from_, to) = lines.split("-", 2) + return (int(from_), int(to)) + else: + return (int(lines), None) + except ValueError: + raise ValidationError( + "Invalid format for the lines qualifier: %(lines)s", params={"lines": lines} + ) + + +def _parse_path_qualifier(path: Union[str, bytes, None]) -> Optional[bytes]: + if path is None or isinstance(path, bytes): + return path + else: + return urllib.parse.unquote_to_bytes(path) + + +@attr.s(frozen=True, kw_only=True) +class QualifiedSWHID(_BaseSWHID[ObjectType]): + """ + Dataclass holding the relevant info associated to a SoftWare Heritage + persistent IDentifier (SWHID) + + Raises: + swh.model.exceptions.ValidationError: In case of invalid object type or id + + To get the raw SWHID string from an instance of this class, + use the :func:`str` function: + + >>> swhid = QualifiedSWHID( + ... object_type=ObjectType.CONTENT, + ... object_id=bytes.fromhex('8ff44f081d43176474b267de5451f2c2e88089d0'), + ... lines=(5, 10), + ... ) + >>> str(swhid) + 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0;lines=5-10' + + And vice-versa with :meth:`QualifiedSWHID.from_string`: + + >>> swhid == QualifiedSWHID.from_string( + ... "swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0;lines=5-10" + ... 
) + True + """ + + object_type = attr.ib( + type=ObjectType, validator=type_validator(), converter=ObjectType + ) + """the type of object the identifier points to""" + + # qualifiers: + + origin = attr.ib(type=Optional[str], default=None, validator=type_validator()) + """the software origin where an object has been found or observed in the wild, + as an URI""" + + visit = attr.ib(type=Optional[CoreSWHID], default=None, converter=_parse_core_swhid) + """the core identifier of a snapshot corresponding to a specific visit + of a repository containing the designated object""" + + anchor = attr.ib( + type=Optional[CoreSWHID], + default=None, + validator=type_validator(), + converter=_parse_core_swhid, + ) + """a designated node in the Merkle DAG relative to which a path to the object + is specified, as the core identifier of a directory, a revision, a release, + or a snapshot""" + + path = attr.ib( + type=Optional[bytes], + default=None, + validator=type_validator(), + converter=_parse_path_qualifier, + ) + """the absolute file path, from the root directory associated to the anchor node, + to the object; when the anchor denotes a directory or a revision, and almost always + when it’s a release, the root directory is uniquely determined; + when the anchor denotes a snapshot, the root directory is the one pointed to by HEAD + (possibly indirectly), and undefined if such a reference is missing""" + + lines = attr.ib( + type=Optional[Tuple[int, Optional[int]]], + default=None, + validator=type_validator(), + converter=_parse_lines_qualifier, + ) + """lines: line number(s) of interest, usually within a content object""" + + @visit.validator + def check_visit(self, attribute, value): + if value and value.object_type != ObjectType.SNAPSHOT: + raise ValidationError( + "The 'visit' qualifier must be a 'snp' SWHID, not '%(type)s'", + params={"type": value.object_type.value}, + ) + + @anchor.validator + def check_anchor(self, attribute, value): + if value and value.object_type not 
in ( + ObjectType.DIRECTORY, + ObjectType.REVISION, + ObjectType.RELEASE, + ObjectType.SNAPSHOT, + ): + raise ValidationError( + "The 'anchor' qualifier must be a 'dir', 'rev', 'rel', or 'snp' " + "SWHID, not '%(type)s'", + params={"type": value.object_type.value}, + ) + + def qualifiers(self) -> Dict[str, str]: + origin = self.origin + if origin: + unescaped_origin = origin + origin = origin.replace(";", "%3B") + assert urllib.parse.unquote_to_bytes( + origin + ) == urllib.parse.unquote_to_bytes( + unescaped_origin + ), "Escaping ';' in the origin qualifier corrupted the origin URL." + + d: Dict[str, Optional[str]] = { + "origin": origin, + "visit": str(self.visit) if self.visit else None, + "anchor": str(self.anchor) if self.anchor else None, + "path": ( + urllib.parse.quote_from_bytes(self.path) + if self.path is not None + else None + ), + "lines": ( + "-".join(str(line) for line in self.lines if line is not None) + if self.lines + else None + ), + } + return {k: v for (k, v) in d.items() if v is not None} + + def __str__(self) -> str: + swhid = SWHID_SEP.join( + [ + self.namespace, + str(self.scheme_version), + self.object_type.value, + hash_to_hex(self.object_id), + ] + ) + qualifiers = self.qualifiers() + if qualifiers: + for k, v in qualifiers.items(): + swhid += "%s%s=%s" % (SWHID_CTXT_SEP, k, v) + return swhid + + @classmethod + def from_string(cls, s: str) -> QualifiedSWHID: + parts = _parse_swhid(s) + qualifiers = parts.pop("qualifiers") + invalid_qualifiers = set(qualifiers) - SWHID_QUALIFIERS + if invalid_qualifiers: + raise ValidationError( + "Invalid qualifier(s): %(qualifiers)s", + params={"qualifiers": ", ".join(sorted(invalid_qualifiers))}, + ) + try: + return QualifiedSWHID(**parts, **qualifiers) + except ValueError as e: + raise ValidationError( + "ValueError: %(args)s", params={"args": e.args} + ) from None + + +@attr.s(frozen=True, kw_only=True) +class ExtendedSWHID(_BaseSWHID[ExtendedObjectType]): + """ + Dataclass holding the relevant info 
associated to a SoftWare Heritage + persistent IDentifier (SWHID). + + It extends `CoreSWHID` by allowing non-standard object types, and should + only be used internally to Software Heritage. + + Raises: + swh.model.exceptions.ValidationError: In case of invalid object type or id + + To get the raw SWHID string from an instance of this class, + use the :func:`str` function: + + >>> swhid = ExtendedSWHID( + ... object_type=ExtendedObjectType.CONTENT, + ... object_id=bytes.fromhex('8ff44f081d43176474b267de5451f2c2e88089d0'), + ... ) + >>> str(swhid) + 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0' + + And vice-versa with :meth:`ExtendedSWHID.from_string`: + + >>> swhid == ExtendedSWHID.from_string( + ... "swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0" + ... ) + True + """ + + object_type = attr.ib( + type=ExtendedObjectType, + validator=type_validator(), + converter=ExtendedObjectType, + ) + """the type of object the identifier points to""" + + @attr.s(frozen=True) class SWHID: """ - Named tuple holding the relevant info associated to a SoftWare Heritage - persistent IDentifier (SWHID) + Deprecated alternative to QualifiedSWHID. Args: namespace (str): the namespace of the identifier, defaults to ``swh`` @@ -744,6 +1129,13 @@ class SWHID: type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict() ) + def __attrs_post_init__(self): + warnings.warn( + "swh.model.identifiers.SWHID is deprecated; " + "use swh.model.identifiers.QualifiedSWHID instead.", + DeprecationWarning, + ) + @namespace.validator def check_namespace(self, attribute, value): if value != SWHID_NAMESPACE: @@ -835,16 +1227,17 @@ def swhid( return str(swhid) -def parse_swhid(swhid: str) -> SWHID: +def _parse_swhid(swhid: str) -> Dict[str, Any]: """Parse a Software Heritage identifier (SWHID) from string (see: :ref:`persistent-identifiers`.) 
+ This is for internal use; use :meth:`CoreSWHID.from_string`, + :meth:`QualifiedSWHID.from_string`, or :meth:`ExtendedSWHID.from_string` instead, + as they perform validation and build a dataclass. + Args: swhid (str): A persistent identifier - Returns: - a named tuple holding the parsing result - Raises: swh.model.exceptions.ValidationError: if passed string is not a valid SWHID @@ -854,10 +1247,10 @@ def parse_swhid(swhid: str) -> SWHID: raise ValidationError( "Invalid SWHID: invalid syntax: %(swhid)s", params={"swhid": swhid} ) - parts = m.groupdict() + parts: Dict[str, Any] = m.groupdict() - _qualifiers = {} qualifiers_raw = parts["qualifiers"] + parts["qualifiers"] = {} if qualifiers_raw: for qualifier in qualifiers_raw.split(SWHID_CTXT_SEP): try: @@ -867,12 +1260,29 @@ def parse_swhid(swhid: str) -> SWHID: "Invalid SWHID: invalid qualifier: %(qualifier)s", params={"qualifier": qualifier}, ) - _qualifiers[k] = v + parts["qualifiers"][k] = v + + parts["scheme_version"] = int(parts["scheme_version"]) + parts["object_id"] = hash_to_bytes(parts["object_id"]) + return parts + +def parse_swhid(swhid: str) -> SWHID: + """Parse a Software Heritage identifier (SWHID) from string (see: + :ref:`persistent-identifiers`.) 
+ + Args: + swhid (str): A persistent identifier + + Raises: + swh.model.exceptions.ValidationError: if passed string is not a valid SWHID + + """ + parts = _parse_swhid(swhid) return SWHID( - parts["scheme"], - int(parts["version"]), + parts["namespace"], + parts["scheme_version"], _swhid_type_map[parts["object_type"]], - parts["object_id"], - _qualifiers, # type: ignore # mypy can't properly unify types + hash_to_hex(parts["object_id"]), + parts["qualifiers"], ) diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py index ff99cf242e128d3326f4e480dd197a1021250549..59787a2289439fe5813b6f5ea3a42730d303ed98 100644 --- a/swh/model/tests/test_hashutil.py +++ b/swh/model/tests/test_hashutil.py @@ -1,8 +1,9 @@ -# Copyright (C) 2015-2018 The Software Heritage developers +# Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import contextlib import hashlib import io import os @@ -14,6 +15,17 @@ from swh.model import hashutil from swh.model.hashutil import MultiHash +@contextlib.contextmanager +def patch_blake2(function_name): + try: + with patch(function_name) as mock: + yield mock + finally: + # mocking blake2 inserts mock objects in the cache; we need + # to clean it before the next test runs + hashutil._blake2_hash_cache.clear() + + class BaseHashutil(unittest.TestCase): def setUp(self): # Reset function cache @@ -195,7 +207,7 @@ class Hashutil(BaseHashutil): if "blake2b" not in hashlib.algorithms_available: self.skipTest("blake2b not built in") - with patch("hashlib.blake2b") as mock_blake2b: + with patch_blake2("hashlib.blake2b") as mock_blake2b: mock_blake2b.return_value = sentinel = object() h = hashutil._new_hash("blake2b512") @@ -216,7 +228,7 @@ class Hashutil(BaseHashutil): if "blake2s" not in hashlib.algorithms_available: 
self.skipTest("blake2s not built in") - with patch("hashlib.blake2s") as mock_blake2s: + with patch_blake2("hashlib.blake2s") as mock_blake2s: mock_blake2s.return_value = sentinel = object() h = hashutil._new_hash("blake2s256") @@ -233,7 +245,7 @@ class Hashutil(BaseHashutil): if "blake2b" in hashlib.algorithms_available: self.skipTest("blake2b built in") - with patch("pyblake2.blake2b") as mock_blake2b: + with patch_blake2("pyblake2.blake2b") as mock_blake2b: mock_blake2b.return_value = sentinel = object() h = hashutil._new_hash("blake2b512") @@ -247,7 +259,7 @@ class Hashutil(BaseHashutil): if "blake2s" in hashlib.algorithms_available: self.skipTest("blake2s built in") - with patch("pyblake2.blake2s") as mock_blake2s: + with patch_blake2("pyblake2.blake2s") as mock_blake2s: mock_blake2s.return_value = sentinel = object() h = hashutil._new_hash("blake2s256") diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 470f8ed9927ccdb1b09cf4694b5cd1a44202cb82..93d075c60fc2d3168225444af3e62351080a1948 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -1,13 +1,15 @@ -# Copyright (C) 2015-2018 The Software Heritage developers +# Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import binascii import datetime +import itertools from typing import Dict import unittest +import attr import pytest from swh.model import hashutil, identifiers @@ -20,6 +22,12 @@ from swh.model.identifiers import ( REVISION, SNAPSHOT, SWHID, + SWHID_QUALIFIERS, + CoreSWHID, + ExtendedObjectType, + ExtendedSWHID, + ObjectType, + QualifiedSWHID, normalize_timestamp, ) @@ -883,6 +891,7 @@ def test_normalize_timestamp_dict_invalid_timestamp(dict_input): class TestSwhid(unittest.TestCase): + 
@pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") def test_swhid(self): _snapshot_id = _x("c7c108084bc0bf3d81436bf980b46e98bd338453") _release_id = "22ece559cc7cc2364edc5e5593d63ae8bd229f9f" @@ -995,6 +1004,7 @@ class TestSwhid(unittest.TestCase): with self.assertRaises(ValidationError): identifiers.swhid(_type, _hash) + @pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") def test_parse_swhid(self): for swhid, _type, _version, _hash in [ ( @@ -1028,15 +1038,17 @@ class TestSwhid(unittest.TestCase): "c7c108084bc0bf3d81436bf980b46e98bd338453", ), ]: - expected_result = SWHID( - namespace="swh", - scheme_version=_version, - object_type=_type, - object_id=_hash, - metadata={}, - ) - actual_result = identifiers.parse_swhid(swhid) + with pytest.warns(DeprecationWarning): + expected_result = SWHID( + namespace="swh", + scheme_version=_version, + object_type=_type, + object_id=_hash, + metadata={}, + ) + actual_result = identifiers.parse_swhid(swhid) self.assertEqual(actual_result, expected_result) + self.assertEqual(str(expected_result), swhid) for swhid, _type, _version, _hash, _metadata in [ ( @@ -1054,14 +1066,15 @@ class TestSwhid(unittest.TestCase): {"origin": "deb://Debian/packages/linuxdoc-tools"}, ), ]: - expected_result = SWHID( - namespace="swh", - scheme_version=_version, - object_type=_type, - object_id=_hash, - metadata=_metadata, - ) - actual_result = identifiers.parse_swhid(swhid) + with pytest.warns(DeprecationWarning): + expected_result = SWHID( + namespace="swh", + scheme_version=_version, + object_type=_type, + object_id=_hash, + metadata=_metadata, + ) + actual_result = identifiers.parse_swhid(swhid) self.assertEqual(actual_result, expected_result) self.assertEqual( expected_result.to_dict(), @@ -1073,6 +1086,7 @@ class TestSwhid(unittest.TestCase): "metadata": _metadata, }, ) + self.assertEqual(str(expected_result), swhid) @pytest.mark.parametrize( @@ -1122,6 +1136,7 @@ def test_parse_swhid_parsing_error(invalid_swhid): 
identifiers.parse_swhid(invalid_swhid) +@pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") @pytest.mark.parametrize( "ns,version,type,id", [ @@ -1138,7 +1153,8 @@ def test_SWHID_class_validation_error(ns, version, type, id): ) -def test_swhid_hash(): +@pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") +def test_SWHID_hash(): object_id = "94a9ed024d3859793618152ea559a168bbcbb5e2" assert hash(SWHID(object_type="directory", object_id=object_id)) == hash( @@ -1168,7 +1184,8 @@ def test_swhid_hash(): ) -def test_swhid_eq(): +@pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") +def test_SWHID_eq(): object_id = "94a9ed024d3859793618152ea559a168bbcbb5e2" assert SWHID(object_type="directory", object_id=object_id) == SWHID( @@ -1182,3 +1199,585 @@ def test_swhid_eq(): assert SWHID( object_type="directory", object_id=object_id, metadata=dummy_qualifiers, ) == SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) + + +# SWHIDs that are outright invalid, no matter the context +INVALID_SWHIDS = [ + "swh:1:cnt", + "swh:1:", + "swh:", + "swh:1:cnt:", + "foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505", + "swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505", + "swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505", + "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed", + "swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d", + "swh:1:snp:foo", + # wrong qualifier: ori should be origin + "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa + # wrong qualifier: anc should be anchor + "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anc=1;visit=1;path=/", # noqa + # wrong qualifier: vis should be visit + "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=1;vis=1;path=/", # noqa + # wrong qualifier: pa should be path + 
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=1;visit=1;pa=/", # noqa + # wrong qualifier: line should be lines + "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;line=10;origin=something;anchor=1;visit=1;path=/", # noqa + # wrong qualifier value: it contains space before of after + "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin= https://some-url", # noqa + "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=some-anchor ", # noqa + "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=some-anchor ;visit=1", # noqa + # invalid swhid: whitespaces + "swh :1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa + "swh: 1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa + "swh: 1: dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa + "swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d", + "swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d; origin=blah", + "swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12", + # other whitespaces + "swh\t:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12", + "swh:1\n:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12", + "swh:1:\rdir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12", + "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d\f;lines=12", + "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12\v", +] + +SWHID_CLASSES = [CoreSWHID, QualifiedSWHID, ExtendedSWHID] + + +@pytest.mark.parametrize( + "invalid_swhid,swhid_class", itertools.product(INVALID_SWHIDS, SWHID_CLASSES) +) +def test_swhid_parsing_error(invalid_swhid, swhid_class): + """Tests SWHID strings that are invalid for all SWHID classes do raise + a ValidationError""" + with pytest.raises(ValidationError): + swhid_class.from_string(invalid_swhid) + + +# string SWHIDs, and how they should be parsed by each of the classes, +# or 
None if the class does not support it +HASH = "94a9ed024d3859793618152ea559a168bbcbb5e2" +VALID_SWHIDS = [ + ( + f"swh:1:cnt:{HASH}", + CoreSWHID(object_type=ObjectType.CONTENT, object_id=_x(HASH),), + QualifiedSWHID(object_type=ObjectType.CONTENT, object_id=_x(HASH),), + ExtendedSWHID(object_type=ExtendedObjectType.CONTENT, object_id=_x(HASH),), + ), + ( + f"swh:1:dir:{HASH}", + CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=_x(HASH),), + QualifiedSWHID(object_type=ObjectType.DIRECTORY, object_id=_x(HASH),), + ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=_x(HASH),), + ), + ( + f"swh:1:rev:{HASH}", + CoreSWHID(object_type=ObjectType.REVISION, object_id=_x(HASH),), + QualifiedSWHID(object_type=ObjectType.REVISION, object_id=_x(HASH),), + ExtendedSWHID(object_type=ExtendedObjectType.REVISION, object_id=_x(HASH),), + ), + ( + f"swh:1:rel:{HASH}", + CoreSWHID(object_type=ObjectType.RELEASE, object_id=_x(HASH),), + QualifiedSWHID(object_type=ObjectType.RELEASE, object_id=_x(HASH),), + ExtendedSWHID(object_type=ExtendedObjectType.RELEASE, object_id=_x(HASH),), + ), + ( + f"swh:1:snp:{HASH}", + CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=_x(HASH),), + QualifiedSWHID(object_type=ObjectType.SNAPSHOT, object_id=_x(HASH),), + ExtendedSWHID(object_type=ExtendedObjectType.SNAPSHOT, object_id=_x(HASH),), + ), + ( + f"swh:1:cnt:{HASH};origin=https://github.com/python/cpython;lines=1-18", + None, # CoreSWHID does not allow qualifiers + QualifiedSWHID( + object_type=ObjectType.CONTENT, + object_id=_x(HASH), + origin="https://github.com/python/cpython", + lines=(1, 18), + ), + None, # Neither does ExtendedSWHID + ), + ( + f"swh:1:cnt:{HASH};origin=https://github.com/python/cpython;lines=18", + None, # likewise + QualifiedSWHID( + object_type=ObjectType.CONTENT, + object_id=_x(HASH), + origin="https://github.com/python/cpython", + lines=(18, None), + ), + None, # likewise + ), + ( + f"swh:1:dir:{HASH};origin=deb://Debian/packages/linuxdoc-tools", 
+ None, # likewise + QualifiedSWHID( + object_type=ObjectType.DIRECTORY, + object_id=_x(HASH), + origin="deb://Debian/packages/linuxdoc-tools", + ), + None, # likewise + ), + ( + f"swh:1:ori:{HASH}", + None, # CoreSWHID does not allow origin pseudo-SWHIDs + None, # Neither does QualifiedSWHID + ExtendedSWHID(object_type=ExtendedObjectType.ORIGIN, object_id=_x(HASH),), + ), + ( + f"swh:1:emd:{HASH}", + None, # likewise for metadata pseudo-SWHIDs + None, # Neither does QualifiedSWHID + ExtendedSWHID( + object_type=ExtendedObjectType.RAW_EXTRINSIC_METADATA, object_id=_x(HASH), + ), + ), + ( + f"swh:1:emd:{HASH};origin=https://github.com/python/cpython", + None, # CoreSWHID does not allow metadata pseudo-SWHIDs or qualifiers + None, # QualifiedSWHID does not allow metadata pseudo-SWHIDs + None, # ExtendedSWHID does not allow qualifiers + ), +] + + +@pytest.mark.parametrize( + "string,core,qualified,extended", + [ + pytest.param(string, core, qualified, extended, id=string) + for (string, core, qualified, extended) in VALID_SWHIDS + ], +) +def test_parse_unparse_swhids(string, core, qualified, extended): + """Tests parsing and serializing valid SWHIDs with the various SWHID classes.""" + classes = [CoreSWHID, QualifiedSWHID, ExtendedSWHID] + for (cls, parsed_swhid) in zip(classes, [core, qualified, extended]): + if parsed_swhid is None: + # This class should not accept this SWHID + with pytest.raises(ValidationError): + cls.from_string(string) + else: + # This class should + assert cls.from_string(string) == parsed_swhid + + # Also check serialization + assert string == str(parsed_swhid) + + +@pytest.mark.parametrize( + "ns,version,type,id,qualifiers", + [ + ("foo", 1, ObjectType.CONTENT, "abc8bc9d7a6bcf6db04f476d29314f157507d505", {}), + ("swh", 2, ObjectType.CONTENT, "def8bc9d7a6bcf6db04f476d29314f157507d505", {}), + ("swh", 1, ObjectType.DIRECTORY, "aaaa", {}), + ], +) +def test_QualifiedSWHID_validation_error(ns, version, type, id, qualifiers): + with 
pytest.raises(ValidationError): + QualifiedSWHID( + namespace=ns, + scheme_version=version, + object_type=type, + object_id=_x(id), + **qualifiers, + ) + + +@pytest.mark.parametrize( + "object_type,qualifiers,expected", + [ + # No qualifier: + (ObjectType.CONTENT, {}, f"swh:1:cnt:{HASH}"), + # origin: + (ObjectType.CONTENT, {"origin": None}, f"swh:1:cnt:{HASH}"), + (ObjectType.CONTENT, {"origin": 42}, ValueError), + # visit: + ( + ObjectType.CONTENT, + {"visit": f"swh:1:snp:{HASH}"}, + f"swh:1:cnt:{HASH};visit=swh:1:snp:{HASH}", + ), + ( + ObjectType.CONTENT, + {"visit": CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=_x(HASH))}, + f"swh:1:cnt:{HASH};visit=swh:1:snp:{HASH}", + ), + (ObjectType.CONTENT, {"visit": 42}, TypeError), + (ObjectType.CONTENT, {"visit": f"swh:1:rel:{HASH}"}, ValidationError,), + ( + ObjectType.CONTENT, + {"visit": CoreSWHID(object_type=ObjectType.RELEASE, object_id=_x(HASH))}, + ValidationError, + ), + # anchor: + ( + ObjectType.CONTENT, + {"anchor": f"swh:1:snp:{HASH}"}, + f"swh:1:cnt:{HASH};anchor=swh:1:snp:{HASH}", + ), + ( + ObjectType.CONTENT, + {"anchor": CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=_x(HASH))}, + f"swh:1:cnt:{HASH};anchor=swh:1:snp:{HASH}", + ), + ( + ObjectType.CONTENT, + {"anchor": f"swh:1:dir:{HASH}"}, + f"swh:1:cnt:{HASH};anchor=swh:1:dir:{HASH}", + ), + ( + ObjectType.CONTENT, + {"anchor": CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=_x(HASH))}, + f"swh:1:cnt:{HASH};anchor=swh:1:dir:{HASH}", + ), + (ObjectType.CONTENT, {"anchor": 42}, TypeError), + (ObjectType.CONTENT, {"anchor": f"swh:1:cnt:{HASH}"}, ValidationError,), + ( + ObjectType.CONTENT, + {"anchor": CoreSWHID(object_type=ObjectType.CONTENT, object_id=_x(HASH))}, + ValidationError, + ), + # path: + (ObjectType.CONTENT, {"path": b"/foo"}, f"swh:1:cnt:{HASH};path=/foo",), + ( + ObjectType.CONTENT, + {"path": b"/foo;bar"}, + f"swh:1:cnt:{HASH};path=/foo%3Bbar", + ), + (ObjectType.CONTENT, {"path": "/foo"}, 
f"swh:1:cnt:{HASH};path=/foo",), + ( + ObjectType.CONTENT, + {"path": "/foo;bar"}, + f"swh:1:cnt:{HASH};path=/foo%3Bbar", + ), + (ObjectType.CONTENT, {"path": 42}, Exception), + # lines: + (ObjectType.CONTENT, {"lines": (42, None)}, f"swh:1:cnt:{HASH};lines=42",), + (ObjectType.CONTENT, {"lines": (21, 42)}, f"swh:1:cnt:{HASH};lines=21-42",), + (ObjectType.CONTENT, {"lines": 42}, TypeError,), + (ObjectType.CONTENT, {"lines": (None, 42)}, ValueError,), + (ObjectType.CONTENT, {"lines": ("42", None)}, ValueError,), + ], +) +def test_QualifiedSWHID_init(object_type, qualifiers, expected): + """Tests validation and converters of qualifiers""" + if isinstance(expected, type): + assert issubclass(expected, Exception) + with pytest.raises(expected): + QualifiedSWHID(object_type=object_type, object_id=_x(HASH), **qualifiers) + else: + assert isinstance(expected, str) + swhid = QualifiedSWHID( + object_type=object_type, object_id=_x(HASH), **qualifiers + ) + + # Check the build object has the right serialization + assert expected == str(swhid) + + # Check the internal state of the object is the same as if parsed from a string + assert QualifiedSWHID.from_string(expected) == swhid + + +def test_QualifiedSWHID_hash(): + object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2") + + assert hash( + QualifiedSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id) + ) == hash(QualifiedSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id)) + + assert hash( + QualifiedSWHID( + object_type=ObjectType.DIRECTORY, object_id=object_id, **dummy_qualifiers, + ) + ) == hash( + QualifiedSWHID( + object_type=ObjectType.DIRECTORY, object_id=object_id, **dummy_qualifiers, + ) + ) + + # Different order of the dictionary, so the underlying order of the tuple in + # ImmutableDict is different. 
+ assert hash( + QualifiedSWHID( + object_type=ObjectType.DIRECTORY, + object_id=object_id, + origin="https://example.com", + lines=(42, None), + ) + ) == hash( + QualifiedSWHID( + object_type=ObjectType.DIRECTORY, + object_id=object_id, + lines=(42, None), + origin="https://example.com", + ) + ) + + +def test_QualifiedSWHID_eq(): + object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2") + + assert QualifiedSWHID( + object_type=ObjectType.DIRECTORY, object_id=object_id + ) == QualifiedSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id) + + assert QualifiedSWHID( + object_type=ObjectType.DIRECTORY, object_id=object_id, **dummy_qualifiers, + ) == QualifiedSWHID( + object_type=ObjectType.DIRECTORY, object_id=object_id, **dummy_qualifiers, + ) + + assert QualifiedSWHID( + object_type=ObjectType.DIRECTORY, object_id=object_id, **dummy_qualifiers, + ) == QualifiedSWHID( + object_type=ObjectType.DIRECTORY, object_id=object_id, **dummy_qualifiers, + ) + + +QUALIFIED_SWHIDS = [ + # origin: + ( + f"swh:1:cnt:{HASH};origin=https://github.com/python/cpython", + QualifiedSWHID( + object_type=ObjectType.CONTENT, + object_id=_x(HASH), + origin="https://github.com/python/cpython", + ), + ), + ( + f"swh:1:cnt:{HASH};origin=https://example.org/foo%3Bbar%25baz", + QualifiedSWHID( + object_type=ObjectType.CONTENT, + object_id=_x(HASH), + origin="https://example.org/foo%3Bbar%25baz", + ), + ), + # visit: + ( + f"swh:1:cnt:{HASH};visit=swh:1:snp:{HASH}", + QualifiedSWHID( + object_type=ObjectType.CONTENT, + object_id=_x(HASH), + visit=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=_x(HASH)), + ), + ), + (f"swh:1:cnt:{HASH};visit=swh:1:rel:{HASH}", None,), + # anchor: + ( + f"swh:1:cnt:{HASH};anchor=swh:1:dir:{HASH}", + QualifiedSWHID( + object_type=ObjectType.CONTENT, + object_id=_x(HASH), + anchor=CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=_x(HASH)), + ), + ), + ( + f"swh:1:cnt:{HASH};anchor=swh:1:rev:{HASH}", + QualifiedSWHID( + 
object_type=ObjectType.CONTENT, + object_id=_x(HASH), + anchor=CoreSWHID(object_type=ObjectType.REVISION, object_id=_x(HASH)), + ), + ), + ( + f"swh:1:cnt:{HASH};anchor=swh:1:cnt:{HASH}", + None, # 'cnt' is not valid in anchor + ), + ( + f"swh:1:cnt:{HASH};anchor=swh:1:ori:{HASH}", + None, # 'ori' is not valid in a CoreSWHID + ), + # path: + ( + f"swh:1:cnt:{HASH};path=/foo", + QualifiedSWHID( + object_type=ObjectType.CONTENT, object_id=_x(HASH), path=b"/foo" + ), + ), + ( + f"swh:1:cnt:{HASH};path=/foo%3Bbar", + QualifiedSWHID( + object_type=ObjectType.CONTENT, object_id=_x(HASH), path=b"/foo;bar" + ), + ), + ( + f"swh:1:cnt:{HASH};path=/foo%25bar", + QualifiedSWHID( + object_type=ObjectType.CONTENT, object_id=_x(HASH), path=b"/foo%bar" + ), + ), + # lines + ( + f"swh:1:cnt:{HASH};lines=1-18", + QualifiedSWHID( + object_type=ObjectType.CONTENT, object_id=_x(HASH), lines=(1, 18), + ), + ), + ( + f"swh:1:cnt:{HASH};lines=18", + QualifiedSWHID( + object_type=ObjectType.CONTENT, object_id=_x(HASH), lines=(18, None), + ), + ), + (f"swh:1:cnt:{HASH};lines=", None,), + (f"swh:1:cnt:{HASH};lines=aa", None,), + (f"swh:1:cnt:{HASH};lines=18-aa", None,), +] + + +@pytest.mark.parametrize("string,parsed", QUALIFIED_SWHIDS) +def test_QualifiedSWHID_parse_serialize_qualifiers(string, parsed): + """Tests parsing and serializing valid SWHIDs with the various SWHID classes.""" + if parsed is None: + with pytest.raises(ValidationError): + print(repr(QualifiedSWHID.from_string(string))) + else: + assert QualifiedSWHID.from_string(string) == parsed + assert str(parsed) == string + + +def test_QualifiedSWHID_serialize_origin(): + """Checks that semicolon in origins are escaped.""" + string = f"swh:1:cnt:{HASH};origin=https://example.org/foo%3Bbar%25baz" + swhid = QualifiedSWHID( + object_type=ObjectType.CONTENT, + object_id=_x(HASH), + origin="https://example.org/foo;bar%25baz", + ) + assert str(swhid) == string + + +def test_QualifiedSWHID_attributes(): + """Checks the set of 
QualifiedSWHID attributes match the SWHID_QUALIFIERS + constant.""" + + assert set(attr.fields_dict(QualifiedSWHID)) == { + "namespace", + "scheme_version", + "object_type", + "object_id", + *SWHID_QUALIFIERS, + } + + +@pytest.mark.parametrize( + "ns,version,type,id", + [ + ("foo", 1, ObjectType.CONTENT, "abc8bc9d7a6bcf6db04f476d29314f157507d505"), + ("swh", 2, ObjectType.CONTENT, "def8bc9d7a6bcf6db04f476d29314f157507d505"), + ("swh", 1, ObjectType.DIRECTORY, "aaaa"), + ], +) +def test_CoreSWHID_validation_error(ns, version, type, id): + with pytest.raises(ValidationError): + CoreSWHID( + namespace=ns, scheme_version=version, object_type=type, object_id=_x(id), + ) + + +def test_CoreSWHID_hash(): + object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2") + + assert hash( + CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id) + ) == hash(CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id)) + + assert hash( + CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id,) + ) == hash(CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id,)) + + # Different order of the dictionary, so the underlying order of the tuple in + # ImmutableDict is different. 
+ assert hash( + CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id,) + ) == hash(CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id,)) + + +def test_CoreSWHID_eq(): + object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2") + + assert CoreSWHID( + object_type=ObjectType.DIRECTORY, object_id=object_id + ) == CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id) + + assert CoreSWHID( + object_type=ObjectType.DIRECTORY, object_id=object_id, + ) == CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id,) + + assert CoreSWHID( + object_type=ObjectType.DIRECTORY, object_id=object_id, + ) == CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id,) + + +@pytest.mark.parametrize( + "ns,version,type,id", + [ + ( + "foo", + 1, + ExtendedObjectType.CONTENT, + "abc8bc9d7a6bcf6db04f476d29314f157507d505", + ), + ( + "swh", + 2, + ExtendedObjectType.CONTENT, + "def8bc9d7a6bcf6db04f476d29314f157507d505", + ), + ("swh", 1, ExtendedObjectType.DIRECTORY, "aaaa"), + ], +) +def test_ExtendedSWHID_validation_error(ns, version, type, id): + with pytest.raises(ValidationError): + ExtendedSWHID( + namespace=ns, scheme_version=version, object_type=type, object_id=_x(id), + ) + + +def test_ExtendedSWHID_hash(): + object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2") + + assert hash( + ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id) + ) == hash( + ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id) + ) + + assert hash( + ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,) + ) == hash( + ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,) + ) + + # Different order of the dictionary, so the underlying order of the tuple in + # ImmutableDict is different. 
+ assert hash( + ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,) + ) == hash( + ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,) + ) + + +def test_ExtendedSWHID_eq(): + object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2") + + assert ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=object_id + ) == ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id) + + assert ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=object_id, + ) == ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,) + + assert ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=object_id, + ) == ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,) + + +def test_object_types(): + """Checks ExtendedObjectType is a superset of ObjectType""" + for member in ObjectType: + assert getattr(ExtendedObjectType, member.name).value == member.value diff --git a/tox.ini b/tox.ini index 930a49232a0322752c7d82c40b5b786e1a5c8ce1..bffcdfafd6924e3f97839d387b9611ab495ebaff 100644 --- a/tox.ini +++ b/tox.ini @@ -8,6 +8,7 @@ deps = pytest-cov commands = pytest --cov={envsitepackagesdir}/swh/model \ + --doctest-modules \ {envsitepackagesdir}/swh/model \ --cov-branch {posargs}