diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 69b33490b621ead79cc10580132da2804d49f842..3cc45b37f7afb418d419cce6d395a05727049a4d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,10 +3,14 @@ repos: rev: v2.4.0 hooks: - id: trailing-whitespace - - id: flake8 - id: check-json - id: check-yaml +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.3 + hooks: + - id: flake8 + - repo: https://github.com/codespell-project/codespell rev: v1.16.0 hooks: @@ -22,6 +26,11 @@ repos: language: system types: [python] +- repo: https://github.com/PyCQA/isort + rev: 5.5.2 + hooks: + - id: isort + - repo: https://github.com/python/black rev: 19.10b0 hooks: diff --git a/PKG-INFO b/PKG-INFO index 953d410becc8cea925c44c425a7975adf99615b2..77a79614a9a217c9fcf906644e6302a3fccafd96 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.6.6 +Version: 0.6.7 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/bin/swh-hashtree b/bin/swh-hashtree index 5b85b7b5e73d24afc9d4e53b0a275327c2586925..a4f8d7b70b303bf55159b7c44c895a293f9407ec 100755 --- a/bin/swh-hashtree +++ b/bin/swh-hashtree @@ -5,9 +5,10 @@ # --ignore-empty-folders # 38f8d2c3a951f6b94007896d0981077e48bbd702 -import click import os +import click + from swh.model import from_disk, hashutil diff --git a/bin/swh-revhash b/bin/swh-revhash index d3a8caf84fab120f7721bffd4d269ee77626982e..56b587d905826efbba8976ae13c5129c567f1663 100755 --- a/bin/swh-revhash +++ b/bin/swh-revhash @@ -11,7 +11,7 @@ import sys -from swh.model import identifiers, hashutil +from swh.model import hashutil, identifiers def revhash(revision_raw): diff --git a/pyproject.toml b/pyproject.toml index b5413f6c74cabcba98d479c179506f0c61e9d7ef..69b8f4dd830abf638e624eeea85dbc580c862538 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,11 @@ [tool.black] target-version = ['py37'] + +[tool.isort] +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +ensure_newline_before_comments = true +line_length = 88 +force_sort_within_sections = true diff --git a/pytest.ini b/pytest.ini index c8e8c197ec42d9116539ae8d5cdedf12a4678a6b..9fa2d75a39cb3c34a52416c06d11f80d11abb1fd 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,5 @@ [pytest] -addopts = --doctest-modules -norecursedirs = docs +addopts = --doctest-modules -p no:pytest_swh_core +norecursedirs = docs .* markers = fs: tests that involve filesystem ios diff --git a/requirements-cli.txt b/requirements-cli.txt index 7365d1f9c6169a1f81b7a2272b8fe543e524297a..8564d0090a8429bb78e1e5f90d36a424567f198f 100644 --- a/requirements-cli.txt +++ b/requirements-cli.txt @@ -1,3 +1,3 @@ -swh.core +swh.core >= 0.3 Click dulwich diff --git a/setup.py b/setup.py index ecaac1058cb3dc356fe45ef910d5bed157dbc11b..8f9d32fdcec61b2ac22ea9928cfe8ebabca8cd8a 100755 --- a/setup.py +++ b/setup.py @@ -4,10 +4,10 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from setuptools import setup, find_packages - -from os import path from io import open +from os import path + +from setuptools import find_packages, setup here = path.abspath(path.dirname(__file__)) @@ -54,14 +54,14 @@ setup( ), extras_require={ "cli": parse_requirements("cli"), - "testing": parse_requirements("test"), + "testing": parse_requirements("test") + parse_requirements("cli"), }, include_package_data=True, entry_points=""" [console_scripts] swh-identify=swh.model.cli:identify [swh.cli.subcommands] - identify=swh.model.cli:identify + identify=swh.model.cli """, classifiers=[ "Programming Language :: Python :: 3", diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index 953d410becc8cea925c44c425a7975adf99615b2..77a79614a9a217c9fcf906644e6302a3fccafd96 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.6.6 +Version: 0.6.7 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh.model.egg-info/SOURCES.txt b/swh.model.egg-info/SOURCES.txt index 90423a1aae99e120c12d7bd1203d5be5c9303399..16bd75b4b07767e4deecfd43e4d26d7aac7aa411 100644 --- a/swh.model.egg-info/SOURCES.txt +++ b/swh.model.egg-info/SOURCES.txt @@ -17,7 +17,6 @@ requirements.txt setup.cfg setup.py tox.ini -version.txt bin/git-revhash bin/swh-hashtree bin/swh-revhash diff --git a/swh.model.egg-info/entry_points.txt b/swh.model.egg-info/entry_points.txt index 03eb1114b8c543f6bcdd3518f05ecbd259b5c3e9..f754c9a16705de6891bad78a36e659a443f14de4 100644 --- a/swh.model.egg-info/entry_points.txt +++ b/swh.model.egg-info/entry_points.txt @@ -2,5 +2,5 @@ [console_scripts] swh-identify=swh.model.cli:identify [swh.cli.subcommands] - identify=swh.model.cli:identify + identify=swh.model.cli \ No newline at end of file diff --git a/swh.model.egg-info/requires.txt b/swh.model.egg-info/requires.txt index e257baf1120c947eeee8ed54be398131a74f4be9..919e9687732fb52e32fa2fd42085d88933a043db 100644 --- a/swh.model.egg-info/requires.txt +++ b/swh.model.egg-info/requires.txt @@ -10,7 +10,7 @@ typing_extensions pyblake2 [cli] -swh.core +swh.core>=0.3 Click dulwich @@ -19,3 +19,6 @@ Click dulwich pytest pytz +swh.core>=0.3 +Click +dulwich diff --git a/swh/model/cli.py b/swh/model/cli.py index a545a703ed2e1fdcd3e0a2a5d7a79ec0451adcb5..4c8b7c17c4a4ddd49987d4e00db3938d05f57feb 100644 --- a/swh/model/cli.py +++ b/swh/model/cli.py @@ -1,28 +1,15 @@ -# Copyright (C) 2018-2019 The Software Heritage developers +# Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import click -import dulwich.repo import os import sys -from functools import partial -from urllib.parse import urlparse - -from swh.model import hashutil -from swh.model.identifiers import ( - origin_identifier, - snapshot_identifier, - parse_swhid, - swhid, - SWHID, - CONTENT, - DIRECTORY, -) -from swh.model.exceptions import ValidationError -from swh.model.from_disk import Content, Directory +# WARNING: do not import unnecessary things here to keep cli startup time under +# control +import click +from swh.core.cli import swh as swh_cli_group CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) @@ -41,6 +28,9 @@ class SWHIDParamType(click.ParamType): name = "persistent identifier" def convert(self, value, param, ctx): + from swh.model.exceptions import ValidationError + from swh.model.identifiers import parse_swhid + try: parse_swhid(value) return value # return as string, as we need just that @@ -49,26 +39,41 @@ class SWHIDParamType(click.ParamType): def swhid_of_file(path): + from swh.model.from_disk import Content + from swh.model.identifiers import CONTENT, swhid + object = Content.from_file(path=path).get_data() return swhid(CONTENT, object) def swhid_of_file_content(data): + from swh.model.from_disk import Content + from swh.model.identifiers import CONTENT, swhid + object = Content.from_bytes(mode=644, data=data).get_data() return swhid(CONTENT, object) def swhid_of_dir(path): + from swh.model.from_disk import Directory + from swh.model.identifiers import DIRECTORY, swhid + object = Directory.from_disk(path=path).get_data() return swhid(DIRECTORY, object) def swhid_of_origin(url): - swhid = SWHID(object_type="origin", object_id=origin_identifier({"url": url})) - return str(swhid) + from swh.model.identifiers import SWHID, origin_identifier + + return str(SWHID(object_type="origin", object_id=origin_identifier({"url": url}))) def swhid_of_git_repo(path): + import dulwich.repo + + from swh.model import hashutil + from swh.model.identifiers import SWHID, snapshot_identifier + repo = dulwich.repo.Repo(path) branches = {} @@ -90,11 +95,12 @@ def swhid_of_git_repo(path): snapshot = {"branches": branches} - swhid = SWHID(object_type="snapshot", object_id=snapshot_identifier(snapshot)) - return str(swhid) + return str(SWHID(object_type="snapshot", object_id=snapshot_identifier(snapshot))) def identify_object(obj_type, follow_symlinks, obj): + from urllib.parse import urlparse + if obj_type == "auto": if obj == "-" or os.path.isfile(obj): obj_type = "content" @@ -134,7 +140,7 @@ def identify_object(obj_type, follow_symlinks, obj): return (obj, swhid) -@click.command(context_settings=CONTEXT_SETTINGS) +@swh_cli_group.command(context_settings=CONTEXT_SETTINGS) @click.option( "--dereference/--no-dereference", "follow_symlinks", @@ -194,6 +200,7 @@ def identify(obj_type, verify, show_filename, follow_symlinks, objects): swh:1:snp:510aa88bdc517345d258c1fc2babcd0e1f905e93 helloworld.git """ # NoQA # overlong lines in shell examples are fine + from functools import partial if verify and len(objects) != 1: raise click.BadParameter("verification requires a single object") diff --git a/swh/model/fields/__init__.py b/swh/model/fields/__init__.py index a5b1ed3f8642c3de1593861fc3b9780697683f98..7e3c2fef75cadbf876b937bd14e3e79fe9bbfc0c 100644 --- a/swh/model/fields/__init__.py +++ b/swh/model/fields/__init__.py @@ -6,13 +6,13 @@ # We do our imports here but we don't use them, so flake8 complains # flake8: noqa +from .compound import validate_against_schema, validate_all_keys, validate_any_key +from .hashes import validate_sha1, validate_sha1_git, validate_sha256 from .simple import ( - validate_type, - validate_int, - validate_str, validate_bytes, validate_datetime, validate_enum, + validate_int, + validate_str, + validate_type, ) -from .hashes import validate_sha1, validate_sha1_git, validate_sha256 -from .compound import validate_against_schema, validate_all_keys, validate_any_key diff --git a/swh/model/fields/compound.py b/swh/model/fields/compound.py index 3133f59cc705d497ab3cbfddd3cb5d098b92e04d..90b4685bafc0b9cf8cac78c87d5a95e521468515 100644 --- a/swh/model/fields/compound.py +++ b/swh/model/fields/compound.py @@ -6,7 +6,7 @@ from collections import defaultdict import itertools -from ..exceptions import ValidationError, NON_FIELD_ERRORS +from ..exceptions import NON_FIELD_ERRORS, ValidationError def validate_against_schema(model, schema, value): diff --git a/swh/model/fields/hashes.py b/swh/model/fields/hashes.py index 47e872c7b994881bd86a1c33a4a184acbb4732a2..9b5ee4ad4ec58d512ced2223f7577d935864b502 100644 --- a/swh/model/fields/hashes.py +++ b/swh/model/fields/hashes.py @@ -4,6 +4,7 @@ # See top-level LICENSE file for more information import string + from ..exceptions import ValidationError diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py index 5ac97e257fcf30af80ba1a811fb92013eb7b90b3..719599d0563fef48b00973b8daf6e92168bf4a73 100644 --- a/swh/model/from_disk.py +++ b/swh/model/from_disk.py @@ -7,21 +7,18 @@ import datetime import enum import os import stat +from typing import Any, Iterable, List, Optional, Tuple import attr from attrs_strict import type_validator -from typing import Any, Iterable, List, Optional, Tuple from typing_extensions import Final +from . import model from .hashutil import MultiHash +from .identifiers import directory_entry_sort_key, directory_identifier +from .identifiers import identifier_to_bytes as id_to_bytes +from .identifiers import identifier_to_str as id_to_str from .merkle import MerkleLeaf, MerkleNode -from .identifiers import ( - directory_entry_sort_key, - directory_identifier, - identifier_to_bytes as id_to_bytes, - identifier_to_str as id_to_str, -) -from . import model @attr.s diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py index 954ae9576452a00649040eb4662ecd3756fe00a5..cec87789cb7fffc5b9112a9a81e38de4d8eab0d8 100644 --- a/swh/model/hashutil.py +++ b/swh/model/hashutil.py @@ -54,9 +54,8 @@ Basic usage examples: import binascii import functools import hashlib -import os - from io import BytesIO +import os from typing import Callable, Dict ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512"]) diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py index 21e922eed40cfc95db664bce1acc4ce0d344ef35..0c54a994e7d6dbb0df372868faea5f8a2c81be58 100644 --- a/swh/model/hypothesis_strategies.py +++ b/swh/model/hypothesis_strategies.py @@ -28,28 +28,27 @@ from hypothesis.strategies import ( ) from .from_disk import DentryPerms +from .identifiers import identifier_to_bytes, snapshot_identifier from .model import ( - Person, - Timestamp, - TimestampWithTimezone, + BaseContent, + Content, + Directory, + DirectoryEntry, + ObjectType, Origin, OriginVisit, OriginVisitStatus, - Snapshot, - SnapshotBranch, - ObjectType, - TargetType, + Person, Release, Revision, RevisionType, - BaseContent, - Directory, - DirectoryEntry, - Content, SkippedContent, + Snapshot, + SnapshotBranch, + TargetType, + Timestamp, + TimestampWithTimezone, ) -from .identifiers import snapshot_identifier, identifier_to_bytes - pgsql_alphabet = characters( blacklist_categories=("Cs",), blacklist_characters=["\u0000"] diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index e1cf0dfe3b80fe40f0df23beb3c493098f82c2fd..6c61317c3e22d1e37c9f6096389dc66615aee1bd 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -5,19 +5,16 @@ import binascii import datetime -import hashlib - from functools import lru_cache +import hashlib from typing import Any, Dict, Union import attr -from deprecated import deprecated from .collections import ImmutableDict from .exceptions import ValidationError from .fields.hashes import validate_sha1 -from .hashutil import hash_git_data, hash_to_hex, MultiHash - +from .hashutil import MultiHash, hash_git_data, hash_to_hex ORIGIN = "origin" SNAPSHOT = "snapshot" @@ -32,13 +29,6 @@ SWHID_TYPES = ["ori", "snp", "rel", "rev", "dir", "cnt"] SWHID_SEP = ":" SWHID_CTXT_SEP = ";" -# deprecated variables -PID_NAMESPACE = SWHID_NAMESPACE -PID_VERSION = SWHID_VERSION -PID_TYPES = SWHID_TYPES -PID_SEP = SWHID_SEP -PID_CTXT_SEP = SWHID_CTXT_SEP - @lru_cache() def identifier_to_bytes(identifier): @@ -738,20 +728,6 @@ class SWHID: return swhid -@deprecated("Use swh.model.identifiers.SWHID instead") -class PersistentId(SWHID): - """ - Named tuple holding the relevant info associated to a SoftWare Heritage - persistent IDentifier. - - .. deprecated:: 0.3.8 - Use :class:`swh.model.identifiers.SWHID` instead - - """ - - pass - - def swhid( object_type: str, object_id: Union[str, Dict[str, Any]], @@ -786,17 +762,6 @@ def swhid( return str(swhid) -@deprecated("Use swh.model.identifiers.swhid instead") -def persistent_identifier(*args, **kwargs) -> str: - """Compute :ref:`persistent-identifiers` - - .. deprecated:: 0.3.8 - Use :func:`swh.model.identifiers.swhid` instead - - """ - return swhid(*args, **kwargs) - - def parse_swhid(swhid: str) -> SWHID: """Parse :ref:`persistent-identifiers`. @@ -850,13 +815,3 @@ def parse_swhid(swhid: str) -> SWHID: _id, _metadata, # type: ignore # mypy can't properly unify types ) - - -@deprecated("Use swh.model.identifiers.parse_swhid instead") -def parse_persistent_identifier(persistent_id: str) -> PersistentId: - """Parse :ref:`persistent-identifiers`. - - .. deprecated:: 0.3.8 - Use :func:`swh.model.identifiers.parse_swhid` instead - """ - return PersistentId(**parse_swhid(persistent_id).to_dict()) diff --git a/swh/model/merkle.py b/swh/model/merkle.py index 0311d9d19dd02ab5cf68378d95a7913ff4b226d2..e84ef9d9823a3ffb3444a3c5586abae16f4b6772 100644 --- a/swh/model/merkle.py +++ b/swh/model/merkle.py @@ -7,7 +7,6 @@ import abc from collections.abc import Mapping - from typing import Iterator, List, Set diff --git a/swh/model/model.py b/swh/model/model.py index c1c20a55e8c39d1336c8e82521702087570063bd..ca3a8c7032490cd724470e2fe2914328b0131779 100644 --- a/swh/model/model.py +++ b/swh/model/model.py @@ -3,29 +3,28 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import datetime - from abc import ABCMeta, abstractmethod +import datetime from enum import Enum from hashlib import sha256 from typing import Any, Dict, Iterable, Optional, Tuple, TypeVar, Union -from typing_extensions import Final import attr from attrs_strict import type_validator import dateutil.parser import iso8601 +from typing_extensions import Final from .collections import ImmutableDict -from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, MultiHash +from .hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytes from .identifiers import ( - normalize_timestamp, + SWHID, directory_identifier, - revision_identifier, + normalize_timestamp, + parse_swhid, release_identifier, + revision_identifier, snapshot_identifier, - SWHID, - parse_swhid, ) @@ -267,6 +266,12 @@ class OriginVisit(BaseModel): """Should not be set before calling 'origin_visit_add()'.""" visit = attr.ib(type=Optional[int], validator=type_validator(), default=None) + @date.validator + def check_date(self, attribute, value): + """Checks the date has a timezone.""" + if value is not None and value.tzinfo is None: + raise ValueError("date must be a timezone-aware datetime.") + def to_dict(self): """Serializes the date as a string and omits the visit id if it is `None`.""" @@ -300,6 +305,12 @@ class OriginVisitStatus(BaseModel): default=None, ) + @date.validator + def check_date(self, attribute, value): + """Checks the date has a timezone.""" + if value is not None and value.tzinfo is None: + raise ValueError("date must be a timezone-aware datetime.") + class TargetType(Enum): """The type of content pointed to by a snapshot branch. Usually a @@ -437,7 +448,7 @@ class RevisionType(Enum): MERCURIAL = "hg" -def tuplify_extra_headers(value: Iterable) -> Tuple: +def tuplify_extra_headers(value: Iterable): return tuple((k, v) for k, v in value) @@ -464,9 +475,9 @@ class Revision(BaseModel, HashableObject): parents = attr.ib(type=Tuple[Sha1Git, ...], validator=type_validator(), default=()) id = attr.ib(type=Sha1Git, validator=type_validator(), default=b"") extra_headers = attr.ib( - type=Tuple[Tuple[bytes, bytes], ...], # but it makes mypy sad + type=Tuple[Tuple[bytes, bytes], ...], validator=type_validator(), - converter=tuplify_extra_headers, # type: ignore + converter=tuplify_extra_headers, default=(), ) @@ -621,6 +632,12 @@ class Content(BaseContent): if value < 0: raise ValueError("Length must be positive.") + @ctime.validator + def check_ctime(self, attribute, value): + """Checks the ctime has a timezone.""" + if value is not None and value.tzinfo is None: + raise ValueError("ctime must be a timezone-aware datetime.") + def to_dict(self): content = super().to_dict() if content["data"] is None: @@ -695,6 +712,12 @@ class SkippedContent(BaseContent): if value < -1: raise ValueError("Length must be positive or -1.") + @ctime.validator + def check_ctime(self, attribute, value): + """Checks the ctime has a timezone.""" + if value is not None and value.tzinfo is None: + raise ValueError("ctime must be a timezone-aware datetime.") + def to_dict(self): content = super().to_dict() if content["origin"] is None: @@ -833,7 +856,13 @@ class RawExtrinsicMetadata(BaseModel): "Got SWHID as id for origin metadata (expected an URL)." ) else: - self._check_pid(self.type.value, value) + self._check_swhid(self.type.value, value) + + @discovery_date.validator + def check_discovery_date(self, attribute, value): + """Checks the discovery_date has a timezone.""" + if value is not None and value.tzinfo is None: + raise ValueError("discovery_date must be a timezone-aware datetime.") @origin.validator def check_origin(self, attribute, value): @@ -896,7 +925,7 @@ class RawExtrinsicMetadata(BaseModel): f"Unexpected 'snapshot' context for {self.type.value} object: {value}" ) - self._check_pid("snapshot", value) + self._check_swhid("snapshot", value) @release.validator def check_release(self, attribute, value): @@ -912,7 +941,7 @@ class RawExtrinsicMetadata(BaseModel): f"Unexpected 'release' context for {self.type.value} object: {value}" ) - self._check_pid("release", value) + self._check_swhid("release", value) @revision.validator def check_revision(self, attribute, value): @@ -924,7 +953,7 @@ class RawExtrinsicMetadata(BaseModel): f"Unexpected 'revision' context for {self.type.value} object: {value}" ) - self._check_pid("revision", value) + self._check_swhid("revision", value) @path.validator def check_path(self, attribute, value): @@ -946,20 +975,20 @@ class RawExtrinsicMetadata(BaseModel): f"Unexpected 'directory' context for {self.type.value} object: {value}" ) - self._check_pid("directory", value) + self._check_swhid("directory", value) - def _check_pid(self, expected_object_type, pid): - if isinstance(pid, str): - raise ValueError(f"Expected SWHID, got a string: {pid}") + def _check_swhid(self, expected_object_type, swhid): + if isinstance(swhid, str): + raise ValueError(f"Expected SWHID, got a string: {swhid}") - if pid.object_type != expected_object_type: + if swhid.object_type != expected_object_type: raise ValueError( f"Expected SWHID type '{expected_object_type}', " - f"got '{pid.object_type}' in {pid}" + f"got '{swhid.object_type}' in {swhid}" ) - if pid.metadata: - raise ValueError(f"Expected core SWHID, but got: {pid}") + if swhid.metadata: + raise ValueError(f"Expected core SWHID, but got: {swhid}") def to_dict(self): d = super().to_dict() diff --git a/swh/model/tests/generate_testdata.py b/swh/model/tests/generate_testdata.py index 0280a6ab15582e49d1d79b1ba9104d55aa5abd7e..f4093a4d5ab5db80ab1616110cd4426201f47ee8 100644 --- a/swh/model/tests/generate_testdata.py +++ b/swh/model/tests/generate_testdata.py @@ -4,12 +4,12 @@ # See top-level LICENSE file for more information from datetime import datetime -from pytz import all_timezones, timezone from random import choice, randint, random, shuffle -from typing import List, Dict +from typing import Dict, List -from swh.model.hashutil import MultiHash +from pytz import all_timezones, timezone +from swh.model.hashutil import MultiHash PROTOCOLS = ["git", "http", "https", "deb", "svn", "mock"] DOMAINS = ["example.com", "some.long.host.name", "xn--n28h.tld"] diff --git a/swh/model/tests/generate_testdata_from_disk.py b/swh/model/tests/generate_testdata_from_disk.py index 063e39093618a1e07c7dc9f55acc184a05d83643..3ad456463eb2b45a974c42953927b38d192763c3 100644 --- a/swh/model/tests/generate_testdata_from_disk.py +++ b/swh/model/tests/generate_testdata_from_disk.py @@ -7,7 +7,7 @@ from operator import itemgetter import os import sys -from swh.model.from_disk import Directory, DentryPerms +from swh.model.from_disk import DentryPerms, Directory from swh.model.hashutil import ALGORITHMS, hash_to_hex diff --git a/swh/model/tests/test_from_disk.py b/swh/model/tests/test_from_disk.py index 025615608f7240c1b28054cd63d2660e478ab3e0..497bf6c0ddf510d52f99ad2a1e7cdea883cac4fe 100644 --- a/swh/model/tests/test_from_disk.py +++ b/swh/model/tests/test_from_disk.py @@ -3,19 +3,18 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from collections import defaultdict import os -import pytest import tarfile import tempfile +from typing import ClassVar, Optional import unittest -from collections import defaultdict -from typing import ClassVar, Optional +import pytest -from swh.model import from_disk +from swh.model import from_disk, model from swh.model.from_disk import Content, DentryPerms, Directory, DiskBackedContent from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex -from swh.model import model TEST_DATA = os.path.join(os.path.dirname(__file__), "data") diff --git a/swh/model/tests/test_generate_testdata.py b/swh/model/tests/test_generate_testdata.py index aa9c8af305cff8cc6108853f6b01ceafcad3c8c5..6ed2e6383315196898d793a04c10cb4ee01ccb59 100644 --- a/swh/model/tests/test_generate_testdata.py +++ b/swh/model/tests/test_generate_testdata.py @@ -3,9 +3,9 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from .generate_testdata import gen_contents, gen_origins, ORIGINS +from swh.model.model import BaseContent, Origin -from swh.model.model import Origin, BaseContent +from .generate_testdata import ORIGINS, gen_contents, gen_origins def test_gen_origins_empty(): diff --git a/swh/model/tests/test_hypothesis_strategies.py b/swh/model/tests/test_hypothesis_strategies.py index e1ab9b73aae2cfa48b137cd26d14ed1a29a6359c..c93b24b91f91c0fb78eb648596f5a3cb7b6e5e4c 100644 --- a/swh/model/tests/test_hypothesis_strategies.py +++ b/swh/model/tests/test_hypothesis_strategies.py @@ -6,23 +6,22 @@ import datetime import attr -import iso8601 from hypothesis import given, settings +import iso8601 from swh.model.hashutil import DEFAULT_ALGORITHMS from swh.model.hypothesis_strategies import ( aware_datetimes, - objects, - object_dicts, contents, - skipped_contents, - snapshots, + object_dicts, + objects, origin_visits, persons, + skipped_contents, + snapshots, ) from swh.model.model import TargetType - target_types = ("content", "directory", "revision", "release", "snapshot", "alias") all_but_skipped_content = ( "origin", diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 5acbd2d38c19466bda2c14b16595ac9d3d41cc0e..3741b70af906cff0f4b8c6af25112ccb126356c9 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -5,9 +5,10 @@ import binascii import datetime -import pytest import unittest +import pytest + from swh.model import hashutil, identifiers from swh.model.exceptions import ValidationError from swh.model.hashutil import hash_to_bytes as _x diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py index eb226e5398064d06238883d7beea191a56d78e28..fdd5e044315024ac63c3d6c3afbfb755e729a103 100644 --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -12,39 +12,41 @@ from hypothesis import given from hypothesis.strategies import binary import pytest +from swh.model.hashutil import MultiHash, hash_to_bytes +import swh.model.hypothesis_strategies as strategies +from swh.model.identifiers import ( + SWHID, + directory_identifier, + parse_swhid, + release_identifier, + revision_identifier, + snapshot_identifier, +) from swh.model.model import ( BaseModel, Content, - SkippedContent, Directory, - Revision, + MetadataAuthority, + MetadataAuthorityType, + MetadataFetcher, + MetadataTargetType, + MissingData, + Origin, + OriginVisit, + OriginVisitStatus, + Person, + RawExtrinsicMetadata, Release, + Revision, + SkippedContent, Snapshot, - Origin, Timestamp, TimestampWithTimezone, - MissingData, - Person, - RawExtrinsicMetadata, - MetadataTargetType, - MetadataAuthority, - MetadataAuthorityType, - MetadataFetcher, -) -from swh.model.hashutil import hash_to_bytes, MultiHash -import swh.model.hypothesis_strategies as strategies -from swh.model.identifiers import ( - directory_identifier, - revision_identifier, - release_identifier, - snapshot_identifier, - parse_swhid, - SWHID, ) from swh.model.tests.test_identifiers import ( directory_example, - revision_example, release_example, + revision_example, snapshot_example, ) @@ -97,7 +99,7 @@ def test_anonymization(objtype_and_obj): assert anon_obj is None -# Origin, OriginVisit +# Origin, OriginVisit, OriginVisitStatus @given(strategies.origins()) @@ -115,6 +117,13 @@ def test_todict_origin_visits(origin_visit): assert origin_visit == type(origin_visit).from_dict(obj) +def test_origin_visit_naive_datetime(): + with pytest.raises(ValueError, match="must be a timezone-aware datetime"): + OriginVisit( + origin="http://foo/", date=datetime.datetime.now(), type="git", + ) + + @given(strategies.origin_visit_statuses()) def test_todict_origin_visit_statuses(origin_visit_status): obj = origin_visit_status.to_dict() @@ -122,6 +131,17 @@ def test_todict_origin_visit_statuses(origin_visit_status): assert origin_visit_status == type(origin_visit_status).from_dict(obj) +def test_origin_visit_status_naive_datetime(): + with pytest.raises(ValueError, match="must be a timezone-aware datetime"): + OriginVisitStatus( + origin="http://foo/", + visit=42, + date=datetime.datetime.now(), + status="ongoing", + snapshot=None, + ) + + # Timestamp @@ -224,6 +244,13 @@ def test_timestampwithtimezone_from_datetime(): ) +def test_timestampwithtimezone_from_naive_datetime(): + date = datetime.datetime(2020, 2, 27, 14, 39, 19) + + with pytest.raises(ValueError, match="datetime without timezone"): + TimestampWithTimezone.from_datetime(date) + + def test_timestampwithtimezone_from_iso8601(): date = "2020-02-27 14:39:19.123456+0100" @@ -363,7 +390,7 @@ def test_content_from_dict(content_d): def test_content_from_dict_str_ctime(): # test with ctime as a string - n = datetime.datetime(2020, 5, 6, 12, 34) + n = datetime.datetime(2020, 5, 6, 12, 34, tzinfo=datetime.timezone.utc) content_d = { "ctime": n.isoformat(), "data": b"", @@ -377,6 +404,22 @@ def test_content_from_dict_str_ctime(): assert c.ctime == n +def test_content_from_dict_str_naive_ctime(): + # test with ctime as a string + n = datetime.datetime(2020, 5, 6, 12, 34) + content_d = { + "ctime": n.isoformat(), + "data": b"", + "length": 0, + "sha1": b"\x00", + "sha256": b"\x00", + "sha1_git": b"\x00", + "blake2s256": b"\x00", + } + with pytest.raises(ValueError, match="must be a timezone-aware datetime."): + Content.from_dict(content_d) + + @given(binary(max_size=4096)) def test_content_from_data(data): c = Content.from_data(data) @@ -397,6 +440,14 @@ def test_hidden_content_from_data(data): assert getattr(c, key) == value +def test_content_naive_datetime(): + c = Content.from_data(b"foo") + with pytest.raises(ValueError, match="must be a timezone-aware datetime"): + Content( + **c.to_dict(), ctime=datetime.datetime.now(), + ) + + # SkippedContent @@ -422,6 +473,14 @@ def test_skipped_content_origin_is_str(skipped_content_d): SkippedContent.from_dict(skipped_content_d) +def test_skipped_content_naive_datetime(): + c = SkippedContent.from_data(b"foo", reason="reason") + with pytest.raises(ValueError, match="must be a timezone-aware datetime"): + SkippedContent( + **c.to_dict(), ctime=datetime.datetime.now(), + ) + + # Revision @@ -694,7 +753,7 @@ _metadata_fetcher = MetadataFetcher(name="test-fetcher", version="0.0.1",) _content_swhid = parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2") _origin_url = "https://forge.softwareheritage.org/source/swh-model.git" _common_metadata_fields = dict( - discovery_date=datetime.datetime.now(), + discovery_date=datetime.datetime.now(tz=datetime.timezone.utc), authority=_metadata_authority, fetcher=_metadata_fetcher, format="json", @@ -802,6 +861,15 @@ def test_metadata_invalid_id(): ) +def test_metadata_naive_datetime(): + with pytest.raises(ValueError, match="must be a timezone-aware datetime"): + RawExtrinsicMetadata( + type=MetadataTargetType.ORIGIN, + id=_origin_url, + **{**_common_metadata_fields, "discovery_date": datetime.datetime.now()}, + ) + + def test_metadata_validate_context_origin(): """Checks validation of RawExtrinsicMetadata.origin.""" diff --git a/swh/model/validators.py b/swh/model/validators.py index 6cd7fc110dc13d4074338aa52eede6a76366b431..a2f9dbff2a672fd085eaa2ad14aaa43208b04314 100644 --- a/swh/model/validators.py +++ b/swh/model/validators.py @@ -3,8 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from .exceptions import ValidationError, NON_FIELD_ERRORS from . import fields +from .exceptions import NON_FIELD_ERRORS, ValidationError from .hashutil import MultiHash, hash_to_bytes diff --git a/version.txt b/version.txt deleted file mode 100644 index 6ebf82ac7c32ff364a1005eda5ee5dc50229dde2..0000000000000000000000000000000000000000 --- a/version.txt +++ /dev/null @@ -1 +0,0 @@ -v0.6.1-0-g08632e7 \ No newline at end of file