diff --git a/PKG-INFO b/PKG-INFO
index 39cb407ce462c1da718c0fb452bb98d68dfc9237..6bf007cf43487927ea173d702e776f3e5bfc4ad6 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: swh.model
-Version: 0.0.64
+Version: 0.0.65
 Summary: Software Heritage data model
 Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
 Author: Software Heritage developers
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..b5413f6c74cabcba98d479c179506f0c61e9d7ef
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,2 @@
+[tool.black]
+target-version = ['py37']
diff --git a/setup.cfg b/setup.cfg
index 8bfd5a12f85b8fbb6c058cf67dd23da690835ea0..1d722c2c602748efade93c7dafe59053fa74f68e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,3 +1,7 @@
+[flake8]
+ignore = E203,E231,W503
+max-line-length = 88
+
 [egg_info]
 tag_build = 
 tag_date = 0
diff --git a/setup.py b/setup.py
index 6f2eb37c6904f74a18844b9f67d00cc1bc8ac4b2..f0ea60468c4f15816640b2d17491d5108d044d54 100755
--- a/setup.py
+++ b/setup.py
@@ -12,15 +12,15 @@ from io import open
 here = path.abspath(path.dirname(__file__))
 
 # Get the long description from the README file
-with open(path.join(here, 'README.md'), encoding='utf-8') as f:
+with open(path.join(here, "README.md"), encoding="utf-8") as f:
     long_description = f.read()
 
 
 def parse_requirements(name=None):
     if name:
-        reqf = 'requirements-%s.txt' % name
+        reqf = "requirements-%s.txt" % name
     else:
-        reqf = 'requirements.txt'
+        reqf = "requirements.txt"
 
     requirements = []
     if not path.exists(reqf):
@@ -29,7 +29,7 @@ def parse_requirements(name=None):
     with open(reqf) as f:
         for line in f.readlines():
             line = line.strip()
-            if not line or line.startswith('#'):
+            if not line or line.startswith("#"):
                 continue
             requirements.append(line)
     return requirements
@@ -38,29 +38,30 @@ def parse_requirements(name=None):
 blake2_requirements = ['pyblake2;python_version<"3.6"']
 
 setup(
-    name='swh.model',
-    description='Software Heritage data model',
+    name="swh.model",
+    description="Software Heritage data model",
     long_description=long_description,
-    long_description_content_type='text/markdown',
-    author='Software Heritage developers',
-    author_email='swh-devel@inria.fr',
-    url='https://forge.softwareheritage.org/diffusion/DMOD/',
+    long_description_content_type="text/markdown",
+    author="Software Heritage developers",
+    author_email="swh-devel@inria.fr",
+    url="https://forge.softwareheritage.org/diffusion/DMOD/",
     packages=find_packages(),
-    setup_requires=['vcversioner'],
-    install_requires=(parse_requirements() + parse_requirements('swh') +
-                      blake2_requirements),
+    setup_requires=["vcversioner"],
+    install_requires=(
+        parse_requirements() + parse_requirements("swh") + blake2_requirements
+    ),
     extras_require={
-        'cli': parse_requirements('cli'),
-        'testing': parse_requirements('test'),
+        "cli": parse_requirements("cli"),
+        "testing": parse_requirements("test"),
     },
     vcversioner={},
     include_package_data=True,
-    entry_points='''
+    entry_points="""
         [console_scripts]
         swh-identify=swh.model.cli:identify
         [swh.cli.subcommands]
         identify=swh.model.cli:identify
-    ''',
+    """,
     classifiers=[
         "Programming Language :: Python :: 3",
         "Intended Audience :: Developers",
@@ -69,8 +70,8 @@ setup(
         "Development Status :: 5 - Production/Stable",
     ],
     project_urls={
-        'Bug Reports': 'https://forge.softwareheritage.org/maniphest',
-        'Funding': 'https://www.softwareheritage.org/donate',
-        'Source': 'https://forge.softwareheritage.org/source/swh-model',
+        "Bug Reports": "https://forge.softwareheritage.org/maniphest",
+        "Funding": "https://www.softwareheritage.org/donate",
+        "Source": "https://forge.softwareheritage.org/source/swh-model",
     },
 )
diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO
index 39cb407ce462c1da718c0fb452bb98d68dfc9237..6bf007cf43487927ea173d702e776f3e5bfc4ad6 100644
--- a/swh.model.egg-info/PKG-INFO
+++ b/swh.model.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: swh.model
-Version: 0.0.64
+Version: 0.0.65
 Summary: Software Heritage data model
 Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
 Author: Software Heritage developers
diff --git a/swh.model.egg-info/SOURCES.txt b/swh.model.egg-info/SOURCES.txt
index 7bfbefea34350cf840f1a8cc9ca8c0e271c814b4..74c28607d46eeefa9f5c9f724229ccfddf41df7b 100644
--- a/swh.model.egg-info/SOURCES.txt
+++ b/swh.model.egg-info/SOURCES.txt
@@ -1,9 +1,11 @@
 MANIFEST.in
 Makefile
 README.md
+pyproject.toml
 requirements-cli.txt
 requirements-test.txt
 requirements.txt
+setup.cfg
 setup.py
 version.txt
 swh/__init__.py
diff --git a/swh/model/cli.py b/swh/model/cli.py
index 581bb45b892cae89ab2bb6415fb8d608d905784b..ae51d190c6423b1f9f963d791465c3ded81ecd5b 100644
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -17,27 +17,27 @@ from swh.model.exceptions import ValidationError
 from swh.model.from_disk import Content, Directory
 
 
-CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
+CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
 
 # Mapping between dulwich types and Software Heritage ones. Used by snapshot ID
 # computation.
 _DULWICH_TYPES = {
-    b'blob': 'content',
-    b'tree': 'directory',
-    b'commit': 'revision',
-    b'tag': 'release',
+    b"blob": "content",
+    b"tree": "directory",
+    b"commit": "revision",
+    b"tag": "release",
 }
 
 
 class PidParamType(click.ParamType):
-    name = 'persistent identifier'
+    name = "persistent identifier"
 
     def convert(self, value, param, ctx):
         try:
             pids.parse_persistent_identifier(value)
             return value  # return as string, as we need just that
         except ValidationError as e:
-            self.fail('%s is not a valid PID. %s.' % (value, e), param, ctx)
+            self.fail("%s is not a valid PID. %s." % (value, e), param, ctx)
 
 
 def pid_of_file(path):
@@ -56,8 +56,9 @@ def pid_of_dir(path):
 
 
 def pid_of_origin(url):
-    pid = pids.PersistentId(object_type='origin',
-                            object_id=pids.origin_identifier({'url': url}))
+    pid = pids.PersistentId(
+        object_type="origin", object_id=pids.origin_identifier({"url": url})
+    )
     return str(pid)
 
 
@@ -69,60 +70,60 @@ def pid_of_git_repo(path):
         obj = repo[target]
         if obj:
             branches[ref] = {
-                'target': hashutil.bytehex_to_hash(target),
-                'target_type': _DULWICH_TYPES[obj.type_name],
+                "target": hashutil.bytehex_to_hash(target),
+                "target_type": _DULWICH_TYPES[obj.type_name],
             }
         else:
             branches[ref] = None
 
     for ref, target in repo.refs.get_symrefs().items():
         branches[ref] = {
-            'target': target,
-            'target_type': 'alias',
+            "target": target,
+            "target_type": "alias",
         }
 
-    snapshot = {'branches': branches}
+    snapshot = {"branches": branches}
 
-    pid = pids.PersistentId(object_type='snapshot',
-                            object_id=pids.snapshot_identifier(snapshot))
+    pid = pids.PersistentId(
+        object_type="snapshot", object_id=pids.snapshot_identifier(snapshot)
+    )
     return str(pid)
 
 
 def identify_object(obj_type, follow_symlinks, obj):
-    if obj_type == 'auto':
-        if obj == '-' or os.path.isfile(obj):
-            obj_type = 'content'
+    if obj_type == "auto":
+        if obj == "-" or os.path.isfile(obj):
+            obj_type = "content"
         elif os.path.isdir(obj):
-            obj_type = 'directory'
+            obj_type = "directory"
         else:
             try:  # URL parsing
                 if urlparse(obj).scheme:
-                    obj_type = 'origin'
+                    obj_type = "origin"
                 else:
                     raise ValueError
             except ValueError:
-                raise click.BadParameter('cannot detect object type for %s' %
-                                         obj)
+                raise click.BadParameter("cannot detect object type for %s" % obj)
 
     pid = None
 
-    if obj == '-':
+    if obj == "-":
         content = sys.stdin.buffer.read()
         pid = pid_of_file_content(content)
-    elif obj_type in ['content', 'directory']:
+    elif obj_type in ["content", "directory"]:
         path = obj.encode(sys.getfilesystemencoding())
         if follow_symlinks and os.path.islink(obj):
             path = os.path.realpath(obj)
-        if obj_type == 'content':
+        if obj_type == "content":
             pid = pid_of_file(path)
-        elif obj_type == 'directory':
+        elif obj_type == "directory":
             pid = pid_of_dir(path)
-    elif obj_type == 'origin':
+    elif obj_type == "origin":
         pid = pid_of_origin(obj)
-    elif obj_type == 'snapshot':
+    elif obj_type == "snapshot":
         pid = pid_of_git_repo(obj)
     else:  # shouldn't happen, due to option validation
-        raise click.BadParameter('invalid object type: ' + obj_type)
+        raise click.BadParameter("invalid object type: " + obj_type)
 
     # note: we return original obj instead of path here, to preserve user-given
     # file name in output
@@ -130,19 +131,35 @@ def identify_object(obj_type, follow_symlinks, obj):
 
 
 @click.command(context_settings=CONTEXT_SETTINGS)
-@click.option('--dereference/--no-dereference', 'follow_symlinks',
-              default=True,
-              help='follow (or not) symlinks for OBJECTS passed as arguments '
-              + '(default: follow)')
-@click.option('--filename/--no-filename', 'show_filename', default=True,
-              help='show/hide file name (default: show)')
-@click.option('--type', '-t', 'obj_type', default='auto',
-              type=click.Choice(['auto', 'content', 'directory', 'origin',
-                                 'snapshot']),
-              help='type of object to identify (default: auto)')
-@click.option('--verify', '-v', metavar='PID', type=PidParamType(),
-              help='reference identifier to be compared with computed one')
-@click.argument('objects', nargs=-1)
+@click.option(
+    "--dereference/--no-dereference",
+    "follow_symlinks",
+    default=True,
+    help="follow (or not) symlinks for OBJECTS passed as arguments "
+    + "(default: follow)",
+)
+@click.option(
+    "--filename/--no-filename",
+    "show_filename",
+    default=True,
+    help="show/hide file name (default: show)",
+)
+@click.option(
+    "--type",
+    "-t",
+    "obj_type",
+    default="auto",
+    type=click.Choice(["auto", "content", "directory", "origin", "snapshot"]),
+    help="type of object to identify (default: auto)",
+)
+@click.option(
+    "--verify",
+    "-v",
+    metavar="PID",
+    type=PidParamType(),
+    help="reference identifier to be compared with computed one",
+)
+@click.argument("objects", nargs=-1)
 def identify(obj_type, verify, show_filename, follow_symlinks, objects):
     """Compute the Software Heritage persistent identifier (PID) for the given
     source code object(s).
@@ -172,28 +189,28 @@ def identify(obj_type, verify, show_filename, follow_symlinks, objects):
 
     """  # NoQA  # overlong lines in shell examples are fine
     if not objects:
-        objects = ['-']
+        objects = ["-"]
 
     if verify and len(objects) != 1:
-        raise click.BadParameter('verification requires a single object')
+        raise click.BadParameter("verification requires a single object")
 
     results = map(partial(identify_object, obj_type, follow_symlinks), objects)
 
     if verify:
         pid = next(results)[1]
         if verify == pid:
-            click.echo('PID match: %s' % pid)
+            click.echo("PID match: %s" % pid)
             sys.exit(0)
         else:
-            click.echo('PID mismatch: %s != %s' % (verify, pid))
+            click.echo("PID mismatch: %s != %s" % (verify, pid))
             sys.exit(1)
     else:
         for (obj, pid) in results:
            msg = pid
            if show_filename:
-                msg = '%s\t%s' % (pid, os.fsdecode(obj))
+                msg = "%s\t%s" % (pid, os.fsdecode(obj))
            click.echo(msg)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     identify()
diff --git a/swh/model/exceptions.py b/swh/model/exceptions.py
index 147c5ad8d45990f506866c4bd9f68546f663498e..774dfc22aa0c787be71fd3b65b761dfac95fc74d 100644
--- a/swh/model/exceptions.py
+++ b/swh/model/exceptions.py
@@ -33,11 +33,12 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
-NON_FIELD_ERRORS = '__all__'
+NON_FIELD_ERRORS = "__all__"
 
 
 class ValidationError(Exception):
     """An error while validating data."""
+
     def __init__(self, message, code=None, params=None):
         """
         The `message` argument can be a single error, a list of errors, or a
@@ -54,16 +55,15 @@ class ValidationError(Exception):
             message = message[0]
 
         if isinstance(message, ValidationError):
-            if hasattr(message, 'error_dict'):
+            if hasattr(message, "error_dict"):
                 message = message.error_dict
             # PY2 has a `message` property which is always there so we can't
             # duck-type on it. It was introduced in Python 2.5 and already
             # deprecated in Python 2.6.
-            elif not hasattr(message, 'message'):
+            elif not hasattr(message, "message"):
                 message = message.error_list
             else:
-                message, code, params = (message.message, message.code,
-                                         message.params)
+                message, code, params = (message.message, message.code, message.params)
 
         if isinstance(message, dict):
             self.error_dict = {}
@@ -78,9 +78,8 @@ class ValidationError(Exception):
                 # Normalize plain strings to instances of ValidationError.
                 if not isinstance(message, ValidationError):
                     message = ValidationError(message)
-                if hasattr(message, 'error_dict'):
-                    self.error_list.extend(sum(message.error_dict.values(),
-                                               []))
+                if hasattr(message, "error_dict"):
+                    self.error_list.extend(sum(message.error_dict.values(), []))
                 else:
                     self.error_list.extend(message.error_list)
 
@@ -94,18 +93,18 @@ class ValidationError(Exception):
     def message_dict(self):
         # Trigger an AttributeError if this ValidationError
         # doesn't have an error_dict.
-        getattr(self, 'error_dict')
+        getattr(self, "error_dict")
 
         return dict(self)
 
     @property
     def messages(self):
-        if hasattr(self, 'error_dict'):
+        if hasattr(self, "error_dict"):
             return sum(dict(self).values(), [])
         return list(self)
 
     def update_error_dict(self, error_dict):
-        if hasattr(self, 'error_dict'):
+        if hasattr(self, "error_dict"):
             for field, error_list in self.error_dict.items():
                 error_dict.setdefault(field, []).extend(error_list)
         else:
@@ -113,7 +112,7 @@ class ValidationError(Exception):
         return error_dict
 
     def __iter__(self):
-        if hasattr(self, 'error_dict'):
+        if hasattr(self, "error_dict"):
             for field, errors in self.error_dict.items():
                 yield field, list(ValidationError(errors))
         else:
@@ -124,9 +123,9 @@ class ValidationError(Exception):
                 yield message
 
     def __str__(self):
-        if hasattr(self, 'error_dict'):
+        if hasattr(self, "error_dict"):
             return repr(dict(self))
         return repr(list(self))
 
     def __repr__(self):
-        return 'ValidationError(%s)' % self
+        return "ValidationError(%s)" % self
diff --git a/swh/model/fields/__init__.py b/swh/model/fields/__init__.py
index d2b3cefd5a026ba94f299443047273b79f25d8ab..a5b1ed3f8642c3de1593861fc3b9780697683f98 100644
--- a/swh/model/fields/__init__.py
+++ b/swh/model/fields/__init__.py
@@ -6,8 +6,13 @@
 # We do our imports here but we don't use them, so flake8 complains
 # flake8: noqa
 
-from .simple import (validate_type, validate_int, validate_str, validate_bytes,
-                     validate_datetime, validate_enum)
-from .hashes import (validate_sha1, validate_sha1_git, validate_sha256)
-from .compound import (validate_against_schema, validate_all_keys,
-                       validate_any_key)
+from .simple import (
+    validate_type,
+    validate_int,
+    validate_str,
+    validate_bytes,
+    validate_datetime,
+    validate_enum,
+)
+from .hashes import validate_sha1, validate_sha1_git, validate_sha256
+from .compound import validate_against_schema, validate_all_keys, validate_any_key
diff --git a/swh/model/fields/compound.py b/swh/model/fields/compound.py
index 00eb25250f37065761afd9ead8f9ee5fe2e12e0f..3133f59cc705d497ab3cbfddd3cb5d098b92e04d 100644
--- a/swh/model/fields/compound.py
+++ b/swh/model/fields/compound.py
@@ -26,19 +26,16 @@ def validate_against_schema(model, schema, value):
 
     if not isinstance(value, dict):
         raise ValidationError(
-            'Unexpected type %(type)s for %(model)s, expected dict',
-            params={
-                'model': model,
-                'type': value.__class__.__name__,
-            },
-            code='model-unexpected-type',
+            "Unexpected type %(type)s for %(model)s, expected dict",
+            params={"model": model, "type": value.__class__.__name__,},
+            code="model-unexpected-type",
         )
 
     errors = defaultdict(list)
 
     for key, (mandatory, validators) in itertools.chain(
         ((k, v) for k, v in schema.items() if k != NON_FIELD_ERRORS),
-        [(NON_FIELD_ERRORS, (False, schema.get(NON_FIELD_ERRORS, [])))]
+        [(NON_FIELD_ERRORS, (False, schema.get(NON_FIELD_ERRORS, [])))],
     ):
         if not validators:
             continue
@@ -54,9 +51,9 @@ def validate_against_schema(model, schema, value):
             if mandatory:
                 errors[key].append(
                     ValidationError(
-                        'Field %(field)s is mandatory',
-                        params={'field': key},
-                        code='model-field-mandatory',
+                        "Field %(field)s is mandatory",
+                        params={"field": key},
+                        code="model-field-mandatory",
                     )
                 )
 
@@ -74,19 +71,21 @@ def validate_against_schema(model, schema, value):
             else:
                 if not valid:
                     errdata = {
-                        'validator': validator.__name__,
+                        "validator": validator.__name__,
                     }
 
                     if key == NON_FIELD_ERRORS:
-                        errmsg = 'Validation of model %(model)s failed in ' \
-                                 '%(validator)s'
-                        errdata['model'] = model
-                        errcode = 'model-validation-failed'
+                        errmsg = (
+                            "Validation of model %(model)s failed in " "%(validator)s"
+                        )
+                        errdata["model"] = model
+                        errcode = "model-validation-failed"
                     else:
-                        errmsg = 'Validation of field %(field)s failed in ' \
-                                 '%(validator)s'
-                        errdata['field'] = key
-                        errcode = 'field-validation-failed'
+                        errmsg = (
+                            "Validation of field %(field)s failed in " "%(validator)s"
+                        )
+                        errdata["field"] = key
+                        errcode = "field-validation-failed"
 
                     errors[key].append(
                         ValidationError(errmsg, params=errdata, code=errcode)
@@ -102,11 +101,11 @@ def validate_all_keys(value, keys):
     """Validate that all the given keys are present in value"""
     missing_keys = set(keys) - set(value)
     if missing_keys:
-        missing_fields = ', '.join(sorted(missing_keys))
+        missing_fields = ", ".join(sorted(missing_keys))
         raise ValidationError(
-            'Missing mandatory fields %(missing_fields)s',
-            params={'missing_fields': missing_fields},
-            code='missing-mandatory-field'
+            "Missing mandatory fields %(missing_fields)s",
+            params={"missing_fields": missing_fields},
+            code="missing-mandatory-field",
         )
 
     return True
@@ -116,11 +115,11 @@ def validate_any_key(value, keys):
     """Validate that any of the given keys is present in value"""
     present_keys = set(keys) & set(value)
     if not present_keys:
-        missing_fields = ', '.join(sorted(keys))
+        missing_fields = ", ".join(sorted(keys))
         raise ValidationError(
-            'Must contain one of the alternative fields %(missing_fields)s',
-            params={'missing_fields': missing_fields},
-            code='missing-alternative-field',
+            "Must contain one of the alternative fields %(missing_fields)s",
+            params={"missing_fields": missing_fields},
+            code="missing-alternative-field",
         )
 
     return True
diff --git a/swh/model/fields/hashes.py b/swh/model/fields/hashes.py
index 3819565d841052e2f3c166b07d85be2c79043398..47e872c7b994881bd86a1c33a4a184acbb4732a2 100644
--- a/swh/model/fields/hashes.py
+++ b/swh/model/fields/hashes.py
@@ -22,22 +22,22 @@ def validate_hash(value, hash_type):
 
     """
     hash_lengths = {
-        'sha1': 20,
-        'sha1_git': 20,
-        'sha256': 32,
+        "sha1": 20,
+        "sha1_git": 20,
+        "sha256": 32,
     }
 
     hex_digits = set(string.hexdigits)
 
     if hash_type not in hash_lengths:
         raise ValidationError(
-            'Unexpected hash type %(hash_type)s, expected one of'
-            ' %(hash_types)s',
+            "Unexpected hash type %(hash_type)s, expected one of" " %(hash_types)s",
             params={
-                'hash_type': hash_type,
-                'hash_types': ', '.join(sorted(hash_lengths)),
+                "hash_type": hash_type,
+                "hash_types": ", ".join(sorted(hash_lengths)),
             },
-            code='unexpected-hash-type')
+            code="unexpected-hash-type",
+        )
 
     if isinstance(value, str):
         errors = []
@@ -48,10 +48,10 @@ def validate_hash(value, hash_type):
                     "Unexpected characters `%(unexpected_chars)s' for hash "
                     "type %(hash_type)s",
                     params={
-                        'unexpected_chars': ', '.join(sorted(extra_chars)),
-                        'hash_type': hash_type,
+                        "unexpected_chars": ", ".join(sorted(extra_chars)),
+                        "hash_type": hash_type,
                     },
-                    code='unexpected-hash-contents',
+                    code="unexpected-hash-contents",
                 )
             )
 
@@ -60,14 +60,14 @@ def validate_hash(value, hash_type):
         if length != expected_length:
             errors.append(
                 ValidationError(
-                    'Unexpected length %(length)d for hash type '
-                    '%(hash_type)s, expected %(expected_length)d',
+                    "Unexpected length %(length)d for hash type "
+                    "%(hash_type)s, expected %(expected_length)d",
                     params={
-                        'length': length,
-                        'expected_length': expected_length,
-                        'hash_type': hash_type,
+                        "length": length,
+                        "expected_length": expected_length,
+                        "hash_type": hash_type,
                     },
-                    code='unexpected-hash-length',
+                    code="unexpected-hash-length",
                 )
             )
 
@@ -81,37 +81,35 @@ def validate_hash(value, hash_type):
         expected_length = hash_lengths[hash_type]
         if length != expected_length:
             raise ValidationError(
-                'Unexpected length %(length)d for hash type '
-                '%(hash_type)s, expected %(expected_length)d',
+                "Unexpected length %(length)d for hash type "
+                "%(hash_type)s, expected %(expected_length)d",
                 params={
-                    'length': length,
-                    'expected_length': expected_length,
-                    'hash_type': hash_type,
+                    "length": length,
+                    "expected_length": expected_length,
+                    "hash_type": hash_type,
                 },
-                code='unexpected-hash-length',
+                code="unexpected-hash-length",
             )
 
         return True
 
     raise ValidationError(
-        'Unexpected type %(type)s for hash, expected str or bytes',
-        params={
-            'type': value.__class__.__name__,
-        },
-        code='unexpected-hash-value-type',
+        "Unexpected type %(type)s for hash, expected str or bytes",
+        params={"type": value.__class__.__name__,},
+        code="unexpected-hash-value-type",
     )
 
 
 def validate_sha1(sha1):
     """Validate that sha1 is a valid sha1 hash"""
-    return validate_hash(sha1, 'sha1')
+    return validate_hash(sha1, "sha1")
 
 
 def validate_sha1_git(sha1_git):
     """Validate that sha1_git is a valid sha1_git hash"""
-    return validate_hash(sha1_git, 'sha1_git')
+    return validate_hash(sha1_git, "sha1_git")
 
 
 def validate_sha256(sha256):
     """Validate that sha256 is a valid sha256 hash"""
-    return validate_hash(sha256, 'sha256')
+    return validate_hash(sha256, "sha256")
diff --git a/swh/model/fields/simple.py b/swh/model/fields/simple.py
index 302099780df9bab00ec2176223ed8fdd2db05199..98fcc118532d075f1de3b43022f0e992bc2d5376 100644
--- a/swh/model/fields/simple.py
+++ b/swh/model/fields/simple.py
@@ -13,16 +13,13 @@ def validate_type(value, type):
     """Validate that value is an integer"""
     if not isinstance(value, type):
         if isinstance(type, tuple):
-            typestr = 'one of %s' % ', '.join(typ.__name__ for typ in type)
+            typestr = "one of %s" % ", ".join(typ.__name__ for typ in type)
         else:
             typestr = type.__name__
         raise ValidationError(
-            'Unexpected type %(type)s, expected %(expected_type)s',
-            params={
-                'type': value.__class__.__name__,
-                'expected_type': typestr,
-            },
-            code='unexpected-type'
+            "Unexpected type %(type)s, expected %(expected_type)s",
+            params={"type": value.__class__.__name__, "expected_type": typestr,},
+            code="unexpected-type",
         )
 
     return True
@@ -54,10 +51,12 @@ def validate_datetime(value):
             errors.append(e)
 
     if isinstance(value, datetime.datetime) and value.tzinfo is None:
-        errors.append(ValidationError(
-            'Datetimes must be timezone-aware in swh',
-            code='datetime-without-tzinfo',
-        ))
+        errors.append(
+            ValidationError(
+                "Datetimes must be timezone-aware in swh",
+                code="datetime-without-tzinfo",
+            )
+        )
 
     if errors:
         raise ValidationError(errors)
@@ -69,12 +68,12 @@ def validate_enum(value, expected_values):
     """Validate that value is contained in expected_values"""
     if value not in expected_values:
         raise ValidationError(
-            'Unexpected value %(value)s, expected one of %(expected_values)s',
+            "Unexpected value %(value)s, expected one of %(expected_values)s",
            params={
-                'value': value,
-                'expected_values': ', '.join(sorted(expected_values)),
+                "value": value,
+                "expected_values": ", ".join(sorted(expected_values)),
             },
-            code='unexpected-value',
+            code="unexpected-value",
         )
 
     return True
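Aside — every validator reformatted above follows the same contract: return True on success, raise a ValidationError carrying a params/code pair on failure. A minimal sketch of the calling side (illustrative values; iterating the error renders the %(...)s placeholders from `params`):

    from swh.model.exceptions import ValidationError
    from swh.model.fields import validate_enum

    try:
        validate_enum("draft", {"visible", "hidden"})
    except ValidationError as e:
        print(list(e))  # one rendered message per error
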
diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
index 583df11cf3cccb4a4dfe4fc4356d67034bb8d637..5176dc9e22b53093abd625e6c30b9a2ea34d1824 100644
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 The Software Heritage developers
+# Copyright (C) 2017-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -8,12 +8,13 @@ import os
 import stat
 
 import attr
-from typing import List, Optional
+from typing import List, Optional, Iterable, Any
 
 from .hashutil import MultiHash
 from .merkle import MerkleLeaf, MerkleNode
 from .identifiers import (
-    directory_entry_sort_key, directory_identifier,
+    directory_entry_sort_key,
+    directory_identifier,
     identifier_to_bytes as id_to_bytes,
     identifier_to_str as id_to_str,
 )
@@ -23,24 +24,24 @@ from . import model
 @attr.s
 class DiskBackedContent(model.Content):
     """Subclass of Content, which allows lazy-loading data from the disk."""
+
     path = attr.ib(type=Optional[bytes], default=None)
 
     def __attrs_post_init__(self):
         if self.path is None:
-            raise TypeError('path must not be None.')
+            raise TypeError("path must not be None.")
 
     def with_data(self) -> model.Content:
         args = self.to_dict()
-        del args['path']
+        del args["path"]
         assert self.path is not None
-        with open(self.path, 'rb') as fd:
-            return model.Content.from_dict({
-                **args,
-                'data': fd.read()})
+        with open(self.path, "rb") as fd:
+            return model.Content.from_dict({**args, "data": fd.read()})
 
 
 class DentryPerms(enum.IntEnum):
     """Admissible permissions for directory entries."""
+
     content = 0o100644
     """Content"""
     executable_content = 0o100755
@@ -89,8 +90,9 @@ class Content(MerkleLeaf):
     computation.
 
     """
+
     __slots__ = []  # type: List[str]
-    type = 'content'
+    type = "content"
 
     @classmethod
     def from_bytes(cls, *, mode, data):
@@ -101,10 +103,10 @@ class Content(MerkleLeaf):
             data (bytes): raw contents of the file
         """
         ret = MultiHash.from_data(data).digest()
-        ret['length'] = len(data)
-        ret['perms'] = mode_to_perms(mode)
-        ret['data'] = data
-        ret['status'] = 'visible'
+        ret["length"] = len(data)
+        ret["perms"] = mode_to_perms(mode)
+        ret["data"] = data
+        ret["status"] = "visible"
 
         return cls(ret)
 
@@ -114,8 +116,7 @@ class Content(MerkleLeaf):
         return cls.from_bytes(mode=mode, data=os.readlink(path))
 
     @classmethod
-    def from_file(
-            cls, *, path, max_content_length=None):
+    def from_file(cls, *, path, max_content_length=None):
         """Compute the Software Heritage content entry corresponding to an
         on-disk file.
 
@@ -132,8 +133,7 @@ class Content(MerkleLeaf):
         file_stat = os.lstat(path)
         mode = file_stat.st_mode
         length = file_stat.st_size
-        too_large = max_content_length is not None \
-            and length > max_content_length
+        too_large = max_content_length is not None and length > max_content_length
 
         if stat.S_ISLNK(mode):
             # Symbolic link: return a file whose contents are the link target
@@ -145,15 +145,15 @@ class Content(MerkleLeaf):
                 # Thankfully, this should not happen for reasonable values of
                 # max_content_length because of OS/filesystem limitations,
                 # so let's just raise an error.
-                raise Exception(f'Symlink too large ({length} bytes)')
+                raise Exception(f"Symlink too large ({length} bytes)")
 
             return cls.from_symlink(path=path, mode=mode)
         elif not stat.S_ISREG(mode):
             # not a regular file: return the empty file instead
-            return cls.from_bytes(mode=mode, data=b'')
+            return cls.from_bytes(mode=mode, data=b"")
 
         if too_large:
-            skip_reason = 'Content too large'
+            skip_reason = "Content too large"
         else:
             skip_reason = None
 
@@ -161,42 +161,42 @@ class Content(MerkleLeaf):
         if skip_reason:
             ret = {
                 **hashes,
-                'status': 'absent',
-                'reason': skip_reason,
+                "status": "absent",
+                "reason": skip_reason,
             }
         else:
             ret = {
                 **hashes,
-                'status': 'visible',
+                "status": "visible",
             }
 
-        ret['path'] = path
-        ret['perms'] = mode_to_perms(mode)
-        ret['length'] = length
+        ret["path"] = path
+        ret["perms"] = mode_to_perms(mode)
+        ret["length"] = length
 
         obj = cls(ret)
         return obj
 
     def __repr__(self):
-        return 'Content(id=%s)' % id_to_str(self.hash)
+        return "Content(id=%s)" % id_to_str(self.hash)
 
     def compute_hash(self):
-        return self.data['sha1_git']
+        return self.data["sha1_git"]
 
     def to_model(self) -> model.BaseContent:
         """Builds a `model.BaseContent` object based on this leaf."""
         data = self.get_data().copy()
-        data.pop('perms', None)
-        if data['status'] == 'absent':
-            data.pop('path', None)
+        data.pop("perms", None)
+        if data["status"] == "absent":
+            data.pop("path", None)
             return model.SkippedContent.from_dict(data)
-        elif 'data' in data:
+        elif "data" in data:
             return model.Content.from_dict(data)
         else:
             return DiskBackedContent.from_dict(data)
 
 
-def accept_all_directories(dirname, entries):
+def accept_all_directories(dirpath: str, dirname: str, entries: Iterable[Any]) -> bool:
     """Default filter for :func:`Directory.from_disk` accepting all
     directories
 
@@ -207,7 +209,9 @@ def accept_all_directories(dirname, entries):
     return True
 
 
-def ignore_empty_directories(dirname, entries):
+def ignore_empty_directories(
+    dirpath: str, dirname: str, entries: Iterable[Any]
+) -> bool:
     """Filter for :func:`directory_to_objects` ignoring empty directories
 
     Args:
@@ -233,8 +235,13 @@ def ignore_named_directories(names, *, case_sensitive=True):
     if not case_sensitive:
         names = [name.lower() for name in names]
 
-    def named_filter(dirname, entries,
-                     names=names, case_sensitive=case_sensitive):
+    def named_filter(
+        dirpath: str,
+        dirname: str,
+        entries: Iterable[Any],
+        names: Iterable[Any] = names,
+        case_sensitive: bool = case_sensitive,
+    ):
         if case_sensitive:
             return dirname not in names
         else:
@@ -262,13 +269,14 @@ class Directory(MerkleNode):
     the same method. This enables the efficient collection of updated nodes,
     for instance when the client is applying diffs.
     """
-    __slots__ = ['__entries']
-    type = 'directory'
+
+    __slots__ = ["__entries"]
+    type = "directory"
 
     @classmethod
-    def from_disk(cls, *, path,
-                  dir_filter=accept_all_directories,
-                  max_content_length=None):
+    def from_disk(
+        cls, *, path, dir_filter=accept_all_directories, max_content_length=None
+    ):
         """Compute the Software Heritage objects for a given directory tree
 
         Args:
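Aside — the dir_filter change visible above is the one signature change in this otherwise mechanical commit: filters now take the directory's path as a leading argument. A sketch of a custom filter under the new three-argument form (the path below is a made-up example; from_disk walks bytes paths, so dirname arrives as bytes at runtime despite the str annotations):

    from swh.model.from_disk import Directory

    def ignore_hidden(dirpath, dirname, entries):
        # Keep a directory unless its name starts with a dot.
        return not dirname.startswith(b".")

    tree = Directory.from_disk(path=b"/tmp/checkout", dir_filter=ignore_hidden)
    print(tree.hash.hex())  # sha1_git of the filtered tree
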
""" - top_path = path dirs = {} @@ -294,13 +301,14 @@ class Directory(MerkleNode): path = os.path.join(root, name) if not os.path.isdir(path) or os.path.islink(path): content = Content.from_file( - path=path, max_content_length=max_content_length) + path=path, max_content_length=max_content_length + ) entries[name] = content else: - if dir_filter(name, dirs[path].entries): + if dir_filter(path, name, dirs[path].entries): entries[name] = dirs[path] - dirs[root] = cls({'name': os.path.basename(root)}) + dirs[root] = cls({"name": os.path.basename(root)}) dirs[root].update(entries) return dirs[top_path] @@ -317,25 +325,25 @@ class Directory(MerkleNode): def child_to_directory_entry(name, child): if isinstance(child, Directory): return { - 'type': 'dir', - 'perms': DentryPerms.directory, - 'target': child.hash, - 'name': name, + "type": "dir", + "perms": DentryPerms.directory, + "target": child.hash, + "name": name, } elif isinstance(child, Content): return { - 'type': 'file', - 'perms': child.data['perms'], - 'target': child.hash, - 'name': name, + "type": "file", + "perms": child.data["perms"], + "target": child.hash, + "name": name, } else: - raise ValueError('unknown child') + raise ValueError("unknown child") def get_data(self, **kwargs): return { - 'id': self.hash, - 'entries': self.entries, + "id": self.hash, + "entries": self.entries, } @property @@ -343,15 +351,18 @@ class Directory(MerkleNode): """Child nodes, sorted by name in the same way `directory_identifier` does.""" if self.__entries is None: - self.__entries = sorted(( - self.child_to_directory_entry(name, child) - for name, child in self.items() - ), key=directory_entry_sort_key) + self.__entries = sorted( + ( + self.child_to_directory_entry(name, child) + for name, child in self.items() + ), + key=directory_entry_sort_key, + ) return self.__entries def compute_hash(self): - return id_to_bytes(directory_identifier({'entries': self.entries})) + return id_to_bytes(directory_identifier({"entries": self.entries})) def to_model(self) -> model.Directory: """Builds a `model.Directory` object based on this node; @@ -360,48 +371,49 @@ class Directory(MerkleNode): def __getitem__(self, key): if not isinstance(key, bytes): - raise ValueError('Can only get a bytes from Directory') + raise ValueError("Can only get a bytes from Directory") # Convenience shortcut - if key == b'': + if key == b"": return self - if b'/' not in key: + if b"/" not in key: return super().__getitem__(key) else: - key1, key2 = key.split(b'/', 1) + key1, key2 = key.split(b"/", 1) return self.__getitem__(key1)[key2] def __setitem__(self, key, value): if not isinstance(key, bytes): - raise ValueError('Can only set a bytes Directory entry') + raise ValueError("Can only set a bytes Directory entry") if not isinstance(value, (Content, Directory)): - raise ValueError('Can only set a Directory entry to a Content or ' - 'Directory') + raise ValueError( + "Can only set a Directory entry to a Content or " "Directory" + ) - if key == b'': - raise ValueError('Directory entry must have a name') - if b'\x00' in key: - raise ValueError('Directory entry name must not contain nul bytes') + if key == b"": + raise ValueError("Directory entry must have a name") + if b"\x00" in key: + raise ValueError("Directory entry name must not contain nul bytes") - if b'/' not in key: + if b"/" not in key: return super().__setitem__(key, value) else: - key1, key2 = key.rsplit(b'/', 1) + key1, key2 = key.rsplit(b"/", 1) self[key1].__setitem__(key2, value) def __delitem__(self, key): if not 
isinstance(key, bytes): - raise ValueError('Can only delete a bytes Directory entry') + raise ValueError("Can only delete a bytes Directory entry") - if b'/' not in key: + if b"/" not in key: super().__delitem__(key) else: - key1, key2 = key.rsplit(b'/', 1) + key1, key2 = key.rsplit(b"/", 1) del self[key1][key2] def __repr__(self): - return 'Directory(id=%s, entries=[%s])' % ( + return "Directory(id=%s, entries=[%s])" % ( id_to_str(self.hash), - ', '.join(str(entry) for entry in self), + ", ".join(str(entry) for entry in self), ) diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py index f045fb01e48e15320d625d92076b28168c367032..954ae9576452a00649040eb4662ecd3756fe00a5 100644 --- a/swh/model/hashutil.py +++ b/swh/model/hashutil.py @@ -59,10 +59,10 @@ import os from io import BytesIO from typing import Callable, Dict -ALGORITHMS = set(['sha1', 'sha256', 'sha1_git', 'blake2s256', 'blake2b512']) +ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512"]) """Hashing algorithms supported by this module""" -DEFAULT_ALGORITHMS = set(['sha1', 'sha256', 'sha1_git', 'blake2s256']) +DEFAULT_ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256"]) """Algorithms computed by default when calling the functions from this module. Subset of :const:`ALGORITHMS`. @@ -87,12 +87,13 @@ class MultiHash: computed and returned. """ + def __init__(self, hash_names=DEFAULT_ALGORITHMS, length=None): self.state = {} self.track_length = False for name in hash_names: - if name == 'length': - self.state['length'] = 0 + if name == "length": + self.state["length"] = 0 self.track_length = True else: self.state[name] = _new_hash(name, length) @@ -116,7 +117,7 @@ class MultiHash: @classmethod def from_path(cls, path, hash_names=DEFAULT_ALGORITHMS): length = os.path.getsize(path) - with open(path, 'rb') as f: + with open(path, "rb") as f: ret = cls.from_file(f, hash_names=hash_names, length=length) return ret @@ -128,34 +129,33 @@ class MultiHash: def update(self, chunk): for name, h in self.state.items(): - if name == 'length': + if name == "length": continue h.update(chunk) if self.track_length: - self.state['length'] += len(chunk) + self.state["length"] += len(chunk) def digest(self): return { - name: h.digest() if name != 'length' else h + name: h.digest() if name != "length" else h for name, h in self.state.items() } def hexdigest(self): return { - name: h.hexdigest() if name != 'length' else h + name: h.hexdigest() if name != "length" else h for name, h in self.state.items() } def bytehexdigest(self): return { - name: hash_to_bytehex(h.digest()) if name != 'length' else h + name: hash_to_bytehex(h.digest()) if name != "length" else h for name, h in self.state.items() } def copy(self): copied_state = { - name: h.copy() if name != 'length' else h - for name, h in self.state.items() + name: h.copy() if name != "length" else h for name, h in self.state.items() } return self.from_state(copied_state, self.track_length) @@ -168,8 +168,8 @@ def _new_blake2_hash(algo): return _blake2_hash_cache[algo]() lalgo = algo.lower() - if not lalgo.startswith('blake2'): - raise ValueError('Algorithm %s is not a blake2 hash' % algo) + if not lalgo.startswith("blake2"): + raise ValueError("Algorithm %s is not a blake2 hash" % algo) blake_family = lalgo[:7] @@ -178,12 +178,10 @@ def _new_blake2_hash(algo): try: digest_size, remainder = divmod(int(lalgo[7:]), 8) except ValueError: - raise ValueError( - 'Unknown digest size for algo %s' % algo - ) from None + raise ValueError("Unknown digest size for algo %s" % algo) 
from None if remainder: raise ValueError( - 'Digest size for algorithm %s must be a multiple of 8' % algo + "Digest size for algorithm %s must be a multiple of 8" % algo ) if lalgo in hashlib.algorithms_available: @@ -196,6 +194,7 @@ def _new_blake2_hash(algo): blake2 = getattr(hashlib, blake_family) else: import pyblake2 + blake2 = getattr(pyblake2, blake_family) _blake2_hash_cache[algo] = lambda: blake2(digest_size=digest_size) @@ -208,7 +207,7 @@ def _new_hashlib_hash(algo): Handle the swh-specific names for the blake2-related algorithms """ - if algo.startswith('blake2'): + if algo.startswith("blake2"): return _new_blake2_hash(algo) else: return hashlib.new(algo) @@ -236,8 +235,8 @@ def _new_git_hash(base_algo, git_type, length): """ h = _new_hashlib_hash(base_algo) - git_header = '%s %d\0' % (git_type, length) - h.update(git_header.encode('ascii')) + git_header = "%s %d\0" % (git_type, length) + h.update(git_header.encode("ascii")) return h @@ -264,19 +263,20 @@ def _new_hash(algo, length=None): """ if algo not in ALGORITHMS: raise ValueError( - 'Unexpected hashing algorithm %s, expected one of %s' % - (algo, ', '.join(sorted(ALGORITHMS)))) + "Unexpected hashing algorithm %s, expected one of %s" + % (algo, ", ".join(sorted(ALGORITHMS))) + ) - if algo.endswith('_git'): + if algo.endswith("_git"): if length is None: - raise ValueError('Missing length for git hashing algorithm') + raise ValueError("Missing length for git hashing algorithm") base_algo = algo[:-4] - return _new_git_hash(base_algo, 'blob', length) + return _new_git_hash(base_algo, "blob", length) return _new_hashlib_hash(algo) -def hash_git_data(data, git_type, base_algo='sha1'): +def hash_git_data(data, git_type, base_algo="sha1"): """Hash the given data as a git object of type git_type. Args: @@ -290,11 +290,13 @@ def hash_git_data(data, git_type, base_algo='sha1'): ValueError if the git_type is unexpected. 
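Aside — as _new_git_hash above shows, a "git hash" is just the base algorithm seeded with a `<type> <length>\0` header before the payload. The equivalence is easy to check against plain hashlib:

    import hashlib

    data = b"hello\n"
    h = hashlib.sha1()
    h.update(b"blob %d\0" % len(data))  # the same header _new_git_hash builds
    h.update(data)
    # Agrees with `git hash-object` on the same bytes:
    # ce013625030ba8dba906f756967f9e9ca394464a
    print(h.hexdigest())
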
""" - git_object_types = {'blob', 'tree', 'commit', 'tag', 'snapshot'} + git_object_types = {"blob", "tree", "commit", "tag", "snapshot"} if git_type not in git_object_types: - raise ValueError('Unexpected git object type %s, expected one of %s' % - (git_type, ', '.join(sorted(git_object_types)))) + raise ValueError( + "Unexpected git object type %s, expected one of %s" + % (git_type, ", ".join(sorted(git_object_types))) + ) h = _new_git_hash(base_algo, git_type, len(data)) h.update(data) @@ -315,7 +317,7 @@ def hash_to_hex(hash): """ if isinstance(hash, str): return hash - return binascii.hexlify(hash).decode('ascii') + return binascii.hexlify(hash).decode("ascii") @functools.lru_cache() diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py index 320b160b4193cd01dc3ab159b87b11d417aa59f3..a66efdc1c72c4b9ff20b4d4336c84fc902c92180 100644 --- a/swh/model/hypothesis_strategies.py +++ b/swh/model/hypothesis_strategies.py @@ -7,22 +7,51 @@ import datetime from hypothesis import assume from hypothesis.strategies import ( - binary, booleans, builds, characters, - composite, datetimes, dictionaries, from_regex, integers, just, lists, - none, one_of, sampled_from, sets, text, ) + binary, + booleans, + builds, + characters, + composite, + datetimes, + dictionaries, + from_regex, + integers, + just, + lists, + none, + one_of, + sampled_from, + sets, + text, +) from .from_disk import DentryPerms from .model import ( - Person, Timestamp, TimestampWithTimezone, Origin, - OriginVisit, OriginVisitUpdate, Snapshot, SnapshotBranch, ObjectType, - TargetType, Release, Revision, RevisionType, BaseContent, Directory, - DirectoryEntry, Content, SkippedContent, ) + Person, + Timestamp, + TimestampWithTimezone, + Origin, + OriginVisit, + OriginVisitUpdate, + Snapshot, + SnapshotBranch, + ObjectType, + TargetType, + Release, + Revision, + RevisionType, + BaseContent, + Directory, + DirectoryEntry, + Content, + SkippedContent, +) from .identifiers import snapshot_identifier, identifier_to_bytes pgsql_alphabet = characters( - blacklist_categories=('Cs', ), - blacklist_characters=['\u0000']) # postgresql does not like these + blacklist_categories=("Cs",), blacklist_characters=["\u0000"] +) # postgresql does not like these def optional(strategy): @@ -43,18 +72,15 @@ def sha1(): @composite def urls(draw): - protocol = draw(sampled_from(['git', 'http', 'https', 'deb'])) - domain = draw(from_regex(r'\A([a-z]([a-z0-9-]*)\.){1,3}[a-z0-9]+\Z')) + protocol = draw(sampled_from(["git", "http", "https", "deb"])) + domain = draw(from_regex(r"\A([a-z]([a-z0-9-]*)\.){1,3}[a-z0-9]+\Z")) - return '%s://%s' % (protocol, domain) + return "%s://%s" % (protocol, domain) def persons_d(): return builds( - dict, - fullname=binary(), - email=optional(binary()), - name=optional(binary()), + dict, fullname=binary(), email=optional(binary()), name=optional(binary()), ) @@ -64,13 +90,16 @@ def persons(): def timestamps_d(): max_seconds = datetime.datetime.max.replace( - tzinfo=datetime.timezone.utc).timestamp() + tzinfo=datetime.timezone.utc + ).timestamp() min_seconds = datetime.datetime.min.replace( - tzinfo=datetime.timezone.utc).timestamp() + tzinfo=datetime.timezone.utc + ).timestamp() return builds( dict, seconds=integers(min_seconds, max_seconds), - microseconds=integers(0, 1000000)) + microseconds=integers(0, 1000000), + ) def timestamps(): @@ -79,28 +108,25 @@ def timestamps(): @composite def timestamps_with_timezone_d( - draw, - timestamp=timestamps_d(), - offset=integers(min_value=-14*60, 
max_value=14*60), - negative_utc=booleans()): + draw, + timestamp=timestamps_d(), + offset=integers(min_value=-14 * 60, max_value=14 * 60), + negative_utc=booleans(), +): timestamp = draw(timestamp) offset = draw(offset) negative_utc = draw(negative_utc) assume(not (negative_utc and offset)) - return dict( - timestamp=timestamp, - offset=offset, - negative_utc=negative_utc) + return dict(timestamp=timestamp, offset=offset, negative_utc=negative_utc) timestamps_with_timezone = timestamps_with_timezone_d().map( - TimestampWithTimezone.from_dict) + TimestampWithTimezone.from_dict +) def origins_d(): - return builds( - dict, - url=urls()) + return builds(dict, url=urls()) def origins(): @@ -113,7 +139,7 @@ def origin_visits_d(): visit=integers(0, 1000), origin=urls(), date=datetimes(), - status=sampled_from(['ongoing', 'full', 'partial']), + status=sampled_from(["ongoing", "full", "partial"]), type=pgsql_text(), snapshot=optional(sha1_git()), ) @@ -132,10 +158,11 @@ def origin_visit_updates_d(): dict, visit=integers(0, 1000), origin=urls(), - status=sampled_from(['ongoing', 'full', 'partial']), + status=sampled_from(["ongoing", "full", "partial"]), date=datetimes(), snapshot=optional(sha1_git()), - metadata=one_of(none(), metadata_dicts())) + metadata=one_of(none(), metadata_dicts()), + ) def origin_visit_updates(): @@ -151,30 +178,32 @@ def releases_d(draw): target = sha1_git() metadata = one_of(none(), revision_metadata()) - return draw(one_of( - builds( - dict, - name=name, - message=message, - synthetic=synthetic, - author=none(), - date=none(), - target=target, - target_type=target_type, - metadata=metadata, - ), - builds( - dict, - name=name, - message=message, - synthetic=synthetic, - date=timestamps_with_timezone_d(), - author=persons_d(), - target=target, - target_type=target_type, - metadata=metadata, - ), - )) + return draw( + one_of( + builds( + dict, + name=name, + message=message, + synthetic=synthetic, + author=none(), + date=none(), + target=target, + target_type=target_type, + metadata=metadata, + ), + builds( + dict, + name=name, + message=message, + synthetic=synthetic, + date=timestamps_with_timezone_d(), + author=persons_d(), + target=target, + target_type=target_type, + metadata=metadata, + ), + ) + ) def releases(): @@ -196,7 +225,8 @@ def revisions_d(): parents=lists(sha1_git()), directory=sha1_git(), type=sampled_from([x.value for x in RevisionType]), - metadata=one_of(none(), revision_metadata())) + metadata=one_of(none(), revision_metadata()), + ) # TODO: metadata['extra_headers'] can have binary keys and values @@ -209,8 +239,9 @@ def directory_entries_d(): dict, name=binary(), target=sha1_git(), - type=sampled_from(['file', 'dir', 'rev']), - perms=sampled_from([perm.value for perm in DentryPerms])) + type=sampled_from(["file", "dir", "rev"]), + perms=sampled_from([perm.value for perm in DentryPerms]), + ) def directory_entries(): @@ -218,9 +249,7 @@ def directory_entries(): def directories_d(): - return builds( - dict, - entries=lists(directory_entries_d())) + return builds(dict, entries=lists(directory_entries_d())) def directories(): @@ -240,7 +269,7 @@ def present_contents_d(): dict, data=binary(max_size=4096), ctime=optional(datetimes()), - status=one_of(just('visible'), just('hidden')), + status=one_of(just("visible"), just("hidden")), ) @@ -251,15 +280,15 @@ def present_contents(): @composite def skipped_contents_d(draw): result = BaseContent._hash_data(draw(binary(max_size=4096))) - result.pop('data') + result.pop("data") nullify_attrs = draw( - 
sets(sampled_from(['sha1', 'sha1_git', 'sha256', 'blake2s256'])) + sets(sampled_from(["sha1", "sha1_git", "sha256", "blake2s256"])) ) for k in nullify_attrs: result[k] = None - result['reason'] = draw(pgsql_text()) - result['status'] = 'absent' - result['ctime'] = draw(optional(datetimes())) + result["reason"] = draw(pgsql_text()) + result["status"] = "absent" + result["ctime"] = draw(optional(datetimes())) return result @@ -275,16 +304,16 @@ def branch_targets_object_d(): return builds( dict, target=sha1_git(), - target_type=sampled_from([ - x.value for x in TargetType - if x.value not in ('alias', )])) + target_type=sampled_from( + [x.value for x in TargetType if x.value not in ("alias",)] + ), + ) def branch_targets_alias_d(): return builds( - dict, - target=sha1_git(), - target_type=just('alias')) # TargetType.ALIAS.value)) + dict, target=sha1_git(), target_type=just("alias") + ) # TargetType.ALIAS.value)) def branch_targets_d(*, only_objects=False): @@ -295,31 +324,30 @@ def branch_targets_d(*, only_objects=False): def branch_targets(*, only_objects=False): - return builds( - SnapshotBranch.from_dict, - branch_targets_d(only_objects=only_objects)) + return builds(SnapshotBranch.from_dict, branch_targets_d(only_objects=only_objects)) @composite def snapshots_d(draw, *, min_size=0, max_size=100, only_objects=False): - branches = draw(dictionaries( - keys=branch_names(), - values=one_of( - none(), - branch_targets_d(only_objects=only_objects) - ), - min_size=min_size, - max_size=max_size, - )) + branches = draw( + dictionaries( + keys=branch_names(), + values=one_of(none(), branch_targets_d(only_objects=only_objects)), + min_size=min_size, + max_size=max_size, + ) + ) if not only_objects: # Make sure aliases point to actual branches unresolved_aliases = { - branch: target['target'] + branch: target["target"] for branch, target in branches.items() - if (target - and target['target_type'] == 'alias' - and target['target'] not in branches) + if ( + target + and target["target_type"] == "alias" + and target["target"] not in branches + ) } for alias_name, alias_target in unresolved_aliases.items(): # Override alias branch with one pointing to a real object @@ -330,37 +358,38 @@ def snapshots_d(draw, *, min_size=0, max_size=100, only_objects=False): # Ensure no cycles between aliases while True: try: - id_ = snapshot_identifier({ - 'branches': { - name: branch or None - for (name, branch) in branches.items()}}) + id_ = snapshot_identifier( + { + "branches": { + name: branch or None for (name, branch) in branches.items() + } + } + ) except ValueError as e: for (source, target) in e.args[1]: branches[source] = draw(branch_targets_d(only_objects=True)) else: break - return dict( - id=identifier_to_bytes(id_), - branches=branches) + return dict(id=identifier_to_bytes(id_), branches=branches) def snapshots(*, min_size=0, max_size=100, only_objects=False): - return snapshots_d(min_size=min_size, max_size=max_size, - only_objects=only_objects).map( - Snapshot.from_dict) + return snapshots_d( + min_size=min_size, max_size=max_size, only_objects=only_objects + ).map(Snapshot.from_dict) def objects(): return one_of( - origins().map(lambda x: ('origin', x)), - origin_visits().map(lambda x: ('origin_visit', x)), - origin_visit_updates().map(lambda x: ('origin_visit_update', x)), - snapshots().map(lambda x: ('snapshot', x)), - releases().map(lambda x: ('release', x)), - revisions().map(lambda x: ('revision', x)), - directories().map(lambda x: ('directory', x)), - contents().map(lambda x: ('content', x)), + 
origins().map(lambda x: ("origin", x)), + origin_visits().map(lambda x: ("origin_visit", x)), + origin_visit_updates().map(lambda x: ("origin_visit_update", x)), + snapshots().map(lambda x: ("snapshot", x)), + releases().map(lambda x: ("release", x)), + revisions().map(lambda x: ("revision", x)), + directories().map(lambda x: ("directory", x)), + contents().map(lambda x: ("content", x)), ) @@ -370,11 +399,11 @@ def object_dicts(): which dict is suitable for <ModelForType>.from_dict() factory methods. """ return one_of( - origins_d().map(lambda x: ('origin', x)), - origin_visits_d().map(lambda x: ('origin_visit', x)), - snapshots_d().map(lambda x: ('snapshot', x)), - releases_d().map(lambda x: ('release', x)), - revisions_d().map(lambda x: ('revision', x)), - directories_d().map(lambda x: ('directory', x)), - contents_d().map(lambda x: ('content', x)), + origins_d().map(lambda x: ("origin", x)), + origin_visits_d().map(lambda x: ("origin_visit", x)), + snapshots_d().map(lambda x: ("snapshot", x)), + releases_d().map(lambda x: ("release", x)), + revisions_d().map(lambda x: ("revision", x)), + directories_d().map(lambda x: ("directory", x)), + contents_d().map(lambda x: ("content", x)), ) diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index 6ccf948849e96bc45298909bb0677c9e881ef38e..dd3451346a7226226a7dceab3413bfa243a7cb37 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -15,18 +15,18 @@ from .fields.hashes import validate_sha1 from .hashutil import hash_git_data, hash_to_hex, MultiHash -ORIGIN = 'origin' -SNAPSHOT = 'snapshot' -REVISION = 'revision' -RELEASE = 'release' -DIRECTORY = 'directory' -CONTENT = 'content' - -PID_NAMESPACE = 'swh' +ORIGIN = "origin" +SNAPSHOT = "snapshot" +REVISION = "revision" +RELEASE = "release" +DIRECTORY = "directory" +CONTENT = "content" + +PID_NAMESPACE = "swh" PID_VERSION = 1 -PID_TYPES = ['ori', 'snp', 'rel', 'rev', 'dir', 'cnt'] -PID_SEP = ':' -PID_CTXT_SEP = ';' +PID_TYPES = ["ori", "snp", "rel", "rev", "dir", "cnt"] +PID_SEP = ":" +PID_CTXT_SEP = ";" @lru_cache() @@ -46,19 +46,21 @@ def identifier_to_bytes(identifier): if isinstance(identifier, bytes): if len(identifier) != 20: raise ValueError( - 'Wrong length for bytes identifier %s, expected 20' % - len(identifier)) + "Wrong length for bytes identifier %s, expected 20" % len(identifier) + ) return identifier if isinstance(identifier, str): if len(identifier) != 40: raise ValueError( - 'Wrong length for str identifier %s, expected 40' % - len(identifier)) + "Wrong length for str identifier %s, expected 40" % len(identifier) + ) return bytes.fromhex(identifier) - raise ValueError('Wrong type for identifier %s, expected bytes or str' % - identifier.__class__.__name__) + raise ValueError( + "Wrong type for identifier %s, expected bytes or str" + % identifier.__class__.__name__ + ) @lru_cache() @@ -79,19 +81,21 @@ def identifier_to_str(identifier): if isinstance(identifier, str): if len(identifier) != 40: raise ValueError( - 'Wrong length for str identifier %s, expected 40' % - len(identifier)) + "Wrong length for str identifier %s, expected 40" % len(identifier) + ) return identifier if isinstance(identifier, bytes): if len(identifier) != 20: raise ValueError( - 'Wrong length for bytes identifier %s, expected 20' % - len(identifier)) + "Wrong length for bytes identifier %s, expected 20" % len(identifier) + ) return binascii.hexlify(identifier).decode() - raise ValueError('Wrong type for identifier %s, expected bytes or str' % - identifier.__class__.__name__) + raise 
ValueError( + "Wrong type for identifier %s, expected bytes or str" + % identifier.__class__.__name__ + ) def content_identifier(content): @@ -111,22 +115,22 @@ def content_identifier(content): """ - return MultiHash.from_data(content['data']).digest() + return MultiHash.from_data(content["data"]).digest() def directory_entry_sort_key(entry): """The sorting key for tree entries""" - if entry['type'] == 'dir': - return entry['name'] + b'/' + if entry["type"] == "dir": + return entry["name"] + b"/" else: - return entry['name'] + return entry["name"] @lru_cache() def _perms_to_bytes(perms): """Convert the perms value to its bytes representation""" oc = oct(perms)[2:] - return oc.encode('ascii') + return oc.encode("ascii") def escape_newlines(snippet): @@ -137,8 +141,8 @@ def escape_newlines(snippet): """ - if b'\n' in snippet: - return b'\n '.join(snippet.split(b'\n')) + if b"\n" in snippet: + return b"\n ".join(snippet.split(b"\n")) else: return snippet @@ -182,16 +186,18 @@ def directory_identifier(directory): components = [] - for entry in sorted(directory['entries'], key=directory_entry_sort_key): - components.extend([ - _perms_to_bytes(entry['perms']), - b'\x20', - entry['name'], - b'\x00', - identifier_to_bytes(entry['target']), - ]) + for entry in sorted(directory["entries"], key=directory_entry_sort_key): + components.extend( + [ + _perms_to_bytes(entry["perms"]), + b"\x20", + entry["name"], + b"\x00", + identifier_to_bytes(entry["target"]), + ] + ) - return identifier_to_str(hash_git_data(b''.join(components), 'tree')) + return identifier_to_str(hash_git_data(b"".join(components), "tree")) def format_date(date): @@ -209,15 +215,15 @@ def format_date(date): """ if not isinstance(date, dict): - raise ValueError('format_date only supports dicts, %r received' % date) + raise ValueError("format_date only supports dicts, %r received" % date) - seconds = date.get('seconds', 0) - microseconds = date.get('microseconds', 0) + seconds = date.get("seconds", 0) + microseconds = date.get("microseconds", 0) if not microseconds: return str(seconds).encode() else: - float_value = ('%d.%06d' % (seconds, microseconds)) - return float_value.rstrip('0').encode() + float_value = "%d.%06d" % (seconds, microseconds) + return float_value.rstrip("0").encode() @lru_cache() @@ -232,14 +238,14 @@ def format_offset(offset, negative_utc=None): A null offset is represented as +0000. 
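Aside — the format_date hunk just above is easy to misread because of the rstrip: it renders a timestamp dict the way git does, as integer seconds, with a fractional part only when microseconds are non-zero and with its trailing zeros stripped. Illustrative values:

    from swh.model.identifiers import format_date

    format_date({"seconds": 1234})                          # b"1234"
    format_date({"seconds": 1234, "microseconds": 500000})  # b"1234.5"
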
""" if offset < 0 or offset == 0 and negative_utc: - sign = '-' + sign = "-" else: - sign = '+' + sign = "+" hours = abs(offset) // 60 minutes = abs(offset) % 60 - t = '%s%02d%02d' % (sign, hours, minutes) + t = "%s%02d%02d" % (sign, hours, minutes) return t.encode() @@ -275,28 +281,29 @@ def normalize_timestamp(time_representation): negative_utc = False if isinstance(time_representation, dict): - ts = time_representation['timestamp'] + ts = time_representation["timestamp"] if isinstance(ts, dict): - seconds = ts.get('seconds', 0) - microseconds = ts.get('microseconds', 0) + seconds = ts.get("seconds", 0) + microseconds = ts.get("microseconds", 0) elif isinstance(ts, int): seconds = ts microseconds = 0 else: raise ValueError( - 'normalize_timestamp received non-integer timestamp member:' - ' %r' % ts) - offset = time_representation['offset'] - if 'negative_utc' in time_representation: - negative_utc = time_representation['negative_utc'] + "normalize_timestamp received non-integer timestamp member:" " %r" % ts + ) + offset = time_representation["offset"] + if "negative_utc" in time_representation: + negative_utc = time_representation["negative_utc"] elif isinstance(time_representation, datetime.datetime): seconds = int(time_representation.timestamp()) microseconds = time_representation.microsecond utcoffset = time_representation.utcoffset() if utcoffset is None: raise ValueError( - 'normalize_timestamp received datetime without timezone: %s' % - time_representation) + "normalize_timestamp received datetime without timezone: %s" + % time_representation + ) # utcoffset is an integer number of minutes seconds_offset = utcoffset.total_seconds() @@ -307,16 +314,14 @@ def normalize_timestamp(time_representation): offset = 0 else: raise ValueError( - 'normalize_timestamp received non-integer timestamp:' - ' %r' % time_representation) + "normalize_timestamp received non-integer timestamp:" + " %r" % time_representation + ) return { - 'timestamp': { - 'seconds': seconds, - 'microseconds': microseconds, - }, - 'offset': offset, - 'negative_utc': negative_utc, + "timestamp": {"seconds": seconds, "microseconds": microseconds,}, + "offset": offset, + "negative_utc": negative_utc, } @@ -335,16 +340,16 @@ def format_author(author): if isinstance(author, bytes) or author is None: return author - if 'fullname' in author: - return author['fullname'] + if "fullname" in author: + return author["fullname"] ret = [] - if author['name'] is not None: - ret.append(author['name']) - if author['email'] is not None: - ret.append(b''.join([b'<', author['email'], b'>'])) + if author["name"] is not None: + ret.append(author["name"]) + if author["email"] is not None: + ret.append(b"".join([b"<", author["email"], b">"])) - return b' '.join(ret) + return b" ".join(ret) def format_author_line(header, author, date_offset): @@ -385,19 +390,18 @@ def format_author_line(header, author, date_offset): """ - ret = [header.encode(), b' ', escape_newlines(format_author(author))] + ret = [header.encode(), b" ", escape_newlines(format_author(author))] date_offset = normalize_timestamp(date_offset) if date_offset is not None: - date_f = format_date(date_offset['timestamp']) - offset_f = format_offset(date_offset['offset'], - date_offset['negative_utc']) + date_f = format_date(date_offset["timestamp"]) + offset_f = format_offset(date_offset["offset"], date_offset["negative_utc"]) - ret.extend([b' ', date_f, b' ', offset_f]) + ret.extend([b" ", date_f, b" ", offset_f]) - ret.append(b'\n') - return b''.join(ret) + ret.append(b"\n") + 
return b"".join(ret) def revision_identifier(revision): @@ -451,74 +455,84 @@ def revision_identifier(revision): """ components = [ - b'tree ', identifier_to_str(revision['directory']).encode(), b'\n', + b"tree ", + identifier_to_str(revision["directory"]).encode(), + b"\n", ] - for parent in revision['parents']: + for parent in revision["parents"]: if parent: - components.extend([ - b'parent ', identifier_to_str(parent).encode(), b'\n', - ]) - - components.extend([ - format_author_line('author', revision['author'], revision['date']), - format_author_line('committer', revision['committer'], - revision['committer_date']), - ]) + components.extend( + [b"parent ", identifier_to_str(parent).encode(), b"\n",] + ) + + components.extend( + [ + format_author_line("author", revision["author"], revision["date"]), + format_author_line( + "committer", revision["committer"], revision["committer_date"] + ), + ] + ) # Handle extra headers - metadata = revision.get('metadata') + metadata = revision.get("metadata") if not metadata: metadata = {} - for key, value in metadata.get('extra_headers', []): + for key, value in metadata.get("extra_headers", []): # Integer values: decimal representation if isinstance(value, int): - value = str(value).encode('utf-8') + value = str(value).encode("utf-8") # Unicode string values: utf-8 encoding if isinstance(value, str): - value = value.encode('utf-8') + value = value.encode("utf-8") # encode the key to utf-8 - components.extend([key.encode('utf-8'), b' ', - escape_newlines(value), b'\n']) + components.extend([key.encode("utf-8"), b" ", escape_newlines(value), b"\n"]) - if revision['message'] is not None: - components.extend([b'\n', revision['message']]) + if revision["message"] is not None: + components.extend([b"\n", revision["message"]]) - commit_raw = b''.join(components) - return identifier_to_str(hash_git_data(commit_raw, 'commit')) + commit_raw = b"".join(components) + return identifier_to_str(hash_git_data(commit_raw, "commit")) def target_type_to_git(target_type): """Convert a software heritage target type to a git object type""" return { - 'content': b'blob', - 'directory': b'tree', - 'revision': b'commit', - 'release': b'tag', - 'snapshot': b'refs' + "content": b"blob", + "directory": b"tree", + "revision": b"commit", + "release": b"tag", + "snapshot": b"refs", }[target_type] def release_identifier(release): """Return the intrinsic identifier for a release.""" components = [ - b'object ', identifier_to_str(release['target']).encode(), b'\n', - b'type ', target_type_to_git(release['target_type']), b'\n', - b'tag ', release['name'], b'\n', + b"object ", + identifier_to_str(release["target"]).encode(), + b"\n", + b"type ", + target_type_to_git(release["target_type"]), + b"\n", + b"tag ", + release["name"], + b"\n", ] - if 'author' in release and release['author']: + if "author" in release and release["author"]: components.append( - format_author_line('tagger', release['author'], release['date']) + format_author_line("tagger", release["author"], release["date"]) ) - if release['message'] is not None: - components.extend([b'\n', release['message']]) + if release["message"] is not None: + components.extend([b"\n", release["message"]]) - return identifier_to_str(hash_git_data(b''.join(components), 'tag')) + return identifier_to_str(hash_git_data(b"".join(components), "tag")) def snapshot_identifier(snapshot, *, ignore_unresolved=False): @@ -580,30 +594,38 @@ def snapshot_identifier(snapshot, *, ignore_unresolved=False): unresolved = [] lines = [] - for name, target 
in sorted(snapshot['branches'].items()): + for name, target in sorted(snapshot["branches"].items()): if not target: - target_type = b'dangling' - target_id = b'' - elif target['target_type'] == 'alias': - target_type = b'alias' - target_id = target['target'] - if target_id not in snapshot['branches'] or target_id == name: + target_type = b"dangling" + target_id = b"" + elif target["target_type"] == "alias": + target_type = b"alias" + target_id = target["target"] + if target_id not in snapshot["branches"] or target_id == name: unresolved.append((name, target_id)) else: - target_type = target['target_type'].encode() - target_id = identifier_to_bytes(target['target']) - - lines.extend([ - target_type, b'\x20', name, b'\x00', - ('%d:' % len(target_id)).encode(), target_id, - ]) + target_type = target["target_type"].encode() + target_id = identifier_to_bytes(target["target"]) + + lines.extend( + [ + target_type, + b"\x20", + name, + b"\x00", + ("%d:" % len(target_id)).encode(), + target_id, + ] + ) if unresolved and not ignore_unresolved: - raise ValueError('Branch aliases unresolved: %s' % - ', '.join('%s -> %s' % x for x in unresolved), - unresolved) + raise ValueError( + "Branch aliases unresolved: %s" + % ", ".join("%s -> %s" % x for x in unresolved), + unresolved, + ) - return identifier_to_str(hash_git_data(b''.join(lines), 'snapshot')) + return identifier_to_str(hash_git_data(b"".join(lines), "snapshot")) def origin_identifier(origin): @@ -612,45 +634,29 @@ def origin_identifier(origin): An origin's identifier is the sha1 checksum of the entire origin URL """ - return hashlib.sha1(origin['url'].encode('utf-8')).hexdigest() + return hashlib.sha1(origin["url"].encode("utf-8")).hexdigest() _object_type_map = { - ORIGIN: { - 'short_name': 'ori', - 'key_id': 'id' - }, - SNAPSHOT: { - 'short_name': 'snp', - 'key_id': 'id' - }, - RELEASE: { - 'short_name': 'rel', - 'key_id': 'id' - }, - REVISION: { - 'short_name': 'rev', - 'key_id': 'id' - }, - DIRECTORY: { - 'short_name': 'dir', - 'key_id': 'id' - }, - CONTENT: { - 'short_name': 'cnt', - 'key_id': 'sha1_git' - } + ORIGIN: {"short_name": "ori", "key_id": "id"}, + SNAPSHOT: {"short_name": "snp", "key_id": "id"}, + RELEASE: {"short_name": "rel", "key_id": "id"}, + REVISION: {"short_name": "rev", "key_id": "id"}, + DIRECTORY: {"short_name": "dir", "key_id": "id"}, + CONTENT: {"short_name": "cnt", "key_id": "sha1_git"}, } _PersistentId = NamedTuple( - 'PersistentId', [ - ('namespace', str), - ('scheme_version', int), - ('object_type', str), - ('object_id', str), - ('metadata', Dict[str, Any]), - ]) + "PersistentId", + [ + ("namespace", str), + ("scheme_version", int), + ("object_type", str), + ("object_id", str), + ("metadata", Dict[str, Any]), + ], +) class PersistentId(_PersistentId): @@ -692,41 +698,51 @@ class PersistentId(_PersistentId): pid_str = str(pid) # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0' """ + __slots__ = () - def __new__(cls, namespace=PID_NAMESPACE, scheme_version=PID_VERSION, - object_type='', object_id='', metadata={}): + def __new__( + cls, + namespace=PID_NAMESPACE, + scheme_version=PID_VERSION, + object_type="", + object_id="", + metadata={}, + ): o = _object_type_map.get(object_type) if not o: - raise ValidationError('Wrong input: Supported types are %s' % ( - list(_object_type_map.keys()))) + raise ValidationError( + "Wrong input: Supported types are %s" % (list(_object_type_map.keys())) + ) if namespace != PID_NAMESPACE: raise ValidationError( - "Wrong format: only supported namespace is '%s'" - % 
PID_NAMESPACE) + "Wrong format: only supported namespace is '%s'" % PID_NAMESPACE + ) if scheme_version != PID_VERSION: raise ValidationError( - 'Wrong format: only supported version is %d' % PID_VERSION) + "Wrong format: only supported version is %d" % PID_VERSION + ) # internal swh representation resolution if isinstance(object_id, dict): - object_id = object_id[o['key_id']] + object_id = object_id[o["key_id"]] validate_sha1(object_id) # can raise if invalid hash object_id = hash_to_hex(object_id) return super(cls, PersistentId).__new__( - cls, namespace, scheme_version, object_type, object_id, metadata) + cls, namespace, scheme_version, object_type, object_id, metadata + ) def __str__(self): o = _object_type_map.get(self.object_type) - pid = PID_SEP.join([self.namespace, str(self.scheme_version), - o['short_name'], self.object_id]) + pid = PID_SEP.join( + [self.namespace, str(self.scheme_version), o["short_name"], self.object_id] + ) if self.metadata: for k, v in self.metadata.items(): - pid += '%s%s=%s' % (PID_CTXT_SEP, k, v) + pid += "%s%s=%s" % (PID_CTXT_SEP, k, v) return pid -def persistent_identifier(object_type, object_id, scheme_version=1, - metadata={}): +def persistent_identifier(object_type, object_id, scheme_version=1, metadata={}): """Compute persistent identifier (stable over time) as per documentation. @@ -750,8 +766,12 @@ def persistent_identifier(object_type, object_id, scheme_version=1, str: the persistent identifier """ - pid = PersistentId(scheme_version=scheme_version, object_type=object_type, - object_id=object_id, metadata=metadata) + pid = PersistentId( + scheme_version=scheme_version, + object_type=object_type, + object_id=object_id, + metadata=metadata, + ) return str(pid) @@ -777,32 +797,30 @@ def parse_persistent_identifier(persistent_id): """ # <pid>;<contextual-information> persistent_id_parts = persistent_id.split(PID_CTXT_SEP) - pid_data = persistent_id_parts.pop(0).split(':') + pid_data = persistent_id_parts.pop(0).split(":") if len(pid_data) != 4: - raise ValidationError( - 'Wrong format: There should be 4 mandatory values') + raise ValidationError("Wrong format: There should be 4 mandatory values") # Checking for parsing errors _ns, _version, _type, _id = pid_data pid_data[1] = int(pid_data[1]) for otype, data in _object_type_map.items(): - if _type == data['short_name']: + if _type == data["short_name"]: pid_data[2] = otype break if not _id: - raise ValidationError( - 'Wrong format: Identifier should be present') + raise ValidationError("Wrong format: Identifier should be present") persistent_id_metadata = {} for part in persistent_id_parts: try: - key, val = part.split('=') + key, val = part.split("=") persistent_id_metadata[key] = val except Exception: - msg = 'Contextual data is badly formatted, form key=val expected' + msg = "Contextual data is badly formatted, form key=val expected" raise ValidationError(msg) pid_data.append(persistent_id_metadata) return PersistentId(*pid_data) diff --git a/swh/model/merkle.py b/swh/model/merkle.py index 9d97efdc55b1c0bf23c5abfdea8f995988197ea9..36407d8ffda1d25c2d5911274d9bfbdc14c7cd96 100644 --- a/swh/model/merkle.py +++ b/swh/model/merkle.py @@ -108,7 +108,8 @@ class MerkleNode(dict, metaclass=abc.ABCMeta): collected (bool): whether the current node has been collected """ - __slots__ = ['parents', 'data', '__hash', 'collected'] + + __slots__ = ["parents", "data", "__hash", "collected"] type = None # type: Optional[str] # TODO: make this an enum """Type of the current node (used as a classifier for 
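For context on the identifiers.py hunks above: the Black reformatting changes layout only, not the persistent-identifier API. A minimal usage sketch follows; the hash value is the example already shown in the PersistentId docstring, and the rest relies only on names visible in the diff (CONTENT, persistent_identifier, parse_persistent_identifier):

    from swh.model.identifiers import (
        CONTENT,
        parse_persistent_identifier,
        persistent_identifier,
    )

    # Example sha1_git taken from the PersistentId docstring above.
    object_id = "8ff44f081d43176474b267de5451f2c2e88089d0"

    pid = persistent_identifier(CONTENT, object_id)
    # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0'

    parsed = parse_persistent_identifier(pid)
    assert parsed.object_type == CONTENT
    assert parsed.object_id == object_id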
diff --git a/swh/model/merkle.py b/swh/model/merkle.py
index 9d97efdc55b1c0bf23c5abfdea8f995988197ea9..36407d8ffda1d25c2d5911274d9bfbdc14c7cd96 100644
--- a/swh/model/merkle.py
+++ b/swh/model/merkle.py
@@ -108,7 +108,8 @@ class MerkleNode(dict, metaclass=abc.ABCMeta):
         collected (bool): whether the current node has been collected
     """
-    __slots__ = ['parents', 'data', '__hash', 'collected']
+
+    __slots__ = ["parents", "data", "__hash", "collected"]
 
     type = None  # type: Optional[str]  # TODO: make this an enum
     """Type of the current node (used as a classifier for
    :func:`collect`)"""
@@ -121,8 +122,11 @@ class MerkleNode(dict, metaclass=abc.ABCMeta):
         self.collected = False
 
     def __eq__(self, other):
-        return isinstance(other, MerkleNode) \
-            and super().__eq__(other) and self.data == other.data
+        return (
+            isinstance(other, MerkleNode)
+            and super().__eq__(other)
+            and self.data == other.data
+        )
 
     def __ne__(self, other):
         return not self.__eq__(other)
@@ -170,7 +174,7 @@ class MerkleNode(dict, metaclass=abc.ABCMeta):
         The hash should depend on the data of the node, as well as on hashes
         of the children nodes.
         """
-        raise NotImplementedError('Must implement compute_hash method')
+        raise NotImplementedError("Must implement compute_hash method")
 
     def __setitem__(self, name, new_child):
         """Add a child, invalidating the current hash"""
@@ -273,14 +277,13 @@ class MerkleNode(dict, metaclass=abc.ABCMeta):
         for child in self.values():
             child.reset_collect()
 
-    def iter_tree(self) -> Iterator['MerkleNode']:
+    def iter_tree(self) -> Iterator["MerkleNode"]:
         """Yields all children nodes, recursively.
 
         Common nodes are deduplicated.
         """
         yield from self._iter_tree(set())
 
-    def _iter_tree(
-            self, seen: Set[bytes]) -> Iterator['MerkleNode']:
+    def _iter_tree(self, seen: Set[bytes]) -> Iterator["MerkleNode"]:
         if self.hash not in seen:
             seen.add(self.hash)
             yield self
@@ -293,17 +296,18 @@ class MerkleLeaf(MerkleNode):
 
     A Merkle leaf is simply a Merkle node with children disabled.
     """
+
     __slots__ = []  # type: List[str]
 
     def __setitem__(self, name, child):
-        raise ValueError('%s is a leaf' % self.__class__.__name__)
+        raise ValueError("%s is a leaf" % self.__class__.__name__)
 
     def __getitem__(self, name):
-        raise ValueError('%s is a leaf' % self.__class__.__name__)
+        raise ValueError("%s is a leaf" % self.__class__.__name__)
 
     def __delitem__(self, name):
-        raise ValueError('%s is a leaf' % self.__class__.__name__)
+        raise ValueError("%s is a leaf" % self.__class__.__name__)
 
     def update(self, new_children):
         """Children update operation. Disabled for leaves."""
-        raise ValueError('%s is a leaf' % self.__class__.__name__)
+        raise ValueError("%s is a leaf" % self.__class__.__name__)
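The merkle.py hunks above likewise only restyle the code; the contract stays as the docstrings describe it: subclasses supply compute_hash(), and leaves refuse child operations. A minimal sketch of a custom leaf follows; DataLeaf, its "raw" payload key, and the sha1 choice are illustrative assumptions, not part of the module:

    import hashlib

    from swh.model.merkle import MerkleLeaf


    class DataLeaf(MerkleLeaf):
        """Hypothetical leaf hashed as the sha1 of its raw payload."""

        type = "data"

        def compute_hash(self):
            # self.data is the dict passed at construction time.
            return hashlib.sha1(self.data["raw"]).digest()


    leaf = DataLeaf(data={"raw": b"hello"})
    # The hash is computed lazily on first access and cached afterwards.
    assert leaf.hash == hashlib.sha1(b"hello").digest()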
diff --git a/swh/model/model.py b/swh/model/model.py
index d7b1f896535607ba9975e496c20d1edb10d4e179..2814fd6bc686f652399581e8391cd606abf9aadd 100644
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -15,8 +15,11 @@ import dateutil.parser
 import iso8601
 
 from .identifiers import (
-    normalize_timestamp, directory_identifier, revision_identifier,
-    release_identifier, snapshot_identifier
+    normalize_timestamp,
+    directory_identifier,
+    revision_identifier,
+    release_identifier,
+    snapshot_identifier,
 )
 from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, MultiHash
 
@@ -24,6 +27,7 @@ from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, MultiHash
 class MissingData(Exception):
     """Raised by `Content.with_data` when it has no way of fetching the
     data (but not when fetching the data fails)."""
+
     pass
 
 
@@ -79,21 +83,16 @@ class HashableObject(metaclass=ABCMeta):
     def __attrs_post_init__(self):
         if not self.id:
             obj_id = hash_to_bytes(self.compute_hash(self.to_dict()))
-            object.__setattr__(self, 'id', obj_id)
+            object.__setattr__(self, "id", obj_id)
 
 
 @attr.s(frozen=True)
 class Person(BaseModel):
     """Represents the author/committer of a revision or release."""
-    fullname = attr.ib(
-        type=bytes,
-        validator=type_validator())
-    name = attr.ib(
-        type=Optional[bytes],
-        validator=type_validator())
-    email = attr.ib(
-        type=Optional[bytes],
-        validator=type_validator())
+
+    fullname = attr.ib(type=bytes, validator=type_validator())
+    name = attr.ib(type=Optional[bytes], validator=type_validator())
+    email = attr.ib(type=Optional[bytes], validator=type_validator())
 
     @classmethod
     def from_fullname(cls, fullname: bytes):
@@ -102,19 +101,19 @@ class Person(BaseModel):
 
         The fullname is left unchanged."""
         if fullname is None:
-            raise TypeError('fullname is None.')
+            raise TypeError("fullname is None.")
 
         name: Optional[bytes]
         email: Optional[bytes]
 
         try:
-            open_bracket = fullname.index(b'<')
+            open_bracket = fullname.index(b"<")
         except ValueError:
             name = fullname
             email = None
         else:
             raw_name = fullname[:open_bracket]
-            raw_email = fullname[open_bracket+1:]
+            raw_email = fullname[open_bracket + 1 :]
 
             if not raw_name:
                 name = None
@@ -122,63 +121,51 @@
                 name = raw_name.strip()
 
             try:
-                close_bracket = raw_email.rindex(b'>')
+                close_bracket = raw_email.rindex(b">")
             except ValueError:
                 email = raw_email
             else:
                 email = raw_email[:close_bracket]
 
-        return Person(
-            name=name or None,
-            email=email or None,
-            fullname=fullname,
-        )
+        return Person(name=name or None, email=email or None, fullname=fullname,)
 
 
 @attr.s(frozen=True)
 class Timestamp(BaseModel):
     """Represents a naive timestamp from a VCS."""
-    seconds = attr.ib(
-        type=int,
-        validator=type_validator())
-    microseconds = attr.ib(
-        type=int,
-        validator=type_validator())
+
+    seconds = attr.ib(type=int, validator=type_validator())
+    microseconds = attr.ib(type=int, validator=type_validator())
 
     @seconds.validator
     def check_seconds(self, attribute, value):
         """Check that seconds fit in a 64-bits signed integer."""
-        if not (-2**63 <= value < 2**63):
-            raise ValueError('Seconds must be a signed 64-bits integer.')
+        if not (-(2 ** 63) <= value < 2 ** 63):
+            raise ValueError("Seconds must be a signed 64-bits integer.")
 
     @microseconds.validator
     def check_microseconds(self, attribute, value):
         """Checks that microseconds are positive and < 1000000."""
-        if not (0 <= value < 10**6):
-            raise ValueError('Microseconds must be in [0, 1000000[.')
+        if not (0 <= value < 10 ** 6):
+            raise ValueError("Microseconds must be in [0, 1000000[.")
 
 
 @attr.s(frozen=True)
 class TimestampWithTimezone(BaseModel):
     """Represents a TZ-aware timestamp from a VCS."""
-    timestamp = attr.ib(
-        type=Timestamp,
-        validator=type_validator())
-    offset = attr.ib(
-        type=int,
-        validator=type_validator())
-    negative_utc = attr.ib(
-        type=bool,
-        validator=type_validator())
+
+    timestamp = attr.ib(type=Timestamp, validator=type_validator())
+    offset = attr.ib(type=int, validator=type_validator())
+    negative_utc = attr.ib(type=bool, validator=type_validator())
 
     @offset.validator
     def check_offset(self, attribute, value):
         """Checks the offset is a 16-bits signed integer (in theory, it
         should always be between -14 and +14 hours)."""
-        if not (-2**15 <= value < 2**15):
+        if not (-(2 ** 15) <= value < 2 ** 15):
             # max 14 hours offset in theory, but you never know what
             # you'll find in the wild...
-            raise ValueError('offset too large: %d minutes' % value)
+            raise ValueError("offset too large: %d minutes" % value)
 
     @negative_utc.validator
     def check_negative_utc(self, attribute, value):
@@ -193,9 +180,10 @@ class TimestampWithTimezone(BaseModel):
         # name
         d = normalize_timestamp(obj)
         return cls(
-            timestamp=Timestamp.from_dict(d['timestamp']),
-            offset=d['offset'],
-            negative_utc=d['negative_utc'])
+            timestamp=Timestamp.from_dict(d["timestamp"]),
+            offset=d["offset"],
+            negative_utc=d["negative_utc"],
+        )
 
     @classmethod
     def from_datetime(cls, dt: datetime.datetime):
@@ -207,7 +195,7 @@ class TimestampWithTimezone(BaseModel):
         """
         dt = iso8601.parse_date(s)
         tstz = cls.from_datetime(dt)
-        if dt.tzname() == '-00:00':
+        if dt.tzname() == "-00:00":
             tstz = attr.evolve(tstz, negative_utc=True)
         return tstz
 
@@ -215,54 +203,42 @@
 @attr.s(frozen=True)
 class Origin(BaseModel):
     """Represents a software source: a VCS and an URL."""
-    url = attr.ib(
-        type=str,
-        validator=type_validator())
+
+    url = attr.ib(type=str, validator=type_validator())
 
 
 @attr.s(frozen=True)
 class OriginVisit(BaseModel):
     """Represents a visit of an origin at a given point in time, by a
     SWH loader."""
-    origin = attr.ib(
-        type=str,
-        validator=type_validator())
-    date = attr.ib(
-        type=datetime.datetime,
-        validator=type_validator())
+
+    origin = attr.ib(type=str, validator=type_validator())
+    date = attr.ib(type=datetime.datetime, validator=type_validator())
     status = attr.ib(
-        type=str,
-        validator=attr.validators.in_(['ongoing', 'full', 'partial']))
-    type = attr.ib(
-        type=str,
-        validator=type_validator())
-    snapshot = attr.ib(
-        type=Optional[Sha1Git],
-        validator=type_validator())
+        type=str, validator=attr.validators.in_(["ongoing", "full", "partial"])
+    )
+    type = attr.ib(type=str, validator=type_validator())
+    snapshot = attr.ib(type=Optional[Sha1Git], validator=type_validator())
     metadata = attr.ib(
-        type=Optional[Dict[str, object]],
-        validator=type_validator(),
-        default=None)
-    visit = attr.ib(
-        type=Optional[int],
-        validator=type_validator(),
-        default=None)
+        type=Optional[Dict[str, object]], validator=type_validator(), default=None
+    )
+    visit = attr.ib(type=Optional[int], validator=type_validator(), default=None)
     """Should not be set before calling 'origin_visit_add()'."""
 
     def to_dict(self):
         """Serializes the date as a string and omits the visit id if it is
         `None`."""
         ov = super().to_dict()
-        if ov['visit'] is None:
-            del ov['visit']
+        if ov["visit"] is None:
+            del ov["visit"]
         return ov
 
     @classmethod
     def from_dict(cls, d):
         """Parses the date from a string, and accepts missing visit ids."""
-        if isinstance(d['date'], str):
+        if isinstance(d["date"], str):
             d = d.copy()
-            d['date'] = dateutil.parser.parse(d['date'])
+            d["date"] = dateutil.parser.parse(d["date"])
         return super().from_dict(d)
 
 
@@ -271,57 +247,48 @@ class OriginVisitUpdate(BaseModel):
     """Represents a visit update of an origin at a given point in time.
 
     """
-    origin = attr.ib(
-        type=str,
-        validator=type_validator())
-    visit = attr.ib(
-        type=int,
-        validator=type_validator())
-    date = attr.ib(
-        type=datetime.datetime,
-        validator=type_validator())
+    origin = attr.ib(type=str, validator=type_validator())
+    visit = attr.ib(type=int, validator=type_validator())
+
+    date = attr.ib(type=datetime.datetime, validator=type_validator())
     status = attr.ib(
-        type=str,
-        validator=attr.validators.in_(['ongoing', 'full', 'partial']))
-    snapshot = attr.ib(
-        type=Optional[Sha1Git],
-        validator=type_validator())
+        type=str, validator=attr.validators.in_(["ongoing", "full", "partial"])
+    )
+    snapshot = attr.ib(type=Optional[Sha1Git], validator=type_validator())
     metadata = attr.ib(
-        type=Optional[Dict[str, object]],
-        validator=type_validator(),
-        default=None)
+        type=Optional[Dict[str, object]], validator=type_validator(), default=None
+    )
 
 
 class TargetType(Enum):
     """The type of content pointed to by a snapshot branch. Usually a
     revision or an alias."""
-    CONTENT = 'content'
-    DIRECTORY = 'directory'
-    REVISION = 'revision'
-    RELEASE = 'release'
-    SNAPSHOT = 'snapshot'
-    ALIAS = 'alias'
+
+    CONTENT = "content"
+    DIRECTORY = "directory"
+    REVISION = "revision"
+    RELEASE = "release"
+    SNAPSHOT = "snapshot"
+    ALIAS = "alias"
 
 
 class ObjectType(Enum):
     """The type of content pointed to by a release. Usually a revision"""
-    CONTENT = 'content'
-    DIRECTORY = 'directory'
-    REVISION = 'revision'
-    RELEASE = 'release'
-    SNAPSHOT = 'snapshot'
+
+    CONTENT = "content"
+    DIRECTORY = "directory"
+    REVISION = "revision"
+    RELEASE = "release"
+    SNAPSHOT = "snapshot"
 
 
 @attr.s(frozen=True)
 class SnapshotBranch(BaseModel):
     """Represents one of the branches of a snapshot."""
-    target = attr.ib(
-        type=bytes,
-        validator=type_validator())
-    target_type = attr.ib(
-        type=TargetType,
-        validator=type_validator())
+
+    target = attr.ib(type=bytes, validator=type_validator())
+    target_type = attr.ib(type=TargetType, validator=type_validator())
 
     @target.validator
     def check_target(self, attribute, value):
@@ -329,26 +296,21 @@ class SnapshotBranch(BaseModel):
         valid sha1_git."""
         if self.target_type != TargetType.ALIAS and self.target is not None:
             if len(value) != 20:
-                raise ValueError('Wrong length for bytes identifier: %d' %
-                                 len(value))
+                raise ValueError("Wrong length for bytes identifier: %d" % len(value))
 
     @classmethod
     def from_dict(cls, d):
-        return cls(
-            target=d['target'],
-            target_type=TargetType(d['target_type']))
+        return cls(target=d["target"], target_type=TargetType(d["target_type"]))
 
 
 @attr.s(frozen=True)
 class Snapshot(BaseModel, HashableObject):
     """Represents the full state of an origin at a given point in time."""
+
     branches = attr.ib(
-        type=Dict[bytes, Optional[SnapshotBranch]],
-        validator=type_validator())
-    id = attr.ib(
-        type=Sha1Git,
-        validator=type_validator(),
-        default=b'')
+        type=Dict[bytes, Optional[SnapshotBranch]], validator=type_validator()
+    )
+    id = attr.ib(type=Sha1Git, validator=type_validator(), default=b"")
 
     @staticmethod
     def compute_hash(object_dict):
@@ -360,44 +322,27 @@ class Snapshot(BaseModel, HashableObject):
         return cls(
             branches={
                 name: SnapshotBranch.from_dict(branch) if branch else None
-                for (name, branch) in d.pop('branches').items()
+                for (name, branch) in d.pop("branches").items()
             },
-            **d)
+            **d
+        )
 
 
 @attr.s(frozen=True)
 class Release(BaseModel, HashableObject):
-    name = attr.ib(
-        type=bytes,
-        validator=type_validator())
-    message = attr.ib(
-        type=Optional[bytes],
-        validator=type_validator())
-    target = attr.ib(
-        type=Optional[Sha1Git],
-        validator=type_validator())
-    target_type = attr.ib(
-        type=ObjectType,
-        validator=type_validator())
-    synthetic = attr.ib(
-        type=bool,
-        validator=type_validator())
-    author = attr.ib(
-        type=Optional[Person],
-        validator=type_validator(),
-        default=None)
+    name = attr.ib(type=bytes, validator=type_validator())
+    message = attr.ib(type=Optional[bytes], validator=type_validator())
+    target = attr.ib(type=Optional[Sha1Git], validator=type_validator())
+    target_type = attr.ib(type=ObjectType, validator=type_validator())
+    synthetic = attr.ib(type=bool, validator=type_validator())
+    author = attr.ib(type=Optional[Person], validator=type_validator(), default=None)
     date = attr.ib(
-        type=Optional[TimestampWithTimezone],
-        validator=type_validator(),
-        default=None)
+        type=Optional[TimestampWithTimezone], validator=type_validator(), default=None
+    )
     metadata = attr.ib(
-        type=Optional[Dict[str, object]],
-        validator=type_validator(),
-        default=None)
-    id = attr.ib(
-        type=Sha1Git,
-        validator=type_validator(),
-        default=b'')
+        type=Optional[Dict[str, object]], validator=type_validator(), default=None
+    )
+    id = attr.ib(type=Sha1Git, validator=type_validator(), default=b"")
 
     @staticmethod
     def compute_hash(object_dict):
@@ -407,72 +352,51 @@ class Release(BaseModel, HashableObject):
     def check_author(self, attribute, value):
         """If the author is `None`, checks the date is `None` too."""
         if self.author is None and self.date is not None:
-            raise ValueError('release date must be None if author is None.')
+            raise ValueError("release date must be None if author is None.")
 
     def to_dict(self):
         rel = super().to_dict()
-        if rel['metadata'] is None:
-            del rel['metadata']
+        if rel["metadata"] is None:
+            del rel["metadata"]
         return rel
 
     @classmethod
     def from_dict(cls, d):
         d = d.copy()
-        if d.get('author'):
-            d['author'] = Person.from_dict(d['author'])
-        if d.get('date'):
-            d['date'] = TimestampWithTimezone.from_dict(d['date'])
-        return cls(
-            target_type=ObjectType(d.pop('target_type')),
-            **d)
+        if d.get("author"):
+            d["author"] = Person.from_dict(d["author"])
+        if d.get("date"):
+            d["date"] = TimestampWithTimezone.from_dict(d["date"])
+        return cls(target_type=ObjectType(d.pop("target_type")), **d)
 
 
 class RevisionType(Enum):
-    GIT = 'git'
-    TAR = 'tar'
-    DSC = 'dsc'
-    SUBVERSION = 'svn'
-    MERCURIAL = 'hg'
+    GIT = "git"
+    TAR = "tar"
+    DSC = "dsc"
+    SUBVERSION = "svn"
+    MERCURIAL = "hg"
 
 
 @attr.s(frozen=True)
 class Revision(BaseModel, HashableObject):
-    message = attr.ib(
-        type=bytes,
-        validator=type_validator())
-    author = attr.ib(
-        type=Person,
-        validator=type_validator())
-    committer = attr.ib(
-        type=Person,
-        validator=type_validator())
-    date = attr.ib(
-        type=Optional[TimestampWithTimezone],
-        validator=type_validator())
+    message = attr.ib(type=bytes, validator=type_validator())
+    author = attr.ib(type=Person, validator=type_validator())
+    committer = attr.ib(type=Person, validator=type_validator())
+    date = attr.ib(type=Optional[TimestampWithTimezone], validator=type_validator())
     committer_date = attr.ib(
-        type=Optional[TimestampWithTimezone],
-        validator=type_validator())
-    type = attr.ib(
-        type=RevisionType,
-        validator=type_validator())
-    directory = attr.ib(
-        type=Sha1Git,
-        validator=type_validator())
-    synthetic = attr.ib(
-        type=bool,
-        validator=type_validator())
+        type=Optional[TimestampWithTimezone], validator=type_validator()
+    )
+    type = attr.ib(type=RevisionType, validator=type_validator())
+    directory = attr.ib(type=Sha1Git, validator=type_validator())
+    synthetic = attr.ib(type=bool, validator=type_validator())
     metadata = attr.ib(
-        type=Optional[Dict[str, object]],
-        validator=type_validator(),
-        default=None)
+        type=Optional[Dict[str, object]], validator=type_validator(), default=None
+    )
     parents = attr.ib(
-        type=List[Sha1Git],
-        validator=type_validator(),
-        default=attr.Factory(list))
-    id = attr.ib(
-        type=Sha1Git,
-        validator=type_validator(),
-        default=b'')
+        type=List[Sha1Git], validator=type_validator(), default=attr.Factory(list)
+    )
+    id = attr.ib(type=Sha1Git, validator=type_validator(), default=b"")
 
     @staticmethod
     def compute_hash(object_dict):
@@ -481,50 +405,37 @@
     @classmethod
     def from_dict(cls, d):
         d = d.copy()
-        date = d.pop('date')
+        date = d.pop("date")
         if date:
             date = TimestampWithTimezone.from_dict(date)
 
-        committer_date = d.pop('committer_date')
+        committer_date = d.pop("committer_date")
         if committer_date:
-            committer_date = TimestampWithTimezone.from_dict(
-                committer_date)
+            committer_date = TimestampWithTimezone.from_dict(committer_date)
 
         return cls(
-            author=Person.from_dict(d.pop('author')),
-            committer=Person.from_dict(d.pop('committer')),
+            author=Person.from_dict(d.pop("author")),
+            committer=Person.from_dict(d.pop("committer")),
             date=date,
             committer_date=committer_date,
-            type=RevisionType(d.pop('type')),
-            **d)
+            type=RevisionType(d.pop("type")),
+            **d
+        )
 
 
 @attr.s(frozen=True)
 class DirectoryEntry(BaseModel):
-    name = attr.ib(
-        type=bytes,
-        validator=type_validator())
-    type = attr.ib(
-        type=str,
-        validator=attr.validators.in_(['file', 'dir', 'rev']))
-    target = attr.ib(
-        type=Sha1Git,
-        validator=type_validator())
-    perms = attr.ib(
-        type=int,
-        validator=type_validator())
+    name = attr.ib(type=bytes, validator=type_validator())
+    type = attr.ib(type=str, validator=attr.validators.in_(["file", "dir", "rev"]))
+    target = attr.ib(type=Sha1Git, validator=type_validator())
+    perms = attr.ib(type=int, validator=type_validator())
     """Usually one of the values of `swh.model.from_disk.DentryPerms`."""
 
 
 @attr.s(frozen=True)
 class Directory(BaseModel, HashableObject):
-    entries = attr.ib(
-        type=List[DirectoryEntry],
-        validator=type_validator())
-    id = attr.ib(
-        type=Sha1Git,
-        validator=type_validator(),
-        default=b'')
+    entries = attr.ib(type=List[DirectoryEntry], validator=type_validator())
+    id = attr.ib(type=Sha1Git, validator=type_validator(), default=b"")
 
     @staticmethod
     def compute_hash(object_dict):
@@ -534,23 +445,22 @@ class Directory(BaseModel, HashableObject):
     def from_dict(cls, d):
         d = d.copy()
         return cls(
-            entries=[DirectoryEntry.from_dict(entry)
-                     for entry in d.pop('entries')],
-            **d)
+            entries=[DirectoryEntry.from_dict(entry) for entry in d.pop("entries")], **d
+        )
 
 
 @attr.s(frozen=True)
 class BaseContent(BaseModel):
     status = attr.ib(
-        type=str,
-        validator=attr.validators.in_(['visible', 'hidden', 'absent']))
+        type=str, validator=attr.validators.in_(["visible", "hidden", "absent"])
+    )
 
     @staticmethod
     def _hash_data(data: bytes):
         """Hash some data, returning most of the fields of a content object"""
         d = MultiHash.from_data(data).digest()
-        d['data'] = data
-        d['length'] = len(data)
+        d["data"] = data
+        d["length"] = len(data)
         return d
 
@@ -558,7 +468,7 @@ class BaseContent(BaseModel):
     def from_dict(cls, d, use_subclass=True):
         if use_subclass:
             # Chooses a subclass to instantiate instead.
-            if d['status'] == 'absent':
+            if d["status"] == "absent":
                 return SkippedContent.from_dict(d)
             else:
                 return Content.from_dict(d)
@@ -567,7 +477,7 @@ class BaseContent(BaseModel):
 
     def get_hash(self, hash_name):
         if hash_name not in DEFAULT_ALGORITHMS:
-            raise ValueError('{} is not a valid hash name.'.format(hash_name))
+            raise ValueError("{} is not a valid hash name.".format(hash_name))
         return getattr(self, hash_name)
 
     def hashes(self) -> Dict[str, bytes]:
@@ -577,142 +487,108 @@
 
 @attr.s(frozen=True)
 class Content(BaseContent):
-    sha1 = attr.ib(
-        type=bytes,
-        validator=type_validator())
-    sha1_git = attr.ib(
-        type=Sha1Git,
-        validator=type_validator())
-    sha256 = attr.ib(
-        type=bytes,
-        validator=type_validator())
-    blake2s256 = attr.ib(
-        type=bytes,
-        validator=type_validator())
-
-    length = attr.ib(
-        type=int,
-        validator=type_validator())
+    sha1 = attr.ib(type=bytes, validator=type_validator())
+    sha1_git = attr.ib(type=Sha1Git, validator=type_validator())
+    sha256 = attr.ib(type=bytes, validator=type_validator())
+    blake2s256 = attr.ib(type=bytes, validator=type_validator())
+
+    length = attr.ib(type=int, validator=type_validator())
 
     status = attr.ib(
         type=str,
-        validator=attr.validators.in_(['visible', 'hidden']),
-        default='visible')
+        validator=attr.validators.in_(["visible", "hidden"]),
+        default="visible",
+    )
 
-    data = attr.ib(
-        type=Optional[bytes],
-        validator=type_validator(),
-        default=None)
+    data = attr.ib(type=Optional[bytes], validator=type_validator(), default=None)
 
     ctime = attr.ib(
-        type=Optional[datetime.datetime],
-        validator=type_validator(),
-        default=None)
+        type=Optional[datetime.datetime], validator=type_validator(), default=None
+    )
 
     @length.validator
     def check_length(self, attribute, value):
         """Checks the length is positive."""
         if value < 0:
-            raise ValueError('Length must be positive.')
+            raise ValueError("Length must be positive.")
 
     def to_dict(self):
         content = super().to_dict()
-        if content['data'] is None:
-            del content['data']
+        if content["data"] is None:
+            del content["data"]
         return content
 
     @classmethod
-    def from_data(cls, data, status='visible', ctime=None) -> 'Content':
+    def from_data(cls, data, status="visible", ctime=None) -> "Content":
         """Generate a Content from a given `data` byte string.
 
         This populates the Content with the hashes and length for the data
         passed as argument, as well as the data itself.
         """
         d = cls._hash_data(data)
-        d['status'] = status
-        d['ctime'] = ctime
+        d["status"] = status
+        d["ctime"] = ctime
         return cls(**d)
 
     @classmethod
     def from_dict(cls, d):
-        if isinstance(d.get('ctime'), str):
+        if isinstance(d.get("ctime"), str):
             d = d.copy()
-            d['ctime'] = dateutil.parser.parse(d['ctime'])
+            d["ctime"] = dateutil.parser.parse(d["ctime"])
         return super().from_dict(d, use_subclass=False)
 
-    def with_data(self) -> 'Content':
+    def with_data(self) -> "Content":
         """Loads the `data` attribute; meaning that it is guaranteed not to
         be None after this call.
 
         This call is almost a no-op, but subclasses may overload this method
         to lazy-load data (eg. from disk or objstorage)."""
         if self.data is None:
-            raise MissingData('Content data is None.')
+            raise MissingData("Content data is None.")
         return self
 
 
 @attr.s(frozen=True)
 class SkippedContent(BaseContent):
-    sha1 = attr.ib(
-        type=Optional[bytes],
-        validator=type_validator())
-    sha1_git = attr.ib(
-        type=Optional[Sha1Git],
-        validator=type_validator())
-    sha256 = attr.ib(
-        type=Optional[bytes],
-        validator=type_validator())
-    blake2s256 = attr.ib(
-        type=Optional[bytes],
-        validator=type_validator())
-
-    length = attr.ib(
-        type=Optional[int],
-        validator=type_validator())
+    sha1 = attr.ib(type=Optional[bytes], validator=type_validator())
+    sha1_git = attr.ib(type=Optional[Sha1Git], validator=type_validator())
+    sha256 = attr.ib(type=Optional[bytes], validator=type_validator())
+    blake2s256 = attr.ib(type=Optional[bytes], validator=type_validator())
 
-    status = attr.ib(
-        type=str,
-        validator=attr.validators.in_(['absent']))
-    reason = attr.ib(
-        type=Optional[str],
-        validator=type_validator(),
-        default=None)
+    length = attr.ib(type=Optional[int], validator=type_validator())
+
+    status = attr.ib(type=str, validator=attr.validators.in_(["absent"]))
+    reason = attr.ib(type=Optional[str], validator=type_validator(), default=None)
 
-    origin = attr.ib(
-        type=Optional[str],
-        validator=type_validator(),
-        default=None)
+    origin = attr.ib(type=Optional[str], validator=type_validator(), default=None)
 
     ctime = attr.ib(
-        type=Optional[datetime.datetime],
-        validator=type_validator(),
-        default=None)
+        type=Optional[datetime.datetime], validator=type_validator(), default=None
+    )
 
     @reason.validator
     def check_reason(self, attribute, value):
         """Checks the reason is full if status != absent."""
         assert self.reason == value
         if value is None:
-            raise ValueError('Must provide a reason if content is absent.')
+            raise ValueError("Must provide a reason if content is absent.")
 
     @length.validator
     def check_length(self, attribute, value):
         """Checks the length is positive or -1."""
         if value < -1:
-            raise ValueError('Length must be positive or -1.')
+            raise ValueError("Length must be positive or -1.")
 
     def to_dict(self):
         content = super().to_dict()
-        if content['origin'] is None:
-            del content['origin']
+        if content["origin"] is None:
+            del content["origin"]
         return content
 
     @classmethod
     def from_data(
-            cls,
-            data: bytes,
-            reason: str,
-            ctime: Optional[datetime.datetime] = None) -> 'SkippedContent':
+        cls, data: bytes, reason: str, ctime: Optional[datetime.datetime] = None
+    ) -> "SkippedContent":
         """Generate a SkippedContent from a given `data` byte string.
 
         This populates the SkippedContent with the hashes and length for the
@@ -722,15 +598,15 @@ class SkippedContent(BaseContent):
         of its attributes, e.g. for tests.
         """
         d = cls._hash_data(data)
-        del d['data']
-        d['status'] = 'absent'
-        d['reason'] = reason
-        d['ctime'] = ctime
+        del d["data"]
+        d["status"] = "absent"
+        d["reason"] = reason
+        d["ctime"] = ctime
         return cls(**d)
 
     @classmethod
     def from_dict(cls, d):
         d2 = d.copy()
-        if d2.pop('data', None) is not None:
+        if d2.pop("data", None) is not None:
             raise ValueError('SkippedContent has no "data" attribute %r' % d)
         return super().from_dict(d2, use_subclass=False)
diff --git a/swh/model/tests/fields/test_compound.py b/swh/model/tests/fields/test_compound.py
index dffbb043139221d7ed87197f368a4a2c5349f475..352bba9d7358d60f46cf6bf34f7bf219fb3a7a9f 100644
--- a/swh/model/tests/fields/test_compound.py
+++ b/swh/model/tests/fields/test_compound.py
@@ -18,12 +18,12 @@ class ValidateCompound(unittest.TestCase):
         def validate_never(model):
             return False
 
-        self.test_model = 'test model'
+        self.test_model = "test model"
         self.test_schema = {
-            'int': (True, simple.validate_int),
-            'str': (True, simple.validate_str),
-            'str2': (True, simple.validate_str),
-            'datetime': (False, simple.validate_datetime),
+            "int": (True, simple.validate_int),
+            "str": (True, simple.validate_str),
+            "str2": (True, simple.validate_str),
+            "datetime": (False, simple.validate_datetime),
             NON_FIELD_ERRORS: validate_always,
         }
 
@@ -31,43 +31,48 @@ class ValidateCompound(unittest.TestCase):
         self.test_schema_shortcut[NON_FIELD_ERRORS] = validate_never
 
         self.test_schema_field_failed = self.test_schema.copy()
-        self.test_schema_field_failed['int'] = (True, [simple.validate_int,
-                                                       validate_never])
+        self.test_schema_field_failed["int"] = (
+            True,
+            [simple.validate_int, validate_never],
+        )
 
         self.test_value = {
-            'str': 'value1',
-            'str2': 'value2',
-            'int': 42,
-            'datetime': datetime.datetime(1990, 1, 1, 12, 0, 0,
-                                          tzinfo=datetime.timezone.utc),
+            "str": "value1",
+            "str2": "value2",
+            "int": 42,
+            "datetime": datetime.datetime(
+                1990, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc
+            ),
         }
 
         self.test_value_missing = {
-            'str': 'value1',
+            "str": "value1",
         }
 
         self.test_value_str_error = {
-            'str': 1984,
-            'str2': 'value2',
-            'int': 42,
-            'datetime': datetime.datetime(1990, 1, 1, 12, 0, 0,
-                                          tzinfo=datetime.timezone.utc),
+            "str": 1984,
+            "str2": "value2",
+            "int": 42,
+            "datetime": datetime.datetime(
+                1990, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc
+            ),
         }
 
-        self.test_value_missing_keys = {'int'}
+        self.test_value_missing_keys = {"int"}
 
         self.test_value_wrong_type = 42
 
         self.present_keys = set(self.test_value)
-        self.missing_keys = {'missingkey1', 'missingkey2'}
+        self.missing_keys = {"missingkey1", "missingkey2"}
 
     def test_validate_any_key(self):
-        self.assertTrue(
-            compound.validate_any_key(self.test_value, self.present_keys))
+        self.assertTrue(compound.validate_any_key(self.test_value, self.present_keys))
 
         self.assertTrue(
-            compound.validate_any_key(self.test_value,
-                                      self.present_keys | self.missing_keys))
+            compound.validate_any_key(
+                self.test_value, self.present_keys | self.missing_keys
+            )
+        )
 
     def test_validate_any_key_missing(self):
         with self.assertRaises(ValidationError) as cm:
@@ -75,13 +80,13 @@ class ValidateCompound(unittest.TestCase):
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'missing-alternative-field')
-        self.assertEqual(exc.params['missing_fields'],
-                         ', '.join(sorted(self.missing_keys)))
+        self.assertEqual(exc.code, "missing-alternative-field")
+        self.assertEqual(
+            exc.params["missing_fields"], ", ".join(sorted(self.missing_keys))
+        )
 
     def test_validate_all_keys(self):
-        self.assertTrue(
-            compound.validate_all_keys(self.test_value, self.present_keys))
+        self.assertTrue(compound.validate_all_keys(self.test_value, self.present_keys))
 
     def test_validate_all_keys_missing(self):
         with self.assertRaises(ValidationError) as cm:
@@ -89,41 +94,49 @@ class ValidateCompound(unittest.TestCase):
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'missing-mandatory-field')
-        self.assertEqual(exc.params['missing_fields'],
-                         ', '.join(sorted(self.missing_keys)))
+        self.assertEqual(exc.code, "missing-mandatory-field")
+        self.assertEqual(
+            exc.params["missing_fields"], ", ".join(sorted(self.missing_keys))
+        )
 
         with self.assertRaises(ValidationError) as cm:
-            compound.validate_all_keys(self.test_value,
-                                       self.present_keys | self.missing_keys)
+            compound.validate_all_keys(
+                self.test_value, self.present_keys | self.missing_keys
+            )
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'missing-mandatory-field')
-        self.assertEqual(exc.params['missing_fields'],
-                         ', '.join(sorted(self.missing_keys)))
+        self.assertEqual(exc.code, "missing-mandatory-field")
+        self.assertEqual(
+            exc.params["missing_fields"], ", ".join(sorted(self.missing_keys))
+        )
 
     def test_validate_against_schema(self):
         self.assertTrue(
-            compound.validate_against_schema(self.test_model, self.test_schema,
-                                             self.test_value))
+            compound.validate_against_schema(
+                self.test_model, self.test_schema, self.test_value
+            )
+        )
 
     def test_validate_against_schema_wrong_type(self):
         with self.assertRaises(ValidationError) as cm:
-            compound.validate_against_schema(self.test_model, self.test_schema,
-                                             self.test_value_wrong_type)
+            compound.validate_against_schema(
+                self.test_model, self.test_schema, self.test_value_wrong_type
+            )
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'model-unexpected-type')
-        self.assertEqual(exc.params['model'], self.test_model)
-        self.assertEqual(exc.params['type'],
-                         self.test_value_wrong_type.__class__.__name__)
+        self.assertEqual(exc.code, "model-unexpected-type")
+        self.assertEqual(exc.params["model"], self.test_model)
+        self.assertEqual(
+            exc.params["type"], self.test_value_wrong_type.__class__.__name__
+        )
 
     def test_validate_against_schema_mandatory_keys(self):
         with self.assertRaises(ValidationError) as cm:
-            compound.validate_against_schema(self.test_model, self.test_schema,
-                                             self.test_value_missing)
+            compound.validate_against_schema(
+                self.test_model, self.test_schema, self.test_value_missing
+            )
 
         # The exception should be of the form:
         # ValidationError({
@@ -138,15 +151,13 @@ class ValidateCompound(unittest.TestCase):
             self.assertEqual(len(nested_key), 1)
             nested = nested_key[0]
             self.assertIsInstance(nested, ValidationError)
-            self.assertEqual(nested.code, 'model-field-mandatory')
-            self.assertEqual(nested.params['field'], key)
+            self.assertEqual(nested.code, "model-field-mandatory")
+            self.assertEqual(nested.params["field"], key)
 
     def test_validate_whole_schema_shortcut_previous_error(self):
         with self.assertRaises(ValidationError) as cm:
             compound.validate_against_schema(
-                self.test_model,
-                self.test_schema_shortcut,
-                self.test_value_missing,
+                self.test_model, self.test_schema_shortcut, self.test_value_missing,
             )
 
         exc = cm.exception
@@ -156,9 +167,7 @@ class ValidateCompound(unittest.TestCase):
     def test_validate_whole_schema(self):
         with self.assertRaises(ValidationError) as cm:
             compound.validate_against_schema(
-                self.test_model,
-                self.test_schema_shortcut,
-                self.test_value,
+                self.test_model, self.test_schema_shortcut, self.test_value,
             )
 
         # The exception should be of the form:
@@ -176,14 +185,15 @@ class ValidateCompound(unittest.TestCase):
 
         nested = non_field_errors[0]
         self.assertIsInstance(nested, ValidationError)
-        self.assertEqual(nested.code, 'model-validation-failed')
-        self.assertEqual(nested.params['model'], self.test_model)
-        self.assertEqual(nested.params['validator'], 'validate_never')
+        self.assertEqual(nested.code, "model-validation-failed")
+        self.assertEqual(nested.params["model"], self.test_model)
+        self.assertEqual(nested.params["validator"], "validate_never")
 
     def test_validate_against_schema_field_error(self):
         with self.assertRaises(ValidationError) as cm:
-            compound.validate_against_schema(self.test_model, self.test_schema,
-                                             self.test_value_str_error)
+            compound.validate_against_schema(
+                self.test_model, self.test_schema, self.test_value_str_error
+            )
 
         # The exception should be of the form:
         # ValidationError({
@@ -192,21 +202,21 @@ class ValidateCompound(unittest.TestCase):
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(set(exc.error_dict.keys()), {'str'})
+        self.assertEqual(set(exc.error_dict.keys()), {"str"})
 
-        str_errors = exc.error_dict['str']
+        str_errors = exc.error_dict["str"]
         self.assertIsInstance(str_errors, list)
         self.assertEqual(len(str_errors), 1)
 
         nested = str_errors[0]
         self.assertIsInstance(nested, ValidationError)
-        self.assertEqual(nested.code, 'unexpected-type')
+        self.assertEqual(nested.code, "unexpected-type")
 
     def test_validate_against_schema_field_failed(self):
         with self.assertRaises(ValidationError) as cm:
-            compound.validate_against_schema(self.test_model,
-                                             self.test_schema_field_failed,
-                                             self.test_value)
+            compound.validate_against_schema(
+                self.test_model, self.test_schema_field_failed, self.test_value
+            )
 
         # The exception should be of the form:
         # ValidationError({
@@ -215,14 +225,14 @@ class ValidateCompound(unittest.TestCase):
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(set(exc.error_dict.keys()), {'int'})
+        self.assertEqual(set(exc.error_dict.keys()), {"int"})
 
-        int_errors = exc.error_dict['int']
+        int_errors = exc.error_dict["int"]
         self.assertIsInstance(int_errors, list)
         self.assertEqual(len(int_errors), 1)
 
         nested = int_errors[0]
         self.assertIsInstance(nested, ValidationError)
-        self.assertEqual(nested.code, 'field-validation-failed')
-        self.assertEqual(nested.params['validator'], 'validate_never')
-        self.assertEqual(nested.params['field'], 'int')
+        self.assertEqual(nested.code, "field-validation-failed")
+        self.assertEqual(nested.params["validator"], "validate_never")
+        self.assertEqual(nested.params["field"], "int")
diff --git a/swh/model/tests/fields/test_hashes.py b/swh/model/tests/fields/test_hashes.py
index 7ce0b78be04485a3f3cd11390eddcac2e112da5f..15dbcc28830e4648aca9f323c5b0ea83743305b8 100644
--- a/swh/model/tests/fields/test_hashes.py
+++ b/swh/model/tests/fields/test_hashes.py
@@ -12,20 +12,20 @@ from swh.model.fields import hashes
 class ValidateHashes(unittest.TestCase):
     def setUp(self):
         self.valid_byte_hashes = {
-            'sha1': b'\xf1\xd2\xd2\xf9\x24\xe9\x86\xac\x86\xfd\xf7\xb3\x6c\x94'
-                    b'\xbc\xdf\x32\xbe\xec\x15',
-            'sha1_git': b'\x25\x7c\xc5\x64\x2c\xb1\xa0\x54\xf0\x8c\xc8\x3f\x2d'
-                        b'\x94\x3e\x56\xfd\x3e\xbe\x99',
-            'sha256': b'\xb5\xbb\x9d\x80\x14\xa0\xf9\xb1\xd6\x1e\x21\xe7\x96'
-                      b'\xd7\x8d\xcc\xdf\x13\x52\xf2\x3c\xd3\x28\x12\xf4\x85'
-                      b'\x0b\x87\x8a\xe4\x94\x4c',
+            "sha1": b"\xf1\xd2\xd2\xf9\x24\xe9\x86\xac\x86\xfd\xf7\xb3\x6c\x94"
+            b"\xbc\xdf\x32\xbe\xec\x15",
+            "sha1_git": b"\x25\x7c\xc5\x64\x2c\xb1\xa0\x54\xf0\x8c\xc8\x3f\x2d"
+            b"\x94\x3e\x56\xfd\x3e\xbe\x99",
+            "sha256": b"\xb5\xbb\x9d\x80\x14\xa0\xf9\xb1\xd6\x1e\x21\xe7\x96"
+            b"\xd7\x8d\xcc\xdf\x13\x52\xf2\x3c\xd3\x28\x12\xf4\x85"
+            b"\x0b\x87\x8a\xe4\x94\x4c",
         }
 
         self.valid_str_hashes = {
-            'sha1': 'f1d2d2f924e986ac86fdf7b36c94bcdf32beec15',
-            'sha1_git': '257cc5642cb1a054f08cc83f2d943e56fd3ebe99',
-            'sha256': 'b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f485'
-                      '0b878ae4944c',
+            "sha1": "f1d2d2f924e986ac86fdf7b36c94bcdf32beec15",
+            "sha1_git": "257cc5642cb1a054f08cc83f2d943e56fd3ebe99",
+            "sha256": "b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f485"
+            "0b878ae4944c",
         }
 
         self.bad_hash = object()
@@ -39,112 +39,108 @@ class ValidateHashes(unittest.TestCase):
             self.assertTrue(hashes.validate_hash(value, hash_type))
 
     def test_invalid_hash_type(self):
-        hash_type = 'unknown_hash_type'
+        hash_type = "unknown_hash_type"
         with self.assertRaises(ValidationError) as cm:
-            hashes.validate_hash(self.valid_str_hashes['sha1'], hash_type)
+            hashes.validate_hash(self.valid_str_hashes["sha1"], hash_type)
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'unexpected-hash-type')
-        self.assertEqual(exc.params['hash_type'], hash_type)
+        self.assertEqual(exc.code, "unexpected-hash-type")
+        self.assertEqual(exc.params["hash_type"], hash_type)
 
-        self.assertIn('Unexpected hash type', str(exc))
+        self.assertIn("Unexpected hash type", str(exc))
         self.assertIn(hash_type, str(exc))
 
     def test_invalid_bytes_len(self):
         for hash_type, value in self.valid_byte_hashes.items():
-            value = value + b'\x00\x01'
+            value = value + b"\x00\x01"
             with self.assertRaises(ValidationError) as cm:
                 hashes.validate_hash(value, hash_type)
 
             exc = cm.exception
             self.assertIsInstance(str(exc), str)
-            self.assertEqual(exc.code, 'unexpected-hash-length')
-            self.assertEqual(exc.params['hash_type'], hash_type)
-            self.assertEqual(exc.params['length'], len(value))
+            self.assertEqual(exc.code, "unexpected-hash-length")
+            self.assertEqual(exc.params["hash_type"], hash_type)
+            self.assertEqual(exc.params["length"], len(value))
 
-            self.assertIn('Unexpected length', str(exc))
+            self.assertIn("Unexpected length", str(exc))
             self.assertIn(str(len(value)), str(exc))
 
     def test_invalid_str_len(self):
         for hash_type, value in self.valid_str_hashes.items():
-            value = value + '0001'
+            value = value + "0001"
             with self.assertRaises(ValidationError) as cm:
                 hashes.validate_hash(value, hash_type)
 
             exc = cm.exception
             self.assertIsInstance(str(exc), str)
-            self.assertEqual(exc.code, 'unexpected-hash-length')
-            self.assertEqual(exc.params['hash_type'], hash_type)
-            self.assertEqual(exc.params['length'], len(value))
+            self.assertEqual(exc.code, "unexpected-hash-length")
+            self.assertEqual(exc.params["hash_type"], hash_type)
+            self.assertEqual(exc.params["length"], len(value))
 
-            self.assertIn('Unexpected length', str(exc))
+            self.assertIn("Unexpected length", str(exc))
             self.assertIn(str(len(value)), str(exc))
 
     def test_invalid_str_contents(self):
         for hash_type, value in self.valid_str_hashes.items():
-            value = '\xa2' + value[1:-1] + '\xc3'
+            value = "\xa2" + value[1:-1] + "\xc3"
             with self.assertRaises(ValidationError) as cm:
                 hashes.validate_hash(value, hash_type)
 
             exc = cm.exception
             self.assertIsInstance(str(exc), str)
-            self.assertEqual(exc.code, 'unexpected-hash-contents')
-            self.assertEqual(exc.params['hash_type'], hash_type)
-            self.assertEqual(exc.params['unexpected_chars'], '\xa2, \xc3')
+            self.assertEqual(exc.code, "unexpected-hash-contents")
+            self.assertEqual(exc.params["hash_type"], hash_type)
+            self.assertEqual(exc.params["unexpected_chars"], "\xa2, \xc3")
 
-            self.assertIn('Unexpected characters', str(exc))
-            self.assertIn('\xc3', str(exc))
-            self.assertIn('\xa2', str(exc))
+            self.assertIn("Unexpected characters", str(exc))
+            self.assertIn("\xc3", str(exc))
+            self.assertIn("\xa2", str(exc))
 
     def test_invalid_value_type(self):
         with self.assertRaises(ValidationError) as cm:
-            hashes.validate_hash(self.bad_hash, 'sha1')
+            hashes.validate_hash(self.bad_hash, "sha1")
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'unexpected-hash-value-type')
-        self.assertEqual(exc.params['type'], self.bad_hash.__class__.__name__)
+        self.assertEqual(exc.code, "unexpected-hash-value-type")
+        self.assertEqual(exc.params["type"], self.bad_hash.__class__.__name__)
 
-        self.assertIn('Unexpected type', str(exc))
+        self.assertIn("Unexpected type", str(exc))
         self.assertIn(self.bad_hash.__class__.__name__, str(exc))
 
     def test_validate_sha1(self):
-        self.assertTrue(hashes.validate_sha1(self.valid_byte_hashes['sha1']))
-        self.assertTrue(hashes.validate_sha1(self.valid_str_hashes['sha1']))
+        self.assertTrue(hashes.validate_sha1(self.valid_byte_hashes["sha1"]))
+        self.assertTrue(hashes.validate_sha1(self.valid_str_hashes["sha1"]))
 
         with self.assertRaises(ValidationError) as cm:
             hashes.validate_sha1(self.bad_hash)
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'unexpected-hash-value-type')
-        self.assertEqual(exc.params['type'], self.bad_hash.__class__.__name__)
+        self.assertEqual(exc.code, "unexpected-hash-value-type")
+        self.assertEqual(exc.params["type"], self.bad_hash.__class__.__name__)
 
     def test_validate_sha1_git(self):
-        self.assertTrue(
-            hashes.validate_sha1_git(self.valid_byte_hashes['sha1_git']))
-        self.assertTrue(
-            hashes.validate_sha1_git(self.valid_str_hashes['sha1_git']))
+        self.assertTrue(hashes.validate_sha1_git(self.valid_byte_hashes["sha1_git"]))
+        self.assertTrue(hashes.validate_sha1_git(self.valid_str_hashes["sha1_git"]))
 
         with self.assertRaises(ValidationError) as cm:
             hashes.validate_sha1_git(self.bad_hash)
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'unexpected-hash-value-type')
-        self.assertEqual(exc.params['type'], self.bad_hash.__class__.__name__)
+        self.assertEqual(exc.code, "unexpected-hash-value-type")
+        self.assertEqual(exc.params["type"], self.bad_hash.__class__.__name__)
 
     def test_validate_sha256(self):
-        self.assertTrue(
-            hashes.validate_sha256(self.valid_byte_hashes['sha256']))
-        self.assertTrue(
-            hashes.validate_sha256(self.valid_str_hashes['sha256']))
+        self.assertTrue(hashes.validate_sha256(self.valid_byte_hashes["sha256"]))
+        self.assertTrue(hashes.validate_sha256(self.valid_str_hashes["sha256"]))
 
         with self.assertRaises(ValidationError) as cm:
             hashes.validate_sha256(self.bad_hash)
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'unexpected-hash-value-type')
-        self.assertEqual(exc.params['type'], self.bad_hash.__class__.__name__)
+        self.assertEqual(exc.code, "unexpected-hash-value-type")
+        self.assertEqual(exc.params["type"], self.bad_hash.__class__.__name__)
diff --git a/swh/model/tests/fields/test_simple.py b/swh/model/tests/fields/test_simple.py
index ab5e262eceba4eb08fe03f9df667534cc16379f2..25b1f1be068e123b6db20e5f50393d1437380a5b 100644
--- a/swh/model/tests/fields/test_simple.py
+++ b/swh/model/tests/fields/test_simple.py
@@ -12,19 +12,20 @@ from swh.model.fields import simple
 class ValidateSimple(unittest.TestCase):
     def setUp(self):
-        self.valid_str = 'I am a valid string'
+        self.valid_str = "I am a valid string"
 
-        self.valid_bytes = b'I am a valid bytes object'
+        self.valid_bytes = b"I am a valid bytes object"
 
-        self.enum_values = {'an enum value', 'other', 'and another'}
-        self.invalid_enum_value = 'invalid enum value'
+        self.enum_values = {"an enum value", "other", "and another"}
+        self.invalid_enum_value = "invalid enum value"
 
         self.valid_int = 42
 
         self.valid_real = 42.42
 
-        self.valid_datetime = datetime.datetime(1999, 1, 1, 12, 0, 0,
-                                                tzinfo=datetime.timezone.utc)
+        self.valid_datetime = datetime.datetime(
+            1999, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc
+        )
         self.invalid_datetime_notz = datetime.datetime(1999, 1, 1, 12, 0, 0)
 
     def test_validate_int(self):
@@ -36,9 +37,9 @@ class ValidateSimple(unittest.TestCase):
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'unexpected-type')
-        self.assertEqual(exc.params['expected_type'], 'Integral')
-        self.assertEqual(exc.params['type'], 'str')
+        self.assertEqual(exc.code, "unexpected-type")
+        self.assertEqual(exc.params["expected_type"], "Integral")
+        self.assertEqual(exc.params["type"], "str")
 
     def test_validate_str(self):
         self.assertTrue(simple.validate_str(self.valid_str))
@@ -49,18 +50,18 @@ class ValidateSimple(unittest.TestCase):
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'unexpected-type')
-        self.assertEqual(exc.params['expected_type'], 'str')
-        self.assertEqual(exc.params['type'], 'int')
+        self.assertEqual(exc.code, "unexpected-type")
+        self.assertEqual(exc.params["expected_type"], "str")
+        self.assertEqual(exc.params["type"], "int")
 
         with self.assertRaises(ValidationError) as cm:
             simple.validate_str(self.valid_bytes)
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'unexpected-type')
-        self.assertEqual(exc.params['expected_type'], 'str')
-        self.assertEqual(exc.params['type'], 'bytes')
+        self.assertEqual(exc.code, "unexpected-type")
+        self.assertEqual(exc.params["expected_type"], "str")
+        self.assertEqual(exc.params["type"], "bytes")
 
     def test_validate_bytes(self):
         self.assertTrue(simple.validate_bytes(self.valid_bytes))
@@ -71,18 +72,18 @@ class ValidateSimple(unittest.TestCase):
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'unexpected-type')
-        self.assertEqual(exc.params['expected_type'], 'bytes')
-        self.assertEqual(exc.params['type'], 'int')
+        self.assertEqual(exc.code, "unexpected-type")
+        self.assertEqual(exc.params["expected_type"], "bytes")
+        self.assertEqual(exc.params["type"], "int")
 
         with self.assertRaises(ValidationError) as cm:
             simple.validate_bytes(self.valid_str)
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'unexpected-type')
-        self.assertEqual(exc.params['expected_type'], 'bytes')
-        self.assertEqual(exc.params['type'], 'str')
+        self.assertEqual(exc.code, "unexpected-type")
+        self.assertEqual(exc.params["expected_type"], "bytes")
+        self.assertEqual(exc.params["type"], "str")
 
     def test_validate_datetime(self):
         self.assertTrue(simple.validate_datetime(self.valid_datetime))
@@ -95,9 +96,9 @@ class ValidateSimple(unittest.TestCase):
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'unexpected-type')
-        self.assertEqual(exc.params['expected_type'], 'one of datetime, Real')
-        self.assertEqual(exc.params['type'], 'str')
+        self.assertEqual(exc.code, "unexpected-type")
+        self.assertEqual(exc.params["expected_type"], "one of datetime, Real")
+        self.assertEqual(exc.params["type"], "str")
 
     def test_validate_datetime_invalide_tz(self):
         with self.assertRaises(ValidationError) as cm:
@@ -105,7 +106,7 @@ class ValidateSimple(unittest.TestCase):
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'datetime-without-tzinfo')
+        self.assertEqual(exc.code, "datetime-without-tzinfo")
 
     def test_validate_enum(self):
         for value in self.enum_values:
@@ -117,7 +118,8 @@ class ValidateSimple(unittest.TestCase):
 
         exc = cm.exception
         self.assertIsInstance(str(exc), str)
-        self.assertEqual(exc.code, 'unexpected-value')
-        self.assertEqual(exc.params['value'], self.invalid_enum_value)
-        self.assertEqual(exc.params['expected_values'],
-                         ', '.join(sorted(self.enum_values)))
+        self.assertEqual(exc.code, "unexpected-value")
+        self.assertEqual(exc.params["value"], self.invalid_enum_value)
+        self.assertEqual(
+            exc.params["expected_values"], ", ".join(sorted(self.enum_values))
+        )
diff --git a/swh/model/tests/generate_testdata.py b/swh/model/tests/generate_testdata.py
index 5a274e049eaaed6b21a25c55a998379e3fdd27ce..0280a6ab15582e49d1d79b1ba9104d55aa5abd7e 100644
--- a/swh/model/tests/generate_testdata.py
+++ b/swh/model/tests/generate_testdata.py
@@ -11,13 +11,18 @@ from typing import List, Dict
 
 from swh.model.hashutil import MultiHash
 
-PROTOCOLS = ['git', 'http', 'https', 'deb', 'svn', 'mock']
-DOMAINS = ['example.com', 'some.long.host.name', 'xn--n28h.tld']
-PATHS = ['', '/', '/stuff', '/stuff/',
-         '/path/to/resource',
-         '/path/with/anchor#id=42',
-         '/path/with/qargs?q=1&b']
-CONTENT_STATUS = ['visible', 'hidden', 'absent']
+PROTOCOLS = ["git", "http", "https", "deb", "svn", "mock"]
+DOMAINS = ["example.com", "some.long.host.name", "xn--n28h.tld"]
+PATHS = [
+    "",
+    "/",
+    "/stuff",
+    "/stuff/",
+    "/path/to/resource",
+    "/path/with/anchor#id=42",
+    "/path/with/qargs?q=1&b",
+]
+CONTENT_STATUS = ["visible", "hidden", "absent"]
 MAX_DATE = 3e9  # around 2065
 
@@ -25,7 +30,7 @@ def gen_all_origins():
     for protocol in PROTOCOLS:
         for domain in DOMAINS:
             for urlpath in PATHS:
-                yield {'url': '%s://%s%s' % (protocol, domain, urlpath)}
+                yield {"url": "%s://%s%s" % (protocol, domain, urlpath)}
 
 
 ORIGINS = list(gen_all_origins())
@@ -46,16 +51,17 @@ def gen_content():
     data = bytes(randint(0, 255) for i in range(size))
     status = choice(CONTENT_STATUS)
     h = MultiHash.from_data(data)
-    ctime = datetime.fromtimestamp(
-        random() * MAX_DATE, timezone(choice(all_timezones)))
-    content = {'data': data,
-               'status': status,
-               'length': size,
-               'ctime': ctime,
-               **h.digest()}
-    if status == 'absent':
-        content['reason'] = 'why not'
-        content['data'] = None
+    ctime = datetime.fromtimestamp(random() * MAX_DATE, timezone(choice(all_timezones)))
+    content = {
+        "data": data,
+        "status": status,
+        "length": size,
+        "ctime": ctime,
+        **h.digest(),
+    }
+    if status == "absent":
+        content["reason"] = "why not"
+        content["data"] = None
     return content
diff --git a/swh/model/tests/generate_testdata_from_disk.py b/swh/model/tests/generate_testdata_from_disk.py
index 35d4f480aa3b0657f9c1abdbd15af91ce82b5ad9..063e39093618a1e07c7dc9f55acc184a05d83643 100644
--- a/swh/model/tests/generate_testdata_from_disk.py
+++ b/swh/model/tests/generate_testdata_from_disk.py
@@ -13,80 +13,80 @@ from swh.model.hashutil import ALGORITHMS, hash_to_hex
 
 def generate_from_directory(varname, directory, indent=0):
     """Generate test data from a given directory"""
+
     def get_data(member, path):
         yield (path, member.get_data())
         if isinstance(member, Directory):
             for name, child in member.items():
                 yield from get_data(child, os.path.join(path, name))
 
-    data = dict(get_data(directory, b''))
+    data = dict(get_data(directory, b""))
     out = []
 
     def format_hash(h, indent=0):
-        spindent = ' ' * indent
+        spindent = " " * indent
         if len(h) > 20:
-            cutoff = len(h)//2
+            cutoff = len(h) // 2
             parts = h[:cutoff], h[cutoff:]
         else:
            parts = [h]
 
-        out.append('hash_to_bytes(\n')
+        out.append("hash_to_bytes(\n")
         for part in parts:
-            out.append(spindent + '    %s\n' % repr(hash_to_hex(part)))
-        out.append(spindent + ')')
+            out.append(spindent + "    %s\n" % repr(hash_to_hex(part)))
+        out.append(spindent + ")")
 
     def format_dict_items(d, indent=0):
-        spindent = ' ' * indent
+        spindent = " " * indent
         for key, value in sorted(d.items()):
             if isinstance(key, bytes):
-                out.append(spindent + repr(key) + ': {\n')
+                out.append(spindent + repr(key) + ": {\n")
                 format_dict_items(value, indent=indent + 4)
-                out.append(spindent + '}')
+                out.append(spindent + "}")
             else:
-                out.append(spindent + repr(key) + ': ')
-                if key == 'entries':
+                out.append(spindent + repr(key) + ": ")
+                if key == "entries":
                     if not value:
-                        out.append('[]')
+                        out.append("[]")
                     else:
-                        out.append('[')
+                        out.append("[")
                         last_index = len(value) - 1
                         for i, entry in enumerate(
-                                sorted(value, key=itemgetter('name'))):
+                            sorted(value, key=itemgetter("name"))
+                        ):
                             if i:
-                                out.append(' ')
-                            out.append('{\n')
+                                out.append(" ")
+                            out.append("{\n")
                             format_dict_items(entry, indent=indent + 4)
                             if i != last_index:
-                                out.append(spindent + '},')
-                        out.append(spindent + '}]')
-                elif key in ALGORITHMS | {'id', 'target'}:
+                                out.append(spindent + "},")
+                        out.append(spindent + "}]")
+                elif key in ALGORITHMS | {"id", "target"}:
                     format_hash(value, indent=indent)
                 elif isinstance(value, DentryPerms):
                     out.append(str(value))
                 else:
                     out.append(repr(value))
-            out.append(',\n')
+            out.append(",\n")
 
-    spindent = ' ' * indent
-    out.append(spindent + '%s = {\n' % varname)
+    spindent = " " * indent
+    out.append(spindent + "%s = {\n" % varname)
     format_dict_items(data, indent=4 + indent)
-    out.append(spindent + '}')
+    out.append(spindent + "}")
 
-    return ''.join(out)
+    return "".join(out)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     if not sys.argv[1:]:
         print("Usage: %s dir1 dir2" % sys.argv[0], file=sys.stderr)
         exit(2)
 
     for dirname in sys.argv[1:]:
         basename = os.path.basename(dirname)
-        varname = 'expected_%s' % basename
+        varname = "expected_%s" % basename
         testdata = generate_from_directory(
-            varname,
-            Directory.from_disk(path=os.fsencode(dirname)),
-            indent=8
+            varname, Directory.from_disk(path=os.fsencode(dirname)), indent=8
         )
         print(testdata)
         print()
diff --git a/swh/model/tests/test_cli.py b/swh/model/tests/test_cli.py
index 4d4ff0116efe58fe26fe6cc5733462c4b58af533..fac1d89d7b414b395d876ea0e57f7090ea197050 100644
--- a/swh/model/tests/test_cli.py
+++ b/swh/model/tests/test_cli.py
@@ -18,7 +18,6 @@ from swh.model.tests.test_from_disk import DataMixin
 
 @pytest.mark.fs
 class TestIdentify(DataMixin, unittest.TestCase):
-
     def setUp(self):
         super().setUp()
         self.runner = CliRunner()
@@ -32,125 +31,114 @@ class TestIdentify(DataMixin, unittest.TestCase):
         self.make_contents(self.tmpdir_name)
         for filename, content in self.contents.items():
             path = os.path.join(self.tmpdir_name, filename)
-            result = self.runner.invoke(cli.identify,
-                                        ['--type', 'content', path])
-            self.assertPidOK(result,
-                             'swh:1:cnt:' +
hash_to_hex(content['sha1_git'])) + result = self.runner.invoke(cli.identify, ["--type", "content", path]) + self.assertPidOK(result, "swh:1:cnt:" + hash_to_hex(content["sha1_git"])) def test_content_id_from_stdin(self): """identify file content""" self.make_contents(self.tmpdir_name) for _, content in self.contents.items(): - result = self.runner.invoke(cli.identify, - input=content['data']) - self.assertPidOK(result, - 'swh:1:cnt:' + hash_to_hex(content['sha1_git'])) + result = self.runner.invoke(cli.identify, input=content["data"]) + self.assertPidOK(result, "swh:1:cnt:" + hash_to_hex(content["sha1_git"])) def test_directory_id(self): """identify an entire directory""" self.make_from_tarball(self.tmpdir_name) - path = os.path.join(self.tmpdir_name, b'sample-folder') - result = self.runner.invoke(cli.identify, - ['--type', 'directory', path]) - self.assertPidOK(result, - 'swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759') + path = os.path.join(self.tmpdir_name, b"sample-folder") + result = self.runner.invoke(cli.identify, ["--type", "directory", path]) + self.assertPidOK(result, "swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759") def test_snapshot_id(self): """identify a snapshot""" - tarball = os.path.join(os.path.dirname(__file__), 'data', 'repos', - 'sample-repo.tgz') - with tempfile.TemporaryDirectory(prefix='swh.model.cli') as d: - with tarfile.open(tarball, 'r:gz') as t: + tarball = os.path.join( + os.path.dirname(__file__), "data", "repos", "sample-repo.tgz" + ) + with tempfile.TemporaryDirectory(prefix="swh.model.cli") as d: + with tarfile.open(tarball, "r:gz") as t: t.extractall(d) - repo_dir = os.path.join(d, 'sample-repo') - result = self.runner.invoke(cli.identify, - ['--type', 'snapshot', repo_dir]) + repo_dir = os.path.join(d, "sample-repo") + result = self.runner.invoke( + cli.identify, ["--type", "snapshot", repo_dir] + ) self.assertPidOK( - result, - 'swh:1:snp:abc888898124270905a0ef3c67e872ce08e7e0c1') + result, "swh:1:snp:abc888898124270905a0ef3c67e872ce08e7e0c1" + ) def test_origin_id(self): """identify an origin URL""" - url = 'https://github.com/torvalds/linux' - result = self.runner.invoke(cli.identify, ['--type', 'origin', url]) - self.assertPidOK(result, - 'swh:1:ori:b63a575fe3faab7692c9f38fb09d4bb45651bb0f') + url = "https://github.com/torvalds/linux" + result = self.runner.invoke(cli.identify, ["--type", "origin", url]) + self.assertPidOK(result, "swh:1:ori:b63a575fe3faab7692c9f38fb09d4bb45651bb0f") def test_symlink(self): """identify symlink --- both itself and target""" - regular = os.path.join(self.tmpdir_name, b'foo.txt') - link = os.path.join(self.tmpdir_name, b'bar.txt') - open(regular, 'w').write('foo\n') + regular = os.path.join(self.tmpdir_name, b"foo.txt") + link = os.path.join(self.tmpdir_name, b"bar.txt") + open(regular, "w").write("foo\n") os.symlink(os.path.basename(regular), link) result = self.runner.invoke(cli.identify, [link]) - self.assertPidOK(result, - 'swh:1:cnt:257cc5642cb1a054f08cc83f2d943e56fd3ebe99') + self.assertPidOK(result, "swh:1:cnt:257cc5642cb1a054f08cc83f2d943e56fd3ebe99") - result = self.runner.invoke(cli.identify, ['--no-dereference', link]) - self.assertPidOK(result, - 'swh:1:cnt:996f1789ff67c0e3f69ef5933a55d54c5d0e9954') + result = self.runner.invoke(cli.identify, ["--no-dereference", link]) + self.assertPidOK(result, "swh:1:cnt:996f1789ff67c0e3f69ef5933a55d54c5d0e9954") def test_show_filename(self): """filename is shown by default""" self.make_contents(self.tmpdir_name) for filename, content in self.contents.items(): 
path = os.path.join(self.tmpdir_name, filename) - result = self.runner.invoke(cli.identify, - ['--type', 'content', path]) + result = self.runner.invoke(cli.identify, ["--type", "content", path]) self.assertEqual(result.exit_code, 0) - self.assertEqual(result.output.rstrip(), - 'swh:1:cnt:%s\t%s' % - (hash_to_hex(content['sha1_git']), path.decode())) + self.assertEqual( + result.output.rstrip(), + "swh:1:cnt:%s\t%s" % (hash_to_hex(content["sha1_git"]), path.decode()), + ) def test_hide_filename(self): """filename is hidden upon request""" self.make_contents(self.tmpdir_name) for filename, content in self.contents.items(): path = os.path.join(self.tmpdir_name, filename) - result = self.runner.invoke(cli.identify, - ['--type', 'content', '--no-filename', - path]) - self.assertPidOK(result, - 'swh:1:cnt:' + hash_to_hex(content['sha1_git'])) + result = self.runner.invoke( + cli.identify, ["--type", "content", "--no-filename", path] + ) + self.assertPidOK(result, "swh:1:cnt:" + hash_to_hex(content["sha1_git"])) def test_auto_content(self): """automatic object type detection: content""" - with tempfile.NamedTemporaryFile(prefix='swh.model.cli') as f: + with tempfile.NamedTemporaryFile(prefix="swh.model.cli") as f: result = self.runner.invoke(cli.identify, [f.name]) self.assertEqual(result.exit_code, 0) - self.assertRegex(result.output, r'^swh:\d+:cnt:') + self.assertRegex(result.output, r"^swh:\d+:cnt:") def test_auto_directory(self): """automatic object type detection: directory""" - with tempfile.TemporaryDirectory(prefix='swh.model.cli') as dirname: + with tempfile.TemporaryDirectory(prefix="swh.model.cli") as dirname: result = self.runner.invoke(cli.identify, [dirname]) self.assertEqual(result.exit_code, 0) - self.assertRegex(result.output, r'^swh:\d+:dir:') + self.assertRegex(result.output, r"^swh:\d+:dir:") def test_auto_origin(self): """automatic object type detection: origin""" - result = self.runner.invoke(cli.identify, - ['https://github.com/torvalds/linux']) + result = self.runner.invoke(cli.identify, ["https://github.com/torvalds/linux"]) self.assertEqual(result.exit_code, 0) - self.assertRegex(result.output, r'^swh:\d+:ori:') + self.assertRegex(result.output, r"^swh:\d+:ori:") def test_verify_content(self): """identifier verification""" self.make_contents(self.tmpdir_name) for filename, content in self.contents.items(): - expected_id = 'swh:1:cnt:' + hash_to_hex(content['sha1_git']) + expected_id = "swh:1:cnt:" + hash_to_hex(content["sha1_git"]) # match path = os.path.join(self.tmpdir_name, filename) - result = self.runner.invoke(cli.identify, - ['--verify', expected_id, path]) + result = self.runner.invoke(cli.identify, ["--verify", expected_id, path]) self.assertEqual(result.exit_code, 0) # mismatch - with open(path, 'a') as f: - f.write('trailing garbage to make verification fail') - result = self.runner.invoke(cli.identify, - ['--verify', expected_id, path]) + with open(path, "a") as f: + f.write("trailing garbage to make verification fail") + result = self.runner.invoke(cli.identify, ["--verify", expected_id, path]) self.assertEqual(result.exit_code, 1) diff --git a/swh/model/tests/test_from_disk.py b/swh/model/tests/test_from_disk.py index d9881a1529e46c71f5484607dab5cb68b0803251..0ebf30a1f372102563c28cb625dee1178e6e1c87 100644 --- a/swh/model/tests/test_from_disk.py +++ b/swh/model/tests/test_from_disk.py @@ -12,13 +12,11 @@ import unittest from typing import ClassVar, Optional from swh.model import from_disk -from swh.model.from_disk import ( - Content, DentryPerms, 
Directory, DiskBackedContent -) +from swh.model.from_disk import Content, DentryPerms, Directory, DiskBackedContent from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex from swh.model import model -TEST_DATA = os.path.join(os.path.dirname(__file__), 'data') +TEST_DATA = os.path.join(os.path.dirname(__file__), "data") class ModeToPerms(unittest.TestCase): @@ -54,38 +52,62 @@ class ModeToPerms(unittest.TestCase): class TestDiskBackedContent(unittest.TestCase): def test_with_data(self): expected_content = model.Content( - length=42, status='visible', data=b'foo bar', - sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux') - with tempfile.NamedTemporaryFile(mode='w+b') as fd: + length=42, + status="visible", + data=b"foo bar", + sha1=b"foo", + sha1_git=b"bar", + sha256=b"baz", + blake2s256=b"qux", + ) + with tempfile.NamedTemporaryFile(mode="w+b") as fd: content = DiskBackedContent( - length=42, status='visible', path=fd.name, - sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux') - fd.write(b'foo bar') + length=42, + status="visible", + path=fd.name, + sha1=b"foo", + sha1_git=b"bar", + sha256=b"baz", + blake2s256=b"qux", + ) + fd.write(b"foo bar") fd.seek(0) content_with_data = content.with_data() assert expected_content == content_with_data def test_lazy_data(self): - with tempfile.NamedTemporaryFile(mode='w+b') as fd: - fd.write(b'foo') + with tempfile.NamedTemporaryFile(mode="w+b") as fd: + fd.write(b"foo") fd.seek(0) content = DiskBackedContent( - length=42, status='visible', path=fd.name, - sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux') - fd.write(b'bar') + length=42, + status="visible", + path=fd.name, + sha1=b"foo", + sha1_git=b"bar", + sha256=b"baz", + blake2s256=b"qux", + ) + fd.write(b"bar") fd.seek(0) content_with_data = content.with_data() - fd.write(b'baz') + fd.write(b"baz") fd.seek(0) - assert content_with_data.data == b'bar' + assert content_with_data.data == b"bar" def test_with_data_cannot_read(self): - with tempfile.NamedTemporaryFile(mode='w+b') as fd: + with tempfile.NamedTemporaryFile(mode="w+b") as fd: content = DiskBackedContent( - length=42, status='visible', path=fd.name, - sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux') + length=42, + status="visible", + path=fd.name, + sha1=b"foo", + sha1_git=b"bar", + sha256=b"baz", + blake2s256=b"qux", + ) with pytest.raises(OSError): content.with_data() @@ -93,361 +115,329 @@ class TestDiskBackedContent(unittest.TestCase): def test_missing_path(self): with pytest.raises(TypeError): DiskBackedContent( - length=42, status='visible', - sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux') + length=42, + status="visible", + sha1=b"foo", + sha1_git=b"bar", + sha256=b"baz", + blake2s256=b"qux", + ) with pytest.raises(TypeError): DiskBackedContent( - length=42, status='visible', path=None, - sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux') + length=42, + status="visible", + path=None, + sha1=b"foo", + sha1_git=b"bar", + sha256=b"baz", + blake2s256=b"qux", + ) class DataMixin: maxDiff = None # type: ClassVar[Optional[int]] def setUp(self): - self.tmpdir = tempfile.TemporaryDirectory( - prefix='swh.model.from_disk' - ) + self.tmpdir = tempfile.TemporaryDirectory(prefix="swh.model.from_disk") self.tmpdir_name = os.fsencode(self.tmpdir.name) self.contents = { - b'file': { - 'data': b'42\n', - 'sha1': hash_to_bytes( - '34973274ccef6ab4dfaaf86599792fa9c3fe4689' - ), - 'sha256': hash_to_bytes( - '084c799cd551dd1d8d5c5f9a5d593b2e' - 
'931f5e36122ee5c793c1d08a19839cc0' - ), - 'sha1_git': hash_to_bytes( - 'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'), - 'blake2s256': hash_to_bytes( - 'd5fe1939576527e42cfd76a9455a2432' - 'fe7f56669564577dd93c4280e76d661d' - ), - 'length': 3, - 'mode': 0o100644 + b"file": { + "data": b"42\n", + "sha1": hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"), + "sha256": hash_to_bytes( + "084c799cd551dd1d8d5c5f9a5d593b2e" + "931f5e36122ee5c793c1d08a19839cc0" + ), + "sha1_git": hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), + "blake2s256": hash_to_bytes( + "d5fe1939576527e42cfd76a9455a2432" + "fe7f56669564577dd93c4280e76d661d" + ), + "length": 3, + "mode": 0o100644, }, } self.symlinks = { - b'symlink': { - 'data': b'target', - 'blake2s256': hash_to_bytes( - '595d221b30fdd8e10e2fdf18376e688e' - '9f18d56fd9b6d1eb6a822f8c146c6da6' - ), - 'sha1': hash_to_bytes( - '0e8a3ad980ec179856012b7eecf4327e99cd44cd' - ), - 'sha1_git': hash_to_bytes( - '1de565933b05f74c75ff9a6520af5f9f8a5a2f1d' - ), - 'sha256': hash_to_bytes( - '34a04005bcaf206eec990bd9637d9fdb' - '6725e0a0c0d4aebf003f17f4c956eb5c' - ), - 'length': 6, - 'perms': DentryPerms.symlink, + b"symlink": { + "data": b"target", + "blake2s256": hash_to_bytes( + "595d221b30fdd8e10e2fdf18376e688e" + "9f18d56fd9b6d1eb6a822f8c146c6da6" + ), + "sha1": hash_to_bytes("0e8a3ad980ec179856012b7eecf4327e99cd44cd"), + "sha1_git": hash_to_bytes("1de565933b05f74c75ff9a6520af5f9f8a5a2f1d"), + "sha256": hash_to_bytes( + "34a04005bcaf206eec990bd9637d9fdb" + "6725e0a0c0d4aebf003f17f4c956eb5c" + ), + "length": 6, + "perms": DentryPerms.symlink, } } self.specials = { - b'fifo': os.mkfifo, + b"fifo": os.mkfifo, } self.empty_content = { - 'data': b'', - 'length': 0, - 'blake2s256': hash_to_bytes( - '69217a3079908094e11121d042354a7c' - '1f55b6482ca1a51e1b250dfd1ed0eef9' + "data": b"", + "length": 0, + "blake2s256": hash_to_bytes( + "69217a3079908094e11121d042354a7c" "1f55b6482ca1a51e1b250dfd1ed0eef9" ), - 'sha1': hash_to_bytes( - 'da39a3ee5e6b4b0d3255bfef95601890afd80709' + "sha1": hash_to_bytes("da39a3ee5e6b4b0d3255bfef95601890afd80709"), + "sha1_git": hash_to_bytes("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + "sha256": hash_to_bytes( + "e3b0c44298fc1c149afbf4c8996fb924" "27ae41e4649b934ca495991b7852b855" ), - 'sha1_git': hash_to_bytes( - 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391' - ), - 'sha256': hash_to_bytes( - 'e3b0c44298fc1c149afbf4c8996fb924' - '27ae41e4649b934ca495991b7852b855' - ), - 'perms': DentryPerms.content, + "perms": DentryPerms.content, } self.empty_directory = { - 'id': hash_to_bytes( - '4b825dc642cb6eb9a060e54bf8d69288fbee4904' - ), - 'entries': [], + "id": hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"), + "entries": [], } # Generated with generate_testdata_from_disk self.tarball_contents = { - b'': { - 'entries': [{ - 'name': b'bar', - 'perms': DentryPerms.directory, - 'target': hash_to_bytes( - '3c1f578394f4623f74a0ba7fe761729f59fc6ec4' - ), - 'type': 'dir', - }, { - 'name': b'empty-folder', - 'perms': DentryPerms.directory, - 'target': hash_to_bytes( - '4b825dc642cb6eb9a060e54bf8d69288fbee4904' - ), - 'type': 'dir', - }, { - 'name': b'foo', - 'perms': DentryPerms.directory, - 'target': hash_to_bytes( - '2b41c40f0d1fbffcba12497db71fba83fcca96e5' - ), - 'type': 'dir', - }, { - 'name': b'link-to-another-quote', - 'perms': DentryPerms.symlink, - 'target': hash_to_bytes( - '7d5c08111e21c8a9f71540939998551683375fad' - ), - 'type': 'file', - }, { - 'name': b'link-to-binary', - 'perms': DentryPerms.symlink, - 'target': hash_to_bytes( 
- 'e86b45e538d9b6888c969c89fbd22a85aa0e0366' - ), - 'type': 'file', - }, { - 'name': b'link-to-foo', - 'perms': DentryPerms.symlink, - 'target': hash_to_bytes( - '19102815663d23f8b75a47e7a01965dcdc96468c' - ), - 'type': 'file', - }, { - 'name': b'some-binary', - 'perms': DentryPerms.executable_content, - 'target': hash_to_bytes( - '68769579c3eaadbe555379b9c3538e6628bae1eb' - ), - 'type': 'file', - }], - 'id': hash_to_bytes( - 'e8b0f1466af8608c8a3fb9879db172b887e80759' - ), + b"": { + "entries": [ + { + "name": b"bar", + "perms": DentryPerms.directory, + "target": hash_to_bytes( + "3c1f578394f4623f74a0ba7fe761729f59fc6ec4" + ), + "type": "dir", + }, + { + "name": b"empty-folder", + "perms": DentryPerms.directory, + "target": hash_to_bytes( + "4b825dc642cb6eb9a060e54bf8d69288fbee4904" + ), + "type": "dir", + }, + { + "name": b"foo", + "perms": DentryPerms.directory, + "target": hash_to_bytes( + "2b41c40f0d1fbffcba12497db71fba83fcca96e5" + ), + "type": "dir", + }, + { + "name": b"link-to-another-quote", + "perms": DentryPerms.symlink, + "target": hash_to_bytes( + "7d5c08111e21c8a9f71540939998551683375fad" + ), + "type": "file", + }, + { + "name": b"link-to-binary", + "perms": DentryPerms.symlink, + "target": hash_to_bytes( + "e86b45e538d9b6888c969c89fbd22a85aa0e0366" + ), + "type": "file", + }, + { + "name": b"link-to-foo", + "perms": DentryPerms.symlink, + "target": hash_to_bytes( + "19102815663d23f8b75a47e7a01965dcdc96468c" + ), + "type": "file", + }, + { + "name": b"some-binary", + "perms": DentryPerms.executable_content, + "target": hash_to_bytes( + "68769579c3eaadbe555379b9c3538e6628bae1eb" + ), + "type": "file", + }, + ], + "id": hash_to_bytes("e8b0f1466af8608c8a3fb9879db172b887e80759"), }, - b'bar': { - 'entries': [{ - 'name': b'barfoo', - 'perms': DentryPerms.directory, - 'target': hash_to_bytes( - 'c3020f6bf135a38c6df3afeb5fb38232c5e07087' - ), - 'type': 'dir', - }], - 'id': hash_to_bytes( - '3c1f578394f4623f74a0ba7fe761729f59fc6ec4' - ), + b"bar": { + "entries": [ + { + "name": b"barfoo", + "perms": DentryPerms.directory, + "target": hash_to_bytes( + "c3020f6bf135a38c6df3afeb5fb38232c5e07087" + ), + "type": "dir", + } + ], + "id": hash_to_bytes("3c1f578394f4623f74a0ba7fe761729f59fc6ec4"), }, - b'bar/barfoo': { - 'entries': [{ - 'name': b'another-quote.org', - 'perms': DentryPerms.content, - 'target': hash_to_bytes( - '133693b125bad2b4ac318535b84901ebb1f6b638' - ), - 'type': 'file', - }], - 'id': hash_to_bytes( - 'c3020f6bf135a38c6df3afeb5fb38232c5e07087' - ), + b"bar/barfoo": { + "entries": [ + { + "name": b"another-quote.org", + "perms": DentryPerms.content, + "target": hash_to_bytes( + "133693b125bad2b4ac318535b84901ebb1f6b638" + ), + "type": "file", + } + ], + "id": hash_to_bytes("c3020f6bf135a38c6df3afeb5fb38232c5e07087"), }, - b'bar/barfoo/another-quote.org': { - 'blake2s256': hash_to_bytes( - 'd26c1cad82d43df0bffa5e7be11a60e3' - '4adb85a218b433cbce5278b10b954fe8' - ), - 'length': 72, - 'perms': DentryPerms.content, - 'sha1': hash_to_bytes( - '90a6138ba59915261e179948386aa1cc2aa9220a' - ), - 'sha1_git': hash_to_bytes( - '133693b125bad2b4ac318535b84901ebb1f6b638' - ), - 'sha256': hash_to_bytes( - '3db5ae168055bcd93a4d08285dc99ffe' - 'e2883303b23fac5eab850273a8ea5546' + b"bar/barfoo/another-quote.org": { + "blake2s256": hash_to_bytes( + "d26c1cad82d43df0bffa5e7be11a60e3" + "4adb85a218b433cbce5278b10b954fe8" + ), + "length": 72, + "perms": DentryPerms.content, + "sha1": hash_to_bytes("90a6138ba59915261e179948386aa1cc2aa9220a"), + "sha1_git": 
hash_to_bytes("133693b125bad2b4ac318535b84901ebb1f6b638"), + "sha256": hash_to_bytes( + "3db5ae168055bcd93a4d08285dc99ffe" + "e2883303b23fac5eab850273a8ea5546" ), }, - b'empty-folder': { - 'entries': [], - 'id': hash_to_bytes( - '4b825dc642cb6eb9a060e54bf8d69288fbee4904' - ), + b"empty-folder": { + "entries": [], + "id": hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"), }, - b'foo': { - 'entries': [{ - 'name': b'barfoo', - 'perms': DentryPerms.symlink, - 'target': hash_to_bytes( - '8185dfb2c0c2c597d16f75a8a0c37668567c3d7e' - ), - 'type': 'file', - }, { - 'name': b'quotes.md', - 'perms': DentryPerms.content, - 'target': hash_to_bytes( - '7c4c57ba9ff496ad179b8f65b1d286edbda34c9a' - ), - 'type': 'file', - }, { - 'name': b'rel-link-to-barfoo', - 'perms': DentryPerms.symlink, - 'target': hash_to_bytes( - 'acac326ddd63b0bc70840659d4ac43619484e69f' - ), - 'type': 'file', - }], - 'id': hash_to_bytes( - '2b41c40f0d1fbffcba12497db71fba83fcca96e5' - ), + b"foo": { + "entries": [ + { + "name": b"barfoo", + "perms": DentryPerms.symlink, + "target": hash_to_bytes( + "8185dfb2c0c2c597d16f75a8a0c37668567c3d7e" + ), + "type": "file", + }, + { + "name": b"quotes.md", + "perms": DentryPerms.content, + "target": hash_to_bytes( + "7c4c57ba9ff496ad179b8f65b1d286edbda34c9a" + ), + "type": "file", + }, + { + "name": b"rel-link-to-barfoo", + "perms": DentryPerms.symlink, + "target": hash_to_bytes( + "acac326ddd63b0bc70840659d4ac43619484e69f" + ), + "type": "file", + }, + ], + "id": hash_to_bytes("2b41c40f0d1fbffcba12497db71fba83fcca96e5"), }, - b'foo/barfoo': { - 'blake2s256': hash_to_bytes( - 'e1252f2caa4a72653c4efd9af871b62b' - 'f2abb7bb2f1b0e95969204bd8a70d4cd' - ), - 'data': b'bar/barfoo', - 'length': 10, - 'perms': DentryPerms.symlink, - 'sha1': hash_to_bytes( - '9057ee6d0162506e01c4d9d5459a7add1fedac37' - ), - 'sha1_git': hash_to_bytes( - '8185dfb2c0c2c597d16f75a8a0c37668567c3d7e' - ), - 'sha256': hash_to_bytes( - '29ad3f5725321b940332c78e403601af' - 'ff61daea85e9c80b4a7063b6887ead68' + b"foo/barfoo": { + "blake2s256": hash_to_bytes( + "e1252f2caa4a72653c4efd9af871b62b" + "f2abb7bb2f1b0e95969204bd8a70d4cd" + ), + "data": b"bar/barfoo", + "length": 10, + "perms": DentryPerms.symlink, + "sha1": hash_to_bytes("9057ee6d0162506e01c4d9d5459a7add1fedac37"), + "sha1_git": hash_to_bytes("8185dfb2c0c2c597d16f75a8a0c37668567c3d7e"), + "sha256": hash_to_bytes( + "29ad3f5725321b940332c78e403601af" + "ff61daea85e9c80b4a7063b6887ead68" ), }, - b'foo/quotes.md': { - 'blake2s256': hash_to_bytes( - 'bf7ce4fe304378651ee6348d3e9336ed' - '5ad603d33e83c83ba4e14b46f9b8a80b' - ), - 'length': 66, - 'perms': DentryPerms.content, - 'sha1': hash_to_bytes( - '1bf0bb721ac92c18a19b13c0eb3d741cbfadebfc' - ), - 'sha1_git': hash_to_bytes( - '7c4c57ba9ff496ad179b8f65b1d286edbda34c9a' - ), - 'sha256': hash_to_bytes( - 'caca942aeda7b308859eb56f909ec96d' - '07a499491690c453f73b9800a93b1659' + b"foo/quotes.md": { + "blake2s256": hash_to_bytes( + "bf7ce4fe304378651ee6348d3e9336ed" + "5ad603d33e83c83ba4e14b46f9b8a80b" + ), + "length": 66, + "perms": DentryPerms.content, + "sha1": hash_to_bytes("1bf0bb721ac92c18a19b13c0eb3d741cbfadebfc"), + "sha1_git": hash_to_bytes("7c4c57ba9ff496ad179b8f65b1d286edbda34c9a"), + "sha256": hash_to_bytes( + "caca942aeda7b308859eb56f909ec96d" + "07a499491690c453f73b9800a93b1659" ), }, - b'foo/rel-link-to-barfoo': { - 'blake2s256': hash_to_bytes( - 'd9c327421588a1cf61f316615005a2e9' - 'c13ac3a4e96d43a24138d718fa0e30db' - ), - 'data': b'../bar/barfoo', - 'length': 13, - 'perms': DentryPerms.symlink, - 'sha1': 
hash_to_bytes( - 'dc51221d308f3aeb2754db48391b85687c2869f4' - ), - 'sha1_git': hash_to_bytes( - 'acac326ddd63b0bc70840659d4ac43619484e69f' - ), - 'sha256': hash_to_bytes( - '8007d20db2af40435f42ddef4b8ad76b' - '80adbec26b249fdf0473353f8d99df08' + b"foo/rel-link-to-barfoo": { + "blake2s256": hash_to_bytes( + "d9c327421588a1cf61f316615005a2e9" + "c13ac3a4e96d43a24138d718fa0e30db" + ), + "data": b"../bar/barfoo", + "length": 13, + "perms": DentryPerms.symlink, + "sha1": hash_to_bytes("dc51221d308f3aeb2754db48391b85687c2869f4"), + "sha1_git": hash_to_bytes("acac326ddd63b0bc70840659d4ac43619484e69f"), + "sha256": hash_to_bytes( + "8007d20db2af40435f42ddef4b8ad76b" + "80adbec26b249fdf0473353f8d99df08" ), }, - b'link-to-another-quote': { - 'blake2s256': hash_to_bytes( - '2d0e73cea01ba949c1022dc10c8a43e6' - '6180639662e5dc2737b843382f7b1910' - ), - 'data': b'bar/barfoo/another-quote.org', - 'length': 28, - 'perms': DentryPerms.symlink, - 'sha1': hash_to_bytes( - 'cbeed15e79599c90de7383f420fed7acb48ea171' - ), - 'sha1_git': hash_to_bytes( - '7d5c08111e21c8a9f71540939998551683375fad' - ), - 'sha256': hash_to_bytes( - 'e6e17d0793aa750a0440eb9ad5b80b25' - '8076637ef0fb68f3ac2e59e4b9ac3ba6' + b"link-to-another-quote": { + "blake2s256": hash_to_bytes( + "2d0e73cea01ba949c1022dc10c8a43e6" + "6180639662e5dc2737b843382f7b1910" + ), + "data": b"bar/barfoo/another-quote.org", + "length": 28, + "perms": DentryPerms.symlink, + "sha1": hash_to_bytes("cbeed15e79599c90de7383f420fed7acb48ea171"), + "sha1_git": hash_to_bytes("7d5c08111e21c8a9f71540939998551683375fad"), + "sha256": hash_to_bytes( + "e6e17d0793aa750a0440eb9ad5b80b25" + "8076637ef0fb68f3ac2e59e4b9ac3ba6" ), }, - b'link-to-binary': { - 'blake2s256': hash_to_bytes( - '9ce18b1adecb33f891ca36664da676e1' - '2c772cc193778aac9a137b8dc5834b9b' - ), - 'data': b'some-binary', - 'length': 11, - 'perms': DentryPerms.symlink, - 'sha1': hash_to_bytes( - 'd0248714948b3a48a25438232a6f99f0318f59f1' - ), - 'sha1_git': hash_to_bytes( - 'e86b45e538d9b6888c969c89fbd22a85aa0e0366' - ), - 'sha256': hash_to_bytes( - '14126e97d83f7d261c5a6889cee73619' - '770ff09e40c5498685aba745be882eff' + b"link-to-binary": { + "blake2s256": hash_to_bytes( + "9ce18b1adecb33f891ca36664da676e1" + "2c772cc193778aac9a137b8dc5834b9b" + ), + "data": b"some-binary", + "length": 11, + "perms": DentryPerms.symlink, + "sha1": hash_to_bytes("d0248714948b3a48a25438232a6f99f0318f59f1"), + "sha1_git": hash_to_bytes("e86b45e538d9b6888c969c89fbd22a85aa0e0366"), + "sha256": hash_to_bytes( + "14126e97d83f7d261c5a6889cee73619" + "770ff09e40c5498685aba745be882eff" ), }, - b'link-to-foo': { - 'blake2s256': hash_to_bytes( - '08d6cad88075de8f192db097573d0e82' - '9411cd91eb6ec65e8fc16c017edfdb74' - ), - 'data': b'foo', - 'length': 3, - 'perms': DentryPerms.symlink, - 'sha1': hash_to_bytes( - '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33' - ), - 'sha1_git': hash_to_bytes( - '19102815663d23f8b75a47e7a01965dcdc96468c' - ), - 'sha256': hash_to_bytes( - '2c26b46b68ffc68ff99b453c1d304134' - '13422d706483bfa0f98a5e886266e7ae' + b"link-to-foo": { + "blake2s256": hash_to_bytes( + "08d6cad88075de8f192db097573d0e82" + "9411cd91eb6ec65e8fc16c017edfdb74" + ), + "data": b"foo", + "length": 3, + "perms": DentryPerms.symlink, + "sha1": hash_to_bytes("0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"), + "sha1_git": hash_to_bytes("19102815663d23f8b75a47e7a01965dcdc96468c"), + "sha256": hash_to_bytes( + "2c26b46b68ffc68ff99b453c1d304134" + "13422d706483bfa0f98a5e886266e7ae" ), }, - b'some-binary': { - 'blake2s256': hash_to_bytes( - 
'922e0f7015035212495b090c27577357' - 'a740ddd77b0b9e0cd23b5480c07a18c6' - ), - 'length': 5, - 'perms': DentryPerms.executable_content, - 'sha1': hash_to_bytes( - '0bbc12d7f4a2a15b143da84617d95cb223c9b23c' - ), - 'sha1_git': hash_to_bytes( - '68769579c3eaadbe555379b9c3538e6628bae1eb' - ), - 'sha256': hash_to_bytes( - 'bac650d34a7638bb0aeb5342646d24e3' - 'b9ad6b44c9b383621faa482b990a367d' + b"some-binary": { + "blake2s256": hash_to_bytes( + "922e0f7015035212495b090c27577357" + "a740ddd77b0b9e0cd23b5480c07a18c6" + ), + "length": 5, + "perms": DentryPerms.executable_content, + "sha1": hash_to_bytes("0bbc12d7f4a2a15b143da84617d95cb223c9b23c"), + "sha1_git": hash_to_bytes("68769579c3eaadbe555379b9c3538e6628bae1eb"), + "sha256": hash_to_bytes( + "bac650d34a7638bb0aeb5342646d24e3" + "b9ad6b44c9b383621faa482b990a367d" ), }, } @@ -455,28 +445,27 @@ class DataMixin: def tearDown(self): self.tmpdir.cleanup() - def assertContentEqual(self, left, right, *, # noqa - check_path=False): + def assertContentEqual(self, left, right, *, check_path=False): # noqa if not isinstance(left, Content): - raise ValueError('%s is not a Content' % left) + raise ValueError("%s is not a Content" % left) if isinstance(right, Content): right = right.get_data() # Compare dictionaries keys = DEFAULT_ALGORITHMS | { - 'length', - 'perms', + "length", + "perms", } if check_path: - keys |= {'path'} + keys |= {"path"} failed = [] for key in keys: try: lvalue = left.data[key] - if key == 'perms' and 'perms' not in right: - rvalue = from_disk.mode_to_perms(right['mode']) + if key == "perms" and "perms" not in right: + rvalue = from_disk.mode_to_perms(right["mode"]) else: rvalue = right[key] except KeyError: @@ -488,36 +477,35 @@ class DataMixin: if failed: raise self.failureException( - 'Content mismatched:\n' + - '\n'.join( - 'content[%s] = %r != %r' % ( - key, left.data.get(key), right.get(key)) + "Content mismatched:\n" + + "\n".join( + "content[%s] = %r != %r" % (key, left.data.get(key), right.get(key)) for key in failed ) ) def assertDirectoryEqual(self, left, right): # NoQA if not isinstance(left, Directory): - raise ValueError('%s is not a Directory' % left) + raise ValueError("%s is not a Directory" % left) if isinstance(right, Directory): right = right.get_data() - assert left.entries == right['entries'] - assert left.hash == right['id'] + assert left.entries == right["entries"] + assert left.hash == right["id"] assert left.to_model() == model.Directory.from_dict(right) def make_contents(self, directory): for filename, content in self.contents.items(): path = os.path.join(directory, filename) - with open(path, 'wb') as f: - f.write(content['data']) - os.chmod(path, content['mode']) + with open(path, "wb") as f: + f.write(content["data"]) + os.chmod(path, content["mode"]) def make_symlinks(self, directory): for filename, symlink in self.symlinks.items(): path = os.path.join(directory, filename) - os.symlink(symlink['data'], path) + os.symlink(symlink["data"], path) def make_specials(self, directory): for filename, fn in self.specials.items(): @@ -525,9 +513,9 @@ class DataMixin: fn(path) def make_from_tarball(self, directory): - tarball = os.path.join(TEST_DATA, 'dir-folders', 'sample-folder.tgz') + tarball = os.path.join(TEST_DATA, "dir-folders", "sample-folder.tgz") - with tarfile.open(tarball, 'r:gz') as f: + with tarfile.open(tarball, "r:gz") as f: f.extractall(os.fsdecode(directory)) @@ -537,8 +525,9 @@ class TestContent(DataMixin, unittest.TestCase): def test_data_to_content(self): for filename, content in 
self.contents.items(): - conv_content = Content.from_bytes(mode=content['mode'], - data=content['data']) + conv_content = Content.from_bytes( + mode=content["mode"], data=content["data"] + ) self.assertContentEqual(conv_content, content) self.assertIn(hash_to_hex(conv_content.hash), repr(conv_content)) @@ -559,13 +548,12 @@ class SymlinkToContent(DataMixin, unittest.TestCase): for filename, symlink in self.symlinks.items(): path = os.path.join(self.tmpdir_name, filename) perms = 0o120000 - model_content = \ - Content.from_symlink(path=path, mode=perms).to_model() + model_content = Content.from_symlink(path=path, mode=perms).to_model() right = symlink.copy() - for key in ('perms', 'path', 'mode'): + for key in ("perms", "path", "mode"): right.pop(key, None) - right['status'] = 'visible' + right["status"] = "visible" assert model_content == model.Content.from_dict(right) @@ -594,7 +582,7 @@ class FileToContent(DataMixin, unittest.TestCase): conv_content = Content.from_file(path=path) self.assertContentEqual(conv_content, self.empty_content) - for path in ['/dev/null', '/dev/zero']: + for path in ["/dev/null", "/dev/zero"]: path = os.path.join(self.tmpdir_name, filename) conv_content = Content.from_file(path=path) self.assertContentEqual(conv_content, self.empty_content) @@ -605,9 +593,9 @@ class FileToContent(DataMixin, unittest.TestCase): model_content = Content.from_file(path=path).to_model() right = symlink.copy() - for key in ('perms', 'path', 'mode'): + for key in ("perms", "path", "mode"): right.pop(key, None) - right['status'] = 'visible' + right["status"] = "visible" assert model_content == model.Content.from_dict(right) def test_file_to_content_model(self): @@ -616,12 +604,12 @@ class FileToContent(DataMixin, unittest.TestCase): model_content = Content.from_file(path=path).to_model() right = content.copy() - for key in ('perms', 'mode'): + for key in ("perms", "mode"): right.pop(key, None) assert model_content.with_data() == model.Content.from_dict(right) - right['path'] = path - del right['data'] + right["path"] = path + del right["data"] assert model_content == DiskBackedContent.from_dict(right) def test_special_to_content_model(self): @@ -630,18 +618,18 @@ class FileToContent(DataMixin, unittest.TestCase): model_content = Content.from_file(path=path).to_model() right = self.empty_content.copy() - for key in ('perms', 'path', 'mode'): + for key in ("perms", "path", "mode"): right.pop(key, None) - right['status'] = 'visible' + right["status"] = "visible" assert model_content == model.Content.from_dict(right) - for path in ['/dev/null', '/dev/zero']: + for path in ["/dev/null", "/dev/zero"]: model_content = Content.from_file(path=path).to_model() right = self.empty_content.copy() - for key in ('perms', 'path', 'mode'): + for key in ("perms", "path", "mode"): right.pop(key, None) - right['status'] = 'visible' + right["status"] = "visible" assert model_content == model.Content.from_dict(right) def test_symlink_max_length(self): @@ -649,15 +637,15 @@ class FileToContent(DataMixin, unittest.TestCase): for filename, symlink in self.symlinks.items(): path = os.path.join(self.tmpdir_name, filename) content = Content.from_file(path=path) - if content.data['length'] > max_content_length: - with pytest.raises(Exception, match='too large'): + if content.data["length"] > max_content_length: + with pytest.raises(Exception, match="too large"): Content.from_file( - path=path, - max_content_length=max_content_length) + path=path, max_content_length=max_content_length + ) else: limited_content = 
Content.from_file( - path=path, - max_content_length=max_content_length) + path=path, max_content_length=max_content_length + ) assert content == limited_content def test_file_max_length(self): @@ -666,16 +654,15 @@ class FileToContent(DataMixin, unittest.TestCase): path = os.path.join(self.tmpdir_name, filename) content = Content.from_file(path=path) limited_content = Content.from_file( - path=path, - max_content_length=max_content_length) - assert content.data['length'] == limited_content.data['length'] - assert content.data['status'] == 'visible' - if content.data['length'] > max_content_length: - assert limited_content.data['status'] == 'absent' - assert limited_content.data['reason'] \ - == 'Content too large' + path=path, max_content_length=max_content_length + ) + assert content.data["length"] == limited_content.data["length"] + assert content.data["status"] == "visible" + if content.data["length"] > max_content_length: + assert limited_content.data["status"] == "absent" + assert limited_content.data["reason"] == "Content too large" else: - assert limited_content.data['status'] == 'visible' + assert limited_content.data["status"] == "visible" def test_special_file_max_length(self): for max_content_length in [None, 0, 1]: @@ -683,182 +670,171 @@ class FileToContent(DataMixin, unittest.TestCase): path = os.path.join(self.tmpdir_name, filename) content = Content.from_file(path=path) limited_content = Content.from_file( - path=path, - max_content_length=max_content_length) + path=path, max_content_length=max_content_length + ) assert limited_content == content def test_file_to_content_with_path(self): for filename, content in self.contents.items(): content_w_path = content.copy() path = os.path.join(self.tmpdir_name, filename) - content_w_path['path'] = path + content_w_path["path"] = path conv_content = Content.from_file(path=path) - self.assertContentEqual(conv_content, content_w_path, - check_path=True) + self.assertContentEqual(conv_content, content_w_path, check_path=True) @pytest.mark.fs class DirectoryToObjects(DataMixin, unittest.TestCase): def setUp(self): super().setUp() - contents = os.path.join(self.tmpdir_name, b'contents') + contents = os.path.join(self.tmpdir_name, b"contents") os.mkdir(contents) self.make_contents(contents) - symlinks = os.path.join(self.tmpdir_name, b'symlinks') + symlinks = os.path.join(self.tmpdir_name, b"symlinks") os.mkdir(symlinks) self.make_symlinks(symlinks) - specials = os.path.join(self.tmpdir_name, b'specials') + specials = os.path.join(self.tmpdir_name, b"specials") os.mkdir(specials) self.make_specials(specials) - empties = os.path.join(self.tmpdir_name, b'empty1', b'empty2') + empties = os.path.join(self.tmpdir_name, b"empty1", b"empty2") os.makedirs(empties) def test_directory_to_objects(self): directory = Directory.from_disk(path=self.tmpdir_name) for name, value in self.contents.items(): - self.assertContentEqual(directory[b'contents/' + name], value) + self.assertContentEqual(directory[b"contents/" + name], value) for name, value in self.symlinks.items(): - self.assertContentEqual(directory[b'symlinks/' + name], value) + self.assertContentEqual(directory[b"symlinks/" + name], value) for name in self.specials: self.assertContentEqual( - directory[b'specials/' + name], - self.empty_content, + directory[b"specials/" + name], self.empty_content, ) self.assertEqual( - directory[b'empty1/empty2'].get_data(), - self.empty_directory, + directory[b"empty1/empty2"].get_data(), self.empty_directory, ) # Raise on non existent file with 
self.assertRaisesRegex(KeyError, "b'nonexistent'"): - directory[b'empty1/nonexistent'] + directory[b"empty1/nonexistent"] # Raise on non existent directory with self.assertRaisesRegex(KeyError, "b'nonexistentdir'"): - directory[b'nonexistentdir/file'] + directory[b"nonexistentdir/file"] objs = directory.collect() - self.assertCountEqual(['content', 'directory'], objs) + self.assertCountEqual(["content", "directory"], objs) - self.assertEqual(len(objs['directory']), 6) - self.assertEqual(len(objs['content']), - len(self.contents) - + len(self.symlinks) - + 1) + self.assertEqual(len(objs["directory"]), 6) + self.assertEqual( + len(objs["content"]), len(self.contents) + len(self.symlinks) + 1 + ) def test_directory_to_objects_ignore_empty(self): directory = Directory.from_disk( - path=self.tmpdir_name, - dir_filter=from_disk.ignore_empty_directories + path=self.tmpdir_name, dir_filter=from_disk.ignore_empty_directories ) for name, value in self.contents.items(): - self.assertContentEqual(directory[b'contents/' + name], value) + self.assertContentEqual(directory[b"contents/" + name], value) for name, value in self.symlinks.items(): - self.assertContentEqual(directory[b'symlinks/' + name], value) + self.assertContentEqual(directory[b"symlinks/" + name], value) for name in self.specials: self.assertContentEqual( - directory[b'specials/' + name], - self.empty_content, + directory[b"specials/" + name], self.empty_content, ) # empty directories have been ignored recursively with self.assertRaisesRegex(KeyError, "b'empty1'"): - directory[b'empty1'] + directory[b"empty1"] with self.assertRaisesRegex(KeyError, "b'empty1'"): - directory[b'empty1/empty2'] + directory[b"empty1/empty2"] objs = directory.collect() - self.assertCountEqual(['content', 'directory'], objs) + self.assertCountEqual(["content", "directory"], objs) - self.assertEqual(len(objs['directory']), 4) - self.assertEqual(len(objs['content']), - len(self.contents) - + len(self.symlinks) - + 1) + self.assertEqual(len(objs["directory"]), 4) + self.assertEqual( + len(objs["content"]), len(self.contents) + len(self.symlinks) + 1 + ) def test_directory_to_objects_ignore_name(self): directory = Directory.from_disk( path=self.tmpdir_name, - dir_filter=from_disk.ignore_named_directories([b'symlinks']) + dir_filter=from_disk.ignore_named_directories([b"symlinks"]), ) for name, value in self.contents.items(): - self.assertContentEqual(directory[b'contents/' + name], value) + self.assertContentEqual(directory[b"contents/" + name], value) for name in self.specials: self.assertContentEqual( - directory[b'specials/' + name], - self.empty_content, + directory[b"specials/" + name], self.empty_content, ) self.assertEqual( - directory[b'empty1/empty2'].get_data(), - self.empty_directory, + directory[b"empty1/empty2"].get_data(), self.empty_directory, ) with self.assertRaisesRegex(KeyError, "b'symlinks'"): - directory[b'symlinks'] + directory[b"symlinks"] objs = directory.collect() - self.assertCountEqual(['content', 'directory'], objs) + self.assertCountEqual(["content", "directory"], objs) - self.assertEqual(len(objs['directory']), 5) - self.assertEqual(len(objs['content']), - len(self.contents) - + 1) + self.assertEqual(len(objs["directory"]), 5) + self.assertEqual(len(objs["content"]), len(self.contents) + 1) def test_directory_to_objects_ignore_name_case(self): directory = Directory.from_disk( path=self.tmpdir_name, - dir_filter=from_disk.ignore_named_directories([b'symLiNks'], - case_sensitive=False) + dir_filter=from_disk.ignore_named_directories( + 
[b"symLiNks"], case_sensitive=False + ), ) for name, value in self.contents.items(): - self.assertContentEqual(directory[b'contents/' + name], value) + self.assertContentEqual(directory[b"contents/" + name], value) for name in self.specials: self.assertContentEqual( - directory[b'specials/' + name], - self.empty_content, + directory[b"specials/" + name], self.empty_content, ) self.assertEqual( - directory[b'empty1/empty2'].get_data(), - self.empty_directory, + directory[b"empty1/empty2"].get_data(), self.empty_directory, ) with self.assertRaisesRegex(KeyError, "b'symlinks'"): - directory[b'symlinks'] + directory[b"symlinks"] objs = directory.collect() - self.assertCountEqual(['content', 'directory'], objs) + self.assertCountEqual(["content", "directory"], objs) - self.assertEqual(len(objs['directory']), 5) - self.assertEqual(len(objs['content']), - len(self.contents) - + 1) + self.assertEqual(len(objs["directory"]), 5) + self.assertEqual(len(objs["content"]), len(self.contents) + 1) def test_directory_entry_order(self): with tempfile.TemporaryDirectory() as dirname: dirname = os.fsencode(dirname) - open(os.path.join(dirname, b'foo.'), 'a') - open(os.path.join(dirname, b'foo0'), 'a') - os.mkdir(os.path.join(dirname, b'foo')) + open(os.path.join(dirname, b"foo."), "a") + open(os.path.join(dirname, b"foo0"), "a") + os.mkdir(os.path.join(dirname, b"foo")) directory = Directory.from_disk(path=dirname) - assert [entry['name'] for entry in directory.entries] \ - == [b'foo.', b'foo', b'foo0'] + assert [entry["name"] for entry in directory.entries] == [ + b"foo.", + b"foo", + b"foo0", + ] @pytest.mark.fs @@ -869,7 +845,7 @@ class TarballTest(DataMixin, unittest.TestCase): def test_contents_match(self): directory = Directory.from_disk( - path=os.path.join(self.tmpdir_name, b'sample-folder') + path=os.path.join(self.tmpdir_name, b"sample-folder") ) for name, expected in self.tarball_contents.items(): @@ -879,47 +855,47 @@ class TarballTest(DataMixin, unittest.TestCase): elif isinstance(obj, Directory): self.assertDirectoryEqual(obj, expected) else: - raise self.failureException('Unknown type for %s' % obj) + raise self.failureException("Unknown type for %s" % obj) class DirectoryManipulation(DataMixin, unittest.TestCase): def test_directory_access_nested(self): d = Directory() - d[b'a'] = Directory() - d[b'a/b'] = Directory() + d[b"a"] = Directory() + d[b"a/b"] = Directory() - self.assertEqual(d[b'a/b'].get_data(), self.empty_directory) + self.assertEqual(d[b"a/b"].get_data(), self.empty_directory) def test_directory_del_nested(self): d = Directory() - d[b'a'] = Directory() - d[b'a/b'] = Directory() + d[b"a"] = Directory() + d[b"a/b"] = Directory() with self.assertRaisesRegex(KeyError, "b'c'"): - del d[b'a/b/c'] + del d[b"a/b/c"] with self.assertRaisesRegex(KeyError, "b'level2'"): - del d[b'a/level2/c'] + del d[b"a/level2/c"] - del d[b'a/b'] + del d[b"a/b"] - self.assertEqual(d[b'a'].get_data(), self.empty_directory) + self.assertEqual(d[b"a"].get_data(), self.empty_directory) def test_directory_access_self(self): d = Directory() - self.assertIs(d, d[b'']) - self.assertIs(d, d[b'/']) - self.assertIs(d, d[b'//']) + self.assertIs(d, d[b""]) + self.assertIs(d, d[b"/"]) + self.assertIs(d, d[b"//"]) def test_directory_access_wrong_type(self): d = Directory() - with self.assertRaisesRegex(ValueError, 'bytes from Directory'): - d['foo'] - with self.assertRaisesRegex(ValueError, 'bytes from Directory'): + with self.assertRaisesRegex(ValueError, "bytes from Directory"): + d["foo"] + with 
self.assertRaisesRegex(ValueError, "bytes from Directory"): d[42] def test_directory_repr(self): - entries = [b'a', b'b', b'c'] + entries = [b"a", b"b", b"c"] d = Directory() for entry in entries: d[entry] = Directory() @@ -932,32 +908,32 @@ class DirectoryManipulation(DataMixin, unittest.TestCase): def test_directory_set_wrong_type_name(self): d = Directory() - with self.assertRaisesRegex(ValueError, 'bytes Directory entry'): - d['foo'] = Directory() - with self.assertRaisesRegex(ValueError, 'bytes Directory entry'): + with self.assertRaisesRegex(ValueError, "bytes Directory entry"): + d["foo"] = Directory() + with self.assertRaisesRegex(ValueError, "bytes Directory entry"): d[42] = Directory() def test_directory_set_nul_in_name(self): d = Directory() - with self.assertRaisesRegex(ValueError, 'nul bytes'): - d[b'\x00\x01'] = Directory() + with self.assertRaisesRegex(ValueError, "nul bytes"): + d[b"\x00\x01"] = Directory() def test_directory_set_empty_name(self): d = Directory() - with self.assertRaisesRegex(ValueError, 'must have a name'): - d[b''] = Directory() - with self.assertRaisesRegex(ValueError, 'must have a name'): - d[b'/'] = Directory() + with self.assertRaisesRegex(ValueError, "must have a name"): + d[b""] = Directory() + with self.assertRaisesRegex(ValueError, "must have a name"): + d[b"/"] = Directory() def test_directory_set_wrong_type(self): d = Directory() - with self.assertRaisesRegex(ValueError, 'Content or Directory'): - d[b'entry'] = object() + with self.assertRaisesRegex(ValueError, "Content or Directory"): + d[b"entry"] = object() def test_directory_del_wrong_type(self): d = Directory() - with self.assertRaisesRegex(ValueError, 'bytes Directory entry'): - del d['foo'] - with self.assertRaisesRegex(ValueError, 'bytes Directory entry'): + with self.assertRaisesRegex(ValueError, "bytes Directory entry"): + del d["foo"] + with self.assertRaisesRegex(ValueError, "bytes Directory entry"): del d[42] diff --git a/swh/model/tests/test_generate_testdata.py b/swh/model/tests/test_generate_testdata.py index 56fff657e77a11e61edf710b4828302bcaf3cd60..aa9c8af305cff8cc6108853f6b01ceafcad3c8c5 100644 --- a/swh/model/tests/test_generate_testdata.py +++ b/swh/model/tests/test_generate_testdata.py @@ -28,7 +28,7 @@ def test_gen_origins_default(): def test_gen_origins_max(): nmax = len(ORIGINS) - origins = gen_origins(nmax+1) + origins = gen_origins(nmax + 1) assert len(origins) == nmax models = {Origin.from_dict(d).url for d in origins} # ensure we did not generate the same origin twice diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py index abdff979f5d01b80615f79da6cf986001dc11cb0..ff99cf242e128d3326f4e480dd197a1021250549 100644 --- a/swh/model/tests/test_hashutil.py +++ b/swh/model/tests/test_hashutil.py @@ -19,19 +19,18 @@ class BaseHashutil(unittest.TestCase): # Reset function cache hashutil._blake2_hash_cache = {} - self.data = b'1984\n' + self.data = b"1984\n" self.hex_checksums = { - 'sha1': '62be35bf00ff0c624f4a621e2ea5595a049e0731', - 'sha1_git': '568aaf43d83b2c3df8067f3bedbb97d83260be6d', - 'sha256': '26602113b4b9afd9d55466b08580d3c2' - '4a9b50ee5b5866c0d91fab0e65907311', - 'blake2s256': '63cfb259e1fdb485bc5c55749697a6b21ef31fb7445f6c78a' - 'c9422f9f2dc8906', + "sha1": "62be35bf00ff0c624f4a621e2ea5595a049e0731", + "sha1_git": "568aaf43d83b2c3df8067f3bedbb97d83260be6d", + "sha256": "26602113b4b9afd9d55466b08580d3c2" + "4a9b50ee5b5866c0d91fab0e65907311", + "blake2s256": "63cfb259e1fdb485bc5c55749697a6b21ef31fb7445f6c78a" + "c9422f9f2dc8906", } 
self.checksums = { - type: bytes.fromhex(cksum) - for type, cksum in self.hex_checksums.items() + type: bytes.fromhex(cksum) for type, cksum in self.hex_checksums.items() } self.bytehex_checksums = { @@ -40,15 +39,14 @@ class BaseHashutil(unittest.TestCase): } self.git_hex_checksums = { - 'blob': self.hex_checksums['sha1_git'], - 'tree': '5b2e883aa33d2efab98442693ea4dd5f1b8871b0', - 'commit': '79e4093542e72f0fcb7cbd75cb7d270f9254aa8f', - 'tag': 'd6bf62466f287b4d986c545890716ce058bddf67', + "blob": self.hex_checksums["sha1_git"], + "tree": "5b2e883aa33d2efab98442693ea4dd5f1b8871b0", + "commit": "79e4093542e72f0fcb7cbd75cb7d270f9254aa8f", + "tag": "d6bf62466f287b4d986c545890716ce058bddf67", } self.git_checksums = { - type: bytes.fromhex(cksum) - for type, cksum in self.git_hex_checksums.items() + type: bytes.fromhex(cksum) for type, cksum in self.git_hex_checksums.items() } @@ -56,24 +54,24 @@ class MultiHashTest(BaseHashutil): def test_multi_hash_data(self): checksums = MultiHash.from_data(self.data).digest() self.assertEqual(checksums, self.checksums) - self.assertFalse('length' in checksums) + self.assertFalse("length" in checksums) def test_multi_hash_data_with_length(self): expected_checksums = self.checksums.copy() - expected_checksums['length'] = len(self.data) + expected_checksums["length"] = len(self.data) - algos = set(['length']).union(hashutil.DEFAULT_ALGORITHMS) + algos = set(["length"]).union(hashutil.DEFAULT_ALGORITHMS) checksums = MultiHash.from_data(self.data, hash_names=algos).digest() self.assertEqual(checksums, expected_checksums) - self.assertTrue('length' in checksums) + self.assertTrue("length" in checksums) def test_multi_hash_data_unknown_hash(self): with self.assertRaises(ValueError) as cm: - MultiHash.from_data(self.data, ['unknown-hash']) + MultiHash.from_data(self.data, ["unknown-hash"]) - self.assertIn('Unexpected hashing algorithm', cm.exception.args[0]) - self.assertIn('unknown-hash', cm.exception.args[0]) + self.assertIn("Unexpected hashing algorithm", cm.exception.args[0]) + self.assertIn("unknown-hash", cm.exception.args[0]) def test_multi_hash_file(self): fobj = io.BytesIO(self.data) @@ -96,9 +94,9 @@ class MultiHashTest(BaseHashutil): def test_multi_hash_file_missing_length(self): fobj = io.BytesIO(self.data) with self.assertRaises(ValueError) as cm: - MultiHash.from_file(fobj, hash_names=['sha1_git']) + MultiHash.from_file(fobj, hash_names=["sha1_git"]) - self.assertIn('Missing length', cm.exception.args[0]) + self.assertIn("Missing length", cm.exception.args[0]) def test_multi_hash_path(self): with tempfile.NamedTemporaryFile(delete=False) as f: @@ -111,7 +109,6 @@ class MultiHashTest(BaseHashutil): class Hashutil(BaseHashutil): - def test_hash_git_data(self): checksums = { git_type: hashutil.hash_git_data(self.data, git_type) @@ -122,10 +119,10 @@ class Hashutil(BaseHashutil): def test_hash_git_data_unknown_git_type(self): with self.assertRaises(ValueError) as cm: - hashutil.hash_git_data(self.data, 'unknown-git-type') + hashutil.hash_git_data(self.data, "unknown-git-type") - self.assertIn('Unexpected git object type', cm.exception.args[0]) - self.assertIn('unknown-git-type', cm.exception.args[0]) + self.assertIn("Unexpected git object type", cm.exception.args[0]) + self.assertIn("unknown-git-type", cm.exception.args[0]) def test_hash_to_hex(self): for type in self.checksums: @@ -143,192 +140,199 @@ class Hashutil(BaseHashutil): def test_hash_to_bytehex(self): for algo in self.checksums: - self.assertEqual(self.hex_checksums[algo].encode('ascii'), - 
hashutil.hash_to_bytehex(self.checksums[algo])) + self.assertEqual( + self.hex_checksums[algo].encode("ascii"), + hashutil.hash_to_bytehex(self.checksums[algo]), + ) def test_bytehex_to_hash(self): for algo in self.checksums: - self.assertEqual(self.checksums[algo], - hashutil.bytehex_to_hash( - self.hex_checksums[algo].encode())) + self.assertEqual( + self.checksums[algo], + hashutil.bytehex_to_hash(self.hex_checksums[algo].encode()), + ) def test_new_hash_unsupported_hashing_algorithm(self): try: - hashutil._new_hash('blake2:10') + hashutil._new_hash("blake2:10") except ValueError as e: - self.assertEqual(str(e), - 'Unexpected hashing algorithm blake2:10, ' - 'expected one of blake2b512, blake2s256, ' - 'sha1, sha1_git, sha256') - - @patch('hashlib.new') + self.assertEqual( + str(e), + "Unexpected hashing algorithm blake2:10, " + "expected one of blake2b512, blake2s256, " + "sha1, sha1_git, sha256", + ) + + @patch("hashlib.new") def test_new_hash_blake2b_blake2b512_builtin(self, mock_hashlib_new): - if 'blake2b512' not in hashlib.algorithms_available: - self.skipTest('blake2b512 not built-in') + if "blake2b512" not in hashlib.algorithms_available: + self.skipTest("blake2b512 not built-in") mock_hashlib_new.return_value = sentinel = object() - h = hashutil._new_hash('blake2b512') + h = hashutil._new_hash("blake2b512") self.assertIs(h, sentinel) - mock_hashlib_new.assert_called_with('blake2b512') + mock_hashlib_new.assert_called_with("blake2b512") - @patch('hashlib.new') + @patch("hashlib.new") def test_new_hash_blake2s_blake2s256_builtin(self, mock_hashlib_new): - if 'blake2s256' not in hashlib.algorithms_available: - self.skipTest('blake2s256 not built-in') + if "blake2s256" not in hashlib.algorithms_available: + self.skipTest("blake2s256 not built-in") mock_hashlib_new.return_value = sentinel = object() - h = hashutil._new_hash('blake2s256') + h = hashutil._new_hash("blake2s256") self.assertIs(h, sentinel) - mock_hashlib_new.assert_called_with('blake2s256') + mock_hashlib_new.assert_called_with("blake2s256") def test_new_hash_blake2b_builtin(self): removed_hash = False try: - if 'blake2b512' in hashlib.algorithms_available: + if "blake2b512" in hashlib.algorithms_available: removed_hash = True - hashlib.algorithms_available.remove('blake2b512') - if 'blake2b' not in hashlib.algorithms_available: - self.skipTest('blake2b not built in') + hashlib.algorithms_available.remove("blake2b512") + if "blake2b" not in hashlib.algorithms_available: + self.skipTest("blake2b not built in") - with patch('hashlib.blake2b') as mock_blake2b: + with patch("hashlib.blake2b") as mock_blake2b: mock_blake2b.return_value = sentinel = object() - h = hashutil._new_hash('blake2b512') + h = hashutil._new_hash("blake2b512") self.assertIs(h, sentinel) - mock_blake2b.assert_called_with(digest_size=512//8) + mock_blake2b.assert_called_with(digest_size=512 // 8) finally: if removed_hash: - hashlib.algorithms_available.add('blake2b512') + hashlib.algorithms_available.add("blake2b512") def test_new_hash_blake2s_builtin(self): removed_hash = False try: - if 'blake2s256' in hashlib.algorithms_available: + if "blake2s256" in hashlib.algorithms_available: removed_hash = True - hashlib.algorithms_available.remove('blake2s256') - if 'blake2s' not in hashlib.algorithms_available: - self.skipTest('blake2s not built in') + hashlib.algorithms_available.remove("blake2s256") + if "blake2s" not in hashlib.algorithms_available: + self.skipTest("blake2s not built in") - with patch('hashlib.blake2s') as mock_blake2s: + with 
patch("hashlib.blake2s") as mock_blake2s: mock_blake2s.return_value = sentinel = object() - h = hashutil._new_hash('blake2s256') + h = hashutil._new_hash("blake2s256") self.assertIs(h, sentinel) - mock_blake2s.assert_called_with(digest_size=256//8) + mock_blake2s.assert_called_with(digest_size=256 // 8) finally: if removed_hash: - hashlib.algorithms_available.add('blake2s256') + hashlib.algorithms_available.add("blake2s256") def test_new_hash_blake2b_pyblake2(self): - if 'blake2b512' in hashlib.algorithms_available: - self.skipTest('blake2b512 built in') - if 'blake2b' in hashlib.algorithms_available: - self.skipTest('blake2b built in') + if "blake2b512" in hashlib.algorithms_available: + self.skipTest("blake2b512 built in") + if "blake2b" in hashlib.algorithms_available: + self.skipTest("blake2b built in") - with patch('pyblake2.blake2b') as mock_blake2b: + with patch("pyblake2.blake2b") as mock_blake2b: mock_blake2b.return_value = sentinel = object() - h = hashutil._new_hash('blake2b512') + h = hashutil._new_hash("blake2b512") self.assertIs(h, sentinel) - mock_blake2b.assert_called_with(digest_size=512//8) + mock_blake2b.assert_called_with(digest_size=512 // 8) def test_new_hash_blake2s_pyblake2(self): - if 'blake2s256' in hashlib.algorithms_available: - self.skipTest('blake2s256 built in') - if 'blake2s' in hashlib.algorithms_available: - self.skipTest('blake2s built in') + if "blake2s256" in hashlib.algorithms_available: + self.skipTest("blake2s256 built in") + if "blake2s" in hashlib.algorithms_available: + self.skipTest("blake2s built in") - with patch('pyblake2.blake2s') as mock_blake2s: + with patch("pyblake2.blake2s") as mock_blake2s: mock_blake2s.return_value = sentinel = object() - h = hashutil._new_hash('blake2s256') + h = hashutil._new_hash("blake2s256") self.assertIs(h, sentinel) - mock_blake2s.assert_called_with(digest_size=256//8) + mock_blake2s.assert_called_with(digest_size=256 // 8) class HashlibGit(unittest.TestCase): - def setUp(self): - self.blob_data = b'42\n' - - self.tree_data = b''.join([b'40000 barfoo\0', - bytes.fromhex('c3020f6bf135a38c6df' - '3afeb5fb38232c5e07087'), - b'100644 blah\0', - bytes.fromhex('63756ef0df5e4f10b6efa' - '33cfe5c758749615f20'), - b'100644 hello\0', - bytes.fromhex('907b308167f0880fb2a' - '5c0e1614bb0c7620f9dc3')]) - - self.commit_data = """tree 1c61f7259dcb770f46b194d941df4f08ff0a3970 + self.blob_data = b"42\n" + + self.tree_data = b"".join( + [ + b"40000 barfoo\0", + bytes.fromhex("c3020f6bf135a38c6df" "3afeb5fb38232c5e07087"), + b"100644 blah\0", + bytes.fromhex("63756ef0df5e4f10b6efa" "33cfe5c758749615f20"), + b"100644 hello\0", + bytes.fromhex("907b308167f0880fb2a" "5c0e1614bb0c7620f9dc3"), + ] + ) + + self.commit_data = b"""\ +tree 1c61f7259dcb770f46b194d941df4f08ff0a3970 author Antoine R. Dumont (@ardumont) <antoine.romain.dumont@gmail.com> 1444054085 +0200 committer Antoine R. Dumont (@ardumont) <antoine.romain.dumont@gmail.com> 1444054085 +0200 initial -""".encode('utf-8') # NOQA +""" # noqa self.tag_data = """object 24d012aaec0bc5a4d2f62c56399053d6cc72a241 type commit tag 0.0.1 tagger Antoine R. 
Dumont (@ardumont) <antoine.romain.dumont@gmail.com> 1444225145 +0200 blah -""".encode('utf-8') # NOQA +""".encode( + "utf-8" + ) # NOQA self.checksums = { - 'blob_sha1_git': bytes.fromhex('d81cc0710eb6cf9efd5b920a8453e1' - 'e07157b6cd'), - 'tree_sha1_git': bytes.fromhex('ac212302c45eada382b27bfda795db' - '121dacdb1c'), - 'commit_sha1_git': bytes.fromhex('e960570b2e6e2798fa4cfb9af2c399' - 'd629189653'), - 'tag_sha1_git': bytes.fromhex('bc2b99ba469987bcf1272c189ed534' - 'e9e959f120'), + "blob_sha1_git": bytes.fromhex( + "d81cc0710eb6cf9efd5b920a8453e1" "e07157b6cd" + ), + "tree_sha1_git": bytes.fromhex( + "ac212302c45eada382b27bfda795db" "121dacdb1c" + ), + "commit_sha1_git": bytes.fromhex( + "e960570b2e6e2798fa4cfb9af2c399" "d629189653" + ), + "tag_sha1_git": bytes.fromhex( + "bc2b99ba469987bcf1272c189ed534" "e9e959f120" + ), } def test_unknown_header_type(self): with self.assertRaises(ValueError) as cm: - hashutil.hash_git_data(b'any-data', 'some-unknown-type') + hashutil.hash_git_data(b"any-data", "some-unknown-type") - self.assertIn('Unexpected git object type', cm.exception.args[0]) + self.assertIn("Unexpected git object type", cm.exception.args[0]) def test_hashdata_content(self): # when - actual_hash = hashutil.hash_git_data(self.blob_data, git_type='blob') + actual_hash = hashutil.hash_git_data(self.blob_data, git_type="blob") # then - self.assertEqual(actual_hash, - self.checksums['blob_sha1_git']) + self.assertEqual(actual_hash, self.checksums["blob_sha1_git"]) def test_hashdata_tree(self): # when - actual_hash = hashutil.hash_git_data(self.tree_data, git_type='tree') + actual_hash = hashutil.hash_git_data(self.tree_data, git_type="tree") # then - self.assertEqual(actual_hash, - self.checksums['tree_sha1_git']) + self.assertEqual(actual_hash, self.checksums["tree_sha1_git"]) def test_hashdata_revision(self): # when - actual_hash = hashutil.hash_git_data(self.commit_data, - git_type='commit') + actual_hash = hashutil.hash_git_data(self.commit_data, git_type="commit") # then - self.assertEqual(actual_hash, - self.checksums['commit_sha1_git']) + self.assertEqual(actual_hash, self.checksums["commit_sha1_git"]) def test_hashdata_tag(self): # when - actual_hash = hashutil.hash_git_data(self.tag_data, git_type='tag') + actual_hash = hashutil.hash_git_data(self.tag_data, git_type="tag") # then - self.assertEqual(actual_hash, - self.checksums['tag_sha1_git']) + self.assertEqual(actual_hash, self.checksums["tag_sha1_git"]) diff --git a/swh/model/tests/test_hypothesis_strategies.py b/swh/model/tests/test_hypothesis_strategies.py index 76011843c6484bcd49605750179cf72510325d6b..b790f9a37f44568f5e55dfba46126a413c41c67d 100644 --- a/swh/model/tests/test_hypothesis_strategies.py +++ b/swh/model/tests/test_hypothesis_strategies.py @@ -9,14 +9,11 @@ import attr from hypothesis import given, settings from swh.model.hashutil import DEFAULT_ALGORITHMS -from swh.model.hypothesis_strategies import ( - objects, object_dicts, snapshots -) +from swh.model.hypothesis_strategies import objects, object_dicts, snapshots from swh.model.model import TargetType -target_types = ( - 'content', 'directory', 'revision', 'release', 'snapshot', 'alias') +target_types = ("content", "directory", "revision", "release", "snapshot", "alias") @given(objects()) @@ -35,8 +32,7 @@ def assert_nested_dict(obj): elif isinstance(obj, list): for value in obj: assert_nested_dict(value) - elif isinstance(obj, (int, float, str, bytes, bool, type(None), - datetime.datetime)): + elif isinstance(obj, (int, float, str, bytes, bool, 
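
The HashlibGit fixtures above pin digests that plain hashlib reproduces once git's "<type> <size>\0" framing is prepended; a self-contained check against the blob fixture:

    import hashlib

    data = b"42\n"  # the blob fixture used above
    header = b"blob %d\0" % len(data)  # git object framing
    assert (
        hashlib.sha1(header + data).hexdigest()
        == "d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"
    )
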
type(None), datetime.datetime)): pass else: assert False, obj @@ -46,21 +42,21 @@ def assert_nested_dict(obj): def test_dicts_generation(obj_type_and_obj): (obj_type, object_) = obj_type_and_obj assert_nested_dict(object_) - if obj_type == 'content': - COMMON_KEYS = set(DEFAULT_ALGORITHMS) | {'length', 'status', 'ctime'} - if object_['status'] == 'visible': - assert set(object_) <= COMMON_KEYS | {'data'} - elif object_['status'] == 'absent': - assert set(object_) == COMMON_KEYS | {'reason'} - elif object_['status'] == 'hidden': - assert set(object_) <= COMMON_KEYS | {'data'} + if obj_type == "content": + COMMON_KEYS = set(DEFAULT_ALGORITHMS) | {"length", "status", "ctime"} + if object_["status"] == "visible": + assert set(object_) <= COMMON_KEYS | {"data"} + elif object_["status"] == "absent": + assert set(object_) == COMMON_KEYS | {"reason"} + elif object_["status"] == "hidden": + assert set(object_) <= COMMON_KEYS | {"data"} else: assert False, object_ - elif obj_type == 'release': - assert object_['target_type'] in target_types - elif obj_type == 'snapshot': - for branch in object_['branches'].values(): - assert branch is None or branch['target_type'] in target_types + elif obj_type == "release": + assert object_["target_type"] in target_types + elif obj_type == "snapshot": + for branch in object_["branches"].values(): + assert branch is None or branch["target_type"] in target_types @given(objects()) @@ -68,21 +64,21 @@ def test_model_to_dicts(obj_type_and_obj): (obj_type, object_) = obj_type_and_obj obj_dict = object_.to_dict() assert_nested_dict(obj_dict) - if obj_type == 'content': - COMMON_KEYS = set(DEFAULT_ALGORITHMS) | {'length', 'status', 'ctime'} - if obj_dict['status'] == 'visible': - assert set(obj_dict) == COMMON_KEYS | {'data'} - elif obj_dict['status'] == 'absent': - assert set(obj_dict) == COMMON_KEYS | {'reason'} - elif obj_dict['status'] == 'hidden': - assert set(obj_dict) == COMMON_KEYS | {'data'} + if obj_type == "content": + COMMON_KEYS = set(DEFAULT_ALGORITHMS) | {"length", "status", "ctime"} + if obj_dict["status"] == "visible": + assert set(obj_dict) == COMMON_KEYS | {"data"} + elif obj_dict["status"] == "absent": + assert set(obj_dict) == COMMON_KEYS | {"reason"} + elif obj_dict["status"] == "hidden": + assert set(obj_dict) == COMMON_KEYS | {"data"} else: assert False, obj_dict - elif obj_type == 'release': - assert obj_dict['target_type'] in target_types - elif obj_type == 'snapshot': - for branch in obj_dict['branches'].values(): - assert branch is None or branch['target_type'] in target_types + elif obj_type == "release": + assert obj_dict["target_type"] in target_types + elif obj_type == "snapshot": + for branch in obj_dict["branches"].values(): + assert branch is None or branch["target_type"] in target_types _min_snp_size = 10 @@ -111,8 +107,10 @@ def test_snapshots_strategy(snapshot): for alias in aliases: processed_alias = set() current_alias = alias - while (branches[current_alias] is not None - and branches[current_alias].target_type == TargetType.ALIAS): + while ( + branches[current_alias] is not None + and branches[current_alias].target_type == TargetType.ALIAS + ): assert branches[current_alias].target not in processed_alias processed_alias.add(current_alias) current_alias = branches[current_alias].target diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 8c6c70d1944cc86a85f50e3b1ee8638682483186..da0e2aa56ffe085737a4b2eda976afe6c404df2b 100644 --- a/swh/model/tests/test_identifiers.py +++ 
b/swh/model/tests/test_identifiers.py @@ -10,74 +10,68 @@ import unittest from swh.model import hashutil, identifiers from swh.model.exceptions import ValidationError from swh.model.hashutil import hash_to_bytes as _x -from swh.model.identifiers import (CONTENT, DIRECTORY, - RELEASE, REVISION, - SNAPSHOT, PersistentId) +from swh.model.identifiers import ( + CONTENT, + DIRECTORY, + RELEASE, + REVISION, + SNAPSHOT, + PersistentId, +) class UtilityFunctionsIdentifier(unittest.TestCase): def setUp(self): - self.str_id = 'c2e41aae41ac17bd4a650770d6ee77f62e52235b' + self.str_id = "c2e41aae41ac17bd4a650770d6ee77f62e52235b" self.bytes_id = binascii.unhexlify(self.str_id) self.bad_type_id = object() def test_identifier_to_bytes(self): for id in [self.str_id, self.bytes_id]: - self.assertEqual(identifiers.identifier_to_bytes(id), - self.bytes_id) + self.assertEqual(identifiers.identifier_to_bytes(id), self.bytes_id) # wrong length with self.assertRaises(ValueError) as cm: identifiers.identifier_to_bytes(id[:-2]) - self.assertIn('length', str(cm.exception)) + self.assertIn("length", str(cm.exception)) with self.assertRaises(ValueError) as cm: identifiers.identifier_to_bytes(self.bad_type_id) - self.assertIn('type', str(cm.exception)) + self.assertIn("type", str(cm.exception)) def test_identifier_to_str(self): for id in [self.str_id, self.bytes_id]: - self.assertEqual(identifiers.identifier_to_str(id), - self.str_id) + self.assertEqual(identifiers.identifier_to_str(id), self.str_id) # wrong length with self.assertRaises(ValueError) as cm: identifiers.identifier_to_str(id[:-2]) - self.assertIn('length', str(cm.exception)) + self.assertIn("length", str(cm.exception)) with self.assertRaises(ValueError) as cm: identifiers.identifier_to_str(self.bad_type_id) - self.assertIn('type', str(cm.exception)) + self.assertIn("type", str(cm.exception)) class UtilityFunctionsDateOffset(unittest.TestCase): def setUp(self): self.dates = { - b'1448210036': { - 'seconds': 1448210036, - 'microseconds': 0, - }, - b'1448210036.002342': { - 'seconds': 1448210036, - 'microseconds': 2342, - }, - b'1448210036.12': { - 'seconds': 1448210036, - 'microseconds': 120000, - } + b"1448210036": {"seconds": 1448210036, "microseconds": 0,}, + b"1448210036.002342": {"seconds": 1448210036, "microseconds": 2342,}, + b"1448210036.12": {"seconds": 1448210036, "microseconds": 120000,}, } self.broken_dates = [ 1448210036.12, ] self.offsets = { - 0: b'+0000', - -630: b'-1030', - 800: b'+1320', + 0: b"+0000", + -630: b"-1030", + 800: b"+1320", } def test_format_date(self): @@ -97,128 +91,126 @@ class UtilityFunctionsDateOffset(unittest.TestCase): class ContentIdentifier(unittest.TestCase): def setUp(self): self.content = { - 'status': 'visible', - 'length': 5, - 'data': b'1984\n', - 'ctime': datetime.datetime(2015, 11, 22, 16, 33, 56, - tzinfo=datetime.timezone.utc), + "status": "visible", + "length": 5, + "data": b"1984\n", + "ctime": datetime.datetime( + 2015, 11, 22, 16, 33, 56, tzinfo=datetime.timezone.utc + ), } - self.content_id = hashutil.MultiHash.from_data( - self.content['data']).digest() + self.content_id = hashutil.MultiHash.from_data(self.content["data"]).digest() def test_content_identifier(self): - self.assertEqual(identifiers.content_identifier(self.content), - self.content_id) + self.assertEqual(identifiers.content_identifier(self.content), self.content_id) directory_example = { - 'id': 'd7ed3d2c31d608823be58b1cbe57605310615231', - 'entries': [ + "id": "d7ed3d2c31d608823be58b1cbe57605310615231", + "entries": [ { - 'type': 'file', 
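
As the ContentIdentifier test above shows, content identifiers are intrinsic: only the raw bytes feed the hash, so the result must agree with MultiHash over the same data (a sketch, assuming swh.model is installed; the payload is illustrative):

    from swh.model import identifiers
    from swh.model.hashutil import MultiHash

    content = {"data": b"1984\n"}
    assert (
        identifiers.content_identifier(content)
        == MultiHash.from_data(content["data"]).digest()
    )
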
- 'perms': 33188, - 'name': b'README', - 'target': _x('37ec8ea2110c0b7a32fbb0e872f6e7debbf95e21') + "type": "file", + "perms": 33188, + "name": b"README", + "target": _x("37ec8ea2110c0b7a32fbb0e872f6e7debbf95e21"), }, { - 'type': 'file', - 'perms': 33188, - 'name': b'Rakefile', - 'target': _x('3bb0e8592a41ae3185ee32266c860714980dbed7') + "type": "file", + "perms": 33188, + "name": b"Rakefile", + "target": _x("3bb0e8592a41ae3185ee32266c860714980dbed7"), }, { - 'type': 'dir', - 'perms': 16384, - 'name': b'app', - 'target': _x('61e6e867f5d7ba3b40540869bc050b0c4fed9e95') + "type": "dir", + "perms": 16384, + "name": b"app", + "target": _x("61e6e867f5d7ba3b40540869bc050b0c4fed9e95"), }, { - 'type': 'file', - 'perms': 33188, - 'name': b'1.megabyte', - 'target': _x('7c2b2fbdd57d6765cdc9d84c2d7d333f11be7fb3') + "type": "file", + "perms": 33188, + "name": b"1.megabyte", + "target": _x("7c2b2fbdd57d6765cdc9d84c2d7d333f11be7fb3"), }, { - 'type': 'dir', - 'perms': 16384, - 'name': b'config', - 'target': _x('591dfe784a2e9ccc63aaba1cb68a765734310d98') + "type": "dir", + "perms": 16384, + "name": b"config", + "target": _x("591dfe784a2e9ccc63aaba1cb68a765734310d98"), }, { - 'type': 'dir', - 'perms': 16384, - 'name': b'public', - 'target': _x('9588bf4522c2b4648bfd1c61d175d1f88c1ad4a5') + "type": "dir", + "perms": 16384, + "name": b"public", + "target": _x("9588bf4522c2b4648bfd1c61d175d1f88c1ad4a5"), }, { - 'type': 'file', - 'perms': 33188, - 'name': b'development.sqlite3', - 'target': _x('e69de29bb2d1d6434b8b29ae775ad8c2e48c5391') + "type": "file", + "perms": 33188, + "name": b"development.sqlite3", + "target": _x("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), }, { - 'type': 'dir', - 'perms': 16384, - 'name': b'doc', - 'target': _x('154705c6aa1c8ead8c99c7915373e3c44012057f') + "type": "dir", + "perms": 16384, + "name": b"doc", + "target": _x("154705c6aa1c8ead8c99c7915373e3c44012057f"), }, { - 'type': 'dir', - 'perms': 16384, - 'name': b'db', - 'target': _x('85f157bdc39356b7bc7de9d0099b4ced8b3b382c') + "type": "dir", + "perms": 16384, + "name": b"db", + "target": _x("85f157bdc39356b7bc7de9d0099b4ced8b3b382c"), }, { - 'type': 'dir', - 'perms': 16384, - 'name': b'log', - 'target': _x('5e3d3941c51cce73352dff89c805a304ba96fffe') + "type": "dir", + "perms": 16384, + "name": b"log", + "target": _x("5e3d3941c51cce73352dff89c805a304ba96fffe"), }, { - 'type': 'dir', - 'perms': 16384, - 'name': b'script', - 'target': _x('1b278423caf176da3f3533592012502aa10f566c') + "type": "dir", + "perms": 16384, + "name": b"script", + "target": _x("1b278423caf176da3f3533592012502aa10f566c"), }, { - 'type': 'dir', - 'perms': 16384, - 'name': b'test', - 'target': _x('035f0437c080bfd8711670b3e8677e686c69c763') + "type": "dir", + "perms": 16384, + "name": b"test", + "target": _x("035f0437c080bfd8711670b3e8677e686c69c763"), }, { - 'type': 'dir', - 'perms': 16384, - 'name': b'vendor', - 'target': _x('7c0dc9ad978c1af3f9a4ce061e50f5918bd27138') + "type": "dir", + "perms": 16384, + "name": b"vendor", + "target": _x("7c0dc9ad978c1af3f9a4ce061e50f5918bd27138"), }, { - 'type': 'rev', - 'perms': 57344, - 'name': b'will_paginate', - 'target': _x('3d531e169db92a16a9a8974f0ae6edf52e52659e') + "type": "rev", + "perms": 57344, + "name": b"will_paginate", + "target": _x("3d531e169db92a16a9a8974f0ae6edf52e52659e"), }, - # in git order, the dir named "order" should be between the files # named "order." 
and "order0" { - 'type': 'dir', - 'perms': 16384, - 'name': b'order', - 'target': _x('62cdb7020ff920e5aa642c3d4066950dd1f01f4d') + "type": "dir", + "perms": 16384, + "name": b"order", + "target": _x("62cdb7020ff920e5aa642c3d4066950dd1f01f4d"), }, { - 'type': 'file', - 'perms': 16384, - 'name': b'order.', - 'target': _x('0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33') + "type": "file", + "perms": 16384, + "name": b"order.", + "target": _x("0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"), }, { - 'type': 'file', - 'perms': 16384, - 'name': b'order0', - 'target': _x('bbe960a25ea311d21d40669e93df2003ba9b90a2') + "type": "file", + "perms": 16384, + "name": b"order0", + "target": _x("bbe960a25ea311d21d40669e93df2003ba9b90a2"), }, ], } @@ -229,57 +221,56 @@ class DirectoryIdentifier(unittest.TestCase): self.directory = directory_example self.empty_directory = { - 'id': '4b825dc642cb6eb9a060e54bf8d69288fbee4904', - 'entries': [], + "id": "4b825dc642cb6eb9a060e54bf8d69288fbee4904", + "entries": [], } def test_dir_identifier(self): self.assertEqual( - identifiers.directory_identifier(self.directory), - self.directory['id']) + identifiers.directory_identifier(self.directory), self.directory["id"] + ) def test_dir_identifier_entry_order(self): # Reverse order of entries, check the id is still the same. - directory = {'entries': reversed(self.directory['entries'])} + directory = {"entries": reversed(self.directory["entries"])} self.assertEqual( - identifiers.directory_identifier(directory), - self.directory['id']) + identifiers.directory_identifier(directory), self.directory["id"] + ) def test_dir_identifier_empty_directory(self): self.assertEqual( identifiers.directory_identifier(self.empty_directory), - self.empty_directory['id']) + self.empty_directory["id"], + ) linus_tz = datetime.timezone(datetime.timedelta(minutes=-420)) revision_example = { - 'id': 'bc0195aad0daa2ad5b0d76cce22b167bc3435590', - 'directory': _x('85a74718d377195e1efd0843ba4f3260bad4fe07'), - 'parents': [_x('01e2d0627a9a6edb24c37db45db5ecb31e9de808')], - 'author': { - 'name': b'Linus Torvalds', - 'email': b'torvalds@linux-foundation.org', - 'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>' + "id": "bc0195aad0daa2ad5b0d76cce22b167bc3435590", + "directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"), + "parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")], + "author": { + "name": b"Linus Torvalds", + "email": b"torvalds@linux-foundation.org", + "fullname": b"Linus Torvalds <torvalds@linux-foundation.org>", }, - 'date': datetime.datetime(2015, 7, 12, 15, 10, 30, - tzinfo=linus_tz), - 'committer': { - 'name': b'Linus Torvalds', - 'email': b'torvalds@linux-foundation.org', - 'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>' + "date": datetime.datetime(2015, 7, 12, 15, 10, 30, tzinfo=linus_tz), + "committer": { + "name": b"Linus Torvalds", + "email": b"torvalds@linux-foundation.org", + "fullname": b"Linus Torvalds <torvalds@linux-foundation.org>", }, - 'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30, - tzinfo=linus_tz), - 'message': b'Linux 4.2-rc2\n', - 'type': 'git', - 'synthetic': False + "committer_date": datetime.datetime(2015, 7, 12, 15, 10, 30, tzinfo=linus_tz), + "message": b"Linux 4.2-rc2\n", + "type": "git", + "synthetic": False, } class RevisionIdentifier(unittest.TestCase): def setUp(self): - gpgsig = b'''\ + gpgsig = b"""\ -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.13 (Darwin) @@ -296,270 +287,236 @@ jdTswYL6+MUdL8sB9pZ82D+BP/YAdHe69CyTu1lk9RT2pYtI/kkfjHubXBCYEJSG 
lf1Qb5GDsQrZWgD+jtWTywOYHtCBwyCKSAXxSARMbNPeak9WPlcW/Jmu+fUcMe2x dg1KdHOa34shrKDaOVzW =od6m ------END PGP SIGNATURE-----''' +-----END PGP SIGNATURE-----""" self.revision = revision_example self.revision_none_metadata = { - 'id': 'bc0195aad0daa2ad5b0d76cce22b167bc3435590', - 'directory': _x('85a74718d377195e1efd0843ba4f3260bad4fe07'), - 'parents': [_x('01e2d0627a9a6edb24c37db45db5ecb31e9de808')], - 'author': { - 'name': b'Linus Torvalds', - 'email': b'torvalds@linux-foundation.org', - }, - 'date': datetime.datetime(2015, 7, 12, 15, 10, 30, - tzinfo=linus_tz), - 'committer': { - 'name': b'Linus Torvalds', - 'email': b'torvalds@linux-foundation.org', - }, - 'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30, - tzinfo=linus_tz), - 'message': b'Linux 4.2-rc2\n', - 'metadata': None, + "id": "bc0195aad0daa2ad5b0d76cce22b167bc3435590", + "directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"), + "parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")], + "author": { + "name": b"Linus Torvalds", + "email": b"torvalds@linux-foundation.org", + }, + "date": datetime.datetime(2015, 7, 12, 15, 10, 30, tzinfo=linus_tz), + "committer": { + "name": b"Linus Torvalds", + "email": b"torvalds@linux-foundation.org", + }, + "committer_date": datetime.datetime( + 2015, 7, 12, 15, 10, 30, tzinfo=linus_tz + ), + "message": b"Linux 4.2-rc2\n", + "metadata": None, } self.synthetic_revision = { - 'id': b'\xb2\xa7\xe1&\x04\x92\xe3D\xfa\xb3\xcb\xf9\x1b\xc1<\x91' - b'\xe0T&\xfd', - 'author': { - 'name': b'Software Heritage', - 'email': b'robot@softwareheritage.org', - }, - 'date': { - 'timestamp': {'seconds': 1437047495}, - 'offset': 0, - 'negative_utc': False, - }, - 'type': 'tar', - 'committer': { - 'name': b'Software Heritage', - 'email': b'robot@softwareheritage.org', + "id": b"\xb2\xa7\xe1&\x04\x92\xe3D\xfa\xb3\xcb\xf9\x1b\xc1<\x91" + b"\xe0T&\xfd", + "author": { + "name": b"Software Heritage", + "email": b"robot@softwareheritage.org", + }, + "date": { + "timestamp": {"seconds": 1437047495}, + "offset": 0, + "negative_utc": False, + }, + "type": "tar", + "committer": { + "name": b"Software Heritage", + "email": b"robot@softwareheritage.org", + }, + "committer_date": 1437047495, + "synthetic": True, + "parents": [None], + "message": b"synthetic revision message\n", + "directory": b"\xd1\x1f\x00\xa6\xa0\xfe\xa6\x05SA\xd2U\x84\xb5\xa9" + b"e\x16\xc0\xd2\xb8", + "metadata": { + "original_artifact": [ + { + "archive_type": "tar", + "name": "gcc-5.2.0.tar.bz2", + "sha1_git": "39d281aff934d44b439730057e55b055e206a586", + "sha1": "fe3f5390949d47054b613edc36c557eb1d51c18e", + "sha256": "5f835b04b5f7dd4f4d2dc96190ec1621b8d89f" + "2dc6f638f9f8bc1b1014ba8cad", + } + ] }, - 'committer_date': 1437047495, - 'synthetic': True, - 'parents': [None], - 'message': b'synthetic revision message\n', - 'directory': b'\xd1\x1f\x00\xa6\xa0\xfe\xa6\x05SA\xd2U\x84\xb5\xa9' - b'e\x16\xc0\xd2\xb8', - 'metadata': {'original_artifact': [ - {'archive_type': 'tar', - 'name': 'gcc-5.2.0.tar.bz2', - 'sha1_git': '39d281aff934d44b439730057e55b055e206a586', - 'sha1': 'fe3f5390949d47054b613edc36c557eb1d51c18e', - 'sha256': '5f835b04b5f7dd4f4d2dc96190ec1621b8d89f' - '2dc6f638f9f8bc1b1014ba8cad'}]}, - } # cat commit.txt | git hash-object -t commit --stdin self.revision_with_extra_headers = { - 'id': '010d34f384fa99d047cdd5e2f41e56e5c2feee45', - 'directory': _x('85a74718d377195e1efd0843ba4f3260bad4fe07'), - 'parents': [_x('01e2d0627a9a6edb24c37db45db5ecb31e9de808')], - 'author': { - 'name': b'Linus Torvalds', - 'email': 
b'torvalds@linux-foundation.org', - 'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>', - }, - 'date': datetime.datetime(2015, 7, 12, 15, 10, 30, - tzinfo=linus_tz), - 'committer': { - 'name': b'Linus Torvalds', - 'email': b'torvalds@linux-foundation.org', - 'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>', - }, - 'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30, - tzinfo=linus_tz), - 'message': b'Linux 4.2-rc2\n', - 'metadata': { - 'extra_headers': [ - ['svn-repo-uuid', '046f1af7-66c2-d61b-5410-ce57b7db7bff'], - ['svn-revision', 10], + "id": "010d34f384fa99d047cdd5e2f41e56e5c2feee45", + "directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"), + "parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")], + "author": { + "name": b"Linus Torvalds", + "email": b"torvalds@linux-foundation.org", + "fullname": b"Linus Torvalds <torvalds@linux-foundation.org>", + }, + "date": datetime.datetime(2015, 7, 12, 15, 10, 30, tzinfo=linus_tz), + "committer": { + "name": b"Linus Torvalds", + "email": b"torvalds@linux-foundation.org", + "fullname": b"Linus Torvalds <torvalds@linux-foundation.org>", + }, + "committer_date": datetime.datetime( + 2015, 7, 12, 15, 10, 30, tzinfo=linus_tz + ), + "message": b"Linux 4.2-rc2\n", + "metadata": { + "extra_headers": [ + ["svn-repo-uuid", "046f1af7-66c2-d61b-5410-ce57b7db7bff"], + ["svn-revision", 10], ] - } + }, } self.revision_with_gpgsig = { - 'id': '44cc742a8ca17b9c279be4cc195a93a6ef7a320e', - 'directory': _x('b134f9b7dc434f593c0bab696345548b37de0558'), - 'parents': [_x('689664ae944b4692724f13b709a4e4de28b54e57'), - _x('c888305e1efbaa252d01b4e5e6b778f865a97514')], - 'author': { - 'name': b'Jiang Xin', - 'email': b'worldhello.net@gmail.com', - 'fullname': b'Jiang Xin <worldhello.net@gmail.com>', - }, - 'date': { - 'timestamp': 1428538899, - 'offset': 480, - }, - 'committer': { - 'name': b'Jiang Xin', - 'email': b'worldhello.net@gmail.com', - }, - 'committer_date': { - 'timestamp': 1428538899, - 'offset': 480, - }, - 'metadata': { - 'extra_headers': [ - ['gpgsig', gpgsig], - ], - }, - 'message': b'''Merge branch 'master' of git://github.com/alexhenrie/git-po + "id": "44cc742a8ca17b9c279be4cc195a93a6ef7a320e", + "directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"), + "parents": [ + _x("689664ae944b4692724f13b709a4e4de28b54e57"), + _x("c888305e1efbaa252d01b4e5e6b778f865a97514"), + ], + "author": { + "name": b"Jiang Xin", + "email": b"worldhello.net@gmail.com", + "fullname": b"Jiang Xin <worldhello.net@gmail.com>", + }, + "date": {"timestamp": 1428538899, "offset": 480,}, + "committer": {"name": b"Jiang Xin", "email": b"worldhello.net@gmail.com",}, + "committer_date": {"timestamp": 1428538899, "offset": 480,}, + "metadata": {"extra_headers": [["gpgsig", gpgsig],],}, + "message": b"""Merge branch 'master' of git://github.com/alexhenrie/git-po * 'master' of git://github.com/alexhenrie/git-po: l10n: ca.po: update translation -''' +""", } self.revision_no_message = { - 'id': '4cfc623c9238fa92c832beed000ce2d003fd8333', - 'directory': _x('b134f9b7dc434f593c0bab696345548b37de0558'), - 'parents': [_x('689664ae944b4692724f13b709a4e4de28b54e57'), - _x('c888305e1efbaa252d01b4e5e6b778f865a97514')], - 'author': { - 'name': b'Jiang Xin', - 'email': b'worldhello.net@gmail.com', - 'fullname': b'Jiang Xin <worldhello.net@gmail.com>', - }, - 'date': { - 'timestamp': 1428538899, - 'offset': 480, - }, - 'committer': { - 'name': b'Jiang Xin', - 'email': b'worldhello.net@gmail.com', - }, - 'committer_date': { - 'timestamp': 1428538899, - 
'offset': 480, - }, - 'message': None, + "id": "4cfc623c9238fa92c832beed000ce2d003fd8333", + "directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"), + "parents": [ + _x("689664ae944b4692724f13b709a4e4de28b54e57"), + _x("c888305e1efbaa252d01b4e5e6b778f865a97514"), + ], + "author": { + "name": b"Jiang Xin", + "email": b"worldhello.net@gmail.com", + "fullname": b"Jiang Xin <worldhello.net@gmail.com>", + }, + "date": {"timestamp": 1428538899, "offset": 480,}, + "committer": {"name": b"Jiang Xin", "email": b"worldhello.net@gmail.com",}, + "committer_date": {"timestamp": 1428538899, "offset": 480,}, + "message": None, } self.revision_empty_message = { - 'id': '7442cd78bd3b4966921d6a7f7447417b7acb15eb', - 'directory': _x('b134f9b7dc434f593c0bab696345548b37de0558'), - 'parents': [_x('689664ae944b4692724f13b709a4e4de28b54e57'), - _x('c888305e1efbaa252d01b4e5e6b778f865a97514')], - 'author': { - 'name': b'Jiang Xin', - 'email': b'worldhello.net@gmail.com', - 'fullname': b'Jiang Xin <worldhello.net@gmail.com>', - }, - 'date': { - 'timestamp': 1428538899, - 'offset': 480, - }, - 'committer': { - 'name': b'Jiang Xin', - 'email': b'worldhello.net@gmail.com', - }, - 'committer_date': { - 'timestamp': 1428538899, - 'offset': 480, - }, - 'message': b'', + "id": "7442cd78bd3b4966921d6a7f7447417b7acb15eb", + "directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"), + "parents": [ + _x("689664ae944b4692724f13b709a4e4de28b54e57"), + _x("c888305e1efbaa252d01b4e5e6b778f865a97514"), + ], + "author": { + "name": b"Jiang Xin", + "email": b"worldhello.net@gmail.com", + "fullname": b"Jiang Xin <worldhello.net@gmail.com>", + }, + "date": {"timestamp": 1428538899, "offset": 480,}, + "committer": {"name": b"Jiang Xin", "email": b"worldhello.net@gmail.com",}, + "committer_date": {"timestamp": 1428538899, "offset": 480,}, + "message": b"", } self.revision_only_fullname = { - 'id': '010d34f384fa99d047cdd5e2f41e56e5c2feee45', - 'directory': _x('85a74718d377195e1efd0843ba4f3260bad4fe07'), - 'parents': [_x('01e2d0627a9a6edb24c37db45db5ecb31e9de808')], - 'author': { - 'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>', - }, - 'date': datetime.datetime(2015, 7, 12, 15, 10, 30, - tzinfo=linus_tz), - 'committer': { - 'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>', - }, - 'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30, - tzinfo=linus_tz), - 'message': b'Linux 4.2-rc2\n', - 'metadata': { - 'extra_headers': [ - ['svn-repo-uuid', '046f1af7-66c2-d61b-5410-ce57b7db7bff'], - ['svn-revision', 10], + "id": "010d34f384fa99d047cdd5e2f41e56e5c2feee45", + "directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"), + "parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")], + "author": {"fullname": b"Linus Torvalds <torvalds@linux-foundation.org>",}, + "date": datetime.datetime(2015, 7, 12, 15, 10, 30, tzinfo=linus_tz), + "committer": { + "fullname": b"Linus Torvalds <torvalds@linux-foundation.org>", + }, + "committer_date": datetime.datetime( + 2015, 7, 12, 15, 10, 30, tzinfo=linus_tz + ), + "message": b"Linux 4.2-rc2\n", + "metadata": { + "extra_headers": [ + ["svn-repo-uuid", "046f1af7-66c2-d61b-5410-ce57b7db7bff"], + ["svn-revision", 10], ] - } + }, } def test_revision_identifier(self): self.assertEqual( identifiers.revision_identifier(self.revision), - identifiers.identifier_to_str(self.revision['id']), + identifiers.identifier_to_str(self.revision["id"]), ) def test_revision_identifier_none_metadata(self): self.assertEqual( identifiers.revision_identifier(self.revision_none_metadata), - 
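
Revision identifiers are git-compatible: the commit manifest is rebuilt and hashed as sha1_git, so the Linux 4.2-rc2 fixture reproduces its upstream commit id (a sketch reusing the fixture defined above):

    from swh.model import identifiers
    from swh.model.tests.test_identifiers import revision_example

    assert (
        identifiers.revision_identifier(revision_example)
        == "bc0195aad0daa2ad5b0d76cce22b167bc3435590"
    )
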
identifiers.identifier_to_str(self.revision_none_metadata['id']), + identifiers.identifier_to_str(self.revision_none_metadata["id"]), ) def test_revision_identifier_synthetic(self): self.assertEqual( identifiers.revision_identifier(self.synthetic_revision), - identifiers.identifier_to_str(self.synthetic_revision['id']), + identifiers.identifier_to_str(self.synthetic_revision["id"]), ) def test_revision_identifier_with_extra_headers(self): self.assertEqual( - identifiers.revision_identifier( - self.revision_with_extra_headers), - identifiers.identifier_to_str( - self.revision_with_extra_headers['id']), + identifiers.revision_identifier(self.revision_with_extra_headers), + identifiers.identifier_to_str(self.revision_with_extra_headers["id"]), ) def test_revision_identifier_with_gpgsig(self): self.assertEqual( - identifiers.revision_identifier( - self.revision_with_gpgsig), - identifiers.identifier_to_str( - self.revision_with_gpgsig['id']), + identifiers.revision_identifier(self.revision_with_gpgsig), + identifiers.identifier_to_str(self.revision_with_gpgsig["id"]), ) def test_revision_identifier_no_message(self): self.assertEqual( - identifiers.revision_identifier( - self.revision_no_message), - identifiers.identifier_to_str( - self.revision_no_message['id']), + identifiers.revision_identifier(self.revision_no_message), + identifiers.identifier_to_str(self.revision_no_message["id"]), ) def test_revision_identifier_empty_message(self): self.assertEqual( - identifiers.revision_identifier( - self.revision_empty_message), - identifiers.identifier_to_str( - self.revision_empty_message['id']), + identifiers.revision_identifier(self.revision_empty_message), + identifiers.identifier_to_str(self.revision_empty_message["id"]), ) def test_revision_identifier_only_fullname(self): self.assertEqual( - identifiers.revision_identifier( - self.revision_only_fullname), - identifiers.identifier_to_str( - self.revision_only_fullname['id']), + identifiers.revision_identifier(self.revision_only_fullname), + identifiers.identifier_to_str(self.revision_only_fullname["id"]), ) release_example = { - 'id': '2b10839e32c4c476e9d94492756bb1a3e1ec4aa8', - 'target': b't\x1b"R\xa5\xe1Ml`\xa9\x13\xc7z`\x99\xab\xe7:\x85J', - 'target_type': 'revision', - 'name': b'v2.6.14', - 'author': { - 'name': b'Linus Torvalds', - 'email': b'torvalds@g5.osdl.org', - 'fullname': b'Linus Torvalds <torvalds@g5.osdl.org>' + "id": "2b10839e32c4c476e9d94492756bb1a3e1ec4aa8", + "target": b't\x1b"R\xa5\xe1Ml`\xa9\x13\xc7z`\x99\xab\xe7:\x85J', + "target_type": "revision", + "name": b"v2.6.14", + "author": { + "name": b"Linus Torvalds", + "email": b"torvalds@g5.osdl.org", + "fullname": b"Linus Torvalds <torvalds@g5.osdl.org>", }, - 'date': datetime.datetime(2005, 10, 27, 17, 2, 33, - tzinfo=linus_tz), - 'message': b'''\ + "date": datetime.datetime(2005, 10, 27, 17, 2, 33, tzinfo=linus_tz), + "message": b"""\ Linux 2.6.14 release -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.1 (GNU/Linux) @@ -568,8 +525,8 @@ iD8DBQBDYWq6F3YsRnbiHLsRAmaeAJ9RCez0y8rOBbhSv344h86l/VVcugCeIhO1 wdLOnvj91G4wxYqrvThthbE= =7VeT -----END PGP SIGNATURE----- -''', - 'synthetic': False, +""", + "synthetic": False, } @@ -580,11 +537,11 @@ class ReleaseIdentifier(unittest.TestCase): self.release = release_example self.release_no_author = { - 'id': b'&y\x1a\x8b\xcf\x0em3\xf4:\xefv\x82\xbd\xb5U#mV\xde', - 'target': '9ee1c939d1cb936b1f98e8d81aeffab57bae46ab', - 'target_type': 'revision', - 'name': b'v2.6.12', - 'message': b'''\ + "id": 
b"&y\x1a\x8b\xcf\x0em3\xf4:\xefv\x82\xbd\xb5U#mV\xde", + "target": "9ee1c939d1cb936b1f98e8d81aeffab57bae46ab", + "target_type": "revision", + "name": b"v2.6.12", + "message": b"""\ This is the final 2.6.12 release -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.2.4 (GNU/Linux) @@ -593,157 +550,141 @@ iD8DBQBCsykyF3YsRnbiHLsRAvPNAJ482tCZwuxp/bJRz7Q98MHlN83TpACdHr37 o6X/3T+vm8K3bf3driRr34c= =sBHn -----END PGP SIGNATURE----- -''', - 'synthetic': False, +""", + "synthetic": False, } self.release_no_message = { - 'id': 'b6f4f446715f7d9543ef54e41b62982f0db40045', - 'target': '9ee1c939d1cb936b1f98e8d81aeffab57bae46ab', - 'target_type': 'revision', - 'name': b'v2.6.12', - 'author': { - 'name': b'Linus Torvalds', - 'email': b'torvalds@g5.osdl.org', - }, - 'date': datetime.datetime(2005, 10, 27, 17, 2, 33, - tzinfo=linus_tz), - 'message': None, + "id": "b6f4f446715f7d9543ef54e41b62982f0db40045", + "target": "9ee1c939d1cb936b1f98e8d81aeffab57bae46ab", + "target_type": "revision", + "name": b"v2.6.12", + "author": {"name": b"Linus Torvalds", "email": b"torvalds@g5.osdl.org",}, + "date": datetime.datetime(2005, 10, 27, 17, 2, 33, tzinfo=linus_tz), + "message": None, } self.release_empty_message = { - 'id': '71a0aea72444d396575dc25ac37fec87ee3c6492', - 'target': '9ee1c939d1cb936b1f98e8d81aeffab57bae46ab', - 'target_type': 'revision', - 'name': b'v2.6.12', - 'author': { - 'name': b'Linus Torvalds', - 'email': b'torvalds@g5.osdl.org', - }, - 'date': datetime.datetime(2005, 10, 27, 17, 2, 33, - tzinfo=linus_tz), - 'message': b'', + "id": "71a0aea72444d396575dc25ac37fec87ee3c6492", + "target": "9ee1c939d1cb936b1f98e8d81aeffab57bae46ab", + "target_type": "revision", + "name": b"v2.6.12", + "author": {"name": b"Linus Torvalds", "email": b"torvalds@g5.osdl.org",}, + "date": datetime.datetime(2005, 10, 27, 17, 2, 33, tzinfo=linus_tz), + "message": b"", } self.release_negative_utc = { - 'id': '97c8d2573a001f88e72d75f596cf86b12b82fd01', - 'name': b'20081029', - 'target': '54e9abca4c77421e2921f5f156c9fe4a9f7441c7', - 'target_type': 'revision', - 'date': { - 'timestamp': {'seconds': 1225281976}, - 'offset': 0, - 'negative_utc': True, - }, - 'author': { - 'name': b'Otavio Salvador', - 'email': b'otavio@debian.org', - 'id': 17640, - }, - 'synthetic': False, - 'message': b'tagging version 20081029\n\nr56558\n', + "id": "97c8d2573a001f88e72d75f596cf86b12b82fd01", + "name": b"20081029", + "target": "54e9abca4c77421e2921f5f156c9fe4a9f7441c7", + "target_type": "revision", + "date": { + "timestamp": {"seconds": 1225281976}, + "offset": 0, + "negative_utc": True, + }, + "author": { + "name": b"Otavio Salvador", + "email": b"otavio@debian.org", + "id": 17640, + }, + "synthetic": False, + "message": b"tagging version 20081029\n\nr56558\n", } self.release_newline_in_author = { - 'author': { - 'email': b'esycat@gmail.com', - 'fullname': b'Eugene Janusov\n<esycat@gmail.com>', - 'name': b'Eugene Janusov\n', - }, - 'date': { - 'negative_utc': None, - 'offset': 600, - 'timestamp': { - 'microseconds': 0, - 'seconds': 1377480558, - }, - }, - 'id': b'\\\x98\xf5Y\xd04\x16-\xe2->\xbe\xb9T3\xe6\xf8\x88R1', - 'message': b'Release of v0.3.2.', - 'name': b'0.3.2', - 'synthetic': False, - 'target': (b'\xc0j\xa3\xd9;x\xa2\x86\\I5\x17' - b'\x000\xf8\xc2\xd79o\xd3'), - 'target_type': 'revision', + "author": { + "email": b"esycat@gmail.com", + "fullname": b"Eugene Janusov\n<esycat@gmail.com>", + "name": b"Eugene Janusov\n", + }, + "date": { + "negative_utc": None, + "offset": 600, + "timestamp": {"microseconds": 0, "seconds": 1377480558,}, + }, 
+ "id": b"\\\x98\xf5Y\xd04\x16-\xe2->\xbe\xb9T3\xe6\xf8\x88R1", + "message": b"Release of v0.3.2.", + "name": b"0.3.2", + "synthetic": False, + "target": (b"\xc0j\xa3\xd9;x\xa2\x86\\I5\x17" b"\x000\xf8\xc2\xd79o\xd3"), + "target_type": "revision", } self.release_snapshot_target = dict(self.release) - self.release_snapshot_target['target_type'] = 'snapshot' - self.release_snapshot_target['id'] = ( - 'c29c3ddcc6769a04e54dd69d63a6fdcbc566f850') + self.release_snapshot_target["target_type"] = "snapshot" + self.release_snapshot_target["id"] = "c29c3ddcc6769a04e54dd69d63a6fdcbc566f850" def test_release_identifier(self): self.assertEqual( identifiers.release_identifier(self.release), - identifiers.identifier_to_str(self.release['id']) + identifiers.identifier_to_str(self.release["id"]), ) def test_release_identifier_no_author(self): self.assertEqual( identifiers.release_identifier(self.release_no_author), - identifiers.identifier_to_str(self.release_no_author['id']) + identifiers.identifier_to_str(self.release_no_author["id"]), ) def test_release_identifier_no_message(self): self.assertEqual( identifiers.release_identifier(self.release_no_message), - identifiers.identifier_to_str(self.release_no_message['id']) + identifiers.identifier_to_str(self.release_no_message["id"]), ) def test_release_identifier_empty_message(self): self.assertEqual( identifiers.release_identifier(self.release_empty_message), - identifiers.identifier_to_str(self.release_empty_message['id']) + identifiers.identifier_to_str(self.release_empty_message["id"]), ) def test_release_identifier_negative_utc(self): self.assertEqual( identifiers.release_identifier(self.release_negative_utc), - identifiers.identifier_to_str(self.release_negative_utc['id']) + identifiers.identifier_to_str(self.release_negative_utc["id"]), ) def test_release_identifier_newline_in_author(self): self.assertEqual( identifiers.release_identifier(self.release_newline_in_author), - identifiers.identifier_to_str(self.release_newline_in_author['id']) + identifiers.identifier_to_str(self.release_newline_in_author["id"]), ) def test_release_identifier_snapshot_target(self): self.assertEqual( identifiers.release_identifier(self.release_snapshot_target), - identifiers.identifier_to_str(self.release_snapshot_target['id']) + identifiers.identifier_to_str(self.release_snapshot_target["id"]), ) snapshot_example = { - 'id': _x('6e65b86363953b780d92b0a928f3e8fcdd10db36'), - 'branches': { - b'directory': { - 'target': _x('1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8'), - 'target_type': 'directory', + "id": _x("6e65b86363953b780d92b0a928f3e8fcdd10db36"), + "branches": { + b"directory": { + "target": _x("1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8"), + "target_type": "directory", }, - b'content': { - 'target': _x('fe95a46679d128ff167b7c55df5d02356c5a1ae1'), - 'target_type': 'content', + b"content": { + "target": _x("fe95a46679d128ff167b7c55df5d02356c5a1ae1"), + "target_type": "content", }, - b'alias': { - 'target': b'revision', - 'target_type': 'alias', + b"alias": {"target": b"revision", "target_type": "alias",}, + b"revision": { + "target": _x("aafb16d69fd30ff58afdd69036a26047f3aebdc6"), + "target_type": "revision", }, - b'revision': { - 'target': _x('aafb16d69fd30ff58afdd69036a26047f3aebdc6'), - 'target_type': 'revision', + b"release": { + "target": _x("7045404f3d1c54e6473c71bbb716529fbad4be24"), + "target_type": "release", }, - b'release': { - 'target': _x('7045404f3d1c54e6473c71bbb716529fbad4be24'), - 'target_type': 'release', + b"snapshot": { + "target": 
_x("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"), + "target_type": "snapshot", }, - b'snapshot': { - 'target': _x('1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'), - 'target_type': 'snapshot', - }, - b'dangling': None, - } + b"dangling": None, + }, } @@ -752,25 +693,18 @@ class SnapshotIdentifier(unittest.TestCase): super().setUp() self.empty = { - 'id': '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e', - 'branches': {}, + "id": "1a8893e6a86f444e8be8e7bda6cb34fb1735a00e", + "branches": {}, } self.dangling_branch = { - 'id': 'c84502e821eb21ed84e9fd3ec40973abc8b32353', - 'branches': { - b'HEAD': None, - }, + "id": "c84502e821eb21ed84e9fd3ec40973abc8b32353", + "branches": {b"HEAD": None,}, } self.unresolved = { - 'id': '84b4548ea486e4b0a7933fa541ff1503a0afe1e0', - 'branches': { - b'foo': { - 'target': b'bar', - 'target_type': 'alias', - }, - }, + "id": "84b4548ea486e4b0a7933fa541ff1503a0afe1e0", + "branches": {b"foo": {"target": b"bar", "target_type": "alias",},}, } self.all_types = snapshot_example @@ -778,13 +712,13 @@ class SnapshotIdentifier(unittest.TestCase): def test_empty_snapshot(self): self.assertEqual( identifiers.snapshot_identifier(self.empty), - identifiers.identifier_to_str(self.empty['id']), + identifiers.identifier_to_str(self.empty["id"]), ) def test_dangling_branch(self): self.assertEqual( identifiers.snapshot_identifier(self.dangling_branch), - identifiers.identifier_to_str(self.dangling_branch['id']), + identifiers.identifier_to_str(self.dangling_branch["id"]), ) def test_unresolved(self): @@ -793,172 +727,239 @@ class SnapshotIdentifier(unittest.TestCase): def test_unresolved_force(self): self.assertEqual( - identifiers.snapshot_identifier( - self.unresolved, - ignore_unresolved=True, - ), - identifiers.identifier_to_str(self.unresolved['id']), + identifiers.snapshot_identifier(self.unresolved, ignore_unresolved=True,), + identifiers.identifier_to_str(self.unresolved["id"]), ) def test_all_types(self): self.assertEqual( identifiers.snapshot_identifier(self.all_types), - identifiers.identifier_to_str(self.all_types['id']), + identifiers.identifier_to_str(self.all_types["id"]), ) def test_persistent_identifier(self): - _snapshot_id = _x('c7c108084bc0bf3d81436bf980b46e98bd338453') - _release_id = '22ece559cc7cc2364edc5e5593d63ae8bd229f9f' - _revision_id = '309cf2674ee7a0749978cf8265ab91a60aea0f7d' - _directory_id = 'd198bc9d7a6bcf6db04f476d29314f157507d505' - _content_id = '94a9ed024d3859793618152ea559a168bbcbb5e2' - _snapshot = {'id': _snapshot_id} - _release = {'id': _release_id} - _revision = {'id': _revision_id} - _directory = {'id': _directory_id} - _content = {'sha1_git': _content_id} + _snapshot_id = _x("c7c108084bc0bf3d81436bf980b46e98bd338453") + _release_id = "22ece559cc7cc2364edc5e5593d63ae8bd229f9f" + _revision_id = "309cf2674ee7a0749978cf8265ab91a60aea0f7d" + _directory_id = "d198bc9d7a6bcf6db04f476d29314f157507d505" + _content_id = "94a9ed024d3859793618152ea559a168bbcbb5e2" + _snapshot = {"id": _snapshot_id} + _release = {"id": _release_id} + _revision = {"id": _revision_id} + _directory = {"id": _directory_id} + _content = {"sha1_git": _content_id} for full_type, _hash, expected_persistent_id, version, _meta in [ - (SNAPSHOT, _snapshot_id, - 'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', - None, {}), - (RELEASE, _release_id, - 'swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', - 1, {}), - (REVISION, _revision_id, - 'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', - None, {}), - (DIRECTORY, _directory_id, - 
'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', - None, {}), - (CONTENT, _content_id, - 'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', - 1, {}), - (SNAPSHOT, _snapshot, - 'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', - None, {}), - (RELEASE, _release, - 'swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', - 1, {}), - (REVISION, _revision, - 'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', - None, {}), - (DIRECTORY, _directory, - 'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', - None, {}), - (CONTENT, _content, - 'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', - 1, {}), - (CONTENT, _content, - 'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2;origin=1', - 1, {'origin': '1'}), + ( + SNAPSHOT, + _snapshot_id, + "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", + None, + {}, + ), + ( + RELEASE, + _release_id, + "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", + 1, + {}, + ), + ( + REVISION, + _revision_id, + "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", + None, + {}, + ), + ( + DIRECTORY, + _directory_id, + "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", + None, + {}, + ), + ( + CONTENT, + _content_id, + "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", + 1, + {}, + ), + ( + SNAPSHOT, + _snapshot, + "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", + None, + {}, + ), + ( + RELEASE, + _release, + "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", + 1, + {}, + ), + ( + REVISION, + _revision, + "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", + None, + {}, + ), + ( + DIRECTORY, + _directory, + "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", + None, + {}, + ), + ( + CONTENT, + _content, + "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", + 1, + {}, + ), + ( + CONTENT, + _content, + "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2;origin=1", + 1, + {"origin": "1"}, + ), ]: if version: actual_value = identifiers.persistent_identifier( - full_type, _hash, version, metadata=_meta) + full_type, _hash, version, metadata=_meta + ) else: actual_value = identifiers.persistent_identifier( - full_type, _hash, metadata=_meta) + full_type, _hash, metadata=_meta + ) self.assertEqual(actual_value, expected_persistent_id) def test_persistent_identifier_wrong_input(self): - _snapshot_id = 'notahash4bc0bf3d81436bf980b46e98bd338453' - _snapshot = {'id': _snapshot_id} + _snapshot_id = "notahash4bc0bf3d81436bf980b46e98bd338453" + _snapshot = {"id": _snapshot_id} for _type, _hash in [ - (SNAPSHOT, _snapshot_id), - (SNAPSHOT, _snapshot), - ('foo', ''), + (SNAPSHOT, _snapshot_id), + (SNAPSHOT, _snapshot), + ("foo", ""), ]: with self.assertRaises(ValidationError): identifiers.persistent_identifier(_type, _hash) def test_parse_persistent_identifier(self): for pid, _type, _version, _hash in [ - ('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2', - CONTENT, 1, '94a9ed024d3859793618152ea559a168bbcbb5e2'), - ('swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505', - DIRECTORY, 1, 'd198bc9d7a6bcf6db04f476d29314f157507d505'), - ('swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d', - REVISION, 1, '309cf2674ee7a0749978cf8265ab91a60aea0f7d'), - ('swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f', - RELEASE, 1, '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'), - ('swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453', - SNAPSHOT, 1, 'c7c108084bc0bf3d81436bf980b46e98bd338453'), + ( + "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", + CONTENT, + 1, + "94a9ed024d3859793618152ea559a168bbcbb5e2", + ), + ( + 
"swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", + DIRECTORY, + 1, + "d198bc9d7a6bcf6db04f476d29314f157507d505", + ), + ( + "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", + REVISION, + 1, + "309cf2674ee7a0749978cf8265ab91a60aea0f7d", + ), + ( + "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", + RELEASE, + 1, + "22ece559cc7cc2364edc5e5593d63ae8bd229f9f", + ), + ( + "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", + SNAPSHOT, + 1, + "c7c108084bc0bf3d81436bf980b46e98bd338453", + ), ]: expected_result = PersistentId( - namespace='swh', + namespace="swh", scheme_version=_version, object_type=_type, object_id=_hash, - metadata={} + metadata={}, ) actual_result = identifiers.parse_persistent_identifier(pid) self.assertEqual(actual_result, expected_result) for pid, _type, _version, _hash, _metadata in [ - ('swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;lines=1-18;origin=https://github.com/python/cpython', # noqa - CONTENT, 1, '9c95815d9e9d91b8dae8e05d8bbc696fe19f796b', - { - 'lines': '1-18', - 'origin': 'https://github.com/python/cpython' - }), - ('swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=deb://Debian/packages/linuxdoc-tools', # noqa - DIRECTORY, 1, '0b6959356d30f1a4e9b7f6bca59b9a336464c03d', - { - 'origin': 'deb://Debian/packages/linuxdoc-tools' - }) + ( + "swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;lines=1-18;origin=https://github.com/python/cpython", # noqa + CONTENT, + 1, + "9c95815d9e9d91b8dae8e05d8bbc696fe19f796b", + {"lines": "1-18", "origin": "https://github.com/python/cpython"}, + ), + ( + "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=deb://Debian/packages/linuxdoc-tools", # noqa + DIRECTORY, + 1, + "0b6959356d30f1a4e9b7f6bca59b9a336464c03d", + {"origin": "deb://Debian/packages/linuxdoc-tools"}, + ), ]: expected_result = PersistentId( - namespace='swh', + namespace="swh", scheme_version=_version, object_type=_type, object_id=_hash, - metadata=_metadata + metadata=_metadata, ) actual_result = identifiers.parse_persistent_identifier(pid) self.assertEqual(actual_result, expected_result) def test_parse_persistent_identifier_parsing_error(self): for pid in [ - ('swh:1:cnt'), - ('swh:1:'), - ('swh:'), - ('swh:1:cnt:'), - ('foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505'), - ('swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505'), - ('swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505'), - ('swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;' - 'malformed'), - ('swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d'), - ('swh:1:snp:foo'), + ("swh:1:cnt"), + ("swh:1:"), + ("swh:"), + ("swh:1:cnt:"), + ("foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505"), + ("swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505"), + ("swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505"), + ("swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;" "malformed"), + ("swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d"), + ("swh:1:snp:foo"), ]: with self.assertRaises(ValidationError): identifiers.parse_persistent_identifier(pid) def test_persistentid_class_validation_error(self): for _ns, _version, _type, _id in [ - ('foo', 1, CONTENT, 'abc8bc9d7a6bcf6db04f476d29314f157507d505'), - ('swh', 2, DIRECTORY, 'def8bc9d7a6bcf6db04f476d29314f157507d505'), - ('swh', 1, 'foo', 'fed8bc9d7a6bcf6db04f476d29314f157507d505'), - ('swh', 1, SNAPSHOT, 'gh6959356d30f1a4e9b7f6bca59b9a336464c03d'), + ("foo", 1, CONTENT, "abc8bc9d7a6bcf6db04f476d29314f157507d505"), + ("swh", 2, DIRECTORY, "def8bc9d7a6bcf6db04f476d29314f157507d505"), + ("swh", 1, "foo", 
"fed8bc9d7a6bcf6db04f476d29314f157507d505"), + ("swh", 1, SNAPSHOT, "gh6959356d30f1a4e9b7f6bca59b9a336464c03d"), ]: with self.assertRaises(ValidationError): PersistentId( namespace=_ns, scheme_version=_version, object_type=_type, - object_id=_id + object_id=_id, ) class OriginIdentifier(unittest.TestCase): def setUp(self): self.origin = { - 'url': 'https://github.com/torvalds/linux', + "url": "https://github.com/torvalds/linux", } def test_content_identifier(self): - self.assertEqual(identifiers.origin_identifier(self.origin), - 'b63a575fe3faab7692c9f38fb09d4bb45651bb0f') + self.assertEqual( + identifiers.origin_identifier(self.origin), + "b63a575fe3faab7692c9f38fb09d4bb45651bb0f", + ) diff --git a/swh/model/tests/test_merkle.py b/swh/model/tests/test_merkle.py index 734f7c036143163a24b7e9c9be3be9103d6070fa..7e3538b7a51cb88de893743f80c3a9a4a3eeaa5f 100644 --- a/swh/model/tests/test_merkle.py +++ b/swh/model/tests/test_merkle.py @@ -9,7 +9,7 @@ from swh.model import merkle class MerkleTestNode(merkle.MerkleNode): - type = 'tested_merkle_node_type' + type = "tested_merkle_node_type" def __init__(self, data): super().__init__(data) @@ -17,20 +17,13 @@ class MerkleTestNode(merkle.MerkleNode): def compute_hash(self): self.compute_hash_called += 1 - child_data = [ - child + b'=' + self[child].hash - for child in sorted(self) - ] - - return ( - b'hash(' - + b', '.join([self.data['value']] + child_data) - + b')' - ) + child_data = [child + b"=" + self[child].hash for child in sorted(self)] + + return b"hash(" + b", ".join([self.data["value"]] + child_data) + b")" class MerkleTestLeaf(merkle.MerkleLeaf): - type = 'tested_merkle_leaf_type' + type = "tested_merkle_leaf_type" def __init__(self, data): super().__init__(data) @@ -38,12 +31,12 @@ class MerkleTestLeaf(merkle.MerkleLeaf): def compute_hash(self): self.compute_hash_called += 1 - return b'hash(' + self.data['value'] + b')' + return b"hash(" + self.data["value"] + b")" class TestMerkleLeaf(unittest.TestCase): def setUp(self): - self.data = {'value': b'value'} + self.data = {"value": b"value"} self.instance = MerkleTestLeaf(self.data) def test_equality(self): @@ -68,11 +61,8 @@ class TestMerkleLeaf(unittest.TestCase): def test_collect(self): collected = self.instance.collect() self.assertEqual( - collected, { - self.instance.type: { - self.instance.hash: self.instance.get_data(), - }, - }, + collected, + {self.instance.type: {self.instance.hash: self.instance.get_data(),},}, ) collected2 = self.instance.collect() self.assertEqual(collected2, {}) @@ -81,16 +71,16 @@ class TestMerkleLeaf(unittest.TestCase): self.assertEqual(collected, collected3) def test_leaf(self): - with self.assertRaisesRegex(ValueError, 'is a leaf'): - self.instance[b'key1'] = 'Test' + with self.assertRaisesRegex(ValueError, "is a leaf"): + self.instance[b"key1"] = "Test" - with self.assertRaisesRegex(ValueError, 'is a leaf'): - del self.instance[b'key1'] + with self.assertRaisesRegex(ValueError, "is a leaf"): + del self.instance[b"key1"] - with self.assertRaisesRegex(ValueError, 'is a leaf'): - self.instance[b'key1'] + with self.assertRaisesRegex(ValueError, "is a leaf"): + self.instance[b"key1"] - with self.assertRaisesRegex(ValueError, 'is a leaf'): + with self.assertRaisesRegex(ValueError, "is a leaf"): self.instance.update(self.data) @@ -98,42 +88,36 @@ class TestMerkleNode(unittest.TestCase): maxDiff = None def setUp(self): - self.root = MerkleTestNode({'value': b'root'}) - self.nodes = {b'root': self.root} - for i in (b'a', b'b', b'c'): - value = b'root/' + i - node = 
MerkleTestNode({ - 'value': value, - }) + self.root = MerkleTestNode({"value": b"root"}) + self.nodes = {b"root": self.root} + for i in (b"a", b"b", b"c"): + value = b"root/" + i + node = MerkleTestNode({"value": value,}) self.root[i] = node self.nodes[value] = node - for j in (b'a', b'b', b'c'): - value2 = value + b'/' + j - node2 = MerkleTestNode({ - 'value': value2, - }) + for j in (b"a", b"b", b"c"): + value2 = value + b"/" + j + node2 = MerkleTestNode({"value": value2,}) node[j] = node2 self.nodes[value2] = node2 - for k in (b'a', b'b', b'c'): - value3 = value2 + b'/' + j - node3 = MerkleTestNode({ - 'value': value3, - }) + for k in (b"a", b"b", b"c"): + value3 = value2 + b"/" + j + node3 = MerkleTestNode({"value": value3,}) node2[j] = node3 self.nodes[value3] = node3 def test_equality(self): - node1 = merkle.MerkleNode({'foo': b'bar'}) - node2 = merkle.MerkleNode({'foo': b'bar'}) + node1 = merkle.MerkleNode({"foo": b"bar"}) + node2 = merkle.MerkleNode({"foo": b"bar"}) node3 = merkle.MerkleNode({}) self.assertEqual(node1, node2) self.assertNotEqual(node1, node3, node1 == node3) - node1['foo'] = node3 + node1["foo"] = node3 self.assertNotEqual(node1, node2) - node2['foo'] = node3 + node2["foo"] = node3 self.assertEqual(node1, node2) def test_hash(self): @@ -144,7 +128,7 @@ class TestMerkleNode(unittest.TestCase): hash = self.root.hash for node in self.nodes.values(): self.assertEqual(node.compute_hash_called, 1) - self.assertIn(node.data['value'], hash) + self.assertIn(node.data["value"], hash) # Should use the cached value hash2 = self.root.hash @@ -159,10 +143,10 @@ class TestMerkleNode(unittest.TestCase): self.assertEqual(node.compute_hash_called, 1) # Force update of the cached value for a deeply nested node - self.root[b'a'][b'b'].update_hash(force=True) + self.root[b"a"][b"b"].update_hash(force=True) for key, node in self.nodes.items(): # update_hash rehashes all children - if key.startswith(b'root/a/b'): + if key.startswith(b"root/a/b"): self.assertEqual(node.compute_hash_called, 2) else: self.assertEqual(node.compute_hash_called, 1) @@ -171,7 +155,7 @@ class TestMerkleNode(unittest.TestCase): self.assertEqual(hash, hash4) for key, node in self.nodes.items(): # update_hash also invalidates all parents - if key in (b'root', b'root/a') or key.startswith(b'root/a/b'): + if key in (b"root", b"root/a") or key.startswith(b"root/a/b"): self.assertEqual(node.compute_hash_called, 2) else: self.assertEqual(node.compute_hash_called, 1) @@ -189,55 +173,55 @@ class TestMerkleNode(unittest.TestCase): self.assertCountEqual(nodes, self.nodes.values()) def test_get(self): - for key in (b'a', b'b', b'c'): - self.assertEqual(self.root[key], self.nodes[b'root/' + key]) + for key in (b"a", b"b", b"c"): + self.assertEqual(self.root[key], self.nodes[b"root/" + key]) with self.assertRaisesRegex(KeyError, "b'nonexistent'"): - self.root[b'nonexistent'] + self.root[b"nonexistent"] def test_del(self): hash_root = self.root.hash - hash_a = self.nodes[b'root/a'].hash - del self.root[b'a'][b'c'] + hash_a = self.nodes[b"root/a"].hash + del self.root[b"a"][b"c"] hash_root2 = self.root.hash - hash_a2 = self.nodes[b'root/a'].hash + hash_a2 = self.nodes[b"root/a"].hash self.assertNotEqual(hash_root, hash_root2) self.assertNotEqual(hash_a, hash_a2) - self.assertEqual(self.nodes[b'root/a/c'].parents, []) + self.assertEqual(self.nodes[b"root/a/c"].parents, []) with self.assertRaisesRegex(KeyError, "b'nonexistent'"): - del self.root[b'nonexistent'] + del self.root[b"nonexistent"] def test_update(self): hash_root = 
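
The caching behaviour exercised above only requires a compute_hash() override; DemoNode below is an illustrative subclass mirroring the test helper, not part of the library:

    from swh.model import merkle

    class DemoNode(merkle.MerkleNode):
        type = "demo"

        def compute_hash(self):
            # Fold the sorted children into this node's own value.
            child = [k + b"=" + self[k].hash for k in sorted(self)]
            return b"hash(" + b", ".join([self.data["value"]] + child) + b")"

    root = DemoNode({"value": b"root"})
    root[b"a"] = DemoNode({"value": b"leaf"})
    assert root.hash == b"hash(root, a=hash(leaf))"
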
self.root.hash - hash_b = self.root[b'b'].hash + hash_b = self.root[b"b"].hash new_children = { - b'c': MerkleTestNode({'value': b'root/b/new_c'}), - b'd': MerkleTestNode({'value': b'root/b/d'}), + b"c": MerkleTestNode({"value": b"root/b/new_c"}), + b"d": MerkleTestNode({"value": b"root/b/d"}), } # collect all nodes self.root.collect() - self.root[b'b'].update(new_children) + self.root[b"b"].update(new_children) # Ensure everyone got reparented - self.assertEqual(new_children[b'c'].parents, [self.root[b'b']]) - self.assertEqual(new_children[b'd'].parents, [self.root[b'b']]) - self.assertEqual(self.nodes[b'root/b/c'].parents, []) + self.assertEqual(new_children[b"c"].parents, [self.root[b"b"]]) + self.assertEqual(new_children[b"d"].parents, [self.root[b"b"]]) + self.assertEqual(self.nodes[b"root/b/c"].parents, []) hash_root2 = self.root.hash self.assertNotEqual(hash_root, hash_root2) - self.assertIn(b'root/b/new_c', hash_root2) - self.assertIn(b'root/b/d', hash_root2) + self.assertIn(b"root/b/new_c", hash_root2) + self.assertIn(b"root/b/d", hash_root2) - hash_b2 = self.root[b'b'].hash + hash_b2 = self.root[b"b"].hash self.assertNotEqual(hash_b, hash_b2) for key, node in self.nodes.items(): - if key in (b'root', b'root/b'): + if key in (b"root", b"root/b"): self.assertEqual(node.compute_hash_called, 2) else: self.assertEqual(node.compute_hash_called, 1) @@ -246,10 +230,14 @@ class TestMerkleNode(unittest.TestCase): collected_after_update = self.root.collect() self.assertCountEqual( collected_after_update[MerkleTestNode.type], - [self.nodes[b'root'].hash, self.nodes[b'root/b'].hash, - new_children[b'c'].hash, new_children[b'd'].hash], + [ + self.nodes[b"root"].hash, + self.nodes[b"root/b"].hash, + new_children[b"c"].hash, + new_children[b"d"].hash, + ], ) # test that noop updates doesn't invalidate anything - self.root[b'a'][b'b'].update({}) + self.root[b"a"][b"b"].update({}) self.assertEqual(self.root.collect(), {}) diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py index f6164ef1a64bfa4890e8593a12019d87a3f17ef5..b65f75de745cd822a3a3165e3426ee588fa024c8 100644 --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -13,18 +13,31 @@ from hypothesis.strategies import binary import pytest from swh.model.model import ( - Content, SkippedContent, Directory, Revision, Release, Snapshot, - Origin, Timestamp, TimestampWithTimezone, - MissingData, Person + Content, + SkippedContent, + Directory, + Revision, + Release, + Snapshot, + Origin, + Timestamp, + TimestampWithTimezone, + MissingData, + Person, ) from swh.model.hashutil import hash_to_bytes, MultiHash import swh.model.hypothesis_strategies as strategies from swh.model.identifiers import ( - directory_identifier, revision_identifier, release_identifier, - snapshot_identifier + directory_identifier, + revision_identifier, + release_identifier, + snapshot_identifier, ) from swh.model.tests.test_identifiers import ( - directory_example, revision_example, release_example, snapshot_example + directory_example, + revision_example, + release_example, + snapshot_example, ) @@ -32,7 +45,7 @@ from swh.model.tests.test_identifiers import ( def test_todict_inverse_fromdict(objtype_and_obj): (obj_type, obj) = objtype_and_obj - if obj_type in ('origin', 'origin_visit'): + if obj_type in ("origin", "origin_visit"): return obj_as_dict = obj.to_dict() @@ -52,7 +65,7 @@ def test_todict_inverse_fromdict(objtype_and_obj): def test_todict_origins(origin): obj = origin.to_dict() - assert 'type' not in obj + assert "type" 
not in obj assert type(origin)(url=origin.url) == type(origin).from_dict(obj) @@ -72,6 +85,7 @@ def test_todict_origin_visit_updates(origin_visit_update): # Timestamp + @given(strategies.timestamps()) def test_timestamps_strategy(timestamp): attr.validate(timestamp) @@ -80,153 +94,116 @@ def test_timestamps_strategy(timestamp): def test_timestamp_seconds(): attr.validate(Timestamp(seconds=0, microseconds=0)) with pytest.raises(AttributeTypeError): - Timestamp(seconds='0', microseconds=0) + Timestamp(seconds="0", microseconds=0) - attr.validate(Timestamp(seconds=2**63-1, microseconds=0)) + attr.validate(Timestamp(seconds=2 ** 63 - 1, microseconds=0)) with pytest.raises(ValueError): - Timestamp(seconds=2**63, microseconds=0) + Timestamp(seconds=2 ** 63, microseconds=0) - attr.validate(Timestamp(seconds=-2**63, microseconds=0)) + attr.validate(Timestamp(seconds=-(2 ** 63), microseconds=0)) with pytest.raises(ValueError): - Timestamp(seconds=-2**63-1, microseconds=0) + Timestamp(seconds=-(2 ** 63) - 1, microseconds=0) def test_timestamp_microseconds(): attr.validate(Timestamp(seconds=0, microseconds=0)) with pytest.raises(AttributeTypeError): - Timestamp(seconds=0, microseconds='0') + Timestamp(seconds=0, microseconds="0") - attr.validate(Timestamp(seconds=0, microseconds=10**6-1)) + attr.validate(Timestamp(seconds=0, microseconds=10 ** 6 - 1)) with pytest.raises(ValueError): - Timestamp(seconds=0, microseconds=10**6) + Timestamp(seconds=0, microseconds=10 ** 6) with pytest.raises(ValueError): Timestamp(seconds=0, microseconds=-1) def test_timestamp_from_dict(): - assert Timestamp.from_dict({'seconds': 10, 'microseconds': 5}) + assert Timestamp.from_dict({"seconds": 10, "microseconds": 5}) with pytest.raises(AttributeTypeError): - Timestamp.from_dict({'seconds': '10', 'microseconds': 5}) + Timestamp.from_dict({"seconds": "10", "microseconds": 5}) with pytest.raises(AttributeTypeError): - Timestamp.from_dict({'seconds': 10, 'microseconds': '5'}) + Timestamp.from_dict({"seconds": 10, "microseconds": "5"}) with pytest.raises(ValueError): - Timestamp.from_dict({'seconds': 0, 'microseconds': -1}) + Timestamp.from_dict({"seconds": 0, "microseconds": -1}) - Timestamp.from_dict({'seconds': 0, 'microseconds': 10**6 - 1}) + Timestamp.from_dict({"seconds": 0, "microseconds": 10 ** 6 - 1}) with pytest.raises(ValueError): - Timestamp.from_dict({'seconds': 0, 'microseconds': 10**6}) + Timestamp.from_dict({"seconds": 0, "microseconds": 10 ** 6}) # TimestampWithTimezone + def test_timestampwithtimezone(): ts = Timestamp(seconds=0, microseconds=0) - tstz = TimestampWithTimezone( - timestamp=ts, - offset=0, - negative_utc=False) + tstz = TimestampWithTimezone(timestamp=ts, offset=0, negative_utc=False) attr.validate(tstz) assert tstz.negative_utc is False - attr.validate(TimestampWithTimezone( - timestamp=ts, - offset=10, - negative_utc=False)) + attr.validate(TimestampWithTimezone(timestamp=ts, offset=10, negative_utc=False)) - attr.validate(TimestampWithTimezone( - timestamp=ts, - offset=-10, - negative_utc=False)) + attr.validate(TimestampWithTimezone(timestamp=ts, offset=-10, negative_utc=False)) - tstz = TimestampWithTimezone( - timestamp=ts, - offset=0, - negative_utc=True) + tstz = TimestampWithTimezone(timestamp=ts, offset=0, negative_utc=True) attr.validate(tstz) assert tstz.negative_utc is True with pytest.raises(AttributeTypeError): TimestampWithTimezone( - timestamp=datetime.datetime.now(), - offset=0, - negative_utc=False) + timestamp=datetime.datetime.now(), offset=0, negative_utc=False + ) with 
pytest.raises(AttributeTypeError): - TimestampWithTimezone( - timestamp=ts, - offset='0', - negative_utc=False) + TimestampWithTimezone(timestamp=ts, offset="0", negative_utc=False) with pytest.raises(AttributeTypeError): - TimestampWithTimezone( - timestamp=ts, - offset=1.0, - negative_utc=False) + TimestampWithTimezone(timestamp=ts, offset=1.0, negative_utc=False) with pytest.raises(AttributeTypeError): - TimestampWithTimezone( - timestamp=ts, - offset=1, - negative_utc=0) + TimestampWithTimezone(timestamp=ts, offset=1, negative_utc=0) with pytest.raises(ValueError): - TimestampWithTimezone( - timestamp=ts, - offset=1, - negative_utc=True) + TimestampWithTimezone(timestamp=ts, offset=1, negative_utc=True) with pytest.raises(ValueError): - TimestampWithTimezone( - timestamp=ts, - offset=-1, - negative_utc=True) + TimestampWithTimezone(timestamp=ts, offset=-1, negative_utc=True) def test_timestampwithtimezone_from_datetime(): tz = datetime.timezone(datetime.timedelta(minutes=+60)) - date = datetime.datetime( - 2020, 2, 27, 14, 39, 19, tzinfo=tz) + date = datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=tz) tstz = TimestampWithTimezone.from_datetime(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp( - seconds=1582810759, - microseconds=0, - ), + timestamp=Timestamp(seconds=1582810759, microseconds=0,), offset=60, negative_utc=False, ) def test_timestampwithtimezone_from_iso8601(): - date = '2020-02-27 14:39:19.123456+0100' + date = "2020-02-27 14:39:19.123456+0100" tstz = TimestampWithTimezone.from_iso8601(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp( - seconds=1582810759, - microseconds=123456, - ), + timestamp=Timestamp(seconds=1582810759, microseconds=123456,), offset=60, negative_utc=False, ) def test_timestampwithtimezone_from_iso8601_negative_utc(): - date = '2020-02-27 13:39:19-0000' + date = "2020-02-27 13:39:19-0000" tstz = TimestampWithTimezone.from_iso8601(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp( - seconds=1582810759, - microseconds=0, - ), + timestamp=Timestamp(seconds=1582810759, microseconds=0,), offset=0, negative_utc=True, ) @@ -236,11 +213,9 @@ def test_person_from_fullname(): """The author should have name, email and fullname filled. """ - actual_person = Person.from_fullname(b'tony <ynot@dagobah>') + actual_person = Person.from_fullname(b"tony <ynot@dagobah>") assert actual_person == Person( - fullname=b'tony <ynot@dagobah>', - name=b'tony', - email=b'ynot@dagobah', + fullname=b"tony <ynot@dagobah>", name=b"tony", email=b"ynot@dagobah", ) @@ -248,12 +223,8 @@ def test_person_from_fullname_no_email(): """The author and fullname should be the same as the input (author). """ - actual_person = Person.from_fullname(b'tony') - assert actual_person == Person( - fullname=b'tony', - name=b'tony', - email=None, - ) + actual_person = Person.from_fullname(b"tony") + assert actual_person == Person(fullname=b"tony", name=b"tony", email=None,) def test_person_from_fullname_empty_person(): @@ -261,12 +232,8 @@ def test_person_from_fullname_empty_person(): byte-string. 
""" - actual_person = Person.from_fullname(b'') - assert actual_person == Person( - fullname=b'', - name=None, - email=None, - ) + actual_person = Person.from_fullname(b"") + assert actual_person == Person(fullname=b"", name=None, email=None,) def test_git_author_line_to_author(): @@ -275,46 +242,26 @@ def test_git_author_line_to_author(): Person.from_fullname(None) tests = { - b'a <b@c.com>': Person( - name=b'a', - email=b'b@c.com', - fullname=b'a <b@c.com>', + b"a <b@c.com>": Person(name=b"a", email=b"b@c.com", fullname=b"a <b@c.com>",), + b"<foo@bar.com>": Person( + name=None, email=b"foo@bar.com", fullname=b"<foo@bar.com>", ), - b'<foo@bar.com>': Person( - name=None, - email=b'foo@bar.com', - fullname=b'<foo@bar.com>', - ), - b'malformed <email': Person( - name=b'malformed', - email=b'email', - fullname=b'malformed <email' + b"malformed <email": Person( + name=b"malformed", email=b"email", fullname=b"malformed <email" ), b'malformed <"<br"@ckets>': Person( - name=b'malformed', + name=b"malformed", email=b'"<br"@ckets', fullname=b'malformed <"<br"@ckets>', ), - b'trailing <sp@c.e> ': Person( - name=b'trailing', - email=b'sp@c.e', - fullname=b'trailing <sp@c.e> ', - ), - b'no<sp@c.e>': Person( - name=b'no', - email=b'sp@c.e', - fullname=b'no<sp@c.e>', - ), - b' more <sp@c.es>': Person( - name=b'more', - email=b'sp@c.es', - fullname=b' more <sp@c.es>', + b"trailing <sp@c.e> ": Person( + name=b"trailing", email=b"sp@c.e", fullname=b"trailing <sp@c.e> ", ), - b' <>': Person( - name=None, - email=None, - fullname=b' <>', + b"no<sp@c.e>": Person(name=b"no", email=b"sp@c.e", fullname=b"no<sp@c.e>",), + b" more <sp@c.es>": Person( + name=b"more", email=b"sp@c.es", fullname=b" more <sp@c.es>", ), + b" <>": Person(name=None, email=None, fullname=b" <>",), } for person in sorted(tests): @@ -324,32 +271,42 @@ def test_git_author_line_to_author(): # Content + def test_content_get_hash(): - hashes = dict( - sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux') - c = Content(length=42, status='visible', **hashes) + hashes = dict(sha1=b"foo", sha1_git=b"bar", sha256=b"baz", blake2s256=b"qux") + c = Content(length=42, status="visible", **hashes) for (hash_name, hash_) in hashes.items(): assert c.get_hash(hash_name) == hash_ def test_content_hashes(): - hashes = dict( - sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux') - c = Content(length=42, status='visible', **hashes) + hashes = dict(sha1=b"foo", sha1_git=b"bar", sha256=b"baz", blake2s256=b"qux") + c = Content(length=42, status="visible", **hashes) assert c.hashes() == hashes def test_content_data(): c = Content( - length=42, status='visible', data=b'foo', - sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux') + length=42, + status="visible", + data=b"foo", + sha1=b"foo", + sha1_git=b"bar", + sha256=b"baz", + blake2s256=b"qux", + ) assert c.with_data() == c def test_content_data_missing(): c = Content( - length=42, status='visible', - sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux') + length=42, + status="visible", + sha1=b"foo", + sha1_git=b"bar", + sha256=b"baz", + blake2s256=b"qux", + ) with pytest.raises(MissingData): c.with_data() @@ -358,7 +315,7 @@ def test_content_data_missing(): def test_content_from_dict(content_d): c = Content.from_data(**content_d) assert c - assert c.ctime == content_d['ctime'] + assert c.ctime == content_d["ctime"] content_d2 = c.to_dict() c2 = Content.from_dict(content_d2) @@ -369,14 +326,14 @@ def test_content_from_dict_str_ctime(): # test with ctime as a string n = 
datetime.datetime(2020, 5, 6, 12, 34) content_d = { - 'ctime': n.isoformat(), - 'data': b'', - 'length': 0, - 'sha1': b'\x00', - 'sha256': b'\x00', - 'sha1_git': b'\x00', - 'blake2s256': b'\x00', - } + "ctime": n.isoformat(), + "data": b"", + "length": 0, + "sha1": b"\x00", + "sha256": b"\x00", + "sha1_git": b"\x00", + "blake2s256": b"\x00", + } c = Content.from_dict(content_d) assert c.ctime == n @@ -386,29 +343,30 @@ def test_content_from_data(data): c = Content.from_data(data) assert c.data == data assert c.length == len(data) - assert c.status == 'visible' + assert c.status == "visible" for key, value in MultiHash.from_data(data).digest().items(): assert getattr(c, key) == value @given(binary(max_size=4096)) def test_hidden_content_from_data(data): - c = Content.from_data(data, status='hidden') + c = Content.from_data(data, status="hidden") assert c.data == data assert c.length == len(data) - assert c.status == 'hidden' + assert c.status == "hidden" for key, value in MultiHash.from_data(data).digest().items(): assert getattr(c, key) == value # SkippedContent + @given(binary(max_size=4096)) def test_skipped_content_from_data(data): - c = SkippedContent.from_data(data, reason='reason') - assert c.reason == 'reason' + c = SkippedContent.from_data(data, reason="reason") + assert c.reason == "reason" assert c.length == len(data) - assert c.status == 'absent' + assert c.status == "absent" for key, value in MultiHash.from_data(data).digest().items(): assert getattr(c, key) == value @@ -417,19 +375,20 @@ def test_skipped_content_from_data(data): def test_skipped_content_origin_is_str(skipped_content_d): assert SkippedContent.from_dict(skipped_content_d) - skipped_content_d['origin'] = 'http://path/to/origin' + skipped_content_d["origin"] = "http://path/to/origin" assert SkippedContent.from_dict(skipped_content_d) - skipped_content_d['origin'] = Origin(url='http://path/to/origin') - with pytest.raises(ValueError, match='origin'): + skipped_content_d["origin"] = Origin(url="http://path/to/origin") + with pytest.raises(ValueError, match="origin"): SkippedContent.from_dict(skipped_content_d) # ID computation + def test_directory_model_id_computation(): dir_dict = directory_example.copy() - del dir_dict['id'] + del dir_dict["id"] dir_id = hash_to_bytes(directory_identifier(dir_dict)) dir_model = Directory.from_dict(dir_dict) @@ -438,7 +397,7 @@ def test_directory_model_id_computation(): def test_revision_model_id_computation(): rev_dict = revision_example.copy() - del rev_dict['id'] + del rev_dict["id"] rev_id = hash_to_bytes(revision_identifier(rev_dict)) rev_model = Revision.from_dict(rev_dict) @@ -450,9 +409,9 @@ def test_revision_model_id_computation_with_no_date(): """ rev_dict = revision_example.copy() - rev_dict['date'] = None - rev_dict['committer_date'] = None - del rev_dict['id'] + rev_dict["date"] = None + rev_dict["committer_date"] = None + del rev_dict["id"] rev_id = hash_to_bytes(revision_identifier(rev_dict)) rev_model = Revision.from_dict(rev_dict) @@ -463,7 +422,7 @@ def test_revision_model_id_computation_with_no_date(): def test_release_model_id_computation(): rel_dict = release_example.copy() - del rel_dict['id'] + del rel_dict["id"] rel_id = hash_to_bytes(release_identifier(rel_dict)) rel_model = Release.from_dict(rel_dict) @@ -473,7 +432,7 @@ def test_release_model_id_computation(): def test_snapshot_model_id_computation(): snp_dict = snapshot_example.copy() - del snp_dict['id'] + del snp_dict["id"] snp_id = hash_to_bytes(snapshot_identifier(snp_dict)) snp_model = 
Snapshot.from_dict(snp_dict) diff --git a/swh/model/tests/test_toposort.py b/swh/model/tests/test_toposort.py index 174368f579d1fad763bb5ddb4b3720c6e01d579a..d1f841de5f9df945144a8fac066ccd49776ff491 100644 --- a/swh/model/tests/test_toposort.py +++ b/swh/model/tests/test_toposort.py @@ -25,16 +25,16 @@ def is_toposorted_slow(revision_log): Returns: True if the revision log is topologically sorted. """ - rev_by_id = {r['id']: r for r in revision_log} + rev_by_id = {r["id"]: r for r in revision_log} def all_parents(revision): - for parent in revision['parents']: + for parent in revision["parents"]: yield parent yield from all_parents(rev_by_id[parent]) visited = set() for rev in revision_log: - visited.add(rev['id']) + visited.add(rev["id"]) if not all(parent in visited for parent in all_parents(rev)): return False return True @@ -43,10 +43,10 @@ def is_toposorted_slow(revision_log): class TestToposort(unittest.TestCase): def generate_log(self, graph): for node_id, parents in graph.items(): - yield {'id': node_id, 'parents': tuple(parents)} + yield {"id": node_id, "parents": tuple(parents)} def unordered_log(self, log): - return {(d['id'], tuple(d['parents'])) for d in log} + return {(d["id"], tuple(d["parents"])) for d in log} def check(self, graph): log = list(self.generate_log(graph)) @@ -56,45 +56,28 @@ class TestToposort(unittest.TestCase): self.assertTrue(is_toposorted_slow(toposort(log))) def test_linked_list(self): - self.check({3: [2], - 2: [1], - 1: []}) + self.check({3: [2], 2: [1], 1: []}) def test_fork(self): - self.check({7: [6], - 6: [4], - 5: [3], - 4: [2], - 3: [2], - 2: [1], - 1: []}) + self.check({7: [6], 6: [4], 5: [3], 4: [2], 3: [2], 2: [1], 1: []}) def test_fork_merge(self): - self.check({8: [7, 5], - 7: [6], - 6: [4], - 5: [3], - 4: [2], - 3: [2], - 2: [1], - 1: []}) + self.check({8: [7, 5], 7: [6], 6: [4], 5: [3], 4: [2], 3: [2], 2: [1], 1: []}) def test_two_origins(self): - self.check({9: [8], - 8: [7, 5], - 7: [6], - 6: [4], - 5: [3], - 4: [], - 3: []}) + self.check({9: [8], 8: [7, 5], 7: [6], 6: [4], 5: [3], 4: [], 3: []}) def test_three_way(self): - self.check({9: [8, 4, 2], - 8: [7, 5], - 7: [6], - 6: [4], - 5: [3], - 4: [2], - 3: [2], - 2: [1], - 1: []}) + self.check( + { + 9: [8, 4, 2], + 8: [7, 5], + 7: [6], + 6: [4], + 5: [3], + 4: [2], + 3: [2], + 2: [1], + 1: [], + } + ) diff --git a/swh/model/tests/test_validators.py b/swh/model/tests/test_validators.py index 691c579e4704e96733ecebba027411b1453f7eda..784e6fee3659930d906eb306c4d019cebbf08593 100644 --- a/swh/model/tests/test_validators.py +++ b/swh/model/tests/test_validators.py @@ -16,36 +16,36 @@ def hash_data(raw_content): class TestValidators(unittest.TestCase): def setUp(self): self.valid_visible_content = { - 'status': 'visible', - 'length': 5, - 'data': b'1984\n', - 'ctime': datetime.datetime(2015, 11, 22, 16, 33, 56, - tzinfo=datetime.timezone.utc), + "status": "visible", + "length": 5, + "data": b"1984\n", + "ctime": datetime.datetime( + 2015, 11, 22, 16, 33, 56, tzinfo=datetime.timezone.utc + ), } - self.valid_visible_content.update( - hash_data(self.valid_visible_content['data'])) + self.valid_visible_content.update(hash_data(self.valid_visible_content["data"])) self.valid_absent_content = { - 'status': 'absent', - 'length': 5, - 'ctime': datetime.datetime(2015, 11, 22, 16, 33, 56, - tzinfo=datetime.timezone.utc), - 'reason': 'Content too large', - 'sha1_git': self.valid_visible_content['sha1_git'], - 'origin': 42, + "status": "absent", + "length": 5, + "ctime": datetime.datetime( + 2015, 11, 
22, 16, 33, 56, tzinfo=datetime.timezone.utc + ), + "reason": "Content too large", + "sha1_git": self.valid_visible_content["sha1_git"], + "origin": 42, } self.invalid_content_hash_mismatch = self.valid_visible_content.copy() self.invalid_content_hash_mismatch.update( - hash_data(b"this is not the data you're looking for")) + hash_data(b"this is not the data you're looking for") + ) def test_validate_content(self): - self.assertTrue( - validators.validate_content(self.valid_visible_content)) + self.assertTrue(validators.validate_content(self.valid_visible_content)) - self.assertTrue( - validators.validate_content(self.valid_absent_content)) + self.assertTrue(validators.validate_content(self.valid_absent_content)) def test_validate_content_hash_mismatch(self): with self.assertRaises(exceptions.ValidationError) as cm: @@ -62,14 +62,17 @@ class TestValidators(unittest.TestCase): exc = cm.exception self.assertIsInstance(str(exc), str) - self.assertEqual(set(exc.error_dict.keys()), - {exceptions.NON_FIELD_ERRORS}) + self.assertEqual(set(exc.error_dict.keys()), {exceptions.NON_FIELD_ERRORS}) hash_mismatches = exc.error_dict[exceptions.NON_FIELD_ERRORS] self.assertIsInstance(hash_mismatches, list) self.assertEqual(len(hash_mismatches), 4) - self.assertTrue(all(mismatch.code == 'content-hash-mismatch' - for mismatch in hash_mismatches)) - self.assertEqual(set(mismatch.params['hash'] - for mismatch in hash_mismatches), - {'sha1', 'sha1_git', 'sha256', 'blake2s256'}) + self.assertTrue( + all( + mismatch.code == "content-hash-mismatch" for mismatch in hash_mismatches + ) + ) + self.assertEqual( + set(mismatch.params["hash"] for mismatch in hash_mismatches), + {"sha1", "sha1_git", "sha256", "blake2s256"}, + ) diff --git a/swh/model/toposort.py b/swh/model/toposort.py index b0a7231a5e58a70d3d6c36477fd763717fde06cf..6e4cba7e6e395181e64fa09db2a050760d8d93ec 100644 --- a/swh/model/toposort.py +++ b/swh/model/toposort.py @@ -25,8 +25,8 @@ def toposort(revision_log): # Add the roots to the processing queue. 
queue = collections.deque() for rev in revision_log: - parents = rev['parents'] - in_degree[rev['id']] = len(parents) + parents = rev["parents"] + in_degree[rev["id"]] = len(parents) if not parents: queue.append(rev) for parent in parents: @@ -37,7 +37,7 @@ def toposort(revision_log): while queue: rev = queue.popleft() yield rev - for child in children[rev['id']]: - in_degree[child['id']] -= 1 - if in_degree[child['id']] == 0: + for child in children[rev["id"]]: + in_degree[child["id"]] -= 1 + if in_degree[child["id"]] == 0: queue.append(child) diff --git a/swh/model/validators.py b/swh/model/validators.py index 6d2c37035b04a0185750aae7c0b1c0f02cad5584..6cd7fc110dc13d4074338aa52eede6a76366b431 100644 --- a/swh/model/validators.py +++ b/swh/model/validators.py @@ -14,17 +14,16 @@ def validate_content(content): Args: a content (dictionary) to validate.""" def validate_content_status(status): - return fields.validate_enum(status, {'absent', 'visible', 'hidden'}) + return fields.validate_enum(status, {"absent", "visible", "hidden"}) def validate_keys(content): - hashes = {'sha1', 'sha1_git', 'sha256'} + hashes = {"sha1", "sha1_git", "sha256"} errors = [] out = True - if content['status'] == 'absent': + if content["status"] == "absent": try: - out = out and fields.validate_all_keys(content, {'reason', - 'origin'}) + out = out and fields.validate_all_keys(content, {"reason", "origin"}) except ValidationError as e: errors.append(e) try: @@ -44,34 +43,36 @@ def validate_content(content): def validate_hashes(content): errors = [] - if 'data' in content: - hashes = MultiHash.from_data(content['data']).digest() + if "data" in content: + hashes = MultiHash.from_data(content["data"]).digest() for hash_type, computed_hash in hashes.items(): if hash_type not in content: continue content_hash = hash_to_bytes(content[hash_type]) if content_hash != computed_hash: - errors.append(ValidationError( - 'hash mismatch in content for hash %(hash)s', - params={'hash': hash_type}, - code='content-hash-mismatch', - )) + errors.append( + ValidationError( + "hash mismatch in content for hash %(hash)s", + params={"hash": hash_type}, + code="content-hash-mismatch", + ) + ) if errors: raise ValidationError(errors) return True content_schema = { - 'sha1': (False, fields.validate_sha1), - 'sha1_git': (False, fields.validate_sha1_git), - 'sha256': (False, fields.validate_sha256), - 'status': (True, validate_content_status), - 'length': (True, fields.validate_int), - 'ctime': (True, fields.validate_datetime), - 'reason': (False, fields.validate_str), - 'origin': (False, fields.validate_int), - 'data': (False, fields.validate_bytes), + "sha1": (False, fields.validate_sha1), + "sha1_git": (False, fields.validate_sha1_git), + "sha256": (False, fields.validate_sha256), + "status": (True, validate_content_status), + "length": (True, fields.validate_int), + "ctime": (True, fields.validate_datetime), + "reason": (False, fields.validate_str), + "origin": (False, fields.validate_int), + "data": (False, fields.validate_bytes), NON_FIELD_ERRORS: [validate_keys, validate_hashes], } - return fields.validate_against_schema('content', content_schema, content) + return fields.validate_against_schema("content", content_schema, content) diff --git a/version.txt b/version.txt index c86734c82ec6706b87399423dad77c421c15764d..0a353c8a28622d3e625745573d3b28ce93374818 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.64-0-gc7c1a57 \ No newline at end of file +v0.0.65-0-g94da010 \ No newline at end of file
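A note on the swh/model/toposort.py hunks above: only the quoting changes, but the function they touch is a generator implementing Kahn's algorithm over a revision log. A minimal usage sketch follows (illustrative only: the three-revision log is invented; the {"id": ..., "parents": ...} dict shape mirrors what generate_log() builds in test_toposort.py):

# Illustrative sketch, not part of the patch above; the revision log is made up.
from swh.model.toposort import toposort

revision_log = [
    {"id": 3, "parents": (2,)},
    {"id": 2, "parents": (1,)},
    {"id": 1, "parents": ()},
]

# Kahn's algorithm: parentless revisions enter the queue first, and a
# revision is yielded only once every one of its parents has been yielded.
for rev in toposort(revision_log):
    print(rev["id"])  # prints 1, then 2, then 3

Because toposort() yields lazily, callers can stream large revision logs; is_toposorted_slow() in the test file checks the same parents-before-children invariant by brute force.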