diff --git a/.gitignore b/.gitignore index 303d302636f5d8ff2d1d08c9764cc601676f2f0e..43b2d615d8483732cf4b72e4855b3201d62fd2e7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,13 @@ -*.pyc -*.sw? *~ +build .coverage +dist +*.egg-info/ .eggs/ +.hypothesis +*.pyc __pycache__ -*.egg-info/ -dist +.pytest_cache +*.sw? +.tox version.txt diff --git a/MANIFEST.in b/MANIFEST.in index e7c46fcaa031efc7023aa243e3ff94ba7996ea65..99c8c1ac0f068c4ad84b7bac65a8316468c3d049 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,6 @@ +include README.md include Makefile include requirements.txt include requirements-swh.txt include version.txt +recursive-include swh/model/tests/data *.tgz diff --git a/PKG-INFO b/PKG-INFO index 653b8da334326f55a86ea9c15ce5b7d3508e2f22..b07e4b866fcb26f1f4f87d4f7060e879115afe6b 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,38 @@ -Metadata-Version: 1.0 +Metadata-Version: 2.1 Name: swh.model -Version: 0.0.27 +Version: 0.0.28 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Description: UNKNOWN +Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest +Project-URL: Funding, https://www.softwareheritage.org/donate +Project-URL: Source, https://forge.softwareheritage.org/source/swh-model +Description: swh-model + ========= + + Implementation of the Data model of the Software Heritage project, used to + archive source code artifacts. 
+ + This module defines the notion of Persistent Identifier (PID) and provides + tools to compute them: + + ```sh + $ swh-identify fork.c kmod.c sched/deadline.c + swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3 fork.c + swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2 kmod.c + swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82 sched/deadline.c + + $ swh-identify --no-filename /usr/src/linux/kernel/ + swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab + ``` + Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3 +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) +Classifier: Operating System :: OS Independent +Classifier: Development Status :: 5 - Production/Stable +Description-Content-Type: text/markdown +Provides-Extra: testing diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f26f274ca14c90dc44905f5b27b8502e27d71f53 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +swh-model +========= + +Implementation of the Data model of the Software Heritage project, used to +archive source code artifacts. + +This module defines the notion of Persistent Identifier (PID) and provides +tools to compute them: + +```sh + $ swh-identify fork.c kmod.c sched/deadline.c + swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3 fork.c + swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2 kmod.c + swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82 sched/deadline.c + + $ swh-identify --no-filename /usr/src/linux/kernel/ + swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab +``` diff --git a/bin/swh-hashtree b/bin/swh-hashtree new file mode 100755 index 0000000000000000000000000000000000000000..faf258fddb4f8f6d21fc13eaee34c9cc077aa27d --- /dev/null +++ b/bin/swh-hashtree @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +# Use sample: +# swh-hashtree --path . 
--ignore '.svn' --ignore '.git-svn' \ +# --ignore-empty-folder +# 38f8d2c3a951f6b94007896d0981077e48bbd702 + +import click +import os + +from swh.model import from_disk, hashutil + + +def combine_filters(*filters): + """Combine several ignore filters""" + if len(filters) == 0: + return from_disk.accept_all_directories + elif len(filters) == 1: + return filters[0] + + def combined_filter(*args, **kwargs): + return all(filter(*args, **kwargs) for filter in filters) + + return combined_filter + + +@click.command() +@click.option('--path', default='.', + help='Optional path to hash.') +@click.option('--ignore-empty-folder', is_flag=True, default=False, + help='Ignore empty folder.') +@click.option('--ignore', multiple=True, + help='Ignore pattern.') +def main(path, ignore_empty_folder=False, ignore=None): + + filters = [] + if ignore_empty_folder: + filters.append(from_disk.ignore_empty_directories) + if ignore: + filters.append( + from_disk.ignore_named_directories( + [os.fsencode(name) for name in ignore] + ) + ) + + try: + d = from_disk.Directory.from_disk(path=os.fsencode(path), + dir_filter=combine_filters(*filters)) + hash = d.hash + except Exception as e: + print(e) + return + else: + print(hashutil.hash_to_hex(hash)) + + +if __name__ == '__main__': + main() diff --git a/docs/Makefile b/docs/Makefile index c30c50ab01ec91da18f0718b7cfd052f046c2e44..b97c7532e5b946df72b8641f22f6e3e2ba84602c 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1 +1,2 @@ include ../../swh-docs/Makefile.sphinx +-include Makefile.local diff --git a/docs/Makefile.local b/docs/Makefile.local new file mode 100644 index 0000000000000000000000000000000000000000..352ffd352651af5bb929bc5b2c081e77cb42c401 --- /dev/null +++ b/docs/Makefile.local @@ -0,0 +1,14 @@ +sphinx/html: images +sphinx/clean: clean-images + +images: + make -C images/ +clean-images: + make -C images/ clean + +.PHONY: images clean-images + + +# Local Variables: +# mode: makefile +# End: diff --git a/docs/images/Makefile
b/docs/images/Makefile index 2bc794e965ba7be048c00263d49e6a8c0769a5b5..ddc859daf3b0c1c6888808a395fe961751fefcd8 100644 --- a/docs/images/Makefile +++ b/docs/images/Makefile @@ -7,19 +7,11 @@ BUILD_TARGETS += $(MERKLE_DAG) all: $(BUILD_TARGETS) -# dia exporters - -%.eps: %.dia - dia -t eps --export $@ $< - %.svg: %.dia - dia -t svg --export $@ $< - -# generic converters - -%.pdf: %.eps - epstopdf $< + inkscape -l $@ $< +%.pdf: %.dia + inkscape -A $@ $< clean: -rm -f $(BUILD_TARGETS) diff --git a/docs/index.rst b/docs/index.rst index 74756e7522401bb4e7fa63d9231fc401524f1c6f..55ab5fd51185bd109cff0e0dc0532096726e6b02 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,10 @@ .. _swh-model: -Software Heritage - Development Documentation -============================================= +Software Heritage - Data model +============================== + +Implementation of the :ref:`data-model` to archive source code artifacts. + .. toctree:: :maxdepth: 2 diff --git a/docs/persistent-identifiers.rst b/docs/persistent-identifiers.rst index 29bf797e144d0a0ba19293caf3aca6f0497b55f9..89b0365d264e70700cbb8298aee0b8b9942b28cb 100644 --- a/docs/persistent-identifiers.rst +++ b/docs/persistent-identifiers.rst @@ -128,23 +128,6 @@ Examples of the entire Darktable Git repository taken on 4 May 2017 from GitHub -Resolution ----------- - -Persistent identifiers can be resolved using the Software Heritage Web -application (see :py:mod:`swh.web`). - -In particular, the root endpoint ``/`` can be given a persistent identifier and -will lead to the browsing page of the corresponding object, like this: -``https://archive.softwareheritage.org/<identifier>``. 
For example: - -* `<https://archive.softwareheritage.org/swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2>`_ -* `<https://archive.softwareheritage.org/swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505>`_ -* `<https://archive.softwareheritage.org/swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d>`_ -* `<https://archive.softwareheritage.org/swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f>`_ -* `<https://archive.softwareheritage.org/swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453>`_ - - Contextual information ====================== @@ -191,3 +174,54 @@ The following piece of contextual information are supported: * software origin: where a given object has been found or observed in the wild, as the URI that was used by Software Heritage to ingest the object into the archive + + +Resolution +========== + + +Dedicated resolvers +------------------- + +Persistent identifiers can be resolved using the Software Heritage Web +application (see :py:mod:`swh.web`). In particular, the **root endpoint** +``/`` can be given a persistent identifier and will lead to the browsing page +of the corresponding object, like this: +``https://archive.softwareheritage.org/<identifier>``. + +A **dedicated** ``/resolve`` **endpoint** of the HTTP API is also available to +explicitly request persistent identifier resolution; see: +:http:get:`/api/1/resolve/(swh_id)/`. 
+ +Examples: + +* `<https://archive.softwareheritage.org/swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2>`_ +* `<https://archive.softwareheritage.org/swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505>`_ +* `<https://archive.softwareheritage.org/api/1/resolve/swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d>`_ +* `<https://archive.softwareheritage.org/api/1/resolve/swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f>`_ +* `<https://archive.softwareheritage.org/api/1/resolve/swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453>`_ + + +External resolvers +------------------ + +The following **independent resolvers** support resolution of Software +Heritage persistent identifiers: + +* `Identifiers.org <https://identifiers.org>`_; see: + `<http://identifiers.org/swh/>`_ (registry identifier `MIR:00000655 + <https://www.ebi.ac.uk/miriam/main/datatypes/MIR:00000655>`_). + +* `Name-to-Thing (N2T) <https://n2t.net/>`_ + +Examples: + +* `<https://identifiers.org/swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2>`_ +* `<https://identifiers.org/swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505>`_ +* `<https://identifiers.org/swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d>`_ +* `<https://n2t.net/swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f>`_ +* `<https://n2t.net/swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453>`_ + +Note that resolution via Identifiers.org does not support contextual +information, due to `syntactic incompatibilities +<http://identifiers.org/documentation#custom_requests>`_. 
diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000000000000000000000000000000000000..e86f7f4e0aa54f919816a112eec1963b5a780412 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +addopts = --doctest-modules +norecursedirs = docs diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000000000000000000000000000000000000..e079f8a6038dd2dc8512967540f96ee0de172067 --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1 @@ +pytest diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 7e4a47ceb5549af703f343344c5e3161177d948b..c28e4bf0ad4ef5a6931ae5e986ab0b66b64569b0 --- a/setup.py +++ b/setup.py @@ -1,21 +1,42 @@ -import hashlib +#!/usr/bin/env python3 +# Copyright (C) 2015-2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information from setuptools import setup, find_packages +import hashlib + +from os import path +from io import open + +here = path.abspath(path.dirname(__file__)) +# Get the long description from the README file +with open(path.join(here, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + + +def parse_requirements(name=None): + if name: + reqf = 'requirements-%s.txt' % name + else: + reqf = 'requirements.txt' -def parse_requirements(): requirements = [] - for reqf in ('requirements.txt', 'requirements-swh.txt'): - with open(reqf) as f: - for line in f.readlines(): - line = line.strip() - if not line or line.startswith('#'): - continue - requirements.append(line) + if not path.exists(reqf): + return requirements + + with open(reqf) as f: + for line in f.readlines(): + line = line.strip() + if not line or line.startswith('#'): + continue + requirements.append(line) return requirements -extra_requirements = [] +blake2_requirements = [] pyblake2_hash_sets = [ # Built-in implementation in Python 
3.6+ @@ -32,22 +53,37 @@ for pyblake2_hashes in pyblake2_hash_sets: else: # None of the possible sets of blake2 hashes are available. # use pyblake2 instead - extra_requirements.append('pyblake2') + blake2_requirements.append('pyblake2') setup( name='swh.model', description='Software Heritage data model', + long_description=long_description, + long_description_content_type='text/markdown', author='Software Heritage developers', author_email='swh-devel@inria.fr', url='https://forge.softwareheritage.org/diffusion/DMOD/', - packages=find_packages(), # packages's modules - scripts=[], # scripts to package - install_requires=parse_requirements() + extra_requirements, + packages=find_packages(), + setup_requires=['vcversioner'], + install_requires=(parse_requirements() + parse_requirements('swh') + + blake2_requirements), + extras_require={'testing': parse_requirements('test')}, + vcversioner={}, + include_package_data=True, entry_points=''' [console_scripts] swh-identify=swh.model.cli:identify ''', - setup_requires=['vcversioner'], - vcversioner={}, - include_package_data=True, + classifiers=[ + "Programming Language :: Python :: 3", + "Intended Audience :: Developers", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Operating System :: OS Independent", + "Development Status :: 5 - Production/Stable", + ], + project_urls={ + 'Bug Reports': 'https://forge.softwareheritage.org/maniphest', + 'Funding': 'https://www.softwareheritage.org/donate', + 'Source': 'https://forge.softwareheritage.org/source/swh-model', + }, ) diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index 653b8da334326f55a86ea9c15ce5b7d3508e2f22..b07e4b866fcb26f1f4f87d4f7060e879115afe6b 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,10 +1,38 @@ -Metadata-Version: 1.0 +Metadata-Version: 2.1 Name: swh.model -Version: 0.0.27 +Version: 0.0.28 Summary: Software Heritage data model Home-page: 
https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Description: UNKNOWN +Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest +Project-URL: Funding, https://www.softwareheritage.org/donate +Project-URL: Source, https://forge.softwareheritage.org/source/swh-model +Description: swh-model + ========= + + Implementation of the Data model of the Software Heritage project, used to + archive source code artifacts. + + This module defines the notion of Persistent Identifier (PID) and provides + tools to compute them: + + ```sh + $ swh-identify fork.c kmod.c sched/deadline.c + swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3 fork.c + swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2 kmod.c + swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82 sched/deadline.c + + $ swh-identify --no-filename /usr/src/linux/kernel/ + swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab + ``` + Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3 +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) +Classifier: Operating System :: OS Independent +Classifier: Development Status :: 5 - Production/Stable +Description-Content-Type: text/markdown +Provides-Extra: testing diff --git a/swh.model.egg-info/SOURCES.txt b/swh.model.egg-info/SOURCES.txt index f6a2f94098b482c8b10045483d3c375f3165ad33..81e3d7dfcf82c118bb475c93c2fbef1a95c992d4 100644 --- a/swh.model.egg-info/SOURCES.txt +++ b/swh.model.egg-info/SOURCES.txt @@ -4,11 +4,16 @@ LICENSE MANIFEST.in Makefile Makefile.local +README.md +pytest.ini requirements-swh.txt +requirements-test.txt requirements.txt setup.py +tox.ini version.txt bin/git-revhash +bin/swh-hashtree bin/swh-revhash debian/changelog debian/compat @@ -18,6 +23,7 @@ debian/rules debian/source/format docs/.gitignore docs/Makefile +docs/Makefile.local docs/conf.py docs/data-model.rst 
docs/index.rst @@ -56,6 +62,7 @@ swh/model/tests/test_identifiers.py swh/model/tests/test_merkle.py swh/model/tests/test_toposort.py swh/model/tests/test_validators.py +swh/model/tests/data/dir-folders/sample-folder.tgz swh/model/tests/fields/__init__.py swh/model/tests/fields/test_compound.py swh/model/tests/fields/test_hashes.py diff --git a/swh.model.egg-info/requires.txt b/swh.model.egg-info/requires.txt index dbcd3082d3efe925da6522cc818c882fb36e74f8..091cefe89235e78d7ba4a9f88cb34a58f2abbf52 100644 --- a/swh.model.egg-info/requires.txt +++ b/swh.model.egg-info/requires.txt @@ -1,2 +1,5 @@ Click vcversioner + +[testing] +pytest diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py index f9f37290cbf91aaf177b366496d84cda4ead46f0..bfd7c7cd026f271be37aecca9d2435b683304403 100644 --- a/swh/model/from_disk.py +++ b/swh/model/from_disk.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -7,7 +7,7 @@ import enum import os import stat -from . import hashutil +from .hashutil import MultiHash, HASH_BLOCK_SIZE from .merkle import MerkleLeaf, MerkleNode from .identifiers import ( directory_identifier, @@ -77,7 +77,7 @@ class Content(MerkleLeaf): mode (int): a file mode (passed to :func:`mode_to_perms`) data (bytes): raw contents of the file """ - ret = hashutil.hash_data(data) + ret = MultiHash.from_data(data).digest() ret['length'] = len(data) ret['perms'] = mode_to_perms(mode) ret['data'] = data @@ -91,8 +91,8 @@ class Content(MerkleLeaf): @classmethod def from_file(cls, *, path, data=False, save_path=False): - """Compute the Software Heritage content entry corresponding to an on-disk - file. + """Compute the Software Heritage content entry corresponding to an + on-disk file. 
The returned dictionary contains keys useful for both: - loading the content in the archive (hashes, `length`) @@ -103,6 +103,7 @@ class Content(MerkleLeaf): content entry data (bool): add the file data to the entry save_path (bool): add the file path to the entry + """ file_stat = os.lstat(path) mode = file_stat.st_mode @@ -117,17 +118,19 @@ class Content(MerkleLeaf): length = file_stat.st_size if not data: - ret = hashutil.hash_path(path) + ret = MultiHash.from_path(path).digest() else: + h = MultiHash(length=length) chunks = [] - - def append_chunk(x, chunks=chunks): - chunks.append(x) - with open(path, 'rb') as fobj: - ret = hashutil.hash_file(fobj, length=length, - chunk_cb=append_chunk) - + while True: + chunk = fobj.read(HASH_BLOCK_SIZE) + if not chunk: + break + h.update(chunk) + chunks.append(chunk) + + ret = h.digest() ret['data'] = b''.join(chunks) if save_path: diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py index d8249bc70b5d5b81286a7d945a3fb07d23ea969c..e58f6870fc121ddd674253a918c8849de822589f 100644 --- a/swh/model/hashutil.py +++ b/swh/model/hashutil.py @@ -30,25 +30,26 @@ Basic usage examples: - To compute length, integrate the length to the set of algorithms to compute, for example: - h = MultiHash(hash_names=set({'length'}).union(DEFAULT_ALGORITHMS)) - with open(filepath, 'rb') as f: - h.update(f.read(HASH_BLOCK_SIZE)) - hashes = h.digest() # returns a dict of {hash_algo_name: hash_in_bytes} + .. 
code-block:: python - for chunk in - # then use h as you would + h = MultiHash(hash_names=set({'length'}).union(DEFAULT_ALGORITHMS)) + with open(filepath, 'rb') as f: + h.update(f.read(HASH_BLOCK_SIZE)) + hashes = h.digest() # returns a dict of {hash_algo_name: hash_in_bytes} - Write alongside computing hashing algorithms (from a stream), example: - h = MultiHash(length=length) - with open(filepath, 'wb') as f: - for chunk in r.iter_content(): # r a stream of sort - h.update(chunk) - f.write(chunk) - hashes = h.hexdigest() # returns a dict of {hash_algo_name: hash_in_hex} + .. code-block:: python + + h = MultiHash(length=length) + with open(filepath, 'wb') as f: + for chunk in r.iter_content(): # r is a stream of some sort + h.update(chunk) + f.write(chunk) + hashes = h.hexdigest() # returns a dict of {hash_algo_name: hash_in_hex} - Note: Prior to this, we would have to use chunk_cb (cf. hash_file, - hash_path) + Note: Prior to this, we would have to use chunk_cb (cf. hash_file, + hash_path) This module also defines the following (deprecated) hashing functions: @@ -117,28 +118,25 @@ class MultiHash: ret.track_length = track_length @classmethod - def from_file(cls, file, hash_names=DEFAULT_ALGORITHMS, length=None): + def from_file(cls, fobj, hash_names=DEFAULT_ALGORITHMS, length=None): ret = cls(length=length, hash_names=hash_names) - for chunk in file: + while True: + chunk = fobj.read(HASH_BLOCK_SIZE) + if not chunk: + break ret.update(chunk) return ret @classmethod - def from_path(cls, path, hash_names=DEFAULT_ALGORITHMS, length=None, - track_length=True): - if not length: - length = os.path.getsize(path) + def from_path(cls, path, hash_names=DEFAULT_ALGORITHMS): + length = os.path.getsize(path) with open(path, 'rb') as f: ret = cls.from_file(f, hash_names=hash_names, length=length) - # For compatibility reason with `hash_path` - if track_length: - ret.state['length'] = length return ret @classmethod - def from_data(cls, data, hash_names=DEFAULT_ALGORITHMS, length=None):
- if not length: - length = len(data) + def from_data(cls, data, hash_names=DEFAULT_ALGORITHMS): + length = len(data) fobj = BytesIO(data) return cls.from_file(fobj, hash_names=hash_names, length=length) diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index e7608e978af8a002ea2afc0d106a4c69da9c1189..083efd4504c7dbad38a26d4e9aaa50e7369ba650 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -11,8 +11,7 @@ from functools import lru_cache from .exceptions import ValidationError from .fields.hashes import validate_sha1 -from .hashutil import hash_data, hash_git_data, DEFAULT_ALGORITHMS -from .hashutil import hash_to_hex +from .hashutil import hash_git_data, hash_to_hex, MultiHash SNAPSHOT = 'snapshot' @@ -104,7 +103,7 @@ def content_identifier(content): """ - return hash_data(content['data'], DEFAULT_ALGORITHMS) + return MultiHash.from_data(content['data']).digest() def _sort_key(entry): diff --git a/swh/model/tests/data/dir-folders/sample-folder.tgz b/swh/model/tests/data/dir-folders/sample-folder.tgz new file mode 100644 index 0000000000000000000000000000000000000000..cc848944a0d3e71d287027347e25467e61b07428 Binary files /dev/null and b/swh/model/tests/data/dir-folders/sample-folder.tgz differ diff --git a/swh/model/tests/fields/test_compound.py b/swh/model/tests/fields/test_compound.py index b6e13b6c33a7f975102d2faea9635e12f0e07ca8..dffbb043139221d7ed87197f368a4a2c5349f475 100644 --- a/swh/model/tests/fields/test_compound.py +++ b/swh/model/tests/fields/test_compound.py @@ -6,9 +6,7 @@ import datetime import unittest -from nose.tools import istest - -from swh.model.exceptions import ValidationError, NON_FIELD_ERRORS +from swh.model.exceptions import NON_FIELD_ERRORS, ValidationError from swh.model.fields import compound, simple @@ -63,8 +61,7 @@ class ValidateCompound(unittest.TestCase): self.present_keys = set(self.test_value) self.missing_keys = {'missingkey1', 'missingkey2'} - @istest - def validate_any_key(self): + def 
test_validate_any_key(self): self.assertTrue( compound.validate_any_key(self.test_value, self.present_keys)) @@ -72,8 +69,7 @@ class ValidateCompound(unittest.TestCase): compound.validate_any_key(self.test_value, self.present_keys | self.missing_keys)) - @istest - def validate_any_key_missing(self): + def test_validate_any_key_missing(self): with self.assertRaises(ValidationError) as cm: compound.validate_any_key(self.test_value, self.missing_keys) @@ -83,13 +79,11 @@ class ValidateCompound(unittest.TestCase): self.assertEqual(exc.params['missing_fields'], ', '.join(sorted(self.missing_keys))) - @istest - def validate_all_keys(self): + def test_validate_all_keys(self): self.assertTrue( compound.validate_all_keys(self.test_value, self.present_keys)) - @istest - def validate_all_keys_missing(self): + def test_validate_all_keys_missing(self): with self.assertRaises(ValidationError) as cm: compound.validate_all_keys(self.test_value, self.missing_keys) @@ -109,14 +103,12 @@ class ValidateCompound(unittest.TestCase): self.assertEqual(exc.params['missing_fields'], ', '.join(sorted(self.missing_keys))) - @istest - def validate_against_schema(self): + def test_validate_against_schema(self): self.assertTrue( compound.validate_against_schema(self.test_model, self.test_schema, self.test_value)) - @istest - def validate_against_schema_wrong_type(self): + def test_validate_against_schema_wrong_type(self): with self.assertRaises(ValidationError) as cm: compound.validate_against_schema(self.test_model, self.test_schema, self.test_value_wrong_type) @@ -128,8 +120,7 @@ class ValidateCompound(unittest.TestCase): self.assertEqual(exc.params['type'], self.test_value_wrong_type.__class__.__name__) - @istest - def validate_against_schema_mandatory_keys(self): + def test_validate_against_schema_mandatory_keys(self): with self.assertRaises(ValidationError) as cm: compound.validate_against_schema(self.test_model, self.test_schema, self.test_value_missing) @@ -150,8 +141,7 @@ class 
ValidateCompound(unittest.TestCase): self.assertEqual(nested.code, 'model-field-mandatory') self.assertEqual(nested.params['field'], key) - @istest - def validate_against_schema_whole_schema_shortcut_previous_error(self): + def test_validate_whole_schema_shortcut_previous_error(self): with self.assertRaises(ValidationError) as cm: compound.validate_against_schema( self.test_model, @@ -163,8 +153,7 @@ class ValidateCompound(unittest.TestCase): self.assertIsInstance(str(exc), str) self.assertNotIn(NON_FIELD_ERRORS, exc.error_dict) - @istest - def validate_against_schema_whole_schema(self): + def test_validate_whole_schema(self): with self.assertRaises(ValidationError) as cm: compound.validate_against_schema( self.test_model, @@ -179,20 +168,19 @@ class ValidateCompound(unittest.TestCase): exc = cm.exception self.assertIsInstance(str(exc), str) - self.assertEquals(set(exc.error_dict.keys()), {NON_FIELD_ERRORS}) + self.assertEqual(set(exc.error_dict.keys()), {NON_FIELD_ERRORS}) non_field_errors = exc.error_dict[NON_FIELD_ERRORS] self.assertIsInstance(non_field_errors, list) - self.assertEquals(len(non_field_errors), 1) + self.assertEqual(len(non_field_errors), 1) nested = non_field_errors[0] self.assertIsInstance(nested, ValidationError) - self.assertEquals(nested.code, 'model-validation-failed') - self.assertEquals(nested.params['model'], self.test_model) - self.assertEquals(nested.params['validator'], 'validate_never') + self.assertEqual(nested.code, 'model-validation-failed') + self.assertEqual(nested.params['model'], self.test_model) + self.assertEqual(nested.params['validator'], 'validate_never') - @istest - def validate_against_schema_field_error(self): + def test_validate_against_schema_field_error(self): with self.assertRaises(ValidationError) as cm: compound.validate_against_schema(self.test_model, self.test_schema, self.test_value_str_error) @@ -204,18 +192,17 @@ class ValidateCompound(unittest.TestCase): exc = cm.exception self.assertIsInstance(str(exc), 
str) - self.assertEquals(set(exc.error_dict.keys()), {'str'}) + self.assertEqual(set(exc.error_dict.keys()), {'str'}) str_errors = exc.error_dict['str'] self.assertIsInstance(str_errors, list) - self.assertEquals(len(str_errors), 1) + self.assertEqual(len(str_errors), 1) nested = str_errors[0] self.assertIsInstance(nested, ValidationError) - self.assertEquals(nested.code, 'unexpected-type') + self.assertEqual(nested.code, 'unexpected-type') - @istest - def validate_against_schema_field_failed(self): + def test_validate_against_schema_field_failed(self): with self.assertRaises(ValidationError) as cm: compound.validate_against_schema(self.test_model, self.test_schema_field_failed, @@ -228,14 +215,14 @@ class ValidateCompound(unittest.TestCase): exc = cm.exception self.assertIsInstance(str(exc), str) - self.assertEquals(set(exc.error_dict.keys()), {'int'}) + self.assertEqual(set(exc.error_dict.keys()), {'int'}) int_errors = exc.error_dict['int'] self.assertIsInstance(int_errors, list) - self.assertEquals(len(int_errors), 1) + self.assertEqual(len(int_errors), 1) nested = int_errors[0] self.assertIsInstance(nested, ValidationError) - self.assertEquals(nested.code, 'field-validation-failed') - self.assertEquals(nested.params['validator'], 'validate_never') - self.assertEquals(nested.params['field'], 'int') + self.assertEqual(nested.code, 'field-validation-failed') + self.assertEqual(nested.params['validator'], 'validate_never') + self.assertEqual(nested.params['field'], 'int') diff --git a/swh/model/tests/fields/test_hashes.py b/swh/model/tests/fields/test_hashes.py index 0ef303f034bceb0611c139a6b9191f4fd6dfb0d8..7ce0b78be04485a3f3cd11390eddcac2e112da5f 100644 --- a/swh/model/tests/fields/test_hashes.py +++ b/swh/model/tests/fields/test_hashes.py @@ -5,8 +5,6 @@ import unittest -from nose.tools import istest - from swh.model.exceptions import ValidationError from swh.model.fields import hashes @@ -32,18 +30,15 @@ class ValidateHashes(unittest.TestCase): self.bad_hash = 
object() - @istest - def valid_bytes_hash(self): + def test_valid_bytes_hash(self): for hash_type, value in self.valid_byte_hashes.items(): self.assertTrue(hashes.validate_hash(value, hash_type)) - @istest - def valid_str_hash(self): + def test_valid_str_hash(self): for hash_type, value in self.valid_str_hashes.items(): self.assertTrue(hashes.validate_hash(value, hash_type)) - @istest - def invalid_hash_type(self): + def test_invalid_hash_type(self): hash_type = 'unknown_hash_type' with self.assertRaises(ValidationError) as cm: hashes.validate_hash(self.valid_str_hashes['sha1'], hash_type) @@ -56,8 +51,7 @@ class ValidateHashes(unittest.TestCase): self.assertIn('Unexpected hash type', str(exc)) self.assertIn(hash_type, str(exc)) - @istest - def invalid_bytes_len(self): + def test_invalid_bytes_len(self): for hash_type, value in self.valid_byte_hashes.items(): value = value + b'\x00\x01' with self.assertRaises(ValidationError) as cm: @@ -72,8 +66,7 @@ class ValidateHashes(unittest.TestCase): self.assertIn('Unexpected length', str(exc)) self.assertIn(str(len(value)), str(exc)) - @istest - def invalid_str_len(self): + def test_invalid_str_len(self): for hash_type, value in self.valid_str_hashes.items(): value = value + '0001' with self.assertRaises(ValidationError) as cm: @@ -88,8 +81,7 @@ class ValidateHashes(unittest.TestCase): self.assertIn('Unexpected length', str(exc)) self.assertIn(str(len(value)), str(exc)) - @istest - def invalid_str_contents(self): + def test_invalid_str_contents(self): for hash_type, value in self.valid_str_hashes.items(): value = '\xa2' + value[1:-1] + '\xc3' with self.assertRaises(ValidationError) as cm: @@ -105,8 +97,7 @@ class ValidateHashes(unittest.TestCase): self.assertIn('\xc3', str(exc)) self.assertIn('\xa2', str(exc)) - @istest - def invalid_value_type(self): + def test_invalid_value_type(self): with self.assertRaises(ValidationError) as cm: hashes.validate_hash(self.bad_hash, 'sha1') @@ -118,8 +109,7 @@ class 
ValidateHashes(unittest.TestCase): self.assertIn('Unexpected type', str(exc)) self.assertIn(self.bad_hash.__class__.__name__, str(exc)) - @istest - def validate_sha1(self): + def test_validate_sha1(self): self.assertTrue(hashes.validate_sha1(self.valid_byte_hashes['sha1'])) self.assertTrue(hashes.validate_sha1(self.valid_str_hashes['sha1'])) @@ -131,8 +121,7 @@ class ValidateHashes(unittest.TestCase): self.assertEqual(exc.code, 'unexpected-hash-value-type') self.assertEqual(exc.params['type'], self.bad_hash.__class__.__name__) - @istest - def validate_sha1_git(self): + def test_validate_sha1_git(self): self.assertTrue( hashes.validate_sha1_git(self.valid_byte_hashes['sha1_git'])) self.assertTrue( @@ -146,8 +135,7 @@ class ValidateHashes(unittest.TestCase): self.assertEqual(exc.code, 'unexpected-hash-value-type') self.assertEqual(exc.params['type'], self.bad_hash.__class__.__name__) - @istest - def validate_sha256(self): + def test_validate_sha256(self): self.assertTrue( hashes.validate_sha256(self.valid_byte_hashes['sha256'])) self.assertTrue( diff --git a/swh/model/tests/fields/test_simple.py b/swh/model/tests/fields/test_simple.py index 6fa2918d8f76954b7b69efb1eadf734503e5befd..ab5e262eceba4eb08fe03f9df667534cc16379f2 100644 --- a/swh/model/tests/fields/test_simple.py +++ b/swh/model/tests/fields/test_simple.py @@ -6,8 +6,6 @@ import datetime import unittest -from nose.tools import istest - from swh.model.exceptions import ValidationError from swh.model.fields import simple @@ -29,12 +27,10 @@ class ValidateSimple(unittest.TestCase): tzinfo=datetime.timezone.utc) self.invalid_datetime_notz = datetime.datetime(1999, 1, 1, 12, 0, 0) - @istest - def validate_int(self): + def test_validate_int(self): self.assertTrue(simple.validate_int(self.valid_int)) - @istest - def validate_int_invalid_type(self): + def test_validate_int_invalid_type(self): with self.assertRaises(ValidationError) as cm: simple.validate_int(self.valid_str) @@ -44,12 +40,10 @@ class 
ValidateSimple(unittest.TestCase): self.assertEqual(exc.params['expected_type'], 'Integral') self.assertEqual(exc.params['type'], 'str') - @istest - def validate_str(self): + def test_validate_str(self): self.assertTrue(simple.validate_str(self.valid_str)) - @istest - def validate_str_invalid_type(self): + def test_validate_str_invalid_type(self): with self.assertRaises(ValidationError) as cm: simple.validate_str(self.valid_int) @@ -68,12 +62,10 @@ class ValidateSimple(unittest.TestCase): self.assertEqual(exc.params['expected_type'], 'str') self.assertEqual(exc.params['type'], 'bytes') - @istest - def validate_bytes(self): + def test_validate_bytes(self): self.assertTrue(simple.validate_bytes(self.valid_bytes)) - @istest - def validate_bytes_invalid_type(self): + def test_validate_bytes_invalid_type(self): with self.assertRaises(ValidationError) as cm: simple.validate_bytes(self.valid_int) @@ -92,14 +84,12 @@ class ValidateSimple(unittest.TestCase): self.assertEqual(exc.params['expected_type'], 'bytes') self.assertEqual(exc.params['type'], 'str') - @istest - def validate_datetime(self): + def test_validate_datetime(self): self.assertTrue(simple.validate_datetime(self.valid_datetime)) self.assertTrue(simple.validate_datetime(self.valid_int)) self.assertTrue(simple.validate_datetime(self.valid_real)) - @istest - def validate_datetime_invalid_type(self): + def test_validate_datetime_invalid_type(self): with self.assertRaises(ValidationError) as cm: simple.validate_datetime(self.valid_str) @@ -109,8 +99,7 @@ class ValidateSimple(unittest.TestCase): self.assertEqual(exc.params['expected_type'], 'one of datetime, Real') self.assertEqual(exc.params['type'], 'str') - @istest - def validate_datetime_invalide_tz(self): + def test_validate_datetime_invalide_tz(self): with self.assertRaises(ValidationError) as cm: simple.validate_datetime(self.invalid_datetime_notz) @@ -118,13 +107,11 @@ class ValidateSimple(unittest.TestCase): self.assertIsInstance(str(exc), str) 
self.assertEqual(exc.code, 'datetime-without-tzinfo') - @istest - def validate_enum(self): + def test_validate_enum(self): for value in self.enum_values: self.assertTrue(simple.validate_enum(value, self.enum_values)) - @istest - def validate_enum_invalid_value(self): + def test_validate_enum_invalid_value(self): with self.assertRaises(ValidationError) as cm: simple.validate_enum(self.invalid_enum_value, self.enum_values) diff --git a/swh/model/tests/test_cli.py b/swh/model/tests/test_cli.py index 9e31a4a397402521872eed8f0d0465160e5f6191..e4232fe2c4b4a70aeac2fb8991ab883e6b83b98a 100644 --- a/swh/model/tests/test_cli.py +++ b/swh/model/tests/test_cli.py @@ -8,21 +8,21 @@ import tempfile import unittest from click.testing import CliRunner -from nose.plugins.attrib import attr +import pytest from swh.model import cli -from swh.model.tests.test_from_disk import DataMixin from swh.model.hashutil import hash_to_hex +from swh.model.tests.test_from_disk import DataMixin -@attr('fs') +@pytest.mark.fs class TestIdentify(DataMixin, unittest.TestCase): def setUp(self): super().setUp() self.runner = CliRunner() - def assertPidOK(self, result, pid): + def assertPidOK(self, result, pid): # noqa: N802 self.assertEqual(result.exit_code, 0) self.assertEqual(result.output.split()[0], pid) diff --git a/swh/model/tests/test_from_disk.py b/swh/model/tests/test_from_disk.py index 8e568ec25d21985c00e8e5243992ea0bb753aa19..30f543d639f570be72c24bb55f7bb604cba5f2cd 100644 --- a/swh/model/tests/test_from_disk.py +++ b/swh/model/tests/test_from_disk.py @@ -8,12 +8,14 @@ import tarfile import tempfile import unittest -from nose.plugins.attrib import attr +import pytest from swh.model import from_disk -from swh.model.from_disk import Content, Directory, DentryPerms +from swh.model.from_disk import Content, DentryPerms, Directory from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex +TEST_DATA = os.path.join(os.path.dirname(__file__), 'data') + class 
ModeToPerms(unittest.TestCase): def setUp(self): @@ -466,11 +468,7 @@ class DataMixin: fn(path) def make_from_tarball(self, directory): - tarball = os.path.join(os.path.dirname(__file__), - '../../../..', - 'swh-storage-testdata', - 'dir-folders', - 'sample-folder.tgz') + tarball = os.path.join(TEST_DATA, 'dir-folders', 'sample-folder.tgz') with tarfile.open(tarball, 'r:gz') as f: f.extractall(os.fsdecode(directory)) @@ -687,7 +685,7 @@ class DirectoryToObjects(DataMixin, unittest.TestCase): + 1) -@attr('fs') +@pytest.mark.fs class TarballTest(DataMixin, unittest.TestCase): def setUp(self): super().setUp() diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py index 92b3684afe807774a9b8ed315bf1e39095b5e225..0e41068900686474db83d82ddfc7b9e54c60f36d 100644 --- a/swh/model/tests/test_hashutil.py +++ b/swh/model/tests/test_hashutil.py @@ -8,8 +8,6 @@ import io import os import tempfile import unittest - -from nose.tools import istest from unittest.mock import patch from swh.model import hashutil @@ -55,14 +53,12 @@ class BaseHashutil(unittest.TestCase): class MultiHashTest(BaseHashutil): - @istest - def multi_hash_data(self): + def test_multi_hash_data(self): checksums = MultiHash.from_data(self.data).digest() self.assertEqual(checksums, self.checksums) self.assertFalse('length' in checksums) - @istest - def multi_hash_data_with_length(self): + def test_multi_hash_data_with_length(self): expected_checksums = self.checksums.copy() expected_checksums['length'] = len(self.data) @@ -72,64 +68,55 @@ class MultiHashTest(BaseHashutil): self.assertEqual(checksums, expected_checksums) self.assertTrue('length' in checksums) - @istest - def multi_hash_data_unknown_hash(self): + def test_multi_hash_data_unknown_hash(self): with self.assertRaises(ValueError) as cm: MultiHash.from_data(self.data, ['unknown-hash']) self.assertIn('Unexpected hashing algorithm', cm.exception.args[0]) self.assertIn('unknown-hash', cm.exception.args[0]) - @istest - def 
multi_hash_file(self): + def test_multi_hash_file(self): fobj = io.BytesIO(self.data) checksums = MultiHash.from_file(fobj, length=len(self.data)).digest() self.assertEqual(checksums, self.checksums) - @istest - def multi_hash_file_hexdigest(self): + def test_multi_hash_file_hexdigest(self): fobj = io.BytesIO(self.data) length = len(self.data) checksums = MultiHash.from_file(fobj, length=length).hexdigest() self.assertEqual(checksums, self.hex_checksums) - @istest - def multi_hash_file_bytehexdigest(self): + def test_multi_hash_file_bytehexdigest(self): fobj = io.BytesIO(self.data) length = len(self.data) checksums = MultiHash.from_file(fobj, length=length).bytehexdigest() self.assertEqual(checksums, self.bytehex_checksums) - @istest - def multi_hash_file_missing_length(self): + def test_multi_hash_file_missing_length(self): fobj = io.BytesIO(self.data) with self.assertRaises(ValueError) as cm: MultiHash.from_file(fobj, hash_names=['sha1_git']) self.assertIn('Missing length', cm.exception.args[0]) - @istest - def multi_hash_path(self): + def test_multi_hash_path(self): with tempfile.NamedTemporaryFile(delete=False) as f: f.write(self.data) hashes = MultiHash.from_path(f.name).digest() os.remove(f.name) - self.checksums['length'] = len(self.data) - self.assertEquals(self.checksums, hashes) + self.assertEqual(self.checksums, hashes) class Hashutil(BaseHashutil): - @istest - def hash_data(self): + def test_hash_data(self): checksums = hashutil.hash_data(self.data) self.assertEqual(checksums, self.checksums) self.assertFalse('length' in checksums) - @istest - def hash_data_with_length(self): + def test_hash_data_with_length(self): expected_checksums = self.checksums.copy() expected_checksums['length'] = len(self.data) @@ -139,16 +126,14 @@ class Hashutil(BaseHashutil): self.assertEqual(checksums, expected_checksums) self.assertTrue('length' in checksums) - @istest - def hash_data_unknown_hash(self): + def test_hash_data_unknown_hash(self): with 
self.assertRaises(ValueError) as cm: hashutil.hash_data(self.data, ['unknown-hash']) self.assertIn('Unexpected hashing algorithm', cm.exception.args[0]) self.assertIn('unknown-hash', cm.exception.args[0]) - @istest - def hash_git_data(self): + def test_hash_git_data(self): checksums = { git_type: hashutil.hash_git_data(self.data, git_type) for git_type in self.git_checksums @@ -156,23 +141,20 @@ class Hashutil(BaseHashutil): self.assertEqual(checksums, self.git_checksums) - @istest - def hash_git_data_unknown_git_type(self): + def test_hash_git_data_unknown_git_type(self): with self.assertRaises(ValueError) as cm: hashutil.hash_git_data(self.data, 'unknown-git-type') self.assertIn('Unexpected git object type', cm.exception.args[0]) self.assertIn('unknown-git-type', cm.exception.args[0]) - @istest - def hash_file(self): + def test_hash_file(self): fobj = io.BytesIO(self.data) checksums = hashutil.hash_file(fobj, length=len(self.data)) self.assertEqual(checksums, self.checksums) - @istest - def hash_file_missing_length(self): + def test_hash_file_missing_length(self): fobj = io.BytesIO(self.data) with self.assertRaises(ValueError) as cm: @@ -180,8 +162,7 @@ class Hashutil(BaseHashutil): self.assertIn('Missing length', cm.exception.args[0]) - @istest - def hash_path(self): + def test_hash_path(self): with tempfile.NamedTemporaryFile(delete=False) as f: f.write(self.data) @@ -189,50 +170,44 @@ class Hashutil(BaseHashutil): os.remove(f.name) self.checksums['length'] = len(self.data) - self.assertEquals(self.checksums, hashes) + self.assertEqual(self.checksums, hashes) - @istest - def hash_to_hex(self): + def test_hash_to_hex(self): for type in self.checksums: hex = self.hex_checksums[type] hash = self.checksums[type] - self.assertEquals(hashutil.hash_to_hex(hex), hex) - self.assertEquals(hashutil.hash_to_hex(hash), hex) + self.assertEqual(hashutil.hash_to_hex(hex), hex) + self.assertEqual(hashutil.hash_to_hex(hash), hex) - @istest - def hash_to_bytes(self): + def 
test_hash_to_bytes(self): for type in self.checksums: hex = self.hex_checksums[type] hash = self.checksums[type] - self.assertEquals(hashutil.hash_to_bytes(hex), hash) - self.assertEquals(hashutil.hash_to_bytes(hash), hash) + self.assertEqual(hashutil.hash_to_bytes(hex), hash) + self.assertEqual(hashutil.hash_to_bytes(hash), hash) - @istest - def hash_to_bytehex(self): + def test_hash_to_bytehex(self): for algo in self.checksums: self.assertEqual(self.hex_checksums[algo].encode('ascii'), hashutil.hash_to_bytehex(self.checksums[algo])) - @istest - def bytehex_to_hash(self): + def test_bytehex_to_hash(self): for algo in self.checksums: self.assertEqual(self.checksums[algo], hashutil.bytehex_to_hash( self.hex_checksums[algo].encode())) - @istest - def new_hash_unsupported_hashing_algorithm(self): + def test_new_hash_unsupported_hashing_algorithm(self): try: hashutil._new_hash('blake2:10') except ValueError as e: - self.assertEquals(str(e), - 'Unexpected hashing algorithm blake2:10, ' - 'expected one of blake2b512, blake2s256, ' - 'sha1, sha1_git, sha256') + self.assertEqual(str(e), + 'Unexpected hashing algorithm blake2:10, ' + 'expected one of blake2b512, blake2s256, ' + 'sha1, sha1_git, sha256') @patch('hashlib.new') - @istest - def new_hash_blake2b_blake2b512_builtin(self, mock_hashlib_new): + def test_new_hash_blake2b_blake2b512_builtin(self, mock_hashlib_new): if 'blake2b512' not in hashlib.algorithms_available: self.skipTest('blake2b512 not built-in') mock_hashlib_new.return_value = sentinel = object() @@ -243,8 +218,7 @@ class Hashutil(BaseHashutil): mock_hashlib_new.assert_called_with('blake2b512') @patch('hashlib.new') - @istest - def new_hash_blake2s_blake2s256_builtin(self, mock_hashlib_new): + def test_new_hash_blake2s_blake2s256_builtin(self, mock_hashlib_new): if 'blake2s256' not in hashlib.algorithms_available: self.skipTest('blake2s256 not built-in') mock_hashlib_new.return_value = sentinel = object() @@ -254,8 +228,7 @@ class Hashutil(BaseHashutil): 
self.assertIs(h, sentinel) mock_hashlib_new.assert_called_with('blake2s256') - @istest - def new_hash_blake2b_builtin(self): + def test_new_hash_blake2b_builtin(self): removed_hash = False try: @@ -276,8 +249,7 @@ class Hashutil(BaseHashutil): if removed_hash: hashlib.algorithms_available.add('blake2b512') - @istest - def new_hash_blake2s_builtin(self): + def test_new_hash_blake2s_builtin(self): removed_hash = False try: @@ -298,8 +270,7 @@ class Hashutil(BaseHashutil): if removed_hash: hashlib.algorithms_available.add('blake2s256') - @istest - def new_hash_blake2b_pyblake2(self): + def test_new_hash_blake2b_pyblake2(self): if 'blake2b512' in hashlib.algorithms_available: self.skipTest('blake2b512 built in') if 'blake2b' in hashlib.algorithms_available: @@ -313,8 +284,7 @@ class Hashutil(BaseHashutil): self.assertIs(h, sentinel) mock_blake2b.assert_called_with(digest_size=512//8) - @istest - def new_hash_blake2s_pyblake2(self): + def test_new_hash_blake2s_pyblake2(self): if 'blake2s256' in hashlib.algorithms_available: self.skipTest('blake2s256 built in') if 'blake2s' in hashlib.algorithms_available: @@ -369,15 +339,13 @@ blah 'e9e959f120'), } - @istest - def unknown_header_type(self): + def test_unknown_header_type(self): with self.assertRaises(ValueError) as cm: hashutil.hash_git_data(b'any-data', 'some-unknown-type') self.assertIn('Unexpected git object type', cm.exception.args[0]) - @istest - def hashdata_content(self): + def test_hashdata_content(self): # when actual_hash = hashutil.hash_git_data(self.blob_data, git_type='blob') @@ -385,8 +353,7 @@ blah self.assertEqual(actual_hash, self.checksums['blob_sha1_git']) - @istest - def hashdata_tree(self): + def test_hashdata_tree(self): # when actual_hash = hashutil.hash_git_data(self.tree_data, git_type='tree') @@ -394,8 +361,7 @@ blah self.assertEqual(actual_hash, self.checksums['tree_sha1_git']) - @istest - def hashdata_revision(self): + def test_hashdata_revision(self): # when actual_hash = 
hashutil.hash_git_data(self.commit_data, git_type='commit') @@ -404,8 +370,7 @@ blah self.assertEqual(actual_hash, self.checksums['commit_sha1_git']) - @istest - def hashdata_tag(self): + def test_hashdata_tag(self): # when actual_hash = hashutil.hash_git_data(self.tag_data, git_type='tag') diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 66586085535fcaaf3788b1ce5ffce99760eaf46d..de96865e3bb816a396ca50bd7edf9d3065f3d031 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -7,14 +7,11 @@ import binascii import datetime import unittest -from nose.tools import istest - from swh.model import hashutil, identifiers - from swh.model.exceptions import ValidationError -from swh.model.identifiers import SNAPSHOT, RELEASE, REVISION, DIRECTORY -from swh.model.identifiers import CONTENT, PERSISTENT_IDENTIFIER_TYPES -from swh.model.identifiers import PersistentId +from swh.model.identifiers import (CONTENT, DIRECTORY, + PERSISTENT_IDENTIFIER_TYPES, RELEASE, + REVISION, SNAPSHOT, PersistentId) class UtilityFunctionsIdentifier(unittest.TestCase): @@ -23,8 +20,7 @@ class UtilityFunctionsIdentifier(unittest.TestCase): self.bytes_id = binascii.unhexlify(self.str_id) self.bad_type_id = object() - @istest - def identifier_to_bytes(self): + def test_identifier_to_bytes(self): for id in [self.str_id, self.bytes_id]: self.assertEqual(identifiers.identifier_to_bytes(id), self.bytes_id) @@ -40,8 +36,7 @@ class UtilityFunctionsIdentifier(unittest.TestCase): self.assertIn('type', str(cm.exception)) - @istest - def identifier_to_str(self): + def test_identifier_to_str(self): for id in [self.str_id, self.bytes_id]: self.assertEqual(identifiers.identifier_to_str(id), self.str_id) @@ -84,19 +79,16 @@ class UtilityFunctionsDateOffset(unittest.TestCase): 800: b'+1320', } - @istest - def format_date(self): + def test_format_date(self): for date_repr, date in self.dates.items(): 
self.assertEqual(identifiers.format_date(date), date_repr) - @istest - def format_date_fail(self): + def test_format_date_fail(self): for date in self.broken_dates: with self.assertRaises(ValueError): identifiers.format_date(date) - @istest - def format_offset(self): + def test_format_offset(self): for offset, res in self.offsets.items(): self.assertEqual(identifiers.format_offset(offset), res) @@ -113,8 +105,7 @@ class ContentIdentifier(unittest.TestCase): self.content_id = hashutil.hash_data(self.content['data']) - @istest - def content_identifier(self): + def test_content_identifier(self): self.assertEqual(identifiers.content_identifier(self.content), self.content_id) @@ -216,14 +207,12 @@ class DirectoryIdentifier(unittest.TestCase): 'entries': [], } - @istest - def dir_identifier(self): + def test_dir_identifier(self): self.assertEqual( identifiers.directory_identifier(self.directory), self.directory['id']) - @istest - def dir_identifier_empty_directory(self): + def test_dir_identifier_empty_directory(self): self.assertEqual( identifiers.directory_identifier(self.empty_directory), self.empty_directory['id']) @@ -460,29 +449,25 @@ dg1KdHOa34shrKDaOVzW } } - @istest - def revision_identifier(self): + def test_revision_identifier(self): self.assertEqual( identifiers.revision_identifier(self.revision), identifiers.identifier_to_str(self.revision['id']), ) - @istest - def revision_identifier_none_metadata(self): + def test_revision_identifier_none_metadata(self): self.assertEqual( identifiers.revision_identifier(self.revision_none_metadata), identifiers.identifier_to_str(self.revision_none_metadata['id']), ) - @istest - def revision_identifier_synthetic(self): + def test_revision_identifier_synthetic(self): self.assertEqual( identifiers.revision_identifier(self.synthetic_revision), identifiers.identifier_to_str(self.synthetic_revision['id']), ) - @istest - def revision_identifier_with_extra_headers(self): + def test_revision_identifier_with_extra_headers(self): 
self.assertEqual( identifiers.revision_identifier( self.revision_with_extra_headers), @@ -490,8 +475,7 @@ dg1KdHOa34shrKDaOVzW self.revision_with_extra_headers['id']), ) - @istest - def revision_identifier_with_gpgsig(self): + def test_revision_identifier_with_gpgsig(self): self.assertEqual( identifiers.revision_identifier( self.revision_with_gpgsig), @@ -499,8 +483,7 @@ dg1KdHOa34shrKDaOVzW self.revision_with_gpgsig['id']), ) - @istest - def revision_identifier_no_message(self): + def test_revision_identifier_no_message(self): self.assertEqual( identifiers.revision_identifier( self.revision_no_message), @@ -508,8 +491,7 @@ dg1KdHOa34shrKDaOVzW self.revision_no_message['id']), ) - @istest - def revision_identifier_empty_message(self): + def test_revision_identifier_empty_message(self): self.assertEqual( identifiers.revision_identifier( self.revision_empty_message), @@ -517,8 +499,7 @@ dg1KdHOa34shrKDaOVzW self.revision_empty_message['id']), ) - @istest - def revision_identifier_only_fullname(self): + def test_revision_identifier_only_fullname(self): self.assertEqual( identifiers.revision_identifier( self.revision_only_fullname), @@ -643,43 +624,37 @@ o6X/3T+vm8K3bf3driRr34c= 'target_type': 'revision', } - @istest - def release_identifier(self): + def test_release_identifier(self): self.assertEqual( identifiers.release_identifier(self.release), identifiers.identifier_to_str(self.release['id']) ) - @istest - def release_identifier_no_author(self): + def test_release_identifier_no_author(self): self.assertEqual( identifiers.release_identifier(self.release_no_author), identifiers.identifier_to_str(self.release_no_author['id']) ) - @istest - def release_identifier_no_message(self): + def test_release_identifier_no_message(self): self.assertEqual( identifiers.release_identifier(self.release_no_message), identifiers.identifier_to_str(self.release_no_message['id']) ) - @istest - def release_identifier_empty_message(self): + def test_release_identifier_empty_message(self): 
self.assertEqual( identifiers.release_identifier(self.release_empty_message), identifiers.identifier_to_str(self.release_empty_message['id']) ) - @istest - def release_identifier_negative_utc(self): + def test_release_identifier_negative_utc(self): self.assertEqual( identifiers.release_identifier(self.release_negative_utc), identifiers.identifier_to_str(self.release_negative_utc['id']) ) - @istest - def release_identifier_newline_in_author(self): + def test_release_identifier_newline_in_author(self): self.assertEqual( identifiers.release_identifier(self.release_newline_in_author), identifiers.identifier_to_str(self.release_newline_in_author['id']) @@ -829,7 +804,7 @@ class SnapshotIdentifier(unittest.TestCase): actual_value = identifiers.persistent_identifier( full_type, _hash, metadata=_meta) - self.assertEquals(actual_value, expected_persistent_id) + self.assertEqual(actual_value, expected_persistent_id) def test_persistent_identifier_wrong_input(self): _snapshot_id = 'notahash4bc0bf3d81436bf980b46e98bd338453' @@ -864,7 +839,7 @@ class SnapshotIdentifier(unittest.TestCase): metadata={} ) actual_result = identifiers.parse_persistent_identifier(pid) - self.assertEquals(actual_result, expected_result) + self.assertEqual(actual_result, expected_result) for pid, _type, _version, _hash, _metadata in [ ('swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;lines=1-18;origin=https://github.com/python/cpython', # noqa @@ -887,7 +862,7 @@ class SnapshotIdentifier(unittest.TestCase): metadata=_metadata ) actual_result = identifiers.parse_persistent_identifier(pid) - self.assertEquals(actual_result, expected_result) + self.assertEqual(actual_result, expected_result) def test_parse_persistent_identifier_parsing_error(self): for pid, _error in [ diff --git a/swh/model/tests/test_merkle.py b/swh/model/tests/test_merkle.py index 9f438928decb2018645e71b66a898980173e024f..8b1180a4094c19005b19ea52d8879fac9ac405fb 100644 --- a/swh/model/tests/test_merkle.py +++ 
b/swh/model/tests/test_merkle.py @@ -8,7 +8,7 @@ import unittest from swh.model import merkle -class TestedMerkleNode(merkle.MerkleNode): +class MerkleTestNode(merkle.MerkleNode): type = 'tested_merkle_node_type' def __init__(self, data): @@ -29,7 +29,7 @@ class TestedMerkleNode(merkle.MerkleNode): ) -class TestedMerkleLeaf(merkle.MerkleLeaf): +class MerkleTestLeaf(merkle.MerkleLeaf): type = 'tested_merkle_leaf_type' def __init__(self, data): @@ -44,7 +44,7 @@ class TestedMerkleLeaf(merkle.MerkleLeaf): class TestMerkleLeaf(unittest.TestCase): def setUp(self): self.data = {'value': b'value'} - self.instance = TestedMerkleLeaf(self.data) + self.instance = MerkleTestLeaf(self.data) def test_hash(self): self.assertEqual(self.instance.compute_hash_called, 0) @@ -90,25 +90,25 @@ class TestMerkleNode(unittest.TestCase): maxDiff = None def setUp(self): - self.root = TestedMerkleNode({'value': b'root'}) + self.root = MerkleTestNode({'value': b'root'}) self.nodes = {b'root': self.root} for i in (b'a', b'b', b'c'): value = b'root/' + i - node = TestedMerkleNode({ + node = MerkleTestNode({ 'value': value, }) self.root[i] = node self.nodes[value] = node for j in (b'a', b'b', b'c'): value2 = value + b'/' + j - node2 = TestedMerkleNode({ + node2 = MerkleTestNode({ 'value': value2, }) node[j] = node2 self.nodes[value2] = node2 for k in (b'a', b'b', b'c'): value3 = value2 + b'/' + j - node3 = TestedMerkleNode({ + node3 = MerkleTestNode({ 'value': value3, }) node2[j] = node3 @@ -188,8 +188,8 @@ class TestMerkleNode(unittest.TestCase): hash_root = self.root.hash hash_b = self.root[b'b'].hash new_children = { - b'c': TestedMerkleNode({'value': b'root/b/new_c'}), - b'd': TestedMerkleNode({'value': b'root/b/d'}), + b'c': MerkleTestNode({'value': b'root/b/new_c'}), + b'd': MerkleTestNode({'value': b'root/b/d'}), } # collect all nodes @@ -219,7 +219,7 @@ class TestMerkleNode(unittest.TestCase): # Ensure we collected root, root/b, and both new children collected_after_update = 
self.root.collect() self.assertCountEqual( - collected_after_update[TestedMerkleNode.type], + collected_after_update[MerkleTestNode.type], [self.nodes[b'root'].hash, self.nodes[b'root/b'].hash, new_children[b'c'].hash, new_children[b'd'].hash], ) diff --git a/swh/model/tests/test_toposort.py b/swh/model/tests/test_toposort.py index 66a8ee1c8ae9543b6dffdccf3a7fd389f8150630..174368f579d1fad763bb5ddb4b3720c6e01d579a 100644 --- a/swh/model/tests/test_toposort.py +++ b/swh/model/tests/test_toposort.py @@ -4,6 +4,7 @@ # See top-level LICENSE file for more information import unittest + from swh.model.toposort import toposort diff --git a/swh/model/tests/test_validators.py b/swh/model/tests/test_validators.py index 60a1de42952167c54d94979eb18118a0ad32ee8a..8c8512e9cc6978e577d9e774be578244eacf21ac 100644 --- a/swh/model/tests/test_validators.py +++ b/swh/model/tests/test_validators.py @@ -6,9 +6,7 @@ import datetime import unittest -from nose.tools import istest - -from swh.model import validators, hashutil, exceptions +from swh.model import exceptions, hashutil, validators class TestValidators(unittest.TestCase): @@ -38,16 +36,14 @@ class TestValidators(unittest.TestCase): self.invalid_content_hash_mismatch.update( hashutil.hash_data(b"this is not the data you're looking for")) - @istest - def validate_content(self): + def test_validate_content(self): self.assertTrue( validators.validate_content(self.valid_visible_content)) self.assertTrue( validators.validate_content(self.valid_absent_content)) - @istest - def validate_content_hash_mismatch(self): + def test_validate_content_hash_mismatch(self): with self.assertRaises(exceptions.ValidationError) as cm: validators.validate_content(self.invalid_content_hash_mismatch) @@ -62,8 +58,8 @@ class TestValidators(unittest.TestCase): exc = cm.exception self.assertIsInstance(str(exc), str) - self.assertEquals(set(exc.error_dict.keys()), - {exceptions.NON_FIELD_ERRORS}) + self.assertEqual(set(exc.error_dict.keys()), + 
{exceptions.NON_FIELD_ERRORS}) hash_mismatches = exc.error_dict[exceptions.NON_FIELD_ERRORS] self.assertIsInstance(hash_mismatches, list) diff --git a/swh/model/validators.py b/swh/model/validators.py index ea64b40f800c2c384b67019306549adc262c216f..6d2c37035b04a0185750aae7c0b1c0f02cad5584 100644 --- a/swh/model/validators.py +++ b/swh/model/validators.py @@ -1,10 +1,11 @@ -# Copyright (C) 2015 The Software Heritage developers +# Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from .exceptions import ValidationError, NON_FIELD_ERRORS -from . import fields, hashutil +from . import fields +from .hashutil import MultiHash, hash_to_bytes def validate_content(content): @@ -44,11 +45,11 @@ def validate_content(content): def validate_hashes(content): errors = [] if 'data' in content: - hashes = hashutil.hash_data(content['data']) + hashes = MultiHash.from_data(content['data']).digest() for hash_type, computed_hash in hashes.items(): if hash_type not in content: continue - content_hash = hashutil.hash_to_bytes(content[hash_type]) + content_hash = hash_to_bytes(content[hash_type]) if content_hash != computed_hash: errors.append(ValidationError( 'hash mismatch in content for hash %(hash)s', diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000000000000000000000000000000000000..0fb07c66bbbafd910825e5048dde06f445d8fd4e --- /dev/null +++ b/tox.ini @@ -0,0 +1,16 @@ +[tox] +envlist=flake8,py3 + +[testenv:py3] +deps = + .[testing] + pytest-cov +commands = + pytest --cov=swh --cov-branch {posargs} + +[testenv:flake8] +skip_install = true +deps = + flake8 +commands = + {envpython} -m flake8 diff --git a/version.txt b/version.txt index 04ecf50935976fe64ce5bc3a8fd124094e1dd418..b5af97970e87e33ff6a989b707c5e85141bd4e58 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ 
-v0.0.27-0-geb338cd \ No newline at end of file +v0.0.28-0-g4e6bce9 \ No newline at end of file