diff --git a/PKG-INFO b/PKG-INFO index b07e4b866fcb26f1f4f87d4f7060e879115afe6b..3a4aa502b5df44a95859cac637ecbcecec8ce45b 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.0.28 +Version: 0.0.29 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/debian/changelog b/debian/changelog deleted file mode 100644 index 2ef6b9eb808090d5b6c0dd602046aae4dc711910..0000000000000000000000000000000000000000 --- a/debian/changelog +++ /dev/null @@ -1,5 +0,0 @@ -swh-model (0.0.1-1) unstable; urgency=low - - * Create swh-model package - - -- Nicolas Dandrimont <olasd@debian.org> Mon, 07 Dec 2015 15:41:28 +0100 diff --git a/debian/compat b/debian/compat deleted file mode 100644 index ec635144f60048986bc560c5576355344005e6e7..0000000000000000000000000000000000000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/debian/control b/debian/control deleted file mode 100644 index 8e6257b6586ae910f465e7c46105894574ff94ae..0000000000000000000000000000000000000000 --- a/debian/control +++ /dev/null @@ -1,22 +0,0 @@ -Source: swh-model -Maintainer: Software Heritage developers <swh-devel@inria.fr> -Section: python -Priority: optional -Build-Depends: debhelper (>= 9), - dh-python (>= 2), - python3 (>= 3.5) | python3-pyblake2, - python3-all, - python3-click, - python3-pytest, - python3-setuptools, - python3-vcversioner -Standards-Version: 3.9.6 -Homepage: https://forge.softwareheritage.org/diffusion/DMOD/ - -Package: python3-swh.model -Architecture: all -Depends: ${misc:Depends}, ${python3:Depends} -Breaks: python3-swh.loader.core (<< 0.0.16~), - python3-swh.loader.dir (<< 0.0.28~), - python3-swh.loader.svn (<< 0.0.28~) -Description: Software Heritage data model diff --git a/debian/copyright b/debian/copyright deleted file mode 100644 index 81d037db6aa3081367f206e164b8cbf5b86178cc..0000000000000000000000000000000000000000 --- a/debian/copyright +++ /dev/null @@ -1,22 +0,0 @@ -Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ - -Files: * -Copyright: 2015 The Software Heritage developers -License: GPL-3+ - -License: GPL-3+ - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - . - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - . - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - . - On Debian systems, the complete text of the GNU General Public - License version 3 can be found in `/usr/share/common-licenses/GPL-3'. diff --git a/debian/rules b/debian/rules deleted file mode 100755 index ee36e370785d1a1c5d7f369a651c9d31c0f03b49..0000000000000000000000000000000000000000 --- a/debian/rules +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/make -f - -export PYBUILD_NAME=swh.model -export export PYBUILD_TEST_ARGS=-m 'not db and not fs' - -%: - dh $@ --with python3 --buildsystem=pybuild - -override_dh_install: - dh_install - rm -v $(CURDIR)/debian/python3-*/usr/lib/python*/dist-packages/swh/__init__.py diff --git a/debian/source/format b/debian/source/format deleted file mode 100644 index 163aaf8d82b6c54f23c45f32895dbdfdcc27b047..0000000000000000000000000000000000000000 --- a/debian/source/format +++ /dev/null @@ -1 +0,0 @@ -3.0 (quilt) diff --git a/docs/persistent-identifiers.rst b/docs/persistent-identifiers.rst index 89b0365d264e70700cbb8298aee0b8b9942b28cb..a588e9debf6363c4278f99fad71e48cd198a2562 100644 --- a/docs/persistent-identifiers.rst +++ b/docs/persistent-identifiers.rst @@ -225,3 +225,15 @@ Examples: Note that resolution via Identifiers.org does not support contextual information, due to `syntactic incompatibilities <http://identifiers.org/documentation#custom_requests>`_. + + +References +========== + +* Roberto Di Cosmo, Morane Gruenpeter, Stefano Zacchiroli. `Identifiers for + Digital Objects: the Case of Software Source Code Preservation + <https://hal.archives-ouvertes.fr/hal-01865790v4>`_. In Proceedings of `iPRES + 2018 <https://ipres2018.org/>`_: 15th International Conference on Digital + Preservation, Boston, MA, USA, September 2018, 9 pages. + + diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index b07e4b866fcb26f1f4f87d4f7060e879115afe6b..3a4aa502b5df44a95859cac637ecbcecec8ce45b 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.0.28 +Version: 0.0.29 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh.model.egg-info/SOURCES.txt b/swh.model.egg-info/SOURCES.txt index 81e3d7dfcf82c118bb475c93c2fbef1a95c992d4..8fa4241d3cf11fc0f760e280dee51bf158b4d001 100644 --- a/swh.model.egg-info/SOURCES.txt +++ b/swh.model.egg-info/SOURCES.txt @@ -15,12 +15,6 @@ version.txt bin/git-revhash bin/swh-hashtree bin/swh-revhash -debian/changelog -debian/compat -debian/control -debian/copyright -debian/rules -debian/source/format docs/.gitignore docs/Makefile docs/Makefile.local diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py index e58f6870fc121ddd674253a918c8849de822589f..de85857e1294bb82a1e851966878d6887a4b9c7b 100644 --- a/swh/model/hashutil.py +++ b/swh/model/hashutil.py @@ -48,21 +48,6 @@ Basic usage examples: f.write(chunk) hashes = h.hexdigest() # returns a dict of {hash_algo_name: hash_in_hex} - Note: Prior to this, we would have to use chunk_cb (cf. hash_file, - hash_path) - - -This module also defines the following (deprecated) hashing functions: - -- hash_file: Hash the contents of the given file object with the given - algorithms (defaulting to DEFAULT_ALGORITHMS if none provided). - -- hash_data: Hash the given binary blob with the given algorithms - (defaulting to DEFAULT_ALGORITHMS if none provided). - -- hash_path: Hash the contents of the file at the given path with the - given algorithms (defaulting to DEFAULT_ALGORITHMS if none - provided). """ @@ -290,84 +275,6 @@ def _new_hash(algo, length=None): return _new_hashlib_hash(algo) -def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, - chunk_cb=None): - """(Deprecated) cf. MultiHash.from_file - - Hash the contents of the given file object with the given algorithms. - - Args: - fobj: a file-like object - length (int): the length of the contents of the file-like - object (for the git-specific algorithms) - algorithms (set): the hashing algorithms to be used, as an - iterable over strings - chunk_cb (fun): a callback function taking a chunk of data as - parameter - - Returns: - a dict mapping each algorithm to a digest (bytes by default). - - Raises: - ValueError if algorithms contains an unknown hash algorithm. - - """ - h = MultiHash(algorithms, length) - while True: - chunk = fobj.read(HASH_BLOCK_SIZE) - if not chunk: - break - h.update(chunk) - if chunk_cb: - chunk_cb(chunk) - - return h.digest() - - -def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None): - """(deprecated) cf. MultiHash.from_path - - Hash the contents of the file at the given path with the given - algorithms. - - Args: - path (str): the path of the file to hash - algorithms (set): the hashing algorithms used - chunk_cb (fun): a callback function taking a chunk of data as parameter - - Returns: a dict mapping each algorithm to a bytes digest. - - Raises: - ValueError if algorithms contains an unknown hash algorithm. - OSError on file access error - - """ - length = os.path.getsize(path) - with open(path, 'rb') as fobj: - hashes = hash_file(fobj, length, algorithms, chunk_cb=chunk_cb) - hashes['length'] = length - return hashes - - -def hash_data(data, algorithms=DEFAULT_ALGORITHMS): - """(deprecated) cf. MultiHash.from_data - - Hash the given binary blob with the given algorithms. - - Args: - data (bytes): raw content to hash - algorithms (set): the hashing algorithms used - - Returns: a dict mapping each algorithm to a bytes digest - - Raises: - TypeError if data does not support the buffer interface. - ValueError if algorithms contains an unknown hash algorithm. - - """ - return MultiHash.from_data(data, hash_names=algorithms).digest() - - def hash_git_data(data, git_type, base_algo='sha1'): """Hash the given data as a git object of type git_type. diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py index 0e41068900686474db83d82ddfc7b9e54c60f36d..abdff979f5d01b80615f79da6cf986001dc11cb0 100644 --- a/swh/model/tests/test_hashutil.py +++ b/swh/model/tests/test_hashutil.py @@ -111,27 +111,6 @@ class MultiHashTest(BaseHashutil): class Hashutil(BaseHashutil): - def test_hash_data(self): - checksums = hashutil.hash_data(self.data) - self.assertEqual(checksums, self.checksums) - self.assertFalse('length' in checksums) - - def test_hash_data_with_length(self): - expected_checksums = self.checksums.copy() - expected_checksums['length'] = len(self.data) - - algos = set(['length']).union(hashutil.DEFAULT_ALGORITHMS) - checksums = hashutil.hash_data(self.data, algorithms=algos) - - self.assertEqual(checksums, expected_checksums) - self.assertTrue('length' in checksums) - - def test_hash_data_unknown_hash(self): - with self.assertRaises(ValueError) as cm: - hashutil.hash_data(self.data, ['unknown-hash']) - - self.assertIn('Unexpected hashing algorithm', cm.exception.args[0]) - self.assertIn('unknown-hash', cm.exception.args[0]) def test_hash_git_data(self): checksums = { @@ -148,30 +127,6 @@ class Hashutil(BaseHashutil): self.assertIn('Unexpected git object type', cm.exception.args[0]) self.assertIn('unknown-git-type', cm.exception.args[0]) - def test_hash_file(self): - fobj = io.BytesIO(self.data) - - checksums = hashutil.hash_file(fobj, length=len(self.data)) - self.assertEqual(checksums, self.checksums) - - def test_hash_file_missing_length(self): - fobj = io.BytesIO(self.data) - - with self.assertRaises(ValueError) as cm: - hashutil.hash_file(fobj, algorithms=['sha1_git']) - - self.assertIn('Missing length', cm.exception.args[0]) - - def test_hash_path(self): - with tempfile.NamedTemporaryFile(delete=False) as f: - f.write(self.data) - - hashes = hashutil.hash_path(f.name) - os.remove(f.name) - - self.checksums['length'] = len(self.data) - self.assertEqual(self.checksums, hashes) - def test_hash_to_hex(self): for type in self.checksums: hex = self.hex_checksums[type] diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index de96865e3bb816a396ca50bd7edf9d3065f3d031..1492b87ed0c23b505df2988c7913caec32912f88 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -103,7 +103,8 @@ class ContentIdentifier(unittest.TestCase): tzinfo=datetime.timezone.utc), } - self.content_id = hashutil.hash_data(self.content['data']) + self.content_id = hashutil.MultiHash.from_data( + self.content['data']).digest() def test_content_identifier(self): self.assertEqual(identifiers.content_identifier(self.content), diff --git a/swh/model/tests/test_validators.py b/swh/model/tests/test_validators.py index 8c8512e9cc6978e577d9e774be578244eacf21ac..691c579e4704e96733ecebba027411b1453f7eda 100644 --- a/swh/model/tests/test_validators.py +++ b/swh/model/tests/test_validators.py @@ -9,6 +9,10 @@ import unittest from swh.model import exceptions, hashutil, validators +def hash_data(raw_content): + return hashutil.MultiHash.from_data(raw_content).digest() + + class TestValidators(unittest.TestCase): def setUp(self): self.valid_visible_content = { @@ -20,7 +24,7 @@ class TestValidators(unittest.TestCase): } self.valid_visible_content.update( - hashutil.hash_data(self.valid_visible_content['data'])) + hash_data(self.valid_visible_content['data'])) self.valid_absent_content = { 'status': 'absent', @@ -34,7 +38,7 @@ class TestValidators(unittest.TestCase): self.invalid_content_hash_mismatch = self.valid_visible_content.copy() self.invalid_content_hash_mismatch.update( - hashutil.hash_data(b"this is not the data you're looking for")) + hash_data(b"this is not the data you're looking for")) def test_validate_content(self): self.assertTrue( diff --git a/version.txt b/version.txt index b5af97970e87e33ff6a989b707c5e85141bd4e58..8eba90c0f40bfe79683d690c0c710ce0a28a889d 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.28-0-g4e6bce9 \ No newline at end of file +v0.0.29-0-gfa140b2 \ No newline at end of file