Skip to content
Snippets Groups Projects
Commit 461991d9 authored by Nicolas Dandrimont
Browse files

Update upstream source from tag 'debian/upstream/0.0.29'

Update to upstream version '0.0.29'
with Debian dir ceb2d9a6541ffae00bdd235b7dcfdb7f924f9ceb
parents cc2c2e1b afab8bfa
No related branches found
No related tags found
No related merge requests found
Metadata-Version: 2.1
Name: swh.model
Version: 0.0.28
Version: 0.0.29
Summary: Software Heritage data model
Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
Author: Software Heritage developers
......
......@@ -225,3 +225,15 @@ Examples:
Note that resolution via Identifiers.org does not support contextual
information, due to `syntactic incompatibilities
<http://identifiers.org/documentation#custom_requests>`_.
References
==========
* Roberto Di Cosmo, Morane Gruenpeter, Stefano Zacchiroli. `Identifiers for
Digital Objects: the Case of Software Source Code Preservation
<https://hal.archives-ouvertes.fr/hal-01865790v4>`_. In Proceedings of `iPRES
2018 <https://ipres2018.org/>`_: 15th International Conference on Digital
Preservation, Boston, MA, USA, September 2018, 9 pages.
Metadata-Version: 2.1
Name: swh.model
Version: 0.0.28
Version: 0.0.29
Summary: Software Heritage data model
Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
Author: Software Heritage developers
......
......@@ -15,12 +15,6 @@ version.txt
bin/git-revhash
bin/swh-hashtree
bin/swh-revhash
debian/changelog
debian/compat
debian/control
debian/copyright
debian/rules
debian/source/format
docs/.gitignore
docs/Makefile
docs/Makefile.local
......
......@@ -48,21 +48,6 @@ Basic usage examples:
f.write(chunk)
hashes = h.hexdigest() # returns a dict of {hash_algo_name: hash_in_hex}
Note: Prior to this, we would have to use chunk_cb (cf. hash_file,
hash_path)
This module also defines the following (deprecated) hashing functions:
- hash_file: Hash the contents of the given file object with the given
algorithms (defaulting to DEFAULT_ALGORITHMS if none provided).
- hash_data: Hash the given binary blob with the given algorithms
(defaulting to DEFAULT_ALGORITHMS if none provided).
- hash_path: Hash the contents of the file at the given path with the
given algorithms (defaulting to DEFAULT_ALGORITHMS if none
provided).
"""
......@@ -290,84 +275,6 @@ def _new_hash(algo, length=None):
return _new_hashlib_hash(algo)
def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
              chunk_cb=None):
    """(Deprecated) cf. MultiHash.from_file

    Stream a file-like object through a MultiHash, one block at a time.

    Args:
        fobj: a file-like object, consumed through read(HASH_BLOCK_SIZE)
        length (int): the length of the contents of the file-like
            object (for the git-specific algorithms)
        algorithms (set): the hashing algorithms to be used, as an
            iterable over strings
        chunk_cb (fun): optional callback; when truthy it is called with
            every chunk right after that chunk has been hashed

    Returns:
        a dict mapping each algorithm to a digest (bytes by default).

    Raises:
        ValueError if algorithms contains an unknown hash algorithm.

    """
    multi_hash = MultiHash(algorithms, length)

    # Priming read, then keep going until read() hands back an empty chunk.
    block = fobj.read(HASH_BLOCK_SIZE)
    while block:
        multi_hash.update(block)
        if chunk_cb:
            chunk_cb(block)
        block = fobj.read(HASH_BLOCK_SIZE)

    return multi_hash.digest()
def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None):
    """(deprecated) cf. MultiHash.from_path

    Hash the file found at ``path`` and record its size alongside the
    digests.

    Args:
        path (str): the path of the file to hash
        algorithms (set): the hashing algorithms used
        chunk_cb (fun): a callback function taking a chunk of data as
            parameter; forwarded unchanged to hash_file

    Returns: the dict produced by hash_file, with an extra 'length' key
        holding the file size as reported by os.path.getsize.

    Raises:
        ValueError if algorithms contains an unknown hash algorithm.
        OSError on file access error

    """
    # The size is taken up front: hash_file needs it as its length argument.
    size = os.path.getsize(path)
    with open(path, 'rb') as stream:
        digests = hash_file(stream, size, algorithms, chunk_cb=chunk_cb)

    digests['length'] = size
    return digests
def hash_data(data, algorithms=DEFAULT_ALGORITHMS):
    """(deprecated) cf. MultiHash.from_data

    Compute the digests of an in-memory binary blob.

    Args:
        data (bytes): raw content to hash
        algorithms (set): the hashing algorithms used; passed to
            MultiHash.from_data as ``hash_names``

    Returns: a dict mapping each algorithm to a bytes digest

    Raises:
        TypeError if data does not support the buffer interface.
        ValueError if algorithms contains an unknown hash algorithm.

    """
    multi_hash = MultiHash.from_data(data, hash_names=algorithms)
    return multi_hash.digest()
def hash_git_data(data, git_type, base_algo='sha1'):
"""Hash the given data as a git object of type git_type.
......
......@@ -111,27 +111,6 @@ class MultiHashTest(BaseHashutil):
class Hashutil(BaseHashutil):
def test_hash_data(self):
    # With the default algorithms the result must equal the reference
    # checksums and must not carry a 'length' entry.
    computed = hashutil.hash_data(self.data)

    self.assertEqual(computed, self.checksums)
    self.assertFalse('length' in computed)
def test_hash_data_with_length(self):
    # Request 'length' on top of the default algorithms.
    requested = {'length'}.union(hashutil.DEFAULT_ALGORITHMS)
    computed = hashutil.hash_data(self.data, algorithms=requested)

    # Expected value: the reference checksums plus the size of the data,
    # built on a copy so the shared fixture stays untouched.
    wanted = self.checksums.copy()
    wanted['length'] = len(self.data)

    self.assertEqual(computed, wanted)
    self.assertTrue('length' in computed)
def test_hash_data_unknown_hash(self):
    # An unrecognised algorithm name must be rejected with a ValueError
    # whose first argument names both the problem and the offending value.
    with self.assertRaises(ValueError) as raised:
        hashutil.hash_data(self.data, ['unknown-hash'])

    message = raised.exception.args[0]
    for fragment in ('Unexpected hashing algorithm', 'unknown-hash'):
        self.assertIn(fragment, message)
def test_hash_git_data(self):
checksums = {
......@@ -148,30 +127,6 @@ class Hashutil(BaseHashutil):
self.assertIn('Unexpected git object type', cm.exception.args[0])
self.assertIn('unknown-git-type', cm.exception.args[0])
def test_hash_file(self):
    # Hashing an in-memory stream with an explicit length must give the
    # reference checksums.
    stream = io.BytesIO(self.data)
    size = len(self.data)

    self.assertEqual(hashutil.hash_file(stream, length=size),
                     self.checksums)
def test_hash_file_missing_length(self):
    # 'sha1_git' is requested without a length argument: hash_file is
    # expected to refuse with a ValueError mentioning the missing length.
    stream = io.BytesIO(self.data)

    with self.assertRaises(ValueError) as raised:
        hashutil.hash_file(stream, algorithms=['sha1_git'])

    message = raised.exception.args[0]
    self.assertIn('Missing length', message)
def test_hash_path(self):
    # delete=False keeps the file on disk after the with block closes it,
    # so hash_path can open it by name; cleanup is therefore manual.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(self.data)

    # Remove the temporary file even when hash_path raises, so a failing
    # run does not leave stray files behind.
    try:
        hashes = hashutil.hash_path(f.name)
    finally:
        os.remove(f.name)

    # Build the expectation on a copy instead of mutating the shared
    # self.checksums fixture (same approach as test_hash_data_with_length).
    expected = self.checksums.copy()
    expected['length'] = len(self.data)
    self.assertEqual(expected, hashes)
def test_hash_to_hex(self):
for type in self.checksums:
hex = self.hex_checksums[type]
......
......@@ -103,7 +103,8 @@ class ContentIdentifier(unittest.TestCase):
tzinfo=datetime.timezone.utc),
}
self.content_id = hashutil.hash_data(self.content['data'])
self.content_id = hashutil.MultiHash.from_data(
self.content['data']).digest()
def test_content_identifier(self):
self.assertEqual(identifiers.content_identifier(self.content),
......
......@@ -9,6 +9,10 @@ import unittest
from swh.model import exceptions, hashutil, validators
def hash_data(raw_content):
    """Return the MultiHash digest dict for ``raw_content``.

    Local helper used by the tests in this module; it calls
    hashutil.MultiHash.from_data without naming any algorithms.
    """
    multi_hash = hashutil.MultiHash.from_data(raw_content)
    return multi_hash.digest()
class TestValidators(unittest.TestCase):
def setUp(self):
self.valid_visible_content = {
......@@ -20,7 +24,7 @@ class TestValidators(unittest.TestCase):
}
self.valid_visible_content.update(
hashutil.hash_data(self.valid_visible_content['data']))
hash_data(self.valid_visible_content['data']))
self.valid_absent_content = {
'status': 'absent',
......@@ -34,7 +38,7 @@ class TestValidators(unittest.TestCase):
self.invalid_content_hash_mismatch = self.valid_visible_content.copy()
self.invalid_content_hash_mismatch.update(
hashutil.hash_data(b"this is not the data you're looking for"))
hash_data(b"this is not the data you're looking for"))
def test_validate_content(self):
self.assertTrue(
......
v0.0.28-0-g4e6bce9
\ No newline at end of file
v0.0.29-0-gfa140b2
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment