Skip to content
Snippets Groups Projects
Verified Commit 836198c4 authored by Antoine R. Dumont
Browse files

swh.model.hashutil: Remove unnecessary endpoints

parent 3b9e8e91
No related branches found
No related tags found
1 merge request: !12 "model.hashutil: Open new endpoint to allow to hash stream"
......@@ -12,10 +12,6 @@ in a ValueError explaining the error.
This module defines the following hashing functions:
- hash_stream: Hash the contents of something iterable (file, stream,
...) with the given algorithms (defaulting to DEFAULT_ALGORITHMS if
none provided).
- hash_file: Hash the contents of the given file object with the given
algorithms (defaulting to DEFAULT_ALGORITHMS if none provided).
......@@ -233,24 +229,16 @@ def _new_hash(algo, length=None):
return _new_hashlib_hash(algo)
def _read(fobj):
"""Wrapper function around reading a chunk from fobj.
"""
return fobj.read(HASH_BLOCK_SIZE)
def hash_stream(s, readfn=_read, length=None, algorithms=DEFAULT_ALGORITHMS,
chunk_cb=None, hash_format='bytes'):
"""Hash the contents of a stream
def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
chunk_cb=None, hash_format='bytes'):
"""Hash the contents of the given file object with the given algorithms.
Args:
s: stream or object we can consume by successive call using `readfn`
readfn (fn): Function to read chunk data from s
length (int): the length of the contents of the object (for the
git-specific algorithms)
algorithms (set): the hashing algorithms to be used, as an
iterable over strings
fobj: a file-like object
length: the length of the contents of the file-like object (for the
git-specific algorithms)
algorithms: the hashing algorithms to be used, as an iterable over
strings
hash_format (str): Format required for the output of the
computed hashes (cf. HASH_FORMATS)
......@@ -269,7 +257,7 @@ def hash_stream(s, readfn=_read, length=None, algorithms=DEFAULT_ALGORITHMS,
h = MultiHash(algorithms, length)
while True:
chunk = readfn(s)
chunk = fobj.read(HASH_BLOCK_SIZE)
if not chunk:
break
h.update(chunk)
......@@ -283,32 +271,6 @@ def hash_stream(s, readfn=_read, length=None, algorithms=DEFAULT_ALGORITHMS,
return h.hexdigest()
def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
chunk_cb=None, hash_format='bytes'):
"""Hash the contents of the given file object with the given algorithms.
Args:
fobj: a file-like object
length: the length of the contents of the file-like object (for the
git-specific algorithms)
algorithms: the hashing algorithms to be used, as an iterable over
strings
hash_format (str): Format required for the output of the
computed hashes (cf. HASH_FORMATS)
Returns: a dict mapping each algorithm to a digest (bytes by default).
Raises:
ValueError if:
algorithms contains an unknown hash algorithm.
hash_format is an unknown hash format
"""
return hash_stream(fobj, length=length, algorithms=algorithms,
chunk_cb=chunk_cb, hash_format=hash_format)
def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None,
hash_format='bytes', track_length=True):
"""Hash the contents of the file at the given path with the given
......
......@@ -125,27 +125,6 @@ class Hashutil(unittest.TestCase):
fobj, length=len(self.data), hash_format='bytehex')
self.assertEqual(checksums, self.bytehex_checksums)
@istest
def hash_stream(self):
class StreamStub:
def __init__(self, data):
self.data = data
def iter_content(self):
yield from io.BytesIO(self.data)
s = StreamStub(self.data).iter_content()
def _readfn(s):
try:
return next(s)
except StopIteration:
return None
checksums = hashutil.hash_stream(
s, readfn=_readfn, length=len(self.data), hash_format='hex')
self.assertEqual(checksums, self.hex_checksums)
@istest
def hash_file_missing_length(self):
fobj = io.BytesIO(self.data)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment