From 836198c411699d4760b4da09d493ae6bbef3c110 Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com> Date: Fri, 14 Sep 2018 23:39:08 +0200 Subject: [PATCH] swh.model.hashutil: Remove unnecessary endpoints --- swh/model/hashutil.py | 56 +++++--------------------------- swh/model/tests/test_hashutil.py | 21 ------------ 2 files changed, 9 insertions(+), 68 deletions(-) diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py index 1bdaadbe..69586a82 100644 --- a/swh/model/hashutil.py +++ b/swh/model/hashutil.py @@ -12,10 +12,6 @@ in a ValueError explaining the error. This modules defines the following hashing functions: -- hash_stream: Hash the contents of something iterable (file, stream, - ...) with the given algorithms (defaulting to DEFAULT_ALGORITHMS if - none provided). - - hash_file: Hash the contents of the given file object with the given algorithms (defaulting to DEFAULT_ALGORITHMS if none provided). @@ -233,24 +229,16 @@ def _new_hash(algo, length=None): return _new_hashlib_hash(algo) -def _read(fobj): - """Wrapper function around reading a chunk from fobj. - - """ - return fobj.read(HASH_BLOCK_SIZE) - - -def hash_stream(s, readfn=_read, length=None, algorithms=DEFAULT_ALGORITHMS, - chunk_cb=None, hash_format='bytes'): - """Hash the contents of a stream +def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, + chunk_cb=None, hash_format='bytes'): + """Hash the contents of the given file object with the given algorithms. Args: - s: stream or object we can consume by successive call using `readfn` - readfn (fn): Function to read chunk data from s - length (int): the length of the contents of the object (for the - git-specific algorithms) - algorithms (set): the hashing algorithms to be used, as an - iterable over strings + fobj: a file-like object + length: the length of the contents of the file-like object (for the + git-specific algorithms) + algorithms: the hashing algorithms to be used, as an iterable over + strings hash_format (str): Format required for the output of the computed hashes (cf. HASH_FORMATS) @@ -269,7 +257,7 @@ def hash_stream(s, readfn=_read, length=None, algorithms=DEFAULT_ALGORITHMS, h = MultiHash(algorithms, length) while True: - chunk = readfn(s) + chunk = fobj.read(HASH_BLOCK_SIZE) if not chunk: break h.update(chunk) @@ -283,32 +271,6 @@ def hash_stream(s, readfn=_read, length=None, algorithms=DEFAULT_ALGORITHMS, return h.hexdigest() -def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, - chunk_cb=None, hash_format='bytes'): - """Hash the contents of the given file object with the given algorithms. - - Args: - fobj: a file-like object - length: the length of the contents of the file-like object (for the - git-specific algorithms) - algorithms: the hashing algorithms to be used, as an iterable over - strings - hash_format (str): Format required for the output of the - computed hashes (cf. HASH_FORMATS) - - Returns: a dict mapping each algorithm to a digest (bytes by default). - - Raises: - ValueError if: - - algorithms contains an unknown hash algorithm. - hash_format is an unknown hash format - - """ - return hash_stream(fobj, length=length, algorithms=algorithms, - chunk_cb=chunk_cb, hash_format=hash_format) - - def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None, hash_format='bytes', track_length=True): """Hash the contents of the file at the given path with the given diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py index 4b0efa56..cbe16603 100644 --- a/swh/model/tests/test_hashutil.py +++ b/swh/model/tests/test_hashutil.py @@ -125,27 +125,6 @@ class Hashutil(unittest.TestCase): fobj, length=len(self.data), hash_format='bytehex') self.assertEqual(checksums, self.bytehex_checksums) - @istest - def hash_stream(self): - class StreamStub: - def __init__(self, data): - self.data = data - - def iter_content(self): - yield from io.BytesIO(self.data) - - s = StreamStub(self.data).iter_content() - - def _readfn(s): - try: - return next(s) - except StopIteration: - return None - - checksums = hashutil.hash_stream( - s, readfn=_readfn, length=len(self.data), hash_format='hex') - self.assertEqual(checksums, self.hex_checksums) - @istest def hash_file_missing_length(self): fobj = io.BytesIO(self.data) -- GitLab