From 5ca5dce7216f5a2f3e3cfcf24b5e69270a577487 Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com> Date: Fri, 14 Sep 2018 00:21:33 +0200 Subject: [PATCH] hashutil: Allow option to require hexdigest instead of binary digest Related T421 --- swh/model/hashutil.py | 12 +++++++++--- swh/model/tests/test_hashutil.py | 7 +++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py index 33551616..6675b5cb 100644 --- a/swh/model/hashutil.py +++ b/swh/model/hashutil.py @@ -162,7 +162,8 @@ def _new_hash(algo, length=None): return _new_hashlib_hash(algo) -def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None): +def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None, + hexdigest=False): """Hash the contents of the given file object with the given algorithms. Args: @@ -171,11 +172,14 @@ def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None): git-specific algorithms) algorithms: the hashing algorithms to be used, as an iterable over strings + hexdigest (bool): False returns the hash as binary, otherwise + returns as hex - Returns: a dict mapping each algorithm to a bytes digest. + Returns: a dict mapping each algorithm to a digest (bytes by default). Raises: ValueError if algorithms contains an unknown hash algorithm. + """ hashes = {algo: _new_hash(algo, length) for algo in algorithms} @@ -188,6 +192,8 @@ def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None): if chunk_cb: chunk_cb(chunk) + if hexdigest: + return {algo: hash.hexdigest() for algo, hash in hashes.items()} return {algo: hash.digest() for algo, hash in hashes.items()} @@ -209,7 +215,7 @@ def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None): """ length = os.path.getsize(path) with open(path, 'rb') as fobj: - hash = hash_file(fobj, length, algorithms, chunk_cb) + hash = hash_file(fobj, length, algorithms, chunk_cb=chunk_cb) hash['length'] = length return hash diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py index da49af99..fabbf16b 100644 --- a/swh/model/tests/test_hashutil.py +++ b/swh/model/tests/test_hashutil.py @@ -95,6 +95,13 @@ class Hashutil(unittest.TestCase): checksums = hashutil.hash_file(fobj, length=len(self.data)) self.assertEqual(checksums, self.checksums) + @istest + def hash_file_hexdigest(self): + fobj = io.BytesIO(self.data) + checksums = hashutil.hash_file(fobj, length=len(self.data), + hexdigest=True) + self.assertEqual(checksums, self.hex_checksums) + @istest def hash_file_missing_length(self): fobj = io.BytesIO(self.data) -- GitLab