diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
index 864d043fb924394c1337f46a76a0211aea389cf7..96905d956f1dc2d2bbe0833a4a23f6009dccab06 100644
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -167,12 +167,13 @@ def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None):
     return hash
 
 
-def hash_data(data, algorithms=DEFAULT_ALGORITHMS):
+def hash_data(data, algorithms=DEFAULT_ALGORITHMS, with_length=False):
     """Hash the given binary blob with the given algorithms.
 
     Args:
-        data: a bytes object
-        algorithms: the hashing algorithms used
+        data (bytes): raw content to hash
+        algorithms (list): the hashing algorithms used
+        with_length (bool): add the length key in the resulting dict
 
     Returns:
         a dict mapping each algorithm to a bytes digest
@@ -181,7 +182,11 @@ def hash_data(data, algorithms=DEFAULT_ALGORITHMS):
         ValueError if algorithms contains an unknown hash algorithm.
     """
     fobj = BytesIO(data)
-    return hash_file(fobj, len(data), algorithms)
+    length = len(data)
+    data = hash_file(fobj, length, algorithms)
+    if with_length:
+        data['length'] = length
+    return data
 
 
 def hash_git_data(data, git_type, base_algo='sha1'):
diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py
index 1a4f24a606fc0cda16b4fc3df92ba49799b3145e..8b883f16bf033376c1b8c7c91fb18e38b9b1b56c 100644
--- a/swh/model/tests/test_hashutil.py
+++ b/swh/model/tests/test_hashutil.py
@@ -47,6 +47,17 @@ class Hashutil(unittest.TestCase):
     def hash_data(self):
         checksums = hashutil.hash_data(self.data)
         self.assertEqual(checksums, self.checksums)
+        self.assertFalse('length' in checksums)
+
+    @istest
+    def hash_data_with_length(self):
+        expected_checksums = self.checksums.copy()
+        expected_checksums['length'] = len(self.data)
+
+        checksums = hashutil.hash_data(self.data, with_length=True)
+
+        self.assertEqual(checksums, expected_checksums)
+        self.assertTrue('length' in checksums)
 
     @istest
     def hash_data_unknown_hash(self):