From 91d74ef052f5f9951c8a5b56bf1faf816c4d2e4b Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Wed, 20 Dec 2017 10:40:06 +0100
Subject: [PATCH] swh.model.hashutil.hash_data: Optionally integrate length in
 result

---
 swh/model/hashutil.py            | 13 +++++++++----
 swh/model/tests/test_hashutil.py | 11 +++++++++++
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
index 864d043f..96905d95 100644
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -167,12 +167,13 @@ def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None):
     return hash
 
 
-def hash_data(data, algorithms=DEFAULT_ALGORITHMS):
+def hash_data(data, algorithms=DEFAULT_ALGORITHMS, with_length=False):
     """Hash the given binary blob with the given algorithms.
 
     Args:
-        data: a bytes object
-        algorithms: the hashing algorithms used
+        data (bytes): raw content to hash
+        algorithms (list): the hashing algorithms used
+        with_length (bool): also store len(data) under the 'length' key
 
     Returns: a dict mapping each algorithm to a bytes digest
 
@@ -181,7 +182,11 @@ def hash_data(data, algorithms=DEFAULT_ALGORITHMS):
         ValueError if algorithms contains an unknown hash algorithm.
     """
     fobj = BytesIO(data)
-    return hash_file(fobj, len(data), algorithms)
+    length = len(data)
+    data = hash_file(fobj, length, algorithms)
+    if with_length:
+        data['length'] = length
+    return data
 
 
 def hash_git_data(data, git_type, base_algo='sha1'):
diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py
index 1a4f24a6..8b883f16 100644
--- a/swh/model/tests/test_hashutil.py
+++ b/swh/model/tests/test_hashutil.py
@@ -47,6 +47,17 @@ class Hashutil(unittest.TestCase):
     def hash_data(self):
         checksums = hashutil.hash_data(self.data)
         self.assertEqual(checksums, self.checksums)
+        self.assertFalse('length' in checksums)
+
+    @istest
+    def hash_data_with_length(self):
+        expected_checksums = self.checksums.copy()
+        expected_checksums['length'] = len(self.data)
+
+        checksums = hashutil.hash_data(self.data, with_length=True)
+
+        self.assertEqual(checksums, expected_checksums)
+        self.assertTrue('length' in checksums)
 
     @istest
     def hash_data_unknown_hash(self):
-- 
GitLab