From 4959a45064755bc30a935576bf3b96f656c9ebb2 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Sat, 15 Sep 2018 00:48:52 +0200
Subject: [PATCH] swh.model.hashutil: Mark hash_* functions as deprecated

Also revert them to their original behavior (returning only the digests as bytes).
---
 swh/model/hashutil.py            | 37 ++++++++++++++------------------
 swh/model/tests/test_hashutil.py | 24 ---------------------
 2 files changed, 16 insertions(+), 45 deletions(-)

diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
index 339b72fa..ee29b6e6 100644
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -259,8 +259,10 @@ def _new_hash(algo, length=None):
 
 
 def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
-              chunk_cb=None, hash_format='bytes'):
-    """Hash the contents of the given file object with the given algorithms.
+              chunk_cb=None):
+    """(Deprecated) cf. MultiHash.from_file
+
+    Hash the contents of the given file object with the given algorithms.
 
     Args:
         fobj: a file-like object
@@ -280,10 +282,6 @@ def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
             hash_format is an unknown hash format
 
     """
-    if hash_format not in HASH_FORMATS:
-        raise ValueError('Unexpected hash format %s, expected one of %s' % (
-            hash_format, HASH_FORMATS))
-
     h = MultiHash(algorithms, length)
     while True:
         chunk = fobj.read(HASH_BLOCK_SIZE)
@@ -293,17 +291,15 @@ def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
         if chunk_cb:
             chunk_cb(chunk)
 
-    if hash_format == 'bytes':
-        return h.digest()
-    if hash_format == 'bytehex':
-        return h.bytehexdigest()
-    return h.hexdigest()
+    return h.digest()
 
 
 def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None,
-              hash_format='bytes', track_length=True):
-    """Hash the contents of the file at the given path with the given
-       algorithms.
+              track_length=True):
+    """(Deprecated) cf. MultiHash.from_path
+
+    Hash the contents of the file at the given path with the given
+    algorithms.
 
     Args:
         path (str): the path of the file to hash
@@ -327,12 +323,13 @@ def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None,
         algorithms = set(['length']).union(algorithms)
     length = os.path.getsize(path)
     with open(path, 'rb') as fobj:
-        return hash_file(fobj, length, algorithms, chunk_cb=chunk_cb,
-                         hash_format=hash_format)
+        return hash_file(fobj, length, algorithms, chunk_cb=chunk_cb)
+
 
+def hash_data(data, algorithms=DEFAULT_ALGORITHMS):
+    """(Deprecated) cf. MultiHash.from_data
 
-def hash_data(data, algorithms=DEFAULT_ALGORITHMS, hash_format='bytes'):
-    """Hash the given binary blob with the given algorithms.
+    Hash the given binary blob with the given algorithms.
 
     Args:
         data (bytes): raw content to hash
@@ -350,9 +347,7 @@ def hash_data(data, algorithms=DEFAULT_ALGORITHMS, hash_format='bytes'):
             hash_format is an unknown hash format
 
     """
-    fobj = BytesIO(data)
-    length = len(data)
-    return hash_file(fobj, length, algorithms, hash_format=hash_format)
+    return MultiHash.from_data(data, hash_names=algorithms).digest()
 
 
 def hash_git_data(data, git_type, base_algo='sha1'):
diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py
index d288149b..92b3684a 100644
--- a/swh/model/tests/test_hashutil.py
+++ b/swh/model/tests/test_hashutil.py
@@ -147,16 +147,6 @@ class Hashutil(BaseHashutil):
         self.assertIn('Unexpected hashing algorithm', cm.exception.args[0])
         self.assertIn('unknown-hash', cm.exception.args[0])
 
-    @istest
-    def hash_data_unknown_hash_format(self):
-        with self.assertRaises(ValueError) as cm:
-            hashutil.hash_data(
-                self.data, hashutil.DEFAULT_ALGORITHMS,
-                hash_format='unknown-format')
-
-        self.assertIn('Unexpected hash format', cm.exception.args[0])
-        self.assertIn('unknown-format', cm.exception.args[0])
-
     @istest
     def hash_git_data(self):
         checksums = {
@@ -181,20 +171,6 @@ class Hashutil(BaseHashutil):
         checksums = hashutil.hash_file(fobj, length=len(self.data))
         self.assertEqual(checksums, self.checksums)
 
-    @istest
-    def hash_file_hexdigest(self):
-        fobj = io.BytesIO(self.data)
-        checksums = hashutil.hash_file(
-            fobj, length=len(self.data), hash_format='hex')
-        self.assertEqual(checksums, self.hex_checksums)
-
-    @istest
-    def hash_file_bytehexdigest(self):
-        fobj = io.BytesIO(self.data)
-        checksums = hashutil.hash_file(
-            fobj, length=len(self.data), hash_format='bytehex')
-        self.assertEqual(checksums, self.bytehex_checksums)
-
     @istest
     def hash_file_missing_length(self):
         fobj = io.BytesIO(self.data)
-- 
GitLab