From 836198c411699d4760b4da09d493ae6bbef3c110 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Fri, 14 Sep 2018 23:39:08 +0200
Subject: [PATCH] swh.model.hashutil: Remove unnecessary endpoints

---
 swh/model/hashutil.py            | 56 +++++---------------------------
 swh/model/tests/test_hashutil.py | 21 ------------
 2 files changed, 9 insertions(+), 68 deletions(-)

diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
index 1bdaadbe..69586a82 100644
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -12,10 +12,6 @@ in a ValueError explaining the error.
 
 This modules defines the following hashing functions:
 
-- hash_stream: Hash the contents of something iterable (file, stream,
-  ...) with the given algorithms (defaulting to DEFAULT_ALGORITHMS if
-  none provided).
-
 - hash_file: Hash the contents of the given file object with the given
   algorithms (defaulting to DEFAULT_ALGORITHMS if none provided).
 
@@ -233,24 +229,16 @@ def _new_hash(algo, length=None):
     return _new_hashlib_hash(algo)
 
 
-def _read(fobj):
-    """Wrapper function around reading a chunk from fobj.
-
-    """
-    return fobj.read(HASH_BLOCK_SIZE)
-
-
-def hash_stream(s, readfn=_read, length=None, algorithms=DEFAULT_ALGORITHMS,
-                chunk_cb=None, hash_format='bytes'):
-    """Hash the contents of a stream
+def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
+              chunk_cb=None, hash_format='bytes'):
+    """Hash the contents of the given file object with the given algorithms.
 
     Args:
-        s: stream or object we can consume by successive call using `readfn`
-        readfn (fn): Function to read chunk data from s
-        length (int): the length of the contents of the object (for the
-                      git-specific algorithms)
-        algorithms (set): the hashing algorithms to be used, as an
-                          iterable over strings
+        fobj: a file-like object
+        length: the length of the contents of the file-like object (for the
+          git-specific algorithms)
+        algorithms: the hashing algorithms to be used, as an iterable over
+          strings
         hash_format (str): Format required for the output of the
                            computed hashes (cf. HASH_FORMATS)
 
@@ -269,7 +257,7 @@ def hash_stream(s, readfn=_read, length=None, algorithms=DEFAULT_ALGORITHMS,
 
     h = MultiHash(algorithms, length)
     while True:
-        chunk = readfn(s)
+        chunk = fobj.read(HASH_BLOCK_SIZE)
         if not chunk:
             break
         h.update(chunk)
@@ -283,32 +271,6 @@ def hash_stream(s, readfn=_read, length=None, algorithms=DEFAULT_ALGORITHMS,
     return h.hexdigest()
 
 
-def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
-              chunk_cb=None, hash_format='bytes'):
-    """Hash the contents of the given file object with the given algorithms.
-
-    Args:
-        fobj: a file-like object
-        length: the length of the contents of the file-like object (for the
-          git-specific algorithms)
-        algorithms: the hashing algorithms to be used, as an iterable over
-          strings
-        hash_format (str): Format required for the output of the
-                           computed hashes (cf. HASH_FORMATS)
-
-    Returns: a dict mapping each algorithm to a digest (bytes by default).
-
-    Raises:
-        ValueError if:
-
-            algorithms contains an unknown hash algorithm.
-            hash_format is an unknown hash format
-
-    """
-    return hash_stream(fobj, length=length, algorithms=algorithms,
-                       chunk_cb=chunk_cb, hash_format=hash_format)
-
-
 def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None,
               hash_format='bytes', track_length=True):
     """Hash the contents of the file at the given path with the given
diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py
index 4b0efa56..cbe16603 100644
--- a/swh/model/tests/test_hashutil.py
+++ b/swh/model/tests/test_hashutil.py
@@ -125,27 +125,6 @@ class Hashutil(unittest.TestCase):
             fobj, length=len(self.data), hash_format='bytehex')
         self.assertEqual(checksums, self.bytehex_checksums)
 
-    @istest
-    def hash_stream(self):
-        class StreamStub:
-            def __init__(self, data):
-                self.data = data
-
-            def iter_content(self):
-                yield from io.BytesIO(self.data)
-
-        s = StreamStub(self.data).iter_content()
-
-        def _readfn(s):
-            try:
-                return next(s)
-            except StopIteration:
-                return None
-
-        checksums = hashutil.hash_stream(
-            s, readfn=_readfn, length=len(self.data), hash_format='hex')
-        self.assertEqual(checksums, self.hex_checksums)
-
     @istest
     def hash_file_missing_length(self):
         fobj = io.BytesIO(self.data)
-- 
GitLab