From 0119f4c1a0b4d12934b35c1eb85ad31d81852f8e Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Thu, 27 Sep 2018 14:28:05 +0200
Subject: [PATCH] swh.model: Do multiple reads with a fixed chunk size

---
 swh/model/from_disk.py | 5 ++++-
 swh/model/hashutil.py  | 7 +++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
index 4767c398..a9fc2d3b 100644
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -124,7 +124,10 @@ class Content(MerkleLeaf):
             h = MultiHash(length=length)
             chunks = []
             with open(path, 'rb') as fobj:
-                for chunk in fobj:
+                while True:
+                    chunk = fobj.read(HASH_BLOCK_SIZE)
+                    if not chunk:
+                        break
                     h.update(chunk)
                     chunks.append(chunk)
 
diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
index d8249bc7..24c2f696 100644
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -117,9 +117,12 @@ class MultiHash:
         ret.track_length = track_length
 
     @classmethod
-    def from_file(cls, file, hash_names=DEFAULT_ALGORITHMS, length=None):
+    def from_file(cls, fobj, hash_names=DEFAULT_ALGORITHMS, length=None):
         ret = cls(length=length, hash_names=hash_names)
-        for chunk in file:
+        while True:
+            chunk = fobj.read(HASH_BLOCK_SIZE)
+            if not chunk:
+                break
             ret.update(chunk)
         return ret
 
-- 
GitLab