From 0119f4c1a0b4d12934b35c1eb85ad31d81852f8e Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Thu, 27 Sep 2018 14:28:05 +0200
Subject: [PATCH] swh.model: Do multiple reads with a fixed chunk size

---
 swh/model/from_disk.py | 5 ++++-
 swh/model/hashutil.py  | 7 +++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
index 4767c398..a9fc2d3b 100644
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -124,7 +124,10 @@ class Content(MerkleLeaf):
         h = MultiHash(length=length)
         chunks = []
         with open(path, 'rb') as fobj:
-            for chunk in fobj:
+            while True:
+                chunk = fobj.read(HASH_BLOCK_SIZE)
+                if not chunk:
+                    break
                 h.update(chunk)
                 chunks.append(chunk)
 
diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
index d8249bc7..24c2f696 100644
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -117,9 +117,12 @@ class MultiHash:
         ret.track_length = track_length
 
     @classmethod
-    def from_file(cls, file, hash_names=DEFAULT_ALGORITHMS, length=None):
+    def from_file(cls, fobj, hash_names=DEFAULT_ALGORITHMS, length=None):
         ret = cls(length=length, hash_names=hash_names)
-        for chunk in file:
+        while True:
+            chunk = fobj.read(HASH_BLOCK_SIZE)
+            if not chunk:
+                break
             ret.update(chunk)
         return ret
 
-- 
GitLab