Verified Commit 34870256 authored by Antoine R. Dumont

hashutil: Migrate towards MultiHash api

parent eb338cda
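The diff below replaces the module-level helpers from swh.model.hashutil (hash_data, hash_path, hash_file) with the MultiHash class. As a reading aid, here is a minimal sketch of the calling pattern being migrated to, using only the calls visible in this diff (MultiHash.from_data, MultiHash.from_path, digest); the absolute import path and the exact set of algorithms returned are assumptions, not something this commit specifies.

```python
# Illustrative sketch of the old vs. new hashing calls seen in this diff.
# Assumes swh.model is installed; swh.model.hashutil is the module the
# relative imports in the diff refer to.
from swh.model.hashutil import MultiHash

data = b'hello world'

# Old pattern (removed below): hashutil.hash_data(data) returned a dict
# mapping algorithm names to digests.
# New pattern: build a MultiHash, then call .digest() for the same dict.
hashes = MultiHash.from_data(data, length=len(data)).digest()

# Hashing a file on disk (replaces hashutil.hash_path):
# hashes = MultiHash.from_path(some_path).digest()
```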
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2018 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -7,7 +7,7 @@ import enum
 import os
 import stat
-from . import hashutil
+from .hashutil import MultiHash, HASH_BLOCK_SIZE
 from .merkle import MerkleLeaf, MerkleNode
 from .identifiers import (
     directory_identifier,
@@ -77,8 +77,9 @@ class Content(MerkleLeaf):
           mode (int): a file mode (passed to :func:`mode_to_perms`)
           data (bytes): raw contents of the file
         """
-        ret = hashutil.hash_data(data)
-        ret['length'] = len(data)
+        length = len(data)
+        ret = MultiHash.from_data(data, length=length).digest()
+        ret['length'] = length
         ret['perms'] = mode_to_perms(mode)
         ret['data'] = data
@@ -91,8 +92,8 @@ class Content(MerkleLeaf):
     @classmethod
     def from_file(cls, *, path, data=False, save_path=False):
-        """Compute the Software Heritage content entry corresponding to an on-disk
-        file.
+        """Compute the Software Heritage content entry corresponding to an
+        on-disk file.
         The returned dictionary contains keys useful for both:
         - loading the content in the archive (hashes, `length`)
@@ -103,6 +104,7 @@
               content entry
           data (bool): add the file data to the entry
           save_path (bool): add the file path to the entry
         """
         file_stat = os.lstat(path)
         mode = file_stat.st_mode
@@ -117,17 +119,16 @@
         length = file_stat.st_size
         if not data:
-            ret = hashutil.hash_path(path)
+            ret = MultiHash.from_path(path).digest()
         else:
+            h = MultiHash(length=length)
             chunks = []
-            def append_chunk(x, chunks=chunks):
-                chunks.append(x)
             with open(path, 'rb') as fobj:
-                ret = hashutil.hash_file(fobj, length=length,
-                                         chunk_cb=append_chunk)
+                for chunk in fobj:
+                    h.update(chunk)
+                    chunks.append(chunk)
+            ret = h.digest()
             ret['data'] = b''.join(chunks)
         if save_path:
......
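The last hunk above replaces hash_file's chunk_cb callback with an explicit loop that feeds a MultiHash incrementally. A standalone sketch of that streaming pattern, assuming a hypothetical local file and using only the MultiHash(length=...), update() and digest() calls shown in the hunk:

```python
import os

from swh.model.hashutil import MultiHash

path = 'example.bin'  # hypothetical input file
length = os.path.getsize(path)

# Mirror of the new `else:` branch of Content.from_file: hash the file
# chunk by chunk while keeping the raw bytes for the `data` key.
h = MultiHash(length=length)  # total length is passed up front, as in the diff
chunks = []
with open(path, 'rb') as fobj:
    for chunk in fobj:
        h.update(chunk)       # feed every chunk to the hashers
        chunks.append(chunk)

ret = h.digest()              # dict: algorithm name -> digest
ret['data'] = b''.join(chunks)
```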
@@ -11,8 +11,7 @@ from functools import lru_cache
 from .exceptions import ValidationError
 from .fields.hashes import validate_sha1
-from .hashutil import hash_data, hash_git_data, DEFAULT_ALGORITHMS
-from .hashutil import hash_to_hex
+from .hashutil import hash_git_data, hash_to_hex, MultiHash
 SNAPSHOT = 'snapshot'
@@ -104,7 +103,7 @@ def content_identifier(content):
     """
-    return hash_data(content['data'], DEFAULT_ALGORITHMS)
+    return MultiHash.from_data(content['data']).digest()
 def _sort_key(entry):
......
-# Copyright (C) 2015 The Software Heritage developers
+# Copyright (C) 2015-2018 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 from .exceptions import ValidationError, NON_FIELD_ERRORS
-from . import fields, hashutil
+from . import fields
+from .hashutil import MultiHash, hash_to_bytes
 def validate_content(content):
@@ -44,11 +45,11 @@ def validate_content(content):
 def validate_hashes(content):
     errors = []
     if 'data' in content:
-        hashes = hashutil.hash_data(content['data'])
+        hashes = MultiHash.from_data(content['data']).digest()
         for hash_type, computed_hash in hashes.items():
             if hash_type not in content:
                 continue
-            content_hash = hashutil.hash_to_bytes(content[hash_type])
+            content_hash = hash_to_bytes(content[hash_type])
             if content_hash != computed_hash:
                 errors.append(ValidationError(
                     'hash mismatch in content for hash %(hash)s',
......
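The validate_hashes hunk above recomputes the digests of content['data'] and compares each one to the value supplied by the caller, normalizing it with hash_to_bytes first. A small illustration of that comparison, assuming sha1 is among the algorithms MultiHash computes by default:

```python
from swh.model.hashutil import MultiHash, hash_to_bytes

content = {
    'data': b'foo',
    # caller-supplied hex digest (sha1 of b'foo')
    'sha1': '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33',
}

computed = MultiHash.from_data(content['data']).digest()
for hash_type, computed_hash in computed.items():
    if hash_type not in content:
        continue
    # normalize hex/bytes input to bytes before comparing, as in the diff
    if hash_to_bytes(content[hash_type]) != computed_hash:
        print('hash mismatch in content for hash', hash_type)
```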