From 37364c24b50911f107cfde7b3a283dd52e1a76c9 Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Mon, 6 Dec 2021 17:38:59 +0100 Subject: [PATCH] hashutil: Add support for md5 sum Enable computing the md5 sum through the hashutil.MultiHash class. Nevertheless, md5 is not put in DEFAULT_ALGORITHMS set and must be explicitly requested by client code. Related to T2400 --- swh/model/hashutil.py | 2 +- swh/model/tests/test_hashutil.py | 34 ++++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py index eaacb230..86ecc6f0 100644 --- a/swh/model/hashutil.py +++ b/swh/model/hashutil.py @@ -58,7 +58,7 @@ from io import BytesIO import os from typing import Callable, Dict, Optional -ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512"]) +ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512", "md5"]) """Hashing algorithms supported by this module""" DEFAULT_ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256"]) diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py index ec540d2f..c864bd8f 100644 --- a/swh/model/tests/test_hashutil.py +++ b/swh/model/tests/test_hashutil.py @@ -13,7 +13,7 @@ from unittest.mock import patch import pytest from swh.model import hashutil -from swh.model.hashutil import MultiHash +from swh.model.hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytehex @contextlib.contextmanager @@ -112,6 +112,36 @@ def test_multi_hash_file_bytehexdigest(hash_test_data): assert checksums == hash_test_data.bytehex_checksums +def test_multi_hash_file_with_md5(hash_test_data): + fobj = io.BytesIO(hash_test_data.data) + + checksums = MultiHash.from_file( + fobj, hash_names=DEFAULT_ALGORITHMS | {"md5"}, length=len(hash_test_data.data) + ).digest() + md5sum = {"md5": hashlib.md5(hash_test_data.data).digest()} + assert checksums == {**hash_test_data.checksums, **md5sum} + + +def 
test_multi_hash_file_hexdigest_with_md5(hash_test_data): + fobj = io.BytesIO(hash_test_data.data) + length = len(hash_test_data.data) + checksums = MultiHash.from_file( + fobj, hash_names=DEFAULT_ALGORITHMS | {"md5"}, length=length + ).hexdigest() + md5sum = {"md5": hashlib.md5(hash_test_data.data).hexdigest()} + assert checksums == {**hash_test_data.hex_checksums, **md5sum} + + +def test_multi_hash_file_bytehexdigest_with_md5(hash_test_data): + fobj = io.BytesIO(hash_test_data.data) + length = len(hash_test_data.data) + checksums = MultiHash.from_file( + fobj, hash_names=DEFAULT_ALGORITHMS | {"md5"}, length=length + ).bytehexdigest() + md5sum = {"md5": hash_to_bytehex(hashlib.md5(hash_test_data.data).digest())} + assert checksums == {**hash_test_data.bytehex_checksums, **md5sum} + + def test_multi_hash_file_missing_length(hash_test_data): fobj = io.BytesIO(hash_test_data.data) with pytest.raises(ValueError, match="Missing length"): @@ -177,7 +207,7 @@ def test_new_hash_unsupported_hashing_algorithm(): expected_message = ( "Unexpected hashing algorithm blake2:10, " "expected one of blake2b512, blake2s256, " - "sha1, sha1_git, sha256" + "md5, sha1, sha1_git, sha256" ) with pytest.raises(ValueError, match=expected_message): hashutil._new_hash("blake2:10") -- GitLab