diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
index eaacb2305eb53946736d6ad5fb1c18c4086d8879..86ecc6f0b64e67c0f9d070901e36e9f3b85db136 100644
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -58,7 +58,7 @@ from io import BytesIO
 import os
 from typing import Callable, Dict, Optional
 
-ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512"])
+ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512", "md5"])
 """Hashing algorithms supported by this module"""
 
 DEFAULT_ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256"])
diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py
index ec540d2fd2f1184fb9668008daaf34c19f2c8ade..c864bd8f9efbca0714e6e12a39b79c3512d8aed8 100644
--- a/swh/model/tests/test_hashutil.py
+++ b/swh/model/tests/test_hashutil.py
@@ -13,7 +13,7 @@ from unittest.mock import patch
 import pytest
 
 from swh.model import hashutil
-from swh.model.hashutil import MultiHash
+from swh.model.hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytehex
 
 
 @contextlib.contextmanager
@@ -112,6 +112,36 @@ def test_multi_hash_file_bytehexdigest(hash_test_data):
     assert checksums == hash_test_data.bytehex_checksums
 
 
+def test_multi_hash_file_with_md5(hash_test_data):
+    fobj = io.BytesIO(hash_test_data.data)
+
+    checksums = MultiHash.from_file(
+        fobj, hash_names=DEFAULT_ALGORITHMS | {"md5"}, length=len(hash_test_data.data)
+    ).digest()
+    md5sum = {"md5": hashlib.md5(hash_test_data.data).digest()}
+    assert checksums == {**hash_test_data.checksums, **md5sum}
+
+
+def test_multi_hash_file_hexdigest_with_md5(hash_test_data):
+    fobj = io.BytesIO(hash_test_data.data)
+    length = len(hash_test_data.data)
+    checksums = MultiHash.from_file(
+        fobj, hash_names=DEFAULT_ALGORITHMS | {"md5"}, length=length
+    ).hexdigest()
+    md5sum = {"md5": hashlib.md5(hash_test_data.data).hexdigest()}
+    assert checksums == {**hash_test_data.hex_checksums, **md5sum}
+
+
+def test_multi_hash_file_bytehexdigest_with_md5(hash_test_data):
+    fobj = io.BytesIO(hash_test_data.data)
+    length = len(hash_test_data.data)
+    checksums = MultiHash.from_file(
+        fobj, hash_names=DEFAULT_ALGORITHMS | {"md5"}, length=length
+    ).bytehexdigest()
+    md5sum = {"md5": hash_to_bytehex(hashlib.md5(hash_test_data.data).digest())}
+    assert checksums == {**hash_test_data.bytehex_checksums, **md5sum}
+
+
 def test_multi_hash_file_missing_length(hash_test_data):
     fobj = io.BytesIO(hash_test_data.data)
     with pytest.raises(ValueError, match="Missing length"):
@@ -177,7 +207,7 @@ def test_new_hash_unsupported_hashing_algorithm():
     expected_message = (
         "Unexpected hashing algorithm blake2:10, "
         "expected one of blake2b512, blake2s256, "
-        "sha1, sha1_git, sha256"
+        "md5, sha1, sha1_git, sha256"
     )
     with pytest.raises(ValueError, match=expected_message):
         hashutil._new_hash("blake2:10")