From c45625f61a9b8374cdcaf7ee65df331cf1e8f07a Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Wed, 21 Feb 2024 11:36:35 +0100 Subject: [PATCH] tests: Ensure nar hash extid matches nar hash of target Add an extra check in function fetch_extids_from_checksums to ensure a NAR hash extid matches the NAR hash of the targeted archived object. Related to swh/infra/sysadm-environment#5256. --- requirements-test.txt | 1 + swh/loader/tests/__init__.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/requirements-test.txt b/requirements-test.txt index f9a1547b..ba4ed4b2 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -5,6 +5,7 @@ urllib3 swh-core[testing] swh-scheduler[testing] >= 2.0.0 swh-storage[testing] >= 2.0.0 +swh-vault types-click types-python-dateutil types-pyyaml diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py index 0046cfee..119f3b18 100644 --- a/swh/loader/tests/__init__.py +++ b/swh/loader/tests/__init__.py @@ -7,13 +7,17 @@ from collections import defaultdict import os from pathlib import PosixPath import subprocess +import tempfile from typing import Dict, Iterable, List, Optional, Tuple, Union +from swh.loader.core.nar import Nar from swh.model.hashutil import hash_to_bytes from swh.model.model import ExtID, OriginVisitStatus, Snapshot, TargetType +from swh.model.swhids import ObjectType from swh.storage.algos.origin import origin_get_latest_visit_status from swh.storage.algos.snapshot import snapshot_get_all_branches from swh.storage.interface import StorageInterface +from swh.vault.to_disk import DirectoryBuilder def assert_last_visit_matches( @@ -283,6 +287,32 @@ def fetch_extids_from_checksums( extid = storage.extid_get_from_extid(id_type, ids, extid_version) extids.extend(extid) + for extid_ in extids: + if extid_.extid_type.startswith("nar-"): + # check NAR hashes of archived directory or content match the expected ones + target_swhid = extid_.target + with tempfile.TemporaryDirectory() as tmp_dir: + nar = Nar(hash_names=list(checksums.keys())) + if target_swhid.object_type == ObjectType.DIRECTORY: + dir_builder = DirectoryBuilder( + storage=storage, + root=tmp_dir.encode(), + dir_id=target_swhid.object_id, + ) + dir_builder.build() + path_to_hash = tmp_dir + else: + path_to_hash = os.path.join(tmp_dir, "content") + content_bytes = storage.content_get_data( + {"sha1_git": target_swhid.object_id} + ) + assert content_bytes is not None + with open(path_to_hash, "wb") as content: + content.write(content_bytes) + + nar.serialize(PosixPath(path_to_hash)) + assert nar.hexdigest() == checksums + return extids -- GitLab