From c45625f61a9b8374cdcaf7ee65df331cf1e8f07a Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Wed, 21 Feb 2024 11:36:35 +0100
Subject: [PATCH] tests: Ensure nar hash extid matches nar hash of target

Add an extra check in function fetch_extids_from_checksums to ensure
a NAR hash extid matches the NAR hash of the targeted archived object.

Related to swh/infra/sysadm-environment#5256.
---
 requirements-test.txt        |  1 +
 swh/loader/tests/__init__.py | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/requirements-test.txt b/requirements-test.txt
index f9a1547b..ba4ed4b2 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -5,6 +5,7 @@ urllib3
 swh-core[testing]
 swh-scheduler[testing] >= 2.0.0
 swh-storage[testing] >= 2.0.0
+swh-vault
 types-click
 types-python-dateutil
 types-pyyaml
diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py
index 0046cfee..119f3b18 100644
--- a/swh/loader/tests/__init__.py
+++ b/swh/loader/tests/__init__.py
@@ -7,13 +7,17 @@ from collections import defaultdict
 import os
 from pathlib import PosixPath
 import subprocess
+import tempfile
 from typing import Dict, Iterable, List, Optional, Tuple, Union
 
+from swh.loader.core.nar import Nar
 from swh.model.hashutil import hash_to_bytes
 from swh.model.model import ExtID, OriginVisitStatus, Snapshot, TargetType
+from swh.model.swhids import ObjectType
 from swh.storage.algos.origin import origin_get_latest_visit_status
 from swh.storage.algos.snapshot import snapshot_get_all_branches
 from swh.storage.interface import StorageInterface
+from swh.vault.to_disk import DirectoryBuilder
 
 
 def assert_last_visit_matches(
@@ -283,6 +287,32 @@ def fetch_extids_from_checksums(
             extid = storage.extid_get_from_extid(id_type, ids, extid_version)
             extids.extend(extid)
 
+        for extid_ in extids:
+            if extid_.extid_type.startswith("nar-"):
+                # check NAR hashes of archived directory or content match the expected ones
+                target_swhid = extid_.target
+                with tempfile.TemporaryDirectory() as tmp_dir:
+                    nar = Nar(hash_names=list(checksums.keys()))
+                    if target_swhid.object_type == ObjectType.DIRECTORY:
+                        dir_builder = DirectoryBuilder(
+                            storage=storage,
+                            root=tmp_dir.encode(),
+                            dir_id=target_swhid.object_id,
+                        )
+                        dir_builder.build()
+                        path_to_hash = tmp_dir
+                    else:
+                        path_to_hash = os.path.join(tmp_dir, "content")
+                        content_bytes = storage.content_get_data(
+                            {"sha1_git": target_swhid.object_id}
+                        )
+                        assert content_bytes is not None
+                        with open(path_to_hash, "wb") as content:
+                            content.write(content_bytes)
+
+                    nar.serialize(PosixPath(path_to_hash))
+                    assert nar.hexdigest() == checksums
+
     return extids
 
 
-- 
GitLab