From c773989051118788dbe2b305c1c7b33ade5f5fc1 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <vlorentz@softwareheritage.org> Date: Fri, 3 Mar 2023 12:15:33 +0100 Subject: [PATCH] Call storage.content_get_data() with hash dicts instead of only sha1 --- swh/vault/cookers/git_bare.py | 19 +++++++++++++++++-- swh/vault/to_disk.py | 9 ++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py index 453fc24..497876c 100644 --- a/swh/vault/cookers/git_bare.py +++ b/swh/vault/cookers/git_bare.py @@ -39,7 +39,18 @@ import re import subprocess import tarfile import tempfile -from typing import Any, Dict, Iterable, Iterator, List, NoReturn, Optional, Set, Tuple +from typing import ( + Any, + Dict, + Iterable, + Iterator, + List, + NoReturn, + Optional, + Set, + Tuple, + cast, +) import zlib import sentry_sdk @@ -63,6 +74,7 @@ from swh.model.model import ObjectType as ModelObjectType from swh.model.swhids import CoreSWHID, ObjectType from swh.storage.algos.revisions_walker import DFSRevisionsWalker from swh.storage.algos.snapshot import snapshot_get_all_branches +from swh.storage.interface import HashDict from swh.vault.cookers.base import BaseVaultCooker from swh.vault.to_disk import HIDDEN_MESSAGE, SKIPPED_MESSAGE @@ -690,7 +702,10 @@ class GitBareCooker(BaseVaultCooker): contents_and_data: Iterator[Tuple[Content, Optional[bytes]]] if self.objstorage is None: contents_and_data = ( - (content, self.storage.content_get_data(content.sha1)) + ( + content, + self.storage.content_get_data(cast(HashDict, content.hashes())), + ) for content in visible_contents ) else: diff --git a/swh/vault/to_disk.py b/swh/vault/to_disk.py index 2721642..b2b1650 100644 --- a/swh/vault/to_disk.py +++ b/swh/vault/to_disk.py @@ -11,7 +11,7 @@ from typing import Any, Dict, Iterator, List from swh.model import hashutil from swh.model.from_disk import DentryPerms, mode_to_perms from swh.storage.algos.dir_iterators import dir_iterator -from swh.storage.interface import StorageInterface +from swh.storage.interface import HashDict, StorageInterface MISSING_MESSAGE = ( b"This content is missing from the Software Heritage archive " @@ -48,8 +48,11 @@ def get_filtered_files_content( for file_data in files_data: status = file_data["status"] if status == "visible": - sha1 = file_data["sha1"] - data = storage.content_get_data(sha1) + hashes: HashDict = { + "sha1": file_data["sha1"], + "sha1_git": file_data["sha1_git"], + } + data = storage.content_get_data(hashes) if data is None: content = SKIPPED_MESSAGE else: -- GitLab