Skip to content
Snippets Groups Projects
Commit d639308d authored by Antoine Lambert
Browse files

to_disk: Perform directory recursive iteration client-side

To avoid storage server timeouts when recursively iterating on a large
file tree, perform that operation client-side.

Closes T1177
parent 31b3d34c
No related branches found
No related tags found
1 merge request: !29 vault.to_disk: Perform directory recursive iteration client-side
......@@ -17,7 +17,7 @@ Build-Depends: debhelper (>= 9),
python3-swh.model (>= 0.0.18~),
python3-swh.objstorage (>= 0.0.17~),
python3-swh.scheduler (>= 0.0.26~),
python3-swh.storage (>= 0.0.100~),
python3-swh.storage (>= 0.0.106~),
python3-vcversioner
Standards-Version: 3.9.6
Homepage: https://forge.softwareheritage.org/diffusion/DVAU/
......@@ -28,7 +28,7 @@ Depends: python3-swh.core (>= 0.0.28~),
python3-swh.model (>= 0.0.18~),
python3-swh.objstorage (>= 0.0.17~),
python3-swh.scheduler (>= 0.0.26~),
python3-swh.storage (>= 0.0.100~),
python3-swh.storage (>= 0.0.106~),
${misc:Depends},
${python3:Depends}
Description: Software Heritage Vault
......@@ -2,4 +2,4 @@ swh.core >= 0.0.40
swh.model >= 0.0.18
swh.objstorage >= 0.0.17
swh.scheduler >= 0.0.26
swh.storage >= 0.0.100
swh.storage >= 0.0.106
......@@ -9,6 +9,7 @@ import os
from swh.model import hashutil
from swh.model.from_disk import mode_to_perms, DentryPerms
from swh.storage.algos.dir_iterators import dir_iterator
SKIPPED_MESSAGE = (b'This content has not been retrieved in the '
b'Software Heritage archive due to its size.')
......@@ -73,11 +74,9 @@ class DirectoryBuilder:
def build(self):
"""Perform the reconstruction of the directory in the given root."""
# Retrieve data from the database.
data = self.storage.directory_ls(self.dir_id, recursive=True)
# Split into files, revisions and directory data.
entries = collections.defaultdict(list)
for entry in data:
for entry in dir_iterator(self.storage, self.dir_id):
entries[entry['type']].append(entry)
# Recreate the directory's subtree and then the files into it.
......@@ -95,9 +94,9 @@ class DirectoryBuilder:
# right order
bsep = os.path.sep.encode()
directories = sorted(directories,
key=lambda x: len(x['name'].split(bsep)))
key=lambda x: len(x['path'].split(bsep)))
for dir in directories:
os.makedirs(os.path.join(self.root, dir['name']))
os.makedirs(os.path.join(self.root, dir['path']))
def _create_files(self, files_data):
"""Create the files in the tree and fetch their contents."""
......@@ -105,14 +104,14 @@ class DirectoryBuilder:
files_data = apply_chunked(f, files_data, 1000)
for file_data in files_data:
path = os.path.join(self.root, file_data['name'])
path = os.path.join(self.root, file_data['path'])
self._create_file(path, file_data['content'], file_data['perms'])
def _create_revisions(self, revs_data):
"""Create the revisions in the tree as broken symlinks to the target
identifier."""
for file_data in revs_data:
path = os.path.join(self.root, file_data['name'])
path = os.path.join(self.root, file_data['path'])
self._create_file(path, hashutil.hash_to_hex(file_data['target']),
mode=0o120000)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment