Skip to content
Snippets Groups Projects
Commit 141b4531 authored by vlorentz
Browse files

Fix crash on directories pointing to missing contents

This can happen for various reasons, such as running the vault on
a mirror that is only partially replayed.
parent 6da99426
No related branches found
No related tags found
1 merge request: !155 "Fix crash on directories pointing to missing contents"
......@@ -70,16 +70,16 @@ def test_get_filtered_files_content__unknown_status(swh_storage):
"target": content.sha1_git,
},
{
"status": None,
"status": "blah",
"target": b"c" * 20,
},
]
with pytest.raises(AssertionError, match="unexpected status None"):
with pytest.raises(AssertionError, match="unexpected status 'blah'"):
list(get_filtered_files_content(swh_storage, files_data))
def test_directory_builder(swh_storage, tmp_path):
def _fill_storage(swh_storage, exclude_cnt3=False):
cnt1 = Content.from_data(b"foo bar")
cnt2 = Content.from_data(b"bar baz")
cnt3 = Content.from_data(b"baz qux")
......@@ -115,9 +115,18 @@ def test_directory_builder(swh_storage, tmp_path):
),
)
)
swh_storage.content_add([cnt1, cnt2, cnt3])
if exclude_cnt3:
swh_storage.content_add([cnt1, cnt2])
else:
swh_storage.content_add([cnt1, cnt2, cnt3])
swh_storage.directory_add([dir1, dir2])
return dir2
def test_directory_builder(swh_storage, tmp_path):
dir2 = _fill_storage(swh_storage)
root = tmp_path / "root"
builder = DirectoryBuilder(swh_storage, bytes(root), dir2.id)
......@@ -132,3 +141,22 @@ def test_directory_builder(swh_storage, tmp_path):
root / "subdirectory" / "content2",
root / "content3",
}
assert (root / "subdirectory" / "content1").open().read() == "foo bar"
assert (root / "subdirectory" / "content2").open().read() == "bar baz"
assert (root / "content3").open().read() == "baz qux"
def test_directory_builder_missing_content(swh_storage, tmp_path):
    """Check that building a directory succeeds when a content is missing.

    The storage is filled with ``exclude_cnt3=True``, so the directory
    references a content that was never added to the storage. The build
    is expected to complete and to write a placeholder message in place
    of the missing content.
    """
    dir2 = _fill_storage(swh_storage, exclude_cnt3=True)

    root = tmp_path / "root"
    builder = DirectoryBuilder(swh_storage, bytes(root), dir2.id)

    assert not root.exists()

    builder.build()

    assert root.is_dir()

    # read_text() opens and closes the file itself; a bare open().read()
    # would leave the handle open until garbage collection.
    assert "This content is missing" in (root / "content3").read_text()
......@@ -13,6 +13,11 @@ from swh.model.from_disk import DentryPerms, mode_to_perms
from swh.storage.algos.dir_iterators import dir_iterator
from swh.storage.interface import StorageInterface
MISSING_MESSAGE = (
b"This content is missing from the Software Heritage archive "
b"(or from the mirror used while retrieving it)."
)
SKIPPED_MESSAGE = (
b"This content has not been retrieved in the "
b"Software Heritage archive due to its size."
......@@ -42,17 +47,19 @@ def get_filtered_files_content(
"""
for file_data in files_data:
status = file_data["status"]
if status == "absent":
content = SKIPPED_MESSAGE
elif status == "hidden":
content = HIDDEN_MESSAGE
elif status == "visible":
if status == "visible":
sha1 = file_data["sha1"]
data = storage.content_get_data(sha1)
if data is None:
content = SKIPPED_MESSAGE
else:
content = data
elif status == "absent":
content = SKIPPED_MESSAGE
elif status == "hidden":
content = HIDDEN_MESSAGE
elif status is None:
content = MISSING_MESSAGE
else:
assert False, (
f"unexpected status {status!r} "
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment