Skip to content
Snippets Groups Projects
Verified Commit 556273ca authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

private/deposit_checks: Adapt reading tarballs using the File api

Using the File API is consistent across various storage backends. The previous
implementation only worked with file storage. As we are migrating towards remote
backends, this is no longer enough.

Unfortunately though, we cannot rely on zipfile.is_zipfile or tarfile.is_tarfile as
those only work with local file paths. So we need to try each archive format in turn,
trapping the exception raised on failure, to properly read the tarballs.

Refs. swh/infra/sysadm-environment#5129
parent 53c5765d
No related branches found
No related tags found
No related merge requests found
......@@ -114,20 +114,29 @@ class APIChecks(APIPrivateView, APIGet, DepositReadMixin):
<detail-error>) otherwise.
"""
archive_path = archive_request.archive.path
archive = archive_request.archive
archive_name = archive.name
if not known_archive_format(archive_path):
if not known_archive_format(archive_name):
return False, MANDATORY_ARCHIVE_UNSUPPORTED
try:
if zipfile.is_zipfile(archive_path):
with zipfile.ZipFile(archive_path) as zipfile_:
files = zipfile_.namelist()
elif tarfile.is_tarfile(archive_path):
with tarfile.open(archive_path) as tarfile_:
files = tarfile_.getnames()
else:
return False, MANDATORY_ARCHIVE_UNSUPPORTED
# Use python's File api which is consistent across different types of
# storage backends (e.g. file, azure, ...)
with archive.open("rb") as archive_fp:
try:
with zipfile.ZipFile(archive_fp) as zip_fp:
files = zip_fp.namelist()
except Exception:
try:
# rewind since the first tryout reading may have moved the
# cursor
archive_fp.seek(0)
with tarfile.open(fileobj=archive_fp) as tar_fp:
files = tar_fp.getnames()
except Exception:
return False, MANDATORY_ARCHIVE_UNSUPPORTED
except Exception:
return False, MANDATORY_ARCHIVE_UNREADABLE
if len(files) > 1:
......
......@@ -75,11 +75,11 @@ def test_deposit_ok(
@pytest.mark.parametrize("extension", ["zip", "tar", "tar.gz", "tar.bz2", "tar.xz"])
def test_deposit_invalid_tarball(
tmp_path, authenticated_client, deposit_collection, extension
tmp_path, authenticated_client, deposit_collection, extension, atom_dataset
):
"""Deposit with tarball (of 1 tarball) should fail the checks: rejected"""
deposit = create_deposit_archive_with_archive(
tmp_path, extension, authenticated_client, deposit_collection.name
tmp_path, extension, authenticated_client, deposit_collection.name, atom_dataset
)
for url in private_check_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment