From 3417639a6ddd6f877dfe23f84c58a60e0296d497 Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" <ardumont@softwareheritage.org> Date: Mon, 20 Nov 2023 14:36:41 +0100 Subject: [PATCH] private/deposit_checks: Ensure checks is done on filename If done on the full path, the conditional technically fails the check for the wrong reason. This also adds tests on this case. Refs. swh/infra/sysadm-environment#5129 --- swh/deposit/api/private/deposit_check.py | 5 ++- .../tests/api/test_deposit_private_check.py | 42 +++++++++++++++++-- swh/deposit/tests/common.py | 30 ++++++++----- 3 files changed, 62 insertions(+), 15 deletions(-) diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index 3b9c1271..d803173e 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,9 +1,10 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from itertools import chain +import os import re from shutil import get_unpack_formats import tarfile @@ -115,7 +116,7 @@ class APIChecks(APIPrivateView, APIGet, DepositReadMixin): """ archive = archive_request.archive - archive_name = archive.name + archive_name = os.path.basename(archive.name) if not known_archive_format(archive_name): return False, MANDATORY_ARCHIVE_UNSUPPORTED diff --git a/swh/deposit/tests/api/test_deposit_private_check.py b/swh/deposit/tests/api/test_deposit_private_check.py index 084e32e1..dca61b6c 100644 --- a/swh/deposit/tests/api/test_deposit_private_check.py +++ b/swh/deposit/tests/api/test_deposit_private_check.py @@ -1,8 +1,10 @@ -# Copyright (C) 2017-2022 The Software Heritage developers +# Copyright (C) 2017-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import random + from django.urls import reverse_lazy as reverse import pytest from rest_framework import status @@ -30,7 +32,7 @@ from swh.deposit.tests.common import ( ) from swh.deposit.utils import NAMESPACES -from ..common import post_archive, post_atom +from ..common import SUPPORTED_TARBALL_MODES, post_archive, post_atom PRIVATE_CHECK_DEPOSIT_NC = PRIVATE_CHECK_DEPOSIT + "-nc" @@ -122,7 +124,7 @@ def test_deposit_ko_missing_tarball( def test_deposit_ko_unsupported_tarball( tmp_path, authenticated_client, deposit_collection, ready_deposit_invalid_archive ): - """Deposit with an unsupported tarball should fail the checks: rejected""" + """Deposit with unsupported tarball should fail checks and be rejected""" deposit = ready_deposit_invalid_archive assert DEPOSIT_STATUS_DEPOSITED == deposit.status @@ -149,6 +151,40 @@ def test_deposit_ko_unsupported_tarball( deposit.save() +def test_deposit_ko_unsupported_tarball_prebasic_check( + tmp_path, authenticated_client, deposit_collection, atom_dataset +): + """Deposit with unsupported tarball extension should fail checks and be rejected""" + + invalid_gz_mode = random.choice( + [f"{ext}-foobar" for ext in SUPPORTED_TARBALL_MODES] + ) + invalid_extension = f"tar.{invalid_gz_mode}" + + deposit = create_deposit_with_archive( + tmp_path, + invalid_extension, + authenticated_client, + deposit_collection.name, + atom_dataset, + ) + assert DEPOSIT_STATUS_DEPOSITED == deposit.status + for url in private_check_url_endpoints(deposit_collection, deposit): + response = authenticated_client.get(url) + + assert response.status_code == status.HTTP_200_OK + data = response.json() + + assert data["status"] == DEPOSIT_STATUS_REJECTED + details = data["details"] + # archive checks failure + assert len(details["archive"]) == 1 + assert details["archive"][0]["summary"] == MANDATORY_ARCHIVE_UNSUPPORTED + + deposit = Deposit.objects.get(pk=deposit.id) + assert deposit.status == DEPOSIT_STATUS_REJECTED + + def test_check_deposit_metadata_ok( authenticated_client, deposit_collection, ready_deposit_ok ): diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py index 2fbf8782..e7b8971c 100644 --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2019 The Software Heritage developers +# Copyright (C) 2017-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -14,6 +14,8 @@ from django.core.files.uploadedfile import InMemoryUploadedFile from swh.core import tarball +SUPPORTED_TARBALL_MODES = ["xz", "gz", "bz2"] + def compute_info(archive_path): """Given a path, compute information on path.""" @@ -40,7 +42,13 @@ def compute_info(archive_path): def _compress(path, extension, dir_path): - """Compress path according to extension""" + """Compress path according to extension in the dir_path. + + Note: For test purposes, extension can be suffixed with -foobar. This will create a + tarball nonethess (with extension -foobar stripped, if the mode is supported). This + is to ensure those "improper" tarball gets filtered out by technical checks. + + """ if extension == "zip" or extension == "tar": return tarball.compress(path, extension, dir_path) elif "." in extension: @@ -48,19 +56,21 @@ def _compress(path, extension, dir_path): if split_ext[0] != "tar": raise ValueError( "Development error, only zip or tar archive supported, " - "%s not supported" % extension + f"{extension} not supported" ) - # deal with specific tar - mode = split_ext[1] - supported_mode = ["xz", "gz", "bz2"] - if mode not in supported_mode: + # Deal with specific tar. For test purposes, we can create dummy + # {extension}-foobar tarballs (they will be rejected later) + mode = split_ext[1].rstrip("-foobar") + + if mode not in SUPPORTED_TARBALL_MODES: raise ValueError( - "Development error, only %s supported, %s not supported" - % (supported_mode, mode) + f"Development error, only {SUPPORTED_TARBALL_MODES} supported, " + f"{mode} not supported" ) + files = tarball._ls(dir_path) - with tarfile.open(path, "w:%s" % mode) as t: + with tarfile.open(path, f"w:{mode}") as t: for fpath, fname in files: t.add(fpath, arcname=fname, recursive=False) -- GitLab