From 00cc60839e3aefda25c6c907df5d8e39d3bb6b6e Mon Sep 17 00:00:00 2001 From: David Douard <david.douard@sdfa3.org> Date: Mon, 16 Dec 2019 12:56:14 +0100 Subject: [PATCH] Move load-deposit task creation code in the deposit-check private API endpoint Also replace utils.origin_url_from() by a Deposit.origin_url property, and ensure the archive file format of the uploaded file is supported. The rename of invalid.tar.gz as invalid.gz is required for tests to pass which should not be the case. It will be investigated in a later revision, but for now we want tests to be green. This requires to make config dict available in SWHPrivateAPIView so we can have access to a scheduler from there. --- swh/deposit/api/common.py | 11 +----- swh/deposit/api/private/__init__.py | 4 +- swh/deposit/api/private/deposit_check.py | 19 ++++++++++ swh/deposit/api/private/deposit_read.py | 3 +- swh/deposit/client/__init__.py | 5 ++- swh/deposit/models.py | 5 +++ .../tests/api/test_deposit_private_check.py | 2 +- swh/deposit/tests/test_utils.py | 37 ------------------- swh/deposit/utils.py | 25 ------------- 9 files changed, 32 insertions(+), 79 deletions(-) diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index a53b2d6e..dbff46e0 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -19,13 +19,12 @@ from rest_framework.views import APIView from swh.model import hashutil from swh.scheduler.utils import create_oneshot_task_dict -from swh.deposit.utils import origin_url_from from ..config import ( SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI, ARCHIVE_KEY, METADATA_KEY, RAW_METADATA_KEY, STATE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, - DEPOSIT_STATUS_VERIFIED, PRIVATE_CHECK_DEPOSIT, + PRIVATE_CHECK_DEPOSIT, DEPOSIT_STATUS_LOAD_SUCCESS, ARCHIVE_TYPE, METADATA_TYPE ) from ..errors import ( @@ -184,14 +183,6 @@ class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta): 'check-deposit', deposit_check_url=check_url) check_task_id = scheduler.create_tasks([task])[0]['id'] deposit.check_task_id = check_task_id - elif (deposit.status == DEPOSIT_STATUS_VERIFIED and - not deposit.load_task_id): - - url = origin_url_from(deposit) - task = create_oneshot_task_dict( - 'load-deposit', url=url, deposit_id=deposit.id) - load_task_id = scheduler.create_task([task])[0]['id'] - deposit.load_task_id = load_task_id deposit.save() diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py index 4ed1154d..f0f86945 100644 --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -5,7 +5,7 @@ from swh.deposit import utils -from ...config import METADATA_TYPE +from ...config import METADATA_TYPE, SWHDefaultConfig from ...models import DepositRequest, Deposit from rest_framework.permissions import AllowAny @@ -56,7 +56,7 @@ class DepositReadMixin: return utils.merge(*metadata) -class SWHPrivateAPIView(SWHAPIView): +class SWHPrivateAPIView(SWHDefaultConfig, SWHAPIView): """Mixin intended as private api (so no authentication) based API view (for the private ones). diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index 8961d914..7f0387fa 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -8,8 +8,12 @@ import re import tarfile import zipfile +from itertools import chain +from shutil import get_unpack_formats + from rest_framework import status +from swh.scheduler.utils import create_oneshot_task_dict from . import DepositReadMixin, SWHPrivateAPIView from ..common import SWHGetDepositAPI @@ -33,6 +37,11 @@ PATTERN_ARCHIVE_EXTENSION = re.compile( r'.*\.(%s)$' % '|'.join(ARCHIVE_EXTENSIONS)) +def known_archive_format(filename): + return any(filename.endswith(t) for t in + chain(*(x[1] for x in get_unpack_formats()))) + + class SWHChecksDeposit(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. @@ -92,6 +101,10 @@ class SWHChecksDeposit(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """ archive_path = archive_request.archive.path + + if not known_archive_format(archive_path): + return False, MANDATORY_ARCHIVE_UNSUPPORTED + try: if zipfile.is_zipfile(archive_path): with zipfile.ZipFile(archive_path) as f: @@ -203,6 +216,12 @@ class SWHChecksDeposit(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): response = { 'status': deposit.status, } + if not deposit.load_task_id and self.config['checks']: + url = deposit.origin_url + task = create_oneshot_task_dict( + 'load-deposit', url=url, deposit_id=deposit.id) + load_task_id = self.scheduler.create_tasks([task])[0]['id'] + deposit.load_task_id = load_task_id deposit.save() diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py index 8b834b04..c9a575c5 100644 --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -15,7 +15,6 @@ from rest_framework import status from swh.core import tarball from swh.model import identifiers from swh.deposit.utils import normalize_date -from swh.deposit import utils from . import DepositReadMixin, SWHPrivateAPIView from ...config import SWH_PERSON, ARCHIVE_TYPE @@ -177,7 +176,7 @@ class SWHDepositReadMetadata(SWHPrivateAPIView, SWHGetDepositAPI, data = { 'origin': { 'type': 'deposit', - 'url': utils.origin_url_from(deposit), + 'url': deposit.origin_url, } } diff --git a/swh/deposit/client/__init__.py b/swh/deposit/client/__init__.py index b4b7f7e2..35b34193 100644 --- a/swh/deposit/client/__init__.py +++ b/swh/deposit/client/__init__.py @@ -14,6 +14,7 @@ import xmltodict import logging from abc import ABCMeta, abstractmethod +from urllib.parse import urljoin from swh.core.config import SWHConfig @@ -82,7 +83,7 @@ class BaseApiDepositClient(SWHConfig): self.config = config self._client = _client - self.base_url = self.config['url'] + self.base_url = self.config['url'].strip('/') + '/' auth = self.config['auth'] if auth == {}: self.auth = None @@ -109,7 +110,7 @@ class BaseApiDepositClient(SWHConfig): if self.auth: kwargs['auth'] = self.auth - full_url = '%s%s' % (self.base_url.rstrip('/'), url) + full_url = urljoin(self.base_url, url.lstrip('/')) return method_fn(full_url, *args, **kwargs) diff --git a/swh/deposit/models.py b/swh/deposit/models.py index e7c5440d..1a7a78ea 100644 --- a/swh/deposit/models.py +++ b/swh/deposit/models.py @@ -151,6 +151,11 @@ class Deposit(models.Model): d['status_detail'] = self.status_detail return str(d) + @property + def origin_url(self): + return '%s/%s' % (self.client.provider_url.rstrip('/'), + self.external_id) + def client_directory_path(instance, filename): """Callable to upload archive in MEDIA_ROOT/user_<id>/<filename> diff --git a/swh/deposit/tests/api/test_deposit_private_check.py b/swh/deposit/tests/api/test_deposit_private_check.py index 1c90113a..50f0b1d0 100644 --- a/swh/deposit/tests/api/test_deposit_private_check.py +++ b/swh/deposit/tests/api/test_deposit_private_check.py @@ -237,7 +237,7 @@ def create_deposit_archive_with_archive( # now we create an archive holding the first created archive invalid_archive = create_archive_with_archive( - root_path, 'invalid.tar.gz', archive) + root_path, 'invalid.tgz', archive) # we deposit it response = client.post( diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py index f44710a6..a7486f7e 100644 --- a/swh/deposit/tests/test_utils.py +++ b/swh/deposit/tests/test_utils.py @@ -8,43 +8,6 @@ import pytest from unittest.mock import patch from swh.deposit import utils -from swh.deposit.models import Deposit, DepositClient - - -def test_origin_url_from(): - """With correctly setup-ed deposit, all is fine - - """ - for provider_url, external_id in ( - ('http://somewhere.org', 'uuid'), - ('http://overthejungle.org', 'diuu'), - ): - deposit = Deposit( - client=DepositClient(provider_url=provider_url), - external_id=external_id - ) - - actual_origin_url = utils.origin_url_from(deposit) - - assert actual_origin_url == '%s/%s' % ( - provider_url.rstrip('/'), external_id) - - -def test_origin_url_from_ko(): - """Badly configured deposit should raise - - """ - for provider_url, external_id in ( - (None, 'uuid'), - ('http://overthejungle.org', None), - ): - deposit = Deposit( - client=DepositClient(provider_url=provider_url), - external_id=None - ) - - with pytest.raises(AssertionError): - utils.origin_url_from(deposit) def test_merge(): diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py index beb31ef6..86775ac3 100644 --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -10,31 +10,6 @@ from types import GeneratorType from swh.model.identifiers import normalize_timestamp -def origin_url_from(deposit): - """Given a deposit instance, return the associated origin url. - - This expects a deposit and the associated client to be correctly - configured. - - Args: - deposit (Deposit): The deposit from which derives the origin url - - Raises: - AssertionError if: - - the client's provider_url field is not configured. - - the deposit's external_id field is not configured. - - Returns - The associated origin url - - """ - external_id = deposit.external_id - assert external_id is not None - base_url = deposit.client.provider_url - assert base_url is not None - return '%s/%s' % (base_url.rstrip('/'), external_id) - - def merge(*dicts): """Given an iterator of dicts, merge them losing no information. -- GitLab