Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • lunar/swh-deposit
  • anlambert/swh-deposit
  • swh/devel/swh-deposit
  • douardda/swh-deposit
  • ardumont/swh-deposit
  • marmoute/swh-deposit
  • rboyer/swh-deposit
7 results
Show changes
Showing
with 1392 additions and 913 deletions
:orphan:
This page was moved to: :ref:`deposit-user-manual`
[project]
name = "swh.deposit"
authors = [
{name="Software Heritage developers", email="swh-devel@inria.fr"},
]
description = "Software Heritage deposit server"
readme = {file = "README.rst", content-type = "text/x-rst"}
requires-python = ">=3.7"
classifiers = [
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: OS Independent",
"Development Status :: 5 - Production/Stable",
]
dynamic = ["version", "dependencies", "optional-dependencies"]
[tool.setuptools.packages.find]
include = ["swh.*"]
[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt", "requirements-swh.txt"]}
[tool.setuptools.dynamic.optional-dependencies]
server = {file = ["requirements-server.txt", "requirements-swh-server.txt"]}
azure = {file = ["requirements-azure.txt"]}
testing = {file = [
"requirements-test.txt",
"requirements-server.txt",
"requirements-swh-server.txt",
"requirements-azure.txt",
]}
[project.entry-points."swh.cli.subcommands"]
"swh.deposit" = "swh.deposit.cli"
[project.entry-points."swh.workers"]
"swh.deposit" = "swh.deposit.loader:register"
[project.urls]
"Homepage" = "https://gitlab.softwareheritage.org/swh/devel/swh-deposit"
"Bug Reports" = "https://gitlab.softwareheritage.org/swh/devel/swh-deposit/-/issues"
"Funding" = "https://www.softwareheritage.org/donate"
"Documentation" = "https://docs.softwareheritage.org/devel/swh-deposit/"
"Source" = "https://gitlab.softwareheritage.org/swh/devel/swh-deposit.git"
[build-system]
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"
[tool.setuptools_scm]
fallback_version = "0.0.1"
[tool.black]
target-version = ['py39', 'py310', 'py311', 'py312']
[tool.isort]
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
ensure_newline_before_comments = true
line_length = 88
force_sort_within_sections = true
known_first_party = ['swh']
[tool.django-stubs]
django_settings_module = "swh.deposit.settings.development"
[tool.mypy]
namespace_packages = true
warn_unused_ignores = true
explicit_package_bases = true
# ^ Needed for mypy to detect py.typed from swh packages installed
# in editable mode
plugins = [
"mypy_django_plugin.main",
]
# 3rd party libraries without stubs (yet)
[[tool.mypy.overrides]]
module = [
"storages.backends.azure_storage.*"
]
ignore_missing_imports = true
[tool.flake8]
select = ["C", "E", "F", "W", "B950"]
ignore = [
"E203", # whitespaces before ':' <https://github.com/psf/black/issues/315>
"E231", # missing whitespace after ','
"E501", # line too long, use B950 warning from flake8-bugbear instead
"W503" # line break before binary operator <https://github.com/psf/black/issues/52>
]
max-line-length = 88
[tool.pytest.ini_options]
addopts = """
-p no:flask
-p no:pytest_swh_scheduler
-p no:pytest_swh_storage
-p no:pytest_swh_core
--ignore=swh/deposit/settings
--strict-markers
"""
norecursedirs = "build docs .*"
asyncio_mode = "strict"
consider_namespace_packages = true
DJANGO_SETTINGS_MODULE = "swh.deposit.settings.testing"
[pytest]
norecursedirs = docs
DJANGO_SETTINGS_MODULE = swh.deposit.settings.testing
django-storages[azure]
Django < 2.0
django
djangorestframework
psycopg2
setuptools
pymemcache
swh.core[http]
swh.loader.tar >= 0.0.39
swh.loader.core >= 0.0.32
swh.scheduler >= 0.0.39
swh.model >= 0.0.26
swh.core[http] >= 0.4
swh.loader.core >= 0.0.71
swh.scheduler >= 2.3.0
swh.model >= 6.13.0
swh.auth[django] >= 0.5.3
swh.storage >= 0.28.0
swh.core >= 0.0.60
# These dependencies will be installed by deposit clients.
# Server dependencies go to requirements-swh-server.txt instead.
swh.core[http] >= 3.0.0
swh.model >= 6.13.0
pytest<4
pytest
pytest-django
swh.scheduler[testing]
pytest-mock
swh.scheduler[testing] >= 2.3.0
swh.loader.core[testing]
pytest-postgresql >= 5
requests_mock
django-stubs
djangorestframework-stubs >= 1.4
django-test-migrations
celery-types
types-requests
types-pyyaml
vcversioner
# These dependencies will be installed by deposit clients.
# Server dependencies go to requirements-server.txt instead.
click
xmltodict
iso8601
requests
sentry-sdk
xmlschema
#!/usr/bin/env python3
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from setuptools import setup, find_packages
from os import path
from io import open
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
long_description = f.read()
def parse_requirements(*names):
    """Collect requirement specifiers from the project's requirements files.

    Args:
        *names: selectors for the files to read. A falsy name (e.g. ``None``)
            selects ``requirements.txt``; any other name selects
            ``requirements-<name>.txt``. Paths are resolved against the
            current working directory.

    Returns:
        list: the stripped, non-empty, non-comment lines of every file that
        exists, in the order the files were requested.
    """
    requirements = []
    for name in names:
        reqf = 'requirements-%s.txt' % name if name else 'requirements.txt'
        if not path.exists(reqf):
            # A missing file must not hide the files requested after it
            continue
        with open(reqf) as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith('#'):
                    requirements.append(line)
    return requirements
# Package metadata and build configuration for the swh.deposit distribution.
setup(
name='swh.deposit',
description='Software Heritage Deposit Server',
long_description=long_description,
long_description_content_type='text/markdown',
author='Software Heritage developers',
author_email='swh-devel@inria.fr',
url='https://forge.softwareheritage.org/source/swh-deposit/',
packages=find_packages(),
# Runtime dependencies: requirements.txt plus requirements-swh.txt
install_requires=parse_requirements(None, 'swh'),
# Test dependencies: requirements-test.txt
tests_require=parse_requirements('test'),
setup_requires=['vcversioner'],
extras_require={
# 'testing' reads the same files as 'server', preceded by requirements-test.txt
'testing': parse_requirements('test', 'server', 'swh-server'),
'server': parse_requirements('server', 'swh-server')},
# NOTE(review): presumably delegates version computation to vcversioner
# (listed in setup_requires above) -- confirm against its documentation
vcversioner={},
include_package_data=True,
# Declares a `swh-deposit` console script and a `deposit` entry in the
# `swh.cli.subcommands` entry-point group
entry_points='''
[console_scripts]
swh-deposit=swh.deposit.cli:main
[swh.cli.subcommands]
deposit=swh.deposit.cli:deposit
''',
classifiers=[
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: OS Independent",
"Development Status :: 5 - Production/Stable",
],
project_urls={
'Bug Reports': 'https://forge.softwareheritage.org/maniphest',
'Funding': 'https://www.softwareheritage.org/donate',
'Source': 'https://forge.softwareheritage.org/source/swh-deposit',
},
)
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
# Copyright (C) 2020-2025 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from importlib.metadata import PackageNotFoundError, distribution
# Expose the version recorded in the installed "swh-deposit" distribution
# metadata; fall back to the "devel" placeholder when no such distribution
# is found.
try:
__version__ = distribution("swh-deposit").version
except PackageNotFoundError:
__version__ = "devel"
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# Copyright (C) 2017-2018 The Software Heritage developers
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from rest_framework import status
from .common import SWHPostDepositAPI, ACCEPT_ARCHIVE_CONTENT_TYPES
from ..config import EDIT_SE_IRI
from ..errors import make_error_dict, BAD_REQUEST
from ..parsers import SWHFileUploadZipParser, SWHFileUploadTarParser
from ..parsers import SWHAtomEntryParser
from ..parsers import SWHMultiPartParser
from typing import Optional, Tuple
class SWHDeposit(SWHPostDepositAPI):
from django.shortcuts import render
from rest_framework import status
from rest_framework.generics import ListAPIView
from swh.deposit.api.common import (
ACCEPT_ARCHIVE_CONTENT_TYPES,
APIPost,
ParsedRequestHeaders,
Receipt,
get_collection_by_name,
)
from swh.deposit.api.utils import DefaultPagination, DepositSerializer
from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS, EDIT_IRI
from swh.deposit.models import Deposit
from swh.deposit.parsers import (
SWHAtomEntryParser,
SWHFileUploadTarParser,
SWHFileUploadZipParser,
SWHMultiPartParser,
)
class CollectionAPI(ListAPIView, APIPost):
"""Deposit request class defining api endpoints for sword deposit.
What's known as 'Col IRI' in the sword specification.
What's known as 'Col-IRI' in the sword specification.
HTTP verbs supported: POST
HTTP verbs supported: GET and POST
"""
parser_classes = (SWHMultiPartParser,
SWHFileUploadZipParser,
SWHFileUploadTarParser,
SWHAtomEntryParser)
def additional_checks(self, req, headers, collection_name,
deposit_id=None):
slug = headers['slug']
if not slug:
msg = 'Missing SLUG header in request'
verbose_description = 'Provide in the SLUG header one identifier, for example the url pointing to the resource you are depositing.' # noqa
return make_error_dict(BAD_REQUEST, msg, verbose_description)
return {}
def process_post(self, req, headers, collection_name, deposit_id=None):
parser_classes = (
SWHMultiPartParser,
SWHFileUploadZipParser,
SWHFileUploadTarParser,
SWHAtomEntryParser,
)
serializer_class = DepositSerializer
pagination_class = DefaultPagination
def get(self, request, *args, **kwargs):
    """Serve the paginated deposit listing of a collection as XML.

    Runs the view's ``checks`` on the collection named by the
    ``collection_name`` URL keyword, delegates pagination to the parent
    ``get``, then renders the page with the ``deposit/collection_list.xml``
    template. The URLs of the neighbouring pages, when present, are exposed
    through the ``Link`` response header.
    """
    self.checks(request, kwargs["collection_name"])
    data = super().get(request, *args, **kwargs).data

    # Pair each relation name with its URL (None when there is no such page)
    candidates = ((name, data.get(name)) for name in ("next", "previous"))
    links = [
        f'<{link}>; rel="{link_name}"'
        for link_name, link in candidates
        if link is not None
    ]

    context = {
        "count": data["count"],
        "results": [dict(entry) for entry in data["results"]],
    }
    response = render(
        request,
        "deposit/collection_list.xml",
        context=context,
        content_type="application/xml",
        status=status.HTTP_200_OK,
    )
    response["Link"] = ",".join(links)
    return response
def get_queryset(self):
    """Return the deposits whose client is the request's user, by ascending id.

    Pagination itself is applied through the `pagination_class` class
    attribute, not here.
    """
    owner_id = self.request.user.id
    return Deposit.objects.filter(client=owner_id).order_by("id")
def process_post(
self,
req,
headers: ParsedRequestHeaders,
collection_name: str,
deposit: Optional[Deposit] = None,
) -> Tuple[int, str, Receipt]:
"""Create a first deposit as:
- archive deposit (1 zip)
- multipart (1 zip + 1 atom entry)
......@@ -53,9 +101,7 @@ class SWHDeposit(SWHPostDepositAPI):
If everything is ok, a 201 response (created) with a
deposit receipt.
Otherwise, depending on the upload, the following errors
can be returned:
Raises:
- archive deposit:
- 400 (bad request) if the request is not providing an external
identifier
......@@ -82,12 +128,46 @@ class SWHDeposit(SWHPostDepositAPI):
provided
"""
assert deposit_id is None
assert deposit is None
deposit = self._deposit_create(req, collection_name, external_id=headers.slug)
if req.content_type in ACCEPT_ARCHIVE_CONTENT_TYPES:
data = self._binary_upload(req, headers, collection_name)
elif req.content_type.startswith('multipart/'):
data = self._multipart_upload(req, headers, collection_name)
receipt = self._binary_upload(req, headers, collection_name, deposit)
elif req.content_type.startswith("multipart/"):
receipt = self._multipart_upload(req, headers, collection_name, deposit)
else:
data = self._atom_entry(req, headers, collection_name)
return status.HTTP_201_CREATED, EDIT_SE_IRI, data
receipt = self._atom_entry(req, headers, collection_name, deposit)
return status.HTTP_201_CREATED, EDIT_IRI, receipt
def _deposit_create(
    self, request, collection_name: str, external_id: Optional[str]
) -> Deposit:
    """Build a new, not yet saved, deposit for the requesting client.

    Args:
        request: the incoming request, used to resolve the client
        collection_name: name of the collection receiving the deposit
        external_id: optional external identifier (slug); stored as an
            empty string when not provided

    Returns:
        An unsaved Deposit. When ``external_id`` is given, its parent is the
        client's most recent successfully loaded deposit carrying the same
        external id, if there is one; otherwise the parent is None.
    """
    collection = get_collection_by_name(collection_name)
    client = self.get_client(request)

    parent: Optional[Deposit] = None
    if external_id:
        # TODO: delete this when clients stopped relying on the slug
        # Highest-id deposit with the same external id and a successful load,
        # or None when no such deposit exists
        parent = (
            Deposit.objects.filter(
                client=client,
                external_id=external_id,
                status=DEPOSIT_STATUS_LOAD_SUCCESS,
            )
            .order_by("-id")
            .first()
        )

    return Deposit(
        collection=collection,
        external_id=external_id or "",
        client=client,
        parent=parent,
    )
# Copyright (C) 2017-2019 The Software Heritage developers
# Copyright (C) 2017-2025 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import hashlib
from abc import ABCMeta, abstractmethod
from django.urls import reverse
from django.http import HttpResponse
import contextlib
import datetime
import hashlib
import json
from typing import Any, Dict, Iterator, Optional, Sequence, Tuple, Type, Union
import uuid
from xml.etree import ElementTree
import attr
from django.core.files.uploadedfile import UploadedFile
from django.http import FileResponse, HttpResponse
from django.shortcuts import render
from django.template.loader import render_to_string
from django.urls import reverse
from django.utils import timezone
from rest_framework import status
from rest_framework.authentication import BasicAuthentication
from rest_framework.permissions import IsAuthenticated, AllowAny
from rest_framework.authentication import BaseAuthentication, BasicAuthentication
from rest_framework.permissions import BasePermission, IsAuthenticated
from rest_framework.request import Request
from rest_framework.views import APIView
from swh.model import hashutil
from ..config import (
SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI,
ARCHIVE_KEY, METADATA_KEY, RAW_METADATA_KEY, STATE_IRI,
DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL,
DEPOSIT_STATUS_LOAD_SUCCESS, ARCHIVE_TYPE, METADATA_TYPE
from swh.deposit.api.converters import convert_status_detail
from swh.deposit.auth import HasDepositPermission, KeycloakBasicAuthentication
from swh.deposit.config import (
ARCHIVE_KEY,
ARCHIVE_TYPE,
CONT_FILE_IRI,
DEPOSIT_STATUS_DEPOSITED,
DEPOSIT_STATUS_LOAD_SUCCESS,
DEPOSIT_STATUS_PARTIAL,
EDIT_IRI,
EM_IRI,
METADATA_TYPE,
RAW_METADATA_KEY,
SE_IRI,
STATE_IRI,
APIConfig,
)
from swh.deposit.errors import (
BAD_REQUEST,
CHECKSUM_MISMATCH,
ERROR_CONTENT,
FORBIDDEN,
MAX_UPLOAD_SIZE_EXCEEDED,
MEDIATION_NOT_ALLOWED,
METHOD_NOT_ALLOWED,
NOT_FOUND,
PARSING_ERROR,
DepositError,
ParserError,
)
from swh.deposit.loader.checks import check_metadata
from swh.deposit.models import (
DEPOSIT_METADATA_ONLY,
Deposit,
DepositClient,
DepositCollection,
DepositRequest,
)
from swh.deposit.parsers import parse_xml
from swh.deposit.utils import (
compute_metadata_context,
extended_swhid_from_qualified,
extract_release_data,
parse_swh_deposit_origin,
parse_swh_metadata_provenance,
parse_swh_reference,
)
from ..errors import (
MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT,
CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED,
make_error_response_from_dict, FORBIDDEN,
NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED,
ParserError, PARSING_ERROR
from swh.model import hashutil
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
Origin,
RawExtrinsicMetadata,
)
from ..models import (
Deposit, DepositRequest, DepositCollection,
DepositClient
from swh.model.swhids import (
ExtendedObjectType,
ExtendedSWHID,
QualifiedSWHID,
ValidationError,
)
from ..parsers import parse_xml
from swh.scheduler.utils import create_oneshot_task
ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"]
ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"]
@attr.s
class ParsedRequestHeaders:
    """Unified view of the request headers used by the deposit API."""

    # request.content_type
    content_type: str = attr.ib()
    # CONTENT_LENGTH, converted to int when received as a string
    content_length: Optional[int] = attr.ib()
    # HTTP_IN_PROGRESS; False when absent, a string is True only if it
    # equals "true" (case-insensitive)
    in_progress: bool = attr.ib()
    # HTTP_CONTENT_DISPOSITION
    content_disposition: Optional[str] = attr.ib()
    # HTTP_CONTENT_MD5, decoded from its hexadecimal form
    content_md5sum: Optional[bytes] = attr.ib()
    # HTTP_PACKAGING
    packaging: Optional[str] = attr.ib()
    # HTTP_SLUG
    slug: Optional[str] = attr.ib()
    # HTTP_ON_BEHALF_OF
    on_behalf_of: Optional[str] = attr.ib()
    # HTTP_METADATA_RELEVANT
    metadata_relevant: Optional[str] = attr.ib()
    # HTTP_X_CHECK_SWHID
    swhid: Optional[str] = attr.ib()
@attr.s
class Receipt:
    """Values gathered while processing a request body, to be served back in
    the Deposit Receipt."""

    deposit_id: int = attr.ib()
    deposit_date: datetime.datetime = attr.ib()
    status: str = attr.ib()
    archive: Optional[str] = attr.ib()
def _compute_md5(filehandler: UploadedFile) -> bytes:
h = hashlib.md5()
for chunk in filehandler:
h.update(chunk)
return h.digest()
def get_deposit_by_id(
deposit_id: int, collection_name: Optional[str] = None
) -> Deposit:
"""Gets an existing Deposit object if it exists, or raises `DepositError`.
If `collection_name` is provided, also checks the deposit belongs to that
collection and raises `DepositError` when it does not."""
try:
deposit = Deposit.objects.get(pk=deposit_id)
except Deposit.DoesNotExist:
raise DepositError(NOT_FOUND, f"Deposit {deposit_id} does not exist")
if collection_name and deposit.collection.name != collection_name:
get_collection_by_name(collection_name) # raises if does not exist
ACCEPT_PACKAGINGS = ['http://purl.org/net/sword/package/SimpleZip']
ACCEPT_ARCHIVE_CONTENT_TYPES = ['application/zip', 'application/x-tar']
raise DepositError(
NOT_FOUND,
f"Deposit {deposit_id} does not belong to collection {collection_name}",
)
return deposit
class SWHAPIView(APIView):
"""Mixin intended as a base API view to enforce the basic
authentication check
"""
authentication_classes = (BasicAuthentication, )
permission_classes = (IsAuthenticated, )
def get_collection_by_name(collection_name: str):
"""Gets an existing DepositCollection object if it exists, or raises `DepositError`."""
try:
collection = DepositCollection.objects.get(name=collection_name)
except DepositCollection.DoesNotExist:
raise DepositError(NOT_FOUND, f"Unknown collection name {collection_name}")
assert collection is not None
class SWHPrivateAPIView(SWHAPIView):
"""Mixin intended as private api (so no authentication) based API view
(for the private ones).
return collection
"""
authentication_classes = ()
permission_classes = (AllowAny, )
def guess_deposit_origin_url(deposit: Deposit):
    """Derive an origin url for ``deposit`` from its client and external id.

    The result is the client's provider url, stripped of trailing slashes,
    followed by ``/`` and the deposit's external id. When the deposit has no
    external id, a random UUID4 is used in its place.
    """
    # The client provided neither an origin_url nor a slug. That's inconvenient,
    # but SWORD requires we support it. So a random slug is generated then.
    slug = deposit.external_id or str(uuid.uuid4())
    base_url = deposit.client.provider_url.rstrip("/")
    return "%s/%s" % (base_url, slug)
class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta):
"""Base deposit request class sharing multiple common behaviors.
def check_url_match_provider(url: str, provider_url: str) -> None:
    """Verify that ``url`` is located under the provider url.

    Raises:
        DepositError: (FORBIDDEN) when ``url`` does not start with the
            provider url

    """
    # Compare against exactly one trailing slash, so that a provider
    # "https://a.org/foo" does not accept "https://a.org/foobar"
    provider_url = provider_url.rstrip("/") + "/"
    if url.startswith(provider_url):
        return
    raise DepositError(
        FORBIDDEN,
        f"URL mismatch: {url} must start with {provider_url}",
    )
class APIBase(APIConfig, APIView, metaclass=ABCMeta):
"""Base deposit request class sharing multiple common behaviors."""
_client: Optional[DepositClient] = None
def __init__(self):
    """Pick the authentication and permission classes from the configuration.

    The ``authentication_provider`` configuration key drives the choice:

    - ``"basic"``: ``BasicAuthentication`` with ``IsAuthenticated``
    - ``"keycloak"``: ``KeycloakBasicAuthentication`` with
      ``IsAuthenticated`` and ``HasDepositPermission``

    Raises:
        ValueError: when the key is missing or holds any other value
    """
    super().__init__()
    auth_provider = self.config.get("authentication_provider")
    if auth_provider == "basic":
        self.authentication_classes: Sequence[Type[BaseAuthentication]] = (
            BasicAuthentication,
        )
        self.permission_classes: Sequence[Type[BasePermission]] = (IsAuthenticated,)
    elif auth_provider == "keycloak":
        self.authentication_classes: Sequence[Type[BaseAuthentication]] = (
            KeycloakBasicAuthentication,
        )
        self.permission_classes: Sequence[Type[BasePermission]] = (
            IsAuthenticated,
            HasDepositPermission,
        )
    else:
        # Adjacent literals are concatenated verbatim: the separating space
        # must be part of one of them
        raise ValueError(
            "Configuration key 'authentication_provider' should be provided "
            f"with either 'basic' or 'keycloak' value not {auth_provider!r}."
        )
def _read_headers(self, req):
def _read_headers(self, request: Request) -> ParsedRequestHeaders:
"""Read and unify the necessary headers from the request (those are
not stored in the same location or not properly formatted).
Args:
req (Request): Input request
request: Input request
Returns:
Dictionary with the following keys (some associated values may be
......@@ -83,271 +226,256 @@ class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta):
- on-behalf-of
"""
meta = req._request.META
content_type = req.content_type
content_length = meta.get('CONTENT_LENGTH')
meta = request._request.META
content_length = meta.get("CONTENT_LENGTH")
if content_length and isinstance(content_length, str):
content_length = int(content_length)
# final deposit if not provided
in_progress = meta.get('HTTP_IN_PROGRESS', False)
content_disposition = meta.get('HTTP_CONTENT_DISPOSITION')
in_progress = meta.get("HTTP_IN_PROGRESS", False)
if isinstance(in_progress, str):
in_progress = in_progress.lower() == 'true'
in_progress = in_progress.lower() == "true"
content_md5sum = meta.get('HTTP_CONTENT_MD5')
content_md5sum = meta.get("HTTP_CONTENT_MD5")
if content_md5sum:
content_md5sum = bytes.fromhex(content_md5sum)
packaging = meta.get('HTTP_PACKAGING')
slug = meta.get('HTTP_SLUG')
on_behalf_of = meta.get('HTTP_ON_BEHALF_OF')
metadata_relevant = meta.get('HTTP_METADATA_RELEVANT')
return {
'content-type': content_type,
'content-length': content_length,
'in-progress': in_progress,
'content-disposition': content_disposition,
'content-md5sum': content_md5sum,
'packaging': packaging,
'slug': slug,
'on-behalf-of': on_behalf_of,
'metadata-relevant': metadata_relevant,
}
def _compute_md5(self, filehandler):
"""Compute uploaded file's md5 sum.
Args:
filehandler (InMemoryUploadedFile): the file to compute the md5
hash
Returns:
the md5 checksum (str)
"""
h = hashlib.md5()
for chunk in filehandler:
h.update(chunk)
return h.digest()
def _deposit_put(self, deposit_id=None, in_progress=False,
external_id=None):
return ParsedRequestHeaders(
content_type=request.content_type,
content_length=content_length,
in_progress=in_progress,
content_disposition=meta.get("HTTP_CONTENT_DISPOSITION"),
content_md5sum=content_md5sum,
packaging=meta.get("HTTP_PACKAGING"),
slug=meta.get("HTTP_SLUG"),
on_behalf_of=meta.get("HTTP_ON_BEHALF_OF"),
metadata_relevant=meta.get("HTTP_METADATA_RELEVANT"),
swhid=meta.get("HTTP_X_CHECK_SWHID"),
)
@contextlib.contextmanager
def _deposit_put(
self, deposit: Deposit, in_progress: bool = False
) -> Iterator[None]:
"""Save/Update a deposit in db.
Args:
deposit_id (int): deposit identifier
in_progress (dict): The deposit's status
external_id (str): The external identifier to associate to
the deposit
Returns:
The Deposit instance saved or updated.
Acts as a context manager, ensuring the deposit object exists before entering
the block, and completes the deposit after (successfully) exiting the block.
Args:
deposit: deposit being updated/created
in_progress: deposit status
"""
if in_progress is False:
complete_date = timezone.now()
status_type = DEPOSIT_STATUS_DEPOSITED
if in_progress:
deposit.status = DEPOSIT_STATUS_PARTIAL
deposit.save()
yield
else:
complete_date = None
status_type = DEPOSIT_STATUS_PARTIAL
if deposit.pk is None:
# We need to save the Deposit to the database so DepositRequest objects
# can be created with the Deposit as foreign key
deposit.status = DEPOSIT_STATUS_PARTIAL
deposit.save()
yield
self._complete_deposit(deposit)
def _complete_deposit(self, deposit: Deposit) -> None:
"""Marks the deposit as 'deposited', then schedule a check task if configured
to do so."""
if not deposit_id:
try:
# find a deposit parent (same external id, status load
# to success)
deposit_parent = Deposit.objects.filter(
external_id=external_id,
status=DEPOSIT_STATUS_LOAD_SUCCESS).order_by('-id')[0:1].get() # noqa
except Deposit.DoesNotExist:
deposit_parent = None
deposit = Deposit(collection=self._collection,
external_id=external_id,
complete_date=complete_date,
status=status_type,
client=self._client,
parent=deposit_parent)
else:
deposit = Deposit.objects.get(pk=deposit_id)
deposit.complete_date = timezone.now()
deposit.status = DEPOSIT_STATUS_DEPOSITED
deposit.save()
# update metadata
deposit.complete_date = complete_date
deposit.status = status_type
if not deposit.origin_url:
deposit.origin_url = guess_deposit_origin_url(deposit)
if self.config["checks"]:
scheduler = self.scheduler
if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id:
task = create_oneshot_task(
"check-deposit",
collection=deposit.collection.name,
deposit_id=deposit.id,
retries_left=3,
)
check_task_id = scheduler.create_tasks([task])[0].id
deposit.check_task_id = str(check_task_id)
deposit.save()
return deposit
def _deposit_request_put(self, deposit, deposit_request_data,
replace_metadata=False, replace_archives=False):
def _deposit_request_put(
self,
deposit: Deposit,
deposit_request_data: Dict[str, Any],
replace_metadata: bool = False,
replace_archives: bool = False,
) -> DepositRequest:
"""Save a deposit request with metadata attached to a deposit.
Args:
deposit (Deposit): The deposit concerned by the request
deposit_request_data (dict): The dictionary with at most 2 deposit
request types (archive, metadata) to associate to the deposit
replace_metadata (bool): Flag defining if we add or update
deposit: The deposit concerned by the request
deposit_request_data: The dictionary with at most 2 deposit
request types (archive, metadata) to associate to the deposit
replace_metadata: Flag defining if we add or update
existing metadata to the deposit
replace_archives (bool): Flag defining if we add or update
replace_archives: Flag defining if we add or update
archives to existing deposit
Returns:
None
the DepositRequest object stored in the backend
"""
if replace_metadata:
DepositRequest.objects.filter(
deposit=deposit,
type=METADATA_TYPE).delete()
DepositRequest.objects.filter(deposit=deposit, type=METADATA_TYPE).delete()
if replace_archives:
DepositRequest.objects.filter(
deposit=deposit,
type=ARCHIVE_TYPE).delete()
DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete()
deposit_request = None
archive_file = deposit_request_data.get(ARCHIVE_KEY)
if archive_file:
deposit_request = DepositRequest(
type=ARCHIVE_TYPE,
deposit=deposit,
archive=archive_file)
type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file
)
deposit_request.save()
metadata = deposit_request_data.get(METADATA_KEY)
if metadata:
raw_metadata = deposit_request_data.get(RAW_METADATA_KEY)
raw_metadata = deposit_request_data.get(RAW_METADATA_KEY)
if raw_metadata:
deposit_request = DepositRequest(
type=METADATA_TYPE,
deposit=deposit,
metadata=metadata,
raw_metadata=raw_metadata)
raw_metadata=raw_metadata.decode("utf-8"),
)
deposit_request.save()
assert deposit_request is not None
if deposit_request:
# Set release infos
release_data = extract_release_data(deposit)
if release_data:
deposit.software_version = release_data.software_version
deposit.release_notes = release_data.release_notes
deposit.save()
def _delete_archives(self, collection_name, deposit_id):
"""Delete archives reference from the deposit id.
assert deposit_request is not None
return deposit_request
"""
try:
deposit = Deposit.objects.get(pk=deposit_id)
except Deposit.DoesNotExist:
return make_error_dict(
NOT_FOUND,
'The deposit %s does not exist' % deposit_id)
DepositRequest.objects.filter(
deposit=deposit,
type=ARCHIVE_TYPE).delete()
def _delete_archives(self, collection_name: str, deposit: Deposit) -> Dict:
    """Remove every archive-type request attached to ``deposit``.

    ``collection_name`` is accepted but not used by this method.

    Returns:
        An empty dict.
    """
    archive_requests = DepositRequest.objects.filter(
        type=ARCHIVE_TYPE, deposit=deposit
    )
    archive_requests.delete()
    return {}
def _delete_deposit(self, collection_name, deposit_id):
def _delete_deposit(self, collection_name: str, deposit: Deposit) -> Dict:
"""Delete deposit reference.
Args:
collection_name (str): Client's name
deposit_id (id): The deposit to delete
collection_name: Client's collection
deposit: The deposit to delete
Returns
Empty dict when ok.
Dict with error key to describe the failure.
"""
try:
deposit = Deposit.objects.get(pk=deposit_id)
except Deposit.DoesNotExist:
return make_error_dict(
NOT_FOUND,
'The deposit %s does not exist' % deposit_id)
if deposit.collection.name != collection_name:
summary = 'Cannot delete a deposit from another collection'
summary = "Cannot delete a deposit from another collection"
description = "Deposit %s does not belong to the collection %s" % (
deposit_id, collection_name)
return make_error_dict(
BAD_REQUEST,
summary=summary,
verbose_description=description)
deposit.id,
collection_name,
)
raise DepositError(
BAD_REQUEST, summary=summary, verbose_description=description
)
DepositRequest.objects.filter(deposit=deposit).delete()
deposit.delete()
return {}
def _check_preconditions_on(self, filehandler, md5sum,
content_length=None):
"""Check preconditions on provided file are respected. That is the
length and/or the md5sum hash match the file's content.
def _check_file_length(
self,
filehandler: UploadedFile,
content_length: Optional[int] = None,
) -> None:
"""Check the filehandler passed as argument has exactly the
expected content_length
Args:
filehandler (InMemoryUploadedFile): The file to check
md5sum (hex str): md5 hash expected from the file's content
content_length (int): the expected length if provided.
Returns:
Either none if no error or a dictionary with a key error
detailing the problem.
filehandler: The file to check
content_length: the expected length if provided.
Raises:
DepositError if the actual length does not match
"""
max_upload_size = self.config["max_upload_size"]
if content_length:
if content_length > self.config['max_upload_size']:
return make_error_dict(
MAX_UPLOAD_SIZE_EXCEEDED,
'Upload size limit exceeded (max %s bytes).' %
self.config['max_upload_size'],
'Please consider sending the archive in '
'multiple steps.')
length = filehandler.size
if length != content_length:
return make_error_dict(status.HTTP_412_PRECONDITION_FAILED,
'Wrong length')
raise DepositError(status.HTTP_412_PRECONDITION_FAILED, "Wrong length")
if filehandler.size > max_upload_size:
raise DepositError(
MAX_UPLOAD_SIZE_EXCEEDED,
# NOTE(review): the two adjacent literals below are concatenated with no
# separator, so the message reads "... bytes).Please consider ..."; a space
# (or a comma, if two separate arguments were intended) is missing.
f"Upload size limit exceeded (max {max_upload_size} bytes)."
"Please consider sending the archive in multiple steps.",
)
def _check_file_md5sum(
self,
filehandler: UploadedFile,
md5sum: Optional[bytes],
) -> None:
"""Check the filehandler passed as argument has the expected md5sum
Args:
filehandler: The file to check
md5sum: md5 hash expected from the file's content
Raises:
DepositError if the md5sum does not match
"""
if md5sum:
_md5sum = self._compute_md5(filehandler)
_md5sum = _compute_md5(filehandler)
if _md5sum != md5sum:
return make_error_dict(
raise DepositError(
CHECKSUM_MISMATCH,
'Wrong md5 hash',
'The checksum sent %s and the actual checksum '
'%s does not match.' % (hashutil.hash_to_hex(md5sum),
hashutil.hash_to_hex(_md5sum)))
return None
def _binary_upload(self, req, headers, collection_name, deposit_id=None,
replace_metadata=False, replace_archives=False):
"Wrong md5 hash",
f"The checksum sent {hashutil.hash_to_hex(md5sum)} and the actual "
f"checksum {hashutil.hash_to_hex(_md5sum)} does not match.",
)
def _binary_upload(
    self,
    request: Request,
    headers: ParsedRequestHeaders,
    collection_name: str,
    deposit: Deposit,
    replace_metadata: bool = False,
    replace_archives: bool = False,
) -> Receipt:
    """Binary upload routine.

    Validates the request headers and the uploaded file, then stores the
    archive as a new deposit request attached to ``deposit``.

    Args:
        request: the request holding information to parse
            and inject in db
        headers: parsed request headers
        collection_name: the associated client
        deposit: deposit to be updated
        replace_metadata: 'Update or add' request to existing
            deposit. If False (default), this adds new metadata request to
            existing ones. Otherwise, this will replace existing metadata.
        replace_archives: 'Update or add' request to existing
            deposit. If False (default), this adds new archive request to
            existing ones. Otherwise, this will replace existing archives.

    Returns:
        Receipt holding the deposit id, reception date, status and the
        name of the uploaded archive.

    Raises:
        DepositError:
            - BAD_REQUEST if the CONTENT_LENGTH or CONTENT_DISPOSITION header
              is missing, or if the packaging is not in ACCEPT_PACKAGINGS
            - whatever ``_check_file_length`` / ``_check_file_md5sum`` raise
              when the uploaded file fails the length or checksum checks

    """
    content_length = headers.content_length
    if not content_length:
        raise DepositError(
            BAD_REQUEST,
            "CONTENT_LENGTH header is mandatory",
            "For archive deposit, the CONTENT_LENGTH header must be sent.",
        )

    content_disposition = headers.content_disposition
    if not content_disposition:
        raise DepositError(
            BAD_REQUEST,
            "CONTENT_DISPOSITION header is mandatory",
            "For archive deposit, the CONTENT_DISPOSITION header must be sent.",
        )

    # packaging is optional, but when given it must be a supported one
    packaging = headers.packaging
    if packaging and packaging not in ACCEPT_PACKAGINGS:
        raise DepositError(
            BAD_REQUEST,
            f"Only packaging {ACCEPT_PACKAGINGS} is supported",
            f"The packaging provided {packaging} is not supported",
        )

    filehandler = request.FILES["file"]
    assert isinstance(filehandler, UploadedFile), filehandler

    self._check_file_length(filehandler, content_length)
    self._check_file_md5sum(filehandler, headers.content_md5sum)

    # actual storage of data
    archive_metadata = filehandler
    with self._deposit_put(
        deposit=deposit,
        in_progress=headers.in_progress,
    ):
        self._deposit_request_put(
            deposit,
            {ARCHIVE_KEY: archive_metadata},
            replace_metadata=replace_metadata,
            replace_archives=replace_archives,
        )

    # NOTE(review): the receipt is built once the _deposit_put context has
    # exited, so that it reflects the deposit's final state - confirm against
    # _deposit_put.
    return Receipt(
        deposit_id=deposit.id,
        deposit_date=deposit.reception_date,
        status=deposit.status,
        archive=filehandler.name,
    )
def _read_metadata(self, metadata_stream) -> Tuple[bytes, ElementTree.Element]:
    """Read a metadata stream and return its content in two forms.

    Args:
        metadata_stream: object exposing a ``read()`` method returning the
            raw XML document

    Returns:
        A tuple of:

        * the metadata verbatim (as read from the stream), for archival in
          long-term storage
        * the metadata parsed as ElementTree, to extract information
          immediately

    """
    raw_metadata = metadata_stream.read()
    metadata_tree = parse_xml(raw_metadata)
    return raw_metadata, metadata_tree
def _multipart_upload(
    self,
    request: Request,
    headers: ParsedRequestHeaders,
    collection_name: str,
    deposit: Deposit,
    replace_metadata: bool = False,
    replace_archives: bool = False,
) -> Receipt:
    """Multipart upload supported with exactly:

    - 1 archive (application/zip or application/x-tar)
    - 1 atom entry (application/atom+xml)

    Args:
        request: the request holding information to parse
            and inject in db
        headers: parsed request headers
        collection_name: the associated client
        deposit: deposit to be updated
        replace_metadata: 'Update or add' request to existing
            deposit. If False (default), this adds new metadata request to
            existing ones. Otherwise, this will replace existing metadata.
        replace_archives: 'Update or add' request to existing
            deposit. If False (default), this adds new archive request to
            existing ones. Otherwise, this will replace existing archives.

    Returns:
        Receipt holding the deposit id, reception date, status and the
        name of the uploaded archive.

    Raises:
        DepositError:
            - ERROR_CONTENT if a content type is sent more than once, or if
              the request does not hold exactly 2 distinct content types
            - BAD_REQUEST if the archive or the atom entry is missing
            - PARSING_ERROR if the atom entry is not well-formed xml
            - whatever the file length/md5sum checks and
              ``_set_deposit_origin_from_metadata`` raise

    """
    content_types_present = set()

    data: Dict[str, Optional[Any]] = {
        "application/zip": None,  # expected either zip
        "application/x-tar": None,  # or x-tar
        "application/atom+xml": None,
    }
    # Only the uploaded files matter here, not the form field names
    for _key, fh in request.FILES.items():
        content_type = fh.content_type
        if content_type in content_types_present:
            raise DepositError(
                ERROR_CONTENT,
                "Only 1 application/zip (or application/x-tar) archive "
                "and 1 atom+xml entry is supported (as per sword2.0 "
                "specification)",
                "You provided more than 1 application/(zip|x-tar) "
                "or more than 1 application/atom+xml content-disposition "
                "header in the multipart deposit",
            )

        content_types_present.add(content_type)
        assert content_type is not None
        data[content_type] = fh

    if len(content_types_present) != 2:
        raise DepositError(
            ERROR_CONTENT,
            "You must provide both 1 application/zip (or "
            "application/x-tar) and 1 atom+xml entry for multipart "
            "deposit",
            "You need to provide only 1 application/(zip|x-tar) "
            "and 1 application/atom+xml content-disposition header "
            "in the multipart deposit",
        )

    filehandler = data["application/zip"] or data["application/x-tar"]
    if filehandler is None:
        raise DepositError(
            BAD_REQUEST,
            "You must provide an archive, either as application/zip or "
            "application/x-tar",
        )
    assert isinstance(filehandler, UploadedFile), filehandler

    self._check_file_length(filehandler)
    self._check_file_md5sum(filehandler, headers.content_md5sum)

    if data["application/atom+xml"] is None:
        raise DepositError(
            BAD_REQUEST, "You must provide an application/atom+xml entry."
        )

    try:
        raw_metadata, metadata_tree = self._read_metadata(
            data["application/atom+xml"]
        )
    except ParserError:
        raise DepositError(
            PARSING_ERROR,
            "Malformed xml metadata",
            "The xml received is malformed. "
            "Please ensure your metadata file is correctly formatted.",
        )

    self._set_deposit_origin_from_metadata(deposit, metadata_tree, headers)

    # actual storage of data
    with self._deposit_put(
        deposit=deposit,
        in_progress=headers.in_progress,
    ):
        deposit_request_data = {
            ARCHIVE_KEY: filehandler,
            RAW_METADATA_KEY: raw_metadata,
        }
        self._deposit_request_put(
            deposit, deposit_request_data, replace_metadata, replace_archives
        )

    # NOTE(review): the receipt is built once the _deposit_put context has
    # exited, so that it reflects the deposit's final state - confirm against
    # _deposit_put.
    return Receipt(
        deposit_id=deposit.id,
        deposit_date=deposit.reception_date,
        archive=filehandler.name,
        status=deposit.status,
    )
def _store_metadata_deposit(
    self,
    deposit: Deposit,
    swhid_reference: Union[str, QualifiedSWHID],
    metadata_tree: ElementTree.Element,
    raw_metadata: bytes,
    deposit_origin: Optional[str] = None,
) -> Tuple[ExtendedSWHID, Deposit, DepositRequest]:
    """When all user inputs pass the checks, this associates the raw_metadata to the
    swhid_reference in the raw extrinsic metadata storage. In case of any issues,
    a DepositError is raised with the details.

    Checks:
        - metadata pass the functional checks
        - the target object is known to the archive

    Args:
        deposit: Deposit reference
        swhid_reference: The swhid or the origin to attach metadata information to
        metadata_tree: Full element tree of metadata to check for validity
            (parsed out of raw_metadata)
        raw_metadata: The actual raw metadata to send in the storage metadata
        deposit_origin: Optional deposit origin url to use if any (e.g. deposit
            update scenario provides one)

    Raises:
        DepositError in case of incorrect inputs from the deposit client
        (e.g. functionally invalid metadata, ...)

    Returns:
        Tuple of target swhid, deposit, and deposit request

    """
    metadata_ok, error_details = check_metadata(metadata_tree)
    if not metadata_ok:
        assert error_details, "Details should be set when a failure occurs"
        raise DepositError(
            BAD_REQUEST,
            "Functional metadata checks failure",
            convert_status_detail(error_details),
        )

    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=deposit.client.provider_url,
    )

    metadata_fetcher = self.swh_deposit_fetcher()

    # replace metadata within the deposit backend
    deposit_request_data = {
        RAW_METADATA_KEY: raw_metadata,
    }

    # actually add the metadata to the completed deposit
    deposit_request = self._deposit_request_put(deposit, deposit_request_data)

    target_swhid: ExtendedSWHID  # origin URL or CoreSWHID
    if isinstance(swhid_reference, str):
        # a plain string is an origin url: target the origin itself
        target_swhid = Origin(swhid_reference).swhid()
        metadata_context = {}
    else:
        metadata_context = compute_metadata_context(swhid_reference)
        if deposit_origin:  # metadata deposit update on completed deposit
            metadata_context["origin"] = deposit_origin

        target_swhid = extended_swhid_from_qualified(swhid_reference)

    self._check_swhid_in_archive(target_swhid)

    # metadata deposited by the client
    metadata_object = RawExtrinsicMetadata(
        target=target_swhid,  # core swhid or origin
        discovery_date=deposit_request.date,
        authority=metadata_authority,
        fetcher=metadata_fetcher,
        format="sword-v2-atom-codemeta",
        metadata=raw_metadata,
        **metadata_context,
    )

    # metadata on the metadata object
    swh_deposit_authority = self.swh_deposit_authority()
    swh_deposit_fetcher = self.swh_deposit_fetcher()
    metametadata_object = RawExtrinsicMetadata(
        target=metadata_object.swhid(),
        discovery_date=deposit_request.date,
        authority=swh_deposit_authority,
        fetcher=swh_deposit_fetcher,
        format="xml-deposit-info",
        metadata=render_to_string(
            "deposit/deposit_info.xml", context={"deposit": deposit}
        ).encode(),
    )

    # write to metadata storage
    self.storage_metadata.metadata_authority_add(
        [metadata_authority, swh_deposit_authority]
    )
    self.storage_metadata.metadata_fetcher_add(
        [metadata_fetcher, swh_deposit_fetcher]
    )
    self.storage_metadata.raw_extrinsic_metadata_add(
        [metadata_object, metametadata_object]
    )

    return (target_swhid, deposit, deposit_request)
def _check_swhid_in_archive(self, target_swhid: ExtendedSWHID) -> None:
    """Ensure the object targeted by ``target_swhid`` is known to the archive.

    Args:
        target_swhid: SWHID of the object the metadata would be attached to

    Raises:
        DepositError (BAD_REQUEST) if the storage reports the object as
            missing
        ValueError if the SWHID has an object type not handled here

    """
    object_type = target_swhid.object_type

    if object_type == ExtendedObjectType.CONTENT:
        missing_contents = self.storage.content_missing_per_sha1_git(
            [target_swhid.object_id]
        )
        if list(missing_contents):
            raise DepositError(
                BAD_REQUEST,
                f"Cannot load metadata on {target_swhid}, this content "
                f"object does not exist in the archive (yet?).",
            )
    elif object_type in (
        ExtendedObjectType.DIRECTORY,
        ExtendedObjectType.REVISION,
        ExtendedObjectType.RELEASE,
        ExtendedObjectType.SNAPSHOT,
    ):
        # The storage method is looked up by name: "<object type>_missing"
        target_type_name = object_type.name.lower()
        find_missing = getattr(self.storage, target_type_name + "_missing")
        if list(find_missing([target_swhid.object_id])):
            raise DepositError(
                BAD_REQUEST,
                f"Cannot load metadata on {target_swhid}, this {target_type_name} "
                f"object does not exist in the archive (yet?).",
            )
    elif object_type == ExtendedObjectType.ORIGIN:
        found_origins = list(
            self.storage.origin_get_by_sha1([target_swhid.object_id])
        )
        if None in found_origins:
            raise DepositError(
                BAD_REQUEST,
                "Cannot load metadata on origin, it is not (yet?) known to the "
                "archive.",
            )
    else:
        # This should not happen, because target_swhid is generated from either
        # a core swhid or an origin URL.
        # Let's just check it again so the "switch" is exhaustive.
        raise ValueError(
            f"_check_swhid_in_archive expected core SWHID or origin SWHID, "
            f"but got {target_swhid}."
        )
def _atom_entry(
    self,
    request: Request,
    headers: ParsedRequestHeaders,
    collection_name: str,
    deposit: Deposit,
    replace_metadata: bool = False,
    replace_archives: bool = False,
) -> Receipt:
    """Atom entry deposit.

    Handles both the metadata-only deposit (the atom entry holds a swh
    reference) and the metadata part of a code deposit.

    Args:
        request: the request holding information to parse
            and inject in db
        headers: parsed request headers
        collection_name: the associated client
        deposit: deposit to be updated
        replace_metadata: 'Update or add' request to existing
            deposit. If False (default), this adds new metadata request to
            existing ones. Otherwise, this will replace existing metadata.
        replace_archives: 'Update or add' request to existing
            deposit. If False (default), this adds new archive request to
            existing ones. Otherwise, this will replace existing archives.

    Returns:
        Receipt holding the deposit id, date, status and no archive.

    Raises:
        DepositError:
            - BAD_REQUEST if the request's body is empty, is not
              well-formed xml, or holds no metadata element
            - PARSING_ERROR if the swh reference is invalid
            - BAD_REQUEST if a swh reference is mixed with code-deposit
              information (origin url, parent or external id)
            - whatever ``_set_deposit_origin_from_metadata``,
              ``check_url_match_provider`` and ``_store_metadata_deposit``
              raise

    """
    metadata_stream = request.data
    empty_atom_entry_summary = "Empty body request is not supported."
    empty_atom_entry_desc = (
        "Atom entry request is about non-empty metadata deposit."
    )

    if not metadata_stream:
        raise DepositError(
            BAD_REQUEST, empty_atom_entry_summary, empty_atom_entry_desc
        )

    try:
        raw_metadata, metadata_tree = self._read_metadata(metadata_stream)
    except ParserError:
        raise DepositError(
            BAD_REQUEST,
            "Malformed xml metadata",
            "The xml received is malformed. "
            "Please ensure your metadata file is correctly formatted.",
        )

    # A well-formed document whose root has no child is considered empty too
    if len(metadata_tree) == 0:
        raise DepositError(
            BAD_REQUEST, empty_atom_entry_summary, empty_atom_entry_desc
        )

    self._set_deposit_origin_from_metadata(deposit, metadata_tree, headers)

    # Determine if we are in the metadata-only deposit case
    try:
        swhid_ref = parse_swh_reference(metadata_tree)
    except ValidationError as e:
        raise DepositError(
            PARSING_ERROR,
            "Invalid SWHID reference",
            str(e),
        )

    if swhid_ref is not None and (
        deposit.origin_url or deposit.parent or deposit.external_id
    ):
        raise DepositError(
            BAD_REQUEST,
            "<swh:reference> is for metadata-only deposits and "
            "<swh:create_origin> / <swh:add_to_origin> / Slug are for "
            "code deposits, only one may be used on a given deposit.",
        )

    if swhid_ref is not None:
        # It's suggested to user to provide it
        metadata_provenance_url = parse_swh_metadata_provenance(metadata_tree)
        if metadata_provenance_url:
            # If the provenance is provided, ensure it matches client provider url
            check_url_match_provider(
                metadata_provenance_url, deposit.client.provider_url
            )

        deposit.save()  # We need a deposit id
        target_swhid, depo, depo_request = self._store_metadata_deposit(
            deposit, swhid_ref, metadata_tree, raw_metadata
        )

        deposit.status = DEPOSIT_STATUS_LOAD_SUCCESS
        # NOTE(review): nesting reconstructed - swhid and swhid_context are
        # assumed to be set only for a QualifiedSWHID reference; confirm.
        if isinstance(swhid_ref, QualifiedSWHID):
            deposit.swhid = str(extended_swhid_from_qualified(swhid_ref))
            deposit.swhid_context = str(swhid_ref)
        deposit.type = DEPOSIT_METADATA_ONLY
        deposit.complete_date = depo_request.date
        deposit.reception_date = depo_request.date
        deposit.save()

        return Receipt(
            deposit_id=deposit.id,
            deposit_date=depo_request.date,
            status=deposit.status,
            archive=None,
        )

    # Code deposit: only record the metadata as a new deposit request
    with self._deposit_put(
        deposit=deposit,
        in_progress=headers.in_progress,
    ):
        self._deposit_request_put(
            deposit,
            {RAW_METADATA_KEY: raw_metadata},
            replace_metadata,
            replace_archives,
        )

    return Receipt(
        deposit_id=deposit.id,
        deposit_date=deposit.reception_date,
        status=deposit.status,
        archive=None,
    )
def _set_deposit_origin_from_metadata(self, deposit, metadata, headers):
    """Set the deposit's origin url (and parent) out of the parsed metadata.

    Args:
        deposit: deposit to update in place (not saved here)
        metadata: parsed metadata, as accepted by ``parse_swh_deposit_origin``
        headers: parsed request headers (not used by this method)

    Raises:
        DepositError:
            - BAD_REQUEST if both create_origin and add_to_origin are set
            - NOT_FOUND if add_to_origin matches no successfully loaded
              deposit of the same client
            - whatever ``check_url_match_provider`` raises

    """
    create_origin, add_to_origin = parse_swh_deposit_origin(metadata)

    both_provided = create_origin and add_to_origin
    if both_provided:
        raise DepositError(
            BAD_REQUEST,
            "<swh:create_origin> and <swh:add_to_origin> are mutually exclusive, "
            "as they respectively create a new origin and add to an existing "
            "origin.",
        )

    if create_origin:
        check_url_match_provider(create_origin, deposit.client.provider_url)
        deposit.origin_url = create_origin

    if add_to_origin:
        origin_url = add_to_origin
        check_url_match_provider(origin_url, deposit.client.provider_url)

        # Most recent successfully loaded deposit of this client on that origin
        loaded_deposits = Deposit.objects.filter(
            client=deposit.client,
            origin_url=origin_url,
            status=DEPOSIT_STATUS_LOAD_SUCCESS,
        )
        parent = loaded_deposits.order_by("-id").first()

        if not parent:
            raise DepositError(
                NOT_FOUND,
                f"<swh:add_to_origin> references URL {origin_url!r}, which does not exist "
                f"or was not created by a Deposit. "
                f"Use <swh:create_origin> instead if you want to create a new Deposit.",
            )

        deposit.parent = parent
        deposit.origin_url = origin_url
def _empty_post(
    self,
    request: Request,
    headers: ParsedRequestHeaders,
    collection_name: str,
    deposit: Deposit,
) -> Receipt:
    """Empty post to finalize a deposit.

    Args:
        request: the request holding information to parse
            and inject in db
        headers: parsed request headers
        collection_name: the associated client
        deposit: deposit to be finalized

    Returns:
        Receipt holding the deposit's id, its completion date, its status
        and no archive.

    """
    self._complete_deposit(deposit)
    # The receipt is dated with complete_date, so it must be set by now
    assert deposit.complete_date is not None
    return Receipt(
        deposit_id=deposit.id,
        deposit_date=deposit.complete_date,
        status=deposit.status,
        archive=None,
    )
def additional_checks(
    self,
    request: Request,
    headers: ParsedRequestHeaders,
    collection_name: str,
    deposit: Optional[Deposit],
) -> Dict[str, Any]:
    """Permit the child class to enrich additional checks.

    The default implementation performs no check at all.

    Args:
        request: the incoming request
        headers: parsed request headers
        collection_name: the associated client
        deposit: the deposit targeted by the request, if any

    Returns:
        An empty dict.

    """
    return {}
def checks(self, req, collection_name, deposit_id=None):
try:
self._collection = DepositCollection.objects.get(
name=collection_name)
except DepositCollection.DoesNotExist:
return make_error_dict(
NOT_FOUND,
'Unknown collection name %s' % collection_name)
username = req.user.username
if username: # unauthenticated request can have the username empty
def get_client(self, request) -> DepositClient:
    """Return the deposit client matching the request's authenticated user.

    The client is looked up once, then kept in ``self._client``.

    Args:
        request: the incoming request, whose ``user.username`` identifies
            the client

    Returns:
        The DepositClient whose username is the request's username.

    Raises:
        DepositError (NOT_FOUND) if no client has that username

    """
    # This class depends on AuthenticatedAPIView, so request.user.username
    # is always set
    username = request.user.username
    assert username is not None

    if self._client is None:
        try:
            self._client = DepositClient.objects.get(username=username)
        except DepositClient.DoesNotExist:
            raise DepositError(NOT_FOUND, f"Unknown client name {username}")

    # Guard against a cached client belonging to another user
    assert self._client.username == username

    return self._client
class SWHGetDepositAPI(SWHBaseDeposit, metaclass=ABCMeta):
"""Mixin for class to support GET method.
"""
def get(self, req, collection_name, deposit_id, format=None):
def checks(
    self, request: Request, collection_name: str, deposit: Optional[Deposit] = None
) -> ParsedRequestHeaders:
    """Run the access checks shared by all endpoints and parse the headers.

    Args:
        request: the incoming request
        collection_name: name of the collection the request targets
        deposit: the deposit the request targets, if any

    Returns:
        The parsed request headers.

    Raises:
        DepositError:
            - FORBIDDEN if the client cannot access the collection
            - MEDIATION_NOT_ALLOWED if the on-behalf-of header is set
            - whatever ``get_client`` and ``restrict_access`` raise

    """
    if deposit is not None:
        assert collection_name == deposit.collection.name
        collection = deposit.collection
    else:
        collection = get_collection_by_name(collection_name)

    client = self.get_client(request)
    target_collection_id = collection.id
    client_collections = client.collections
    assert client_collections is not None
    if target_collection_id not in client_collections:
        raise DepositError(
            FORBIDDEN,
            f"Client {client.username} cannot access collection {collection_name}",
        )

    headers = self._read_headers(request)

    # Status restrictions only make sense on an existing deposit
    if deposit is not None:
        self.restrict_access(request, headers, deposit)

    if headers.on_behalf_of:
        raise DepositError(MEDIATION_NOT_ALLOWED, "Mediation is not supported.")

    self.additional_checks(request, headers, collection_name, deposit)

    return headers
def restrict_access(
    self, request: Request, headers: ParsedRequestHeaders, deposit: Deposit
) -> None:
    """Allow modifications on deposit with status 'partial' only, reject the rest.

    GET requests are always let through.

    Raises:
        DepositError (BAD_REQUEST) on a non-GET request targeting a deposit
            whose status is not DEPOSIT_STATUS_PARTIAL

    """
    if request.method == "GET" or deposit.status == DEPOSIT_STATUS_PARTIAL:
        return

    summary = "You can only act on deposit with status '%s'" % (
        DEPOSIT_STATUS_PARTIAL,
    )
    description = f"This deposit has status '{deposit.status}'"
    raise DepositError(
        BAD_REQUEST, summary=summary, verbose_description=description
    )
def _basic_not_allowed_method(self, request: Request, method: str):
    """Reject a request made with an HTTP method this endpoint does not support.

    Args:
        request: the incoming request (not used)
        method: name of the rejected HTTP method, used in the error message

    Raises:
        DepositError (METHOD_NOT_ALLOWED), always

    """
    summary = f"{method} method is not supported on this endpoint"
    raise DepositError(METHOD_NOT_ALLOWED, summary)
def get(
    self, request: Request, *args, **kwargs
) -> Union[HttpResponse, FileResponse]:
    """Default GET handler: the method is not supported on this endpoint."""
    rejected_method = "GET"
    return self._basic_not_allowed_method(request, rejected_method)
def post(self, request: Request, *args, **kwargs) -> HttpResponse:
    """Default POST handler: the method is not supported on this endpoint."""
    rejected_method = "POST"
    return self._basic_not_allowed_method(request, rejected_method)
def put(self, request: Request, *args, **kwargs) -> HttpResponse:
    """Default PUT handler: the method is not supported on this endpoint."""
    rejected_method = "PUT"
    return self._basic_not_allowed_method(request, rejected_method)
def delete(self, request: Request, *args, **kwargs) -> HttpResponse:
    """Default DELETE handler: the method is not supported on this endpoint."""
    rejected_method = "DELETE"
    return self._basic_not_allowed_method(request, rejected_method)
class APIGet(APIBase, metaclass=ABCMeta):
    """Mixin for class to support GET method."""

    def get(  # type: ignore
        self, request: Request, collection_name: str, deposit_id: int
    ) -> Union[HttpResponse, FileResponse]:
        """Endpoint to read a deposit's resources.

        The deposit is looked up, the common checks are run, then the
        actual processing is delegated to ``process_get``.

        Args:
            request: the incoming request
            collection_name: name of the collection the deposit belongs to
            deposit_id: identifier of the deposit to read

        Returns:
            - a FileResponse served as "application/tar" when ``process_get``
              returns the "swh/generator" content type
            - an HttpResponse with the JSON-serialized content when it
              returns "application/json"
            - an HttpResponse with the content as is otherwise

        """
        deposit = get_deposit_by_id(deposit_id, collection_name)
        self.checks(request, collection_name, deposit)

        r = self.process_get(request, collection_name, deposit)

        status, content, content_type = r
        if content_type == "swh/generator":
            # Here content is a context manager yielding the path of the
            # file to serve
            with content as path:
                return FileResponse(
                    open(path, "rb"), status=status, content_type="application/tar"
                )
        if content_type == "application/json":
            return HttpResponse(
                json.dumps(content), status=status, content_type=content_type
            )
        return HttpResponse(content, status=status, content_type=content_type)

    @abstractmethod
    def process_get(
        self, request: Request, collection_name: str, deposit: Deposit
    ) -> Tuple[int, Any, str]:
        """Routine to deal with the deposit's get processing.

        Returns:
            Tuple of response status code, content and content type

        """
        pass
class SWHPostDepositAPI(SWHBaseDeposit, metaclass=ABCMeta):
"""Mixin for class to support DELETE method.
class APIPost(APIBase, metaclass=ABCMeta):
    """Mixin for class to support POST method."""

    def post(  # type: ignore
        self, request: Request, collection_name: str, deposit_id: Optional[int] = None
    ) -> HttpResponse:
        """Endpoint to create/add resources to deposit.

        Args:
            request: the incoming request
            collection_name: name of the targeted collection
            deposit_id: identifier of the deposit to add resources to, None
                when no existing deposit is targeted

        Returns:
            An HttpResponse holding the SWORD deposit receipt, with the
            status code returned by ``process_post``.

        """
        if deposit_id is None:
            deposit = None
        else:
            deposit = get_deposit_by_id(deposit_id, collection_name)

        headers = self.checks(request, collection_name, deposit)

        status, iri_key, receipt = self.process_post(
            request, headers, collection_name, deposit
        )

        return self._make_deposit_receipt(
            request,
            collection_name,
            status,
            iri_key,
            receipt,
        )

    def _make_deposit_receipt(
        self,
        request,
        collection_name: str,
        status: int,
        iri_key: str,
        receipt: Receipt,
    ) -> HttpResponse:
        """Returns an HttpResponse with a SWORD Deposit receipt as content.

        Args:
            request: the incoming request, used to build absolute IRIs
            collection_name: name of the targeted collection
            status: status code of the response
            iri_key: which of the receipt's IRIs to send back as the
                Location header
            receipt: result of the deposit processing

        """
        # Build the IRIs in the receipt
        args = [collection_name, receipt.deposit_id]
        iris = {
            iri: request.build_absolute_uri(reverse(iri, args=args))
            for iri in [EM_IRI, EDIT_IRI, CONT_FILE_IRI, SE_IRI, STATE_IRI]
        }

        context = {
            **attr.asdict(receipt),
            **iris,
            "packagings": ACCEPT_PACKAGINGS,
        }

        response = render(
            request,
            "deposit/deposit_receipt.xml",
            context=context,
            content_type="application/xml",
            status=status,
        )
        response["Location"] = iris[iri_key]
        return response

    @abstractmethod
    def process_post(
        self,
        request,
        headers: ParsedRequestHeaders,
        collection_name: str,
        deposit: Optional[Deposit] = None,
    ) -> Tuple[int, str, Receipt]:
        """Routine to deal with the deposit's processing.

        Returns
            Tuple of:
            - response status code (200, 201, etc...)
            - key iri (EM_IRI, EDIT_IRI, etc...)
            - Receipt

        """
        pass
class SWHPutDepositAPI(SWHBaseDeposit, metaclass=ABCMeta):
"""Mixin for class to support PUT method.
class APIPut(APIBase, metaclass=ABCMeta):
"""Mixin for class to support PUT method."""
"""
def put(self, req, collection_name, deposit_id, format=None):
def put( # type: ignore
self, request: Request, collection_name: str, deposit_id: int
) -> HttpResponse:
"""Endpoint to update deposit resources.
Returns:
......@@ -834,21 +1257,23 @@ class SWHPutDepositAPI(SWHBaseDeposit, metaclass=ABCMeta):
404 if the deposit or the collection does not exist
"""
checks = self.checks(req, collection_name, deposit_id)
if 'error' in checks:
return make_error_response_from_dict(req, checks['error'])
headers = checks['headers']
data = self.process_put(req, headers, collection_name, deposit_id)
error = data.get('error')
if error:
return make_error_response_from_dict(req, error)
if deposit_id is None:
deposit = None
else:
deposit = get_deposit_by_id(deposit_id, collection_name)
headers = self.checks(request, collection_name, deposit)
self.process_put(request, headers, collection_name, deposit)
return HttpResponse(status=status.HTTP_204_NO_CONTENT)
@abstractmethod
def process_put(self, req, headers, collection_name, deposit_id):
def process_put(
self,
request: Request,
headers: ParsedRequestHeaders,
collection_name: str,
deposit: Deposit,
) -> None:
"""Routine to deal with updating a deposit in some way.
Returns
......@@ -858,11 +1283,12 @@ class SWHPutDepositAPI(SWHBaseDeposit, metaclass=ABCMeta):
pass
class SWHDeleteDepositAPI(SWHBaseDeposit, metaclass=ABCMeta):
"""Mixin for class to support DELETE method.
class APIDelete(APIBase, metaclass=ABCMeta):
"""Mixin for class to support DELETE method."""
"""
def delete(self, req, collection_name, deposit_id):
def delete( # type: ignore
self, request: Request, collection_name: str, deposit_id: Optional[int] = None
) -> HttpResponse:
"""Endpoint to delete some deposit's resources (archives, deposit).
Returns:
......@@ -871,23 +1297,21 @@ class SWHDeleteDepositAPI(SWHBaseDeposit, metaclass=ABCMeta):
404 if the deposit or the collection does not exist
"""
checks = self.checks(req, collection_name, deposit_id)
if 'error' in checks:
return make_error_response_from_dict(req, checks['error'])
data = self.process_delete(req, collection_name, deposit_id)
error = data.get('error')
if error:
return make_error_response_from_dict(req, error)
assert deposit_id is not None
deposit = get_deposit_by_id(deposit_id, collection_name)
self.checks(request, collection_name, deposit)
self.process_delete(request, collection_name, deposit)
return HttpResponse(status=status.HTTP_204_NO_CONTENT)
@abstractmethod
def process_delete(self, req, collection_name, deposit_id):
def process_delete(
self, request: Request, collection_name: str, deposit: Deposit
) -> None:
"""Routine to delete a resource.
This is mostly not allowed except for the
EM_IRI (cf. .api.deposit_update.SWHUpdateArchiveDeposit)
EM_IRI (cf. .api.deposit_update.APIUpdateArchive)
"""
pass
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.http import HttpResponse
from django.shortcuts import render
from rest_framework import status
from swh.deposit.api.common import APIBase, get_deposit_by_id
from swh.deposit.models import DEPOSIT_STATUS_DETAIL, DepositRequest
class ContentAPI(APIBase):
    """Read-only view over a deposit and the requests attached to it.

    The sword specification calls this endpoint 'Cont-IRI' and 'File-IRI'.

    HTTP verbs supported: GET
    """

    def get(  # type: ignore
        self, req, collection_name: str, deposit_id: int
    ) -> HttpResponse:
        """Render ``deposit/content.xml`` for the requested deposit.

        The deposit is looked up through ``get_deposit_by_id`` and then
        passed to ``self.checks`` before anything is rendered; the return
        value of ``checks`` is not used here.
        """
        deposit = get_deposit_by_id(deposit_id, collection_name)
        self.checks(req, collection_name, deposit)

        deposit_requests = DepositRequest.objects.filter(deposit=deposit)
        status_detail = DEPOSIT_STATUS_DETAIL[deposit.status]

        return render(
            req,
            "deposit/content.xml",
            context=dict(
                deposit=deposit,
                status_detail=status_detail,
                requests=deposit_requests,
            ),
            content_type="application/xml",
            status=status.HTTP_200_OK,
        )
# Copyright (C) 2017-2018 The Software Heritage developers
# Copyright (C) 2017-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -21,7 +21,11 @@ def convert_status_detail(status_detail):
'archive': [{
'summary': "summary-string",
'fields': [impacted-fields-list],
}]
}],
'loading': [
'error 1',
'error 2',
],
}
Args:
......@@ -36,22 +40,26 @@ def convert_status_detail(status_detail):
return None
def _str_fields(data):
fields = data.get('fields')
fields = data.get("fields")
if not fields:
return ''
return ' (%s)' % ', '.join(map(str, fields))
return ""
return " (%s)" % ", ".join(map(str, fields))
msg = []
for key in ['metadata', 'archive']:
for key in ["metadata", "archive"]:
_detail = status_detail.get(key)
if _detail:
for data in _detail:
msg.append('- %s%s\n' % (data['summary'], _str_fields(data)))
msg.append("- %s%s\n" % (data["summary"], _str_fields(data)))
_detail = status_detail.get("url")
if _detail:
msg.append("- %s%s\n" % (_detail["summary"], _str_fields(_detail)))
_detail = status_detail.get('url')
_detail = status_detail.get("loading")
if _detail:
msg.append('- %s%s\n' % (_detail['summary'], _str_fields(_detail)))
msg.extend(f"- {error}\n" for error in _detail)
if not msg:
return None
return ''.join(msg)
return "".join(msg)
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.shortcuts import render
from rest_framework import status
from .common import SWHBaseDeposit
from ..errors import NOT_FOUND, make_error_response
from ..errors import make_error_response_from_dict
from ..models import DEPOSIT_STATUS_DETAIL, Deposit, DepositRequest
class SWHDepositContent(SWHBaseDeposit):
    """GET endpoint rendering ``deposit/content.xml`` for one deposit."""

    def get(self, req, collection_name, deposit_id, format=None):
        """Return the content view of a deposit as an XML response.

        An error response is returned instead when ``self.checks`` reports
        an error, or (NOT_FOUND) when no deposit with that id exists or it
        belongs to another collection. ``format`` is accepted but unused.
        """
        outcome = self.checks(req, collection_name, deposit_id)
        if 'error' in outcome:
            return make_error_response_from_dict(req, outcome['error'])

        # A deposit found in another collection is treated as missing.
        deposit = None
        try:
            candidate = Deposit.objects.get(pk=deposit_id)
            if candidate.collection.name == collection_name:
                deposit = candidate
        except Deposit.DoesNotExist:
            pass

        if deposit is None:
            message = 'deposit %s does not belong to collection %s' % (
                deposit_id, collection_name)
            return make_error_response(req, NOT_FOUND, message)

        deposit_requests = DepositRequest.objects.filter(deposit=deposit)
        context = dict(
            deposit_id=deposit.id,
            status=deposit.status,
            status_detail=DEPOSIT_STATUS_DETAIL[deposit.status],
            requests=deposit_requests,
        )
        return render(
            req,
            'deposit/content.xml',
            context=context,
            content_type='application/xml',
            status=status.HTTP_200_OK,
        )
# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.shortcuts import render
from rest_framework import status
from .common import SWHBaseDeposit
from .converters import convert_status_detail
from ..errors import NOT_FOUND, make_error_response
from ..errors import make_error_response_from_dict
from ..models import DEPOSIT_STATUS_DETAIL, Deposit
class SWHDepositStatus(SWHBaseDeposit):
    """Status endpoint of a deposit.

    The sword specification calls this endpoint the 'State IRI'.

    HTTP verbs supported: GET
    """

    def get(self, req, collection_name, deposit_id, format=None):
        """Render ``deposit/status.xml`` for the requested deposit.

        An error response is returned instead when ``self.checks`` reports
        an error, or (NOT_FOUND) when no deposit with that id exists or it
        belongs to another collection. ``format`` is accepted but unused.
        """
        outcome = self.checks(req, collection_name, deposit_id)
        if 'error' in outcome:
            return make_error_response_from_dict(req, outcome['error'])

        # A deposit found in another collection is treated as missing.
        deposit = None
        try:
            candidate = Deposit.objects.get(pk=deposit_id)
            if candidate.collection.name == collection_name:
                deposit = candidate
        except Deposit.DoesNotExist:
            pass

        if deposit is None:
            message = 'deposit %s does not belong to collection %s' % (
                deposit_id, collection_name)
            return make_error_response(req, NOT_FOUND, message)

        # Use the deposit's own detail when the converter yields something
        # truthy, otherwise the generic text for its status.
        status_detail = (
            convert_status_detail(deposit.status_detail)
            or DEPOSIT_STATUS_DETAIL[deposit.status]
        )

        context = {
            'deposit_id': deposit.id,
            'status_detail': status_detail,
        }
        # Attributes absent from the deposit object are exposed as None.
        exposed = ('status', 'swh_id', 'swh_id_context', 'swh_anchor_id',
                   'swh_anchor_id_context', 'external_id')
        context.update({name: getattr(deposit, name, None) for name in exposed})

        return render(
            req,
            'deposit/status.xml',
            context=context,
            content_type='application/xml',
            status=status.HTTP_200_OK,
        )
# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from rest_framework import status
from .common import SWHPostDepositAPI, SWHPutDepositAPI, SWHDeleteDepositAPI
from .common import ACCEPT_ARCHIVE_CONTENT_TYPES
from ..config import CONT_FILE_IRI, EDIT_SE_IRI, EM_IRI
from ..errors import make_error_dict, BAD_REQUEST
from ..parsers import SWHFileUploadZipParser, SWHFileUploadTarParser
from ..parsers import SWHAtomEntryParser
from ..parsers import SWHMultiPartParser
class SWHUpdateArchiveDeposit(SWHPostDepositAPI, SWHPutDepositAPI,
                              SWHDeleteDepositAPI):
    """Archive (binary) update endpoints of a sword deposit.

    The sword specification calls this endpoint the 'EM IRI'.

    HTTP verbs supported: PUT, POST, DELETE
    """
    parser_classes = (SWHFileUploadZipParser, SWHFileUploadTarParser, )

    def process_put(self, req, headers, collection_name, deposit_id):
        """Replace the archives of an existing deposit with the upload.

        source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_binary  # noqa

        Returns:
            The result of the binary upload, or a BAD_REQUEST error dict
            when the request content type is not an accepted archive type.
            (Endpoint answer on success: 204 No Content.)

        """
        if req.content_type in ACCEPT_ARCHIVE_CONTENT_TYPES:
            return self._binary_upload(req, headers, collection_name,
                                       deposit_id=deposit_id,
                                       replace_archives=True)

        accepted = ', '.join(ACCEPT_ARCHIVE_CONTENT_TYPES)
        msg = 'Packaging format supported is restricted to %s' % (accepted)
        return make_error_dict(BAD_REQUEST, msg)

    def process_post(self, req, headers, collection_name, deposit_id):
        """Add the uploaded archive to an existing deposit.

        source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_mediaresource  # noqa

        Returns:
            Tuple (status code, iri key, upload result). On success the
            status is 201 Created and the key is CONT_FILE_IRI. On an
            unsupported content type the first two items are the
            placeholder 'unused' and the third is a BAD_REQUEST error
            dict.

        """
        if req.content_type in ACCEPT_ARCHIVE_CONTENT_TYPES:
            upload = self._binary_upload(
                req, headers, collection_name, deposit_id)
            return (status.HTTP_201_CREATED, CONT_FILE_IRI, upload)

        accepted = ', '.join(ACCEPT_ARCHIVE_CONTENT_TYPES)
        msg = 'Packaging format supported is restricted to %s' % (accepted)
        return 'unused', 'unused', make_error_dict(BAD_REQUEST, msg)

    def process_delete(self, req, collection_name, deposit_id):
        """Delete the content (archives) of an existing deposit.

        source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deletingcontent  # noqa

        Returns:
            The result of ``_delete_archives``.
            (Endpoint answer on success: 204 No Content.)

        """
        return self._delete_archives(collection_name, deposit_id)
class SWHUpdateMetadataDeposit(SWHPostDepositAPI, SWHPutDepositAPI,
                               SWHDeleteDepositAPI):
    """Metadata / container update endpoints of a sword deposit.

    The sword specification calls this endpoint the 'Edit IRI' (and
    SE IRI).

    HTTP verbs supported: POST (SE IRI), PUT (Edit IRI), DELETE
    """
    parser_classes = (SWHMultiPartParser, SWHAtomEntryParser)

    def process_put(self, req, headers, collection_name, deposit_id):
        """Replace the deposit's metadata (and archives, for multipart).

        source:
        - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_metadata  # noqa
        - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_multipart  # noqa

        Returns:
            The result of the multipart upload when the content type
            starts with 'multipart/', of the atom entry handling
            otherwise. (Endpoint answer on success: 204 No Content.)

        """
        if not req.content_type.startswith('multipart/'):
            # Atom entry: only the metadata is replaced.
            return self._atom_entry(req, headers, collection_name,
                                    deposit_id=deposit_id,
                                    replace_metadata=True)

        return self._multipart_upload(req, headers, collection_name,
                                      deposit_id=deposit_id,
                                      replace_archives=True,
                                      replace_metadata=True)

    def process_post(self, req, headers, collection_name, deposit_id):
        """Add new metadata/archive to an existing deposit.

        source:
        - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_metadata  # noqa
        - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_multipart  # noqa

        An empty post with 'in-progress' set to False is the corner case
        used to finalize a deposit.

        Returns:
            Tuple (status code, iri key, processing result):

            - multipart or atom-entry update: 201 Created with EM_IRI
            - empty finalizing post: 200 OK with EDIT_SE_IRI

        """
        if req.content_type.startswith('multipart/'):
            result = self._multipart_upload(req, headers, collection_name,
                                            deposit_id=deposit_id)
            return (status.HTTP_201_CREATED, EM_IRI, result)

        # check for final empty post
        # source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html
        # #continueddeposit_complete
        if headers['content-length'] == 0 and headers['in-progress'] is False:
            result = self._empty_post(
                req, headers, collection_name, deposit_id)
            return (status.HTTP_200_OK, EDIT_SE_IRI, result)

        result = self._atom_entry(req, headers, collection_name,
                                  deposit_id=deposit_id)
        return (status.HTTP_201_CREATED, EM_IRI, result)

    def process_delete(self, req, collection_name, deposit_id):
        """Delete the container (deposit).

        source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deleteconteiner  # noqa

        Returns:
            The result of ``_delete_deposit``.

        """
        return self._delete_deposit(collection_name, deposit_id)