Skip to content
Snippets Groups Projects
Commit b153a693 authored by Antoine Lambert
Browse files

interface: Add download_url method and implement it in backend

This new method returns a direct download URL for a cooked bundle
if the vault cache backend supports the feature.

The backend implementation simply wraps a call to the download_url
method from the objstorage used as vault cache.

Related to #885.
parent 1614bb65
No related branches found
No related tags found
No related merge requests found
# Copyright (C) 2020 The Software Heritage developers
# Copyright (C) 2020-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# Plugin modules listed for pytest to load for this test suite.
pytest_plugins = [
"swh.storage.pytest_plugin",
"swh.loader.pytest_plugin",
]
swh.core[db,http] >= 2
swh.model >= 6
swh.objstorage >= 2
swh.objstorage >= 2.3.0
swh.scheduler >= 1.2
swh.storage >= 1.3
attrs
dulwich >= 0.18.7
pytest
pytest-httpserver
pytest-mock
swh.core[testing]
swh.loader.core
......
# Copyright (C) 2017-2022 The Software Heritage developers
# Copyright (C) 2017-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import collections
from datetime import timedelta
from email.mime.text import MIMEText
import logging
import smtplib
......@@ -379,6 +380,25 @@ class VaultBackend(VaultDB):
self.update_access_ts(bundle_type, swhid, cur=cur)
return self.cache.get(bundle_type, swhid)
@db_transaction()
def download_url(
    self,
    bundle_type: str,
    swhid: CoreSWHID,
    content_disposition: Optional[str] = None,
    expiry: Optional[timedelta] = None,
    raise_notfound=True,
    db=None,
    cur=None,
) -> Optional[str]:
    """Return a direct download link for a bundle, as provided by the cache.

    Args:
        bundle_type: type of the requested bundle
        swhid: identifier of the object the bundle was cooked for
        content_disposition: forwarded unchanged to the cache's
            ``download_url``
        expiry: forwarded unchanged to the cache's ``download_url``
        raise_notfound: when true, an unavailable bundle raises instead of
            yielding ``None``
        db: database handle (not used directly by this method)
        cur: database cursor, passed on to the availability check

    Returns:
        The value returned by ``self.cache.download_url`` when the bundle is
        available; ``None`` when it is not and ``raise_notfound`` is false.

    Raises:
        NotFoundExc: the bundle is not available and ``raise_notfound`` is
            true.
    """
    if self.is_available(bundle_type, swhid, cur=cur):
        return self.cache.download_url(
            bundle_type, swhid, content_disposition, expiry
        )
    if raise_notfound:
        raise NotFoundExc(f"{bundle_type} {swhid} is not available.")
    return None
@db_transaction()
def update_access_ts(self, bundle_type: str, swhid: CoreSWHID, db=None, cur=None):
"""Update the last access timestamp of a bundle"""
......
# Copyright (C) 2016-2022 The Software Heritage developers
# Copyright (C) 2016-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import timedelta
from typing import Optional
from swh.model import hashutil
from swh.model.swhids import CoreSWHID
from swh.objstorage.factory import get_objstorage
......@@ -27,6 +30,16 @@ class VaultCache:
sid = self._get_internal_id(bundle_type, swhid)
return self.objstorage.get(sid)
def download_url(
    self,
    bundle_type,
    swhid: CoreSWHID,
    content_disposition: Optional[str] = None,
    expiry: Optional[timedelta] = None,
) -> Optional[str]:
    """Ask the underlying objstorage for a download URL of a bundle.

    The bundle's internal object id is computed with ``_get_internal_id``
    and handed, together with ``content_disposition`` and ``expiry``, to
    ``self.objstorage.download_url``; its result is returned as is.
    """
    return self.objstorage.download_url(
        self._get_internal_id(bundle_type, swhid), content_disposition, expiry
    )
def delete(self, bundle_type, swhid: CoreSWHID):
    """Delete a bundle from the underlying objstorage.

    Returns whatever ``self.objstorage.delete`` returns for the bundle's
    internal object id.
    """
    return self.objstorage.delete(self._get_internal_id(bundle_type, swhid))
......
# Copyright (C) 2017-2021 The Software Heritage developers
# Copyright (C) 2017-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import timedelta
from typing import Any, Dict, List, Optional, Tuple
from swh.model.swhids import CoreSWHID
......@@ -19,6 +20,15 @@ class InMemoryVaultBackend:
def fetch(self, bundle_type: str, swhid: CoreSWHID) -> Optional[bytes]:
    """Look the bundle up in the in-memory cache and return the result."""
    bundle = self._cache.get(bundle_type, swhid)
    return bundle
def download_url(
    self,
    bundle_type: str,
    swhid: CoreSWHID,
    content_disposition: Optional[str] = None,
    expiry: Optional[timedelta] = None,
) -> Optional[str]:
    """Direct download links are not provided by this backend.

    All arguments are ignored and ``None`` is always returned.
    """
    return None
def cook(
self, bundle_type: str, swhid: CoreSWHID, email: Optional[str] = None
) -> Dict[str, Any]:
......
# Copyright (C) 2017-2020 The Software Heritage developers
# Copyright (C) 2017-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import timedelta
from typing import Any, Dict, List, Optional, Tuple
from typing_extensions import Protocol, runtime_checkable
......@@ -22,6 +23,17 @@ class VaultInterface(Protocol):
"""Fetch information from a bundle"""
...
@remote_api_endpoint("download_url")
def download_url(
    self,
    bundle_type: str,
    swhid: CoreSWHID,
    content_disposition: Optional[str] = None,
    expiry: Optional[timedelta] = None,
) -> Optional[str]:
    """Obtain bundle direct download link if the vault cache backend
    supports it.

    Args:
        bundle_type: type of the requested bundle
        swhid: identifier of the object the bundle was cooked for
        content_disposition: optional string handed to the implementation
            (presumably a Content-Disposition value for the download;
            confirm against the cache backend)
        expiry: optional duration handed to the implementation (presumably
            the validity period of the link; confirm against the cache
            backend)

    Returns:
        A URL as a string, or ``None`` when no direct link can be provided.
    """
    ...
@remote_api_endpoint("cook")
def cook(
self, bundle_type: str, swhid: CoreSWHID, email: Optional[str] = None
......
# Copyright (C) 2020-2022 The Software Heritage developers
# Copyright (C) 2020-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -43,6 +43,26 @@ vault_postgresql_proc = factories.postgresql_proc(
postgres_vault = factories.postgresql("vault_postgresql_proc")
def pytest_collection_modifyitems(items):
    """Skip tests using httpserver fixture if pytest-httpserver is
    not available (debian < 12 for instance).

    Args:
        items: collected test items; each one whose ``fixturenames``
            contains ``"httpserver"`` gets a ``skip`` marker when the
            ``pytest_httpserver`` package cannot be imported.
    """
    try:
        from pytest_httpserver import HTTPServer  # noqa
    except ImportError:
        pass
    else:
        # The plugin is importable, so there is nothing to skip.
        return

    for item in items:
        # Items without a ``fixturenames`` attribute (or with an empty/None
        # one) are treated as using no fixtures. Any other error is left to
        # propagate instead of being silently swallowed.
        fixture_names = getattr(item, "fixturenames", None) or ()
        if "httpserver" in fixture_names:
            item.add_marker(
                pytest.mark.skip(reason="pytest-httpserver not installed")
            )
@pytest.fixture
def swh_vault_config(postgres_vault, tmp_path) -> Dict[str, Any]:
tmp_path = str(tmp_path)
......
# Copyright (C) 2017-2022 The Software Heritage developers
# Copyright (C) 2017-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -12,6 +12,7 @@ from unittest.mock import MagicMock, patch
import attr
import psycopg2
import pytest
import requests
from swh.core.sentry import init_sentry
from swh.model.model import Content
......@@ -419,3 +420,45 @@ def test_retry_failed_bundle(swh_vault):
swh_vault.cook(TEST_TYPE, TEST_SWHID)
info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
assert info["task_status"] == "new"
def test_download_url_cache_pathslicing_backend(swh_vault):
    """No download URL is returned for a bundle cooked in ``swh_vault``."""
    swhid, _ = fake_cook(swh_vault, TEST_TYPE, b"content")
    # download URL feature is not available with pathslicing backend for vault cache
    url = swh_vault.download_url(TEST_TYPE, swhid)
    assert url is None
@pytest.fixture
def swh_vault_config_http_cache(swh_vault_config, httpserver):
    """Vault configuration whose ``cache`` entry targets the test HTTP server.

    The incoming ``swh_vault_config`` mapping is updated in place and returned.
    """
    http_cache = {
        "cls": "http",
        "url": httpserver.url_for("/"),
        "compression": "none",
    }
    swh_vault_config["cache"] = http_cache
    return swh_vault_config
@pytest.fixture
def swh_vault_http_cache(swh_vault_config_http_cache):
    """Vault built with ``get_vault("local", ...)`` from the HTTP cache config."""
    from swh.vault import get_vault

    vault = get_vault("local", **swh_vault_config_http_cache)
    return vault
def test_download_url_cache_http_backend(swh_vault_http_cache, mocker, httpserver):
    """With an HTTP cache, download_url raises for an unavailable bundle and
    otherwise yields a URL serving the bundle content."""
    vault = swh_vault_http_cache

    # A SWHID the vault has no bundle for must be reported as not available.
    missing_swhid = Content.from_data(b"foo").swhid()
    expected_error = f"{TEST_TYPE} {missing_swhid} is not available."
    with pytest.raises(NotFoundExc, match=expected_error):
        vault.download_url(TEST_TYPE, missing_swhid)

    # From here on, make the vault report every bundle task as done.
    mocker.patch.object(vault, "progress", return_value={"task_status": "done"})

    payload = b"content"
    swhid = Content.from_data(payload).swhid()
    internal_id = vault.cache._get_internal_id(TEST_TYPE, swhid)
    sha1_hex = internal_id["sha1"].hex()
    # Serve the payload from the test HTTP server under the object's sha1.
    httpserver.expect_request(f"/{sha1_hex}").respond_with_data(payload)

    url = vault.download_url(TEST_TYPE, swhid)
    assert url is not None

    response = requests.get(url)
    assert response.content == payload
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment