From b153a693b9b816dac7ad2e8b613a279c02ad2cba Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Wed, 19 Jul 2023 17:20:15 +0200 Subject: [PATCH] interface: Add download_url method and implement it in backend This new method returns a direct download URL for a cooked bundle if the vault cache backend supports the feature. The backend implementation simply wraps a call to the download_url method from the objstorage used as vault cache. Related to #885. --- conftest.py | 3 +-- requirements-swh.txt | 2 +- requirements-test.txt | 1 + swh/vault/backend.py | 22 +++++++++++++++- swh/vault/cache.py | 15 ++++++++++- swh/vault/in_memory_backend.py | 12 ++++++++- swh/vault/interface.py | 14 +++++++++- swh/vault/tests/conftest.py | 22 +++++++++++++++- swh/vault/tests/test_backend.py | 45 ++++++++++++++++++++++++++++++++- 9 files changed, 127 insertions(+), 9 deletions(-) diff --git a/conftest.py b/conftest.py index b6c0a26..08cf7cf 100644 --- a/conftest.py +++ b/conftest.py @@ -1,9 +1,8 @@ -# Copyright (C) 2020 The Software Heritage developers +# Copyright (C) 2020-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information pytest_plugins = [ "swh.storage.pytest_plugin", - "swh.loader.pytest_plugin", ] diff --git a/requirements-swh.txt b/requirements-swh.txt index 3aff41c..ba4823c 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,5 +1,5 @@ swh.core[db,http] >= 2 swh.model >= 6 -swh.objstorage >= 2 +swh.objstorage >= 2.3.0 swh.scheduler >= 1.2 swh.storage >= 1.3 diff --git a/requirements-test.txt b/requirements-test.txt index 5e78bfb..3d188ed 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,6 +1,7 @@ attrs dulwich >= 0.18.7 pytest +pytest-httpserver pytest-mock swh.core[testing] swh.loader.core diff --git a/swh/vault/backend.py b/swh/vault/backend.py index a14d349..da82445 100644 --- a/swh/vault/backend.py +++ b/swh/vault/backend.py @@ -1,9 +1,10 @@ -# Copyright (C) 2017-2022 The Software Heritage developers +# Copyright (C) 2017-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import collections +from datetime import timedelta from email.mime.text import MIMEText import logging import smtplib @@ -379,6 +380,25 @@ class VaultBackend(VaultDB): self.update_access_ts(bundle_type, swhid, cur=cur) return self.cache.get(bundle_type, swhid) + @db_transaction() + def download_url( + self, + bundle_type: str, + swhid: CoreSWHID, + content_disposition: Optional[str] = None, + expiry: Optional[timedelta] = None, + raise_notfound=True, + db=None, + cur=None, + ) -> Optional[str]: + """Obtain a bundle direct download link from the cache if supported""" + available = self.is_available(bundle_type, swhid, cur=cur) + if not available: + if raise_notfound: + raise NotFoundExc(f"{bundle_type} {swhid} is not available.") + return None + return self.cache.download_url(bundle_type, swhid, content_disposition, expiry) + @db_transaction() def update_access_ts(self, bundle_type: str, swhid: CoreSWHID, db=None, cur=None): """Update the last access timestamp of a bundle""" diff --git a/swh/vault/cache.py b/swh/vault/cache.py index 488926e..08b2689 100644 --- a/swh/vault/cache.py +++ b/swh/vault/cache.py @@ -1,8 +1,11 @@ -# Copyright (C) 2016-2022 The Software Heritage developers +# Copyright (C) 2016-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from datetime import timedelta +from typing import Optional + from swh.model import hashutil from swh.model.swhids import CoreSWHID from swh.objstorage.factory import get_objstorage @@ -27,6 +30,16 @@ class VaultCache: sid = self._get_internal_id(bundle_type, swhid) return self.objstorage.get(sid) + def download_url( + self, + bundle_type, + swhid: CoreSWHID, + content_disposition: Optional[str] = None, + expiry: Optional[timedelta] = None, + ) -> Optional[str]: + sid = self._get_internal_id(bundle_type, swhid) + return self.objstorage.download_url(sid, content_disposition, expiry) + def delete(self, bundle_type, swhid: CoreSWHID): sid = self._get_internal_id(bundle_type, swhid) return self.objstorage.delete(sid) diff --git a/swh/vault/in_memory_backend.py b/swh/vault/in_memory_backend.py index 74d6ee0..76dae32 100644 --- a/swh/vault/in_memory_backend.py +++ b/swh/vault/in_memory_backend.py @@ -1,8 +1,9 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from datetime import timedelta from typing import Any, Dict, List, Optional, Tuple from swh.model.swhids import CoreSWHID @@ -19,6 +20,15 @@ class InMemoryVaultBackend: def fetch(self, bundle_type: str, swhid: CoreSWHID) -> Optional[bytes]: return self._cache.get(bundle_type, swhid) + def download_url( + self, + bundle_type: str, + swhid: CoreSWHID, + content_disposition: Optional[str] = None, + expiry: Optional[timedelta] = None, + ) -> Optional[str]: + return None + def cook( self, bundle_type: str, swhid: CoreSWHID, email: Optional[str] = None ) -> Dict[str, Any]: diff --git a/swh/vault/interface.py b/swh/vault/interface.py index e6991ee..68fabb8 100644 --- a/swh/vault/interface.py +++ b/swh/vault/interface.py @@ -1,8 +1,9 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from datetime import timedelta from typing import Any, Dict, List, Optional, Tuple from typing_extensions import Protocol, runtime_checkable @@ -22,6 +23,17 @@ class VaultInterface(Protocol): """Fetch information from a bundle""" ... + @remote_api_endpoint("download_url") + def download_url( + self, + bundle_type: str, + swhid: CoreSWHID, + content_disposition: Optional[str] = None, + expiry: Optional[timedelta] = None, + ) -> Optional[str]: + """Obtain bundle direct download link if the vault cache backend supports it.""" + ... + @remote_api_endpoint("cook") def cook( self, bundle_type: str, swhid: CoreSWHID, email: Optional[str] = None diff --git a/swh/vault/tests/conftest.py b/swh/vault/tests/conftest.py index 70c1409..dfbf5f6 100644 --- a/swh/vault/tests/conftest.py +++ b/swh/vault/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2022 The Software Heritage developers +# Copyright (C) 2020-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -43,6 +43,26 @@ vault_postgresql_proc = factories.postgresql_proc( postgres_vault = factories.postgresql("vault_postgresql_proc") +def pytest_collection_modifyitems(items): + """Skip tests using httpserver fixture if pytest-httpserver is + not available (debian < 12 for instance)""" + try: + from pytest_httpserver import HTTPServer # noqa + except ImportError: + pytest_httpserver_available = False + else: + pytest_httpserver_available = True + for item in items: + try: + fixtures = item.fixturenames + if "httpserver" in fixtures and not pytest_httpserver_available: + item.add_marker( + pytest.mark.skip(reason="pytest-httpserver not installed") + ) + except Exception: + pass + + @pytest.fixture def swh_vault_config(postgres_vault, tmp_path) -> Dict[str, Any]: tmp_path = str(tmp_path) diff --git a/swh/vault/tests/test_backend.py b/swh/vault/tests/test_backend.py index f4fba67..6d0e51a 100644 --- a/swh/vault/tests/test_backend.py +++ b/swh/vault/tests/test_backend.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2022 The Software Heritage developers +# Copyright (C) 2017-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -12,6 +12,7 @@ from unittest.mock import MagicMock, patch import attr import psycopg2 import pytest +import requests from swh.core.sentry import init_sentry from swh.model.model import Content @@ -419,3 +420,45 @@ def test_retry_failed_bundle(swh_vault): swh_vault.cook(TEST_TYPE, TEST_SWHID) info = swh_vault.progress(TEST_TYPE, TEST_SWHID) assert info["task_status"] == "new" + + +def test_download_url_cache_pathslicing_backend(swh_vault): + swhid, content = fake_cook(swh_vault, TEST_TYPE, b"content") + # download URL feature is not available with pathslicing backend for vault cache + assert swh_vault.download_url(TEST_TYPE, swhid) is None + + +@pytest.fixture +def swh_vault_config_http_cache(swh_vault_config, httpserver): + swh_vault_config["cache"] = { + "cls": "http", + "url": httpserver.url_for("/"), + "compression": "none", + } + return swh_vault_config + + +@pytest.fixture +def swh_vault_http_cache(swh_vault_config_http_cache): + from swh.vault import get_vault + + return get_vault("local", **swh_vault_config_http_cache) + + +def test_download_url_cache_http_backend(swh_vault_http_cache, mocker, httpserver): + unknown_swhid = Content.from_data(b"foo").swhid() + with pytest.raises( + NotFoundExc, match=f"{TEST_TYPE} {unknown_swhid} is not available." + ): + swh_vault_http_cache.download_url(TEST_TYPE, unknown_swhid) + + mocker.patch.object( + swh_vault_http_cache, "progress", return_value={"task_status": "done"} + ) + content = b"content" + swhid = Content.from_data(content).swhid() + objid = swh_vault_http_cache.cache._get_internal_id(TEST_TYPE, swhid)["sha1"] + httpserver.expect_request(f"/{objid.hex()}").respond_with_data(content) + download_url = swh_vault_http_cache.download_url(TEST_TYPE, swhid) + assert download_url is not None + assert requests.get(download_url).content == content -- GitLab