diff --git a/conftest.py b/conftest.py
index b6c0a26f047d6b82c598a90e8ab414acb3492558..08cf7cf9a66d73c7328dc341c13dc1592a9b8988 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,9 +1,8 @@
-# Copyright (C) 2020 The Software Heritage developers
+# Copyright (C) 2020-2023 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 pytest_plugins = [
     "swh.storage.pytest_plugin",
-    "swh.loader.pytest_plugin",
 ]
diff --git a/requirements-swh.txt b/requirements-swh.txt
index 3aff41c3b3169fdf92a67666ac8e4ecd37ee0073..ba4823c8096c5af6ac98e565260a8fdf3e859623 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,5 +1,5 @@
 swh.core[db,http] >= 2
 swh.model >= 6
-swh.objstorage >= 2
+swh.objstorage >= 2.3.0
 swh.scheduler >= 1.2
 swh.storage >= 1.3
diff --git a/requirements-test.txt b/requirements-test.txt
index 5e78bfb0a0c176ee2c2a35cad4c15bb0d7bc86c2..3d188edeffe9ee08fbc8f55acf56697c3351fd1a 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,6 +1,7 @@
 attrs
 dulwich >= 0.18.7
 pytest
+pytest-httpserver
 pytest-mock
 swh.core[testing]
 swh.loader.core
diff --git a/swh/vault/backend.py b/swh/vault/backend.py
index a14d349fcc4f7e17d1e94b127ae3eabc992cf42d..da8244519d94d2b014caecad4185c93a38f37db4 100644
--- a/swh/vault/backend.py
+++ b/swh/vault/backend.py
@@ -1,9 +1,10 @@
-# Copyright (C) 2017-2022 The Software Heritage developers
+# Copyright (C) 2017-2023 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import collections
+from datetime import timedelta
 from email.mime.text import MIMEText
 import logging
 import smtplib
@@ -379,6 +380,29 @@ class VaultBackend(VaultDB):
         self.update_access_ts(bundle_type, swhid, cur=cur)
         return self.cache.get(bundle_type, swhid)
 
+    @db_transaction()
+    def download_url(
+        self,
+        bundle_type: str,
+        swhid: CoreSWHID,
+        content_disposition: Optional[str] = None,
+        expiry: Optional[timedelta] = None,
+        raise_notfound: bool = True,
+        db=None,
+        cur=None,
+    ) -> Optional[str]:
+        """Obtain a bundle direct download link from the cache if supported.
+
+        ``content_disposition`` and ``expiry`` are forwarded unchanged to the
+        cache. When the bundle is not available, raise ``NotFoundExc`` if
+        ``raise_notfound`` is true and return None otherwise.
+        """
+        if not self.is_available(bundle_type, swhid, cur=cur):
+            if raise_notfound:
+                raise NotFoundExc(f"{bundle_type} {swhid} is not available.")
+            return None
+        return self.cache.download_url(bundle_type, swhid, content_disposition, expiry)
+
     @db_transaction()
     def update_access_ts(self, bundle_type: str, swhid: CoreSWHID, db=None, cur=None):
         """Update the last access timestamp of a bundle"""
diff --git a/swh/vault/cache.py b/swh/vault/cache.py
index 488926e873e33bba12e89ed941197be0eec1ef45..08b26893d3f320f78a92f590a59c46fc509c26ba 100644
--- a/swh/vault/cache.py
+++ b/swh/vault/cache.py
@@ -1,8 +1,11 @@
-# Copyright (C) 2016-2022 The Software Heritage developers
+# Copyright (C) 2016-2023 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from datetime import timedelta
+from typing import Optional
+
 from swh.model import hashutil
 from swh.model.swhids import CoreSWHID
 from swh.objstorage.factory import get_objstorage
@@ -27,6 +30,20 @@ class VaultCache:
         sid = self._get_internal_id(bundle_type, swhid)
         return self.objstorage.get(sid)
 
+    def download_url(
+        self,
+        bundle_type: str,
+        swhid: CoreSWHID,
+        content_disposition: Optional[str] = None,
+        expiry: Optional[timedelta] = None,
+    ) -> Optional[str]:
+        """Return what the objstorage's ``download_url`` gives for a bundle.
+
+        ``content_disposition`` and ``expiry`` are passed through unchanged.
+        """
+        sid = self._get_internal_id(bundle_type, swhid)
+        return self.objstorage.download_url(sid, content_disposition, expiry)
+
     def delete(self, bundle_type, swhid: CoreSWHID):
         sid = self._get_internal_id(bundle_type, swhid)
         return self.objstorage.delete(sid)
diff --git a/swh/vault/in_memory_backend.py b/swh/vault/in_memory_backend.py
index 74d6ee0af8a8a673c0282fd081dc7174ac445562..76dae324ee34957b240347726cf424d50546368e 100644
--- a/swh/vault/in_memory_backend.py
+++ b/swh/vault/in_memory_backend.py
@@ -1,8 +1,9 @@
-# Copyright (C) 2017-2021 The Software Heritage developers
+# Copyright (C) 2017-2023 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from datetime import timedelta
 from typing import Any, Dict, List, Optional, Tuple
 
 from swh.model.swhids import CoreSWHID
@@ -19,6 +20,16 @@ class InMemoryVaultBackend:
     def fetch(self, bundle_type: str, swhid: CoreSWHID) -> Optional[bytes]:
         return self._cache.get(bundle_type, swhid)
 
+    def download_url(
+        self,
+        bundle_type: str,
+        swhid: CoreSWHID,
+        content_disposition: Optional[str] = None,
+        expiry: Optional[timedelta] = None,
+    ) -> Optional[str]:
+        """Always return None: this backend never yields a direct download link."""
+        return None
+
     def cook(
         self, bundle_type: str, swhid: CoreSWHID, email: Optional[str] = None
     ) -> Dict[str, Any]:
diff --git a/swh/vault/interface.py b/swh/vault/interface.py
index e6991ee2ead7e14ace12bb652eb87c70c0acbd23..68fabb8ca7e01112ce451ec85a756c6b18ae6146 100644
--- a/swh/vault/interface.py
+++ b/swh/vault/interface.py
@@ -1,8 +1,9 @@
-# Copyright (C) 2017-2020 The Software Heritage developers
+# Copyright (C) 2017-2023 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from datetime import timedelta
 from typing import Any, Dict, List, Optional, Tuple
 
 from typing_extensions import Protocol, runtime_checkable
@@ -22,6 +23,17 @@ class VaultInterface(Protocol):
         """Fetch information from a bundle"""
         ...
 
+    @remote_api_endpoint("download_url")
+    def download_url(
+        self,
+        bundle_type: str,
+        swhid: CoreSWHID,
+        content_disposition: Optional[str] = None,
+        expiry: Optional[timedelta] = None,
+    ) -> Optional[str]:
+        """Obtain bundle direct download link if the vault cache backend supports it."""
+        ...
+
     @remote_api_endpoint("cook")
     def cook(
         self, bundle_type: str, swhid: CoreSWHID, email: Optional[str] = None
diff --git a/swh/vault/tests/conftest.py b/swh/vault/tests/conftest.py
index 70c140997037cf2ce58a9c48ea0c19d74a618c40..dfbf5f6c761d56c1132c8f3befccba5a3bee79cc 100644
--- a/swh/vault/tests/conftest.py
+++ b/swh/vault/tests/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2022 The Software Heritage developers
+# Copyright (C) 2020-2023 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -43,6 +43,22 @@ vault_postgresql_proc = factories.postgresql_proc(
 postgres_vault = factories.postgresql("vault_postgresql_proc")
 
 
+def pytest_collection_modifyitems(items):
+    """Skip tests using httpserver fixture if pytest-httpserver is
+    not available (debian < 12 for instance)"""
+    try:
+        from pytest_httpserver import HTTPServer  # noqa
+    except ImportError:
+        skip_httpserver = pytest.mark.skip(reason="pytest-httpserver not installed")
+    else:
+        # plugin importable: the httpserver fixture exists, nothing to skip
+        return
+    for item in items:
+        # tolerate items lacking ``fixturenames`` without a blanket except
+        if "httpserver" in getattr(item, "fixturenames", ()):
+            item.add_marker(skip_httpserver)
+
+
 @pytest.fixture
 def swh_vault_config(postgres_vault, tmp_path) -> Dict[str, Any]:
     tmp_path = str(tmp_path)
diff --git a/swh/vault/tests/test_backend.py b/swh/vault/tests/test_backend.py
index f4fba67b44ad537f78fe4c70fbb8ecd49943c292..6d0e51ac23b3143be99159d6ad4cf9350137cc96 100644
--- a/swh/vault/tests/test_backend.py
+++ b/swh/vault/tests/test_backend.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2022 The Software Heritage developers
+# Copyright (C) 2017-2023 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -12,6 +12,7 @@ from unittest.mock import MagicMock, patch
 import attr
 import psycopg2
 import pytest
+import requests
 
 from swh.core.sentry import init_sentry
 from swh.model.model import Content
@@ -419,3 +420,45 @@ def test_retry_failed_bundle(swh_vault):
     swh_vault.cook(TEST_TYPE, TEST_SWHID)
     info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
     assert info["task_status"] == "new"
+
+
+def test_download_url_cache_pathslicing_backend(swh_vault):
+    swhid, content = fake_cook(swh_vault, TEST_TYPE, b"content")
+    # download URL feature is not available with pathslicing backend for vault cache
+    assert swh_vault.download_url(TEST_TYPE, swhid) is None
+
+
+@pytest.fixture
+def swh_vault_config_http_cache(swh_vault_config, httpserver):
+    swh_vault_config["cache"] = {
+        "cls": "http",
+        "url": httpserver.url_for("/"),
+        "compression": "none",
+    }
+    return swh_vault_config
+
+
+@pytest.fixture
+def swh_vault_http_cache(swh_vault_config_http_cache):
+    from swh.vault import get_vault
+
+    return get_vault("local", **swh_vault_config_http_cache)
+
+
+def test_download_url_cache_http_backend(swh_vault_http_cache, mocker, httpserver):
+    unknown_swhid = Content.from_data(b"foo").swhid()
+    with pytest.raises(
+        NotFoundExc, match=f"{TEST_TYPE} {unknown_swhid} is not available."
+    ):
+        swh_vault_http_cache.download_url(TEST_TYPE, unknown_swhid)
+
+    mocker.patch.object(
+        swh_vault_http_cache, "progress", return_value={"task_status": "done"}
+    )
+    content = b"content"
+    swhid = Content.from_data(content).swhid()
+    objid = swh_vault_http_cache.cache._get_internal_id(TEST_TYPE, swhid)["sha1"]
+    httpserver.expect_request(f"/{objid.hex()}").respond_with_data(content)
+    download_url = swh_vault_http_cache.download_url(TEST_TYPE, swhid)
+    assert download_url is not None
+    assert requests.get(download_url).content == content