From b153a693b9b816dac7ad2e8b613a279c02ad2cba Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Wed, 19 Jul 2023 17:20:15 +0200
Subject: [PATCH] interface: Add download_url method and implement it in
 backend

This new method returns a direct download URL for a cooked bundle
if the vault cache backend supports the feature.

The backend implementation simply wraps a call to the download_url
method from the objstorage used as vault cache.

Related to #885.
---
 conftest.py                     |  3 +--
 requirements-swh.txt            |  2 +-
 requirements-test.txt           |  1 +
 swh/vault/backend.py            | 22 +++++++++++++++-
 swh/vault/cache.py              | 15 ++++++++++-
 swh/vault/in_memory_backend.py  | 12 ++++++++-
 swh/vault/interface.py          | 14 +++++++++-
 swh/vault/tests/conftest.py     | 22 +++++++++++++++-
 swh/vault/tests/test_backend.py | 45 ++++++++++++++++++++++++++++++++-
 9 files changed, 127 insertions(+), 9 deletions(-)

diff --git a/conftest.py b/conftest.py
index b6c0a26..08cf7cf 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,9 +1,8 @@
-# Copyright (C) 2020 The Software Heritage developers
+# Copyright (C) 2020-2023 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 pytest_plugins = [
     "swh.storage.pytest_plugin",
-    "swh.loader.pytest_plugin",
 ]
diff --git a/requirements-swh.txt b/requirements-swh.txt
index 3aff41c..ba4823c 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,5 +1,5 @@
 swh.core[db,http] >= 2
 swh.model >= 6
-swh.objstorage >= 2
+swh.objstorage >= 2.3.0
 swh.scheduler >= 1.2
 swh.storage >= 1.3
diff --git a/requirements-test.txt b/requirements-test.txt
index 5e78bfb..3d188ed 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,6 +1,7 @@
 attrs
 dulwich >= 0.18.7
 pytest
+pytest-httpserver
 pytest-mock
 swh.core[testing]
 swh.loader.core
diff --git a/swh/vault/backend.py b/swh/vault/backend.py
index a14d349..da82445 100644
--- a/swh/vault/backend.py
+++ b/swh/vault/backend.py
@@ -1,9 +1,10 @@
-# Copyright (C) 2017-2022  The Software Heritage developers
+# Copyright (C) 2017-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import collections
+from datetime import timedelta
 from email.mime.text import MIMEText
 import logging
 import smtplib
@@ -379,6 +380,25 @@ class VaultBackend(VaultDB):
         self.update_access_ts(bundle_type, swhid, cur=cur)
         return self.cache.get(bundle_type, swhid)
 
+    @db_transaction()
+    def download_url(
+        self,
+        bundle_type: str,
+        swhid: CoreSWHID,
+        content_disposition: Optional[str] = None,
+        expiry: Optional[timedelta] = None,
+        raise_notfound=True,
+        db=None,
+        cur=None,
+    ) -> Optional[str]:
+        """Obtain a bundle direct download link from the cache if supported"""
+        available = self.is_available(bundle_type, swhid, cur=cur)
+        if not available:
+            if raise_notfound:
+                raise NotFoundExc(f"{bundle_type} {swhid} is not available.")
+            return None
+        return self.cache.download_url(bundle_type, swhid, content_disposition, expiry)
+
     @db_transaction()
     def update_access_ts(self, bundle_type: str, swhid: CoreSWHID, db=None, cur=None):
         """Update the last access timestamp of a bundle"""
diff --git a/swh/vault/cache.py b/swh/vault/cache.py
index 488926e..08b2689 100644
--- a/swh/vault/cache.py
+++ b/swh/vault/cache.py
@@ -1,8 +1,11 @@
-# Copyright (C) 2016-2022  The Software Heritage developers
+# Copyright (C) 2016-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from datetime import timedelta
+from typing import Optional
+
 from swh.model import hashutil
 from swh.model.swhids import CoreSWHID
 from swh.objstorage.factory import get_objstorage
@@ -27,6 +30,16 @@ class VaultCache:
         sid = self._get_internal_id(bundle_type, swhid)
         return self.objstorage.get(sid)
 
+    def download_url(
+        self,
+        bundle_type,
+        swhid: CoreSWHID,
+        content_disposition: Optional[str] = None,
+        expiry: Optional[timedelta] = None,
+    ) -> Optional[str]:
+        sid = self._get_internal_id(bundle_type, swhid)
+        return self.objstorage.download_url(sid, content_disposition, expiry)
+
     def delete(self, bundle_type, swhid: CoreSWHID):
         sid = self._get_internal_id(bundle_type, swhid)
         return self.objstorage.delete(sid)
diff --git a/swh/vault/in_memory_backend.py b/swh/vault/in_memory_backend.py
index 74d6ee0..76dae32 100644
--- a/swh/vault/in_memory_backend.py
+++ b/swh/vault/in_memory_backend.py
@@ -1,8 +1,9 @@
-# Copyright (C) 2017-2021  The Software Heritage developers
+# Copyright (C) 2017-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from datetime import timedelta
 from typing import Any, Dict, List, Optional, Tuple
 
 from swh.model.swhids import CoreSWHID
@@ -19,6 +20,15 @@ class InMemoryVaultBackend:
     def fetch(self, bundle_type: str, swhid: CoreSWHID) -> Optional[bytes]:
         return self._cache.get(bundle_type, swhid)
 
+    def download_url(
+        self,
+        bundle_type: str,
+        swhid: CoreSWHID,
+        content_disposition: Optional[str] = None,
+        expiry: Optional[timedelta] = None,
+    ) -> Optional[str]:
+        return None
+
     def cook(
         self, bundle_type: str, swhid: CoreSWHID, email: Optional[str] = None
     ) -> Dict[str, Any]:
diff --git a/swh/vault/interface.py b/swh/vault/interface.py
index e6991ee..68fabb8 100644
--- a/swh/vault/interface.py
+++ b/swh/vault/interface.py
@@ -1,8 +1,9 @@
-# Copyright (C) 2017-2020  The Software Heritage developers
+# Copyright (C) 2017-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from datetime import timedelta
 from typing import Any, Dict, List, Optional, Tuple
 
 from typing_extensions import Protocol, runtime_checkable
@@ -22,6 +23,17 @@ class VaultInterface(Protocol):
         """Fetch information from a bundle"""
         ...
 
+    @remote_api_endpoint("download_url")
+    def download_url(
+        self,
+        bundle_type: str,
+        swhid: CoreSWHID,
+        content_disposition: Optional[str] = None,
+        expiry: Optional[timedelta] = None,
+    ) -> Optional[str]:
+        """Obtain bundle direct download link if the vault cache backend supports it."""
+        ...
+
     @remote_api_endpoint("cook")
     def cook(
         self, bundle_type: str, swhid: CoreSWHID, email: Optional[str] = None
diff --git a/swh/vault/tests/conftest.py b/swh/vault/tests/conftest.py
index 70c1409..dfbf5f6 100644
--- a/swh/vault/tests/conftest.py
+++ b/swh/vault/tests/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2022  The Software Heritage developers
+# Copyright (C) 2020-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -43,6 +43,26 @@ vault_postgresql_proc = factories.postgresql_proc(
 postgres_vault = factories.postgresql("vault_postgresql_proc")
 
 
+def pytest_collection_modifyitems(items):
+    """Skip tests using httpserver fixture if pytest-httpserver is
+    not available (debian < 12 for instance)"""
+    try:
+        from pytest_httpserver import HTTPServer  # noqa
+    except ImportError:
+        pytest_httpserver_available = False
+    else:
+        pytest_httpserver_available = True
+    for item in items:
+        try:
+            fixtures = item.fixturenames
+            if "httpserver" in fixtures and not pytest_httpserver_available:
+                item.add_marker(
+                    pytest.mark.skip(reason="pytest-httpserver not installed")
+                )
+        except Exception:
+            pass
+
+
 @pytest.fixture
 def swh_vault_config(postgres_vault, tmp_path) -> Dict[str, Any]:
     tmp_path = str(tmp_path)
diff --git a/swh/vault/tests/test_backend.py b/swh/vault/tests/test_backend.py
index f4fba67..6d0e51a 100644
--- a/swh/vault/tests/test_backend.py
+++ b/swh/vault/tests/test_backend.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2022  The Software Heritage developers
+# Copyright (C) 2017-2023  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -12,6 +12,7 @@ from unittest.mock import MagicMock, patch
 import attr
 import psycopg2
 import pytest
+import requests
 
 from swh.core.sentry import init_sentry
 from swh.model.model import Content
@@ -419,3 +420,45 @@ def test_retry_failed_bundle(swh_vault):
         swh_vault.cook(TEST_TYPE, TEST_SWHID)
     info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
     assert info["task_status"] == "new"
+
+
+def test_download_url_cache_pathslicing_backend(swh_vault):
+    swhid, content = fake_cook(swh_vault, TEST_TYPE, b"content")
+    # download URL feature is not available with pathslicing backend for vault cache
+    assert swh_vault.download_url(TEST_TYPE, swhid) is None
+
+
+@pytest.fixture
+def swh_vault_config_http_cache(swh_vault_config, httpserver):
+    swh_vault_config["cache"] = {
+        "cls": "http",
+        "url": httpserver.url_for("/"),
+        "compression": "none",
+    }
+    return swh_vault_config
+
+
+@pytest.fixture
+def swh_vault_http_cache(swh_vault_config_http_cache):
+    from swh.vault import get_vault
+
+    return get_vault("local", **swh_vault_config_http_cache)
+
+
+def test_download_url_cache_http_backend(swh_vault_http_cache, mocker, httpserver):
+    unknown_swhid = Content.from_data(b"foo").swhid()
+    with pytest.raises(
+        NotFoundExc, match=f"{TEST_TYPE} {unknown_swhid} is not available."
+    ):
+        swh_vault_http_cache.download_url(TEST_TYPE, unknown_swhid)
+
+    mocker.patch.object(
+        swh_vault_http_cache, "progress", return_value={"task_status": "done"}
+    )
+    content = b"content"
+    swhid = Content.from_data(content).swhid()
+    objid = swh_vault_http_cache.cache._get_internal_id(TEST_TYPE, swhid)["sha1"]
+    httpserver.expect_request(f"/{objid.hex()}").respond_with_data(content)
+    download_url = swh_vault_http_cache.download_url(TEST_TYPE, swhid)
+    assert download_url is not None
+    assert requests.get(download_url).content == content
-- 
GitLab