From 3f4ac7cbe5fdca0ae6d2694e9ee84438c27fbfc1 Mon Sep 17 00:00:00 2001
From: David Douard <david.douard@sdfa3.org>
Date: Wed, 20 Nov 2024 15:56:40 +0100
Subject: [PATCH] api/graph: use a more generic UNAUTHENTICATED_HOSTS config to
 check access to graph API

Replace usage of the hardcoded and swh specific
SWH_WEB_INTERNAL_SERVER_NAMES by a more generic UNAUTHENTICATED_HOSTS
config option to bypass authentication for the graph public APIs.

This uses the same validation logic as django's ALLOWED_HOSTS one.
---
 swh/web/api/tests/views/test_graph.py | 33 +++++++++++++++++++++++----
 swh/web/api/views/graph.py            | 10 ++++++--
 swh/web/config.py                     |  1 +
 swh/web/settings/common.py            |  3 +++
 4 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/swh/web/api/tests/views/test_graph.py b/swh/web/api/tests/views/test_graph.py
index 2a2ecce82..0ebe9ec38 100644
--- a/swh/web/api/tests/views/test_graph.py
+++ b/swh/web/api/tests/views/test_graph.py
@@ -15,11 +15,24 @@ from django.http.response import StreamingHttpResponse
 from swh.model.hashutil import hash_to_bytes
 from swh.model.swhids import ExtendedObjectType, ExtendedSWHID
 from swh.web.api.views.graph import API_GRAPH_PERM
-from swh.web.config import SWH_WEB_INTERNAL_SERVER_NAMES, get_config
+from swh.web.config import get_config
 from swh.web.tests.helpers import check_http_get_response
 from swh.web.utils import reverse
 
 
+@pytest.fixture(autouse=True)
+def graph_config(config_updater):
+    config_updater(
+        {
+            "graph": {
+                "server_url": "http://example.org/graph/",
+                "max_edges": {"staff": 0, "user": 100000, "anonymous": 1000},
+            },
+            "unauthenticated_api_hosts": ["local.network"],
+        }
+    )
+
+
 def test_graph_endpoint_no_authentication_for_vpn_users(api_client, requests_mock):
     graph_query = "stats"
     url = reverse("api-1-graph", url_args={"graph_query": graph_query})
@@ -29,13 +42,25 @@ def test_graph_endpoint_no_authentication_for_vpn_users(api_client, requests_moc
         headers={"Content-Type": "application/json"},
     )
     check_http_get_response(
-        api_client, url, status_code=200, server_name=SWH_WEB_INTERNAL_SERVER_NAMES[0]
+        api_client,
+        url,
+        status_code=200,
+        server_name="localhost",
+    )
+    check_http_get_response(
+        api_client,
+        url,
+        status_code=200,
+        server_name="local.network",
     )
 
 
 def test_graph_endpoint_needs_authentication(api_client):
     url = reverse("api-1-graph", url_args={"graph_query": "stats"})
     check_http_get_response(api_client, url, status_code=401)
+    check_http_get_response(
+        api_client, url, status_code=401, server_name="public.network"
+    )
 
 
 def _authenticate_graph_user(api_client, keycloak_oidc, is_staff=False):
@@ -365,9 +390,7 @@ def test_graph_endpoint_max_edges_settings(api_client, keycloak_oidc, requests_m
 
     # currently unauthenticated user can only use the graph endpoint from
     # Software Heritage VPN
-    check_http_get_response(
-        api_client, url, status_code=200, server_name=SWH_WEB_INTERNAL_SERVER_NAMES[0]
-    )
+    check_http_get_response(api_client, url, status_code=200, server_name="localhost")
     assert (
         f"max_edges={graph_config['max_edges']['anonymous']}"
         in requests_mock.request_history[0].url
diff --git a/swh/web/api/views/graph.py b/swh/web/api/views/graph.py
index 5ab66aa3a..1660b7cb8 100644
--- a/swh/web/api/views/graph.py
+++ b/swh/web/api/views/graph.py
@@ -9,7 +9,9 @@ from urllib.parse import unquote, urlparse, urlunparse
 
 import requests
 
+from django.conf import settings
 from django.http import QueryDict
+from django.http.request import split_domain_port, validate_host
 from django.http.response import StreamingHttpResponse
 from rest_framework.decorators import renderer_classes
 from rest_framework.renderers import JSONRenderer
@@ -22,7 +24,7 @@ from swh.model.swhids import ExtendedObjectType, ExtendedSWHID
 from swh.web.api.apidoc import api_doc
 from swh.web.api.apiurls import api_route
 from swh.web.api.renderers import PlainTextRenderer
-from swh.web.config import SWH_WEB_INTERNAL_SERVER_NAMES, get_config
+from swh.web.config import get_config
 from swh.web.utils import archive, strtobool
 
 API_GRAPH_PERM = "swh.web.api.graph"
@@ -128,7 +130,11 @@ def api_graph(request: Request) -> None:
 def api_graph_proxy(
     request: Request, graph_query: str
 ) -> Union[Response, StreamingHttpResponse]:
-    if request.get_host() not in SWH_WEB_INTERNAL_SERVER_NAMES:
+    domain, port = split_domain_port(request.get_host())
+    if not (domain and validate_host(domain, settings.UNAUTHENTICATED_HOSTS)):
+        # request does not come from an identified allowed host/network (see
+        # 'unauthenticated_api_hosts' config entry), check for proper auth and
+        # permission
         if not bool(request.user and request.user.is_authenticated):
             return Response("Authentication credentials were not provided.", status=401)
         if not request.user.has_perm(API_GRAPH_PERM):
diff --git a/swh/web/config.py b/swh/web/config.py
index 6b72f27ee..8c92dab9f 100644
--- a/swh/web/config.py
+++ b/swh/web/config.py
@@ -72,6 +72,7 @@ DEFAULT_CONFIG = {
             "timeout": 10,
         },
     ),
+    "unauthenticated_api_hosts": ("list", []),
     "search_config": (
         "dict",
         {
diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py
index 2deae761b..bfbdba43e 100644
--- a/swh/web/settings/common.py
+++ b/swh/web/settings/common.py
@@ -44,6 +44,9 @@ DEBUG = swh_web_config["debug"]
 DEBUG_PROPAGATE_EXCEPTIONS = swh_web_config["debug"]
 
 ALLOWED_HOSTS = ["127.0.0.1", "localhost"] + swh_web_config["allowed_hosts"]
+UNAUTHENTICATED_HOSTS = ["127.0.0.1", "localhost"] + swh_web_config[
+    "unauthenticated_api_hosts"
+]
 
 # Applications definition
 
-- 
GitLab