From 3f4ac7cbe5fdca0ae6d2694e9ee84438c27fbfc1 Mon Sep 17 00:00:00 2001 From: David Douard <david.douard@sdfa3.org> Date: Wed, 20 Nov 2024 15:56:40 +0100 Subject: [PATCH] api/graph: use a more generic UNAUTHENTICATED_HOSTS config to check access to graph API Replace usage of the hardcoded and swh specific SWH_WEB_INTERNAL_SERVER_NAMES by a more generic UNAUTHENTICATED_HOSTS config option to bypass authentication for the graph public APIs. This uses the same validation logic as django's ALLOWED_HOSTS one. --- swh/web/api/tests/views/test_graph.py | 33 +++++++++++++++++++++++---- swh/web/api/views/graph.py | 10 ++++++-- swh/web/config.py | 1 + swh/web/settings/common.py | 3 +++ 4 files changed, 40 insertions(+), 7 deletions(-) diff --git a/swh/web/api/tests/views/test_graph.py b/swh/web/api/tests/views/test_graph.py index 2a2ecce82..0ebe9ec38 100644 --- a/swh/web/api/tests/views/test_graph.py +++ b/swh/web/api/tests/views/test_graph.py @@ -15,11 +15,24 @@ from django.http.response import StreamingHttpResponse from swh.model.hashutil import hash_to_bytes from swh.model.swhids import ExtendedObjectType, ExtendedSWHID from swh.web.api.views.graph import API_GRAPH_PERM -from swh.web.config import SWH_WEB_INTERNAL_SERVER_NAMES, get_config +from swh.web.config import get_config from swh.web.tests.helpers import check_http_get_response from swh.web.utils import reverse +@pytest.fixture(autouse=True) +def graph_config(config_updater): + config_updater( + { + "graph": { + "server_url": "http://example.org/graph/", + "max_edges": {"staff": 0, "user": 100000, "anonymous": 1000}, + }, + "unauthenticated_api_hosts": ["local.network"], + } + ) + + def test_graph_endpoint_no_authentication_for_vpn_users(api_client, requests_mock): graph_query = "stats" url = reverse("api-1-graph", url_args={"graph_query": graph_query}) @@ -29,13 +42,25 @@ def test_graph_endpoint_no_authentication_for_vpn_users(api_client, requests_moc headers={"Content-Type": "application/json"}, ) check_http_get_response( - api_client, url, status_code=200, server_name=SWH_WEB_INTERNAL_SERVER_NAMES[0] + api_client, + url, + status_code=200, + server_name="localhost", + ) + check_http_get_response( + api_client, + url, + status_code=200, + server_name="local.network", ) def test_graph_endpoint_needs_authentication(api_client): url = reverse("api-1-graph", url_args={"graph_query": "stats"}) check_http_get_response(api_client, url, status_code=401) + check_http_get_response( + api_client, url, status_code=401, server_name="public.network" + ) def _authenticate_graph_user(api_client, keycloak_oidc, is_staff=False): @@ -365,9 +390,7 @@ def test_graph_endpoint_max_edges_settings(api_client, keycloak_oidc, requests_m # currently unauthenticated user can only use the graph endpoint from # Software Heritage VPN - check_http_get_response( - api_client, url, status_code=200, server_name=SWH_WEB_INTERNAL_SERVER_NAMES[0] - ) + check_http_get_response(api_client, url, status_code=200, server_name="localhost") assert ( f"max_edges={graph_config['max_edges']['anonymous']}" in requests_mock.request_history[0].url diff --git a/swh/web/api/views/graph.py b/swh/web/api/views/graph.py index 5ab66aa3a..1660b7cb8 100644 --- a/swh/web/api/views/graph.py +++ b/swh/web/api/views/graph.py @@ -9,7 +9,9 @@ from urllib.parse import unquote, urlparse, urlunparse import requests +from django.conf import settings from django.http import QueryDict +from django.http.request import split_domain_port, validate_host from django.http.response import StreamingHttpResponse from rest_framework.decorators import renderer_classes from rest_framework.renderers import JSONRenderer @@ -22,7 +24,7 @@ from swh.model.swhids import ExtendedObjectType, ExtendedSWHID from swh.web.api.apidoc import api_doc from swh.web.api.apiurls import api_route from swh.web.api.renderers import PlainTextRenderer -from swh.web.config import SWH_WEB_INTERNAL_SERVER_NAMES, get_config +from swh.web.config import get_config from swh.web.utils import archive, strtobool API_GRAPH_PERM = "swh.web.api.graph" @@ -128,7 +130,11 @@ def api_graph(request: Request) -> None: def api_graph_proxy( request: Request, graph_query: str ) -> Union[Response, StreamingHttpResponse]: - if request.get_host() not in SWH_WEB_INTERNAL_SERVER_NAMES: + domain, port = split_domain_port(request.get_host()) + if not (domain and validate_host(domain, settings.UNAUTHENTICATED_HOSTS)): + # request does not come from an identified allowed host/network (see + # 'unauthenticated_api_hosts' config entry), check for proper auth and + # permission if not bool(request.user and request.user.is_authenticated): return Response("Authentication credentials were not provided.", status=401) if not request.user.has_perm(API_GRAPH_PERM): diff --git a/swh/web/config.py b/swh/web/config.py index 6b72f27ee..8c92dab9f 100644 --- a/swh/web/config.py +++ b/swh/web/config.py @@ -72,6 +72,7 @@ DEFAULT_CONFIG = { "timeout": 10, }, ), + "unauthenticated_api_hosts": ("list", []), "search_config": ( "dict", { diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py index 2deae761b..bfbdba43e 100644 --- a/swh/web/settings/common.py +++ b/swh/web/settings/common.py @@ -44,6 +44,9 @@ DEBUG = swh_web_config["debug"] DEBUG_PROPAGATE_EXCEPTIONS = swh_web_config["debug"] ALLOWED_HOSTS = ["127.0.0.1", "localhost"] + swh_web_config["allowed_hosts"] +UNAUTHENTICATED_HOSTS = ["127.0.0.1", "localhost"] + swh_web_config[ + "unauthenticated_api_hosts" +] # Applications definition -- GitLab