From 6d8ffa24cc236fe6028555e3bd8e2ebd7ee0069f Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Fri, 12 Apr 2024 11:35:48 +0200
Subject: [PATCH] settings: Ensure client IP is correctly extracted from HTTP
 header

The X-Original-Forwarded-For header value has the following format:

  client[, proxy1, proxy2]

So ensure to handle all cases when extracting client IP from it to
avoid error when django-ratelimit processes a content view request.
---
 swh/web/browse/tests/views/test_content.py | 19 +++++++++++++++++++
 swh/web/settings/common.py                 |  4 ++++
 swh/web/settings/production.py             |  4 ----
 swh/web/tests/helpers.py                   |  2 ++
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/swh/web/browse/tests/views/test_content.py b/swh/web/browse/tests/views/test_content.py
index 690a0b355..476249d7b 100644
--- a/swh/web/browse/tests/views/test_content.py
+++ b/swh/web/browse/tests/views/test_content.py
@@ -1292,6 +1292,25 @@ def test_browse_content_rate_limit(client, content_text, view_name):
     check_http_get_response(client, url, status_code=429)
 
 
+@override_settings(RATELIMIT_ENABLE=True)
+@pytest.mark.parametrize(
+    "forwarded_for",
+    ["1234:1234:1234::123", "123.123.123.123", "1234:1234:1234::123, 123.123.123.123"],
+)
+def test_browse_content_rate_limit_forwarded_for(client, content_text, forwarded_for):
+    url = reverse(
+        "browse-content",
+        url_args={"query_string": f"sha1_git:{content_text['sha1_git']}"},
+    )
+
+    check_http_get_response(
+        client, url, status_code=200, HTTP_X_ORIGINAL_FORWARDED_FOR=forwarded_for
+    )
+    check_http_get_response(
+        client, url, status_code=429, HTTP_X_ORIGINAL_FORWARDED_FOR=forwarded_for
+    )
+
+
 def test_browse_content_failed_encoding_detection(
     client, content_text_non_utf8, mocker
 ):
diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py
index c06153d0f..61023c7d8 100644
--- a/swh/web/settings/common.py
+++ b/swh/web/settings/common.py
@@ -413,3 +413,7 @@ DEFAULT_AUTO_FIELD = "django.db.models.AutoField"
 
 RATELIMIT_USE_CACHE = "rate-limit"
 RATELIMIT_ENABLE = False
+# get real client IP address when behind production reverse proxy
+RATELIMIT_IP_META_KEY = lambda request: request.META.get(  # noqa
+    "HTTP_X_ORIGINAL_FORWARDED_FOR", request.META["REMOTE_ADDR"]
+).split(",", maxsplit=1)[0]
diff --git a/swh/web/settings/production.py b/swh/web/settings/production.py
index b1b9d0904..3efaee6a6 100644
--- a/swh/web/settings/production.py
+++ b/swh/web/settings/production.py
@@ -87,7 +87,3 @@ if SECRET_KEY == DEFAULT_CONFIG["secret_key"][-1]:
 
 browse_content_rate_limit = swh_web_config.get("browse_content_rate_limit", {})
 RATELIMIT_ENABLE = browse_content_rate_limit.get("enabled", True)
-# get real client IP address when behind production reverse proxy
-RATELIMIT_IP_META_KEY = lambda request: request.META.get(  # noqa
-    "HTTP_X_ORIGINAL_FORWARDED_FOR", request.META["REMOTE_ADDR"]
-)
diff --git a/swh/web/tests/helpers.py b/swh/web/tests/helpers.py
index 510aefde5..690fcbd55 100644
--- a/swh/web/tests/helpers.py
+++ b/swh/web/tests/helpers.py
@@ -53,6 +53,7 @@ def check_http_get_response(
     content_type: str = "*/*",
     http_origin: Optional[str] = None,
     server_name: Optional[str] = None,
+    **headers,
 ) -> HttpResponseBase:
     """Helper function to check HTTP response for a GET request.
 
@@ -72,6 +73,7 @@ def check_http_get_response(
             HTTP_ACCEPT=content_type,
             HTTP_ORIGIN=http_origin,
             SERVER_NAME=server_name if server_name else "testserver",
+            **headers,
         ),
         status_code=status_code,
         content_type=content_type,
-- 
GitLab