From 1dcab2cedca016e611cbd693ffe30e9f4450c0a1 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Sun, 31 Mar 2024 03:30:12 +0200
Subject: [PATCH] exc: be more careful when not escaping error descriptions

This avoids an XSS avenue within errors
---
 swh/web/browse/tests/views/test_directory.py | 14 ++++++++++++++
 swh/web/browse/utils.py                      | 14 ++++++++++----
 swh/web/utils/exc.py                         | 11 +++++------
 3 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/swh/web/browse/tests/views/test_directory.py b/swh/web/browse/tests/views/test_directory.py
index 35bbfcfba..c19d2203c 100644
--- a/swh/web/browse/tests/views/test_directory.py
+++ b/swh/web/browse/tests/views/test_directory.py
@@ -685,3 +685,17 @@ def test_directory_view_missing_readme_bytes(client, archive_data, person, date)
         template_used="browse-directory.html",
     )
     assert_contains(resp, "Readme bytes are not available")
+
+
+def test_directory_notfound_xss(client):
+    dir_url = reverse(
+        "browse-origin-directory",
+        query_params={"origin_url": "<script>alert(1)</script>"},
+    )
+
+    resp = check_html_get_response(
+        client, dir_url, status_code=404, template_used="error.html"
+    )
+
+    assert_not_contains(resp, "<script>alert(1)</script>", status_code=404)
+    assert_contains(resp, "&lt;script&gt;", status_code=404)
diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py
index 880f9c808..ce1b62d87 100644
--- a/swh/web/browse/utils.py
+++ b/swh/web/browse/utils.py
@@ -11,7 +11,7 @@ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
 import chardet
 import magic
 
-from django.utils.html import escape
+from django.utils.html import escape, format_html
 from django.utils.safestring import mark_safe
 
 from swh.web.config import get_config
@@ -291,11 +291,17 @@ def gen_link(
     attrs = " "
     if link_attrs:
         for k, v in link_attrs.items():
-            attrs += '%s="%s" ' % (k, v)
+            if k != escape(k):
+                raise ValueError("weird attribute passed to gen_link")
+            attrs += '%s="%s" ' % (k, escape(v))
     if not link_text:
         link_text = url
-    link = '<a%shref="%s">%s</a>' % (attrs, escape(url), escape(link_text))
-    return mark_safe(link)
+    return format_html(
+        '<a{attrs}href="{url}">{link_text}</a>',
+        attrs=mark_safe(attrs),
+        url=url,
+        link_text=link_text,
+    )
 
 
 def _snapshot_context_query_params(
diff --git a/swh/web/utils/exc.py b/swh/web/utils/exc.py
index 0a18d8a07..df742da4a 100644
--- a/swh/web/utils/exc.py
+++ b/swh/web/utils/exc.py
@@ -12,8 +12,7 @@ import sentry_sdk
 from django.core import exceptions
 from django.http import HttpRequest, HttpResponse
 from django.shortcuts import render
-from django.utils.html import escape
-from django.utils.safestring import mark_safe
+from django.utils.html import conditional_escape, escape
 from rest_framework.exceptions import APIException
 from rest_framework.renderers import JSONRenderer
 
@@ -109,7 +108,7 @@ def _generate_error_page(
             {
                 "error_code": error_code,
                 "error_message": http_status_code_message[error_code],
-                "error_description": mark_safe(error_description),
+                "error_description": error_description,
             },
             status=error_code,
         )
@@ -200,9 +199,9 @@ def handle_view_exception(request: HttpRequest, exc: Exception) -> HttpResponse:
         error_code = 403
     if isinstance(exc, NotFoundExc):
         error_code = 404
-    else:
-        # some NotFoundExc texts have HTML links we want to preserve
-        error_description = escape(error_description)
+
+    # some NotFoundExc texts have HTML links we want to preserve
+    error_description = conditional_escape(error_description)
     resp = _generate_error_page(request, error_code, error_description)
     if get_config()["debug"]:
         resp.traceback = error_description  # type: ignore[attr-defined]
-- 
GitLab