From 9edc14f552856d214e4fd399fb7eb9566c1d81ed Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Tue, 2 Apr 2024 14:03:29 +0200
Subject: [PATCH] browse: Remove HTML links in NotFoundExc message

HTML links in the message are no longer rendered as links after previous
commits preventing XSS, so simply display the URLs as plain text instead.
---
 cypress/e2e/errors.cy.js          | 24 +++++++++++++-----------
 swh/web/browse/views/content.py   |  9 +++++----
 swh/web/browse/views/directory.py | 10 ++++++----
 swh/web/browse/views/release.py   |  8 +++++---
 swh/web/browse/views/revision.py  |  8 +++++---
 5 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/cypress/e2e/errors.cy.js b/cypress/e2e/errors.cy.js
index 4708eba7f..c20648ee4 100644
--- a/cypress/e2e/errors.cy.js
+++ b/cypress/e2e/errors.cy.js
@@ -107,17 +107,19 @@ describe('Test Errors', function() {
         with correct content hash`, function() {
       const url = this.Urls.browse_content(`sha1_git:${origin.content[0].sha1git}`) +
                   `?origin_url=${this.unarchivedRepo.url}`;
-      urlShouldShowError(url, {
-        code: '404',
-        msg: 'The Software Heritage archive has a content ' +
-            'with the hash you provided but the origin ' +
-            'mentioned in your request appears broken: ' +
-            this.unarchivedRepo.url + '. ' +
-            'Please check the URL and try again.\n\n' +
-            'Nevertheless, you can still browse the content ' +
-            'without origin information: ' +
-            '/browse/content/sha1_git:' +
-            origin.content[0].sha1git + '/'
+      cy.visit('/').window().then(win => {
+        urlShouldShowError(url, {
+          code: '404',
+          msg: 'The Software Heritage archive has a content ' +
+              'with the hash you provided but the origin ' +
+              'mentioned in your request appears broken: ' +
+              this.unarchivedRepo.url + '. ' +
+              'Please check the URL and try again.\n\n' +
+              'Nevertheless, you can still browse the content ' +
+              'without origin information: ' +
+              `${win.location.protocol}//${win.location.host}/browse/content/sha1_git:` +
+              origin.content[0].sha1git + '/'
+        });
       });
     });
   });
diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py
index 9488cf8bf..78a734cb8 100644
--- a/swh/web/browse/views/content.py
+++ b/swh/web/browse/views/content.py
@@ -20,7 +20,6 @@ from swh.web.browse.browseurls import browse_route
 from swh.web.browse.snapshot_context import get_snapshot_context
 from swh.web.browse.utils import (
     content_display_max_size,
-    gen_link,
     prepare_content_for_display,
     request_content,
 )
@@ -302,7 +301,9 @@ def content_display(
         except NotFoundExc as e:
             if str(e).startswith("Origin") and origin_url is not None:
                 raw_cnt_url = reverse(
-                    "browse-content", url_args={"query_string": query_string}
+                    "browse-content",
+                    url_args={"query_string": query_string},
+                    request=request,
                 )
                 error_message = format_html(
                     "The Software Heritage archive has a content "
@@ -311,8 +312,8 @@ def content_display(
                     "Please check the URL and try again.\n\n"
                     "Nevertheless, you can still browse the content "
                     "without origin information: {}",
-                    gen_link(origin_url),
-                    gen_link(raw_cnt_url),
+                    origin_url,
+                    raw_cnt_url,
                 )
                 raise NotFoundExc(error_message)
             else:
diff --git a/swh/web/browse/views/directory.py b/swh/web/browse/views/directory.py
index bc28ca954..ecadd027e 100644
--- a/swh/web/browse/views/directory.py
+++ b/swh/web/browse/views/directory.py
@@ -13,7 +13,7 @@ from django.utils.html import format_html
 from swh.model.swhids import ObjectType
 from swh.web.browse.browseurls import browse_route
 from swh.web.browse.snapshot_context import get_snapshot_context
-from swh.web.browse.utils import gen_link, get_directory_entries, get_readme_to_display
+from swh.web.browse.utils import get_directory_entries, get_readme_to_display
 from swh.web.utils import archive, gen_path_info, reverse, swh_object_icons
 from swh.web.utils.exc import (
     NotFoundExc,
@@ -76,7 +76,9 @@ def _directory_browse(
         except NotFoundExc as e:
             if str(e).startswith("Origin") and origin_url is not None:
                 raw_dir_url = reverse(
-                    "browse-directory", url_args={"sha1_git": dir_sha1_git}
+                    "browse-directory",
+                    url_args={"sha1_git": dir_sha1_git},
+                    request=request,
                 )
                 error_message = format_html(
                     "The Software Heritage archive has a directory "
@@ -85,8 +87,8 @@ def _directory_browse(
                     "Please check the URL and try again.\n\n"
                     "Nevertheless, you can still browse the directory "
                     "without origin information: {}",
-                    gen_link(origin_url),
-                    gen_link(raw_dir_url),
+                    origin_url,
+                    raw_dir_url,
                 )
                 raise NotFoundExc(error_message)
             else:
diff --git a/swh/web/browse/views/release.py b/swh/web/browse/views/release.py
index 47a659512..653d56f80 100644
--- a/swh/web/browse/views/release.py
+++ b/swh/web/browse/views/release.py
@@ -60,7 +60,9 @@ def release_browse(request: HttpRequest, sha1_git: str) -> HttpResponse:
                 visit_type=request.GET.get("visit_type"),
             )
         except NotFoundExc as e:
-            raw_rel_url = reverse("browse-release", url_args={"sha1_git": sha1_git})
+            raw_rel_url = reverse(
+                "browse-release", url_args={"sha1_git": sha1_git}, request=request
+            )
             error_message = format_html(
                 "The Software Heritage archive has a release "
                 "with the hash you provided but the origin "
@@ -68,8 +70,8 @@ def release_browse(request: HttpRequest, sha1_git: str) -> HttpResponse:
                 "Please check the URL and try again.\n\n"
                 "Nevertheless, you can still browse the release "
                 "without origin information: {}",
-                gen_link(origin_url),
-                gen_link(raw_rel_url),
+                origin_url,
+                raw_rel_url,
             )
             if str(e).startswith("Origin"):
                 raise NotFoundExc(error_message)
diff --git a/swh/web/browse/views/revision.py b/swh/web/browse/views/revision.py
index fc85e4a8a..dddb201df 100644
--- a/swh/web/browse/views/revision.py
+++ b/swh/web/browse/views/revision.py
@@ -352,7 +352,9 @@ def revision_browse(request: HttpRequest, sha1_git: str) -> HttpResponse:
                 visit_type=request.GET.get("visit_type"),
             )
         except NotFoundExc as e:
-            raw_rev_url = reverse("browse-revision", url_args={"sha1_git": sha1_git})
+            raw_rev_url = reverse(
+                "browse-revision", url_args={"sha1_git": sha1_git}, request=request
+            )
             error_message = format_html(
                 "The Software Heritage archive has a revision "
                 "with the hash you provided but the origin "
@@ -360,8 +362,8 @@ def revision_browse(request: HttpRequest, sha1_git: str) -> HttpResponse:
                 "Please check the URL and try again.\n\n"
                 "Nevertheless, you can still browse the revision "
                 "without origin information: {}",
-                gen_link(origin_url),
-                gen_link(raw_rev_url),
+                origin_url,
+                raw_rev_url,
             )
             if str(e).startswith("Origin"):
                 raise NotFoundExc(error_message)
-- 
GitLab