From 97c8291f0bb3e69e6ec08aa18942c42f1eef2963 Mon Sep 17 00:00:00 2001
From: Antoine Lambert <antoine.lambert@inria.fr>
Date: Mon, 4 May 2020 16:21:55 +0200
Subject: [PATCH] templates/content-display: Fix view when an image cannot be
 rendered

Previously, a suprising "Error 200" was displayed in place of a message indicating
that the file cannot be rendered.

Also simplify some code in swh.web.tests.data module.

Closes T2389
---
 swh/web/browse/utils.py                       | 18 +----
 swh/web/common/utils.py                       | 14 ++++
 .../templates/includes/content-display.html   |  5 +-
 swh/web/tests/browse/views/test_content.py    | 21 ++++++
 swh/web/tests/data.py                         | 69 +++++++++++++------
 swh/web/tests/strategies.py                   | 14 +++-
 6 files changed, 99 insertions(+), 42 deletions(-)

diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py
index dac07c634..ff6d72caa 100644
--- a/swh/web/browse/utils.py
+++ b/swh/web/browse/utils.py
@@ -21,6 +21,7 @@ from swh.web.common.utils import (
     reverse,
     format_utc_iso_date,
     rst_to_html,
+    browsers_supported_image_mimes,
 )
 from swh.web.config import get_config
 
@@ -241,19 +242,6 @@ def request_content(
     return content_data
 
 
-_browsers_supported_image_mimes = set(
-    [
-        "image/gif",
-        "image/png",
-        "image/jpeg",
-        "image/bmp",
-        "image/webp",
-        "image/svg",
-        "image/svg+xml",
-    ]
-)
-
-
 def prepare_content_for_display(content_data, mime_type, path):
     """Function that prepares a content for HTML display.
 
@@ -288,10 +276,8 @@ def prepare_content_for_display(content_data, mime_type, path):
         mime_type = mime_type.replace("application/", "text/")
 
     if mime_type.startswith("image/"):
-        if mime_type in _browsers_supported_image_mimes:
+        if mime_type in browsers_supported_image_mimes:
             content_data = base64.b64encode(content_data).decode("ascii")
-        else:
-            content_data = None
 
     if mime_type.startswith("image/svg"):
         mime_type = "image/svg+xml"
diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py
index 0d119c7e7..65fe3b8a8 100644
--- a/swh/web/common/utils.py
+++ b/swh/web/common/utils.py
@@ -241,6 +241,19 @@ def get_client_ip(request):
     return ip
 
 
+browsers_supported_image_mimes = set(
+    [
+        "image/gif",
+        "image/png",
+        "image/jpeg",
+        "image/bmp",
+        "image/webp",
+        "image/svg",
+        "image/svg+xml",
+    ]
+)
+
+
 def context_processor(request):
     """
     Django context processor used to inject variables
@@ -260,6 +273,7 @@ def context_processor(request):
         "available_languages": None,
         "swh_client_config": config["client_config"],
         "oidc_enabled": bool(config["keycloak"]["server_url"]),
+        "browsers_supported_image_mimes": browsers_supported_image_mimes,
     }
 
 
diff --git a/swh/web/templates/includes/content-display.html b/swh/web/templates/includes/content-display.html
index 40eed9f20..ed815f758 100644
--- a/swh/web/templates/includes/content-display.html
+++ b/swh/web/templates/includes/content-display.html
@@ -28,7 +28,7 @@ See top-level LICENSE file for more information
         <div class="highlightjs">
           <pre><code class="{{ language }}">{{ content }}</code></pre>
         </div>
-      {% elif "image/" in mimetype and content %}
+      {% elif mimetype in browsers_supported_image_mimes and content %}
         <img src="data:{{ mimetype }};base64,{{ content }}"/>
       {% elif "application/pdf" == mimetype %}
         <div class="text-center">
@@ -40,8 +40,7 @@ See top-level LICENSE file for more information
           <canvas id="pdf-canvas"></canvas>
         </div>
       {% elif content %}
-        Content with mime type {{ mimetype }} and encoding
-        {{ encoding }} cannot be displayed.
+        Content with mime type {{ mimetype }} and encoding {{ encoding }} cannot be displayed.
       {% else %}
         {% include "includes/http-error.html" %}
       {% endif %}
diff --git a/swh/web/tests/browse/views/test_content.py b/swh/web/tests/browse/views/test_content.py
index 27f237ec5..b92905890 100644
--- a/swh/web/tests/browse/views/test_content.py
+++ b/swh/web/tests/browse/views/test_content.py
@@ -27,6 +27,7 @@ from swh.web.tests.strategies import (
     content_text_non_utf8,
     content_text_no_highlight,
     content_image_type,
+    content_unsupported_image_type_rendering,
     content_text,
     invalid_sha1,
     unknown_content,
@@ -139,6 +140,26 @@ def test_content_view_image(client, archive_data, content):
     assert_contains(resp, url_raw)
 
 
+@given(content_unsupported_image_type_rendering())
+def test_content_view_image_no_rendering(client, archive_data, content):
+    url = reverse("browse-content", url_args={"query_string": content["sha1"]})
+
+    resp = client.get(url)
+
+    mimetype = content["mimetype"]
+    encoding = content["encoding"]
+
+    assert resp.status_code == 200
+    assert_template_used(resp, "browse/content.html")
+    assert_contains(
+        resp,
+        (
+            f"Content with mime type {mimetype} and encoding {encoding} "
+            "cannot be displayed."
+        ),
+    )
+
+
 @given(content_text())
 def test_content_view_text_with_path(client, archive_data, content):
     path = content["path"]
diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py
index baa1129f5..b2f7c5fba 100644
--- a/swh/web/tests/data.py
+++ b/swh/web/tests/data.py
@@ -12,6 +12,7 @@ from swh.indexer.fossology_license import FossologyLicenseIndexer
 from swh.indexer.mimetype import MimetypeIndexer
 from swh.indexer.ctags import CtagsIndexer
 from swh.indexer.storage import get_indexer_storage
+from swh.model.model import Content
 from swh.model.hashutil import hash_to_hex, hash_to_bytes, DEFAULT_ALGORITHMS
 from swh.model.model import Directory, Origin
 from swh.loader.git.from_disk import GitLoaderFromArchive
@@ -137,6 +138,24 @@ _TEST_ORIGINS = [
 _contents = {}
 
 
+def _add_extra_contents(storage, contents):
+    pbm_image_data = b"""P1
+# PBM example
+24 7
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 1 1 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0
+0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0
+0 1 1 1 0 0 0 1 1 1 0 0 0 1 1 1 0 0 0 1 1 1 1 0
+0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0
+0 1 0 0 0 0 0 1 1 1 1 0 0 1 1 1 1 0 0 1 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0"""
+
+    # add file with mimetype image/x-portable-bitmap in the archive content
+    pbm_content = Content.from_data(pbm_image_data)
+    storage.content_add([pbm_content])
+    contents.add(pbm_content.sha1)
+
+
 # Tests data initialization
 def _init_tests_data():
     # To hold reference to the memory storage
@@ -209,38 +228,44 @@ def _init_tests_data():
             dir_id = rev["directory"]
             directories.add(hash_to_hex(dir_id))
             for entry in dir_iterator(storage, dir_id):
-                content_path[entry["sha1"]] = "/".join(
-                    [hash_to_hex(dir_id), entry["path"].decode("utf-8")]
-                )
                 if entry["type"] == "file":
                     contents.add(entry["sha1"])
+                    content_path[entry["sha1"]] = "/".join(
+                        [hash_to_hex(dir_id), entry["path"].decode("utf-8")]
+                    )
                 elif entry["type"] == "dir":
                     directories.add(hash_to_hex(entry["target"]))
 
+    _add_extra_contents(storage, contents)
+
     # Get all checksums for each content
     result = storage.content_get_metadata(contents)
     contents = []
     for sha1, contents_metadata in result.items():
-        for content_metadata in contents_metadata:
-            contents.append(
-                {
-                    algo: hash_to_hex(content_metadata[algo])
-                    for algo in DEFAULT_ALGORITHMS
-                }
-            )
+        sha1 = contents_metadata[0]["sha1"]
+        content_metadata = {
+            algo: hash_to_hex(contents_metadata[0][algo]) for algo in DEFAULT_ALGORITHMS
+        }
+
+        path = ""
+        if sha1 in content_path:
             path = content_path[sha1]
-            cnt = next(storage.content_get([sha1]))
-            mimetype, encoding = get_mimetype_and_encoding_for_content(cnt["data"])
-            _, _, cnt["data"] = _re_encode_content(mimetype, encoding, cnt["data"])
-            content_display_data = prepare_content_for_display(
-                cnt["data"], mimetype, path
-            )
-            contents[-1]["path"] = path
-            contents[-1]["mimetype"] = mimetype
-            contents[-1]["encoding"] = encoding
-            contents[-1]["hljs_language"] = content_display_data["language"]
-            contents[-1]["data"] = content_display_data["content_data"]
-            _contents[contents[-1]["sha1"]] = contents[-1]
+        cnt = next(storage.content_get([sha1]))
+        mimetype, encoding = get_mimetype_and_encoding_for_content(cnt["data"])
+        _, _, cnt["data"] = _re_encode_content(mimetype, encoding, cnt["data"])
+        content_display_data = prepare_content_for_display(cnt["data"], mimetype, path)
+
+        content_metadata.update(
+            {
+                "path": path,
+                "mimetype": mimetype,
+                "encoding": encoding,
+                "hljs_language": content_display_data["language"],
+                "data": content_display_data["content_data"],
+            }
+        )
+        _contents[hash_to_hex(sha1)] = content_metadata
+        contents.append(content_metadata)
 
     # Create indexer storage instance that will be shared by indexers
     idx_storage = get_indexer_storage("memory", {})
diff --git a/swh/web/tests/strategies.py b/swh/web/tests/strategies.py
index 13a4ca7fa..6bb49e589 100644
--- a/swh/web/tests/strategies.py
+++ b/swh/web/tests/strategies.py
@@ -29,6 +29,7 @@ from swh.model.hypothesis_strategies import (
     origins as new_origin_strategy,
     snapshots as new_snapshot,
 )
+from swh.web.common.utils import browsers_supported_image_mimes
 from swh.web.tests.data import get_tests_data
 
 # Module dedicated to the generation of input data for tests through
@@ -129,7 +130,18 @@ def content_image_type():
     Hypothesis strategy returning random image contents ingested
     into the test archive.
     """
-    return content().filter(lambda c: c["mimetype"].startswith("image/"))
+    return content().filter(lambda c: c["mimetype"] in browsers_supported_image_mimes)
+
+
+def content_unsupported_image_type_rendering():
+    """
+    Hypothesis strategy returning random image contents ingested
+    into the test archive that can not be rendered by browsers.
+    """
+    return content().filter(
+        lambda c: c["mimetype"].startswith("image/")
+        and c["mimetype"] not in browsers_supported_image_mimes
+    )
 
 
 def content_utf8_detected_as_binary():
-- 
GitLab