Skip to content
Snippets Groups Projects
Commit 84d685ca authored by Antoine Lambert's avatar Antoine Lambert
Browse files

browse/utils: Fix error when charset_normalizer.detect fails

The charset_normalizer.detect function can return a dict filled with
None values when it fails to detect an encoding, so add a None check
to avoid a runtime error when a content encoding cannot be
detected.
parent 002b56dc
No related branches found
No related tags found
No related merge requests found
......@@ -1290,3 +1290,20 @@ def test_browse_content_rate_limit(client, content_text, view_name):
check_http_get_response(client, url, status_code=200)
check_http_get_response(client, url, status_code=429)
def test_browse_content_failed_encoding_detection(
    client, content_text_non_utf8, mocker
):
    """Check that browsing a content still returns HTTP 200 when
    charset_normalizer.detect reports no encoding.
    """
    # mimic the result charset_normalizer.detect yields when it cannot
    # detect an encoding: the confidence and encoding entries are None
    failed_detection = {"confidence": None, "encoding": None, "language": ""}
    detect_mock = mocker.patch(
        "charset_normalizer.detect", return_value=failed_detection
    )

    browse_url = reverse(
        "browse-content",
        url_args={"query_string": f"sha1_git:{content_text_non_utf8['sha1_git']}"},
    )

    # the request must succeed despite the failed detection
    check_http_get_response(client, browse_url, status_code=200)
    # make sure the patched detection function was actually called
    detect_mock.assert_called()
......@@ -109,7 +109,7 @@ def re_encode_content(
if mimetype.startswith("text/") and encoding not in ("us-ascii", "utf-8"):
# first check if charset_normalizer detects an encoding with confidence
result = charset_normalizer.detect(content_data)
if cast(float, result["confidence"]) >= 0.9:
if result.get("confidence") and cast(float, result["confidence"]) >= 0.9:
encoding = cast(str, result["encoding"])
content_data = content_data.decode(encoding, "replace").encode("utf-8")
elif encoding == "unknown-8bit":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment