From eb077d9cb6c4615bf8550ac84a747dd3565acd76 Mon Sep 17 00:00:00 2001
From: Antoine Lambert <antoine.lambert@inria.fr>
Date: Mon, 12 Oct 2020 16:23:22 +0200
Subject: [PATCH] common/archive: Fix empty content handling in
 lookup_content_raw

---
 swh/web/common/archive.py            |  2 +-
 swh/web/tests/common/test_archive.py |  7 +++++++
 swh/web/tests/data.py                |  3 +++
 swh/web/tests/strategies.py          | 21 +++++++++++++++++++--
 4 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/swh/web/common/archive.py b/swh/web/common/archive.py
index e0ef3a520..1824f1de7 100644
--- a/swh/web/common/archive.py
+++ b/swh/web/common/archive.py
@@ -866,7 +866,7 @@ def lookup_content_raw(q: str) -> Dict[str, Any]:
     c = lookup_content(q)
     content_sha1_bytes = hashutil.hash_to_bytes(c["checksums"]["sha1"])
     content_data = storage.content_get_data(content_sha1_bytes)
-    if not content_data:
+    if content_data is None:
         algo, hash_ = query.parse_hash(q)
         raise NotFoundExc(
             f"Bytes of content with {algo} checksum equals "
diff --git a/swh/web/tests/common/test_archive.py b/swh/web/tests/common/test_archive.py
index 050f4cbf2..4b06f5b72 100644
--- a/swh/web/tests/common/test_archive.py
+++ b/swh/web/tests/common/test_archive.py
@@ -26,6 +26,7 @@ from swh.web.tests.strategies import (
     contents,
     contents_with_ctags,
     directory,
+    empty_content,
     empty_directory,
     invalid_sha1,
     new_origin,
@@ -627,6 +628,12 @@ def test_lookup_content_raw(archive_data, content):
     assert actual_content == expected_content
 
 
+@given(empty_content())
+def test_lookup_empty_content_raw(archive_data, empty_content):
+    sha1_git_query = f"sha1_git:{empty_content['sha1_git']}"
+    assert archive.lookup_content_raw(sha1_git_query)["data"] == b""
+
+
 def test_lookup_content_not_found():
     unknown_content_ = random_content()
 
diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py
index 1d7433547..a52225bd3 100644
--- a/swh/web/tests/data.py
+++ b/swh/web/tests/data.py
@@ -287,6 +287,9 @@ def _init_tests_data():
     # Add the empty directory to the test archive
     storage.directory_add([Directory(entries=())])
 
+    # Add empty content to the test archive
+    storage.content_add([Content.from_data(data=b"")])
+
     # Return tests data
     return {
         "search": search,
diff --git a/swh/web/tests/strategies.py b/swh/web/tests/strategies.py
index 8a8499ed9..e132111e3 100644
--- a/swh/web/tests/strategies.py
+++ b/swh/web/tests/strategies.py
@@ -20,11 +20,17 @@ from hypothesis.strategies import (
     text,
 )
 
-from swh.model.hashutil import hash_to_bytes, hash_to_hex
+from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
 from swh.model.hypothesis_strategies import origins as new_origin_strategy
 from swh.model.hypothesis_strategies import snapshots as new_snapshot
 from swh.model.identifiers import directory_identifier
-from swh.model.model import Person, Revision, RevisionType, TimestampWithTimezone
+from swh.model.model import (
+    Content,
+    Person,
+    Revision,
+    RevisionType,
+    TimestampWithTimezone,
+)
 from swh.storage.algos.revisions_walker import get_revisions_walker
 from swh.storage.algos.snapshot import snapshot_get_latest
 from swh.web.common.utils import browsers_supported_image_mimes
@@ -93,6 +99,17 @@ def contents():
     return lists(content(), min_size=2, max_size=8)
 
 
+def empty_content():
+    """
+    Hypothesis strategy yielding the empty (zero-byte) content added to
+    the test archive, as a dict whose default hashes are hex-encoded.
+    """
+    content_dict = Content.from_data(data=b"").to_dict()
+    hexed = {algo: hash_to_hex(content_dict[algo]) for algo in DEFAULT_ALGORITHMS}
+    content_dict.update(hexed)
+    return just(content_dict)
+
+
 def content_text():
     """
     Hypothesis strategy returning random textual contents ingested
-- 
GitLab