From 5ccf8a80ee633d2b105f57dc6634ff26af130f19 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Mon, 2 Mar 2020 14:03:58 +0100
Subject: [PATCH] Draw contents from a byte string instead of generating
 arbitrary hashes

This generates more realistic contents and avoids spurious HashCollisions when
generating a set of objects using these hypothesis strategies, at the cost of
slightly worse "boundary checking" (i.e. contents longer than 4096 bytes are
no longer generated, so they are no longer exercised by tests).
---
 swh/model/hypothesis_strategies.py | 44 +++++++++++++++---------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py
index a5dd4f11..6fe27b79 100644
--- a/swh/model/hypothesis_strategies.py
+++ b/swh/model/hypothesis_strategies.py
@@ -7,11 +7,11 @@ import attr
 import datetime
 
 from hypothesis.strategies import (
-    binary, builds, characters, composite, dictionaries, from_regex,
-    integers, just, lists, none, one_of, sampled_from, text, tuples,
+    binary, builds, characters, composite, dictionaries,
+    from_regex, integers, just, lists, none, one_of,
+    sampled_from, sets, text, tuples,
 )
 
-
 from .from_disk import DentryPerms
 from .model import (
     Person, Timestamp, TimestampWithTimezone, Origin, OriginVisit,
@@ -139,33 +139,33 @@ def contents():
     return one_of(present_contents(), skipped_contents())
 
 
-@composite
-def present_contents(draw):
-    return draw(builds(
-        Content,
-        length=integers(min_value=0, max_value=2**63-1),
-        sha1=sha1(),
-        sha1_git=sha1_git(),
-        sha256=binary(min_size=32, max_size=32),
-        blake2s256=binary(min_size=32, max_size=32),
+def present_contents():
+    return builds(
+        Content.from_data,
+        binary(max_size=4096),
         status=one_of(just('visible'), just('hidden')),
-        data=binary(),
-    ))
+    )
 
 
 @composite
 def skipped_contents(draw):
-    return draw(builds(
-        SkippedContent,
-        length=integers(min_value=-1, max_value=2**63-1),
-        sha1=optional(sha1()),
-        sha1_git=optional(sha1_git()),
-        sha256=optional(binary(min_size=32, max_size=32)),
-        blake2s256=optional(binary(min_size=32, max_size=32)),
-        status=just('absent'),
+    nullify_attrs = draw(
+        sets(sampled_from(['sha1', 'sha1_git', 'sha256', 'blake2s256']))
+    )
+
+    new_attrs = {
+        k: None
+        for k in nullify_attrs
+    }
+
+    ret = draw(builds(
+        SkippedContent.from_data,
+        binary(max_size=4096),
         reason=pgsql_text(),
     ))
 
+    return attr.evolve(ret, **new_attrs)
+
 
 def branch_names():
     return binary(min_size=1)
-- 
GitLab