From a20e6735d9ab49301026d93dee29a1a80d134a4e Mon Sep 17 00:00:00 2001
From: David Douard <david.douard@sdfa3.org>
Date: Thu, 12 Oct 2023 13:39:26 +0200
Subject: [PATCH] journal_checker: Check the 'check_references' flag is not set

---
 swh/scrubber/journal_checker.py          |  6 ++++
 swh/scrubber/tests/test_journal_kafka.py | 43 ++++++++++++++++++++++--
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/swh/scrubber/journal_checker.py b/swh/scrubber/journal_checker.py
index 1132864..5ae8de4 100644
--- a/swh/scrubber/journal_checker.py
+++ b/swh/scrubber/journal_checker.py
@@ -49,6 +49,12 @@ class JournalChecker:
         self.db = db
         self.config_id = config_id
 
+        if self.config.check_references:
+            # Exceptions do not %-interpolate extra args; build the message here.
+            raise ValueError(
+                "The journal checker cannot check for references, please set "
+                f"'check_references' to False in the config entry {self.config_id}."
+            )
         self.journal_client_config = journal.copy()
         if "object_types" in self.journal_client_config:
             raise ValueError(
diff --git a/swh/scrubber/tests/test_journal_kafka.py b/swh/scrubber/tests/test_journal_kafka.py
index 7628d06..d32363b 100644
--- a/swh/scrubber/tests/test_journal_kafka.py
+++ b/swh/scrubber/tests/test_journal_kafka.py
@@ -67,7 +67,11 @@ def test_no_corruption(
     for object_type in ("directory", "revision", "release", "snapshot"):
         journal_cfg["group_id"] = gid + object_type
         config_id = scrubber_db.config_add(
-            f"cfg_{object_type}", datastore, getattr(ObjectType, object_type.upper()), 1
+            name=f"cfg_{object_type}",
+            datastore=datastore,
+            object_type=getattr(ObjectType, object_type.upper()),
+            nb_partitions=1,
+            check_references=False,
         )
         jc = JournalChecker(
             db=scrubber_db,
@@ -90,7 +94,11 @@ def test_corrupt_snapshot(
     corrupt_idx,
 ):
     config_id = scrubber_db.config_add(
-        "cfg_snapshot", datastore, ObjectType.SNAPSHOT, 1
+        name="cfg_snapshot",
+        datastore=datastore,
+        object_type=ObjectType.SNAPSHOT,
+        nb_partitions=1,
+        check_references=False,
     )
     snapshots = list(swh_model_data.SNAPSHOTS)
     snapshots[corrupt_idx] = attr.evolve(snapshots[corrupt_idx], id=b"\x00" * 20)
@@ -131,7 +139,11 @@ def test_corrupt_snapshots(
     datastore,
 ):
     config_id = scrubber_db.config_add(
-        "cfg_snapshot", datastore, ObjectType.SNAPSHOT, 1
+        name="cfg_snapshot",
+        datastore=datastore,
+        object_type=ObjectType.SNAPSHOT,
+        nb_partitions=1,
+        check_references=False,
     )
     snapshots = list(swh_model_data.SNAPSHOTS)
     for i in (0, 1):
@@ -155,3 +167,28 @@ def test_corrupt_snapshots(
             "swh:1:snp:0101010101010101010101010101010101010101",
         ]
     }
+
+
+def test_check_references_raises(
+    scrubber_db,
+    kafka_server,
+    kafka_prefix,
+    kafka_consumer_group,
+    datastore,
+):
+    """JournalChecker must refuse a config entry that has check_references set."""
+    config_id = scrubber_db.config_add(
+        name="cfg_snapshot",
+        datastore=datastore,
+        object_type=ObjectType.SNAPSHOT,
+        nb_partitions=1,
+        check_references=True,
+    )
+    journal_config = journal_client_config(
+        kafka_server, kafka_prefix, kafka_consumer_group
+    )
+    # The constructor raises ValueError for other reasons too (e.g. when the
+    # journal config contains "object_types"), so match on the message to be
+    # sure it is the check_references guard that fired.
+    with pytest.raises(ValueError, match="check_references"):
+        JournalChecker(db=scrubber_db, config_id=config_id, journal=journal_config)
-- 
GitLab