From a20e6735d9ab49301026d93dee29a1a80d134a4e Mon Sep 17 00:00:00 2001 From: David Douard <david.douard@sdfa3.org> Date: Thu, 12 Oct 2023 13:39:26 +0200 Subject: [PATCH] journal_checker: Check the 'check_references' flag is not set --- swh/scrubber/journal_checker.py | 6 ++++ swh/scrubber/tests/test_journal_kafka.py | 43 ++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/swh/scrubber/journal_checker.py b/swh/scrubber/journal_checker.py index 1132864..5ae8de4 100644 --- a/swh/scrubber/journal_checker.py +++ b/swh/scrubber/journal_checker.py @@ -49,6 +49,12 @@ class JournalChecker: self.db = db self.config_id = config_id + if self.config.check_references: + # Interpolate eagerly: exception constructors do not %-format extra args. + raise ValueError( + "The journal checker cannot check for references, please set the " + f"'check_references' to False in the config entry {self.config_id}." + ) self.journal_client_config = journal.copy() if "object_types" in self.journal_client_config: raise ValueError( diff --git a/swh/scrubber/tests/test_journal_kafka.py b/swh/scrubber/tests/test_journal_kafka.py index 7628d06..d32363b 100644 --- a/swh/scrubber/tests/test_journal_kafka.py +++ b/swh/scrubber/tests/test_journal_kafka.py @@ -67,7 +67,11 @@ def test_no_corruption( for object_type in ("directory", "revision", "release", "snapshot"): journal_cfg["group_id"] = gid + object_type config_id = scrubber_db.config_add( - f"cfg_{object_type}", datastore, getattr(ObjectType, object_type.upper()), 1 + name=f"cfg_{object_type}", + datastore=datastore, + object_type=getattr(ObjectType, object_type.upper()), + nb_partitions=1, + check_references=False, ) jc = JournalChecker( db=scrubber_db, @@ -90,7 +94,11 @@ def test_corrupt_snapshot( corrupt_idx, ): config_id = scrubber_db.config_add( - "cfg_snapshot", datastore, ObjectType.SNAPSHOT, 1 + name="cfg_snapshot", + datastore=datastore, + object_type=ObjectType.SNAPSHOT, + nb_partitions=1, + check_references=False, ) snapshots = list(swh_model_data.SNAPSHOTS) 
snapshots[corrupt_idx] = attr.evolve(snapshots[corrupt_idx], id=b"\x00" * 20) @@ -131,7 +139,11 @@ def test_corrupt_snapshots( datastore, ): config_id = scrubber_db.config_add( - "cfg_snapshot", datastore, ObjectType.SNAPSHOT, 1 + name="cfg_snapshot", + datastore=datastore, + object_type=ObjectType.SNAPSHOT, + nb_partitions=1, + check_references=False, ) snapshots = list(swh_model_data.SNAPSHOTS) for i in (0, 1): @@ -155,3 +167,28 @@ def test_corrupt_snapshots( "swh:1:snp:0101010101010101010101010101010101010101", ] } + + +def test_check_references_raises( + scrubber_db, + kafka_server, + kafka_prefix, + kafka_consumer_group, + datastore, +): + config_id = scrubber_db.config_add( + name="cfg_snapshot", + datastore=datastore, + object_type=ObjectType.SNAPSHOT, + nb_partitions=1, + check_references=True, + ) + journal_config = journal_client_config( + kafka_server, kafka_prefix, kafka_consumer_group + ) + with pytest.raises(ValueError): + JournalChecker( + db=scrubber_db, + config_id=config_id, + journal=journal_config, + ) -- GitLab