diff --git a/swh/scrubber/cli.py b/swh/scrubber/cli.py index 71fdb8b87fc98c9d996470ae40944b6e64bf4704..e3c00e2f85675610fa4c1f4f2260eb5564012a72 100644 --- a/swh/scrubber/cli.py +++ b/swh/scrubber/cli.py @@ -122,6 +122,8 @@ def scrubber_check_cli_group(ctx): ) @click.option("--nb-partitions", default=4096, type=int) @click.option("--name", default=None, type=str) +@click.option("--check-hashes/--no-check-hashes", default=True) +@click.option("--check-references/--no-check-references", default=None) @click.pass_context def scrubber_check_init( ctx, @@ -129,19 +131,28 @@ def scrubber_check_init( object_type: str, nb_partitions: int, name: Optional[str], + check_hashes: bool, + check_references: Optional[bool], ): """Initialise a scrubber check configuration for the datastore defined in the configuration file and given object_type. A checker configuration configuration consists simply in a set of: - - object type: the type of object being checked, - - number of partitions: the number of partitions the hash space is divided + - backend: the datastore type being scrubbed (storage or journal), + + - object-type: the type of object being checked, + + - nb-partitions: the number of partitions the hash space is divided in; must be a power of 2, + - name: an unique name for easier reference, - linked to the storage provided in the configuration file. + - check-hashes: flag (default to True) to select the hash validation step for + this scrubbing configuration, + - check-references: flag (default to True for storage and False for the journal + backend) to select the reference validation step for this scrubbing configuration. """ if not object_type or not name: raise click.ClickException( @@ -152,6 +163,8 @@ def scrubber_check_init( db = ctx.obj["db"] if backend == "storage": + if check_references is None: + check_references = True if "storage" not in conf: raise click.ClickException( "You must have a storage configured in your config file." 
@@ -163,6 +176,8 @@ def scrubber_check_init( datastore = get_storage_datastore(storage=get_storage(**conf["storage"])) db.datastore_get_or_add(datastore) elif backend == "journal": + if check_references is None: + check_references = False if "journal" not in conf: raise click.ClickException( "You must have a journal configured in your config file." @@ -177,8 +192,15 @@ def scrubber_check_init( if db.config_get_by_name(name): raise click.ClickException(f"Configuration {name} already exists") + assert check_references is not None + config_id = db.config_add( - name, datastore, getattr(ObjectType, object_type.upper()), nb_partitions + name, + datastore, + getattr(ObjectType, object_type.upper()), + nb_partitions, + check_hashes=check_hashes, + check_references=check_references, ) click.echo( f"Created configuration {name} [{config_id}] for checking {object_type} " diff --git a/swh/scrubber/tests/test_cli.py b/swh/scrubber/tests/test_cli.py index 06a2b5265f50c8eee8d3ac971677cc8d2eb8a04e..da4a1d5f1e870283c9631020ad81963e11be7c29 100644 --- a/swh/scrubber/tests/test_cli.py +++ b/swh/scrubber/tests/test_cli.py @@ -154,6 +154,84 @@ def test_check_init(mocker, scrubber_db, swh_storage): assert result.output.strip() == msg +def test_check_init_storage_flags(mocker, scrubber_db, swh_storage): + mocker.patch("swh.scrubber.get_scrubber_db", return_value=scrubber_db) + arg_list = [ + "check", + "init", + "storage", + "--object-type", + "snapshot", + "--nb-partitions", + "4", + "--name", + ] + + name = "cfg1" + result = invoke( + scrubber_db, + arg_list + [name], + storage=swh_storage, + ) + assert result.exit_code == 0, result.output + + cfg_entry = scrubber_db.config_get(scrubber_db.config_get_by_name(name)) + assert cfg_entry.check_hashes is True + assert cfg_entry.check_references is True + + name = "cfg2" + result = invoke( + scrubber_db, + arg_list + [name, "--no-check-references"], + storage=swh_storage, + ) + assert result.exit_code == 0, result.output + + cfg_entry = 
scrubber_db.config_get(scrubber_db.config_get_by_name(name)) + assert cfg_entry.check_hashes is True + assert cfg_entry.check_references is False + + name = "cfg3" + result = invoke( + scrubber_db, + arg_list + [name, "--no-check-hashes"], + storage=swh_storage, + ) + assert result.exit_code == 0, result.output + + cfg_entry = scrubber_db.config_get(scrubber_db.config_get_by_name(name)) + assert cfg_entry.check_hashes is False + assert cfg_entry.check_references is True + + +def test_check_init_journal_flags( + mocker, scrubber_db, kafka_server, kafka_prefix, kafka_consumer_group +): + mocker.patch("swh.scrubber.get_scrubber_db", return_value=scrubber_db) + arg_list = [ + "check", + "init", + "journal", + "--object-type", + "snapshot", + "--name", + ] + + name = "cfg1" + result = invoke( + scrubber_db, + arg_list + [name], + kafka_server=kafka_server, + kafka_prefix=kafka_prefix, + kafka_consumer_group=kafka_consumer_group, + ) + assert result.exit_code == 0, result.output + + cfg_entry = scrubber_db.config_get(scrubber_db.config_get_by_name(name)) + assert cfg_entry.check_hashes is True + assert cfg_entry.check_references is False + + def test_check_storage(mocker, scrubber_db, swh_storage): storage_checker = MagicMock() StorageChecker = mocker.patch(