# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import click
from swh.core.cli import CONTEXT_SETTINGS
from swh.core.cli import swh as swh_cli_group
@swh_cli_group.group(name="scrubber", context_settings=CONTEXT_SETTINGS)
@click.option(
    "--config-file",
    "-C",
    default=None,
    type=click.Path(exists=True, dir_okay=False),
    help="Configuration file.",
)
@click.pass_context
def scrubber_cli_group(ctx, config_file):
    """main command group of the datastore scrubber
    """
    # Function-scope imports: these modules are only loaded when the
    # command actually runs, not when the CLI module is imported.
    from swh.core import config

    from . import get_scrubber_db

    # No --config-file on the command line: fall back to the
    # SWH_CONFIG_FILENAME environment variable (may still be unset).
    if not config_file:
        config_file = os.environ.get("SWH_CONFIG_FILENAME")

    if config_file:
        # A path taken from the environment did not go through the
        # click.Path(exists=True) check, so verify it explicitly.
        if not os.path.exists(config_file):
            raise ValueError("%s does not exist" % config_file)
        conf = config.read(config_file)
    else:
        conf = {}

    # The scrubber database is mandatory; ctx.fail() aborts the command
    # with a usage error.
    if "scrubber_db" not in conf:
        ctx.fail("You must have a scrubber_db configured in your config file.")

    # Share the loaded configuration and the database handle with the
    # subcommands through the click context object.
    ctx.ensure_object(dict)
    ctx.obj["config"] = conf
    ctx.obj["db"] = get_scrubber_db(**conf["scrubber_db"])


@scrubber_cli_group.group(name="check")
@click.pass_context
def scrubber_check_cli_group(ctx):
    """group of commands which read from data stores and report errors.
    """
    # The callback declares a ``ctx`` parameter, so it must be wrapped with
    # click.pass_context; otherwise click invokes it without arguments.
    # Nothing to do here: this group only namespaces its subcommands.
    pass


@scrubber_check_cli_group.command(name="storage")
@click.option(
    "--object-type",
    type=click.Choice(
        # use a hardcoded list to prevent having to load the
        # replay module at cli loading time
        [
            "snapshot",
            "revision",
            "release",
            "directory",
            # TODO:
            # "raw_extrinsic_metadata",
            # "extid",
        ]
    ),
)
@click.option("--start-object", default="0" * 40)
@click.option("--end-object", default="f" * 40)
@click.pass_context
def scrubber_check_storage(ctx, object_type: str, start_object: str, end_object: str):
    """Run a StorageChecker over the configured storage.

    The checker is built from the scrubber database stored in the click
    context, the ``storage`` section of the configuration, and the
    ``--object-type`` / ``--start-object`` / ``--end-object`` options.
    """
    conf = ctx.obj["config"]
    # A storage backend is required for this command; ctx.fail() aborts
    # with a usage error when it is not configured.
    if "storage" not in conf:
        ctx.fail("You must have a storage configured in your config file.")

    # Function-scope imports: only loaded when this command is executed.
    from swh.storage import get_storage

    from .check_storage import StorageChecker

    checker = StorageChecker(
        db=ctx.obj["db"],
        storage=get_storage(**conf["storage"]),
        object_type=object_type,
        start_object=start_object,
        end_object=end_object,
    )

    checker.check_storage()