From c044c25b8f8fd70170128985bfb29232bfdea0de Mon Sep 17 00:00:00 2001 From: David Douard <david.douard@sdfa3.org> Date: Wed, 16 Oct 2024 17:49:07 +0200 Subject: [PATCH] cli/db: factorize cmd arguments handling logic of several `swh db` commands Extract the argument/option handling logic (for init-admin, init, version, and upgrade commands) in a single handle_cmd_args() function. --- swh/core/cli/db.py | 309 ++++++++++++++++++++------------------------- 1 file changed, 134 insertions(+), 175 deletions(-) mode change 100755 => 100644 swh/core/cli/db.py diff --git a/swh/core/cli/db.py b/swh/core/cli/db.py old mode 100755 new mode 100644 index b9a74c6..4fe602c --- a/swh/core/cli/db.py +++ b/swh/core/cli/db.py @@ -6,7 +6,7 @@ import logging from os import environ -from typing import Optional +from typing import Any, Dict, List, Optional, Tuple import warnings import click @@ -137,7 +137,7 @@ def db_init_admin( scheduler If the db connection string is not given, it will be looked for in the - configuration file. Note that this step need admin right on the database, + configuration file. Note that this step needs admin right on the database, so this usage is not meant for production environment (but rather test environments.) @@ -201,48 +201,16 @@ def db_init_admin( declared in the 'storage' section of the config file. """ - from swh.core.config import get_swh_backend_module, list_db_config_entries from swh.core.db.db_utils import init_admin_extensions - package = module - args = [] - - if initialize_all: - assert ":" not in module - for cfgmod, path, dbcfg, cnxstr in list_db_config_entries(ctx.obj["config"]): - if cfgmod == module: - fullmodule, _ = get_swh_backend_module( - swh_package=cfgmod, cls=dbcfg["cls"] - ) - args.append((cfgmod, fullmodule, cnxstr, dbcfg)) - else: - if dbname is not None: - # default behavior - cfg = {"cls": "postgresql", "db": dbname} - fullmodule, _ = get_swh_backend_module(swh_package=module, cls="postgresql") - else: - if ":" in module: # it's a path to a config entry - # read the db access for module 'module' from the config file - package, cfg, dbname = get_dburl_from_config_key( - ctx.obj["config"], module - ) - # the actual module is retrieved from the entry_point for the cls - fullmodule, _ = get_swh_backend_module( - swh_package=package, cls=cfg["cls"] - ) - else: - # use the db cnx from the config file; the expected config entry is the - # given module name - cfg = ctx.obj["config"].get(module, {}) - dbname, cfg = get_dburl_from_config(cfg) - # the actual module is retrieved from the entry_point for the cls - fullmodule, _ = get_swh_backend_module( - swh_package=module, cls=cfg["cls"] - ) - assert dbname is not None - args.append((package, fullmodule, dbname, cfg)) + args = handle_cmd_args( + cfg=ctx.obj["config"], + module=module, + do_all=initialize_all, + dbname=dbname, + ) - for package, fullmodule, dbname, cfg in args: + for package, fullmodule, _, dbname, cfg in args: logger.debug("db_init_admin %s:%s dbname=%s", package, cfg["cls"], dbname) init_admin_extensions(f"{package}:{cfg['cls']}", dbname) @@ -328,53 +296,19 @@ def db_init(ctx, module, dbname, flavor, module_config_key, initialize_all): module/config entry. """ - from swh.core.config import get_swh_backend_module, list_db_config_entries # TODO: sanity check all the incompatible options... - - package = module - init_args = [] - if initialize_all: - for cfgmod, path, dbcfg, cnxstr in list_db_config_entries(ctx.obj["config"]): - if cfgmod == module: - fullmodule, backend_class = get_swh_backend_module( - swh_package=cfgmod, cls=dbcfg["cls"] - ) - init_args.append((cfgmod, fullmodule, backend_class, cnxstr, dbcfg)) - else: - if dbname is not None: - cfg = {"cls": "postgresql", "db": dbname} - fullmodule, backend_class = get_swh_backend_module( - swh_package=module, cls="postgresql" - ) - else: - if ":" in module: # it's a path to a config entry - package, cfg, dbname = get_dburl_from_config_key( - ctx.obj["config"], module - ) - # the actual module is retrieved from the entry_point for the cls - fullmodule, backend_class = get_swh_backend_module( - swh_package=package, cls=cfg["cls"] - ) - else: - # use the db cnx from the config file; the expected config entry is the - # given package name - cfg = ctx.obj["config"].get(module_config_key or module, {}) - dbname, cfg = get_dburl_from_config(cfg) - # the actual module is retrieved from the entry_point for the cls - fullmodule, backend_class = get_swh_backend_module( - swh_package=module, cls=cfg["cls"] - ) - if not dbname: - raise click.BadParameter( - "Missing the postgresql connection configuration. Either fix your " - "configuration file or use the --dbname option." - ) - init_args.append((package, fullmodule, backend_class, dbname, cfg)) - # XXX it probably does not make much sense to have a non-None flavor when # initializing several db at once... this case should raise an error - for package, fullmodule, backend_class, dbname, cfg in init_args: + + args = handle_cmd_args( + cfg=ctx.obj["config"], + module=module, + do_all=initialize_all, + dbname=dbname, + ) + + for package, fullmodule, backend_class, dbname, cfg in args: initialize_one(package, fullmodule, backend_class, flavor, dbname, cfg) @@ -526,58 +460,29 @@ def db_version(ctx, module, show_history, all_backends, module_config_key=None): swh db version --all scrubber """ - from swh.core.config import get_swh_backend_module, list_db_config_entries - from swh.core.db.db_utils import get_database_info, import_swhmodule - - backends = [] - - if all_backends: - assert ":" not in module - for cfgmod, path, dbcfg, cnxstr in list_db_config_entries(ctx.obj["config"]): - if cfgmod == module: - _, backend_class = get_swh_backend_module( - swh_package=cfgmod, cls=dbcfg["cls"] - ) - db_module, db_version, db_flavor = get_database_info(cnxstr) - backends.append( - (db_module, db_version, db_flavor, dbcfg, cnxstr, backend_class) - ) - else: - if ":" in module: # it's a path to a config entry - swhmod, cfg, dbname = get_dburl_from_config_key(ctx.obj["config"], module) - # the actual module is retrieved from the entry_point for the cls - _, backend_class = get_swh_backend_module( - swh_package=swhmod, cls=cfg["cls"] - ) - module = f"{swhmod}:{cfg['cls']}" - else: - # use the db cnx from the config file; the expected config entry is the - # given module name - cfg = ctx.obj["config"].get(module_config_key or module, {}) - dbname, cfg = get_dburl_from_config(cfg) - _, backend_class = get_swh_backend_module( - swh_package=module, cls=cfg["cls"] - ) - if not dbname: - raise click.BadParameter( - "Missing the postgresql connection configuration. Either fix your " - "configuration file or use the --dbname option." - ) + from swh.core.db.db_utils import ( + get_database_info, + import_swhmodule, + swh_db_versions, + ) - logger.debug("db_version dbname=%s", dbname) + backends = handle_cmd_args( + cfg=ctx.obj["config"], + module=module, + do_all=all_backends, + config_key=module_config_key, + ) - db_module, db_version, db_flavor = get_database_info(dbname) + for package, _, backend_class, cnxstr, cfg in backends: + db_module, db_version, db_flavor = get_database_info(cnxstr) if db_module is None: click.secho( "WARNING the database does not have a dbmodule table.", fg="red", bold=True, ) - db_module = module - assert db_module == module, f"{db_module} (in the db) != {module} (given)" - backends.append((db_module, db_version, db_flavor, cfg, dbname, backend_class)) + db_module = f"{package}:{cfg['cls']}" - for db_module, db_version, db_flavor, cfg, dbname, backend_class in backends: click.echo("") click.secho(f"module: {db_module}", fg="green", bold=True) if ":" not in db_module: @@ -609,9 +514,7 @@ def db_version(ctx, module, show_history, all_backends, module_config_key=None): if not show_history: click.secho(f"version: {db_version}", fg="green", bold=True) else: - from swh.core.db.db_utils import swh_db_versions - - versions = swh_db_versions(dbname) + versions = swh_db_versions(cnxstr) for version, tstamp, desc in versions: click.echo(f"{version} [{tstamp}] {desc}") @@ -663,7 +566,6 @@ def db_upgrade( swh db upgrade scrubber:scrubber_db --to-version=10 """ - from swh.core.config import get_swh_backend_module, list_db_config_entries from swh.core.db.db_utils import ( get_database_info, import_swhmodule, @@ -674,52 +576,17 @@ def db_upgrade( # TODO: mark --module-config-key as deprecated # TODO: check options consistency - package = module - args = [] - if upgrade_all: - assert ":" not in module - for cfgmod, path, dbcfg, cnxstr in list_db_config_entries(ctx.obj["config"]): - if cfgmod == module: - fullmodule, backend_class = get_swh_backend_module( - swh_package=cfgmod, cls=dbcfg["cls"] - ) - args.append((cfgmod, fullmodule, backend_class, cnxstr, dbcfg)) - else: - if dbname is not None: - # default behavior - cfg = {"cls": "postgresql", "db": dbname} - fullmodule, backend_class = get_swh_backend_module( - swh_package=module, cls="postgresql" - ) - else: - if ":" in module: # it's a path to a config entry - package, cfg, dbname = get_dburl_from_config_key( - ctx.obj["config"], module - ) - # the actual module is retrieved from the entry_point for the cls - fullmodule, backend_class = get_swh_backend_module( - swh_package=package, cls=cfg["cls"] - ) - else: - # use the db cnx from the config file; the expected config entry is the - # given module name - cfg = ctx.obj["config"].get(module_config_key or module, {}) - dbname, cfg = get_dburl_from_config(cfg) - fullmodule, backend_class = get_swh_backend_module( - swh_package=module, cls=cfg["cls"] - ) - args.append((package, fullmodule, backend_class, dbname, cfg)) - - for package, fullmodule, backend_class, dbname, cfg in args: - go_to_version = to_version - # for dbname, module, cfg in zip(dbnames, modules, cfgs): - if not dbname: - raise click.BadParameter( - "Missing the postgresql connection configuration. Either fix your " - "configuration file or use the --dbname option." - ) + backends = handle_cmd_args( + cfg=ctx.obj["config"], + module=module, + do_all=upgrade_all, + dbname=dbname, + config_key=module_config_key, + ) + for package, fullmodule, backend_class, dbname, cfg in backends: logger.debug("db_version dbname=%s", dbname) + go_to_version = to_version db_module, db_version, db_flavor = get_database_info(dbname) backend = f"{package}:{cfg['cls']}" if db_module is None: @@ -818,3 +685,95 @@ def get_dburl_from_config_key(cfg, key): dburl = cfg[cfgpath[-1]] return swhmod, cfg, dburl + + +def handle_cmd_args( + cfg: Dict[str, Any], + module: str, + do_all: bool = False, + dbname: Optional[str] = None, + config_key: Optional[str] = None, +) -> List[Tuple[str, str, Optional[type], str, Dict[str, Any]]]: + """Helper function to build the list of backends to handle in a cli command + + For each identified backend, returns a tuple: + (package, module, backend_class, cnxstr, cfg) + where: + - `package`: the (swh) package this backend is implemented in (e.g. + 'storage', 'scheduler' etc.) + - `module`: the (full path) module in which the backend class can be found + (e.g. 'swh.storage.postgresql.storage') + - `backend_class`: the class implementing the backend, + - `cnxstr`: the (libpq) connection string to the database, + - `cfg`: the config structure in which this backend is configured; if the + backend is defined in a nested configuration, this is only the most + specific config structure for this backend. + + For each backend, the code implementing the backend class needs to be + registered in the `swh.<package>.classes` entry point under the + `cfg["cls"]` name. However, there is bw compatibility for swh packages not + yet updated to register these backends in the entry points. + + If `dbname` is given, return only one element in the list, with a config made + of {'cls': 'postgresql', 'db': dbname}. + + If `do_all` is True, look for every backend in the configuration (cfg) under + the section `module`. + + If `module` is a simple word ('storage', 'scheduler', etc.), look for the + last db backend found in the config file under the `module` section. + + If `module` is like "storage:steps:0:db", look for this config entry in the + config file. + + If `module` is an actual module path (e.g. 'storage.proxies.masking'), then + `dbname` must be given and the configuration is not looked for in the + config file. + + Note: this rather complex logic will be simplified when all the swh + packages are updated and do not need bw compat handling code any more. + """ + from swh.core.config import get_swh_backend_module, list_db_config_entries + + package = module + backends = [] + + if do_all: + assert ":" not in module + for cfgmod, path, dbcfg, cnxstr in list_db_config_entries(cfg): + if cfgmod == module: + fullmodule, backend_class = get_swh_backend_module( + swh_package=cfgmod, cls=dbcfg["cls"] + ) + backends.append((cfgmod, fullmodule, backend_class, cnxstr, dbcfg)) + else: + if dbname is not None: + # default behavior + dbcfg = {"cls": "postgresql", "db": dbname} + fullmodule, backend_class = get_swh_backend_module( + swh_package=module, cls="postgresql" + ) + else: + if ":" in module: # it's a path to a config entry + # read the db access for module 'module' from the config file + package, dbcfg, dbname = get_dburl_from_config_key(cfg, module) + # the actual module is retrieved from the entry_point for the cls + fullmodule, backend_class = get_swh_backend_module( + swh_package=package, cls=dbcfg["cls"] + ) + else: + # use the db cnx from the config file; the expected config entry is the + # given module name + dbname, dbcfg = get_dburl_from_config(cfg.get(config_key or module, {})) + # the actual module is retrieved from the entry_point for the cls + fullmodule, backend_class = get_swh_backend_module( + swh_package=module, cls=dbcfg["cls"] + ) + if not dbname: + raise click.BadParameter( + "Missing the postgresql connection configuration. Either fix your " + "configuration file or use the --dbname option." + ) + + backends.append((package, fullmodule, backend_class, dbname, dbcfg)) + return backends -- GitLab