From 26c3be97b25daae6b81554df8f0788cc7e9d0c35 Mon Sep 17 00:00:00 2001 From: David Douard <david.douard@sdfa3.org> Date: Tue, 23 Apr 2024 09:20:41 +0200 Subject: [PATCH] Move SQL scripts for the masking proxy under swh/storage/proxies/masking Make the MaskingProxy uses its own db rather than sharing the main storage one with fragile use of flavors to shoehorn it in there. The migration script for the main storage db requires to recreate the db flavor table and (enum) type since this later cannot be altered to remove an entry. On the masking part, we set the db model version to 193 because the currently deployed version is actually set to the main storage's db version (thus 192). --- swh/storage/postgresql/storage.py | 2 +- swh/storage/proxies/masking/__init__.py | 7 +++ swh/storage/proxies/masking/db.py | 3 ++ .../proxies/masking/sql/10-superuser-init.sql | 1 + swh/storage/proxies/masking/sql/20-types.sql | 2 + .../masking/sql/30-schema.sql} | 13 ------ .../proxies/masking/sql/60-indexes.sql | 5 ++ .../proxies/masking/sql/upgrades/193.sql | 10 ++++ swh/storage/sql/10-superuser-init.sql | 1 - swh/storage/sql/15-flavor.sql | 3 +- swh/storage/sql/20-enums.sql | 7 --- swh/storage/sql/30-schema.sql | 8 ---- swh/storage/sql/40-funcs.sql | 8 ---- swh/storage/sql/60-indexes.sql | 7 --- swh/storage/sql/upgrades/193.sql | 46 +++++++++++++++++++ swh/storage/tests/masking/conftest.py | 6 +-- swh/storage/tests/masking/test_cli.py | 27 +++++++++++ swh/storage/tests/masking/test_db.py | 8 ++++ 18 files changed, 113 insertions(+), 51 deletions(-) create mode 100644 swh/storage/proxies/masking/sql/10-superuser-init.sql create mode 100644 swh/storage/proxies/masking/sql/20-types.sql rename swh/storage/{sql/80-masked-objects.sql => proxies/masking/sql/30-schema.sql} (78%) create mode 100644 swh/storage/proxies/masking/sql/60-indexes.sql create mode 100644 swh/storage/proxies/masking/sql/upgrades/193.sql create mode 100644 swh/storage/sql/upgrades/193.sql diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py index 362c242b8..15a746e7a 100644 --- a/swh/storage/postgresql/storage.py +++ b/swh/storage/postgresql/storage.py @@ -216,7 +216,7 @@ def _get_paginated_sha1_partition( class Storage: """SWH storage datastore proxy, encompassing DB and object storage""" - current_version: int = 192 + current_version: int = 193 def __init__( self, diff --git a/swh/storage/proxies/masking/__init__.py b/swh/storage/proxies/masking/__init__.py index 7ee106be1..b148328e6 100644 --- a/swh/storage/proxies/masking/__init__.py +++ b/swh/storage/proxies/masking/__init__.py @@ -25,6 +25,13 @@ from .db import MaskingQuery MASKING_OVERHEAD_METRIC = "swh_storage_masking_overhead_seconds" +def get_datastore(cls, db): + assert cls == "postgresql" + from .db import MaskingAdmin + + return MaskingAdmin.connect(db) + + def masking_overhead_timer(method_name: str) -> DifferentialTimer: """Return a properly setup DifferentialTimer for ``method_name`` of the storage""" return DifferentialTimer(MASKING_OVERHEAD_METRIC, tags={"endpoint": method_name}) diff --git a/swh/storage/proxies/masking/db.py b/swh/storage/proxies/masking/db.py index 22c37acf4..3718b9c0a 100644 --- a/swh/storage/proxies/masking/db.py +++ b/swh/storage/proxies/masking/db.py @@ -86,6 +86,9 @@ class MaskedObject: class MaskingDb(BaseDb): + # we started with 192, because this used to be part of the main storage db + current_version = 193 + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/swh/storage/proxies/masking/sql/10-superuser-init.sql b/swh/storage/proxies/masking/sql/10-superuser-init.sql new file mode 100644 index 000000000..d7b82ed3b --- /dev/null +++ b/swh/storage/proxies/masking/sql/10-superuser-init.sql @@ -0,0 +1 @@ +create extension if not exists "uuid-ossp"; -- for masking proxy diff --git a/swh/storage/proxies/masking/sql/20-types.sql b/swh/storage/proxies/masking/sql/20-types.sql new file mode 100644 index 000000000..95443dbf2 --- /dev/null +++ b/swh/storage/proxies/masking/sql/20-types.sql @@ -0,0 +1,2 @@ +create type extended_object_type as enum ('content', 'directory', 'revision', 'release', 'snapshot', 'origin', 'raw_extrinsic_metadata'); +comment on type extended_object_type is 'Data object types stored in data model'; diff --git a/swh/storage/sql/80-masked-objects.sql b/swh/storage/proxies/masking/sql/30-schema.sql similarity index 78% rename from swh/storage/sql/80-masked-objects.sql rename to swh/storage/proxies/masking/sql/30-schema.sql index ef77d2c2d..373049d23 100644 --- a/swh/storage/sql/80-masked-objects.sql +++ b/swh/storage/proxies/masking/sql/30-schema.sql @@ -1,7 +1,3 @@ -select swh_get_dbflavor() = 'only_masking' as dbflavor_only_masking \gset - --- This skips this whole file unless the dbflavor is `only_masking` -\if :dbflavor_only_masking create type masked_state as enum ('visible', 'decision_pending', 'restricted'); comment on type masked_state is 'The degree to which an object is masked'; @@ -13,8 +9,6 @@ create table if not exists masking_request ( reason text not null ); -create unique index if not exists masking_request_slug_idx on masking_request using btree(slug); - comment on table masking_request is 'A recorded request for masking certain objects'; comment on column masking_request.id is 'Opaque id of the request'; comment on column masking_request.slug is 'Human-readable id of the request'; @@ -47,10 +41,3 @@ comment on column masked_object.object_id is 'The object_id part of the object'' comment on column masked_object.object_type is 'The object_type part of the object''s SWHID'; comment on column masked_object.request is 'Reference to the affecting request'; comment on column masked_object.state is 'The degree to which the object is masked as a result of the request'; - - -create index if not exists masked_object_request_idx on masked_object using btree(request, object_type, object_id); -comment on index masked_object_request_idx is 'Allow listing all the objects associated by request, ordered by SWHID'; - --- :dbflavor_only_masking -\endif diff --git a/swh/storage/proxies/masking/sql/60-indexes.sql b/swh/storage/proxies/masking/sql/60-indexes.sql new file mode 100644 index 000000000..6f41cba7f --- /dev/null +++ b/swh/storage/proxies/masking/sql/60-indexes.sql @@ -0,0 +1,5 @@ + +create unique index if not exists masking_request_slug_idx on masking_request using btree(slug); + +create index if not exists masked_object_request_idx on masked_object using btree(request, object_type, object_id); +comment on index masked_object_request_idx is 'Allow listing all the objects associated by request, ordered by SWHID'; diff --git a/swh/storage/proxies/masking/sql/upgrades/193.sql b/swh/storage/proxies/masking/sql/upgrades/193.sql new file mode 100644 index 000000000..2e442ba46 --- /dev/null +++ b/swh/storage/proxies/masking/sql/upgrades/193.sql @@ -0,0 +1,10 @@ +-- +-- SWH Masking Proxy DB schema upgrade +-- from_version: 192 +-- to_version: 193 +-- description: Actually creates a dedicated DB for the masking proxy +-- This does nothing but dropping the flavor table and type + +drop function if exists swh_get_dbflavor; +drop table if exists dbflavor; +drop type if exists database_flavor; diff --git a/swh/storage/sql/10-superuser-init.sql b/swh/storage/sql/10-superuser-init.sql index e5e2cacb5..97b3817cf 100644 --- a/swh/storage/sql/10-superuser-init.sql +++ b/swh/storage/sql/10-superuser-init.sql @@ -3,7 +3,6 @@ create extension if not exists btree_gist; create extension if not exists pgcrypto; create extension if not exists pg_trgm; -create extension if not exists "uuid-ossp"; -- for masking proxy -- courtesy of Andreas 'ads' Scherbaum in -- https://andreas.scherbaum.la/blog/archives/346-create-language-if-not-exist.html diff --git a/swh/storage/sql/15-flavor.sql b/swh/storage/sql/15-flavor.sql index 4610a7c9e..c4d59b9ba 100644 --- a/swh/storage/sql/15-flavor.sql +++ b/swh/storage/sql/15-flavor.sql @@ -2,8 +2,7 @@ create type database_flavor as enum ( 'default', -- default: full index availability for deduplication and read queries 'mirror', -- mirror: reduced indexes to allow for out of order insertions - 'read_replica', -- read replica: minimal indexes to allow read queries - 'only_masking' -- only masking: only deploy enough schema for the masking proxy + 'read_replica' -- read replica: minimal indexes to allow read queries ); comment on type database_flavor is 'Flavor of the current database'; diff --git a/swh/storage/sql/20-enums.sql b/swh/storage/sql/20-enums.sql index acb4221e4..4c153e30d 100644 --- a/swh/storage/sql/20-enums.sql +++ b/swh/storage/sql/20-enums.sql @@ -1,10 +1,6 @@ --- --- Software Heritage Data Types --- -select swh_get_dbflavor() != 'only_masking' as dbflavor_not_only_masking \gset - --- When dbflavor is `only_masking`, skip all types except for extended_object_type -\if :dbflavor_not_only_masking create type content_status as enum ('absent', 'visible', 'hidden'); comment on type content_status is 'Content visibility'; @@ -28,8 +24,5 @@ create type origin_visit_state as enum ( ); comment on type origin_visit_state IS 'Possible origin visit status values'; --- :dbflavor_not_only_masking -\endif - create type extended_object_type as enum ('content', 'directory', 'revision', 'release', 'snapshot', 'origin', 'raw_extrinsic_metadata'); comment on type extended_object_type is 'Data object types stored in data model'; diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql index c963dda9c..9259f13d9 100644 --- a/swh/storage/sql/30-schema.sql +++ b/swh/storage/sql/30-schema.sql @@ -1,8 +1,3 @@ -select swh_get_dbflavor() != 'only_masking' as dbflavor_not_only_masking \gset - --- This skips this whole file if the dbflavor is `only_masking` -\if :dbflavor_not_only_masking - --- --- SQL implementation of the Software Heritage data model --- @@ -537,6 +532,3 @@ comment on column object_references.source_type is 'Object type for the source o comment on column object_references.source is 'Object id for the source of the edge'; comment on column object_references.target_type is 'Object type for the target of the edge'; comment on column object_references.target is 'Object id for the target of the edge'; - --- :dbflavor_not_only_masking -\endif diff --git a/swh/storage/sql/40-funcs.sql b/swh/storage/sql/40-funcs.sql index b2db89923..803d23735 100644 --- a/swh/storage/sql/40-funcs.sql +++ b/swh/storage/sql/40-funcs.sql @@ -1,8 +1,3 @@ -select swh_get_dbflavor() != 'only_masking' as dbflavor_not_only_masking \gset - --- This skips this whole file if the dbflavor is `only_masking` -\if :dbflavor_not_only_masking - create or replace function hash_sha1(text) returns text as $$ @@ -1052,6 +1047,3 @@ create trigger update_counts_from_bucketed for each row when (NEW.line % 256 = 0) execute procedure swh_update_counters_from_buckets(); - --- :dbflavor_not_only_masking -\endif diff --git a/swh/storage/sql/60-indexes.sql b/swh/storage/sql/60-indexes.sql index 6f326250f..c26c40d77 100644 --- a/swh/storage/sql/60-indexes.sql +++ b/swh/storage/sql/60-indexes.sql @@ -4,10 +4,6 @@ select swh_get_dbflavor() = 'read_replica' as dbflavor_read_replica \gset select swh_get_dbflavor() != 'read_replica' as dbflavor_does_deduplication \gset select swh_get_dbflavor() = 'mirror' as dbflavor_mirror \gset select swh_get_dbflavor() = 'default' as dbflavor_default \gset -select swh_get_dbflavor() != 'only_masking' as dbflavor_not_only_masking \gset - --- This skips this whole file if the dbflavor is `only_masking` -\if :dbflavor_not_only_masking -- content @@ -325,6 +321,3 @@ alter table object_counts_bucketed add primary key using index object_counts_buc -- used to query by (extid_type, extid) + to deduplicate the whole row create unique index concurrently on extid(extid_type, extid, extid_version, target_type, target); create index concurrently on extid(target_type, target); - --- :dbflavor_not_only_masking -\endif diff --git a/swh/storage/sql/upgrades/193.sql b/swh/storage/sql/upgrades/193.sql new file mode 100644 index 000000000..47edc32eb --- /dev/null +++ b/swh/storage/sql/upgrades/193.sql @@ -0,0 +1,46 @@ +-- SWH DB schema upgrade +-- from_version: 192 +-- to_version: 193 +-- description: Remove the only_masking db flavor +-- This will fail if the db actually uses the only_masking flavor + +-- cannot remove a value from a enum, so we have to recreate it + + +-- (re)create the database flavor type +create type database_flavor_new as enum ( + 'default', -- default: full index availability for deduplication and read queries + 'mirror', -- mirror: reduced indexes to allow for out of order insertions + 'read_replica' -- read replica: minimal indexes to allow read queries +); +comment on type database_flavor_new is 'Flavor of the current database'; +-- and the flavor database +create table dbflavor_new ( + flavor database_flavor_new, + single_row char(1) primary key default 'x', + check (single_row = 'x') +); +comment on table dbflavor_new is 'Database flavor storage'; +comment on column dbflavor_new.flavor is 'Database flavor currently deployed'; +comment on column dbflavor_new.single_row is 'Bogus column to force the table to have a single row'; + +-- fill dbflavor_new from dbflavor + +insert into dbflavor_new select cast(flavor::text AS database_flavor_new) from dbflavor; + +drop function if exists swh_get_dbflavor; + + +-- then drop old versions of the flavor table and type +drop table dbflavor; +drop type database_flavor; + +-- move flavor stuff to alt names +alter type database_flavor_new rename to database_flavor; +alter table dbflavor_new rename to dbflavor; + +create or replace function swh_get_dbflavor() returns database_flavor language sql stable as $$ + select coalesce((select flavor from dbflavor), 'default'); +$$; + +comment on function swh_get_dbflavor is 'Get the flavor of the database currently deployed'; diff --git a/swh/storage/tests/masking/conftest.py b/swh/storage/tests/masking/conftest.py index 46d795b6e..6bddd9e53 100644 --- a/swh/storage/tests/masking/conftest.py +++ b/swh/storage/tests/masking/conftest.py @@ -9,16 +9,14 @@ import pytest from pytest_postgresql import factories from swh.core.db.db_utils import initialize_database_for_module -from swh.storage.postgresql.storage import Storage as StorageDatastore from swh.storage.proxies.masking.db import MaskingAdmin, MaskingQuery masking_db_postgresql_proc = factories.postgresql_proc( load=[ partial( initialize_database_for_module, - modname="storage", - flavor="only_masking", - version=StorageDatastore.current_version, + modname="storage.proxies.masking", + version=MaskingAdmin.current_version, ), ], ) diff --git a/swh/storage/tests/masking/test_cli.py b/swh/storage/tests/masking/test_cli.py index eab5469c6..f02c98369 100644 --- a/swh/storage/tests/masking/test_cli.py +++ b/swh/storage/tests/masking/test_cli.py @@ -12,6 +12,8 @@ import textwrap from click.testing import CliRunner import pytest +from swh.core.cli.db import db as swhdb +from swh.core.db.db_utils import get_database_info from swh.model.swhids import ExtendedSWHID, ValidationError from ...proxies.masking.cli import ( @@ -37,6 +39,31 @@ from ...proxies.masking.db import ( ) +def test_cli_db_create(postgresql): + """Create a db then initializing it should be ok""" + module_name = "storage.proxies.masking" + + db_params = postgresql.info + dbname = "masking-db" + conninfo = ( + f"postgresql://{db_params.user}@{db_params.host}:{db_params.port}/{dbname}" + ) + + # This creates the db and installs the necessary admin extensions + result = CliRunner().invoke(swhdb, ["create", module_name, "--dbname", conninfo]) + assert result.exit_code == 0, f"Unexpected output: {result.output}" + + # This initializes the schema and data + result = CliRunner().invoke(swhdb, ["init", module_name, "--dbname", conninfo]) + + assert result.exit_code == 0, f"Unexpected output: {result.output}" + + dbmodule, dbversion, dbflavor = get_database_info(conninfo) + assert dbmodule == "storage.proxies.masking" + assert dbversion == MaskingAdmin.current_version + assert dbflavor is None + + @pytest.fixture def masking_admin_config(masking_db_postgresql): return {"masking_admin": {"masking_db": masking_db_postgresql.info.dsn}} diff --git a/swh/storage/tests/masking/test_db.py b/swh/storage/tests/masking/test_db.py index 94e31834b..5cab2f4ae 100644 --- a/swh/storage/tests/masking/test_db.py +++ b/swh/storage/tests/masking/test_db.py @@ -8,6 +8,7 @@ import uuid import pytest +from swh.core.db.db_utils import get_database_info from swh.storage.proxies.masking.db import ( DuplicateRequest, MaskedState, @@ -19,6 +20,13 @@ from swh.storage.proxies.masking.db import ( from swh.storage.tests.storage_data import StorageData +def test_db_version(masking_admin: MaskingAdmin): + dbmodule, dbversion, dbflavor = get_database_info(masking_admin.conn.dsn) + assert dbmodule == "storage.proxies.masking" + assert dbversion == MaskingAdmin.current_version + assert dbflavor is None + + def test_create_find_request(masking_admin: MaskingAdmin): created = masking_admin.create_request(slug="foo", reason="bar") -- GitLab