diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py index 362c242b87b6b4dda9382b7a64f3e381dda2ee8c..15a746e7a108a4961d45459deb2068ac5add49ad 100644 --- a/swh/storage/postgresql/storage.py +++ b/swh/storage/postgresql/storage.py @@ -216,7 +216,7 @@ def _get_paginated_sha1_partition( class Storage: """SWH storage datastore proxy, encompassing DB and object storage""" - current_version: int = 192 + current_version: int = 193 def __init__( self, diff --git a/swh/storage/proxies/masking/__init__.py b/swh/storage/proxies/masking/__init__.py index 7ee106be1f1ed861743bc79ecdbfa4e1ab34baf7..b148328e6e1bd8e814a2bfd57406c373c0d54121 100644 --- a/swh/storage/proxies/masking/__init__.py +++ b/swh/storage/proxies/masking/__init__.py @@ -25,6 +25,13 @@ from .db import MaskingQuery MASKING_OVERHEAD_METRIC = "swh_storage_masking_overhead_seconds" +def get_datastore(cls, db): + assert cls == "postgresql" + from .db import MaskingAdmin + + return MaskingAdmin.connect(db) + + def masking_overhead_timer(method_name: str) -> DifferentialTimer: """Return a properly setup DifferentialTimer for ``method_name`` of the storage""" return DifferentialTimer(MASKING_OVERHEAD_METRIC, tags={"endpoint": method_name}) diff --git a/swh/storage/proxies/masking/db.py b/swh/storage/proxies/masking/db.py index 22c37acf4dc1dd0d1a20df58e94ae5b7a7a04c39..3718b9c0ac739351898b3d3d7e04e07834d548ca 100644 --- a/swh/storage/proxies/masking/db.py +++ b/swh/storage/proxies/masking/db.py @@ -86,6 +86,9 @@ class MaskedObject: class MaskingDb(BaseDb): + # we started with 192, because this used to be part of the main storage db + current_version = 193 + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/swh/storage/proxies/masking/sql/10-superuser-init.sql b/swh/storage/proxies/masking/sql/10-superuser-init.sql new file mode 100644 index 0000000000000000000000000000000000000000..d7b82ed3bf7b96b4d3ae22b07898b81a55909100 --- /dev/null +++ b/swh/storage/proxies/masking/sql/10-superuser-init.sql @@ -0,0 +1 @@ +create extension if not exists "uuid-ossp"; -- for masking proxy diff --git a/swh/storage/proxies/masking/sql/20-types.sql b/swh/storage/proxies/masking/sql/20-types.sql new file mode 100644 index 0000000000000000000000000000000000000000..95443dbf2fbb7b0ff5f943356bb58f7a296ddeb0 --- /dev/null +++ b/swh/storage/proxies/masking/sql/20-types.sql @@ -0,0 +1,2 @@ +create type extended_object_type as enum ('content', 'directory', 'revision', 'release', 'snapshot', 'origin', 'raw_extrinsic_metadata'); +comment on type extended_object_type is 'Data object types stored in data model'; diff --git a/swh/storage/sql/80-masked-objects.sql b/swh/storage/proxies/masking/sql/30-schema.sql similarity index 78% rename from swh/storage/sql/80-masked-objects.sql rename to swh/storage/proxies/masking/sql/30-schema.sql index ef77d2c2df6c60c5c80ffefc0392fadcc443e36c..373049d231ba2b5466b455061616b66f2c04b53d 100644 --- a/swh/storage/sql/80-masked-objects.sql +++ b/swh/storage/proxies/masking/sql/30-schema.sql @@ -1,7 +1,3 @@ -select swh_get_dbflavor() = 'only_masking' as dbflavor_only_masking \gset - --- This skips this whole file unless the dbflavor is `only_masking` -\if :dbflavor_only_masking create type masked_state as enum ('visible', 'decision_pending', 'restricted'); comment on type masked_state is 'The degree to which an object is masked'; @@ -13,8 +9,6 @@ create table if not exists masking_request ( reason text not null ); -create unique index if not exists masking_request_slug_idx on masking_request using btree(slug); - comment on table masking_request is 'A recorded request for masking certain objects'; comment on column masking_request.id is 'Opaque id of the request'; comment on column masking_request.slug is 'Human-readable id of the request'; @@ -47,10 +41,3 @@ comment on column masked_object.object_id is 'The object_id part of the object'' comment on column masked_object.object_type is 'The object_type part of the object''s SWHID'; comment on column masked_object.request is 'Reference to the affecting request'; comment on column masked_object.state is 'The degree to which the object is masked as a result of the request'; - - -create index if not exists masked_object_request_idx on masked_object using btree(request, object_type, object_id); -comment on index masked_object_request_idx is 'Allow listing all the objects associated by request, ordered by SWHID'; - --- :dbflavor_only_masking -\endif diff --git a/swh/storage/proxies/masking/sql/60-indexes.sql b/swh/storage/proxies/masking/sql/60-indexes.sql new file mode 100644 index 0000000000000000000000000000000000000000..6f41cba7f4d62325c9ab945709af14c8fc552cfa --- /dev/null +++ b/swh/storage/proxies/masking/sql/60-indexes.sql @@ -0,0 +1,5 @@ + +create unique index if not exists masking_request_slug_idx on masking_request using btree(slug); + +create index if not exists masked_object_request_idx on masked_object using btree(request, object_type, object_id); +comment on index masked_object_request_idx is 'Allow listing all the objects associated by request, ordered by SWHID'; diff --git a/swh/storage/proxies/masking/sql/upgrades/193.sql b/swh/storage/proxies/masking/sql/upgrades/193.sql new file mode 100644 index 0000000000000000000000000000000000000000..2e442ba4661280a17a5c5d0facb19dcb53eae1b0 --- /dev/null +++ b/swh/storage/proxies/masking/sql/upgrades/193.sql @@ -0,0 +1,10 @@ +-- +-- SWH Masking Proxy DB schema upgrade +-- from_version: 192 +-- to_version: 193 +-- description: Actually creates a dedicated DB for the masking proxy +-- This does nothing but dropping the flavor table and type + +drop function if exists swh_get_dbflavor; +drop table if exists dbflavor; +drop type if exists database_flavor; diff --git a/swh/storage/sql/10-superuser-init.sql b/swh/storage/sql/10-superuser-init.sql index e5e2cacb5f38f2658db47f43d5f01bbbabbd7217..97b3817cf50ec2362568fb8b9ed1f7445e130af6 100644 --- a/swh/storage/sql/10-superuser-init.sql +++ b/swh/storage/sql/10-superuser-init.sql @@ -3,7 +3,6 @@ create extension if not exists btree_gist; create extension if not exists pgcrypto; create extension if not exists pg_trgm; -create extension if not exists "uuid-ossp"; -- for masking proxy -- courtesy of Andreas 'ads' Scherbaum in -- https://andreas.scherbaum.la/blog/archives/346-create-language-if-not-exist.html diff --git a/swh/storage/sql/15-flavor.sql b/swh/storage/sql/15-flavor.sql index 4610a7c9e5e9b1d252d9f8604f200e0baad8ec9a..c4d59b9baa5279700be3812b654b4bad16711d83 100644 --- a/swh/storage/sql/15-flavor.sql +++ b/swh/storage/sql/15-flavor.sql @@ -2,8 +2,7 @@ create type database_flavor as enum ( 'default', -- default: full index availability for deduplication and read queries 'mirror', -- mirror: reduced indexes to allow for out of order insertions - 'read_replica', -- read replica: minimal indexes to allow read queries - 'only_masking' -- only masking: only deploy enough schema for the masking proxy + 'read_replica' -- read replica: minimal indexes to allow read queries ); comment on type database_flavor is 'Flavor of the current database'; diff --git a/swh/storage/sql/20-enums.sql b/swh/storage/sql/20-enums.sql index acb4221e4004afdb082f841bdeecb477efbcbf88..4c153e30d037b26ba978572c670f7ce016ea50dd 100644 --- a/swh/storage/sql/20-enums.sql +++ b/swh/storage/sql/20-enums.sql @@ -1,10 +1,6 @@ --- --- Software Heritage Data Types --- -select swh_get_dbflavor() != 'only_masking' as dbflavor_not_only_masking \gset - --- When dbflavor is `only_masking`, skip all types except for extended_object_type -\if :dbflavor_not_only_masking create type content_status as enum ('absent', 'visible', 'hidden'); comment on type content_status is 'Content visibility'; @@ -28,8 +24,5 @@ create type origin_visit_state as enum ( ); comment on type origin_visit_state IS 'Possible origin visit status values'; --- :dbflavor_not_only_masking -\endif - create type extended_object_type as enum ('content', 'directory', 'revision', 'release', 'snapshot', 'origin', 'raw_extrinsic_metadata'); comment on type extended_object_type is 'Data object types stored in data model'; diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql index c963dda9c867880322b7f55ab925d22b6a8a584b..9259f13d9c712274e746b5eedeac763e5d33461b 100644 --- a/swh/storage/sql/30-schema.sql +++ b/swh/storage/sql/30-schema.sql @@ -1,8 +1,3 @@ -select swh_get_dbflavor() != 'only_masking' as dbflavor_not_only_masking \gset - --- This skips this whole file if the dbflavor is `only_masking` -\if :dbflavor_not_only_masking - --- --- SQL implementation of the Software Heritage data model --- @@ -537,6 +532,3 @@ comment on column object_references.source_type is 'Object type for the source o comment on column object_references.source is 'Object id for the source of the edge'; comment on column object_references.target_type is 'Object type for the target of the edge'; comment on column object_references.target is 'Object id for the target of the edge'; - --- :dbflavor_not_only_masking -\endif diff --git a/swh/storage/sql/40-funcs.sql b/swh/storage/sql/40-funcs.sql index b2db899237bce71096b2f908693a5ca0a33d1aac..803d237354e4dcdb792976410f341167a9cc5610 100644 --- a/swh/storage/sql/40-funcs.sql +++ b/swh/storage/sql/40-funcs.sql @@ -1,8 +1,3 @@ -select swh_get_dbflavor() != 'only_masking' as dbflavor_not_only_masking \gset - --- This skips this whole file if the dbflavor is `only_masking` -\if :dbflavor_not_only_masking - create or replace function hash_sha1(text) returns text as $$ @@ -1052,6 +1047,3 @@ create trigger update_counts_from_bucketed for each row when (NEW.line % 256 = 0) execute procedure swh_update_counters_from_buckets(); - --- :dbflavor_not_only_masking -\endif diff --git a/swh/storage/sql/60-indexes.sql b/swh/storage/sql/60-indexes.sql index 6f326250f1b9bbc8c6bad062e7029951558798b0..c26c40d77ccc7ae06db9cae815c7b5bd20b4a367 100644 --- a/swh/storage/sql/60-indexes.sql +++ b/swh/storage/sql/60-indexes.sql @@ -4,10 +4,6 @@ select swh_get_dbflavor() = 'read_replica' as dbflavor_read_replica \gset select swh_get_dbflavor() != 'read_replica' as dbflavor_does_deduplication \gset select swh_get_dbflavor() = 'mirror' as dbflavor_mirror \gset select swh_get_dbflavor() = 'default' as dbflavor_default \gset -select swh_get_dbflavor() != 'only_masking' as dbflavor_not_only_masking \gset - --- This skips this whole file if the dbflavor is `only_masking` -\if :dbflavor_not_only_masking -- content @@ -325,6 +321,3 @@ alter table object_counts_bucketed add primary key using index object_counts_buc -- used to query by (extid_type, extid) + to deduplicate the whole row create unique index concurrently on extid(extid_type, extid, extid_version, target_type, target); create index concurrently on extid(target_type, target); - --- :dbflavor_not_only_masking -\endif diff --git a/swh/storage/sql/upgrades/193.sql b/swh/storage/sql/upgrades/193.sql new file mode 100644 index 0000000000000000000000000000000000000000..47edc32ebe896248e5f4f32d822171fe34f48e26 --- /dev/null +++ b/swh/storage/sql/upgrades/193.sql @@ -0,0 +1,46 @@ +-- SWH DB schema upgrade +-- from_version: 192 +-- to_version: 193 +-- description: Remove the only_masking db flavor +-- This will fail if the db actually uses the only_masking flavor + +-- cannot remove a value from a enum, so we have to recreate it + + +-- (re)create the database flavor type +create type database_flavor_new as enum ( + 'default', -- default: full index availability for deduplication and read queries + 'mirror', -- mirror: reduced indexes to allow for out of order insertions + 'read_replica' -- read replica: minimal indexes to allow read queries +); +comment on type database_flavor_new is 'Flavor of the current database'; +-- and the flavor database +create table dbflavor_new ( + flavor database_flavor_new, + single_row char(1) primary key default 'x', + check (single_row = 'x') +); +comment on table dbflavor_new is 'Database flavor storage'; +comment on column dbflavor_new.flavor is 'Database flavor currently deployed'; +comment on column dbflavor_new.single_row is 'Bogus column to force the table to have a single row'; + +-- fill dbflavor_new from dbflavor + +insert into dbflavor_new select cast(flavor::text AS database_flavor_new) from dbflavor; + +drop function if exists swh_get_dbflavor; + + +-- then drop old versions of the flavor table and type +drop table dbflavor; +drop type database_flavor; + +-- move flavor stuff to alt names +alter type database_flavor_new rename to database_flavor; +alter table dbflavor_new rename to dbflavor; + +create or replace function swh_get_dbflavor() returns database_flavor language sql stable as $$ + select coalesce((select flavor from dbflavor), 'default'); +$$; + +comment on function swh_get_dbflavor is 'Get the flavor of the database currently deployed'; diff --git a/swh/storage/tests/masking/conftest.py b/swh/storage/tests/masking/conftest.py index 46d795b6e25faae29b1c180b917c28a61251648b..6bddd9e53deaf924855f8c600418e49398c5b240 100644 --- a/swh/storage/tests/masking/conftest.py +++ b/swh/storage/tests/masking/conftest.py @@ -9,16 +9,14 @@ import pytest from pytest_postgresql import factories from swh.core.db.db_utils import initialize_database_for_module -from swh.storage.postgresql.storage import Storage as StorageDatastore from swh.storage.proxies.masking.db import MaskingAdmin, MaskingQuery masking_db_postgresql_proc = factories.postgresql_proc( load=[ partial( initialize_database_for_module, - modname="storage", - flavor="only_masking", - version=StorageDatastore.current_version, + modname="storage.proxies.masking", + version=MaskingAdmin.current_version, ), ], ) diff --git a/swh/storage/tests/masking/test_cli.py b/swh/storage/tests/masking/test_cli.py index eab5469c6f08e36c84413a408d5ab7df3e36208b..f02c98369262b832cd5d833d18c65de608d1b016 100644 --- a/swh/storage/tests/masking/test_cli.py +++ b/swh/storage/tests/masking/test_cli.py @@ -12,6 +12,8 @@ import textwrap from click.testing import CliRunner import pytest +from swh.core.cli.db import db as swhdb +from swh.core.db.db_utils import get_database_info from swh.model.swhids import ExtendedSWHID, ValidationError from ...proxies.masking.cli import ( @@ -37,6 +39,31 @@ from ...proxies.masking.db import ( ) +def test_cli_db_create(postgresql): + """Create a db then initializing it should be ok""" + module_name = "storage.proxies.masking" + + db_params = postgresql.info + dbname = "masking-db" + conninfo = ( + f"postgresql://{db_params.user}@{db_params.host}:{db_params.port}/{dbname}" + ) + + # This creates the db and installs the necessary admin extensions + result = CliRunner().invoke(swhdb, ["create", module_name, "--dbname", conninfo]) + assert result.exit_code == 0, f"Unexpected output: {result.output}" + + # This initializes the schema and data + result = CliRunner().invoke(swhdb, ["init", module_name, "--dbname", conninfo]) + + assert result.exit_code == 0, f"Unexpected output: {result.output}" + + dbmodule, dbversion, dbflavor = get_database_info(conninfo) + assert dbmodule == "storage.proxies.masking" + assert dbversion == MaskingAdmin.current_version + assert dbflavor is None + + @pytest.fixture def masking_admin_config(masking_db_postgresql): return {"masking_admin": {"masking_db": masking_db_postgresql.info.dsn}} diff --git a/swh/storage/tests/masking/test_db.py b/swh/storage/tests/masking/test_db.py index 94e31834bd0a8472576d66b96b6e5f2137511692..5cab2f4aeaadeebc7f0daaf4aaf02abcd9681add 100644 --- a/swh/storage/tests/masking/test_db.py +++ b/swh/storage/tests/masking/test_db.py @@ -8,6 +8,7 @@ import uuid import pytest +from swh.core.db.db_utils import get_database_info from swh.storage.proxies.masking.db import ( DuplicateRequest, MaskedState, @@ -19,6 +20,13 @@ from swh.storage.proxies.masking.db import ( from swh.storage.tests.storage_data import StorageData +def test_db_version(masking_admin: MaskingAdmin): + dbmodule, dbversion, dbflavor = get_database_info(masking_admin.conn.dsn) + assert dbmodule == "storage.proxies.masking" + assert dbversion == MaskingAdmin.current_version + assert dbflavor is None + + def test_create_find_request(masking_admin: MaskingAdmin): created = masking_admin.create_request(slug="foo", reason="bar")