From 09cecc5a69fdb85e23ce53772c8a82daac26dac9 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Tue, 25 Feb 2020 16:16:38 +0100
Subject: [PATCH 1/2] pgstorage: Empty temp tables instead of dropping them

Similarly to what we do in the swh.storage.

Related to D2580
---
 sql/upgrades/129.sql              | 119 ++++++++++++++++++++++++++++++
 swh/indexer/sql/30-swh-schema.sql |   2 +-
 swh/indexer/sql/40-swh-func.sql   |  38 +++++-----
 3 files changed, 139 insertions(+), 20 deletions(-)
 create mode 100644 sql/upgrades/129.sql

diff --git a/sql/upgrades/129.sql b/sql/upgrades/129.sql
new file mode 100644
index 00000000..d0e4ab0d
--- /dev/null
+++ b/sql/upgrades/129.sql
@@ -0,0 +1,119 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 128
+-- to_version: 129
+-- description: Empty the temporary tables on commit instead of dropping them
+
+insert into dbversion(version, release, description)
+values(129, now(), 'Work In Progress');
+
+create or replace function swh_mktemp(tblname regclass)
+    returns void
+    language plpgsql
+as $$
+begin
+    execute format('
+        create temporary table if not exists tmp_%1$I
+            (like %1$I including defaults)
+            on commit delete rows;
+      alter table tmp_%1$I drop column if exists object_id;
+        ', tblname);
+    return;
+end
+$$;
+
+-- create the temporary table tmp_content_mimetype mirroring content_mimetype (rows deleted on commit)
+create or replace function swh_mktemp_content_mimetype()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_content_mimetype (
+    like content_mimetype including defaults
+  ) on commit delete rows;
+$$;
+
+-- create the temporary table tmp_content_language mirroring content_language (rows deleted on commit)
+create or replace function swh_mktemp_content_language()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_content_language (
+    like content_language including defaults
+  ) on commit delete rows;
+$$;
+
+comment on function swh_mktemp_content_language() is 'Helper table to add content language';
+
+
+-- create the temporary table tmp_content_ctags mirroring content_ctags (rows deleted on commit)
+create or replace function swh_mktemp_content_ctags()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_content_ctags (
+    like content_ctags including defaults
+  ) on commit delete rows;
+$$;
+
+comment on function swh_mktemp_content_ctags() is 'Helper table to add content ctags';
+
+-- create the temporary table tmp_content_fossology_license with explicit columns (rows deleted on commit)
+create or replace function swh_mktemp_content_fossology_license()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_content_fossology_license (
+    id                       sha1,
+    license                  text,
+    indexer_configuration_id integer
+  ) on commit delete rows;
+$$;
+
+comment on function swh_mktemp_content_fossology_license() is 'Helper table to add content license';
+
+
+-- create the temporary table tmp_content_metadata mirroring content_metadata (rows deleted on commit)
+create or replace function swh_mktemp_content_metadata()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_content_metadata (
+    like content_metadata including defaults
+  ) on commit delete rows;
+$$;
+
+comment on function swh_mktemp_content_metadata() is 'Helper table to add content metadata';
+
+
+-- create the temporary table tmp_revision_intrinsic_metadata mirroring revision_intrinsic_metadata (rows deleted on commit)
+create or replace function swh_mktemp_revision_intrinsic_metadata()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_revision_intrinsic_metadata (
+    like revision_intrinsic_metadata including defaults
+  ) on commit delete rows;
+$$;
+
+comment on function swh_mktemp_revision_intrinsic_metadata() is 'Helper table to add revision intrinsic metadata';
+
+-- create the temporary table tmp_origin_intrinsic_metadata mirroring origin_intrinsic_metadata (rows deleted on commit)
+create or replace function swh_mktemp_origin_intrinsic_metadata()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_origin_intrinsic_metadata (
+    like origin_intrinsic_metadata including defaults
+  ) on commit delete rows;
+$$;
+
+comment on function swh_mktemp_origin_intrinsic_metadata() is 'Helper table to add origin intrinsic metadata';
+
+create or replace function swh_mktemp_indexer_configuration()
+    returns void
+    language sql
+as $$
+    create temporary table if not exists tmp_indexer_configuration (
+      like indexer_configuration including defaults
+    ) on commit delete rows;
+    alter table tmp_indexer_configuration drop column if exists id;
+$$;
diff --git a/swh/indexer/sql/30-swh-schema.sql b/swh/indexer/sql/30-swh-schema.sql
index 78ba0a17..a3205c9e 100644
--- a/swh/indexer/sql/30-swh-schema.sql
+++ b/swh/indexer/sql/30-swh-schema.sql
@@ -14,7 +14,7 @@ create table dbversion
 );
 
 insert into dbversion(version, release, description)
-      values(128, now(), 'Work In Progress');
+      values(129, now(), 'Work In Progress');
 -- Computing metadata on sha1's contents
 
 -- a SHA1 checksum (not necessarily originating from Git)
diff --git a/swh/indexer/sql/40-swh-func.sql b/swh/indexer/sql/40-swh-func.sql
index d2c62b77..e358312a 100644
--- a/swh/indexer/sql/40-swh-func.sql
+++ b/swh/indexer/sql/40-swh-func.sql
@@ -19,9 +19,9 @@ create or replace function swh_mktemp(tblname regclass)
 as $$
 begin
     execute format('
-        create temporary table tmp_%1$I
+        create temporary table if not exists tmp_%1$I
             (like %1$I including defaults)
-            on commit drop;
+            on commit delete rows;
       alter table tmp_%1$I drop column if exists object_id;
         ', tblname);
     return;
@@ -33,9 +33,9 @@ create or replace function swh_mktemp_content_mimetype()
     returns void
     language sql
 as $$
-  create temporary table tmp_content_mimetype (
+  create temporary table if not exists tmp_content_mimetype (
     like content_mimetype including defaults
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_content_mimetype() IS 'Helper table to add mimetype information';
@@ -114,9 +114,9 @@ create or replace function swh_mktemp_content_language()
     returns void
     language sql
 as $$
-  create temporary table tmp_content_language (
+  create temporary table if not exists tmp_content_language (
     like content_language including defaults
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_content_language() is 'Helper table to add content language';
@@ -127,9 +127,9 @@ create or replace function swh_mktemp_content_ctags()
     returns void
     language sql
 as $$
-  create temporary table tmp_content_ctags (
+  create temporary table if not exists tmp_content_ctags (
     like content_ctags including defaults
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_content_ctags() is 'Helper table to add content ctags';
@@ -202,11 +202,11 @@ create or replace function swh_mktemp_content_fossology_license()
     returns void
     language sql
 as $$
-  create temporary table tmp_content_fossology_license (
+  create temporary table if not exists tmp_content_fossology_license (
     id                       sha1,
     license                  text,
     indexer_configuration_id integer
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_content_fossology_license() is 'Helper table to add content license';
@@ -291,9 +291,9 @@ create or replace function swh_mktemp_content_metadata()
     returns void
     language sql
 as $$
-  create temporary table tmp_content_metadata (
+  create temporary table if not exists tmp_content_metadata (
     like content_metadata including defaults
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_content_metadata() is 'Helper table to add content metadata';
@@ -342,9 +342,9 @@ create or replace function swh_mktemp_revision_intrinsic_metadata()
     returns void
     language sql
 as $$
-  create temporary table tmp_revision_intrinsic_metadata (
+  create temporary table if not exists tmp_revision_intrinsic_metadata (
     like revision_intrinsic_metadata including defaults
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_revision_intrinsic_metadata() is 'Helper table to add revision intrinsic metadata';
@@ -354,9 +354,9 @@ create or replace function swh_mktemp_origin_intrinsic_metadata()
     returns void
     language sql
 as $$
-  create temporary table tmp_origin_intrinsic_metadata (
+  create temporary table if not exists tmp_origin_intrinsic_metadata (
     like origin_intrinsic_metadata including defaults
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_origin_intrinsic_metadata() is 'Helper table to add origin intrinsic metadata';
@@ -365,10 +365,10 @@ create or replace function swh_mktemp_indexer_configuration()
     returns void
     language sql
 as $$
-    create temporary table tmp_indexer_configuration (
+    create temporary table if not exists tmp_indexer_configuration (
       like indexer_configuration including defaults
-    ) on commit drop;
-    alter table tmp_indexer_configuration drop column id;
+    ) on commit delete rows;
+    alter table tmp_indexer_configuration drop column if exists id;
 $$;
 
 
-- 
GitLab

From 267cbc79d0566855207a5e20ab40d11df3a0f5b4 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Tue, 25 Feb 2020 16:59:45 +0100
Subject: [PATCH 2/2] fossology_license: Improve add query endpoint

This also clarifies the intent.
---
 sql/upgrades/130.sql              | 42 +++++++++++++++++++++++++++++++
 swh/indexer/sql/30-swh-schema.sql |  2 +-
 swh/indexer/sql/40-swh-func.sql   | 15 +++++------
 3 files changed, 51 insertions(+), 8 deletions(-)
 create mode 100644 sql/upgrades/130.sql

diff --git a/sql/upgrades/130.sql b/sql/upgrades/130.sql
new file mode 100644
index 00000000..efea5540
--- /dev/null
+++ b/sql/upgrades/130.sql
@@ -0,0 +1,42 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 129
+-- to_version: 130
+-- description: Add content licenses with an upsert instead of delete-then-insert
+
+insert into dbversion(version, release, description)
+values(130, now(), 'Work In Progress');
+
+create or replace function swh_content_fossology_license_add(conflict_update boolean)
+    returns void
+    language plpgsql
+as $$
+begin
+    -- register license names not yet present in fossology_license, so the lookups below can resolve them
+    insert into fossology_license (name)
+    select distinct license from tmp_content_fossology_license tmp
+    where not exists (select 1 from fossology_license where name=tmp.license)
+    on conflict(name) do nothing;
+
+    if conflict_update then
+        insert into content_fossology_license (id, license_id, indexer_configuration_id)
+        select tcl.id,
+              (select id from fossology_license where name = tcl.license) as license,
+              indexer_configuration_id
+        from tmp_content_fossology_license tcl
+        on conflict(id, license_id, indexer_configuration_id)
+        do update set license_id = excluded.license_id; -- NOTE(review): license_id is in the conflict target, so this looks like a no-op; confirm intent
+        return;
+    end if;
+
+    insert into content_fossology_license (id, license_id, indexer_configuration_id)
+    select tcl.id,
+          (select id from fossology_license where name = tcl.license) as license,
+          indexer_configuration_id
+    from tmp_content_fossology_license tcl
+    on conflict(id, license_id, indexer_configuration_id)
+    do nothing;
+    return;
+end
+$$;
+
+comment on function swh_content_fossology_license_add(boolean) IS 'Add new content licenses';
diff --git a/swh/indexer/sql/30-swh-schema.sql b/swh/indexer/sql/30-swh-schema.sql
index a3205c9e..eec0f3b8 100644
--- a/swh/indexer/sql/30-swh-schema.sql
+++ b/swh/indexer/sql/30-swh-schema.sql
@@ -14,7 +14,7 @@ create table dbversion
 );
 
 insert into dbversion(version, release, description)
-      values(129, now(), 'Work In Progress');
+      values(130, now(), 'Work In Progress');
 -- Computing metadata on sha1's contents
 
 -- a SHA1 checksum (not necessarily originating from Git)
diff --git a/swh/indexer/sql/40-swh-func.sql b/swh/indexer/sql/40-swh-func.sql
index e358312a..1b325ecb 100644
--- a/swh/indexer/sql/40-swh-func.sql
+++ b/swh/indexer/sql/40-swh-func.sql
@@ -228,13 +228,14 @@ begin
     on conflict(name) do nothing;
 
     if conflict_update then
-        -- delete from content_fossology_license c
-        --   using tmp_content_fossology_license tmp, indexer_configuration i
-        --   where c.id = tmp.id and i.id=tmp.indexer_configuration_id
-        delete from content_fossology_license
-        where id in (select tmp.id
-                     from tmp_content_fossology_license tmp
-                     inner join indexer_configuration i on i.id=tmp.indexer_configuration_id);
+        insert into content_fossology_license (id, license_id, indexer_configuration_id)
+        select tcl.id,
+              (select id from fossology_license where name = tcl.license) as license,
+              indexer_configuration_id
+        from tmp_content_fossology_license tcl
+        on conflict(id, license_id, indexer_configuration_id)
+        do update set license_id = excluded.license_id; -- NOTE(review): license_id is in the conflict target, so this looks like a no-op; confirm intent
+        return;
     end if;
 
     insert into content_fossology_license (id, license_id, indexer_configuration_id)
-- 
GitLab