From 09cecc5a69fdb85e23ce53772c8a82daac26dac9 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Tue, 25 Feb 2020 16:16:38 +0100
Subject: [PATCH 1/2] pgstorage: Empty temp tables instead of dropping them

Temporary tables are now created only if missing and emptied on commit, as swh.storage already does.

Related to D2580
---
 sql/upgrades/129.sql              | 119 ++++++++++++++++++++++++++++++
 swh/indexer/sql/30-swh-schema.sql |   2 +-
 swh/indexer/sql/40-swh-func.sql   |  38 +++++-----
 3 files changed, 139 insertions(+), 20 deletions(-)
 create mode 100644 sql/upgrades/129.sql

diff --git a/sql/upgrades/129.sql b/sql/upgrades/129.sql
new file mode 100644
index 00000000..d0e4ab0d
--- /dev/null
+++ b/sql/upgrades/129.sql
@@ -0,0 +1,119 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 128
+-- to_version: 129
+-- description: Empty temporary tables on commit instead of dropping them
+
+insert into dbversion(version, release, description)
+values(129, now(), 'Work In Progress');
+-- Create temp table tmp_<tblname> (if missing) like <tblname> minus object_id; its rows are deleted on commit.
+create or replace function swh_mktemp(tblname regclass)
+    returns void
+    language plpgsql
+as $$
+begin
+    execute format('
+	create temporary table if not exists tmp_%1$I
+	    (like %1$I including defaults)
+	    on commit delete rows;
+      alter table tmp_%1$I drop column if exists object_id;
+	', tblname);
+    return;
+end
+$$;
+
+-- Create temp table tmp_content_mimetype (if missing), shaped like content_mimetype; rows are deleted on commit.
+create or replace function swh_mktemp_content_mimetype()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_content_mimetype (
+    like content_mimetype including defaults
+  ) on commit delete rows;
+$$;
+
+-- Create temp table tmp_content_language (if missing), shaped like content_language; rows are deleted on commit.
+create or replace function swh_mktemp_content_language()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_content_language (
+    like content_language including defaults
+  ) on commit delete rows;
+$$;
+
+comment on function swh_mktemp_content_language() is 'Helper table to add content language';
+
+
+-- Create temp table tmp_content_ctags (if missing), shaped like content_ctags; rows are deleted on commit.
+create or replace function swh_mktemp_content_ctags()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_content_ctags (
+    like content_ctags including defaults
+  ) on commit delete rows;
+$$;
+
+comment on function swh_mktemp_content_ctags() is 'Helper table to add content ctags';
+
+-- Create temp table tmp_content_fossology_license (if missing) with explicit columns; rows are deleted on commit.
+create or replace function swh_mktemp_content_fossology_license()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_content_fossology_license (
+    id                       sha1,
+    license                  text,  -- license name; matched against fossology_license.name when rows are added
+    indexer_configuration_id integer
+  ) on commit delete rows;
+$$;
+
+comment on function swh_mktemp_content_fossology_license() is 'Helper table to add content license';
+
+
+-- Create temp table tmp_content_metadata (if missing), shaped like content_metadata; rows are deleted on commit.
+create or replace function swh_mktemp_content_metadata()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_content_metadata (
+    like content_metadata including defaults
+  ) on commit delete rows;
+$$;
+
+comment on function swh_mktemp_content_metadata() is 'Helper table to add content metadata';
+
+
+-- Create temp table tmp_revision_intrinsic_metadata (if missing), like revision_intrinsic_metadata; emptied on commit.
+create or replace function swh_mktemp_revision_intrinsic_metadata()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_revision_intrinsic_metadata (
+    like revision_intrinsic_metadata including defaults
+  ) on commit delete rows;
+$$;
+
+comment on function swh_mktemp_revision_intrinsic_metadata() is 'Helper table to add revision intrinsic metadata';
+
+-- Create temp table tmp_origin_intrinsic_metadata (if missing), like origin_intrinsic_metadata; emptied on commit.
+create or replace function swh_mktemp_origin_intrinsic_metadata()
+    returns void
+    language sql
+as $$
+  create temporary table if not exists tmp_origin_intrinsic_metadata (
+    like origin_intrinsic_metadata including defaults
+  ) on commit delete rows;
+$$;
+
+comment on function swh_mktemp_origin_intrinsic_metadata() is 'Helper table to add origin intrinsic metadata';
+-- Create temp table tmp_indexer_configuration (if missing), like indexer_configuration minus id; emptied on commit.
+create or replace function swh_mktemp_indexer_configuration()
+    returns void
+    language sql
+as $$
+    create temporary table if not exists tmp_indexer_configuration (
+      like indexer_configuration including defaults
+    ) on commit delete rows;
+    alter table tmp_indexer_configuration drop column if exists id;  -- no-op on repeat calls, once id is already gone
+$$;
diff --git a/swh/indexer/sql/30-swh-schema.sql b/swh/indexer/sql/30-swh-schema.sql
index 78ba0a17..a3205c9e 100644
--- a/swh/indexer/sql/30-swh-schema.sql
+++ b/swh/indexer/sql/30-swh-schema.sql
@@ -14,7 +14,7 @@ create table dbversion
 );
 
 insert into dbversion(version, release, description)
-      values(128, now(), 'Work In Progress');
+      values(129, now(), 'Work In Progress');
 -- Computing metadata on sha1's contents
 
 -- a SHA1 checksum (not necessarily originating from Git)
diff --git a/swh/indexer/sql/40-swh-func.sql b/swh/indexer/sql/40-swh-func.sql
index d2c62b77..e358312a 100644
--- a/swh/indexer/sql/40-swh-func.sql
+++ b/swh/indexer/sql/40-swh-func.sql
@@ -19,9 +19,9 @@ create or replace function swh_mktemp(tblname regclass)
 as $$
 begin
     execute format('
-	create temporary table tmp_%1$I
+	create temporary table if not exists tmp_%1$I
 	    (like %1$I including defaults)
-	    on commit drop;
+	    on commit delete rows;
       alter table tmp_%1$I drop column if exists object_id;
 	', tblname);
     return;
@@ -33,9 +33,9 @@ create or replace function swh_mktemp_content_mimetype()
     returns void
     language sql
 as $$
-  create temporary table tmp_content_mimetype (
+  create temporary table if not exists tmp_content_mimetype (
     like content_mimetype including defaults
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_content_mimetype() IS 'Helper table to add mimetype information';
@@ -114,9 +114,9 @@ create or replace function swh_mktemp_content_language()
     returns void
     language sql
 as $$
-  create temporary table tmp_content_language (
+  create temporary table if not exists tmp_content_language (
     like content_language including defaults
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_content_language() is 'Helper table to add content language';
@@ -127,9 +127,9 @@ create or replace function swh_mktemp_content_ctags()
     returns void
     language sql
 as $$
-  create temporary table tmp_content_ctags (
+  create temporary table if not exists tmp_content_ctags (
     like content_ctags including defaults
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_content_ctags() is 'Helper table to add content ctags';
@@ -202,11 +202,11 @@ create or replace function swh_mktemp_content_fossology_license()
     returns void
     language sql
 as $$
-  create temporary table tmp_content_fossology_license (
+  create temporary table if not exists tmp_content_fossology_license (
     id                       sha1,
     license                  text,
     indexer_configuration_id integer
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_content_fossology_license() is 'Helper table to add content license';
@@ -291,9 +291,9 @@ create or replace function swh_mktemp_content_metadata()
     returns void
     language sql
 as $$
-  create temporary table tmp_content_metadata (
+  create temporary table if not exists tmp_content_metadata (
     like content_metadata including defaults
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_content_metadata() is 'Helper table to add content metadata';
@@ -342,9 +342,9 @@ create or replace function swh_mktemp_revision_intrinsic_metadata()
     returns void
     language sql
 as $$
-  create temporary table tmp_revision_intrinsic_metadata (
+  create temporary table if not exists tmp_revision_intrinsic_metadata (
     like revision_intrinsic_metadata including defaults
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_revision_intrinsic_metadata() is 'Helper table to add revision intrinsic metadata';
@@ -354,9 +354,9 @@ create or replace function swh_mktemp_origin_intrinsic_metadata()
     returns void
     language sql
 as $$
-  create temporary table tmp_origin_intrinsic_metadata (
+  create temporary table if not exists tmp_origin_intrinsic_metadata (
     like origin_intrinsic_metadata including defaults
-  ) on commit drop;
+  ) on commit delete rows;
 $$;
 
 comment on function swh_mktemp_origin_intrinsic_metadata() is 'Helper table to add origin intrinsic metadata';
@@ -365,10 +365,10 @@ create or replace function swh_mktemp_indexer_configuration()
     returns void
     language sql
 as $$
-    create temporary table tmp_indexer_configuration (
+    create temporary table if not exists tmp_indexer_configuration (
       like indexer_configuration including defaults
-    ) on commit drop;
-    alter table tmp_indexer_configuration drop column id;
+    ) on commit delete rows;
+    alter table tmp_indexer_configuration drop column if exists id;
 $$;
 
 
-- 
GitLab


From 267cbc79d0566855207a5e20ab40d11df3a0f5b4 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Tue, 25 Feb 2020 16:59:45 +0100
Subject: [PATCH 2/2] fossology_license: Improve add query endpoint

The conflict_update path now upserts instead of deleting rows first, which also clarifies the intent.
---
 sql/upgrades/130.sql              | 42 +++++++++++++++++++++++++++++++
 swh/indexer/sql/30-swh-schema.sql |  2 +-
 swh/indexer/sql/40-swh-func.sql   | 15 +++++------
 3 files changed, 51 insertions(+), 8 deletions(-)
 create mode 100644 sql/upgrades/130.sql

diff --git a/sql/upgrades/130.sql b/sql/upgrades/130.sql
new file mode 100644
index 00000000..efea5540
--- /dev/null
+++ b/sql/upgrades/130.sql
@@ -0,0 +1,42 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 129
+-- to_version: 130
+-- description: Add fossology licenses through an upsert instead of delete-then-insert
+
+insert into dbversion(version, release, description)
+values(130, now(), 'Work In Progress');
+-- Copy rows staged in tmp_content_fossology_license into content_fossology_license, resolving license names to ids.
+create or replace function swh_content_fossology_license_add(conflict_update boolean)
+    returns void
+    language plpgsql
+as $$
+begin
+    -- insert unknown licenses first (names already in fossology_license are skipped)
+    insert into fossology_license (name)
+    select distinct license from tmp_content_fossology_license tmp
+    where not exists (select 1 from fossology_license where name=tmp.license)
+    on conflict(name) do nothing;
+    -- NOTE(review): license_id is in the conflict target, so "do update" assigns an equal value - confirm intent
+    if conflict_update then
+        insert into content_fossology_license (id, license_id, indexer_configuration_id)
+        select tcl.id,
+              (select id from fossology_license where name = tcl.license) as license,
+              indexer_configuration_id
+        from tmp_content_fossology_license tcl
+            on conflict(id, license_id, indexer_configuration_id)
+            do update set license_id = excluded.license_id;
+        return;
+    end if;
+    -- conflict_update is false: rows that already exist are left untouched
+    insert into content_fossology_license (id, license_id, indexer_configuration_id)
+    select tcl.id,
+          (select id from fossology_license where name = tcl.license) as license,
+          indexer_configuration_id
+    from tmp_content_fossology_license tcl
+        on conflict(id, license_id, indexer_configuration_id)
+        do nothing;
+    return;
+end
+$$;
+
+comment on function swh_content_fossology_license_add(boolean) IS 'Add new content licenses';
diff --git a/swh/indexer/sql/30-swh-schema.sql b/swh/indexer/sql/30-swh-schema.sql
index a3205c9e..eec0f3b8 100644
--- a/swh/indexer/sql/30-swh-schema.sql
+++ b/swh/indexer/sql/30-swh-schema.sql
@@ -14,7 +14,7 @@ create table dbversion
 );
 
 insert into dbversion(version, release, description)
-      values(129, now(), 'Work In Progress');
+      values(130, now(), 'Work In Progress');
 -- Computing metadata on sha1's contents
 
 -- a SHA1 checksum (not necessarily originating from Git)
diff --git a/swh/indexer/sql/40-swh-func.sql b/swh/indexer/sql/40-swh-func.sql
index e358312a..1b325ecb 100644
--- a/swh/indexer/sql/40-swh-func.sql
+++ b/swh/indexer/sql/40-swh-func.sql
@@ -228,13 +228,14 @@ begin
     on conflict(name) do nothing;
 
     if conflict_update then
-        -- delete from content_fossology_license c
-        --   using tmp_content_fossology_license tmp, indexer_configuration i
-        --   where c.id = tmp.id and i.id=tmp.indexer_configuration_id
-        delete from content_fossology_license
-        where id in (select tmp.id
-                     from tmp_content_fossology_license tmp
-                     inner join indexer_configuration i on i.id=tmp.indexer_configuration_id);
+        insert into content_fossology_license (id, license_id, indexer_configuration_id)
+        select tcl.id,
+              (select id from fossology_license where name = tcl.license) as license,
+              indexer_configuration_id
+        from tmp_content_fossology_license tcl
+            on conflict(id, license_id, indexer_configuration_id)
+            do update set license_id = excluded.license_id;
+        return;
     end if;
 
     insert into content_fossology_license (id, license_id, indexer_configuration_id)
-- 
GitLab