Skip to content

sql: Clean up task/task_run data model

This archives current task and task_run tables, creating new ones filtering only necessary tasks (last 2 months' oneshot tasks plus some recurring tasks; lister, indexer, ...). Those filtered tasks are the ones scheduled by the runner and runner priority services.

This archiving will allow those services to be faster (corresponding query execution time will outputs results faster without the archived data).

Related to swh/meta$1243 (iteration tryouts) Related to swh/infra/sysadm-environment#3837 (closed)

Test Plan

Staging swh-scheduler db dump mounted on docker db:

time psql -e service=swh-scheduler-dev -c '\i sql/updates/33.sql'
Pager usage is off.
Null display is "(null)".
Timing is on.
Border style is 2.

Current Host Server Date Time : Wed 12 Jan 2022 10:58:53 AM CET

Administrative queries:

                        :settings       -- Server Settings
                        :conninfo       -- Server connections
                        :activity       -- Server activity
                        :locks          -- Lock info
                        :waits          -- Waiting queires
                        :dbsize         -- Database Size
                        :tablesize      -- Tables Size
                        :uselesscol     -- Useless columns
                        :uptime         -- Server uptime
                        :menu           -- Help Menu
                        \h              -- Help with SQL commands
                        \?              -- Help with psql commands

Development queries:

                        :sp             -- Current Search Path
                        :clear          -- Clear screen
                        :ll             -- List

Expanded display is used automatically.
rollback;
psql:sql/updates/33.sql:6: WARNING:  there is no transaction in progress
ROLLBACK
Time: 0.152 ms
begin;
BEGIN
Time: 0.100 ms
insert into dbversion (version, release, description)
       values (33, now(), 'Work In Progress');
INSERT 0 1
Time: 0.501 ms
create table new_task (
  like task
  including all
  excluding indexes
  excluding constraints
);
CREATE TABLE
Time: 3.948 ms
create table new_task_run (
  like task_run
  including all
  excluding indexes
  excluding constraints
);
CREATE TABLE
Time: 2.324 ms
insert into new_task(id, type, arguments, next_run, current_interval, status, policy,
  retries_left, priority)
select id, type, arguments, next_run, current_interval, status, policy,
  retries_left, priority
from task
where (
    policy='recurring' and (type = 'load-nixguix' or
                            type like 'list-%' or
                            type like 'index-%')
  ) or (
    policy = 'oneshot' and next_run > now() - interval '2 months'
  );
INSERT 0 634838
Time: 6245.212 ms (00:06.245)
insert into new_task_run
select id, task, backend_id, scheduled, started, ended, metadata, status
from task_run where task in (
  select distinct id from new_task
);
INSERT 0 961657
Time: 4541.407 ms (00:04.541)
select last_value from task_id_seq;
+------------+
| last_value |
+------------+
|   29197936 |
+------------+
(1 row)

Time: 0.371 ms
select last_value from task_run_id_seq;
+------------+
| last_value |
+------------+
|   72602306 |
+------------+
(1 row)

Time: 0.219 ms
alter table task rename to archive_task;
ALTER TABLE
Time: 0.276 ms
alter table task_run rename to archive_task_run;
ALTER TABLE
Time: 0.190 ms
alter table new_task rename to task;
ALTER TABLE
Time: 0.197 ms
alter table new_task_run rename to task_run;
ALTER TABLE
Time: 0.132 ms
select last_value from task_id_seq;
+------------+
| last_value |
+------------+
|   29197936 |
+------------+
(1 row)

Time: 0.085 ms
select last_value from task_run_id_seq;
+------------+
| last_value |
+------------+
|   72602306 |
+------------+
(1 row)

Time: 0.074 ms
alter table task
add primary key(id);
ALTER TABLE
Time: 219.584 ms
alter table task
alter column type set not null;
ALTER TABLE
Time: 0.251 ms
alter table task
add constraint task_type_fk
foreign key (type) references task_type (type);
ALTER TABLE
Time: 157.253 ms
alter table task
add constraint task_priority_fk
foreign key (priority) references priority_ratio (id);
ALTER TABLE
Time: 74.358 ms
alter table task
add constraint task_check_policy
check (policy <> 'recurring' or current_interval is not null)
not valid;
ALTER TABLE
Time: 0.586 ms
alter table task
  validate constraint task_check_policy;
ALTER TABLE
Time: 72.120 ms
alter table task_run
add primary key(id);
ALTER TABLE
Time: 358.370 ms
alter table task_run
alter column status set not null,
alter column status set default 'scheduled';
ALTER TABLE
Time: 0.573 ms
alter table task_run
alter column task set not null,
add constraint task_id_fk
foreign key (task) references task (id);
ALTER TABLE
Time: 459.268 ms
create index on task(type);
CREATE INDEX
Time: 400.429 ms
create index on task(next_run);
CREATE INDEX
Time: 214.107 ms
create index on task using btree(type, md5(arguments::text));
CREATE INDEX
Time: 1850.221 ms (00:01.850)
create index on task(priority);
CREATE INDEX
Time: 232.837 ms
create index on task_run(task);
CREATE INDEX
Time: 367.688 ms
create index on task_run(backend_id);
CREATE INDEX
Time: 1741.770 ms (00:01.742)
create index on task_run(task asc, started asc);
CREATE INDEX
Time: 494.755 ms
create index on task(type, next_run)
where status = 'next_run_not_scheduled'::task_status;
CREATE INDEX
Time: 65.994 ms
drop trigger update_task_on_task_end on archive_task_run;
DROP TRIGGER
Time: 0.366 ms
create trigger update_task_on_task_end
  after update of status on task_run
  for each row
  when (new.status NOT IN ('scheduled', 'started'))
  execute procedure swh_scheduler_update_task_on_task_end ();
CREATE TRIGGER
Time: 0.364 ms
psql -e service=swh-scheduler-dev -c '\i sql/updates/33.sql'  0.01s user 0.00s system 0% cpu 17.302 total

Migrated from D6921 (view on Phabricator)

Merge request reports