clean up up old pattern-ed indexer tasks ('index-%') [1]
reclaim space used by old archive_task and archive_task_run
It takes space in both db1.staging & albertina scheduler dbs.
[1]
begin;delete from task_run where task in (select id from task where type like 'index-%');delete from task where type like 'index-%';delete from archive_task_run where task in (select id from archive_task where type like 'index-%');delete from archive_task where type like 'index-%';delete from task_type where type like 'index-%';commit;
Edited
Designs
Child items ...
Show closed items
Linked items 0
Link issues together to show that they're related.
Learn more.
The issue in your swh-scheduler mr tests is purely local; swh.indexer has dropped its entrypoints, so pip install -e needs to be run again to update them in your venv.
swh-scheduler=# delete from task_run where task in (select id from task where type like 'index-%');delete from task where type like 'index-%';delete from archive_task_run where task in (select id from archive_task where type like 'index-%');delete from archive_task where type like 'index-%';delete from task_type where type like 'index-%';DELETE 3698008DELETE 4852313DELETE 17555670DELETE 15301132DELETE 7
production (ongoing)
softwareheritage-scheduler=# delete from task_run where task in (select id from task where type like 'index-%');delete from task where type like 'index-%';DELETE 3849348DELETE 13233946softwareheritage-scheduler=# delete from archive_task_run where task in (select id from archive_task where type like 'index-%');delete from archive_task where type like 'index-%';delete from task_type where type like 'index-%';DELETE 140549938DELETE 192708590DELETE 9
There were old archive_task and archive_task_run tables [1] still hanging around.
I've cleaned them up on scheduler staging following that plan (out of the drop table
archive_task_run and drop table archive_task failure output).
[1]
public | archive_task | table | swh-scheduler | permanent | heap | 14 GB | Schedule of recurring tasks public | archive_task_run | table | swh-scheduler | permanent | heap | 12 GB | History of task runs sent to the job-running backend
[2] plan
-- Functions depending on old archive_task_run tables (instead of task_run)-- They are getting dropped and reinstalled depending on the proper task_run table-- Sequence owned by previous archive_task_run table instead of current equivalent task_runalter sequence task_run_id_seq owned by task_run.id;-- Replace previous iterations of functionsdrop function swh_scheduler_schedule_task_run;create or replace function swh_scheduler_schedule_task_run (task_id bigint, backend_id text, metadata jsonb default '{}'::jsonb, ts timestamptz default now()) returns task_run language sqlas $$ insert into task_run (task, backend_id, metadata, scheduled, status) values (task_id, backend_id, metadata, ts, 'scheduled') returning *;$$;drop function swh_scheduler_start_task_run;create or replace function swh_scheduler_start_task_run (backend_id text, metadata jsonb default '{}'::jsonb, ts timestamptz default now()) returns task_run language sqlas $$ update task_run set started = ts, status = 'started', metadata = coalesce(task_run.metadata, '{}'::jsonb) || swh_scheduler_start_task_run.metadata where task_run.backend_id = swh_scheduler_start_task_run.backend_id returning *;$$;drop function swh_scheduler_end_task_run;create or replace function swh_scheduler_end_task_run (backend_id text, status task_run_status, metadata jsonb default '{}'::jsonb, ts timestamptz default now()) returns task_run language sqlas $$ update task_run set ended = ts, status = swh_scheduler_end_task_run.status, metadata = coalesce(task_run.metadata, '{}'::jsonb) || swh_scheduler_end_task_run.metadata where task_run.backend_id = swh_scheduler_end_task_run.backend_id returning *;$$;-- this allows to drop the archive_task_run now-- now dropping archive_task table and dependent objectsalter sequence task_id_seq owned by task.id;drop function swh_scheduler_peek_no_priority_tasks;create or replace function swh_scheduler_peek_no_priority_tasks (task_type text, ts timestamptz default now(), num_tasks bigint default NULL) returns setof task language sql stableas $$select * from task where next_run <= ts and type = task_type and status = 'next_run_not_scheduled' and priority is null order by next_run limit num_tasks;$$;comment on function swh_scheduler_peek_no_priority_tasks (text, timestamptz, bigint)is 'Retrieve tasks without priority';drop function swh_scheduler_peek_tasks_with_priority;create or replace function swh_scheduler_peek_tasks_with_priority (task_type text, ts timestamptz default now(), num_tasks_priority bigint default NULL, task_priority task_priority default 'normal') returns setof task language sql stableas $$ select * from task t where t.next_run <= ts and t.type = task_type and t.status = 'next_run_not_scheduled' and t.priority = task_priority order by t.next_run limit num_tasks_priority;$$;comment on function swh_scheduler_peek_tasks_with_priority(text, timestamptz, bigint, task_priority)is 'Retrieve tasks with a given priority';drop function swh_scheduler_peek_any_ready_priority_tasks;create or replace function swh_scheduler_peek_any_ready_priority_tasks ( task_type text, ts timestamptz default now(), num_tasks bigint default NULL ) returns setof task language sql stableas $$ select * from task t where t.next_run <= ts and t.type = task_type and t.status = 'next_run_not_scheduled' and t.priority is not null order by t.next_run limit num_tasks;$$;comment on function swh_scheduler_peek_any_ready_priority_tasks(text, timestamptz, bigint)is 'List tasks with any priority ready for scheduling';drop function swh_scheduler_grab_any_ready_priority_tasks;create or replace function swh_scheduler_grab_any_ready_priority_tasks ( task_type text, ts timestamptz default now(), num_tasks bigint default NULL ) returns setof task language sqlas $$ update task set status='next_run_scheduled' from ( select id from swh_scheduler_peek_any_ready_priority_tasks( task_type, ts, num_tasks ) ) next_tasks where task.id = next_tasks.id returning task.*;$$;comment on function swh_scheduler_grab_any_ready_priority_tasks (text, timestamptz, bigint)is 'Grab any priority tasks ready for scheduling and change their status';drop function swh_scheduler_grab_ready_tasks;create or replace function swh_scheduler_grab_ready_tasks (task_type text, ts timestamptz default now(), num_tasks bigint default NULL) returns setof task language sqlas $$ update task set status='next_run_scheduled' from ( select id from swh_scheduler_peek_no_priority_tasks(task_type, ts, num_tasks) ) next_tasks where task.id = next_tasks.id returning task.*;$$;comment on function swh_scheduler_grab_ready_tasks (text, timestamptz, bigint)is 'Grab (no priority) tasks ready for scheduling and change their status';-- drop table archive_task_run;-- drop table archive_task;