Deploy latest loaders (e.g. swh.loader.git v2.3.0, ...)
staging:
-
Release swh.loader.git -
swh-apps: Build images ^ -
swh-loader-git -
swh-loader-highpriority
-
-
swh-charts: Update values.yaml -
grafana: add deployment tag -
Checks
production:
-
Unstick the python3-swh.loader.git package [1] -
(failed) Stop most loaders' queue consumption (git, archive, cran, deposit, debian, ...) [2] (to try and avoid spamming emails...) -
workers: Update impacted services (among other things python3-swh.loader.{git,core}) -
Fix the swh.loader.bzr package, which caused an issue once deployed -
Restart swh-worker@loader_{git,high_priority,archive,cran,debian,...} -
grafana: add deployment tag -
Checks
[2] (I tried, but the celery CLI for cancelling consumption would not respond appropriately)
# as root@pergamon
# Preparation on the @swh-workers node group before the upgrade:
#   1. disable the puppet agent, recording $REASON as the message,
#   2. disable the swh-worker@loader_<type> unit for each loader type,
#   3. stop cron.
TYPES="git bzr archive cran debian deposit maven nixguix npm opam pypi"
REASON="upgrading swh.loader.git & swh.loader.core.\nRefs. https://gitlab.softwareheritage.org/swh/infra/sysadm-environment/-/issues/4869"

clush -b -w @swh-workers "puppet agent --disable \"$REASON\""

# $TYPES is intentionally left unquoted: the loop relies on sh/bash word
# splitting to get one loader type per iteration.
for loader_type in $TYPES; do
  echo "loader $loader_type"
  clush -b -w @swh-workers systemctl disable "swh-worker@loader_${loader_type}"
done

clush -b -w @swh-workers "systemctl stop cron"
# as ardumont@pergamon
# Goal: tell the loader workers to stop consuming from their celery queues.
# NOTE(review): ${^name} and ${(j:,:)name} below are zsh expansion forms, so
# this snippet is presumably meant to be pasted into zsh, not bash — confirm.
# NOTE(review): $TYPES is read here but only assigned in the root snippet;
# define it in this shell too if this is a separate session.
# adapt this for the right hosts
hosts=(worker{01..16}.internal.softwareheritage.org)
# Same list as $TYPES, with the spaces replaced by commas.
TYPES_STR=$(echo $TYPES | sed 's/ /,/g')
# and the right worker instances
# NOTE(review): intended result is one loader_<type> entry per type. This
# depends on brace expansion seeing the commas that come out of $TYPES_STR;
# bash does brace expansion before parameter expansion and would keep the
# braces literally — verify the array contents (e.g. print -l $workers).
workers=(loader_{$TYPES_STR})
# these are generic
# Build every <worker>@<host> combination from the two arrays.
all_instances=(${^workers}@${^hosts})
export SWH_CONFIG_FILENAME=~/.config/swh/scheduler.yml
# Query the active queues of all instances (comma-joined destination list,
# JSON output), keep only the first output line, extract the unique queue
# names with jq, and drop any name containing "oneshot3". Then send
# cancel_consumer for each remaining queue to the same set of instances.
for queue in `python3 -m celery -A swh.scheduler.celery_backend.config.app inspect -d ${(j:,:)all_instances} active_queues -j | head -n +1 | jq -r '[. | to_entries | .[].value | flatten | .[].name] | unique | .[]' | grep -v oneshot3`; do
python3 -m celery -A swh.scheduler.celery_backend.config.app control \
-d ${(j:,:)all_instances} cancel_consumer $queue
done
# in another tmux pane
root@pergamon:/etc/bind# date; sleep 900; date; echo "15min elapsed time, you can upgrade and restart services now"
Tue 02 May 2023 02:20:56 PM UTC
...
Edited by Antoine R. Dumont