swh: Add support for bulk loading jobs
Related to swh/infra/sysadm-environment#5365 (closed)
helm diff
[swh] Comparing changes between branches production and bulk-load (per environment)...
Your branch is up to date with 'origin/production'.
[swh] Generate config in production branch for environment staging, namespace swh...
[swh] Generate config in production branch for environment staging, namespace swh-cassandra...
[swh] Generate config in production branch for environment staging, namespace swh-cassandra-next-version...
[swh] Generate config in bulk-load branch for environment staging...
[swh] Generate config in bulk-load branch for environment staging...
[swh] Generate config in bulk-load branch for environment staging...
Your branch is up to date with 'origin/production'.
[swh] Generate config in production branch for environment production, namespace swh...
[swh] Generate config in production branch for environment production, namespace swh-cassandra...
[swh] Generate config in production branch for environment production, namespace swh-cassandra-next-version...
[swh] Generate config in bulk-load branch for environment production...
[swh] Generate config in bulk-load branch for environment production...
[swh] Generate config in bulk-load branch for environment production...
------------- diff for environment staging namespace swh -------------
     _        __  __
   _| |_   _ / _|/ _|  between /tmp/swh-chart.swh.3ljfKaPW/staging-swh.before, 135 documents
 / _' | | | | |_| |_       and /tmp/swh-chart.swh.3ljfKaPW/staging-swh.after, 135 documents
| (_| | |_| |  _|  _|
 \__,_|\__, |_| |_|   returned two differences
        |___/
data (v1/ConfigMap/swh/toolbox-script-utils)
+ one map entry added:
bulk_load.sh: |
  #!/bin/bash
  # Download a list of origins and send it to the scheduler through
  # `swh scheduler origin send-origins-from-file-to-celery`.
  #
  # Required environment variables (`set -u` aborts if one is unset):
  #   ORIGINS_URL  URL the origins list is downloaded from
  #   ORIGINS      local path the downloaded list is written to
  #   TASK_TYPE    task type argument given to the swh command
  #   MAX_TASKS    value given to the swh command as --threshold
  set -eux

  echo "Downloading origins list from ${ORIGINS_URL} to ${ORIGINS}"
  # Expansions are quoted so a URL or path containing spaces or glob
  # characters is still passed as a single argument.
  wget "$ORIGINS_URL" -O "$ORIGINS"

  # Reading from stdin makes wc print only the count, not "<count> <file>".
  echo "Number of origins to schedule: $(wc -l < "$ORIGINS")"

  echo "Sending origin list to rabbitmq ..."
  # The list is fed on stdin with a redirect rather than `cat |`.
  swh scheduler origin \
    send-origins-from-file-to-celery "$TASK_TYPE" --threshold="$MAX_TASKS" \
    --queue-name-prefix oneshot < "$ORIGINS"

  echo "Done"
spec.template.metadata.annotations.checksum/configScript (apps/v1/Deployment/swh/swh-toolbox)
± value change
- 2fba7447fefabafa67f488ed476f58ba26d2a620961dc5e87b555af76ca27e9e
+ 44c7ed42829afd247b54b4c28250a0d69ca2135bb706edd623fd73381a57c68f
------------- diff for environment staging namespace swh-cassandra -------------
     _        __  __
   _| |_   _ / _|/ _|  between /tmp/swh-chart.swh.3ljfKaPW/staging-swh-cassandra.before, 423 documents
 / _' | | | | |_| |_       and /tmp/swh-chart.swh.3ljfKaPW/staging-swh-cassandra.after, 424 documents
| (_| | |_| |  _|  _|
 \__,_|\__, |_| |_|   returned three differences
        |___/
(file level)
---
# Source: swh/templates/toolbox/bulk-load-job.yaml
#
# One-shot Job that runs /opt/swh/bin/bulk_load.sh from the toolbox image.
# An init container first renders the scheduler configuration into a shared
# emptyDir; the main container then reads it through SWH_CONFIG_FILENAME.
apiVersion: batch/v1
kind: Job
metadata:
  name: toolbox-bulk-load-mbed
  namespace: swh-cassandra
  labels:
    app: toolbox-bulk-load-mbed
spec:
  # No retries: together with restartPolicy Never, a failed run stays failed.
  backoffLimit: 0
  template:
    metadata:
      labels:
        app: toolbox-bulk-load-mbed
    spec:
      restartPolicy: Never
      initContainers:
        - name: prepare-configuration-scheduler
          image: "debian:bullseye"
          imagePullPolicy: IfNotPresent
          command:
            - /bin/bash
          args:
            - "-c"
            # Single-quoted so the inner double quotes and backslashes reach
            # bash verbatim. `eval echo` shell-expands the template content
            # and writes the result to the shared configuration volume.
            - 'eval echo "\"$(</etc/swh/configuration-template/config.yml.template)\"" > /etc/swh/config-scheduler.yml'
          volumeMounts:
            - name: configuration
              mountPath: /etc/swh
            - name: configuration-template
              mountPath: /etc/swh/configuration-template
          # Secrets exposed to the shell expansion above; presumably
          # referenced by the template (template content not shown here).
          env:
            - name: AMQP_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: amqp-secrets
                  key: swhproducer-password
                  optional: false
            - name: POSTGRESQL_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: swh-scheduler-postgresql-common-secret
                  key: postgres-swh-scheduler-password
                  optional: false
      containers:
        - name: bulk-load
          image: "container-registry.softwareheritage.org/swh/infra/swh-apps/toolbox:20240702.2"
          imagePullPolicy: IfNotPresent
          command:
            - /opt/swh/bin/bulk_load.sh
          resources:
            requests:
              memory: 128Mi
              cpu: 100m
          env:
            - name: SWH_CONFIG_FILENAME
              value: /etc/swh/config-scheduler.yml
            # Relative path: the list is written to the container's working
            # directory. NOTE(review): confirm that directory is writable.
            - name: ORIGINS
              value: mbed.lst
            - name: ORIGINS_URL
              value: "https://gitlab.softwareheritage.org/-/project/80/uploads/44f84f66479d3365ae5e8a40f1ffb709/mbed-test.lst"
            - name: TASK_TYPE
              value: load-hg
            # Env var values must be strings; an unquoted 10 is a YAML
            # integer and is rejected by the Kubernetes API.
            - name: MAX_TASKS
              value: "10"
          volumeMounts:
            - name: configuration
              mountPath: /etc/swh
            - name: toolbox-script-utils
              mountPath: /opt/swh/bin
              readOnly: true
      volumes:
        # Scratch space shared between the init container and bulk-load.
        - name: configuration
          emptyDir: {}
        - name: configuration-template
          configMap:
            name: toolbox-scheduler-template
            items:
              - key: config.yml.template
                path: config.yml.template
        # NOTE(review): no container in this Job mounts config-utils;
        # confirm whether this volume is still needed.
        - name: config-utils
          configMap:
            name: config-utils
            # Octal file mode (r-xr-xr-x) so the scripts are executable.
            defaultMode: 0555
        - name: toolbox-script-utils
          configMap:
            name: toolbox-script-utils
            defaultMode: 0555
data (v1/ConfigMap/swh-cassandra/toolbox-script-utils)
+ one map entry added:
bulk_load.sh: |
  #!/bin/bash
  # Download a list of origins and send it to the scheduler through
  # `swh scheduler origin send-origins-from-file-to-celery`.
  #
  # Required environment variables (`set -u` aborts if one is unset):
  #   ORIGINS_URL  URL the origins list is downloaded from
  #   ORIGINS      local path the downloaded list is written to
  #   TASK_TYPE    task type argument given to the swh command
  #   MAX_TASKS    value given to the swh command as --threshold
  set -eux

  echo "Downloading origins list from ${ORIGINS_URL} to ${ORIGINS}"
  # Expansions are quoted so a URL or path containing spaces or glob
  # characters is still passed as a single argument.
  wget "$ORIGINS_URL" -O "$ORIGINS"

  # Reading from stdin makes wc print only the count, not "<count> <file>".
  echo "Number of origins to schedule: $(wc -l < "$ORIGINS")"

  echo "Sending origin list to rabbitmq ..."
  # The list is fed on stdin with a redirect rather than `cat |`.
  swh scheduler origin \
    send-origins-from-file-to-celery "$TASK_TYPE" --threshold="$MAX_TASKS" \
    --queue-name-prefix oneshot < "$ORIGINS"

  echo "Done"
spec.template.metadata.annotations.checksum/configScript (apps/v1/Deployment/swh-cassandra/swh-toolbox)
± value change
- 58e98ba68094151bcb7e2a352360285952284213af9feb3daf1838b0d3aaa1f9
+ 15bd1c0b22ea86d5b8f721d0fd62e377c0df0ab66b4b5d6bd25140809a9caae8
------------- diff for environment staging namespace swh-cassandra-next-version -------------
     _        __  __
   _| |_   _ / _|/ _|  between /tmp/swh-chart.swh.3ljfKaPW/staging-swh-cassandra-next-version.before, 307 documents
 / _' | | | | |_| |_       and /tmp/swh-chart.swh.3ljfKaPW/staging-swh-cassandra-next-version.after, 307 documents
| (_| | |_| |  _|  _|
 \__,_|\__, |_| |_|   returned two differences
        |___/
data (v1/ConfigMap/swh-cassandra-next-version/toolbox-script-utils)
+ one map entry added:
bulk_load.sh: |
  #!/bin/bash
  # Download a list of origins and send it to the scheduler through
  # `swh scheduler origin send-origins-from-file-to-celery`.
  #
  # Required environment variables (`set -u` aborts if one is unset):
  #   ORIGINS_URL  URL the origins list is downloaded from
  #   ORIGINS      local path the downloaded list is written to
  #   TASK_TYPE    task type argument given to the swh command
  #   MAX_TASKS    value given to the swh command as --threshold
  set -eux

  echo "Downloading origins list from ${ORIGINS_URL} to ${ORIGINS}"
  # Expansions are quoted so a URL or path containing spaces or glob
  # characters is still passed as a single argument.
  wget "$ORIGINS_URL" -O "$ORIGINS"

  # Reading from stdin makes wc print only the count, not "<count> <file>".
  echo "Number of origins to schedule: $(wc -l < "$ORIGINS")"

  echo "Sending origin list to rabbitmq ..."
  # The list is fed on stdin with a redirect rather than `cat |`.
  swh scheduler origin \
    send-origins-from-file-to-celery "$TASK_TYPE" --threshold="$MAX_TASKS" \
    --queue-name-prefix oneshot < "$ORIGINS"

  echo "Done"
spec.template.metadata.annotations.checksum/configScript (apps/v1/Deployment/swh-cassandra-next-version/swh-toolbox)
± value change
- 633640fb690a5c42dbb80de75655d6964db1d4d8bdaeb0ef84dc51d53cdd9433
+ f9e8ef30d568019ef187714d32a53f05912208d6ce078c4407a168428dc1a3de
------------- diff for environment production namespace swh -------------
     _        __  __
   _| |_   _ / _|/ _|  between /tmp/swh-chart.swh.3ljfKaPW/production-swh.before, 427 documents
 / _' | | | | |_| |_       and /tmp/swh-chart.swh.3ljfKaPW/production-swh.after, 427 documents
| (_| | |_| |  _|  _|
 \__,_|\__, |_| |_|   returned two differences
        |___/
data (v1/ConfigMap/swh/toolbox-script-utils)
+ one map entry added:
bulk_load.sh: |
  #!/bin/bash
  # Download a list of origins and send it to the scheduler through
  # `swh scheduler origin send-origins-from-file-to-celery`.
  #
  # Required environment variables (`set -u` aborts if one is unset):
  #   ORIGINS_URL  URL the origins list is downloaded from
  #   ORIGINS      local path the downloaded list is written to
  #   TASK_TYPE    task type argument given to the swh command
  #   MAX_TASKS    value given to the swh command as --threshold
  set -eux

  echo "Downloading origins list from ${ORIGINS_URL} to ${ORIGINS}"
  # Expansions are quoted so a URL or path containing spaces or glob
  # characters is still passed as a single argument.
  wget "$ORIGINS_URL" -O "$ORIGINS"

  # Reading from stdin makes wc print only the count, not "<count> <file>".
  echo "Number of origins to schedule: $(wc -l < "$ORIGINS")"

  echo "Sending origin list to rabbitmq ..."
  # The list is fed on stdin with a redirect rather than `cat |`.
  swh scheduler origin \
    send-origins-from-file-to-celery "$TASK_TYPE" --threshold="$MAX_TASKS" \
    --queue-name-prefix oneshot < "$ORIGINS"

  echo "Done"
spec.template.metadata.annotations.checksum/configScript (apps/v1/Deployment/swh/swh-toolbox)
± value change
- 0663f34c2e638e42453ab132ff3b2c37324890c5b4d3405b042bd846aa1777ad
+ 44b0188ef62d0f4b80b18a8f7e6b8a398da2e46616bab560e6a15947bb4cc88d
------------- diff for environment production namespace swh-cassandra -------------
     _        __  __
   _| |_   _ / _|/ _|  between /tmp/swh-chart.swh.3ljfKaPW/production-swh-cassandra.before, 122 documents
 / _' | | | | |_| |_       and /tmp/swh-chart.swh.3ljfKaPW/production-swh-cassandra.after, 122 documents
| (_| | |_| |  _|  _|
 \__,_|\__, |_| |_|   returned two differences
        |___/
data (v1/ConfigMap/swh-cassandra/toolbox-script-utils)
+ one map entry added:
bulk_load.sh: |
  #!/bin/bash
  # Download a list of origins and send it to the scheduler through
  # `swh scheduler origin send-origins-from-file-to-celery`.
  #
  # Required environment variables (`set -u` aborts if one is unset):
  #   ORIGINS_URL  URL the origins list is downloaded from
  #   ORIGINS      local path the downloaded list is written to
  #   TASK_TYPE    task type argument given to the swh command
  #   MAX_TASKS    value given to the swh command as --threshold
  set -eux

  echo "Downloading origins list from ${ORIGINS_URL} to ${ORIGINS}"
  # Expansions are quoted so a URL or path containing spaces or glob
  # characters is still passed as a single argument.
  wget "$ORIGINS_URL" -O "$ORIGINS"

  # Reading from stdin makes wc print only the count, not "<count> <file>".
  echo "Number of origins to schedule: $(wc -l < "$ORIGINS")"

  echo "Sending origin list to rabbitmq ..."
  # The list is fed on stdin with a redirect rather than `cat |`.
  swh scheduler origin \
    send-origins-from-file-to-celery "$TASK_TYPE" --threshold="$MAX_TASKS" \
    --queue-name-prefix oneshot < "$ORIGINS"

  echo "Done"
spec.template.metadata.annotations.checksum/configScript (apps/v1/Deployment/swh-cassandra/swh-toolbox)
± value change
- cd22e6d1e8a2238da42be244284b34e4f5f5f67ab8bc0e795450a3fdf82fa8f9
+ 875b259528e4464783a6ffa091a58e2a139f54911112ab66d20a82a61264884e