From ad72073a752c0058ed74bf3da83f137d7e5048f8 Mon Sep 17 00:00:00 2001
From: Vincent SELLIER <vincent.sellier@softwareheritage.org>
Date: Wed, 22 Mar 2023 20:31:40 +0100
Subject: [PATCH] Add an image for swh-scrubber service

Related to swh/infra/sysadm-environment#4707
---
 apps/swh-scrubber/Dockerfile              | 41 +++++++++++
 apps/swh-scrubber/entrypoint.sh           | 29 ++++++++
 apps/swh-scrubber/requirements-frozen.txt | 86 +++++++++++++++++++++++
 apps/swh-scrubber/requirements.txt        |  1 +
 4 files changed, 157 insertions(+)
 create mode 100644 apps/swh-scrubber/Dockerfile
 create mode 100644 apps/swh-scrubber/entrypoint.sh
 create mode 100644 apps/swh-scrubber/requirements-frozen.txt
 create mode 100644 apps/swh-scrubber/requirements.txt

diff --git a/apps/swh-scrubber/Dockerfile b/apps/swh-scrubber/Dockerfile
new file mode 100644
index 000000000..18a6fe0e5
--- /dev/null
+++ b/apps/swh-scrubber/Dockerfile
@@ -0,0 +1,41 @@
+# Heavily inspired by the Dockerfile of the swh-graph project
+FROM python:3.10-bullseye
+
+# System libraries, apt cache cleanup, then the swh user (uid/gid 1000) and /etc/swh
+RUN apt-get -y update && apt-get -y upgrade && \
+    apt-get install -y libcmph-dev librdkafka-dev && \
+    apt-get clean && rm -rf /var/lib/apt/lists/* && \
+    addgroup --gid 1000 swh && \
+    useradd --gid 1000 --uid 1000 -m -d /opt/swh swh && \
+    mkdir /etc/swh
+
+USER swh
+WORKDIR /opt/swh
+
+COPY --chown=swh:swh requirements-frozen.txt /opt/swh/
+
+# pip runs as the swh user; /opt/swh/.local/bin is put first on PATH for what it installs
+ENV PYTHONPATH=/opt/swh \
+    PATH=/opt/swh/.local/bin:$PATH
+RUN /usr/local/bin/python -m pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements-frozen.txt
+
+COPY --chown=swh:swh entrypoint.sh /opt/swh/
+RUN chmod u+x /opt/swh/entrypoint.sh
+
+ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
+ENV LOGLEVEL=INFO
+ENV STATSD_HOST=prometheus-statsd-exporter STATSD_PORT=9125
+# STATSD_TAGS: scrubber_instance:<database>-<objecttype>-<id>
+ENV STATSD_TAGS=
+# OBJECT_TYPE: The type of object to run on (origin/origin-visit/...)
+ENV OBJECT_TYPE=
+# PARTITION_COUNT: ^2 number of ranges to split the object
+ENV PARTITION_COUNT=
+# FIRST_PARTITION: The first partition id to check (inclusive)
+ENV FIRST_PARTITION=
+# LAST_PARTITION: The last partition id to check (exclusive)
+ENV LAST_PARTITION=
+
+# Exec form: no /bin/sh wrapper, so the entrypoint script receives signals directly
+ENTRYPOINT ["/opt/swh/entrypoint.sh"]
diff --git a/apps/swh-scrubber/entrypoint.sh b/apps/swh-scrubber/entrypoint.sh
new file mode 100644
index 000000000..18bef683f
--- /dev/null
+++ b/apps/swh-scrubber/entrypoint.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Abort early when the configuration file named by SWH_CONFIG_FILENAME is missing.
+if [ ! -e "${SWH_CONFIG_FILENAME}" ]; then
+  echo "The config file ${SWH_CONFIG_FILENAME} does not exist."
+  exit 1
+fi
+
+REQUIRED_VARS="LOGLEVEL STATSD_TAGS OBJECT_TYPE PARTITION_COUNT FIRST_PARTITION LAST_PARTITION"
+MISSING=0
+
+# Check every required variable (via indirect expansion) and report all the
+# unset or empty ones before exiting, rather than stopping at the first.
+for NAME in ${REQUIRED_VARS}; do
+    if [ -z "${!NAME}" ]; then
+        echo "The ${NAME} environment variable must be set"
+        MISSING=1
+    fi
+done
+
+[ "${MISSING}" -eq 0 ] || exit 1
+
+echo "Starting scrubber for OBJECT_TYPE=${OBJECT_TYPE} from FIRST_PARTITION=${FIRST_PARTITION} to LAST_PARTITION=${LAST_PARTITION}"
+# Use the variables this script requires (PARTITION_COUNT, FIRST_PARTITION, LAST_PARTITION)
+exec swh --log-level "${LOGLEVEL}" \
+  scrubber check storage \
+  --object-type "${OBJECT_TYPE}" \
+  --nb-partitions "${PARTITION_COUNT}" \
+  --start-partition-id "${FIRST_PARTITION}" \
+  --end-partition-id "${LAST_PARTITION}"
diff --git a/apps/swh-scrubber/requirements-frozen.txt b/apps/swh-scrubber/requirements-frozen.txt
new file mode 100644
index 000000000..3ad0b1898
--- /dev/null
+++ b/apps/swh-scrubber/requirements-frozen.txt
@@ -0,0 +1,86 @@
+aiohttp==3.8.4
+aiohttp-utils==3.2.1
+aiosignal==1.3.1
+amqp==5.1.1
+async-timeout==4.0.2
+attrs==22.2.0
+attrs-strict==1.0.0
+billiard==3.6.4.0
+blinker==1.5
+cassandra-driver==3.25.0
+celery==5.2.7
+certifi==2022.12.7
+cffi==1.15.1
+chardet==5.1.0
+charset-normalizer==3.1.0
+click==8.1.3
+click-didyoumean==0.3.0
+click-plugins==1.1.1
+click-repl==0.2.0
+confluent-kafka==2.0.2
+Deprecated==1.2.13
+dulwich==0.21.3
+exceptiongroup==1.1.1
+Flask==2.2.3
+frozenlist==1.3.3
+geomet==0.2.1.post1
+gunicorn==20.1.0
+humanize==4.6.0
+hypothesis==6.70.0
+idna==3.4
+importlib-metadata==4.13.0
+iniconfig==2.0.0
+iso8601==1.1.0
+itsdangerous==2.1.2
+Jinja2==3.1.2
+kombu==5.2.4
+MarkupSafe==2.1.2
+mirakuru==2.5.1
+msgpack==1.0.5
+multidict==6.0.4
+mypy-extensions==1.0.0
+packaging==23.0
+pika==1.3.1
+pkginfo==1.9.6
+pluggy==1.0.0
+port-for==0.6.3
+prompt-toolkit==3.0.38
+psutil==5.9.4
+psycopg2==2.9.5
+pycparser==2.21
+pytest==7.2.2
+pytest-postgresql==3.1.3
+python-dateutil==2.8.2
+python-debian==0.1.49
+python-magic==0.4.27
+python-mimeparse==1.6.0
+pytz==2022.7.1
+PyYAML==6.0
+redis==4.5.3
+requests==2.28.2
+retrying==1.3.4
+sentry-sdk==1.17.0
+six==1.16.0
+sortedcontainers==2.4.0
+swh.core==2.21.2
+swh.counters==0.9.2
+swh.journal==1.3.1
+swh.loader.core==5.2.0
+swh.loader.git==2.2.0
+swh.model==6.6.3
+swh.objstorage==2.1.0
+swh.perfecthash==0.1.2
+swh.scheduler==1.7.0
+swh.scrubber==1.0.1
+swh.storage==1.11.0
+tenacity==8.2.2
+toml==0.10.2
+tomli==2.0.1
+typing_extensions==4.5.0
+urllib3==1.26.15
+vine==5.0.0
+wcwidth==0.2.6
+Werkzeug==2.2.3
+wrapt==1.15.0
+yarl==1.8.2
+zipp==3.15.0
diff --git a/apps/swh-scrubber/requirements.txt b/apps/swh-scrubber/requirements.txt
new file mode 100644
index 000000000..82a472e4a
--- /dev/null
+++ b/apps/swh-scrubber/requirements.txt
@@ -0,0 +1 @@
+swh-scrubber
-- 
GitLab