From ad72073a752c0058ed74bf3da83f137d7e5048f8 Mon Sep 17 00:00:00 2001
From: Vincent SELLIER <vincent.sellier@softwareheritage.org>
Date: Wed, 22 Mar 2023 20:31:40 +0100
Subject: [PATCH] Add an image for swh-scrubber service

Related to swh/infra/sysadm-environment#4707
---
 apps/swh-scrubber/Dockerfile              | 41 +++++++++++
 apps/swh-scrubber/entrypoint.sh           | 29 ++++++++
 apps/swh-scrubber/requirements-frozen.txt | 86 +++++++++++++++++++++++
 apps/swh-scrubber/requirements.txt        |  1 +
 4 files changed, 157 insertions(+)
 create mode 100644 apps/swh-scrubber/Dockerfile
 create mode 100644 apps/swh-scrubber/entrypoint.sh
 create mode 100644 apps/swh-scrubber/requirements-frozen.txt
 create mode 100644 apps/swh-scrubber/requirements.txt

diff --git a/apps/swh-scrubber/Dockerfile b/apps/swh-scrubber/Dockerfile
new file mode 100644
index 000000000..18a6fe0e5
--- /dev/null
+++ b/apps/swh-scrubber/Dockerfile
@@ -0,0 +1,41 @@
+# Deeply inspired from the Dockerfile of the swh-graph project
+FROM python:3.10-bullseye
+
+RUN apt-get -y update && \
+    apt-get -y upgrade && \
+    apt-get install -y libcmph-dev librdkafka-dev && \
+    apt-get clean && rm -rf /var/lib/apt/lists/* && \
+    addgroup --gid 1000 swh && \
+    useradd --gid 1000 --uid 1000 -m -d /opt/swh swh && \
+    mkdir /etc/swh
+
+USER swh
+WORKDIR /opt/swh
+
+COPY --chown=swh:swh requirements-frozen.txt /opt/swh
+
+ENV PYTHONPATH=/opt/swh
+ENV PATH=/opt/swh/.local/bin:$PATH
+
+RUN /usr/local/bin/python -m pip install --upgrade pip && \
+    pip install --no-cache-dir -r requirements-frozen.txt
+
+COPY --chown=swh:swh entrypoint.sh /opt/swh
+RUN chmod u+x /opt/swh/entrypoint.sh
+
+ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
+ENV LOGLEVEL=INFO
+ENV STATSD_PORT=9125
+ENV STATSD_HOST=prometheus-statsd-exporter
+# STATSD_TAGS: scrubber_instance:<database>-<objecttype>-<id>
+ENV STATSD_TAGS=
+# OBJECT_TYPE: The type of object to run on (origin/origin-visit/...)
+ENV OBJECT_TYPE=
+# PARTITION_COUNT: ^2 number of ranges to split the object
+ENV PARTITION_COUNT=
+# FIRST_PARTITION: The first partition id to check (inclusive)
+ENV FIRST_PARTITION=
+# LAST_PARTITION: The last partition id to check (exclusive)
+ENV LAST_PARTITION=
+# Exec form: the script is started directly instead of through "/bin/sh -c"
+ENTRYPOINT ["/opt/swh/entrypoint.sh"]
diff --git a/apps/swh-scrubber/entrypoint.sh b/apps/swh-scrubber/entrypoint.sh
new file mode 100644
index 000000000..18bef683f
--- /dev/null
+++ b/apps/swh-scrubber/entrypoint.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Check the configuration, then replace this shell with the swh scrubber CLI.
+if [ ! -e "${SWH_CONFIG_FILENAME}" ]; then
+    echo "The config file ${SWH_CONFIG_FILENAME} does not exist."
+    exit 1
+fi
+# Every variable listed here must be non-empty; all missing ones are reported.
+ENV_VARS="LOGLEVEL STATSD_TAGS OBJECT_TYPE PARTITION_COUNT FIRST_PARTITION LAST_PARTITION"
+ERROR=0
+
+for VAR in ${ENV_VARS}; do
+    if [ -z "${!VAR}" ]; then
+        echo "The ${VAR} environment variable must be set"
+        ERROR=1
+    fi
+done
+
+if [ $ERROR -ne 0 ]; then
+    exit 1
+fi
+# The partition options below use the same variables that were validated above.
+echo "Starting scrubber for OBJECT_TYPE=${OBJECT_TYPE} from FIRST_PARTITION=${FIRST_PARTITION} to LAST_PARTITION=${LAST_PARTITION}"
+exec swh \
+  --log-level "${LOGLEVEL}" \
+  scrubber check storage \
+  --object-type "${OBJECT_TYPE}" \
+  --nb-partitions "${PARTITION_COUNT}" \
+  --start-partition-id "${FIRST_PARTITION}" \
+  --end-partition-id "${LAST_PARTITION}"
diff --git a/apps/swh-scrubber/requirements-frozen.txt b/apps/swh-scrubber/requirements-frozen.txt
new file mode 100644
index 000000000..3ad0b1898
--- /dev/null
+++ b/apps/swh-scrubber/requirements-frozen.txt
@@ -0,0 +1,86 @@
+aiohttp==3.8.4
+aiohttp-utils==3.2.1
+aiosignal==1.3.1
+amqp==5.1.1
+async-timeout==4.0.2
+attrs==22.2.0
+attrs-strict==1.0.0
+billiard==3.6.4.0
+blinker==1.5
+cassandra-driver==3.25.0
+celery==5.2.7
+certifi==2022.12.7
+cffi==1.15.1
+chardet==5.1.0
+charset-normalizer==3.1.0
+click==8.1.3
+click-didyoumean==0.3.0
+click-plugins==1.1.1
+click-repl==0.2.0
+confluent-kafka==2.0.2
+Deprecated==1.2.13
+dulwich==0.21.3
+exceptiongroup==1.1.1
+Flask==2.2.3
+frozenlist==1.3.3
+geomet==0.2.1.post1
+gunicorn==20.1.0
+humanize==4.6.0
+hypothesis==6.70.0
+idna==3.4
+importlib-metadata==4.13.0
+iniconfig==2.0.0
+iso8601==1.1.0
+itsdangerous==2.1.2
+Jinja2==3.1.2
+kombu==5.2.4
+MarkupSafe==2.1.2
+mirakuru==2.5.1
+msgpack==1.0.5
+multidict==6.0.4
+mypy-extensions==1.0.0
+packaging==23.0
+pika==1.3.1
+pkginfo==1.9.6
+pluggy==1.0.0
+port-for==0.6.3
+prompt-toolkit==3.0.38
+psutil==5.9.4
+psycopg2==2.9.5
+pycparser==2.21
+pytest==7.2.2
+pytest-postgresql==3.1.3
+python-dateutil==2.8.2
+python-debian==0.1.49
+python-magic==0.4.27
+python-mimeparse==1.6.0
+pytz==2022.7.1
+PyYAML==6.0
+redis==4.5.3
+requests==2.28.2
+retrying==1.3.4
+sentry-sdk==1.17.0
+six==1.16.0
+sortedcontainers==2.4.0
+swh.core==2.21.2
+swh.counters==0.9.2
+swh.journal==1.3.1
+swh.loader.core==5.2.0
+swh.loader.git==2.2.0
+swh.model==6.6.3
+swh.objstorage==2.1.0
+swh.perfecthash==0.1.2
+swh.scheduler==1.7.0
+swh.scrubber==1.0.1
+swh.storage==1.11.0
+tenacity==8.2.2
+toml==0.10.2
+tomli==2.0.1
+typing_extensions==4.5.0
+urllib3==1.26.15
+vine==5.0.0
+wcwidth==0.2.6
+Werkzeug==2.2.3
+wrapt==1.15.0
+yarl==1.8.2
+zipp==3.15.0
diff --git a/apps/swh-scrubber/requirements.txt b/apps/swh-scrubber/requirements.txt
new file mode 100644
index 000000000..82a472e4a
--- /dev/null
+++ b/apps/swh-scrubber/requirements.txt
@@ -0,0 +1 @@
+swh-scrubber
-- 
GitLab