Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • ardumont/swh-apps
  • vlorentz/swh-apps
  • swh/infra/swh-apps
  • anlambert/swh-apps
4 results
Show changes
Commits on Source (2)
# Deeply inspired from the Dockerfile of the swh-graph project
#
# Image running the swh scrubber storage checker. The runtime behaviour is
# driven by the environment variables declared at the bottom of this file,
# which are consumed by entrypoint.sh.
FROM python:3.10-bullseye

# Install the native development libraries, drop the apt caches in the same
# layer so they do not end up in the image, then create the unprivileged
# swh user (uid/gid 1000, home /opt/swh) and the /etc/swh config directory.
RUN apt-get -y update && \
    apt-get -y upgrade && \
    apt-get install -y libcmph-dev librdkafka-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    addgroup --gid 1000 swh && \
    useradd --gid 1000 --uid 1000 -m -d /opt/swh swh && \
    mkdir /etc/swh

# Everything below runs as the unprivileged user.
USER swh
WORKDIR /opt/swh

# Copy the frozen requirements on their own so the dependency layer is only
# rebuilt when the pinned versions change.
COPY --chown=swh:swh requirements-frozen.txt /opt/swh/

ENV PYTHONPATH=/opt/swh
# The packages are installed as the swh user; make the scripts of its user
# site (/opt/swh/.local/bin) reachable.
ENV PATH=/opt/swh/.local/bin:$PATH

RUN /usr/local/bin/python -m pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements-frozen.txt

COPY --chown=swh:swh entrypoint.sh /opt/swh/
RUN chmod u+x /opt/swh/entrypoint.sh

# SWH_CONFIG_FILENAME: path of the configuration file checked by entrypoint.sh
ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
# LOGLEVEL: value passed to `swh --log-level`
ENV LOGLEVEL=INFO
ENV STATSD_PORT=9125
ENV STATSD_HOST=prometheus-statsd-exporter
# STATSD_TAGS: scrubber_instance:<database>-<objecttype>-<id>
ENV STATSD_TAGS=
# OBJECT_TYPE: The type of object to run on (origin/origin-visit/...)
ENV OBJECT_TYPE=
# PARTITION_COUNT: The number of ranges to split the objects into
# (presumably expected to be a power of 2 -- confirm against swh-scrubber)
ENV PARTITION_COUNT=
# FIRST_PARTITION: The first partition id to check (inclusive)
ENV FIRST_PARTITION=
# LAST_PARTITION: The last partition id to check (exclusive)
ENV LAST_PARTITION=

# Exec form: the script is started directly instead of through `/bin/sh -c`,
# so it receives the stop signals and any `docker run` arguments.
ENTRYPOINT ["/opt/swh/entrypoint.sh"]
#!/bin/bash
# Entrypoint of the scrubber image.
#
# Checks that the configuration file and the required environment variables
# are present, then replaces this shell with `swh scrubber check storage`
# for the requested object type and partition range.
#
# Exit status: 1 when the configuration file is missing or when any required
# variable is unset or empty; otherwise the status of the swh command.

# Refuse to start without a configuration file.
if [ ! -e "${SWH_CONFIG_FILENAME}" ]; then
    echo "The config file ${SWH_CONFIG_FILENAME} does not exist."
    exit 1
fi

# Variables that must be set and non-empty.
ENV_VARS="LOGLEVEL STATSD_TAGS OBJECT_TYPE PARTITION_COUNT FIRST_PARTITION LAST_PARTITION"

# Report every missing variable before giving up, not just the first one.
ERROR=0
for VAR in ${ENV_VARS}; do
    # ${!VAR} is bash indirect expansion: the value of the variable named by VAR.
    if [ -z "${!VAR}" ]; then
        echo "The ${VAR} environment variable must be set"
        ERROR=1
    fi
done

if [ "${ERROR}" -ne 0 ]; then
    exit 1
fi

echo "Starting scrubber for OBJECT_TYPE=${OBJECT_TYPE} from FIRST_PARTITION=${FIRST_PARTITION} to LAST_PARTITION=${LAST_PARTITION}"

# exec so that swh replaces this shell and receives signals directly.
# The options are fed from the variables validated above.
exec swh \
    --log-level "${LOGLEVEL}" \
    scrubber check storage \
    --object-type "${OBJECT_TYPE}" \
    --nb-partitions "${PARTITION_COUNT}" \
    --start-partition-id "${FIRST_PARTITION}" \
    --end-partition-id "${LAST_PARTITION}"
aiohttp==3.8.4
aiohttp-utils==3.2.1
aiosignal==1.3.1
amqp==5.1.1
async-timeout==4.0.2
attrs==23.1.0
attrs-strict==1.0.0
billiard==3.6.4.0
blinker==1.6.2
cassandra-driver==3.26.0
celery==5.2.7
certifi==2022.12.7
cffi==1.15.1
chardet==5.1.0
charset-normalizer==3.1.0
click==8.1.3
click-didyoumean==0.3.0
click-plugins==1.1.1
click-repl==0.2.0
confluent-kafka==2.1.0
Deprecated==1.2.13
dulwich==0.21.3
exceptiongroup==1.1.1
Flask==2.2.3
frozenlist==1.3.3
geomet==0.2.1.post1
gunicorn==20.1.0
humanize==4.6.0
hypothesis==6.72.1
idna==3.4
importlib-metadata==4.13.0
iniconfig==2.0.0
iso8601==1.1.0
itsdangerous==2.1.2
Jinja2==3.1.2
kombu==5.2.4
MarkupSafe==2.1.2
mirakuru==2.5.1
msgpack==1.0.5
multidict==6.0.4
mypy-extensions==1.0.0
packaging==23.1
pika==1.3.1
pkginfo==1.9.6
pluggy==1.0.0
port-for==0.6.3
prompt-toolkit==3.0.38
psutil==5.9.5
psycopg2==2.9.6
pycparser==2.21
pytest==7.3.1
pytest-postgresql==3.1.3
python-dateutil==2.8.2
python-debian==0.1.49
python-magic==0.4.27
python-mimeparse==1.6.0
pytz==2023.3
PyYAML==6.0
redis==4.4.4
requests==2.28.2
retrying==1.3.4
sentry-sdk==1.20.0
six==1.16.0
sortedcontainers==2.4.0
swh.core==2.22.0
swh.counters==0.9.2
swh.journal==1.3.1
swh.loader.core==5.2.0
swh.loader.git==2.2.0
swh.model==6.7.0
swh.objstorage==2.2.0
swh.perfecthash==0.1.2
swh.scheduler==1.8.0
swh.scrubber==1.0.3
swh.storage==1.12.0
tenacity==8.2.2
toml==0.10.2
tomli==2.0.1
typing_extensions==4.5.0
urllib3==1.26.15
vine==5.0.0
wcwidth==0.2.6
Werkzeug==2.2.3
wrapt==1.15.0
yarl==1.8.2
zipp==3.15.0
swh-scrubber