Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • ardumont/swh-apps
  • vlorentz/swh-apps
  • swh/infra/swh-apps
  • anlambert/swh-apps
4 results
Show changes
Showing
with 2188 additions and 840 deletions
aiohttp==3.8.3
aiohttp-utils==3.1.1
aiosignal==1.3.1
amqp==5.1.1
async-timeout==4.0.2
attrs==22.2.0
attrs-strict==1.0.0
billiard==3.6.4.0
blinker==1.5
cachetools==5.3.0
cassandra-driver==3.25.0
celery==5.2.7
certifi==2022.12.7
cffi==1.15.1
charset-normalizer==2.1.1
click==8.1.3
click-didyoumean==0.3.0
click-plugins==1.1.1
click-repl==0.2.0
confluent-kafka==2.0.2
Deprecated==1.2.13
exceptiongroup==1.1.0
Flask==2.2.2
frozendict==2.3.4
frozenlist==1.3.3
# This file was autogenerated by uv via the following command:
# uv pip compile /src/apps/swh-indexer/requirements.txt -o /src/apps/swh-indexer/tmp0ry3kfen
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.11.14
# via
# aiohttp-utils
# swh-core
aiohttp-utils==3.2.1
# via swh-core
aiosignal==1.3.2
# via aiohttp
apache-libcloud==3.8.0
# via swh-objstorage
attrs==25.3.0
# via
# aiohttp
# attrs-strict
# hypothesis
# swh-model
attrs-strict==1.0.1
# via swh-model
azure-core==1.32.0
# via azure-storage-blob
azure-storage-blob==12.25.0
# via swh-objstorage
backports-entry-points-selectable==1.3.0
# via
# swh-core
# swh-storage
blinker==1.9.0
# via
# flask
# swh-core
cachetools==5.5.2
# via pyld
cassandra-driver==3.29.2
# via swh-storage
certifi==2025.1.31
# via
# requests
# sentry-sdk
cffi==1.17.1
# via
# cryptography
# swh-perfecthash
charset-normalizer==3.4.1
# via requests
click==8.1.8
# via
# flask
# geomet
# swh-core
# swh-indexer
# swh-objstorage
# swh-storage
confluent-kafka==2.8.2
# via swh-journal
cryptography==44.0.2
# via azure-storage-blob
deprecated==1.2.18
# via
# swh-core
# swh-model
# swh-objstorage
# swh-storage
flask==3.1.0
# via
# swh-core
# swh-counters
# swh-storage
frozendict==2.4.6
# via
# pyld
# swh-indexer
frozenlist==1.5.0
# via
# aiohttp
# aiosignal
geomet==0.2.1.post1
gunicorn==20.1.0
humanize==4.5.0
hypothesis==6.66.0
idna==3.4
importlib-metadata==4.13.0
iniconfig==2.0.0
iso8601==1.1.0
isodate==0.6.1
itsdangerous==2.1.2
Jinja2==3.1.2
kombu==5.2.4
lxml==4.9.2
MarkupSafe==2.1.2
mirakuru==2.4.2
msgpack==1.0.4
multidict==6.0.4
mypy-extensions==0.4.3
packaging==23.0
pika==1.3.1
pluggy==1.0.0
port-for==0.6.3
prompt-toolkit==3.0.36
psutil==5.9.4
psycopg2==2.9.5
pycparser==2.21
PyLD==2.0.3
pyparsing==3.0.9
pytest==7.2.1
pytest-postgresql==3.1.3
python-dateutil==2.8.2
# via cassandra-driver
gunicorn==23.0.0
# via aiohttp-utils
hypothesis==6.130.4
# via swh-model
idna==3.10
# via
# requests
# yarl
iso8601==2.1.0
# via
# swh-core
# swh-indexer
# swh-model
# swh-storage
isodate==0.7.2
# via azure-storage-blob
itsdangerous==2.2.0
# via flask
jinja2==3.1.6
# via flask
latexcodec==3.0.0
# via pybtex
lxml==5.3.1
# via pyld
markupsafe==3.0.2
# via
# jinja2
# werkzeug
msgpack==1.1.0
# via
# swh-core
# swh-journal
# swh-objstorage
multidict==6.2.0
# via
# aiohttp
# yarl
mypy-extensions==1.0.0
# via swh-storage
packaging==24.2
# via gunicorn
propcache==0.3.1
# via
# aiohttp
# yarl
psycopg==3.2.6
# via
# swh-core
# swh-storage
psycopg-pool==3.2.6
# via
# swh-core
# swh-storage
pybtex==0.24.0
# via swh-indexer
pycparser==2.22
# via cffi
pyld==2.0.4
# via swh-indexer
pyparsing==3.2.3
# via rdflib
python-dateutil==2.9.0.post0
# via swh-model
python-magic==0.4.27
python-mimeparse==1.6.0
pytz==2022.7.1
PyYAML==6.0
rdflib==6.2.0
redis==4.4.2
requests==2.28.2
sentry-sdk==1.14.0
six==1.16.0
# via
# swh-core
# swh-indexer
python-mimeparse==2.0.0
# via aiohttp-utils
pyyaml==6.0.2
# via
# pybtex
# swh-core
rdflib==7.1.3
# via swh-indexer
redis==5.2.1
# via
# swh-counters
# swh-storage
requests==2.32.3
# via
# apache-libcloud
# azure-core
# swh-core
# swh-objstorage
sentry-sdk==2.24.1
# via
# swh-core
# swh-indexer
six==1.17.0
# via
# azure-core
# geomet
# pybtex
# python-dateutil
sortedcontainers==2.4.0
swh.core==2.20.0
swh.counters==0.9.1
swh.indexer==2.9.1
swh.journal==1.2.1
swh.model==6.6.1
swh.objstorage==2.1.0
swh.perfecthash==0.1.2
swh.scheduler==1.6.0
swh.storage==1.7.3
tenacity==8.1.0
tomli==2.0.1
typing_extensions==4.4.0
urllib3==1.26.14
vine==5.0.0
wcwidth==0.2.6
Werkzeug==2.2.2
wrapt==1.14.1
xmltodict==0.13.0
yarl==1.8.2
zipp==3.12.0
# via hypothesis
swh-core==4.1.0
# via
# swh-counters
# swh-indexer
# swh-journal
# swh-objstorage
# swh-storage
swh-counters==0.11.0
# via swh-storage
swh-indexer==4.0.0
# via -r /src/apps/swh-indexer/requirements.txt
swh-journal==1.5.3
# via
# swh-counters
# swh-indexer
swh-model==7.1.0
# via
# swh-indexer
# swh-journal
# swh-objstorage
# swh-storage
swh-objstorage==4.0.0
# via
# -r /src/apps/swh-indexer/requirements.txt
# swh-indexer
# swh-storage
swh-perfecthash==1.3.2
# via swh-objstorage
swh-storage==3.0.0
# via swh-indexer
tenacity==9.0.0
# via
# swh-core
# swh-journal
# swh-storage
typing-extensions==4.13.0
# via
# azure-core
# azure-storage-blob
# psycopg
# psycopg-pool
# swh-core
# swh-indexer
# swh-model
# swh-storage
urllib3==2.3.0
# via
# requests
# sentry-sdk
werkzeug==3.1.3
# via flask
wrapt==1.17.2
# via deprecated
xmltodict==0.14.2
# via swh-indexer
yarl==1.18.3
# via aiohttp
swh-objstorage[azure,libcloud]
swh-indexer
FROM python:3.10-bullseye
ARG REGISTRY=container-registry.softwareheritage.org/swh/infra/swh-apps/
ARG base_image=${REGISTRY}base
ARG base_image_version=latest
RUN apt-get -y update && \
apt-get -y upgrade && \
apt-get install -y libcmph-dev librdkafka-dev libsystemd-dev \
opam git r-base-core r-cran-jsonlite \
sed && \
apt clean && \
addgroup --gid 1000 swh && \
useradd --gid 1000 --uid 1000 -m -d /opt/swh swh && \
mkdir /etc/swh
FROM ${base_image}:${base_image_version}
USER swh
WORKDIR /opt/swh
ARG user=swh
ARG workdir=/opt/${user}
ARG configdir=/etc/${user}
COPY --chown=swh:swh requirements-frozen.txt /opt/swh
COPY --chown=swh:swh entrypoint.sh /opt/swh
USER root
RUN apt-get update && \
apt-get install -y libsystemd-dev opam libsvn-dev && \
apt-get clean
ENV PYTHONPATH=/opt/swh
ENV PATH=/opt/swh/.local/bin:$PATH
COPY --chmod=0644 requirements-frozen.txt ${workdir}
RUN --mount=type=cache,target=.cache,uid=1000,gid=1000 \
uv pip sync requirements-frozen.txt
RUN chmod u+x /opt/swh/entrypoint.sh && \
/usr/local/bin/python -m pip install --upgrade pip && \
pip install --no-cache-dir -r requirements-frozen.txt && \
pip install gunicorn
COPY --chmod=0755 entrypoint.sh ${workdir}
ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
USER ${user}
ENV SWH_CONFIG_FILENAME=${configdir}/config.yml
ENV SWH_WORKER_INSTANCE=lister
ENV CONCURRENCY=1
ENV MAX_TASKS_PER_CHILD=1
ENV LOGLEVEL=INFO
ENTRYPOINT "/opt/swh/entrypoint.sh"
ENTRYPOINT ["/opt/swh/entrypoint.sh"]
......@@ -3,21 +3,25 @@
set -e
case "$1" in
"shell")
shift
echo "Running command $@"
exec bash -i "$@"
;;
*)
echo Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}
exec python -m celery \
--app=swh.scheduler.celery_backend.config.app \
worker \
--pool=prefork --events \
--concurrency=${CONCURRENCY} \
--max-tasks-per-child=${MAX_TASKS_PER_CHILD} \
-Ofair --loglevel=${LOGLEVEL} \
--without-gossip --without-mingle --without-heartbeat \
--hostname "${SWH_WORKER_INSTANCE}@%h"
;;
"shell")
shift
if (( $# == 0)); then
exec bash -i
else
"$@"
fi
;;
*)
echo Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}
exec python -m celery \
--app=swh.scheduler.celery_backend.config.app \
worker \
--pool=prefork --events \
--concurrency=${CONCURRENCY} \
--max-tasks-per-child=${MAX_TASKS_PER_CHILD} \
-Ofair \
--loglevel=${SWH_LOG_LEVEL:-INFO} \
--without-gossip --without-mingle --without-heartbeat \
--hostname "${SWH_WORKER_INSTANCE}@%h"
;;
esac
aiohttp==3.8.4
# This file was autogenerated by uv via the following command:
# uv pip compile /src/apps/swh-lister/requirements.txt -o /src/apps/swh-lister/tmpqbk5qgwk
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.11.14
# via
# aiohttp-utils
# swh-core
aiohttp-utils==3.2.1
aiosignal==1.3.1
amqp==5.1.1
# via swh-core
aiosignal==1.3.2
# via aiohttp
amqp==5.3.1
# via kombu
asn1crypto==1.5.1
async-timeout==4.0.2
attrs==23.1.0
attrs-strict==1.0.0
beautifulsoup4==4.12.2
billiard==4.1.0
blinker==1.6.2
cassandra-driver==3.28.0
celery==5.3.0
certifi==2023.5.7
cffi==1.15.1
chardet==5.1.0
charset-normalizer==3.1.0
click==8.1.3
click-didyoumean==0.3.0
# via scramp
attrs==25.3.0
# via
# aiohttp
# attrs-strict
# hypothesis
# swh-model
# swh-scheduler
attrs-strict==1.0.1
# via
# swh-model
# swh-scheduler
backports-entry-points-selectable==1.3.0
# via
# swh-core
# swh-storage
beautifulsoup4==4.13.3
# via swh-lister
billiard==4.2.1
# via celery
blinker==1.9.0
# via
# flask
# swh-core
breezy==3.3.4
# via swh-lister
cassandra-driver==3.29.2
# via swh-storage
celery==5.4.0
# via swh-scheduler
certifi==2025.1.31
# via
# requests
# sentry-sdk
cffi==1.17.1
# via swh-perfecthash
charset-normalizer==3.4.1
# via
# python-debian
# requests
click==8.1.8
# via
# celery
# click-didyoumean
# click-plugins
# click-repl
# flask
# geomet
# swh-core
# swh-objstorage
# swh-scheduler
# swh-storage
click-didyoumean==0.3.1
# via celery
click-plugins==1.1.1
click-repl==0.2.0
confluent-kafka==2.1.1
# via celery
click-repl==0.3.0
# via celery
configobj==5.0.9
# via breezy
confluent-kafka==2.8.2
# via swh-journal
dateparser==1.2.1
# via swh-lister
defusedxml==0.7.1
Deprecated==1.2.14
distro==1.8.0
dulwich==0.21.5
exceptiongroup==1.1.1
Flask==2.3.2
frozenlist==1.3.3
# via repomd
deprecated==1.2.18
# via
# swh-core
# swh-model
# swh-objstorage
# swh-storage
distro==1.9.0
# via lazr-restfulclient
dulwich==0.22.8
# via
# breezy
# swh-lister
fastbencode==0.3.1
# via breezy
flask==3.1.0
# via
# swh-core
# swh-counters
# swh-scheduler
# swh-storage
frozenlist==1.5.0
# via
# aiohttp
# aiosignal
geomet==0.2.1.post1
gunicorn==20.1.0
# via cassandra-driver
gunicorn==23.0.0
# via aiohttp-utils
httplib2==0.22.0
humanize==4.6.0
hypothesis==6.76.0
idna==3.4
importlib-metadata==4.13.0
iniconfig==2.0.0
iso8601==1.1.0
itsdangerous==2.1.2
Jinja2==3.1.2
kombu==5.3.0
launchpadlib==1.11.0
lazr.restfulclient==0.14.5
lazr.uri==1.0.6
lxml==4.9.2
MarkupSafe==2.1.3
mirakuru==2.5.1
msgpack==1.0.5
multidict==6.0.4
# via
# launchpadlib
# lazr-restfulclient
humanize==4.12.2
# via swh-scheduler
hypothesis==6.130.4
# via swh-model
idna==3.10
# via
# requests
# yarl
importlib-metadata==8.6.1
# via swh-scheduler
iso8601==2.1.0
# via
# swh-core
# swh-lister
# swh-model
# swh-storage
itsdangerous==2.2.0
# via flask
jinja2==3.1.6
# via flask
kombu==5.4.2
# via celery
launchpadlib==2.1.0
# via swh-lister
lazr-restfulclient==0.14.6
# via launchpadlib
lazr-uri==1.0.7
# via
# launchpadlib
# wadllib
looseversion==1.3.0
# via swh-lister
lxml==5.3.1
# via
# repomd
# swh-lister
markupsafe==3.0.2
# via
# jinja2
# werkzeug
mercurial==7.0
# via swh-lister
merge3==0.0.15
# via breezy
msgpack==1.1.0
# via
# swh-core
# swh-journal
# swh-objstorage
multidict==6.2.0
# via
# aiohttp
# yarl
mypy-extensions==1.0.0
# via swh-storage
numpy==2.2.4
# via pandas
oauthlib==3.2.2
packaging==23.1
pg8000==1.29.6
# via lazr-restfulclient
packaging==24.2
# via gunicorn
pandas==2.2.3
# via pyreadr
patiencediff==0.2.15
# via breezy
pg8000==1.31.2
# via testing-postgresql
pika==1.3.2
pluggy==1.0.0
port-for==0.6.3
prompt-toolkit==3.0.38
psutil==5.9.5
psycopg2==2.9.6
pycparser==2.21
pyparsing==3.0.9
pytest==7.3.1
pytest-postgresql==3.1.3
python-dateutil==2.8.2
python-debian==0.1.49
python-json-logger==2.0.7
# via swh-scheduler
prompt-toolkit==3.0.50
# via click-repl
propcache==0.3.1
# via
# aiohttp
# yarl
psycopg==3.2.6
# via
# swh-core
# swh-lister
# swh-scheduler
# swh-storage
psycopg-pool==3.2.6
# via
# swh-core
# swh-scheduler
# swh-storage
pycparser==2.22
# via cffi
pyparsing==3.2.3
# via httplib2
pyreadr==0.5.3
# via swh-lister
python-dateutil==2.9.0.post0
# via
# celery
# dateparser
# pandas
# pg8000
# swh-model
python-debian==1.0.1
# via swh-lister
python-json-logger==3.3.0
# via -r /src/apps/swh-lister/requirements.txt
python-magic==0.4.27
python-mimeparse==1.6.0
PyYAML==6.0
redis==4.5.5
# via swh-core
python-mimeparse==2.0.0
# via aiohttp-utils
pytz==2025.2
# via
# dateparser
# pandas
pyyaml==6.0.2
# via
# breezy
# swh-core
# swh-scheduler
redis==5.2.1
# via
# swh-counters
# swh-storage
regex==2024.11.6
# via dateparser
repomd==0.2.1
requests==2.31.0
scramp==1.4.4
sentry-sdk==1.25.0
six==1.16.0
# via swh-lister
requests==2.32.3
# via
# swh-core
# swh-lister
# swh-objstorage
# swh-scheduler
scramp==1.4.5
# via pg8000
sentry-sdk==2.24.1
# via
# swh-core
# swh-scheduler
setuptools==78.1.0
# via
# lazr-restfulclient
# lazr-uri
# swh-lister
# swh-scheduler
six==1.17.0
# via
# geomet
# lazr-restfulclient
# python-dateutil
sortedcontainers==2.4.0
soupsieve==2.4.1
swh.core==2.22.2
swh.counters==0.9.2
swh.journal==1.3.3
swh.lister==5.6.1
swh.model==6.7.0
swh.objstorage==2.2.0
swh.perfecthash==0.1.2
swh.scheduler==1.9.0
swh.storage==1.14.2
tenacity==8.2.2
testing.common.database==2.0.3
testing.postgresql==1.3.0
tomli==2.0.1
typing_extensions==4.6.3
tzdata==2023.3
urllib3==2.0.2
vine==5.0.0
wadllib==1.3.6
wcwidth==0.2.6
Werkzeug==2.3.4
wrapt==1.15.0
yarl==1.9.2
zipp==3.15.0
# via hypothesis
soupsieve==2.6
# via beautifulsoup4
subvertpy==0.11.0
# via swh-lister
swh-core==4.1.0
# via
# swh-counters
# swh-journal
# swh-lister
# swh-objstorage
# swh-scheduler
# swh-storage
swh-counters==0.11.0
# via swh-storage
swh-journal==1.5.3
# via swh-counters
swh-lister==6.9.4
# via -r /src/apps/swh-lister/requirements.txt
swh-model==7.1.0
# via
# swh-journal
# swh-objstorage
# swh-storage
swh-objstorage==4.0.0
# via swh-storage
swh-perfecthash==1.3.2
# via swh-objstorage
swh-scheduler==3.0.0
# via swh-lister
swh-storage==3.0.0
# via swh-scheduler
tabulate==0.9.0
# via swh-scheduler
tenacity==9.0.0
# via
# swh-core
# swh-journal
# swh-lister
# swh-storage
testing-common-database==2.0.3
# via testing-postgresql
testing-postgresql==1.3.0
# via swh-lister
toml==0.10.2
# via swh-lister
typing-extensions==4.13.0
# via
# beautifulsoup4
# psycopg
# psycopg-pool
# swh-core
# swh-model
# swh-scheduler
# swh-storage
tzdata==2025.2
# via
# celery
# kombu
# pandas
tzlocal==5.3.1
# via dateparser
urllib3==2.3.0
# via
# breezy
# dulwich
# requests
# sentry-sdk
vine==5.1.0
# via
# amqp
# celery
# kombu
wadllib==2.0.0
# via lazr-restfulclient
wcwidth==0.2.13
# via prompt-toolkit
werkzeug==3.1.3
# via flask
wrapt==1.17.2
# via deprecated
yarl==1.18.3
# via aiohttp
zipp==3.21.0
# via importlib-metadata
# Deeply inspired from the Dockerfile of the swh-graph project
FROM python:3.10-bullseye
ARG REGISTRY=container-registry.softwareheritage.org/swh/infra/swh-apps/
ARG base_image=${REGISTRY}base
ARG base_image_version=latest
RUN apt-get -y update && \
apt-get -y upgrade && \
apt-get install -y libcmph-dev librdkafka-dev \
bzr && \
apt clean && \
addgroup --gid 1000 swh && \
useradd --gid 1000 --uid 1000 -m -d /opt/swh swh && \
mkdir /etc/swh
FROM ${base_image}:${base_image_version}
USER swh
WORKDIR /opt/swh
ARG user=swh
ARG workdir=/opt/${user}
ARG configdir=/etc/${user}
COPY --chown=swh:swh requirements-frozen.txt /opt/swh
COPY --chown=swh:swh entrypoint.sh /opt/swh
COPY --chmod=0644 requirements-frozen.txt ${workdir}
RUN --mount=type=cache,target=.cache,uid=1000,gid=1000 \
uv pip sync requirements-frozen.txt
ENV PYTHONPATH=/opt/swh
ENV PATH=/opt/swh/.local/bin:$PATH
COPY --chmod=0755 entrypoint.sh ${workdir}
RUN chmod u+x /opt/swh/entrypoint.sh && \
/usr/local/bin/python -m pip install --upgrade pip && \
pip install --no-cache-dir -r requirements-frozen.txt && \
pip install gunicorn
ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
USER ${user}
ENV SWH_CONFIG_FILENAME=${configdir}/config.yml
ENV SWH_WORKER_INSTANCE=loader
ENV CONCURRENCY=1
ENV MAX_TASKS_PER_CHILD=5
ENV LOGLEVEL=INFO
ENTRYPOINT "/opt/swh/entrypoint.sh"
ENTRYPOINT ["/opt/swh/entrypoint.sh"]
......@@ -3,21 +3,30 @@
set -e
case "$1" in
"shell")
shift
echo "Running command $@"
exec bash -i "$@"
;;
*)
echo Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}
exec python -m celery \
--app=swh.scheduler.celery_backend.config.app \
worker \
--pool=prefork \
--concurrency=${CONCURRENCY} \
--max-tasks-per-child=${MAX_TASKS_PER_CHILD} \
-Ofair --loglevel=${LOGLEVEL} \
--without-gossip --without-mingle --without-heartbeat \
--hostname "${SWH_WORKER_INSTANCE}@%h"
;;
"shell")
shift
if (( $# == 0)); then
exec bash -i
else
"$@"
fi
;;
*)
# If a pod is killed by the OOM killer, there can remain previous
# workdir as the ephemeral volumes are not recreated
echo "Cleaning /tmp..."
rm -rf /tmp/*
echo Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}
exec python -m celery \
--app=swh.scheduler.celery_backend.config.app \
worker \
--pool=prefork \
--concurrency=${CONCURRENCY} \
--max-tasks-per-child=${MAX_TASKS_PER_CHILD} \
-Ofair \
--loglevel=${SWH_LOG_LEVEL:-INFO} \
--without-gossip --without-mingle --without-heartbeat \
--hostname "${SWH_WORKER_INSTANCE}@%h"
;;
esac
aiohttp==3.8.4
# This file was autogenerated by uv via the following command:
# uv pip compile /src/apps/swh-loader-bzr/requirements.txt -o /src/apps/swh-loader-bzr/tmpimagbssu
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.11.14
# via
# aiohttp-utils
# swh-core
aiohttp-utils==3.2.1
aiosignal==1.3.1
amqp==5.1.1
async-timeout==4.0.2
attrs==23.1.0
attrs-strict==1.0.0
billiard==3.6.4.0
blinker==1.6.2
breezy==3.3.2
cassandra-driver==3.27.0
celery==5.2.7
certifi==2022.12.7
cffi==1.15.1
chardet==5.1.0
charset-normalizer==3.1.0
click==8.1.3
click-didyoumean==0.3.0
# via swh-core
aiosignal==1.3.2
# via aiohttp
amqp==5.3.1
# via kombu
attrs==25.3.0
# via
# aiohttp
# attrs-strict
# hypothesis
# swh-model
# swh-scheduler
attrs-strict==1.0.1
# via
# swh-model
# swh-scheduler
backports-entry-points-selectable==1.3.0
# via
# swh-core
# swh-storage
billiard==4.2.1
# via celery
blinker==1.9.0
# via
# flask
# swh-core
breezy==3.3.4
# via swh-loader-bzr
cassandra-driver==3.29.2
# via swh-storage
celery==5.4.0
# via swh-scheduler
certifi==2025.1.31
# via
# requests
# sentry-sdk
cffi==1.17.1
# via swh-perfecthash
chardet==5.2.0
# via swh-loader-core
charset-normalizer==3.4.1
# via
# python-debian
# requests
click==8.1.8
# via
# celery
# click-didyoumean
# click-plugins
# click-repl
# flask
# geomet
# swh-core
# swh-objstorage
# swh-scheduler
# swh-storage
click-didyoumean==0.3.1
# via celery
click-plugins==1.1.1
click-repl==0.2.0
configobj==5.0.8
confluent-kafka==2.1.0
cryptography==40.0.2
Deprecated==1.2.13
dulwich==0.21.3
exceptiongroup==1.1.1
fastbencode==0.2
Flask==2.3.2
frozenlist==1.3.3
# via celery
click-repl==0.3.0
# via celery
configobj==5.0.9
# via breezy
confluent-kafka==2.8.2
# via swh-journal
deprecated==1.2.18
# via
# swh-core
# swh-model
# swh-objstorage
# swh-storage
dulwich==0.22.8
# via breezy
fastbencode==0.3.1
# via breezy
flask==3.1.0
# via
# swh-core
# swh-counters
# swh-scheduler
# swh-storage
frozenlist==1.5.0
# via
# aiohttp
# aiosignal
geomet==0.2.1.post1
gunicorn==20.1.0
humanize==4.6.0
hypothesis==6.75.1
idna==3.4
importlib-metadata==4.13.0
iniconfig==2.0.0
iso8601==1.1.0
itsdangerous==2.1.2
Jinja2==3.1.2
kombu==5.2.4
MarkupSafe==2.1.2
merge3==0.0.13
mirakuru==2.5.1
msgpack==1.0.5
multidict==6.0.4
# via cassandra-driver
gunicorn==23.0.0
# via aiohttp-utils
humanize==4.12.2
# via swh-scheduler
hypothesis==6.130.4
# via swh-model
idna==3.10
# via
# requests
# yarl
importlib-metadata==8.6.1
# via swh-scheduler
iso8601==2.1.0
# via
# swh-core
# swh-loader-core
# swh-model
# swh-storage
itsdangerous==2.2.0
# via flask
jinja2==3.1.6
# via flask
kombu==5.5.1
# via celery
looseversion==1.3.0
# via swh-loader-core
markupsafe==3.0.2
# via
# jinja2
# werkzeug
merge3==0.0.15
# via breezy
msgpack==1.1.0
# via
# swh-core
# swh-journal
# swh-objstorage
multidict==6.2.0
# via
# aiohttp
# yarl
mypy-extensions==1.0.0
packaging==23.1
patiencediff==0.2.13
pika==1.3.1
pkginfo==1.9.6
pluggy==1.0.0
port-for==0.6.3
prompt-toolkit==3.0.38
psutil==5.9.5
psycopg2==2.9.6
pycparser==2.21
pytest==7.3.1
pytest-postgresql==3.1.3
python-dateutil==2.8.2
python-debian==0.1.49
python-json-logger==2.0.7
# via swh-storage
packaging==24.2
# via gunicorn
patiencediff==0.2.15
# via breezy
pika==1.3.2
# via swh-scheduler
pkginfo==1.12.1.2
# via swh-loader-core
prompt-toolkit==3.0.50
# via click-repl
propcache==0.3.1
# via
# aiohttp
# yarl
psutil==7.0.0
# via swh-loader-core
psycopg==3.2.6
# via
# swh-core
# swh-scheduler
# swh-storage
psycopg-pool==3.2.6
# via
# swh-core
# swh-scheduler
# swh-storage
pycparser==2.22
# via cffi
python-dateutil==2.9.0.post0
# via
# celery
# swh-loader-core
# swh-model
python-debian==1.0.1
# via swh-loader-core
python-json-logger==3.3.0
# via -r /src/apps/swh-loader-bzr/requirements.txt
python-magic==0.4.27
python-mimeparse==1.6.0
pytz==2023.3
PyYAML==6.0
redis==4.4.4
requests==2.29.0
sentry-sdk==1.21.1
six==1.16.0
# via swh-core
python-mimeparse==2.0.0
# via aiohttp-utils
pyyaml==6.0.2
# via
# breezy
# swh-core
# swh-scheduler
redis==5.2.1
# via
# swh-counters
# swh-storage
requests==2.32.3
# via
# swh-core
# swh-loader-core
# swh-objstorage
# swh-scheduler
sentry-sdk==2.24.1
# via
# swh-core
# swh-scheduler
setuptools==78.1.0
# via swh-scheduler
six==1.17.0
# via
# geomet
# python-dateutil
sortedcontainers==2.4.0
swh.core==2.22.0
swh.counters==0.9.2
swh.journal==1.3.1
swh.loader.bzr==1.3.4
swh.loader.core==5.3.0
swh.model==6.7.0
swh.objstorage==2.2.0
swh.perfecthash==0.1.2
swh.scheduler==1.8.0
swh.storage==1.12.0
tenacity==8.2.2
# via hypothesis
swh-core==4.1.0
# via
# swh-counters
# swh-journal
# swh-loader-core
# swh-objstorage
# swh-scheduler
# swh-storage
swh-counters==0.11.0
# via swh-storage
swh-journal==1.5.3
# via swh-counters
swh-loader-bzr==1.4.4
# via -r /src/apps/swh-loader-bzr/requirements.txt
swh-loader-core==5.21.1
# via swh-loader-bzr
swh-model==7.1.0
# via
# swh-journal
# swh-loader-bzr
# swh-loader-core
# swh-objstorage
# swh-storage
swh-objstorage==4.0.0
# via
# swh-loader-core
# swh-storage
swh-perfecthash==1.3.2
# via swh-objstorage
swh-scheduler==3.0.0
# via
# swh-loader-bzr
# swh-loader-core
swh-storage==3.0.0
# via
# swh-loader-bzr
# swh-loader-core
# swh-scheduler
tabulate==0.9.0
# via swh-scheduler
tenacity==9.0.0
# via
# swh-core
# swh-journal
# swh-loader-core
# swh-storage
toml==0.10.2
tomli==2.0.1
typing_extensions==4.5.0
urllib3==1.26.15
vine==5.0.0
wcwidth==0.2.6
Werkzeug==2.3.3
wrapt==1.15.0
yarl==1.9.2
zipp==3.15.0
# via swh-loader-core
typing-extensions==4.13.0
# via
# psycopg
# psycopg-pool
# swh-core
# swh-loader-core
# swh-model
# swh-scheduler
# swh-storage
tzdata==2025.1
# via
# celery
# kombu
urllib3==2.3.0
# via
# breezy
# dulwich
# requests
# sentry-sdk
vine==5.1.0
# via
# amqp
# celery
# kombu
wcwidth==0.2.13
# via prompt-toolkit
werkzeug==3.1.3
# via flask
wrapt==1.17.2
# via deprecated
yarl==1.18.3
# via aiohttp
zipp==3.21.0
# via importlib-metadata
# Deeply inspired from the Dockerfile of the swh-graph project
FROM python:3.10-bullseye
ARG REGISTRY=container-registry.softwareheritage.org/swh/infra/swh-apps/
ARG base_image=${REGISTRY}base
ARG base_image_version=latest
RUN apt-get -y update && \
apt-get -y upgrade && \
apt-get install -y libcmph-dev librdkafka-dev \
cvs rsync && \
apt clean && \
addgroup --gid 1000 swh && \
useradd --gid 1000 --uid 1000 -m -d /opt/swh swh && \
mkdir /etc/swh
FROM ${base_image}:${base_image_version}
USER swh
WORKDIR /opt/swh
ARG user=swh
ARG workdir=/opt/${user}
ARG configdir=/etc/${user}
COPY --chown=swh:swh requirements-frozen.txt /opt/swh
COPY --chown=swh:swh entrypoint.sh /opt/swh
USER root
RUN apt-get update && \
apt-get install -y cvs rsync && \
apt-get clean
ENV PYTHONPATH=/opt/swh
ENV PATH=/opt/swh/.local/bin:$PATH
COPY --chmod=0644 requirements-frozen.txt ${workdir}
RUN --mount=type=cache,target=.cache,uid=1000,gid=1000 \
uv pip sync requirements-frozen.txt
RUN chmod u+x /opt/swh/entrypoint.sh && \
/usr/local/bin/python -m pip install --upgrade pip && \
pip install --no-cache-dir -r requirements-frozen.txt && \
pip install gunicorn
COPY --chmod=0755 entrypoint.sh ${workdir}
ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
USER ${user}
ENV SWH_CONFIG_FILENAME=${configdir}/config.yml
ENV SWH_WORKER_INSTANCE=loader
ENV CONCURRENCY=1
ENV MAX_TASKS_PER_CHILD=5
ENV LOGLEVEL=INFO
ENTRYPOINT "/opt/swh/entrypoint.sh"
ENTRYPOINT ["/opt/swh/entrypoint.sh"]
......@@ -3,21 +3,30 @@
set -e
case "$1" in
"shell")
shift
echo "Running command $@"
exec bash -i "$@"
;;
*)
echo Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}
exec python -m celery \
--app=swh.scheduler.celery_backend.config.app \
worker \
--pool=prefork \
--concurrency=${CONCURRENCY} \
--max-tasks-per-child=${MAX_TASKS_PER_CHILD} \
-Ofair --loglevel=${LOGLEVEL} \
--without-gossip --without-mingle --without-heartbeat \
--hostname "${SWH_WORKER_INSTANCE}@%h"
;;
"shell")
shift
if (( $# == 0)); then
exec bash -i
else
"$@"
fi
;;
*)
# If a pod is killed by the OOM killer, there can remain previous
# workdir as the ephemeral volumes are not recreated
echo "Cleaning /tmp..."
rm -rf /tmp/*
echo Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}
exec python -m celery \
--app=swh.scheduler.celery_backend.config.app \
worker \
--pool=prefork \
--concurrency=${CONCURRENCY} \
--max-tasks-per-child=${MAX_TASKS_PER_CHILD} \
-Ofair \
--loglevel=${SWH_LOG_LEVEL:-INFO} \
--without-gossip --without-mingle --without-heartbeat \
--hostname "${SWH_WORKER_INSTANCE}@%h"
;;
esac
# Deeply inspired from the Dockerfile of the swh-graph project
FROM python:3.10-bullseye
ARG REGISTRY=container-registry.softwareheritage.org/swh/infra/swh-apps/
ARG base_image=${REGISTRY}base
ARG base_image_version=latest
RUN apt-get -y update && \
apt-get -y upgrade && \
apt-get install -y libcmph-dev librdkafka-dev && \
apt clean && \
addgroup --gid 1000 swh && \
useradd --gid 1000 --uid 1000 -m -d /opt/swh swh && \
mkdir /etc/swh
FROM ${base_image}:${base_image_version}
USER swh
WORKDIR /opt/swh
ARG user=swh
ARG workdir=/opt/${user}
ARG configdir=/etc/${user}
COPY --chown=swh:swh requirements-frozen.txt /opt/swh
COPY --chown=swh:swh entrypoint.sh /opt/swh
USER root
RUN apt-get update && \
apt-get install -y strace gdb && \
apt-get clean
ENV PYTHONPATH=/opt/swh
ENV PATH=/opt/swh/.local/bin:$PATH
COPY --chmod=0644 requirements-frozen.txt ${workdir}
RUN --mount=type=cache,target=.cache,uid=1000,gid=1000 \
uv pip sync requirements-frozen.txt
RUN chmod u+x /opt/swh/entrypoint.sh && \
/usr/local/bin/python -m pip install --upgrade pip && \
pip install --no-cache-dir -r requirements-frozen.txt && \
pip install gunicorn
COPY --chmod=0755 entrypoint.sh ${workdir}
ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
USER ${user}
ENV SWH_CONFIG_FILENAME=${configdir}/config.yml
ENV SWH_WORKER_INSTANCE=loader
ENV CONCURRENCY=1
ENV MAX_TASKS_PER_CHILD=1
ENV LOGLEVEL=INFO
ENTRYPOINT "/opt/swh/entrypoint.sh"
ENTRYPOINT ["/opt/swh/entrypoint.sh"]
......@@ -3,21 +3,30 @@
set -e
case "$1" in
"shell")
shift
echo "Running command $@"
exec bash -i "$@"
;;
*)
echo Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}
exec python -m celery \
--app=swh.scheduler.celery_backend.config.app \
worker \
--pool=prefork \
--concurrency=${CONCURRENCY} \
--max-tasks-per-child=${MAX_TASKS_PER_CHILD} \
-Ofair --loglevel=${LOGLEVEL} \
--without-gossip --without-mingle --without-heartbeat \
--hostname "${SWH_WORKER_INSTANCE}@%h"
;;
"shell")
shift
if (( $# == 0)); then
exec bash -i
else
"$@"
fi
;;
*)
# If a pod is killed by the OOM killer, there can remain previous
# workdir as the ephemeral volumes are not recreated
echo "Cleaning /tmp..."
rm -rf /tmp/*
echo Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}
exec python -m celery \
--app=swh.scheduler.celery_backend.config.app \
worker \
--pool=prefork \
--concurrency=${CONCURRENCY} \
--max-tasks-per-child=${MAX_TASKS_PER_CHILD} \
-Ofair \
--loglevel=${SWH_LOG_LEVEL:-INFO} \
--without-gossip --without-mingle --without-heartbeat \
--hostname "${SWH_WORKER_INSTANCE}@%h"
;;
esac
# Deeply inspired from the Dockerfile of the swh-graph project
FROM python:3.10-bullseye
ARG REGISTRY=container-registry.softwareheritage.org/swh/infra/swh-apps/
ARG base_image=${REGISTRY}base
ARG base_image_version=latest
RUN apt-get -y update && \
apt-get -y upgrade && \
apt-get install -y libcmph-dev librdkafka-dev \
mercurial && \
apt clean && \
addgroup --gid 1000 swh && \
useradd --gid 1000 --uid 1000 -m -d /opt/swh swh && \
mkdir /etc/swh
FROM ${base_image}:${base_image_version}
USER swh
WORKDIR /opt/swh
ARG user=swh
ARG workdir=/opt/${user}
ARG configdir=/etc/${user}
COPY --chown=swh:swh requirements-frozen.txt /opt/swh
COPY --chown=swh:swh entrypoint.sh /opt/swh
USER root
RUN apt-get update && \
apt-get install -y mercurial && \
apt-get clean
ENV PYTHONPATH=/opt/swh
ENV PATH=/opt/swh/.local/bin:$PATH
COPY --chmod=0644 requirements-frozen.txt ${workdir}
RUN --mount=type=cache,target=.cache,uid=1000,gid=1000 \
uv pip sync requirements-frozen.txt
RUN chmod u+x /opt/swh/entrypoint.sh && \
/usr/local/bin/python -m pip install --upgrade pip && \
pip install --no-cache-dir -r requirements-frozen.txt && \
pip install gunicorn
COPY --chmod=0755 entrypoint.sh ${workdir}
ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
USER ${user}
ENV SWH_CONFIG_FILENAME=${configdir}/config.yml
ENV SWH_WORKER_INSTANCE=loader
ENV CONCURRENCY=1
ENV MAX_TASKS_PER_CHILD=5
ENV LOGLEVEL=INFO
ENTRYPOINT "/opt/swh/entrypoint.sh"
ENTRYPOINT ["/opt/swh/entrypoint.sh"]
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.