Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • ardumont/swh-apps
  • vlorentz/swh-apps
  • swh/infra/swh-apps
  • anlambert/swh-apps
4 results
Show changes
Showing
with 1636 additions and 275 deletions
......@@ -3,21 +3,25 @@
set -e
case "$1" in
"shell")
shift
echo "Running command $@"
exec bash -i "$@"
;;
*)
echo Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}
exec python -m celery \
--app=swh.scheduler.celery_backend.config.app \
worker \
--pool=prefork \
--concurrency=${CONCURRENCY} \
--max-tasks-per-child=${MAX_TASKS_PER_CHILD} \
-Ofair --loglevel=${LOGLEVEL} \
--without-gossip --without-mingle --without-heartbeat \
--hostname "${SWH_WORKER_INSTANCE}@%h"
;;
"shell")
shift
if (( $# == 0)); then
exec bash -i
else
"$@"
fi
;;
*)
echo Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}
exec python -m celery \
--app=swh.scheduler.celery_backend.config.app \
worker \
--pool=prefork \
--concurrency=${CONCURRENCY} \
--max-tasks-per-child=${MAX_TASKS_PER_CHILD} \
-Ofair \
--loglevel=${SWH_LOG_LEVEL:-INFO} \
--without-gossip --without-mingle --without-heartbeat \
--hostname "${SWH_WORKER_INSTANCE}@%h"
;;
esac
aiohttp==3.8.4
# This file was autogenerated by uv via the following command:
# uv pip compile /src/apps/swh-deposit-checkers/requirements.txt -o /src/apps/swh-deposit-checkers/tmpezx7r6y1
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.11.14
# via
# aiohttp-utils
# swh-core
aiohttp-utils==3.2.1
aiosignal==1.3.1
amqp==5.1.1
async-timeout==4.0.2
attrs==23.1.0
attrs-strict==1.0.0
billiard==3.6.4.0
blinker==1.6.2
cassandra-driver==3.27.0
celery==5.2.7
certifi==2023.5.7
cffi==1.15.1
charset-normalizer==3.1.0
click==8.1.3
click-didyoumean==0.3.0
# via swh-core
aiosignal==1.3.2
# via aiohttp
amqp==5.3.1
# via kombu
attrs==25.3.0
# via
# aiohttp
# attrs-strict
# hypothesis
# swh-model
# swh-scheduler
attrs-strict==1.0.1
# via
# swh-model
# swh-scheduler
backports-entry-points-selectable==1.3.0
# via
# swh-core
# swh-storage
billiard==4.2.1
# via celery
blinker==1.9.0
# via
# flask
# swh-core
cassandra-driver==3.29.2
# via swh-storage
celery==5.4.0
# via swh-scheduler
certifi==2025.1.31
# via
# requests
# sentry-sdk
cffi==1.17.1
# via swh-perfecthash
charset-normalizer==3.4.1
# via requests
click==8.1.8
# via
# celery
# click-didyoumean
# click-plugins
# click-repl
# flask
# geomet
# swh-core
# swh-deposit
# swh-objstorage
# swh-scheduler
# swh-storage
click-didyoumean==0.3.1
# via celery
click-plugins==1.1.1
click-repl==0.2.0
confluent-kafka==2.1.1
cryptography==40.0.2
Deprecated==1.2.13
exceptiongroup==1.1.1
Flask==2.3.2
frozenlist==1.3.3
# via celery
click-repl==0.3.0
# via celery
confluent-kafka==2.8.2
# via swh-journal
deprecated==1.2.18
# via
# swh-core
# swh-model
# swh-objstorage
# swh-storage
elementpath==4.8.0
# via xmlschema
flask==3.1.0
# via
# swh-core
# swh-counters
# swh-scheduler
# swh-storage
frozenlist==1.5.0
# via
# aiohttp
# aiosignal
geomet==0.2.1.post1
gunicorn==20.1.0
humanize==4.6.0
hypothesis==6.75.3
idna==3.4
importlib-metadata==4.13.0
iniconfig==2.0.0
iso8601==1.1.0
itsdangerous==2.1.2
Jinja2==3.1.2
kombu==5.2.4
MarkupSafe==2.1.2
mirakuru==2.5.1
msgpack==1.0.5
multidict==6.0.4
# via cassandra-driver
gunicorn==23.0.0
# via aiohttp-utils
humanize==4.12.2
# via swh-scheduler
hypothesis==6.130.4
# via swh-model
idna==3.10
# via
# requests
# yarl
importlib-metadata==8.6.1
# via swh-scheduler
iso8601==2.1.0
# via
# swh-core
# swh-deposit
# swh-model
# swh-storage
itsdangerous==2.2.0
# via flask
jinja2==3.1.6
# via flask
kombu==5.5.1
# via celery
markupsafe==3.0.2
# via
# jinja2
# werkzeug
msgpack==1.1.0
# via
# swh-core
# swh-journal
# swh-objstorage
multidict==6.2.0
# via
# aiohttp
# yarl
mypy-extensions==1.0.0
packaging==23.1
# via swh-storage
packaging==24.2
# via gunicorn
pika==1.3.2
pluggy==1.0.0
port-for==0.6.3
prompt-toolkit==3.0.38
psutil==5.9.5
psycopg2==2.9.6
pycparser==2.21
pytest==7.3.1
pytest-postgresql==3.1.3
python-dateutil==2.8.2
python-json-logger==2.0.7
# via swh-scheduler
prompt-toolkit==3.0.50
# via click-repl
propcache==0.3.1
# via
# aiohttp
# yarl
psycopg==3.2.6
# via
# swh-core
# swh-scheduler
# swh-storage
psycopg-pool==3.2.6
# via
# swh-core
# swh-scheduler
# swh-storage
pycparser==2.22
# via cffi
python-dateutil==2.9.0.post0
# via
# celery
# swh-model
python-json-logger==3.3.0
# via -r /src/apps/swh-deposit-checkers/requirements.txt
python-magic==0.4.27
python-mimeparse==1.6.0
pytz==2023.3
PyYAML==6.0
redis==4.4.4
requests==2.31.0
sentry-sdk==1.24.0
six==1.16.0
# via swh-core
python-mimeparse==2.0.0
# via aiohttp-utils
pyyaml==6.0.2
# via
# swh-core
# swh-scheduler
redis==5.2.1
# via
# swh-counters
# swh-storage
requests==2.32.3
# via
# swh-core
# swh-deposit
# swh-objstorage
# swh-scheduler
sentry-sdk==2.24.1
# via
# swh-core
# swh-deposit
# swh-scheduler
setuptools==78.1.0
# via swh-scheduler
six==1.17.0
# via
# geomet
# python-dateutil
sortedcontainers==2.4.0
swh.core==2.22.2
swh.counters==0.9.2
swh.deposit==1.2.1
swh.journal==1.3.3
swh.model==6.7.0
swh.objstorage==2.2.0
swh.perfecthash==0.1.2
swh.scheduler==1.8.0
swh.storage==1.12.0
tenacity==8.2.2
tomli==2.0.1
typing_extensions==4.6.0
urllib3==1.26.15
vine==5.0.0
wcwidth==0.2.6
Werkzeug==2.3.4
wrapt==1.15.0
yarl==1.9.2
zipp==3.15.0
# via hypothesis
swh-core==4.1.0
# via
# swh-counters
# swh-deposit
# swh-journal
# swh-objstorage
# swh-scheduler
# swh-storage
swh-counters==0.11.0
# via swh-storage
swh-deposit==3.0.0
# via -r /src/apps/swh-deposit-checkers/requirements.txt
swh-journal==1.5.3
# via swh-counters
swh-model==7.1.0
# via
# swh-deposit
# swh-journal
# swh-objstorage
# swh-storage
swh-objstorage==4.0.0
# via swh-storage
swh-perfecthash==1.3.2
# via swh-objstorage
swh-scheduler==3.0.0
# via -r /src/apps/swh-deposit-checkers/requirements.txt
swh-storage==3.0.0
# via swh-scheduler
tabulate==0.9.0
# via swh-scheduler
tenacity==9.0.0
# via
# swh-core
# swh-journal
# swh-storage
typing-extensions==4.13.0
# via
# psycopg
# psycopg-pool
# swh-core
# swh-model
# swh-scheduler
# swh-storage
tzdata==2025.1
# via
# celery
# kombu
urllib3==2.3.0
# via
# requests
# sentry-sdk
vine==5.1.0
# via
# amqp
# celery
# kombu
wcwidth==0.2.13
# via prompt-toolkit
werkzeug==3.1.3
# via flask
wrapt==1.17.2
# via deprecated
xmlschema==3.4.5
# via swh-deposit
yarl==1.18.3
# via aiohttp
zipp==3.21.0
# via importlib-metadata
# Container image for the swh.deposit Django application, built on top of the
# image selected by base_image / base_image_version.
ARG REGISTRY=container-registry.softwareheritage.org/swh/infra/swh-apps/
ARG base_image=${REGISTRY}base
ARG base_image_version=latest
FROM ${base_image}:${base_image_version}

# Declared after FROM so that they are usable by the instructions of this stage.
ARG user=swh
ARG workdir=/opt/${user}
ARG configdir=/etc/${user}

# Install exactly the pinned dependency set. The requirements file is given as
# a relative path, so it is resolved against the working directory inherited
# from the base image -- presumably ${workdir}; TODO confirm in the base image.
COPY --chmod=0644 requirements-frozen.txt ${workdir}
RUN --mount=type=cache,target=.cache,uid=1000,gid=1000 \
    uv pip sync requirements-frozen.txt

COPY --chmod=0755 entrypoint.sh ${workdir}

USER ${user}

# Runtime defaults; each of them can be overridden when the container starts.
# The key=value form is used throughout: the whitespace-separated
# "ENV key value" form is a legacy syntax that Docker discourages.
ENV SWH_CONFIG_FILENAME=${configdir}/config.yml
ENV PORT=5006
EXPOSE $PORT
ENV THREADS=2
ENV WORKERS=2
ENV TIMEOUT=3600
ENV DJANGO_SETTINGS_MODULE=swh.deposit.settings.production
ENV SWH_MAIN_PACKAGE=swh.deposit

ENTRYPOINT ["/opt/swh/entrypoint.sh"]
#!/bin/bash
# Container entrypoint for the swh deposit API server.
#
# Usage:
#   entrypoint.sh shell [CMD...]  run CMD, or an interactive bash if CMD is empty
#   entrypoint.sh swh [ARGS...]   run the `swh` command line with ARGS
#   entrypoint.sh [anything else] start the deposit API server under gunicorn
#
# Environment read in server mode:
#   PORT, THREADS, WORKERS, TIMEOUT, DJANGO_SETTINGS_MODULE
#   SWH_LOG_LEVEL          gunicorn log level (defaults to INFO)
#   SWH_LOG_CONFIG_JSON    if set, passed as --log-config-json
#   STATSD_HOST, STATSD_PORT  if both set, passed as --statsd-host HOST:PORT
#   STATSD_SERVICE_TYPE    if set, passed as --statsd-prefix
#   SWH_DEV_MODE           if set, adds --reload

set -e

case "$1" in
  "shell")
    shift
    if (( $# == 0 )); then
      exec bash -i
    else
      "$@"
    fi
    ;;
  "swh")
    shift
    # "$*" joins the arguments into one string for display purposes only.
    echo "Running swh command $*"
    # Quoted so that arguments containing spaces or glob characters reach
    # `swh` unchanged instead of being re-split by the shell.
    exec swh "$@"
    ;;
  *)
    # Optional gunicorn flags, collected in an array so that each value stays
    # a single argument whatever characters it contains.
    EXTRA_CLI_FLAGS=()
    if [[ -n "${SWH_LOG_CONFIG_JSON}" ]]; then
      EXTRA_CLI_FLAGS+=('--log-config-json' "${SWH_LOG_CONFIG_JSON}")
    fi
    if [[ -n "${STATSD_HOST}" && -n "${STATSD_PORT}" ]]; then
      EXTRA_CLI_FLAGS+=('--statsd-host' "${STATSD_HOST}:${STATSD_PORT}")
    fi
    if [[ -n "${STATSD_SERVICE_TYPE}" ]]; then
      EXTRA_CLI_FLAGS+=('--statsd-prefix' "${STATSD_SERVICE_TYPE}")
    fi
    if [[ -n "${SWH_DEV_MODE}" ]]; then
      EXTRA_CLI_FLAGS+=("--reload")
    fi

    echo 'Starting swh deposit API server'
    # exec replaces this shell, so gunicorn receives the container's signals.
    exec gunicorn --bind "0.0.0.0:${PORT}" \
      --log-level "${SWH_LOG_LEVEL:-INFO}" \
      "${EXTRA_CLI_FLAGS[@]}" \
      --threads "${THREADS}" \
      --workers "${WORKERS}" \
      --timeout "${TIMEOUT}" \
      --env DJANGO_SETTINGS_MODULE="${DJANGO_SETTINGS_MODULE}" \
      --config 'python:swh.core.api.gunicorn_config' \
      'django.core.wsgi:get_wsgi_application()'
    ;;
esac
# This file was autogenerated by uv via the following command:
# uv pip compile /src/apps/swh-deposit/requirements.txt -o /src/apps/swh-deposit/tmp57zub2r3
aiofiles==24.1.0
# via python-keycloak
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.11.14
# via
# aiohttp-utils
# swh-core
aiohttp-utils==3.2.1
# via swh-core
aiosignal==1.3.2
# via aiohttp
amqp==5.3.1
# via kombu
anyio==4.9.0
# via httpx
asgiref==3.8.1
# via django
async-property==0.2.2
# via python-keycloak
attrs==25.3.0
# via
# aiohttp
# attrs-strict
# hypothesis
# swh-model
# swh-scheduler
attrs-strict==1.0.1
# via
# swh-model
# swh-scheduler
azure-core==1.32.0
# via
# azure-storage-blob
# django-storages
azure-storage-blob==12.25.0
# via django-storages
backports-entry-points-selectable==1.3.0
# via
# swh-core
# swh-storage
billiard==4.2.1
# via celery
blinker==1.9.0
# via
# flask
# swh-core
cassandra-driver==3.29.2
# via swh-storage
celery==5.4.0
# via swh-scheduler
certifi==2025.1.31
# via
# httpcore
# httpx
# requests
# sentry-sdk
cffi==1.17.1
# via
# cryptography
# swh-perfecthash
chardet==5.2.0
# via swh-loader-core
charset-normalizer==3.4.1
# via
# python-debian
# requests
click==8.1.8
# via
# celery
# click-didyoumean
# click-plugins
# click-repl
# flask
# geomet
# swh-auth
# swh-core
# swh-deposit
# swh-objstorage
# swh-scheduler
# swh-storage
click-didyoumean==0.3.1
# via celery
click-plugins==1.1.1
# via celery
click-repl==0.3.0
# via celery
confluent-kafka==2.8.2
# via swh-journal
cryptography==44.0.2
# via
# azure-storage-blob
# jwcrypto
deprecated==1.2.18
# via
# swh-core
# swh-model
# swh-objstorage
# swh-storage
deprecation==2.1.0
# via python-keycloak
django==5.1.7
# via
# django-storages
# djangorestframework
# swh-auth
# swh-deposit
django-storages==1.14.5
# via -r /src/apps/swh-deposit/requirements.txt
djangorestframework==3.15.2
# via
# swh-auth
# swh-deposit
elementpath==4.8.0
# via xmlschema
flask==3.1.0
# via
# swh-core
# swh-counters
# swh-scheduler
# swh-storage
frozenlist==1.5.0
# via
# aiohttp
# aiosignal
geomet==0.2.1.post1
# via cassandra-driver
gunicorn==23.0.0
# via
# -r /src/apps/swh-deposit/requirements.txt
# aiohttp-utils
h11==0.14.0
# via httpcore
httpcore==1.0.7
# via httpx
httpx==0.28.1
# via python-keycloak
humanize==4.12.2
# via swh-scheduler
hypothesis==6.130.4
# via swh-model
idna==3.10
# via
# anyio
# httpx
# requests
# yarl
importlib-metadata==8.6.1
# via swh-scheduler
iso8601==2.1.0
# via
# swh-core
# swh-deposit
# swh-loader-core
# swh-model
# swh-storage
isodate==0.7.2
# via azure-storage-blob
itsdangerous==2.2.0
# via flask
jinja2==3.1.6
# via flask
jwcrypto==1.5.6
# via python-keycloak
kombu==5.5.1
# via celery
looseversion==1.3.0
# via swh-loader-core
markupsafe==3.0.2
# via
# jinja2
# werkzeug
msgpack==1.1.0
# via
# swh-core
# swh-journal
# swh-objstorage
multidict==6.2.0
# via
# aiohttp
# yarl
mypy-extensions==1.0.0
# via swh-storage
packaging==24.2
# via
# deprecation
# gunicorn
pika==1.3.2
# via swh-scheduler
pkginfo==1.12.1.2
# via swh-loader-core
prompt-toolkit==3.0.50
# via click-repl
propcache==0.3.1
# via
# aiohttp
# yarl
psutil==7.0.0
# via swh-loader-core
psycopg==3.2.6
# via
# swh-core
# swh-deposit
# swh-scheduler
# swh-storage
psycopg-pool==3.2.6
# via
# swh-core
# swh-scheduler
# swh-storage
pycparser==2.22
# via cffi
pymemcache==4.0.0
# via swh-deposit
python-dateutil==2.9.0.post0
# via
# celery
# swh-loader-core
# swh-model
python-debian==1.0.1
# via swh-loader-core
python-json-logger==3.3.0
# via -r /src/apps/swh-deposit/requirements.txt
python-keycloak==5.3.1
# via swh-auth
python-magic==0.4.27
# via swh-core
python-mimeparse==2.0.0
# via aiohttp-utils
pyyaml==6.0.2
# via
# swh-auth
# swh-core
# swh-scheduler
redis==5.2.1
# via
# swh-counters
# swh-storage
requests==2.32.3
# via
# azure-core
# python-keycloak
# requests-toolbelt
# swh-core
# swh-deposit
# swh-loader-core
# swh-objstorage
# swh-scheduler
requests-toolbelt==1.0.0
# via python-keycloak
sentry-sdk==2.24.1
# via
# swh-auth
# swh-core
# swh-deposit
# swh-scheduler
setuptools==78.1.0
# via
# swh-deposit
# swh-scheduler
six==1.17.0
# via
# azure-core
# geomet
# python-dateutil
sniffio==1.3.1
# via anyio
sortedcontainers==2.4.0
# via hypothesis
sqlparse==0.5.3
# via django
swh-auth==0.10.0
# via swh-deposit
swh-core==4.1.0
# via
# swh-auth
# swh-counters
# swh-deposit
# swh-journal
# swh-loader-core
# swh-objstorage
# swh-scheduler
# swh-storage
swh-counters==0.11.0
# via swh-storage
swh-deposit==3.0.0
# via -r /src/apps/swh-deposit/requirements.txt
swh-journal==1.5.3
# via swh-counters
swh-loader-core==5.21.1
# via swh-deposit
swh-model==7.1.0
# via
# swh-deposit
# swh-journal
# swh-loader-core
# swh-objstorage
# swh-storage
swh-objstorage==4.0.0
# via
# swh-loader-core
# swh-storage
swh-perfecthash==1.3.2
# via swh-objstorage
swh-scheduler==3.0.0
# via
# swh-deposit
# swh-loader-core
swh-storage==3.0.0
# via
# swh-deposit
# swh-loader-core
# swh-scheduler
tabulate==0.9.0
# via swh-scheduler
tenacity==9.0.0
# via
# swh-core
# swh-journal
# swh-loader-core
# swh-storage
toml==0.10.2
# via swh-loader-core
typing-extensions==4.13.0
# via
# anyio
# azure-core
# azure-storage-blob
# jwcrypto
# psycopg
# psycopg-pool
# swh-core
# swh-loader-core
# swh-model
# swh-scheduler
# swh-storage
tzdata==2025.1
# via
# celery
# kombu
urllib3==2.3.0
# via
# requests
# sentry-sdk
vine==5.1.0
# via
# amqp
# celery
# kombu
wcwidth==0.2.13
# via prompt-toolkit
werkzeug==3.1.3
# via flask
wrapt==1.17.2
# via deprecated
xmlschema==3.4.5
# via swh-deposit
yarl==1.18.3
# via aiohttp
zipp==3.21.0
# via importlib-metadata
swh.deposit[server]
django-storages[azure]
python-json-logger
gunicorn
# Container image for the swh-graph servers: the Rust binaries are compiled in
# a dedicated stage and copied into the final image.
#
# Global build arguments. An ARG declared before the first FROM is only visible
# to FROM lines; every stage that needs one must redeclare it (without a value)
# to inherit the default given here.
ARG REGISTRY=container-registry.softwareheritage.org/swh/infra/swh-apps/
ARG base_image=${REGISTRY}base
ARG base_image_version=latest
ARG userid=1000
ARG groupid=1000
ARG user=swh
ARG workdir=/opt/${user}
ARG configdir=/etc/${user}

FROM rust:1.85-bookworm AS rust_build

# ... build swh-graph rust deps
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y build-essential libclang-dev \
    zstd protobuf-compiler default-jre && \
    apt-get clean

# Install the swh-graph and swh-graph-grpc-server crates.
# NOTE(review): "-C target-cpu=native" tunes the binaries for the CPU of the
# build host; they may fail on hosts with an older CPU -- confirm this is
# intended for the deployment targets.
RUN --mount=type=cache,target=.cache,uid=1000,gid=1000 \
    RUSTFLAGS="-C target-cpu=native" \
    cargo install swh-graph swh-graph-grpc-server

FROM ${base_image}:${base_image_version}

# Bring the global build arguments into the scope of this stage; without these
# lines the ${...} references below expand to empty strings.
ARG userid
ARG groupid
ARG user
ARG workdir
ARG configdir

USER root
RUN apt-get update && \
    apt-get install -y zstd && \
    apt-get clean

# Data directories, handed over to the unprivileged user and exposed as volumes.
RUN mkdir -p /srv/graph /srv/dataset
RUN chown ${userid}:${groupid} /srv/graph /srv/dataset
VOLUME /srv/graph /srv/dataset

# Install exactly the pinned Python dependency set.
COPY --chmod=0644 requirements-frozen.txt ${workdir}
RUN --mount=type=cache,target=.cache,uid=1000,gid=1000 \
    uv pip sync requirements-frozen.txt

# Binaries produced by the rust_build stage.
COPY --from=rust_build /usr/local/cargo/bin/swh-graph* /usr/local/bin/
COPY --chmod=0755 entrypoint.sh ${workdir}

USER ${user}

ENV SWH_CONFIG_FILENAME=${configdir}/config.yml
ENV PORT=5009
EXPOSE $PORT

ENTRYPOINT ["/opt/swh/entrypoint.sh"]
#!/bin/bash
# Container entrypoint for the swh-graph servers.
#
# Usage:
#   entrypoint.sh shell [CMD...]  run CMD, or an interactive bash if CMD is empty
#   entrypoint.sh [anything else] start the server selected by GRAPH_TYPE
#
# Environment read in server mode:
#   GRAPH_TYPE           "rpc" or "grpc"; any other value is an error
#   GRAPH_PATH           graph location, passed as -g; optional for rpc,
#                        required for grpc
#   PORT                 port passed as -p
#   SWH_CONFIG_FILENAME  configuration file passed to `swh graph -C`

set -e

case "$1" in
  "shell")
    shift
    if (( $# == 0 )); then
      exec bash -i
    else
      "$@"
    fi
    exit 0
    ;;
  *)
    # The command line is assembled as an array so that values containing
    # spaces or glob characters (e.g. GRAPH_PATH) remain single arguments.
    case "${GRAPH_TYPE}" in
      "rpc")
        subcmd=(rpc-serve -h 0.0.0.0 -p "${PORT}")
        if [[ -n "${GRAPH_PATH}" ]]; then
          subcmd+=(-g "${GRAPH_PATH}")
        fi
        ;;
      "grpc")
        # Without a value, "-g" would swallow the following "-p" as its
        # argument; fail early with a clear message instead.
        if [[ -z "${GRAPH_PATH}" ]]; then
          echo "The GRAPH_PATH environment variable must be set for the grpc server"
          exit 1
        fi
        subcmd=(grpc-serve -g "${GRAPH_PATH}" -p "${PORT}")
        ;;
      *)
        echo "Unknown graph type <$GRAPH_TYPE> (either rpc or grpc)"
        exit 1
        ;;
    esac

    echo "Starting the swh-graph ${GRAPH_TYPE} server"
    cmd=(swh graph -C "${SWH_CONFIG_FILENAME}" "${subcmd[@]}")
    echo "${cmd[*]}"
    # exec replaces this shell, so the server receives the container's signals
    # directly, as in the other entrypoints of this repository.
    exec "${cmd[@]}"
    ;;
esac
# This file was autogenerated by uv via the following command:
# uv pip compile /src/apps/swh-graph/requirements.txt -o /src/apps/swh-graph/tmp3g2jeups
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.11.14
# via
# aiohttp-utils
# swh-core
# swh-graph
aiohttp-utils==3.2.1
# via swh-core
aiosignal==1.3.2
# via aiohttp
attrs==25.3.0
# via
# aiohttp
# attrs-strict
# hypothesis
# swh-model
attrs-strict==1.0.1
# via swh-model
backports-entry-points-selectable==1.3.0
# via swh-core
blinker==1.9.0
# via
# flask
# swh-core
boto3==1.37.20
# via swh-graph
botocore==1.37.20
# via
# boto3
# s3transfer
certifi==2025.1.31
# via
# requests
# sentry-sdk
charset-normalizer==3.4.1
# via requests
click==8.1.8
# via
# flask
# swh-core
# swh-graph
deprecated==1.2.18
# via
# swh-core
# swh-model
flask==3.1.0
# via swh-core
frozenlist==1.5.0
# via
# aiohttp
# aiosignal
grpcio==1.71.0
# via grpcio-tools
grpcio-tools==1.71.0
# via swh-graph
gunicorn==23.0.0
# via aiohttp-utils
hypothesis==6.130.4
# via swh-model
idna==3.10
# via
# requests
# yarl
iso8601==2.1.0
# via
# swh-core
# swh-model
itsdangerous==2.2.0
# via flask
jinja2==3.1.6
# via flask
jmespath==1.0.1
# via
# boto3
# botocore
markupsafe==3.0.2
# via
# jinja2
# werkzeug
msgpack==1.1.0
# via swh-core
multidict==6.2.0
# via
# aiohttp
# yarl
mypy-protobuf==3.6.0
# via swh-graph
packaging==24.2
# via gunicorn
propcache==0.3.1
# via
# aiohttp
# yarl
protobuf==5.29.4
# via
# grpcio-tools
# mypy-protobuf
# swh-graph
psutil==7.0.0
# via swh-graph
py4j==0.10.9.9
# via swh-graph
python-dateutil==2.9.0.post0
# via
# botocore
# swh-model
python-json-logger==3.3.0
# via -r /src/apps/swh-graph/requirements.txt
python-magic==0.4.27
# via swh-core
python-mimeparse==2.0.0
# via aiohttp-utils
pyyaml==6.0.2
# via swh-core
requests==2.32.3
# via swh-core
s3transfer==0.11.4
# via boto3
sentry-sdk==2.24.1
# via swh-core
setuptools==78.1.0
# via grpcio-tools
six==1.17.0
# via python-dateutil
sortedcontainers==2.4.0
# via hypothesis
swh-core==4.1.0
# via swh-graph
swh-graph==6.7.1
# via -r /src/apps/swh-graph/requirements.txt
swh-model==7.1.0
# via swh-graph
tenacity==9.0.0
# via swh-core
types-protobuf==5.29.1.20250315
# via mypy-protobuf
typing-extensions==4.13.0
# via swh-model
urllib3==2.3.0
# via
# botocore
# requests
# sentry-sdk
werkzeug==3.1.3
# via flask
wrapt==1.17.2
# via deprecated
yarl==1.18.3
# via aiohttp
swh.graph
python-json-logger
# Deeply inspired from the Dockerfile of the swh-graph project
FROM python:3.10-bullseye
ARG REGISTRY=container-registry.softwareheritage.org/swh/infra/swh-apps/
ARG base_image=${REGISTRY}base
ARG base_image_version=latest
RUN apt-get -y update && \
apt-get -y upgrade && \
apt-get install -y libcmph-dev librdkafka-dev && \
apt clean && \
addgroup --gid 1000 swh && \
useradd --gid 1000 --uid 1000 -m -d /opt/graphql swh && \
mkdir /etc/swh
FROM ${base_image}:${base_image_version}
USER swh
WORKDIR /opt/graphql
ARG user=swh
ARG workdir=/opt/${user}
ARG configdir=/etc/${user}
COPY --chown=swh:swh requirements-frozen.txt /opt/graphql
COPY --chown=swh:swh entrypoint.sh /opt/graphql
COPY --chmod=0644 requirements-frozen.txt ${workdir}
RUN --mount=type=cache,target=.cache,uid=1000,gid=1000 \
uv pip sync requirements-frozen.txt
ENV PYTHONPATH=/opt/graphql
ENV PATH=/opt/graphql/.local/bin:$PATH
COPY --chmod=0755 entrypoint.sh ${workdir}
RUN chmod u+x /opt/graphql/entrypoint.sh && \
/usr/local/bin/python -m pip install --upgrade pip && \
pip install --no-cache-dir -r requirements-frozen.txt && \
pip install gunicorn
ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
USER ${user}
ENV SWH_CONFIG_FILENAME=${configdir}/config.yml
ENV PORT 5013
EXPOSE $PORT
ENV THREADS 4
ENV WORKERS 2
ENV LOG_LEVEL INFO
ENV TIMEOUT 3600
ENTRYPOINT "/opt/graphql/entrypoint.sh"
ENTRYPOINT [ "/opt/swh/entrypoint.sh" ]
......@@ -3,19 +3,35 @@
set -e
case "$1" in
"shell")
exec bash -i
;;
*)
echo Starting the swh-graphql API server
"shell")
shift
if (( $# == 0)); then
exec bash -i
else
"$@"
fi
;;
*)
EXTRA_CLI_FLAGS=()
if [ -n "${SWH_LOG_CONFIG_JSON}" ]; then
EXTRA_CLI_FLAGS+=('--log-config-json' "${SWH_LOG_CONFIG_JSON}")
fi
if [ -n "${STATSD_HOST}" -a -n "${STATSD_PORT}" ]; then
EXTRA_CLI_FLAGS+=('--statsd-host' "${STATSD_HOST}:${STATSD_PORT}")
fi
if [ -n "${STATSD_SERVICE_TYPE}" ]; then
EXTRA_CLI_FLAGS+=('--statsd-prefix' "${STATSD_SERVICE_TYPE}")
fi
exec gunicorn --bind "0.0.0.0:${PORT}" \
--threads "${THREADS}" \
--workers "${WORKERS}" \
--log-level "${LOG_LEVEL}" \
--timeout "${TIMEOUT}" \
--config 'python:swh.core.api.gunicorn_config' \
--worker-class 'uvicorn.workers.UvicornWorker' \
'swh.graphql.server:make_app_from_configfile()'
;;
echo 'Starting the swh-graphql API server'
exec gunicorn --bind "0.0.0.0:${PORT}" \
--log-level "${SWH_LOG_LEVEL:-INFO}" \
"${EXTRA_CLI_FLAGS[@]}" \
--threads "${THREADS}" \
--workers "${WORKERS}" \
--timeout "${TIMEOUT}" \
--config 'python:swh.graphql.gunicorn_config' \
--worker-class 'uvicorn.workers.UvicornWorker' \
'swh.graphql.server:make_app_from_configfile()'
;;
esac
aiocache==0.12.1
aiohttp==3.8.4
# This file was autogenerated by uv via the following command:
# uv pip compile /src/apps/swh-graphql/requirements.txt -o /src/apps/swh-graphql/tmp7ozh5obg
aiocache==0.12.3
# via swh-graphql
aiofiles==24.1.0
# via python-keycloak
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.11.14
# via
# aiohttp-utils
# swh-core
aiohttp-utils==3.2.1
aiomcache==0.8.1
aiosignal==1.3.1
amqp==5.1.1
anyio==3.7.0
ariadne==0.19.1
asgiref==3.7.2
async-timeout==4.0.2
attrs==23.1.0
attrs-strict==1.0.0
billiard==4.1.0
blinker==1.6.2
cachetools==5.3.1
cassandra-driver==3.28.0
celery==5.3.0
certifi==2023.5.7
cffi==1.15.1
charset-normalizer==3.1.0
click==8.1.3
click-didyoumean==0.3.0
click-plugins==1.1.1
click-repl==0.2.0
confluent-kafka==2.1.1
Deprecated==1.2.14
# via swh-core
aiomcache==0.8.2
# via aiocache
aiosignal==1.3.2
# via aiohttp
anyio==4.9.0
# via
# httpx
# starlette
ariadne==0.26.1
# via
# -r /src/apps/swh-graphql/requirements.txt
# swh-graphql
asgiref==3.8.1
# via -r /src/apps/swh-graphql/requirements.txt
async-property==0.2.2
# via python-keycloak
attrs==25.3.0
# via
# aiohttp
# attrs-strict
# hypothesis
# swh-model
attrs-strict==1.0.1
# via swh-model
backports-entry-points-selectable==1.3.0
# via
# swh-core
# swh-storage
blinker==1.9.0
# via
# flask
# swh-core
cachetools==5.5.2
# via pyld
cassandra-driver==3.29.2
# via swh-storage
certifi==2025.1.31
# via
# elasticsearch
# httpcore
# httpx
# requests
# sentry-sdk
cffi==1.17.1
# via
# cryptography
# swh-perfecthash
charset-normalizer==3.4.1
# via requests
click==8.1.8
# via
# -r /src/apps/swh-graphql/requirements.txt
# flask
# geomet
# swh-auth
# swh-core
# swh-indexer
# swh-objstorage
# swh-search
# swh-storage
# uvicorn
confluent-kafka==2.8.2
# via swh-journal
cryptography==44.0.2
# via jwcrypto
deprecated==1.2.18
# via
# swh-core
# swh-model
# swh-objstorage
# swh-storage
deprecation==2.1.0
ecdsa==0.18.0
elasticsearch==7.17.9
exceptiongroup==1.1.1
Flask==2.3.2
frozendict==2.3.8
frozenlist==1.3.3
# via python-keycloak
elasticsearch==7.17.12
# via swh-search
flask==3.1.0
# via
# swh-core
# swh-counters
# swh-storage
frozendict==2.4.6
# via
# pyld
# swh-indexer
frozenlist==1.5.0
# via
# aiohttp
# aiosignal
geomet==0.2.1.post1
graphql-core==3.2.3
gunicorn==20.1.0
# via cassandra-driver
graphql-core==3.2.5
# via
# -r /src/apps/swh-graphql/requirements.txt
# ariadne
gunicorn==23.0.0
# via
# -r /src/apps/swh-graphql/requirements.txt
# aiohttp-utils
h11==0.14.0
httpcore==0.17.2
httpx==0.24.1
humanize==4.6.0
hypothesis==6.76.0
idna==3.4
importlib-metadata==4.13.0
iniconfig==2.0.0
iso8601==1.1.0
isodate==0.6.1
itsdangerous==2.1.2
Jinja2==3.1.2
kombu==5.3.0
lxml==4.9.2
MarkupSafe==2.1.3
mirakuru==2.5.1
msgpack==1.0.5
multidict==6.0.4
# via
# -r /src/apps/swh-graphql/requirements.txt
# httpcore
# uvicorn
httpcore==1.0.7
# via httpx
httpx==0.28.1
# via
# python-keycloak
# swh-graphql
hypothesis==6.130.4
# via swh-model
idna==3.10
# via
# anyio
# httpx
# requests
# yarl
iso8601==2.1.0
# via
# swh-core
# swh-indexer
# swh-model
# swh-search
# swh-storage
itsdangerous==2.2.0
# via flask
jinja2==3.1.6
# via flask
jwcrypto==1.5.6
# via python-keycloak
latexcodec==3.0.0
# via pybtex
lxml==5.3.1
# via pyld
markupsafe==3.0.2
# via
# jinja2
# werkzeug
msgpack==1.1.0
# via
# swh-core
# swh-journal
# swh-objstorage
multidict==6.2.0
# via
# aiohttp
# yarl
mypy-extensions==1.0.0
packaging==23.1
pika==1.3.2
pluggy==1.0.0
port-for==0.6.3
prompt-toolkit==3.0.38
psutil==5.9.5
psycopg2==2.9.6
pyasn1==0.5.0
pycparser==2.21
PyLD==2.0.3
pyparsing==3.0.9
pytest==7.3.1
pytest-postgresql==3.1.3
python-dateutil==2.8.2
python-jose==3.3.0
python-keycloak==3.0.0
# via swh-storage
packaging==24.2
# via
# deprecation
# gunicorn
propcache==0.3.1
# via
# aiohttp
# yarl
psycopg==3.2.6
# via
# swh-core
# swh-storage
psycopg-pool==3.2.6
# via
# swh-core
# swh-storage
pybtex==0.24.0
# via swh-indexer
pycparser==2.22
# via cffi
pyld==2.0.4
# via swh-indexer
pyparsing==3.2.3
# via rdflib
python-dateutil==2.9.0.post0
# via swh-model
python-json-logger==3.3.0
# via -r /src/apps/swh-graphql/requirements.txt
python-keycloak==5.3.1
# via swh-auth
python-magic==0.4.27
python-mimeparse==1.6.0
PyYAML==6.0
rdflib==6.3.2
redis==4.5.5
requests==2.31.0
# via
# swh-core
# swh-indexer
python-mimeparse==2.0.0
# via aiohttp-utils
pyyaml==6.0.2
# via
# pybtex
# swh-auth
# swh-core
rdflib==7.1.3
# via swh-indexer
redis==5.2.1
# via
# swh-counters
# swh-storage
requests==2.32.3
# via
# python-keycloak
# requests-toolbelt
# swh-core
# swh-objstorage
requests-toolbelt==1.0.0
rsa==4.9
sentry-sdk==1.25.0
six==1.16.0
sniffio==1.3.0
# via python-keycloak
sentry-sdk==2.24.1
# via
# swh-core
# swh-graphql
# swh-indexer
six==1.17.0
# via
# geomet
# pybtex
# python-dateutil
sniffio==1.3.1
# via anyio
sortedcontainers==2.4.0
starlette==0.28.0
swh.auth==0.7.2
swh.core==2.22.2
swh.counters==0.9.2
swh.graphql==0.0.95
swh.indexer==2.9.4
swh.journal==1.3.3
swh.model==6.7.0
swh.objstorage==2.2.0
swh.perfecthash==0.1.2
swh.scheduler==1.9.0
swh.search==0.16.4
swh.storage==1.14.3
tenacity==8.2.2
tomli==2.0.1
tree-sitter==0.20.1
typing_extensions==4.6.3
tzdata==2023.3
urllib3==1.26.16
uvicorn==0.22.0
vine==5.0.0
wcwidth==0.2.6
Werkzeug==2.3.4
wrapt==1.15.0
xmltodict==0.13.0
yarl==1.9.2
zipp==3.15.0
# via hypothesis
starlette==0.46.1
# via
# -r /src/apps/swh-graphql/requirements.txt
# ariadne
# swh-graphql
swh-auth==0.10.0
# via swh-graphql
swh-core==4.1.0
# via
# swh-auth
# swh-counters
# swh-graphql
# swh-indexer
# swh-journal
# swh-objstorage
# swh-search
# swh-storage
swh-counters==0.11.0
# via swh-storage
swh-graphql==0.0.104
# via -r /src/apps/swh-graphql/requirements.txt
swh-indexer==4.0.0
# via swh-search
swh-journal==1.5.3
# via
# swh-counters
# swh-indexer
# swh-search
swh-model==7.1.0
# via
# swh-graphql
# swh-indexer
# swh-journal
# swh-objstorage
# swh-search
# swh-storage
swh-objstorage==4.0.0
# via
# swh-indexer
# swh-storage
swh-perfecthash==1.3.2
# via swh-objstorage
swh-search==0.22.1
# via swh-graphql
swh-storage==3.0.0
# via
# swh-graphql
# swh-indexer
tenacity==9.0.0
# via
# swh-core
# swh-journal
# swh-storage
tree-sitter==0.21.3
# via swh-search
typing-extensions==4.13.0
# via
# -r /src/apps/swh-graphql/requirements.txt
# anyio
# ariadne
# jwcrypto
# psycopg
# psycopg-pool
# swh-core
# swh-indexer
# swh-model
# swh-search
# swh-storage
urllib3==1.26.20
# via
# elasticsearch
# requests
# sentry-sdk
uvicorn==0.34.0
# via swh-graphql
werkzeug==3.1.3
# via flask
wrapt==1.17.2
# via deprecated
xmltodict==0.14.2
# via swh-indexer
yarl==1.18.3
# via aiohttp
......@@ -6,3 +6,5 @@ h11
starlette
typing-extensions
swh-graphql
python-json-logger
gunicorn
# Application image built on top of the image selected by
# base_image / base_image_version.
ARG REGISTRY=container-registry.softwareheritage.org/swh/infra/swh-apps/
ARG base_image=${REGISTRY}base
ARG base_image_version=latest
FROM ${base_image}:${base_image_version}

# Declared after FROM so that they are usable by the instructions of this stage.
ARG user=swh
ARG workdir=/opt/${user}
ARG configdir=/etc/${user}

# Install exactly the pinned dependency set. The requirements file is given as
# a relative path, so it is resolved against the working directory inherited
# from the base image -- presumably ${workdir}; TODO confirm in the base image.
COPY --chmod=0644 requirements-frozen.txt ${workdir}
RUN --mount=type=cache,target=.cache,uid=1000,gid=1000 \
    uv pip sync requirements-frozen.txt

COPY --chmod=0755 entrypoint.sh ${workdir}

USER ${user}

# Runtime defaults; each of them can be overridden when the container starts.
# The key=value form is used throughout: the whitespace-separated
# "ENV key value" form is a legacy syntax that Docker discourages.
ENV SWH_CONFIG_FILENAME=${configdir}/config.yml
ENV PORT=5007
EXPOSE $PORT
ENV THREADS=4
ENV WORKERS=2
ENV TIMEOUT=3600

ENTRYPOINT ["/opt/swh/entrypoint.sh"]
#!/bin/bash
# Container entrypoint for the swh-indexer-storage API server.
#
# Usage:
#   entrypoint.sh shell [CMD...]  run CMD, or an interactive bash if CMD is empty
#   entrypoint.sh swh [ARGS...]   run the `swh` command line with ARGS
#   entrypoint.sh [anything else] start the API server under gunicorn
#
# Environment read in server mode:
#   PORT, THREADS, WORKERS, TIMEOUT
#   SWH_LOG_LEVEL          gunicorn log level (defaults to INFO)
#   SWH_LOG_CONFIG_JSON    if set, passed as --log-config-json
#   STATSD_HOST, STATSD_PORT  if both set, passed as --statsd-host HOST:PORT
#   STATSD_SERVICE_TYPE    if set, passed as --statsd-prefix

set -e

case "$1" in
  "shell")
    shift
    if (( $# == 0 )); then
      exec bash -i
    else
      "$@"
    fi
    ;;
  "swh")
    shift
    # "$*" joins the arguments into one string for display purposes only.
    echo "Running swh command $*"
    # Quoted so that arguments containing spaces or glob characters reach
    # `swh` unchanged instead of being re-split by the shell.
    exec swh "$@"
    ;;
  *)
    # Optional gunicorn flags, collected in an array so that each value stays
    # a single argument whatever characters it contains.
    EXTRA_CLI_FLAGS=()
    if [[ -n "${SWH_LOG_CONFIG_JSON}" ]]; then
      EXTRA_CLI_FLAGS+=('--log-config-json' "${SWH_LOG_CONFIG_JSON}")
    fi
    if [[ -n "${STATSD_HOST}" && -n "${STATSD_PORT}" ]]; then
      EXTRA_CLI_FLAGS+=('--statsd-host' "${STATSD_HOST}:${STATSD_PORT}")
    fi
    if [[ -n "${STATSD_SERVICE_TYPE}" ]]; then
      EXTRA_CLI_FLAGS+=('--statsd-prefix' "${STATSD_SERVICE_TYPE}")
    fi

    echo 'Starting the swh-indexer-storage API server'
    # exec replaces this shell, so gunicorn receives the container's signals.
    exec gunicorn --bind "0.0.0.0:${PORT}" \
      --log-level "${SWH_LOG_LEVEL:-INFO}" \
      "${EXTRA_CLI_FLAGS[@]}" \
      --threads "${THREADS}" \
      --workers "${WORKERS}" \
      --timeout "${TIMEOUT}" \
      --config 'python:swh.core.api.gunicorn_config' \
      'swh.indexer.storage.api.server:make_app_from_configfile()'
    ;;
esac
# This file was autogenerated by uv via the following command:
# uv pip compile /src/apps/swh-indexer-storage/requirements.txt -o /src/apps/swh-indexer-storage/tmp9cb0fvpj
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.11.14
# via
# aiohttp-utils
# swh-core
aiohttp-utils==3.2.1
# via swh-core
aiosignal==1.3.2
# via aiohttp
attrs==25.3.0
# via
# aiohttp
# attrs-strict
# hypothesis
# swh-model
attrs-strict==1.0.1
# via swh-model
backports-entry-points-selectable==1.3.0
# via
# swh-core
# swh-storage
blinker==1.9.0
# via
# flask
# swh-core
cachetools==5.5.2
# via pyld
cassandra-driver==3.29.2
# via swh-storage
certifi==2025.1.31
# via
# requests
# sentry-sdk
cffi==1.17.1
# via swh-perfecthash
charset-normalizer==3.4.1
# via requests
click==8.1.8
# via
# flask
# geomet
# swh-core
# swh-indexer
# swh-objstorage
# swh-storage
confluent-kafka==2.8.2
# via swh-journal
deprecated==1.2.18
# via
# swh-core
# swh-model
# swh-objstorage
# swh-storage
flask==3.1.0
# via
# swh-core
# swh-counters
# swh-storage
frozendict==2.4.6
# via
# pyld
# swh-indexer
frozenlist==1.5.0
# via
# aiohttp
# aiosignal
geomet==0.2.1.post1
# via cassandra-driver
gunicorn==23.0.0
# via
# -r /src/apps/swh-indexer-storage/requirements.txt
# aiohttp-utils
hypothesis==6.130.4
# via swh-model
idna==3.10
# via
# requests
# yarl
iso8601==2.1.0
# via
# swh-core
# swh-indexer
# swh-model
# swh-storage
itsdangerous==2.2.0
# via flask
jinja2==3.1.6
# via flask
latexcodec==3.0.0
# via pybtex
lxml==5.3.1
# via pyld
markupsafe==3.0.2
# via
# jinja2
# werkzeug
msgpack==1.1.0
# via
# swh-core
# swh-journal
# swh-objstorage
multidict==6.2.0
# via
# aiohttp
# yarl
mypy-extensions==1.0.0
# via swh-storage
packaging==24.2
# via gunicorn
propcache==0.3.1
# via
# aiohttp
# yarl
psycopg==3.2.6
# via
# swh-core
# swh-storage
psycopg-pool==3.2.6
# via
# swh-core
# swh-storage
pybtex==0.24.0
# via swh-indexer
pycparser==2.22
# via cffi
pyld==2.0.4
# via swh-indexer
pyparsing==3.2.3
# via rdflib
python-dateutil==2.9.0.post0
# via swh-model
python-json-logger==3.3.0
# via -r /src/apps/swh-indexer-storage/requirements.txt
python-magic==0.4.27
# via
# swh-core
# swh-indexer
python-mimeparse==2.0.0
# via aiohttp-utils
pyyaml==6.0.2
# via
# pybtex
# swh-core
rdflib==7.1.3
# via swh-indexer
redis==5.2.1
# via
# swh-counters
# swh-storage
requests==2.32.3
# via
# swh-core
# swh-objstorage
sentry-sdk==2.24.1
# via
# swh-core
# swh-indexer
six==1.17.0
# via
# geomet
# pybtex
# python-dateutil
sortedcontainers==2.4.0
# via hypothesis
swh-core==4.1.0
# via
# swh-counters
# swh-indexer
# swh-journal
# swh-objstorage
# swh-storage
swh-counters==0.11.0
# via swh-storage
swh-indexer==4.0.0
# via -r /src/apps/swh-indexer-storage/requirements.txt
swh-journal==1.5.3
# via
# swh-counters
# swh-indexer
swh-model==7.1.0
# via
# swh-indexer
# swh-journal
# swh-objstorage
# swh-storage
swh-objstorage==4.0.0
# via
# swh-indexer
# swh-storage
swh-perfecthash==1.3.2
# via swh-objstorage
swh-storage==3.0.0
# via swh-indexer
tenacity==9.0.0
# via
# swh-core
# swh-journal
# swh-storage
typing-extensions==4.13.0
# via
# psycopg
# psycopg-pool
# swh-core
# swh-indexer
# swh-model
# swh-storage
urllib3==2.3.0
# via
# requests
# sentry-sdk
werkzeug==3.1.3
# via flask
wrapt==1.17.2
# via deprecated
xmltodict==0.14.2
# via swh-indexer
yarl==1.18.3
# via aiohttp
swh-indexer
python-json-logger
gunicorn
# Deeply inspired from the Dockerfile of the swh-graph project
FROM python:3.10-bullseye
ARG REGISTRY=container-registry.softwareheritage.org/swh/infra/swh-apps/
ARG base_image=${REGISTRY}base
ARG base_image_version=latest
FROM ${base_image}:${base_image_version}
ARG user=swh
ARG workdir=/opt/${user}
ARG configdir=/etc/${user}
USER root
RUN echo deb http://deb.debian.org/debian/ bullseye-backports main \
> /etc/apt/sources.list.d/backports.list
RUN echo deb [trusted=yes] https://debian.softwareheritage.org/ bullseye-swh main \
> /etc/apt/sources.list.d/softwareheritage.list
RUN apt-get -y update && \
apt-get -y upgrade && \
apt-get install -y libcmph-dev librdkafka-dev \
fossology-nomossa && \
apt clean && \
addgroup --gid 1000 swh && \
useradd --gid 1000 --uid 1000 -m -d /opt/swh swh && \
mkdir /etc/swh
USER swh
WORKDIR /opt/swh
COPY --chown=swh:swh requirements-frozen.txt /opt/swh
COPY --chown=swh:swh entrypoint.sh /opt/swh
RUN apt-get update && \
apt-get install -y fossology-nomossa && \
apt-get clean
ENV PYTHONPATH=/opt/swh
ENV PATH=/opt/swh/.local/bin:$PATH
COPY --chmod=0644 requirements-frozen.txt ${workdir}
RUN --mount=type=cache,target=.cache,uid=1000,gid=1000 \
uv pip sync requirements-frozen.txt
RUN chmod u+x /opt/swh/entrypoint.sh && \
/usr/local/bin/python -m pip install --upgrade pip && \
pip install --no-cache-dir -r requirements-frozen.txt && \
pip install gunicorn
COPY --chmod=0755 entrypoint.sh ${workdir}
ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
ENV SWH_WORKER_INSTANCE=loader
USER ${user}
ENV SWH_CONFIG_FILENAME=${configdir}/config.yml
ENV SWH_WORKER_INSTANCE=indexer
ENV CONCURRENCY=1
ENV MAX_TASKS_PER_CHILD=1
ENV LOGLEVEL=INFO
ENTRYPOINT "/opt/swh/entrypoint.sh"
ENTRYPOINT ["/opt/swh/entrypoint.sh"]
#!/bin/bash
if [ -z "${SWH_CONFIG_FILENAME}" ]; then
echo "The SWH_CONFIG_FILENAME environment variable must be set"
exit 1
fi
set -e
if [ -z "${SWH_INDEXER_TYPE}" ]; then
echo "The SWH_INDEXER_TYPE environment variable must be set"
exit 1
fi
case "$1" in
"shell")
shift
if (( $# == 0)); then
exec bash -i
else
"$@"
fi
exit 0
;;
*)
if [ -z "${SWH_INDEXER_TYPE}" ]; then
echo "The SWH_INDEXER_TYPE environment variable must be set"
exit 1
fi
if [ ! -e "${SWH_CONFIG_FILENAME}" ]; then
echo "The config file ${SWH_CONFIG_FILENAME} does not exist"
exit 1
fi
if [ ! -e "${SWH_CONFIG_FILENAME}" ]; then
echo "The config file ${SWH_CONFIG_FILENAME} does not exist"
exit 1
fi
# start the replayer
echo "Starting indexer journal client..."
exec swh \
--log-level $LOGLEVEL \
--log-level azure.core.pipeline.policies.http_logging_policy:WARNING \
indexer \
--config-file $SWH_CONFIG_FILENAME \
journal-client $SWH_INDEXER_TYPE
# start the replayer
echo "Starting indexer journal client..."
exec swh \
--log-level azure.core.pipeline.policies.http_logging_policy:WARNING \
indexer \
--config-file $SWH_CONFIG_FILENAME \
journal-client $SWH_INDEXER_TYPE
;;
esac