Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • ardumont/swh-charts
  • vlorentz/swh-charts
  • swh/infra/ci-cd/swh-charts
  • vsellier/swh-charts
  • anlambert/swh-charts
5 results
Show changes
Commits on Source (11)
......@@ -88,7 +88,7 @@
image: {{ .Values.swh_utils_image }}:{{ .Values.swh_utils_image_version }}
imagePullPolicy: IfNotPresent
command:
- /entrypoints/graph-wait-for-dataset.sh
- /entrypoints/wait-for-dataset.sh
env:
{{- include "swh.graph.volume.witnessfile" (dict "graphPath" .graphPath) | nindent 4 }}
- name: DATASET_LOCATION
......
......@@ -2,7 +2,8 @@
* Create a provenance configmap for service .serviceType
*/}}
{{ define "swh.provenance.configmap" }}
{{- $graphConfigurationRef := required (print "<.configuration.graphConfigurationRef> key is mandatory" ) .configuration.graphConfigurationRef -}}
{{- $graphConfigurationRef := .configuration.graphConfigurationRef -}}
{{- if $graphConfigurationRef -}}
{{- $graphConfiguration := get .Values $graphConfigurationRef -}}
---
apiVersion: v1
......@@ -14,6 +15,7 @@ data:
config.yml.template: |
provenance:
{{- toYaml $graphConfiguration | nindent 6 -}}
{{ end }}
{{- end -}}
{{/*
......@@ -32,10 +34,13 @@ data:
{{/*
* Read default provenance configuration
* Configure provenance configuration with default values.
* This also raises when mandatory configuration is missing.
*/}}
{{ define "swh.provenance.config" }}
{{- $configuration := .configuration -}}
{{- $provenanceImageVersion := get $configuration "imageVersion" | default $.Values.provenance.imageVersion | default $.Values.swh_provenance_image_version -}}
{{- $configuration := set $configuration "imageVersion" $provenanceImageVersion -}}
{{- if not (hasKey $configuration "type") }}
{{- $configuration := set $configuration "type" "rpc" -}}
{{- end -}}
......@@ -44,6 +49,31 @@ data:
{{- $port := eq $type "rpc" | ternary $.Values.provenance.rpcPort $.Values.provenance.grpcPort -}}
{{- $configuration := set $configuration "port" $port -}}
{{- end -}}
{{- $logLevel := $configuration.logLevel | default $.Values.provenance.logLevel | default "INFO" | upper -}}
{{- $configuration := set $configuration "logLevel" $logLevel -}}
{{- if eq $type "rpc" -}}
{{- $graphConfigurationRef := required (print "<.configuration.graphConfigurationRef> key is mandatory" ) .configuration.graphConfigurationRef -}}
{{- $gunicornConfig := fromYaml
(include "swh.gunicorn.config"
(dict "defaultConfiguration" $.Values.provenance.gunicorn
"configuration" $configuration.gunicorn)) -}}
{{- $configuration := set $configuration "gunicornConfig" $gunicornConfig -}}
{{- else -}}
{{- $provenanceIndexDataset := dig "dataset" "provenance" "index" false $configuration -}}
{{- $configuration := set $configuration "provenanceIndexDataset" $provenanceIndexDataset -}}
{{- $provenanceFetchDataset := dig "dataset" "provenance" "fetch" false $configuration -}}
{{- $configuration := set $configuration "provenanceFetchDataset" $provenanceFetchDataset -}}
{{- if or $provenanceIndexDataset $provenanceFetchDataset -}}
{{- $datasetName := required (print "<.configuration.dataset.name> key is mandatory") $configuration.dataset.name -}}
{{- $configuration := set $configuration "datasetName" $datasetName -}}
{{- end -}}
{{- $graphPath := required (print "<.configuration.dataset.graph.path> key is mandatory") $configuration.dataset.graph.path -}}
{{- $configuration := set $configuration "graphPath" $graphPath -}}
{{- $provenancePath := required (print "<.configuration.dataset.provenance.path> key is mandatory") $configuration.dataset.provenance.path -}}
{{- $configuration := set $configuration "provenancePath" $provenancePath -}}
{{- end -}}
{{- toYaml $configuration -}}
{{- end }}
......
{{/*
   * Render one env entry pointing at a provenance witness file.
   * Expected keys in the context dict:
   * - provenancePath: directory in which the witness file lives
   * - witness_file_env_variable_name: name of the env variable
   *   (defaults to "WITNESS_FILE")
   * - filename: name of the witness file
   *   (defaults to ".provenance-is-initialized")
   * The output is a single list item, to be re-indented by the caller
   * with nindent.
   */}}
{{- define "swh.provenance.volume.witnessfile" -}}
- name: {{ .witness_file_env_variable_name | default "WITNESS_FILE" }}
  value: "{{ .provenancePath }}/{{ .filename | default ".provenance-is-initialized" }}"
{{- end -}}
{{/*
   * Init-container running /entrypoints/provenance-fetch-datasets.sh.
   * Expected keys in the context dict:
   * - Values: the chart values
   * - imagePrefixName: values key holding the image name; the version is read
   *   from "imageVersion" or, failing that, from "<imagePrefixName>_version"
   * - containerName: optional container name
   *   (defaults to "fetch-provenance-dataset")
   * - provenancePath, graphPath: exported as PROVENANCE_PATH and GRAPH_PATH
   * - datasetName: exported as DATASET_VERSION
   * - extraVolumes: optional map of volume name to {mountPath, readOnly}
   */}}
{{- define "swh.provenance.fetchDataset" -}}
{{- $image_version := get . "imageVersion" | default ( get .Values (print .imagePrefixName "_version") ) |
        required (print .imagePrefixName "_version is mandatory in values.yaml ") -}}
- name: {{ .containerName | default "fetch-provenance-dataset" }}
  image: {{ get .Values .imagePrefixName }}:{{ $image_version }}
  command:
  - /entrypoints/provenance-fetch-datasets.sh
  env:
  {{- /* The fetch script reads WITNESS_FETCH_FILE, not the helper's default WITNESS_FILE */}}
  {{- include "swh.provenance.volume.witnessfile" (dict "witness_file_env_variable_name" "WITNESS_FETCH_FILE"
                                                        "provenancePath" .provenancePath) | nindent 2 }}
  - name: SWH_CONFIG_FILENAME
    value: /etc/swh/config.yml
  - name: PROVENANCE_PATH
    value: {{ .provenancePath }}
  - name: GRAPH_PATH
    value: {{ .graphPath }}
  - name: DATASET_VERSION
    value: {{ .datasetName | default "" | quote }}
  volumeMounts:
  - name: configuration
    mountPath: /etc/swh
  - name: backend-utils
    mountPath: /entrypoints
  {{- range $volumeName, $volumeConfig := .extraVolumes }}
  - name: {{ $volumeName }}
    mountPath: {{ $volumeConfig.mountPath }}
    readOnly: {{ $volumeConfig.readOnly | default "false" }}
  {{- end }}
{{- end -}}
{{/*
   * Init-container running /entrypoints/wait-for-dataset.sh.
   * Expected keys in the context dict:
   * - Values: the chart values (swh_utils_image and its version are read)
   * - provenancePath: directory of the witness file exported as WITNESS_FILE
   * - period: optional value exported as PERIOD (defaults to "3")
   * - extraVolumes: optional map of volume name to {mountPath, readOnly}
   */}}
{{- define "swh.provenance.waitForDataset" -}}
{{- $witnessArgs := dict "provenancePath" .provenancePath -}}
{{- $period := .period | default "3" -}}
- name: wait-for-dataset
  image: {{ .Values.swh_utils_image }}:{{ .Values.swh_utils_image_version }}
  imagePullPolicy: IfNotPresent
  command:
    - /entrypoints/wait-for-dataset.sh
  env:
    {{- include "swh.provenance.volume.witnessfile" $witnessArgs | nindent 4 }}
    - name: PERIOD
      value: {{ $period | quote }}
  volumeMounts:
    - name: backend-utils
      mountPath: /entrypoints
      readOnly: true
    {{- range $name, $volume := .extraVolumes }}
    - name: {{ $name }}
      mountPath: {{ $volume.mountPath }}
      readOnly: {{ $volume.readOnly | default "false" }}
    {{ end }}
{{- end -}}
{{/*
   * Init-container running /entrypoints/provenance-index-dataset.sh.
   * Expected keys in the context dict:
   * - Values: the chart values
   * - imagePrefixName: values key holding the image name; the version is read
   *   from "imageVersion" or, failing that, from "<imagePrefixName>_version"
   * - containerName: optional container name
   *   (defaults to "reindex-provenance-dataset")
   * - provenancePath: exported as PROVENANCE_PATH; also the directory of both
   *   witness files
   * - period: optional value exported as PERIOD (defaults to "3")
   * - extraVolumes: optional map of volume name to {mountPath, readOnly}
   */}}
{{- define "swh.provenance.indexDataset" -}}
{{- $image_version := get . "imageVersion" | default ( get .Values (print .imagePrefixName "_version") ) |
        required (print .imagePrefixName "_version is mandatory in values.yaml ") -}}
- name: {{ .containerName | default "reindex-provenance-dataset" }}
  image: {{ get .Values .imagePrefixName }}:{{ $image_version }}
  imagePullPolicy: IfNotPresent
  command:
    - /entrypoints/provenance-index-dataset.sh
  env:
    {{- /* The index script refuses to start without WITNESS_SOURCE_FILE; it uses
           the helper's default witness filename. */}}
    {{- include "swh.provenance.volume.witnessfile" (dict "witness_file_env_variable_name" "WITNESS_SOURCE_FILE"
                                                          "provenancePath" .provenancePath) | nindent 4 }}
    {{- include "swh.provenance.volume.witnessfile" (dict "witness_file_env_variable_name" "WITNESS_INDEX_FILE"
                                                          "provenancePath" .provenancePath
                                                          "filename" ".provenance-is-reindexed") | nindent 4 }}
    - name: PROVENANCE_PATH
      value: {{ .provenancePath }}
    - name: PERIOD
      value: {{ .period | default "3" | quote }}
  volumeMounts:
    - name: backend-utils
      mountPath: /entrypoints
      readOnly: true
    {{- range $volumeName, $volumeConfig := .extraVolumes }}
    - name: {{ $volumeName }}
      mountPath: {{ $volumeConfig.mountPath }}
      readOnly: {{ $volumeConfig.readOnly | default "false" }}
    {{- end }}
{{- end -}}
{{- /*
   * For every enabled provenance deployment of type "rpc", render its
   * configuration configmap and its gunicorn logging configmap.
   * A deployment counts as enabled when it has no "enabled" key or when
   * that key is truthy.
   */ -}}
{{ if .Values.provenance.enabled -}}
{{ range $provenanceType, $deploymentConfig := .Values.provenance.deployments }}
{{- if or (not (hasKey $deploymentConfig "enabled")) (get $deploymentConfig "enabled") -}}
{{- $provenanceConfig := include "swh.provenance.config" (dict "configuration" $deploymentConfig
                                                               "Values" $.Values) | fromYaml -}}
{{- $type := $provenanceConfig.type -}}
{{- if eq $type "rpc" -}}
{{- /* Pass the resolved configuration only: a dict key can be given once */}}
{{- $argsDict := dict "serviceType" (print "provenance-" $provenanceType)
                      "configuration" $provenanceConfig
                      "Values" $.Values -}}
{{ include "swh.provenance.configmap" $argsDict }}
{{ include "swh.provenance.gunicorn.logging" $argsDict }}
{{- end -}}
{{- end -}}
{{ end -}}
{{- end -}}
{{ if .Values.provenance.enabled -}}
{{- $configUtilsChecksum := include (print $.Template.BasePath "/utils/config-utils.yaml") . | sha256sum -}}
{{- $backendUtilsChecksum := include (print $.Template.BasePath "/utils/backend-utils.yaml") . | sha256sum -}}
{{ range $provenanceType, $provenanceConfig := .Values.provenance.deployments }}
{{- if or (not (hasKey $provenanceConfig "enabled")) (get $provenanceConfig "enabled") -}}
{{- if and (or (not (hasKey $provenanceConfig "enabled")) (get $provenanceConfig "enabled"))
(or (not (hasKey $provenanceConfig "startService")) (get $provenanceConfig "startService")) -}}
{{- $serviceType := ( print "provenance-" $provenanceType ) -}}
{{- $configArgsDict := dict "serviceType" $serviceType
"configuration" $provenanceConfig
......@@ -9,13 +12,18 @@
{{- $configChecksum := include "swh.provenance.configmap" $configArgsDict | sha256sum -}}
{{- $configLoggingChecksum := include "swh.provenance.gunicorn.logging" $configArgsDict | sha256sum -}}
{{- $securityContext := $provenanceConfig.securityContext | default $.Values.provenance.securityContext -}}
{{ $gunicornConfig := fromYaml (include "swh.gunicorn.config"
(dict "configuration" $provenanceConfig.gunicorn
"defaultConfiguration" $.Values.provenance.gunicorn)) -}}
{{- $provenanceConfig := include "swh.provenance.config" (dict "configuration" $provenanceConfig
"Values" $.Values) | fromYaml -}}
"Values" $.Values) | fromYaml -}}
{{- $type := $provenanceConfig.type -}}
{{- $port := $provenanceConfig.port -}}
{{- $logLevel := $provenanceConfig.logLevel -}}
{{- $datasetName := $provenanceConfig.datasetName -}}
{{- $graphPath := $provenanceConfig.graphPath -}}
{{- $provenancePath := $provenanceConfig.provenancePath -}}
{{- $gunicornConfig := $provenanceConfig.gunicornConfig -}}
{{- $provenanceFetchDataset := $provenanceConfig.provenanceFetchDataset -}}
{{- $provenanceIndexDataset := $provenanceConfig.provenanceIndexDataset -}}
{{- $provenanceImageVersion := $provenanceConfig.imageVersion -}}
---
apiVersion: apps/v1
kind: Deployment
......@@ -44,6 +52,7 @@ spec:
checksum/config: {{ $configChecksum }}
checksum/config-logging: {{ $configLoggingChecksum }}
checksum/config-utils: {{ $configUtilsChecksum }}
checksum/backend-utils: {{ $backendUtilsChecksum }}
spec:
{{- if $securityContext }}
securityContext:
......@@ -79,6 +88,26 @@ spec:
- name: config-utils
mountPath: /entrypoints
readOnly: true
{{- if $provenanceFetchDataset }}
{{ include "swh.provenance.fetchDataset" (dict "Values" $.Values
"imagePrefixName" "swh_provenance_image"
"datasetName" $datasetName
"provenancePath" $provenancePath
"graphPath" $graphPath
"extraVolumes" $provenanceConfig.extraVolumes) | nindent 8 }}
{{- end }}
{{- if $provenanceIndexDataset }}
{{ include "swh.provenance.indexDataset" (dict "Values" $.Values
"imagePrefixName" "swh_provenance_image"
"imageVersion" $provenanceImageVersion
"provenancePath" $provenancePath
"extraVolumes" $provenanceConfig.extraVolumes) | nindent 8 }}
{{ end }}
{{- if $provenanceFetchDataset }}
{{ include "swh.provenance.waitForDataset" (dict "Values" $.Values
"provenancePath" $provenancePath
"extraVolumes" $provenanceConfig.extraVolumes) | nindent 8 }}
{{ end }}
containers:
- name: {{ $serviceType }}
resources:
......@@ -122,13 +151,26 @@ spec:
- -c
- /opt/swh/entrypoint.sh
env:
{{ if $gunicornConfig -}}
- name: PROVENANCE_TYPE
value: {{ $type }}
- name: PORT
value: {{ $port }}
{{ if eq $type "rpc" -}}
- name: WORKERS
value: {{ $gunicornConfig.workers | quote }}
- name: THREADS
value: {{ $gunicornConfig.threads | quote }}
- name: TIMEOUT
value: {{ $gunicornConfig.timeout | quote }}
- name: SWH_LOG_CONFIG_JSON
value: /etc/swh/logging/logging-gunicorn.json
- name: STATSD_SERVICE_TYPE
value: {{ $serviceType }}
{{ else -}}
- name: PROVENANCE_PATH
value: {{ $provenancePath }}
- name: GRAPH_PATH
value: {{ $graphPath }}
{{ end -}}
- name: STATSD_HOST
value: {{ $.Values.statsdExternalHost | default "prometheus-statsd-exporter" }}
......@@ -136,12 +178,8 @@ spec:
value: {{ $.Values.statsdPort | default "9125" | quote }}
- name: STATSD_TAGS
value: deployment:{{ $serviceType }}
- name: STATSD_SERVICE_TYPE
value: {{ $serviceType }}
- name: SWH_LOG_LEVEL
value: {{ $provenanceConfig.logLevel | default $.Values.provenance.logLevel | default "INFO" | quote }}
- name: SWH_LOG_CONFIG_JSON
value: /etc/swh/logging/logging-gunicorn.json
value: {{ $logLevel }}
{{- if $.Values.provenance.sentry.enabled }}
- name: SWH_SENTRY_ENVIRONMENT
value: {{ $.Values.sentry.environment }}
......@@ -187,6 +225,10 @@ spec:
configMap:
name: config-utils
defaultMode: 0555
- name: backend-utils
configMap:
name: backend-utils
defaultMode: 0555
{{- range $volumeName, $volumeConfig := $provenanceConfig.extraVolumes }}
- name: {{ $volumeName }}
{{- toYaml $volumeConfig.volumeDefinition | nindent 8 }}
......
......@@ -62,7 +62,7 @@ data:
# Finally, we make explicit the graph is ready
touch ${WITNESS_FILE}
graph-wait-for-dataset.sh: |
wait-for-dataset.sh: |
#!/usr/bin/env bash
# Uses env variables WITNESS_FILE
[ -z "${WITNESS_FILE}" ] && \
......@@ -158,6 +158,77 @@ data:
swh graph reindex --ef ${DATASET_LOCATION}/${GRAPH_NAME} && \
touch $WITNESS_REINDEX_FILE
provenance-fetch-datasets.sh: |
  #!/usr/bin/env bash
  # Fetch the provenance dataset and the graph files it needs from S3.
  # Uses env variables WITNESS_FETCH_FILE, DATASET_VERSION, PROVENANCE_PATH
  # and GRAPH_PATH. Does nothing when WITNESS_FETCH_FILE already exists.
  [ -z "${WITNESS_FETCH_FILE}" ] && \
    echo "<WITNESS_FETCH_FILE> env variable must be set" && exit 1
  [ -z "${DATASET_VERSION}" ] && \
    echo "<DATASET_VERSION> env variable must be set" && exit 1
  [ -z "${PROVENANCE_PATH}" ] && \
    echo "<PROVENANCE_PATH> env variable must be set" && exit 1
  [ -z "${GRAPH_PATH}" ] && \
    echo "<GRAPH_PATH> env variable must be set" && exit 1
  [ -f "${WITNESS_FETCH_FILE}" ] && \
    echo "Datasets graph & provenance <${DATASET_VERSION}> already present. Skip." && \
    exit 0
  # Stop at the first failure so the witness file is only written after a
  # complete fetch.
  set -e
  # Make sure both targets are directories before copying into them
  mkdir -p "${PROVENANCE_PATH}" "${GRAPH_PATH}"
  URL_PROVENANCE="s3://softwareheritage/derived_datasets/${DATASET_VERSION}/provenance/all/"
  # Deliberately expanded unquoted below so it splits into command + options
  CMD_GET="aws s3 cp --no-sign-request"
  # Retrieve the provenance dataset parquet files
  $CMD_GET --recursive "${URL_PROVENANCE}" "${PROVENANCE_PATH}"
  # Retrieve the required graph files
  URL_GRAPH="s3://softwareheritage/graph/${DATASET_VERSION}/compressed"
  for filename in graph.pthash graph.pthash.order graph.node2swhid.bin.zst graph.node2type.bin.zst; do
    $CMD_GET "${URL_GRAPH}/${filename}" "${GRAPH_PATH}/"
  done
  # Uncompress the compressed graph files where they were downloaded
  pushd "${GRAPH_PATH}"
  for filename in graph.node2type.bin.zst graph.node2swhid.bin.zst; do
    # Uncompress and delete the .zst file
    if [ -f "${filename}" ]; then
      unzstd --rm "${filename}"
    fi
  done
  popd
  # Make explicit the provenance datasets are fetched
  touch "${WITNESS_FETCH_FILE}"
provenance-index-dataset.sh: |
  #!/usr/bin/env bash
  # Build the provenance indexes once the dataset witness file shows up.
  # Uses env variables WITNESS_SOURCE_FILE, WITNESS_INDEX_FILE, PERIOD and
  # PROVENANCE_PATH. Does nothing when WITNESS_INDEX_FILE already exists.
  for required_var in WITNESS_SOURCE_FILE WITNESS_INDEX_FILE PERIOD PROVENANCE_PATH; do
    if [ -z "${!required_var}" ]; then
      echo "<${required_var}> env variable must be set"
      exit 1
    fi
  done
  if [ -f ${WITNESS_INDEX_FILE} ]; then
    echo "Provenance already indexed, do nothing."
    exit 0
  fi
  set -eux
  # Poll every PERIOD seconds until the dataset has been installed
  until [ -f "${WITNESS_SOURCE_FILE}" ]; do
    echo "${WITNESS_SOURCE_FILE} missing, waiting provenance dataset installation..."
    sleep $PERIOD
  done
  # Indexes are built from the dataset files and written next to them; the
  # witness file is only created when indexing succeeded.
  swh-provenance-index \
    --database ${PROVENANCE_PATH} \
    --indexes ${PROVENANCE_PATH} && \
    touch "${WITNESS_INDEX_FILE}"
initialize-search-backend.sh: |
#!/usr/bin/env bash
......
......@@ -1922,8 +1922,24 @@ provenance:
# enabled: false
# # The server type to deploy, either a rpc or a grpc
# type: grpc
# # For type grpc, the parquet files to use
# parquetFiles: /srv/volumes/datasets/parquet-$version
# # For type grpc, datasets need to be available for the service to use
# dataset:
# # Name of the dataset, graph and provenance must be in sync so same
# # version must be used
# name: 2024-08-23-popular-500-python
# # Graph setup
# graph:
# # Where to store the files
# path: /srv/dataset/graph
# provenance:
# # Whether or not to fetch provenance dataset
# fetch: true
# # Whether or not to build the provenance dataset indices
# index: true
# # Where to store the parquet files
# path: /srv/dataset/provenance
# # Whether to start/stop the service
# startService: true
# hosts: []
# ingress:
# enabled: true
......
......@@ -1466,7 +1466,22 @@ provenance:
test-grpc:
enabled: true
type: grpc
graphConfigurationRef: fakeGraphConfiguration
dataset:
name: 2024-08-23-popular-500-python
provenance:
fetch: true
index: true
path: /srv/dataset/provenance
graph:
path: /srv/dataset/graph
startService: true
extraVolumes:
dataset-persistent:
mountPath: /srv/dataset
volumeDefinition:
persistentVolumeClaim:
claimName: provenance-popular-persistent-pvc
replicas: 1
# gunicorn:
# workers: 4
......@@ -1525,6 +1540,17 @@ externalServices:
volumes:
enabled: true
persistentVolumeClaims:
provenance-popular-persistent-pvc:
enabled: true
appName: provenance-test-grpc
spec:
storageClassName: standard
volumeMode: Filesystem
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
alter-recovery-bundles-pvc:
enabled: true
appName: alter
......
......@@ -41,7 +41,7 @@ swh_objstorage_image_version: '20250319.1'
swh_objstorage_replayer_image: container-registry.softwareheritage.org/swh/infra/swh-apps/objstorage_replayer
swh_objstorage_replayer_image_version: '20250319.1'
swh_provenance_image: container-registry.softwareheritage.org/swh/infra/swh-apps/provenance
swh_provenance_image_version: '20250319.1'
swh_provenance_image_version: '20250320.2'
swh_scheduler_image: container-registry.softwareheritage.org/swh/infra/swh-apps/scheduler
swh_scheduler_image_version: '20250319.1'
swh_scrubber_image: container-registry.softwareheritage.org/swh/infra/swh-apps/scrubber
......