Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • ardumont/swh-charts
  • vlorentz/swh-charts
  • swh/infra/ci-cd/swh-charts
  • vsellier/swh-charts
  • anlambert/swh-charts
5 results
Show changes
Commits on Source (42)
Showing
with 578 additions and 96 deletions
......@@ -7,7 +7,7 @@ apiVersion: v1
kind: ConfigMap
metadata:
name: alertmanager-irc-relay
namespace: cattle-monitoring-system
namespace: {{ .Values.alertmanagerIrcRelay.namespace }}
data:
# For more information, check
# https://gitlab.softwareheritage.org/swh/infra/ci-cd/3rdparty/alertmanager-irc-relay/-/tree/master#configuring-and-running-the-bot
......
......@@ -5,7 +5,7 @@ apiVersion: apps/v1
kind: Deployment
metadata:
name: alertmanager-irc-relay
namespace: cattle-monitoring-system
namespace: {{ .Values.alertmanagerIrcRelay.namespace }}
spec:
selector:
matchLabels:
......
......@@ -3,7 +3,7 @@ apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: alertmanager-irc-relay-internal-ingress-status
namespace: cattle-monitoring-system
namespace: {{ .Values.alertmanagerIrcRelay.namespace }}
annotations:
{{ if .Values.alertmanagerIrcRelay.ingress.tls.clusterIssuer }}
cert-manager.io/cluster-issuer: {{ .Values.alertmanagerIrcRelay.ingress.tls.clusterIssuer }}
......
......@@ -3,7 +3,7 @@ apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: alertmanager-irc-relay-internal-ingress
namespace: cattle-monitoring-system
namespace: {{ .Values.alertmanagerIrcRelay.namespace }}
annotations:
{{ if .Values.alertmanagerIrcRelay.ingress.tls.clusterIssuer }}
cert-manager.io/cluster-issuer: {{ .Values.alertmanagerIrcRelay.ingress.tls.clusterIssuer }}
......@@ -17,7 +17,7 @@ metadata:
# an htpasswd file in the key auth within the secret
nginx.ingress.kubernetes.io/auth-secret-type: auth-file
# name of the secret that contains the user/password definitions
nginx.ingress.kubernetes.io/auth-secret: {{ .Values.alertmanagerIrcRelay.ingress.authentication }}
nginx.ingress.kubernetes.io/auth-secret: {{ .Values.alertmanagerIrcRelay.namespace }}/{{ .Values.alertmanagerIrcRelay.ingress.authentication }}
# message to display with an appropriate context why the authentication is required
nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
{{ end }}
......
......@@ -4,7 +4,7 @@ apiVersion: v1
kind: Service
metadata:
name: alertmanager-irc-relay
namespace: cattle-monitoring-system
namespace: {{ .Values.alertmanagerIrcRelay.namespace }}
spec:
selector:
app: alertmanager-irc-relay
......
......@@ -53,6 +53,7 @@ alertmanagerConfig:
alertmanagerIrcRelay:
enabled: false
namespace: cattle-monitoring-system
priorityClassName: cluster-components-system
ingress:
enabled: true
......@@ -60,7 +61,7 @@ alertmanagerIrcRelay:
- alertmanager-irc-relay.admin.swh.network
- alertmanager-irc-relay.internal.admin.swh.network
# secret holding the .htpasswd information
authentication: ingress-nginx/basic-auth
authentication: basic-auth
tls:
enabled: true
# clusterIssuer: letsencrypt-production
......
......@@ -29,7 +29,7 @@ metallb:
ingressNginx:
enabled: false
version: 4.10.1
version: 4.12.1
namespace: ingress-nginx
# Needed when a chart use the application name to
# name deployed objects. The names can't be longer than 63 characters
......@@ -47,10 +47,11 @@ ingressNginx:
# loadBalanceIP: xxx.xxx.xxx.xxx
# annotations:
# annotation1: value
# config:
# # see https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/configmap/
# # for more configuration options
# worker_processes: 1
config:
# Allow pulling secrets from a separate namespace
allow-cross-namespace-resources: true
# see https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/configmap/
# for more configuration options
# requestedCpu: 90m
# requestedMemory: 100Mi
......
......@@ -49,6 +49,13 @@ Create a Kind Ingress for service .serviceType
{{- $annotations = mustMergeOverwrite $annotations ($configuration.ingress.extraAnnotations | default dict) -}}
{{- $annotations = mustMergeOverwrite $annotations ($endpoint_config.extraAnnotations | default dict) -}}
{{- $type := $configuration.type | default "rpc" -}}
{{- if (eq $type "grpc") }}
{{- $annotations = mustMergeOverwrite $annotations
(dict "nginx.ingress.kubernetes.io/ssl-redirect" "true"
"nginx.ingress.kubernetes.io/backend-protocol" "GRPC") -}}
{{- $_ := set $configuration.ingress "className" "nginx" -}}
{{- end }}
{{- range $annKey := keys $annotations -}}
{{- if not (get $annotations $annKey) -}}
{{- $_ := unset $annotations $annKey -}}
......
......@@ -14,6 +14,9 @@
- /entrypoints/graph-fetch-dataset.sh
env:
{{- include "swh.graph.volume.witnessfile" (dict "graphPath" .graphPath) | nindent 2 }}
{{- include "swh.graph.volume.witnessfile" (dict "witness_file_env_variable_name" "WITNESS_DOWNLOADING_FILE"
"filename" ".graph-is-downloading"
"graphPath" .graphPath) | nindent 2 }}
- name: SWH_CONFIG_FILENAME
value: /etc/swh/config.yml
- name: GRAPH_NAME
......@@ -88,11 +91,13 @@
image: {{ .Values.swh_utils_image }}:{{ .Values.swh_utils_image_version }}
imagePullPolicy: IfNotPresent
command:
- /entrypoints/graph-wait-for-dataset.sh
- /entrypoints/wait-for-dataset.sh
env:
{{- include "swh.graph.volume.witnessfile" (dict "graphPath" .graphPath) | nindent 4 }}
- name: DATASET_LOCATION
value: {{ .graphPath }}
- name: SERVICE_NAME
value: "graph"
- name: PERIOD
value: {{ .period | default "3" | quote }}
volumeMounts:
......@@ -121,6 +126,9 @@
{{- include "swh.graph.volume.witnessfile" (dict "witness_file_env_variable_name" "WITNESS_REINDEX_FILE"
"graphPath" .graphPath
"filename" ".graph-is-reindexed") | nindent 4 }}
{{- include "swh.graph.volume.witnessfile" (dict "witness_file_env_variable_name" "WITNESS_REINDEXING_FILE"
"graphPath" .graphPath
"filename" ".graph-is-reindexing") | nindent 4 }}
- name: DATASET_LOCATION
value: {{ .graphPath }}
- name: GRAPH_NAME
......
......@@ -4,14 +4,9 @@
{{- if and (hasKey $graphConfig "ingress") $graphConfig.ingress.enabled -}}
{{- $type := $graphConfig.type -}}
{{- $port := $graphConfig.port | default (ternary $.Values.graph.rpcPort $.Values.graph.grpcPort (eq $type "rpc")) -}}
{{- $annotations := $graphConfig.ingress.extraAnnotations | default dict }}
{{- if (eq $type "grpc") }}
{{- $annotations = mustMergeOverwrite $annotations
(dict "nginx.ingress.kubernetes.io/ssl-redirect" "true"
"nginx.ingress.kubernetes.io/backend-protocol" "GRPC") -}}
{{- $_ := set $graphConfig.ingress "extraAnnotations" $annotations -}}
{{- $_ := set $graphConfig.ingress "className" "nginx" -}}
{{ end }}
{{- if not (hasKey $graphConfig "port") }}
{{- $graphConfig := set $graphConfig "port" $port -}}
{{- end -}}
{{- include "swh.ingress" (dict "serviceType" ( print "graph-" $graphType )
"configuration" $graphConfig
"Values" $.Values) -}}
......
......@@ -2,7 +2,8 @@
* Create a provenance configmap for service .serviceType
*/}}
{{ define "swh.provenance.configmap" }}
{{- $graphConfigurationRef := required (print "<.configuration.graphConfigurationRef> key is mandatory" ) .configuration.graphConfigurationRef -}}
{{- $graphConfigurationRef := .configuration.graphConfigurationRef -}}
{{- if $graphConfigurationRef -}}
{{- $graphConfiguration := get .Values $graphConfigurationRef -}}
---
apiVersion: v1
......@@ -14,6 +15,7 @@ data:
config.yml.template: |
provenance:
{{- toYaml $graphConfiguration | nindent 6 -}}
{{ end }}
{{- end -}}
{{/*
......@@ -30,3 +32,49 @@ data:
"Values" .Values) -}}
{{- end }}
{{/*
 * Normalize one provenance deployment configuration and return it as YAML
 * (callers are expected to pipe the result through `fromYaml`).
 *
 * Parameters:
 *   .configuration: deployment configuration dict. It is updated in place
 *                   through `set`, so the caller's dict receives the same keys.
 *   .Values:        chart values, used for the fallback defaults.
 *
 * Keys set on the configuration:
 *   - imageVersion: deployment value, else provenance.imageVersion, else
 *     swh_provenance_image_version
 *   - type: "rpc" when not provided
 *   - port: provenance.rpcPort for "rpc", provenance.grpcPort otherwise
 *   - logLevel: upper-cased, "INFO" when nothing is configured
 *   - type "rpc": gunicornConfig (graphConfigurationRef is mandatory)
 *   - other types: provenanceDatasetFull, provenanceIndexDataset,
 *     provenanceFetchDataset, graphPath, provenancePath, and datasetName when
 *     the dataset has to be fetched or indexed
 *
 * Mandatory nested keys are looked up with `dig` so that a missing
 * intermediate key (e.g. no `dataset` at all) reaches `required` and fails
 * with the explicit message instead of a generic nil-pointer template error.
 */}}
{{ define "swh.provenance.config" }}
{{- $configuration := .configuration -}}
{{- $provenanceImageVersion := get $configuration "imageVersion" | default $.Values.provenance.imageVersion | default $.Values.swh_provenance_image_version -}}
{{- $_ := set $configuration "imageVersion" $provenanceImageVersion -}}
{{- if not (hasKey $configuration "type") -}}
  {{- $_ := set $configuration "type" "rpc" -}}
{{- end -}}
{{- $type := $configuration.type -}}
{{- if not (hasKey $configuration "port") -}}
  {{- $port := eq $type "rpc" | ternary $.Values.provenance.rpcPort $.Values.provenance.grpcPort -}}
  {{- $_ := set $configuration "port" $port -}}
{{- end -}}
{{- $logLevel := $configuration.logLevel | default $.Values.provenance.logLevel | default "INFO" | upper -}}
{{- $_ := set $configuration "logLevel" $logLevel -}}
{{- if eq $type "rpc" -}}
  {{- /* Only checked for presence; the value itself is read by the configmap helper */ -}}
  {{- $_ := required (print "<.configuration.graphConfigurationRef> key is mandatory" ) .configuration.graphConfigurationRef -}}
  {{- $gunicornConfig := fromYaml
        (include "swh.gunicorn.config"
          (dict "defaultConfiguration" $.Values.provenance.gunicorn
                "configuration" $configuration.gunicorn)) -}}
  {{- $_ := set $configuration "gunicornConfig" $gunicornConfig -}}
{{- else -}}
  {{- $provenanceDatasetFull := dig "dataset" "provenance" "full" true $configuration -}}
  {{- $_ := set $configuration "provenanceDatasetFull" $provenanceDatasetFull -}}
  {{- $provenanceIndexDataset := dig "dataset" "provenance" "index" false $configuration -}}
  {{- $_ := set $configuration "provenanceIndexDataset" $provenanceIndexDataset -}}
  {{- $provenanceFetchDataset := dig "dataset" "provenance" "fetch" false $configuration -}}
  {{- $_ := set $configuration "provenanceFetchDataset" $provenanceFetchDataset -}}
  {{- if or $provenanceIndexDataset $provenanceFetchDataset -}}
    {{- $datasetName := required (print "<.configuration.dataset.name> key is mandatory") (dig "dataset" "name" "" $configuration) -}}
    {{- $_ := set $configuration "datasetName" $datasetName -}}
  {{- end -}}
  {{- $graphPath := required (print "<.configuration.dataset.graph.path> key is mandatory") (dig "dataset" "graph" "path" "" $configuration) -}}
  {{- $_ := set $configuration "graphPath" $graphPath -}}
  {{- $provenancePath := required (print "<.configuration.dataset.provenance.path> key is mandatory") (dig "dataset" "provenance" "path" "" $configuration) -}}
  {{- $_ := set $configuration "provenancePath" $provenancePath -}}
{{- end -}}
{{- toYaml $configuration -}}
{{- end }}
{{/*
 * Render a single env entry whose value is the path of a witness (marker)
 * file located under .provenancePath.
 *
 * Parameters:
 *   .provenancePath:                 directory holding the witness file
 *   .witness_file_env_variable_name: env variable name (default "WITNESS_FILE")
 *   .filename:                       witness file name
 *                                    (default ".provenance-is-initialized")
 */}}
{{- define "swh.provenance.volume.witnessfile" -}}
- name: {{ .witness_file_env_variable_name | default "WITNESS_FILE" }}
  value: {{ printf "%s/%s" .provenancePath (.filename | default ".provenance-is-initialized") | quote }}
{{- end -}}
{{/*
 * Init-container running /entrypoints/provenance-fetch-datasets.sh.
 *
 * Parameters:
 *   .Values:          chart values (image name and default image version)
 *   .imagePrefixName: values key of the image (default "swh_provenance_image");
 *                     the default version is read from "<prefix>_version"
 *   .imageVersion:    image version override
 *   .containerName:   container name (default "fetch-provenance-dataset")
 *   .provenancePath:  exported as PROVENANCE_PATH, also the witness files folder
 *   .graphPath:       exported as GRAPH_PATH
 *   .datasetName:     exported as DATASET_VERSION
 *   .datasetFull:     exported as PROVENANCE_DATASET_FULL
 *   .extraVolumes:    extra volumes to mount (name -> mountPath/readOnly)
 *
 * Env values are quoted so that a dataset name looking like a date or a
 * number, or an empty one, always reaches the container as a string.
 */}}
{{- define "swh.provenance.fetchDataset" -}}
{{- $imagePrefixName := .imagePrefixName | default "swh_provenance_image" -}}
{{- $imageVersion := .imageVersion | default ( get .Values (print $imagePrefixName "_version") ) -}}
- name: {{ .containerName | default "fetch-provenance-dataset" }}
  image: {{ get .Values $imagePrefixName }}:{{ $imageVersion }}
  imagePullPolicy: IfNotPresent
  command:
  - /entrypoints/provenance-fetch-datasets.sh
  env:
  {{- include "swh.provenance.volume.witnessfile" (dict "witness_file_env_variable_name" "WITNESS_FETCH_FILE"
                                                        "provenancePath" .provenancePath) | nindent 2 }}
  {{- include "swh.provenance.volume.witnessfile" (dict "witness_file_env_variable_name" "WITNESS_DOWNLOADING_FILE"
                                                        "provenancePath" .provenancePath
                                                        "filename" ".provenance-is-downloading") | nindent 2 }}
  - name: SWH_CONFIG_FILENAME
    value: /etc/swh/config.yml
  - name: PROVENANCE_PATH
    value: {{ .provenancePath | quote }}
  - name: PROVENANCE_DATASET_FULL
    value: {{ .datasetFull | quote }}
  - name: GRAPH_PATH
    value: {{ .graphPath | quote }}
  - name: DATASET_VERSION
    value: {{ .datasetName | default "" | quote }}
  volumeMounts:
  - name: configuration
    mountPath: /etc/swh
  - name: backend-utils
    mountPath: /entrypoints
  {{- range $volumeName, $volumeConfig := .extraVolumes }}
  - name: {{ $volumeName }}
    mountPath: {{ $volumeConfig.mountPath }}
    readOnly: {{ $volumeConfig.readOnly | default "false" }}
  {{- end }}
{{- end -}}
{{/*
 * Init-container blocking until the provenance witness file shows up.
 * It runs /entrypoints/wait-for-dataset.sh with WITNESS_FILE pointing under
 * .provenancePath, polling every .period seconds ("3" when unset).
 * Volumes listed in .extraVolumes are mounted next to the backend-utils scripts.
 */}}
{{- define "swh.provenance.waitForDataset" -}}
{{- $witnessArgs := dict "provenancePath" .provenancePath -}}
{{- $pollPeriod := .period | default "3" -}}
- name: wait-for-dataset
  image: {{ .Values.swh_utils_image }}:{{ .Values.swh_utils_image_version }}
  imagePullPolicy: IfNotPresent
  command:
    - /entrypoints/wait-for-dataset.sh
  env:
    {{- include "swh.provenance.volume.witnessfile" $witnessArgs | nindent 4 }}
    - name: SERVICE_NAME
      value: "provenance"
    - name: PERIOD
      value: {{ $pollPeriod | quote }}
  volumeMounts:
    - name: backend-utils
      mountPath: /entrypoints
      readOnly: true
    {{- range $name, $volume := .extraVolumes }}
    - name: {{ $name }}
      mountPath: {{ $volume.mountPath }}
      readOnly: {{ $volume.readOnly | default "false" }}
    {{ end }}
{{- end -}}
{{/*
 * Init-container running /entrypoints/provenance-index-dataset.sh.
 * The image comes from the values key .imagePrefixName (default
 * "swh_provenance_image"); its version is .imageVersion or, failing that,
 * the "<prefix>_version" value. Three witness file paths under
 * .provenancePath are exported: WITNESS_DATASETS_FILE (default file name),
 * WITNESS_INDEX_FILE and WITNESS_INDEXING_FILE.
 */}}
{{- define "swh.provenance.indexDataset" -}}
{{- $imageKey := default "swh_provenance_image" .imagePrefixName -}}
{{- $imageTag := default (get .Values (print $imageKey "_version")) .imageVersion -}}
{{- $witnessPath := .provenancePath -}}
- name: {{ default "index-provenance-dataset" .containerName }}
  image: {{ get .Values $imageKey }}:{{ $imageTag }}
  imagePullPolicy: IfNotPresent
  command:
    - /entrypoints/provenance-index-dataset.sh
  env:
    {{- include "swh.provenance.volume.witnessfile" (dict "provenancePath" $witnessPath
                                                          "witness_file_env_variable_name" "WITNESS_DATASETS_FILE") | nindent 4 }}
    {{- include "swh.provenance.volume.witnessfile" (dict "provenancePath" $witnessPath
                                                          "filename" ".provenance-is-indexed"
                                                          "witness_file_env_variable_name" "WITNESS_INDEX_FILE") | nindent 4 }}
    {{- include "swh.provenance.volume.witnessfile" (dict "provenancePath" $witnessPath
                                                          "filename" ".provenance-is-indexing"
                                                          "witness_file_env_variable_name" "WITNESS_INDEXING_FILE") | nindent 4 }}
    - name: PROVENANCE_PATH
      value: {{ .provenancePath }}
    - name: PERIOD
      value: {{ default "3" .period | quote }}
  volumeMounts:
    - name: backend-utils
      mountPath: /entrypoints
      readOnly: true
    {{- range $name, $volume := .extraVolumes }}
    - name: {{ $name }}
      mountPath: {{ $volume.mountPath }}
      readOnly: {{ $volume.readOnly | default "false" }}
    {{ end }}
{{- end -}}
{{- /*
  Render the configuration and gunicorn logging configmaps of every enabled
  "rpc" provenance deployment. Deployments of any other type get no configmap.
  The helpers receive the normalized configuration returned by
  "swh.provenance.config"; the "configuration" key is given only once, since
  `dict` keeps the last value of a repeated key and the raw deployment
  configuration passed first was never used.
*/ -}}
{{ if .Values.provenance.enabled -}}
{{ range $provenanceType, $deploymentConfig := .Values.provenance.deployments }}
{{- if or (not (hasKey $deploymentConfig "enabled")) (get $deploymentConfig "enabled") -}}
{{- $provenanceConfig := include "swh.provenance.config" (dict "configuration" $deploymentConfig
                                                               "Values" $.Values) | fromYaml -}}
{{- $type := $provenanceConfig.type -}}
{{- if eq $type "rpc" -}}
{{- $argsDict := dict "serviceType" (print "provenance-" $provenanceType)
                      "configuration" $provenanceConfig
                      "Values" $.Values -}}
{{ include "swh.provenance.configmap" $argsDict }}
{{ include "swh.provenance.gunicorn.logging" $argsDict }}
{{- end -}}
{{- end -}}
{{ end -}}
{{- end -}}
{{ if .Values.provenance.enabled -}}
{{- $configUtilsChecksum := include (print $.Template.BasePath "/utils/config-utils.yaml") . | sha256sum -}}
{{- $backendUtilsChecksum := include (print $.Template.BasePath "/utils/backend-utils.yaml") . | sha256sum -}}
{{ range $provenanceType, $provenanceConfig := .Values.provenance.deployments }}
{{- if or (not (hasKey $provenanceConfig "enabled")) (get $provenanceConfig "enabled") -}}
{{- if and (or (not (hasKey $provenanceConfig "enabled")) (get $provenanceConfig "enabled"))
(or (not (hasKey $provenanceConfig "startService")) (get $provenanceConfig "startService")) -}}
{{- $serviceType := ( print "provenance-" $provenanceType ) -}}
{{- $configArgsDict := dict "serviceType" $serviceType
"configuration" $provenanceConfig
......@@ -9,9 +12,19 @@
{{- $configChecksum := include "swh.provenance.configmap" $configArgsDict | sha256sum -}}
{{- $configLoggingChecksum := include "swh.provenance.gunicorn.logging" $configArgsDict | sha256sum -}}
{{- $securityContext := $provenanceConfig.securityContext | default $.Values.provenance.securityContext -}}
{{ $gunicornConfig := fromYaml (include "swh.gunicorn.config"
(dict "configuration" $provenanceConfig.gunicorn
"defaultConfiguration" $.Values.provenance.gunicorn)) -}}
{{- $provenanceConfig := include "swh.provenance.config" (dict "configuration" $provenanceConfig
"Values" $.Values) | fromYaml -}}
{{- $type := $provenanceConfig.type -}}
{{- $port := $provenanceConfig.port -}}
{{- $logLevel := $provenanceConfig.logLevel -}}
{{- $datasetName := $provenanceConfig.datasetName -}}
{{- $graphPath := $provenanceConfig.graphPath -}}
{{- $provenancePath := $provenanceConfig.provenancePath -}}
{{- $gunicornConfig := $provenanceConfig.gunicornConfig -}}
{{- $provenanceFetchDataset := $provenanceConfig.provenanceFetchDataset -}}
{{- $provenanceIndexDataset := $provenanceConfig.provenanceIndexDataset -}}
{{- $provenanceImageVersion := $provenanceConfig.imageVersion -}}
{{- $provenanceDatasetFull := $provenanceConfig.provenanceDatasetFull -}}
---
apiVersion: apps/v1
kind: Deployment
......@@ -40,6 +53,7 @@ spec:
checksum/config: {{ $configChecksum }}
checksum/config-logging: {{ $configLoggingChecksum }}
checksum/config-utils: {{ $configUtilsChecksum }}
checksum/backend-utils: {{ $backendUtilsChecksum }}
spec:
{{- if $securityContext }}
securityContext:
......@@ -60,6 +74,7 @@ spec:
priorityClassName: {{ $.Values.namespace }}-{{ $.Values.provenance.priorityClassName }}
{{- end }}
initContainers:
{{- if eq $type "rpc" }}
- name: prepare-configuration
image: {{ $.Values.swh_utils_image }}:{{ $.Values.swh_utils_image_version }}
imagePullPolicy: IfNotPresent
......@@ -75,6 +90,26 @@ spec:
- name: config-utils
mountPath: /entrypoints
readOnly: true
{{- end }}
{{- if $provenanceFetchDataset }}
{{- include "swh.provenance.fetchDataset" (dict "Values" $.Values
"datasetName" $datasetName
"datasetFull" $provenanceDatasetFull
"provenancePath" $provenancePath
"graphPath" $graphPath
"extraVolumes" $provenanceConfig.extraVolumes) | nindent 8 -}}
{{- end }}
{{- if $provenanceIndexDataset }}
{{- include "swh.provenance.indexDataset" (dict "Values" $.Values
"imageVersion" $provenanceImageVersion
"provenancePath" $provenancePath
"extraVolumes" $provenanceConfig.extraVolumes) | nindent 8 -}}
{{- end }}
{{- if $provenanceFetchDataset }}
{{- include "swh.provenance.waitForDataset" (dict "Values" $.Values
"provenancePath" $provenancePath
"extraVolumes" $provenanceConfig.extraVolumes) | nindent 8 -}}
{{- end }}
containers:
- name: {{ $serviceType }}
resources:
......@@ -93,18 +128,23 @@ spec:
image: {{ $.Values.swh_provenance_image }}:{{ $.Values.swh_provenance_image_version }}
imagePullPolicy: IfNotPresent
ports:
- containerPort: {{ $provenanceConfig.port | default $.Values.provenance.port }}
name: rpc
- containerPort: {{ $port }}
name: {{ $type }}
readinessProbe:
{{- if (eq $type "rpc") }}
httpGet:
path: /
port: rpc
port: {{ $type }}
{{- else }}
tcpSocket:
port: {{ $type }}
{{- end }}
initialDelaySeconds: 15
failureThreshold: 30
periodSeconds: 5
livenessProbe:
tcpSocket:
port: rpc
port: {{ $type }}
initialDelaySeconds: 10
periodSeconds: 5
command:
......@@ -113,13 +153,28 @@ spec:
- -c
- /opt/swh/entrypoint.sh
env:
{{ if $gunicornConfig -}}
- name: PROVENANCE_TYPE
value: {{ $type }}
- name: PORT
value: {{ $port | quote }}
{{ if eq $type "rpc" -}}
- name: WORKERS
value: {{ $gunicornConfig.workers | quote }}
- name: THREADS
value: {{ $gunicornConfig.threads | quote }}
- name: TIMEOUT
value: {{ $gunicornConfig.timeout | quote }}
- name: SWH_CONFIG_FILENAME
value: /etc/swh/config.yml
- name: SWH_LOG_CONFIG_JSON
value: /etc/swh/logging/logging-gunicorn.json
- name: STATSD_SERVICE_TYPE
value: {{ $serviceType }}
{{ else -}}
- name: PROVENANCE_PATH
value: {{ $provenancePath }}
- name: GRAPH_PATH
value: {{ $graphPath }}/graph
{{ end -}}
- name: STATSD_HOST
value: {{ $.Values.statsdExternalHost | default "prometheus-statsd-exporter" }}
......@@ -127,12 +182,8 @@ spec:
value: {{ $.Values.statsdPort | default "9125" | quote }}
- name: STATSD_TAGS
value: deployment:{{ $serviceType }}
- name: STATSD_SERVICE_TYPE
value: {{ $serviceType }}
- name: SWH_LOG_LEVEL
value: {{ $provenanceConfig.logLevel | default $.Values.provenance.logLevel | default "INFO" | quote }}
- name: SWH_LOG_CONFIG_JSON
value: /etc/swh/logging/logging-gunicorn.json
value: {{ $logLevel }}
{{- if $.Values.provenance.sentry.enabled }}
- name: SWH_SENTRY_ENVIRONMENT
value: {{ $.Values.sentry.environment }}
......@@ -150,10 +201,12 @@ spec:
value: "true"
{{- end }}
volumeMounts:
{{- if eq $type "rpc" }}
- name: configuration
mountPath: /etc/swh
- name: configuration-logging
mountPath: /etc/swh/logging
{{ end }}
{{- range $volumeName, $volumeConfig := $provenanceConfig.extraVolumes }}
- name: {{ $volumeName }}
mountPath: {{ $volumeConfig.mountPath }}
......@@ -162,6 +215,7 @@ spec:
volumes:
- name: configuration
emptyDir: {}
{{- if eq $type "rpc" }}
- name: configuration-template
configMap:
name: {{ $serviceType }}-configuration-template
......@@ -174,10 +228,15 @@ spec:
items:
- key: "logging-gunicorn.json"
path: "logging-gunicorn.json"
{{ end }}
- name: config-utils
configMap:
name: config-utils
defaultMode: 0555
- name: backend-utils
configMap:
name: backend-utils
defaultMode: 0555
{{- range $volumeName, $volumeConfig := $provenanceConfig.extraVolumes }}
- name: {{ $volumeName }}
{{- toYaml $volumeConfig.volumeDefinition | nindent 8 }}
......
......@@ -3,8 +3,11 @@
{{- $serviceType := ( print "provenance-" $provenanceType ) -}}
{{- if or (not (hasKey $deploymentConfig "enabled")) (get $deploymentConfig "enabled") }}
{{- if and (hasKey $deploymentConfig "ingress") $deploymentConfig.ingress.enabled -}}
{{- $configuration := include "swh.provenance.config" (dict "configuration" $deploymentConfig
"Values" $.Values) | fromYaml -}}
{{- include "swh.ingress" (dict "serviceType" $serviceType
"configuration" $deploymentConfig
"configuration" $configuration
"Values" $.Values) -}}
{{- end -}}
{{- if $deploymentConfig.extraIngresses -}}
......@@ -14,8 +17,10 @@
{{- $mergedIngressConfig :=
mustMergeOverwrite (omit $deploymentConfig "extraIngresses" "ingress")
(dict "ingress" (mustMergeOverwrite $deploymentConfig.ingress $extraIngressConfig)) -}}
{{- $config := include "swh.provenance.config" (dict "configuration" $mergedIngressConfig
"Values" $.Values) | fromYaml -}}
{{- include "swh.ingress" (dict "serviceType" $serviceType
"configuration" $mergedIngressConfig
"configuration" $config
"extraNameLabel" ( print "extra-" $extraNameLabelCtr )
"Values" $.Values) -}}
{{- end -}}
......
{{ if .Values.provenance.enabled -}}
{{ range $provenanceType, $deploymentConfig := .Values.provenance.deployments }}
{{- if or (not (hasKey $deploymentConfig "enabled")) (get $deploymentConfig "enabled") -}}
{{- if not (hasKey $deploymentConfig "port") }}
{{- $deploymentConfig := set $deploymentConfig "port" $.Values.provenance.port -}}
{{- end -}}
{{- $configuration := include "swh.provenance.config" (dict "configuration" $deploymentConfig
"Values" $.Values) | fromYaml -}}
{{- include "swh.service" (dict "serviceType" ( print "provenance-" $provenanceType )
"configuration" $deploymentConfig
"configuration" $configuration
"Values" $.Values) -}}
{{- end -}}
{{ end -}}
......
......@@ -24,53 +24,55 @@ data:
[ -z "${GRAPH_NAME}" ] && \
echo "<GRAPH_NAME> env variable must be set" && exit 1
[ -f ${WITNESS_FILE} ] && echo "Graph ready, do nothing." && exit 0
[ -f "${WITNESS_FILE}" ] && echo "Graph ready, do nothing." && exit 0
# Let's wait for the dataset installation
while [ ! -f ${WITNESS_SOURCE_FILE} ]; do
while [ ! -f "${WITNESS_SOURCE_FILE}" ]; do
echo "${WITNESS_SOURCE_FILE} missing, waiting graph dataset installation..."
sleep $PERIOD
sleep ${PERIOD}
done
# Let's wait for the reindexation to be done
if [ ! -z "${WITNESS_REINDEX_FILE}" ]; then
while [ ! -f ${WITNESS_REINDEX_FILE} ]; do
while [ ! -f "${WITNESS_REINDEX_FILE}" ]; do
echo "${WITNESS_REINDEX_FILE} missing, waiting graph dataset reindexation..."
sleep $PERIOD
sleep ${PERIOD}
done
fi
# Create empty dataset location destination for copy to be ok
mkdir -p ${DATASET_LOCATION}
mkdir -p "${DATASET_LOCATION}"
graph_stats=${GRAPH_NAME}.stats
graph_stats="${GRAPH_NAME}.stats"
# Symlink all files from dataset source to the destination (including the *.graph)
[ -L "${DATASET_LOCATION}/${graph_stats}" ] || \
ln -sf ${DATASET_SOURCE}/* ${DATASET_LOCATION}/
ln -sf "${DATASET_SOURCE}"/* "${DATASET_LOCATION}/"
graph_name=${GRAPH_NAME}.graph
graph_name="${GRAPH_NAME}.graph"
# We hard-copy the *.graph file
if [ -L "${DATASET_LOCATION}/${graph_name}" ] || ! [ -f ${DATASET_LOCATION}/${graph_name} ]; then
cp -v --remove-destination ${DATASET_SOURCE}/${graph_name} ${DATASET_LOCATION}/;
if [ -L "${DATASET_LOCATION}/${graph_name}" ] || ! [ -f "${DATASET_LOCATION}/${graph_name}" ]; then
cp -v --remove-destination "${DATASET_SOURCE}/${graph_name}" "${DATASET_LOCATION}/";
fi
graph_transposed_name=${GRAPH_NAME}-transposed.graph
if [ -L ${DATASET_LOCATION}/${graph_transposed_name} ] || ! [ -f ${DATASET_LOCATION}/${graph_transposed_name} ]; then
cp -v --remove-destination ${DATASET_SOURCE}/${graph_transposed_name} ${DATASET_LOCATION}/;
graph_transposed_name="${GRAPH_NAME}-transposed.graph"
if [ -L "${DATASET_LOCATION}/${graph_transposed_name}" ] || ! [ -f "${DATASET_LOCATION}/${graph_transposed_name}" ]; then
cp -v --remove-destination "${DATASET_SOURCE}/${graph_transposed_name}" "${DATASET_LOCATION}/";
fi
# Finally, we make explicit the graph is ready
touch ${WITNESS_FILE}
touch "${WITNESS_FILE}"
graph-wait-for-dataset.sh: |
wait-for-dataset.sh: |
#!/usr/bin/env bash
# Uses env variables WITNESS_FILE
[ -z "${WITNESS_FILE}" ] && \
echo "<WITNESS_FILE> env variable must be set" && exit 1
[ -z "${SERVICE_NAME}" ] && \
echo "<SERVICE_NAME> env variable must be set" && exit 1
while [ ! -f ${WITNESS_FILE} ]; do
echo "${WITNESS_FILE} not present, wait for it to start the graph..."
sleep $PERIOD
while [ ! -f "${WITNESS_FILE}" ]; do
echo "${WITNESS_FILE} not present, wait for ${SERVICE_NAME} to start..."
sleep ${PERIOD}
done
graph-fetch-dataset.sh: |
......@@ -84,13 +86,25 @@ data:
echo "<WITNESS_FILE> env variable must be set" && exit 1
[ -z "${GRAPH_NAME}" ] && \
echo "<GRAPH_NAME> env variable must be set" && exit 1
[ -z "${WITNESS_FILE}" ] && \
echo "<WITNESS_FILE> env variable must be set" && exit 1
[ -z "${WITNESS_DOWNLOADING_FILE}" ] && \
echo "<WITNESS_DOWNLOADING_FILE> env variable must be set" && exit 1
set -eux
[ -f ${WITNESS_FILE} ] && \
[ -f "${WITNESS_FILE}" ] && \
echo "Dataset <${DATASET_NAME}> already present. Skip." && \
exit 0
[ -f "${WITNESS_DOWNLOADING_FILE}" ] && \
echo "Dataset <${DATASET_NAME}> download is ongoing. Skip." && \
exit 0
# Mark that we start the download
mkdir -p $(dirname "${WITNESS_DOWNLOADING_FILE}")
touch "${WITNESS_DOWNLOADING_FILE}"
case "${DATASET_NAME}" in
test|example)
# For test (or example) dataset sample, clone the source repository of
......@@ -99,20 +113,19 @@ data:
--depth 1 \
https://gitlab.softwareheritage.org/swh/devel/swh-graph.git/ \
/tmp/swh-graph
# Create empty dataset location destination for copy to be ok
mkdir -p ${DATASET_LOCATION} && rmdir ${DATASET_LOCATION}
# Actual copy of the test dataset
cp -r /tmp/swh-graph/swh/graph/example_dataset/compressed \
${DATASET_LOCATION}
cp -r /tmp/swh-graph/swh/graph/example_dataset/compressed/* \
"${DATASET_LOCATION}"
# Make explicit the graph is ready
touch ${WITNESS_FILE}
touch "${WITNESS_FILE}"
;;
*)
touch "${WITNESS_DOWNLOADING_FILE}"
# Otherwise, download the dataset locally
swh graph download \
--name ${DATASET_NAME} \
${DATASET_LOCATION}
--name "${DATASET_NAME}" \
"${DATASET_LOCATION}"
# Reindex graph dataset (for those anterior to 2024). This should not be
# necessary for most recent graph datasets.
......@@ -120,21 +133,28 @@ data:
# For old datasets missing a .ef though, this just fails with
# `2024-09-02T14:11:56.190692004Z graph-rpc-python3k 0: Cannot map
# Elias-Fano pointer list .../graph.ef`, so we trigger a reindex step
reindex_witness_file=${DATASET_LOCATION}/${GRAPH_NAME}.ef
[ ! -f $reindex_witness_file ] && \
swh graph reindex --ef ${DATASET_LOCATION}/${GRAPH_NAME}
reindex_witness_file="${DATASET_LOCATION}/${GRAPH_NAME}.ef"
[ ! -f "$reindex_witness_file" ] && \
swh graph reindex --ef "${DATASET_LOCATION}/${GRAPH_NAME}"
# Make explicit the graph is ready
touch ${WITNESS_FILE}
touch "${WITNESS_FILE}"
;;
esac
# We are done so we clean up the download witness file
[ -f "${WITNESS_DOWNLOADING_FILE}" ] && rm "${WITNESS_DOWNLOADING_FILE}"
graph-reindex-dataset.sh: |
#!/usr/bin/env bash
[ -z "${WITNESS_SOURCE_FILE}" ] && \
echo "<WITNESS_SOURCE_FILE> env variable must be set" && exit 1
[ -z "${WITNESS_REINDEX_FILE}" ] && \
echo "<WITNESS_REINDEX_FILE> env variable must be set" && exit 1
[ -z "${WITNESS_REINDEXING_FILE}" ] && \
echo "<WITNESS_GRAPH_REINDEXING_FILE> env variable must be set" && exit 1
[ -z "${GRAPH_NAME}" ] && \
echo "<GRAPH_NAME> env variable must be set" && exit 1
[ -z "${PERIOD}" ] && \
echo "<PERIOD> env variable must be set" && exit 1
[ -z "${DATASET_LOCATION}" ] && \
......@@ -142,21 +162,165 @@ data:
[ -z "${GRAPH_NAME}" ] && \
echo "<GRAPH_NAME> env variable must be set" && exit 1
[ -f ${WITNESS_REINDEX_FILE} ] && echo "Graph reindexed, do nothing." && exit 0
[ -f "${WITNESS_REINDEX_FILE}" ] && \
echo "Graph reindexed, do nothing." && \
exit 0
[ -f "${WITNESS_REINDEXING_FILE}" ] && \
echo "Dataset <${DATASET_NAME}> indexation is ongoing. Skip." && \
exit 0
set -eux
# Let's wait for the dataset installation
while [ ! -f ${WITNESS_SOURCE_FILE} ]; do
while [ ! -f "${WITNESS_SOURCE_FILE}" ]; do
echo "${WITNESS_SOURCE_FILE} missing, waiting graph dataset installation..."
sleep $PERIOD
sleep ${PERIOD}
done
[ -f "${WITNESS_REINDEXING_FILE}" ] && \
echo "Graph index already being built. Skip." && exit 0
# Mark that we start the indexation
mkdir -p $(dirname "${WITNESS_REINDEXING_FILE}")
touch "${WITNESS_REINDEXING_FILE}"
# For old datasets missing a .ef or in the wrong format, this fails with
# `Cannot map Elias-Fano pointer list .../graph.ef`. The solution is to
# reindex the dataset
swh graph reindex --ef ${DATASET_LOCATION}/${GRAPH_NAME} && \
touch $WITNESS_REINDEX_FILE
swh graph reindex --ef "${DATASET_LOCATION}/${GRAPH_NAME}" && \
touch "${WITNESS_REINDEX_FILE}"
# We are done so we clean up the download witness file
[ -f "${WITNESS_REINDEXING_FILE}" ] && rm "${WITNESS_REINDEXING_FILE}"
provenance-fetch-datasets.sh: |
  #!/usr/bin/env bash
  # Fetch the provenance dataset (parquet files) and the extra compressed
  # graph files it needs, then uncompress the graph *.zst files.
  #
  # Env variables:
  #   WITNESS_FETCH_FILE:       created once everything is installed; when it
  #                             already exists the script does nothing
  #   WITNESS_DOWNLOADING_FILE: exists while a download is running, so that a
  #                             concurrent run skips instead of downloading too
  #   DATASET_VERSION:          dataset version used in the s3 urls
  #   PROVENANCE_PATH:          destination of the provenance parquet files
  #   GRAPH_PATH:               destination of the extra graph files
  #   PROVENANCE_DATASET_FULL:  "true" fetches provenance/all, any other value
  #                             (or unset) fetches provenance/heads

  [ -z "${WITNESS_FETCH_FILE}" ] && \
    echo "<WITNESS_FETCH_FILE> env variable must be set" && exit 1
  [ -z "${WITNESS_DOWNLOADING_FILE}" ] && \
    echo "<WITNESS_DOWNLOADING_FILE> env variable must be set" && exit 1
  [ -z "${DATASET_VERSION}" ] && \
    echo "<DATASET_VERSION> env variable must be set" && exit 1
  [ -z "${PROVENANCE_PATH}" ] && \
    echo "<PROVENANCE_PATH> env variable must be set" && exit 1
  [ -z "${GRAPH_PATH}" ] && \
    echo "<GRAPH_PATH> env variable must be set" && exit 1

  [ -f "${WITNESS_FETCH_FILE}" ] && \
    echo "Datasets graph & provenance <${DATASET_VERSION}> present. Skip." && \
    exit 0

  [ -f "${WITNESS_DOWNLOADING_FILE}" ] && \
    echo "Datasets graph & provenance <${DATASET_VERSION}> downloading. Skip." && \
    exit 0

  set -e

  # Make explicit an init-container is already downloading file
  mkdir -p "$(dirname "${WITNESS_DOWNLOADING_FILE}")"
  touch "${WITNESS_DOWNLOADING_FILE}"
  # From here on this run owns the downloading marker: remove it however the
  # script ends. Without this, a failed download (set -e) would leave the
  # marker behind and every later run would skip with exit 0 without ever
  # fetching the datasets.
  trap 'rm -f "${WITNESS_DOWNLOADING_FILE}"' EXIT

  # Create destination paths
  mkdir -p "${PROVENANCE_PATH}" "${GRAPH_PATH}"

  echo "Fetching datasets..."
  # Quoted so an unset variable selects the "heads" branch without a test error
  if [ "${PROVENANCE_DATASET_FULL}" = true ]; then
    # Retrieve all the provenance dataset (default behavior)
    REFS=all
  else
    # This excludes revisions not targeted by a snapshot
    # Ok to use for test purposes
    REFS=heads
  fi

  URL_PROVENANCE="s3://softwareheritage/derived_datasets/${DATASET_VERSION}/provenance/${REFS}/"
  # Array so that each option stays a separate word without unquoted expansion
  CMD_GET=(aws s3 cp --no-progress --no-sign-request)

  echo "1. Fetching provenance dataset (parquet files)..."
  "${CMD_GET[@]}" --recursive "${URL_PROVENANCE}" "${PROVENANCE_PATH}"
  echo "1. Provenance datasets installed!"

  echo "2. Fetching extra graph files..."
  URL_GRAPH="s3://softwareheritage/graph/${DATASET_VERSION}/compressed"
  for filename in graph.pthash graph.pthash.order graph.nodes.count.txt \
      graph.property.message.bin.zst \
      graph.property.message.offset.bin.zst \
      graph.property.tag_name.bin.zst \
      graph.property.tag_name.offset.bin.zst \
      graph.node2swhid.bin.zst graph.node2type.bin.zst; do
    "${CMD_GET[@]}" "${URL_GRAPH}/${filename}" "${GRAPH_PATH}"
  done
  echo "2. Extra graph files installed!"

  echo "3. Uncompressing graph files..."
  set -x
  # Uncompress the compressed graph *.zst files. The glob is expanded by the
  # shell itself; when nothing matches, the pattern is kept literally and the
  # -f test skips it.
  for filepath in "${GRAPH_PATH}"/*.zst; do
    if [ -f "${filepath}" ]; then
      # Uncompress and delete the .zst file
      unzstd --force --rm "${filepath}"
    fi
  done
  set +x
  echo "3. Graph files uncompressed!"

  echo "Provenance datasets installed!"

  # Cleanup witness downloading dataset file
  rm -f "${WITNESS_DOWNLOADING_FILE}"

  # Make explicit the provenance datasets are fetched
  touch "${WITNESS_FETCH_FILE}"
provenance-index-dataset.sh: |
  #!/usr/bin/env bash
  # Wait for the provenance dataset to be installed, then build its indexes
  # with swh-provenance-index.
  #
  # Required environment variables:
  #   WITNESS_DATASETS_FILE  file whose presence means the dataset is installed
  #   WITNESS_INDEX_FILE     created once the indexes are built
  #   WITNESS_INDEXING_FILE  present while an index build is in progress
  #   PERIOD                 argument given to sleep between two checks of
  #                          WITNESS_DATASETS_FILE
  #   PROVENANCE_PATH        location of the dataset (and of the built indexes)

  for var in WITNESS_DATASETS_FILE WITNESS_INDEX_FILE WITNESS_INDEXING_FILE \
             PERIOD PROVENANCE_PATH; do
    if [ -z "${!var}" ]; then
      echo "<${var}> env variable must be set"
      exit 1
    fi
  done

  [ -f "${WITNESS_INDEX_FILE}" ] && echo "Provenance already indexed, skip." && \
    exit 0

  [ -f "${WITNESS_INDEXING_FILE}" ] && echo "Provenance indices are building, skip." && \
    exit 0

  set -eu

  # Let's wait for the dataset installation
  while [ ! -f "${WITNESS_DATASETS_FILE}" ]; do
    echo "${WITNESS_DATASETS_FILE} missing, waiting provenance dataset installation..."
    sleep "${PERIOD}"
  done

  # Signal that this run is now indexing the provenance dataset
  mkdir -p "$(dirname "${WITNESS_INDEXING_FILE}")"
  touch "${WITNESS_INDEXING_FILE}"
  # Installed only once this run owns the witness: remove it when the script
  # exits, including on an error under `set -e`.
  trap 'rm -f "${WITNESS_INDEXING_FILE}"' EXIT

  echo "Datasets file installed, build provenance dataset indexes..."
  echo "provenance path: $PROVENANCE_PATH"

  set -x
  # To make the query faster, the provenance needs to build index out of the
  # current dataset files. We store the output indexes in the same path as
  # the dataset.
  # A failed build is only reported: the index witness is not created and the
  # script still goes on to the cleanup below.
  if swh-provenance-index --database "file://${PROVENANCE_PATH}" \
      && touch "${WITNESS_INDEX_FILE}"; then
    echo "Provenance indexes built!"
  else
    echo "Provenance indexes failed!"
  fi

  rm -f "${WITNESS_INDEXING_FILE}"
initialize-search-backend.sh: |
#!/usr/bin/env bash
......
......@@ -1895,22 +1895,58 @@ storage_backfiller:
provenance:
enabled: false
# Default service port (overridable per instance)
port: 5014
# Default rpc port
rpcPort: 5014
# Default grpc port
grpcPort: 50141
priorityClassName: frontend-rpc
sentry:
enabled: false
secretKeyRef: common-secrets
secretKeyName: provenance-sentry-dsn
deployments: {}
# instance:
# instance-rpc:
# enabled: false
# # The server type to deploy, either rpc or grpc
# type: rpc
# # Port to use, either inferred from the type or explicitly set (can be
# # overridden per instance)
# # port: 5014
# # For type rpc, the graph configuration to use
# graphConfigurationRef: graphConfiguration
# hosts: []
# ingress:
# enabled: true
# endpoints: []
# instance-grpc:
# enabled: false
# # The server type to deploy, either rpc or grpc
# type: grpc
# # For type grpc, datasets need to be available for the service to use
# dataset:
# # Name of the dataset, graph and provenance must be in sync so same
# # version must be used
# name: 2024-08-23-popular-500-python
# # Graph setup
# graph:
# # Where to store the files
# path: /srv/dataset/graph
# provenance:
# # Whether or not to fetch provenance dataset
# fetch: true
# # Whether or not to build the provenance dataset indices
# index: true
# # Whether to use a full dataset (true by default). False will use
# # heads dataset (for testing purposes).
# full: true
# # Where to store the parquet files
# path: /srv/dataset/provenance
# # Whether to start/stop the service
# startService: true
# hosts: []
# ingress:
# enabled: true
# endpoints: []
rpcLocalRustWithGrpcGraphConfiguration:
cls: local_rust
......
......@@ -456,11 +456,15 @@ webThrottling:
limiter_rate:
default: 120/h
provenanceConfiguration:
provenanceRpcConfiguration:
cls: remote
url: http://provenance-test:5014
enable_requests_retry: true
provenanceGrpcConfiguration:
cls: grpc
url: provenance-grpc-popular-ingress:80
webDjangoApps:
- swh.web.add_forge_now
- swh.web.archive_coverage
......@@ -477,6 +481,10 @@ webDjangoApps:
- swh.web.provenance
- swh.web.save_bulk
cornerRibbon:
show_corner_ribbon: "true"
corner_ribbon_text: "LocalVersion"
web:
enabled: false
deployments:
......@@ -530,9 +538,10 @@ web:
webhooksConfigurationRef: webhooksConfiguration
djangoAppsRef: webDjangoApps
graphConfigurationRef: graphConfiguration
cornerRibbonRef: cornerRibbon
# throttlingConfigurationRef: webThrottling
# keycloakConfigurationRef: fakeKeycloakConfiguration
provenanceConfigurationRef: provenanceConfiguration
provenanceConfigurationRef: provenanceGrpcConfiguration
hosts:
- web-local-archive-ingress
ingress:
......@@ -1455,18 +1464,48 @@ objstorageReplayer:
provenance:
enabled: false
deployments:
test:
test-rpc:
enabled: true
graphConfigurationRef: fakeGraphConfiguration
graphConfigurationRef: provenanceRpcWithRemoteGrpcGraphPopularConfiguration
replicas: 1
# gunicorn:
# workers: 4
# threads: 1
# timeout: 60
test-grpc:
enabled: true
type: grpc
dataset:
name: 2024-08-23-popular-500-python
provenance:
fetch: true
index: true
# For test purpose, use a dataset subset
full: false
path: /srv/dataset/provenance
graph:
path: /srv/dataset/graph
startService: true
extraVolumes:
dataset-persistent:
mountPath: /srv/dataset
volumeDefinition:
persistentVolumeClaim:
claimName: provenance-popular-persistent-pvc
fakeGraphConfiguration:
cls: graph
url: graph.i.s.o:50091
replicas: 1
hosts:
- provenance-grpc-popular-ingress
ingress:
enabled: true
extraAnnotations:
nginx.ingress.kubernetes.io/proxy-body-size: 4G
nginx.ingress.kubernetes.io/proxy-buffering: "on"
nginx.ingress.kubernetes.io/client-body-buffer-size: 128K
endpoints:
default:
paths:
- path: /
# Rpc Rust graph configuration (the rpc instance spawns a grpc instance too)
rpcLocalRustWithGrpcGraphConfiguration:
......@@ -1482,12 +1521,17 @@ rpcWithRemoteGrpcGraphExampleConfiguration:
port: 80
# The rpc graph instance will communicate with another grpc instance
rpcWithRemoteGrpcGraphPopularConfiguration:
graphRpcWithRemoteGrpcGraphPopularConfiguration:
cls: remote
url: graph-grpc-popular-ingress:80
grpc_server:
port: 80
# The equivalent provenance configuration to communicate with the graph
provenanceRpcWithRemoteGrpcGraphPopularConfiguration:
cls: graph
url: graph-grpc-popular-ingress:80
plainGrpcGraphConfiguration:
max_ram: 1g
......@@ -1512,10 +1556,24 @@ externalServices:
svix-test-ingress:
internalName: svix-test
target: local-cluster-control-plane
grpc-provenance:
internalName: provenance-grpc-popular-ingress
target: local-cluster-control-plane
volumes:
enabled: true
persistentVolumeClaims:
provenance-popular-persistent-pvc:
enabled: true
appName: provenance-test-grpc
spec:
storageClassName: local-persistent
volumeMode: Filesystem
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
alter-recovery-bundles-pvc:
enabled: true
appName: alter
......@@ -1706,7 +1764,7 @@ graph:
kubernetes.io/hostname: local-cluster-worker
type: rpc
port: 5009
graphConfigurationRef: rpcWithRemoteGrpcGraphPopularConfiguration
graphConfigurationRef: graphRpcWithRemoteGrpcGraphPopularConfiguration
startService: true
hosts:
- graph-rpc-popular-ingress
......
......@@ -2070,13 +2070,9 @@ loaders:
medium: Memory
sizeLimit: 25Gi
image: swh_loader_package_image
replicas: 6
queues:
- swh.loader.package.deposit.tasks.LoadDeposit
autoScaling:
queueThreshold: 1
stopWhenNoActivity: false
minReplicaCount: 4
maxReplicaCount: 12
# only used for secrets
depositConfigurationRef: depositDynamicConfiguration
ackLate: true
......@@ -2768,6 +2764,7 @@ alter:
inventoryStorageConfigurationRef: remoteROStorageConfiguration
graphConfigurationRef: alterGraphConfiguration
restorationStorageConfigurationRef: rpcWriterWorkloadStorageCassandraConfiguration
journalWriterMirrorConfigurationRef: journalWriterMirrorConfiguration
removalSearches:
search:
searchConfigurationRef: remoteSearchConfiguration
......