Skip to content
Snippets Groups Projects
Unverified Commit 9695886c authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

provenance/deployment: Allow to prepare grpc backend with data

For the grpc service, this adds the necessary helpers to fetch the dataset and
build the indices out of it, so the service is actually functional.

Refs. swh/infra/sysadm-environment#5608
parent 84fb121c
No related branches found
No related tags found
No related merge requests found
......@@ -88,7 +88,7 @@
image: {{ .Values.swh_utils_image }}:{{ .Values.swh_utils_image_version }}
imagePullPolicy: IfNotPresent
command:
- /entrypoints/graph-wait-for-dataset.sh
- /entrypoints/wait-for-dataset.sh
env:
{{- include "swh.graph.volume.witnessfile" (dict "graphPath" .graphPath) | nindent 4 }}
- name: DATASET_LOCATION
......
......@@ -39,6 +39,8 @@ data:
*/}}
{{ define "swh.provenance.config" }}
{{- $configuration := .configuration -}}
{{- $provenanceImageVersion := get $configuration "imageVersion" | default $.Values.provenance.imageVersion | default $.Values.swh_provenance_image_version -}}
{{- $configuration := set $configuration "imageVersion" $provenanceImageVersion -}}
{{- if not (hasKey $configuration "type") }}
{{- $configuration := set $configuration "type" "rpc" -}}
{{- end -}}
......
{{/*
Render a single container env entry whose value is the path of a witness
(marker) file located under the provenance path.

Expected dict keys:
- provenancePath: directory under which the witness file lives
- witness_file_env_variable_name: name of the env variable (default "WITNESS_FILE")
- filename: name of the witness file (default ".provenance-is-initialized")
*/}}
{{- define "swh.provenance.volume.witnessfile" -}}
{{- $envName := default "WITNESS_FILE" .witness_file_env_variable_name -}}
{{- $witnessName := default ".provenance-is-initialized" .filename -}}
- name: {{ $envName }}
  value: {{ .provenancePath }}/{{ $witnessName }}
{{- end -}}
{{/*
Init-container running /entrypoints/provenance-fetch-datasets.sh to fetch the
provenance dataset and the graph files it requires.

Expected dict keys:
- Values: the chart values (used to resolve the image name and version)
- imagePrefixName: key in Values holding the image name; "<imagePrefixName>_version"
  is the fallback key for the image version
- imageVersion: optional image version taking precedence over the Values one
- containerName: optional container name (default "fetch-provenance-dataset")
- provenancePath: destination of the provenance dataset (also holds the witness file)
- graphPath: destination of the graph files
- datasetName: dataset version to fetch, exported as DATASET_VERSION
- extraVolumes: optional map of volume name -> {mountPath, readOnly} to mount
*/}}
{{- define "swh.provenance.fetchDataset" -}}
{{- $image_version := get . "imageVersion" | default ( get .Values (print .imagePrefixName "_version") ) |
        required (print .imagePrefixName "_version is mandatory in values.yaml ") -}}
- name: {{ .containerName | default "fetch-provenance-dataset" }}
  image: {{ get .Values .imagePrefixName }}:{{ $image_version }}
  command:
  - /entrypoints/provenance-fetch-datasets.sh
  env:
  {{- /* The fetch script reads WITNESS_FETCH_FILE (not the default WITNESS_FILE) */}}
  {{- include "swh.provenance.volume.witnessfile" (dict "witness_file_env_variable_name" "WITNESS_FETCH_FILE"
                                                        "provenancePath" .provenancePath) | nindent 2 }}
  - name: SWH_CONFIG_FILENAME
    value: /etc/swh/config.yml
  - name: PROVENANCE_PATH
    value: {{ .provenancePath }}
  - name: GRAPH_PATH
    value: {{ .graphPath }}
  - name: DATASET_VERSION
    {{- /* Quoted so an empty or date-like dataset name stays a string */}}
    value: {{ .datasetName | default "" | quote }}
  volumeMounts:
  - name: configuration
    mountPath: /etc/swh
  - name: backend-utils
    mountPath: /entrypoints
  {{- range $volumeName, $volumeConfig := .extraVolumes }}
  - name: {{ $volumeName }}
    mountPath: {{ $volumeConfig.mountPath }}
    readOnly: {{ $volumeConfig.readOnly | default "false" }}
  {{ end }}
{{- end -}}
{{/*
Init-container running /entrypoints/wait-for-dataset.sh with the dataset
witness file exported as WITNESS_FILE.

Expected dict keys:
- Values: the chart values (swh_utils_image / swh_utils_image_version)
- provenancePath: directory holding the witness file
- period: optional value exported as PERIOD (default "3")
- extraVolumes: optional map of volume name -> {mountPath, readOnly} to mount
*/}}
{{- define "swh.provenance.waitForDataset" -}}
{{- $pollPeriod := default "3" .period -}}
- name: wait-for-dataset
  image: {{ .Values.swh_utils_image }}:{{ .Values.swh_utils_image_version }}
  imagePullPolicy: IfNotPresent
  command:
    - /entrypoints/wait-for-dataset.sh
  env:
    {{- include "swh.provenance.volume.witnessfile" (dict "provenancePath" .provenancePath) | nindent 4 }}
    - name: PERIOD
      value: {{ $pollPeriod | quote }}
  volumeMounts:
    - name: backend-utils
      mountPath: /entrypoints
      readOnly: true
    {{- range $mountName, $mount := .extraVolumes }}
    - name: {{ $mountName }}
      mountPath: {{ $mount.mountPath }}
      readOnly: {{ default "false" $mount.readOnly }}
    {{ end }}
{{- end -}}
{{/*
Init-container running /entrypoints/provenance-index-dataset.sh to build the
indexes out of the provenance dataset.

The script requires both WITNESS_SOURCE_FILE (the dataset witness it waits for
before indexing) and WITNESS_INDEX_FILE (the marker it creates once done).

Expected dict keys:
- Values: the chart values (used to resolve the image name and version)
- imagePrefixName: key in Values holding the image name; "<imagePrefixName>_version"
  is the fallback key for the image version
- imageVersion: optional image version taking precedence over the Values one
- containerName: optional container name (default "reindex-provenance-dataset")
- provenancePath: path of the dataset; witness files live there too
- period: optional value exported as PERIOD (default "3")
- extraVolumes: optional map of volume name -> {mountPath, readOnly} to mount
*/}}
{{- define "swh.provenance.indexDataset" -}}
{{- $image_version := get . "imageVersion" | default ( get .Values (print .imagePrefixName "_version") ) |
        required (print .imagePrefixName "_version is mandatory in values.yaml ") -}}
- name: {{ .containerName | default "reindex-provenance-dataset" }}
  image: {{ get .Values .imagePrefixName }}:{{ $image_version }}
  imagePullPolicy: IfNotPresent
  command:
    - /entrypoints/provenance-index-dataset.sh
  env:
    {{- /* Dataset witness (default filename), required by the index script */}}
    {{- include "swh.provenance.volume.witnessfile" (dict "witness_file_env_variable_name" "WITNESS_SOURCE_FILE"
                                                          "provenancePath" .provenancePath) | nindent 4 }}
    {{- include "swh.provenance.volume.witnessfile" (dict "witness_file_env_variable_name" "WITNESS_INDEX_FILE"
                                                          "provenancePath" .provenancePath
                                                          "filename" ".provenance-is-reindexed") | nindent 4 }}
    - name: PROVENANCE_PATH
      value: {{ .provenancePath }}
    - name: PERIOD
      value: {{ .period | default "3" | quote }}
  volumeMounts:
    - name: backend-utils
      mountPath: /entrypoints
      readOnly: true
    {{- range $volumeName, $volumeConfig := .extraVolumes }}
    - name: {{ $volumeName }}
      mountPath: {{ $volumeConfig.mountPath }}
      readOnly: {{ $volumeConfig.readOnly | default "false" }}
    {{ end }}
{{- end -}}
......@@ -16,10 +16,13 @@
{{- $type := $provenanceConfig.type -}}
{{- $port := $provenanceConfig.port -}}
{{- $logLevel := $provenanceConfig.logLevel -}}
{{- $datasetName := $provenanceConfig.datasetName | default "" -}}
{{- $graphPath := $provenanceConfig.graphPath | default "" -}}
{{- $provenancePath := $provenanceConfig.provenancePath | default "" -}}
{{- $gunicornConfig := $provenanceConfig.gunicornConfig | default dict -}}
{{- $datasetName := $provenanceConfig.datasetName -}}
{{- $graphPath := $provenanceConfig.graphPath -}}
{{- $provenancePath := $provenanceConfig.provenancePath -}}
{{- $gunicornConfig := $provenanceConfig.gunicornConfig -}}
{{- $provenanceFetchDataset := $provenanceConfig.provenanceFetchDataset -}}
{{- $provenanceIndexDataset := $provenanceConfig.provenanceIndexDataset -}}
{{- $provenanceImageVersion := $provenanceConfig.imageVersion -}}
---
apiVersion: apps/v1
kind: Deployment
......@@ -83,6 +86,26 @@ spec:
- name: config-utils
mountPath: /entrypoints
readOnly: true
{{- /*
Optional init-containers preparing the dataset for the service: fetch the
dataset, build its indexes, then wait for the dataset witness file.
The fetch container receives the same image version as the index one, so a
per-deployment image version applies to both.
*/ -}}
{{- if $provenanceFetchDataset }}
{{ include "swh.provenance.fetchDataset" (dict "Values" $.Values
                                               "imagePrefixName" "swh_provenance_image"
                                               "imageVersion" $provenanceImageVersion
                                               "datasetName" $datasetName
                                               "provenancePath" $provenancePath
                                               "graphPath" $graphPath
                                               "extraVolumes" $provenanceConfig.extraVolumes) | nindent 8 }}
{{- end }}
{{- if $provenanceIndexDataset }}
{{ include "swh.provenance.indexDataset" (dict "Values" $.Values
                                               "imagePrefixName" "swh_provenance_image"
                                               "imageVersion" $provenanceImageVersion
                                               "provenancePath" $provenancePath
                                               "extraVolumes" $provenanceConfig.extraVolumes) | nindent 8 }}
{{- end }}
{{- if $provenanceFetchDataset }}
{{ include "swh.provenance.waitForDataset" (dict "Values" $.Values
                                                 "provenancePath" $provenancePath
                                                 "extraVolumes" $provenanceConfig.extraVolumes) | nindent 8 }}
{{- end }}
containers:
- name: {{ $serviceType }}
resources:
......
......@@ -62,7 +62,7 @@ data:
# Finally, we make explicit the graph is ready
touch ${WITNESS_FILE}
graph-wait-for-dataset.sh: |
wait-for-dataset.sh: |
#!/usr/bin/env bash
# Uses env variables WITNESS_FILE
[ -z "${WITNESS_FILE}" ] && \
......@@ -158,6 +158,77 @@ data:
swh graph reindex --ef ${DATASET_LOCATION}/${GRAPH_NAME} && \
touch $WITNESS_REINDEX_FILE
provenance-fetch-datasets.sh: |
  #!/usr/bin/env bash
  # Fetch the provenance dataset (parquet files) and the graph files it needs
  # from s3, then create the witness file so later runs are skipped.
  #
  # Uses env variables:
  #   WITNESS_FETCH_FILE  marker file created once everything is fetched
  #   DATASET_VERSION     version of the dataset to retrieve
  #   PROVENANCE_PATH     destination of the provenance dataset
  #   GRAPH_PATH          destination of the graph files

  [ -z "${WITNESS_FETCH_FILE}" ] && \
    echo "<WITNESS_FETCH_FILE> env variable must be set" && exit 1
  [ -z "${DATASET_VERSION}" ] && \
    echo "<DATASET_VERSION> env variable must be set" && exit 1
  [ -z "${PROVENANCE_PATH}" ] && \
    echo "<PROVENANCE_PATH> env variable must be set" && exit 1
  [ -z "${GRAPH_PATH}" ] && \
    echo "<GRAPH_PATH> env variable must be set" && exit 1

  [ -f "${WITNESS_FETCH_FILE}" ] && \
    echo "Datasets graph & provenance <${DATASET_VERSION}> already present. Skip." && \
    exit 0

  # Stop at the first failure so a partial download never creates the witness
  # file (which would make every later run skip the fetch).
  set -e

  URL_PROVENANCE="s3://softwareheritage/derived_datasets/${DATASET_VERSION}/provenance/all/"
  CMD_GET="aws s3 cp --no-sign-request"

  # Retrieve the provenance dataset parquet files
  $CMD_GET --recursive "${URL_PROVENANCE}" "${PROVENANCE_PATH}"

  # Retrieve the required graph files
  URL_GRAPH="s3://softwareheritage/graph/${DATASET_VERSION}/compressed"
  for filename in graph.pthash graph.pthash.order graph.node2swhid.bin.zst graph.node2type.bin.zst; do
    $CMD_GET "${URL_GRAPH}/${filename}" "${GRAPH_PATH}"
  done

  # Uncompress the compressed graph files, in the directory they were fetched to
  pushd "${GRAPH_PATH}"
  for filename in graph.node2type.bin.zst graph.node2swhid.bin.zst; do
    # Uncompress and delete the .zst file
    if [ -f "${filename}" ]; then
      unzstd --rm "${filename}"
    fi
  done
  popd

  # Make explicit the provenance datasets are fetched
  touch "${WITNESS_FETCH_FILE}"
provenance-index-dataset.sh: |
  #!/usr/bin/env bash
  # Build the provenance indexes once the dataset is installed.
  #
  # Uses env variables:
  #   WITNESS_SOURCE_FILE  marker file awaited before indexing starts
  #   WITNESS_INDEX_FILE   marker file created once indexing succeeded
  #   PERIOD               delay given to sleep between two checks of the source marker
  #   PROVENANCE_PATH      path of the dataset, also used to store the indexes

  if [ -z "${WITNESS_SOURCE_FILE}" ]; then
    echo "<WITNESS_SOURCE_FILE> env variable must be set"
    exit 1
  fi
  if [ -z "${WITNESS_INDEX_FILE}" ]; then
    echo "<WITNESS_INDEX_FILE> env variable must be set"
    exit 1
  fi
  if [ -z "${PERIOD}" ]; then
    echo "<PERIOD> env variable must be set"
    exit 1
  fi
  if [ -z "${PROVENANCE_PATH}" ]; then
    echo "<PROVENANCE_PATH> env variable must be set"
    exit 1
  fi

  # Nothing to do when a previous run already built the indexes
  if [ -f ${WITNESS_INDEX_FILE} ]; then
    echo "Provenance already indexed, do nothing."
    exit 0
  fi

  set -eux

  # Block until the dataset installation is reported as done
  while [ ! -f "${WITNESS_SOURCE_FILE}" ]; do
    echo "${WITNESS_SOURCE_FILE} missing, waiting provenance dataset installation..."
    sleep $PERIOD
  done

  # Queries are faster with indexes built out of the current dataset files.
  # The indexes are written to the same path as the dataset itself.
  swh-provenance-index \
    --database ${PROVENANCE_PATH} \
    --indexes ${PROVENANCE_PATH}

  # Only reached when indexing succeeded (set -e aborts otherwise)
  touch "${WITNESS_INDEX_FILE}"
initialize-search-backend.sh: |
#!/usr/bin/env bash
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment