Skip to content
Snippets Groups Projects

deployment/provenance: Adapt template to manage grpc or rpc service

Closed Antoine R. Dumont requested to merge mr/adapt-provenance-deployment into production
1 file
+ 28
9
Compare changes
  • Side-by-side
  • Inline
@@ -173,31 +173,44 @@ data:
echo "Datasets graph & provenance <${DATASET_VERSION}> already present. Skip." && \
exit 0
set -eux
set -e
echo "Fetching datasets..."
URL_PROVENANCE="s3://softwareheritage/derived_datasets/${DATASET_VERSION}/provenance/all/"
CMD_GET="aws s3 cp --no-sign-request"
# Retrieve the provenance dataset parquet files
$CMD_GET --recursive "${URL_PROVENANCE}" "${PROVENANCE_PATH}"
echo "Fetching provenance dataset (parquet files)..."
CMD="${CMD_GET} --recursive '${URL_PROVENANCE}' '${PROVENANCE_PATH}'"
echo $CMD
$CMD
echo "Provenance datasets installed!"
# Retrieve the required graph files
echo "Fetching extra graph files..."
URL_GRAPH="s3://softwareheritage/graph/${DATASET_VERSION}/compressed"
for filename in graph.pthash graph.pthash.order graph.node2swhid.bin.zst graph.node2type.bin.zst; do
$CMD_GET "${URL_GRAPH}/${filename}" "${GRAPH_PATH}"
CMD="${CMD_GET} '${URL_GRAPH}/${filename}' '${GRAPH_PATH}'"
echo $CMD
$CMD
done
echo "Extra graph files installed!"
echo "Uncompressing graph files..."
# Uncompress the compressed graph *.zst files
for filename in graph.node2type.bin.zst graph.node2swhid.bin.zst; do
filepath="${GRAPH_PATH}/${filename}"
CMD="unzstd --rm '${filepath}'"
# Uncompress and delete the .zst file
[ -f "${filepath}" ] && unzstd --rm "${filepath}"
[ -f "${filepath}" ] && echo $CMD && $CMD
done
echo "Graph files uncompressed!"
# Make explicit the provenance datasets are fetched
touch ${WITNESS_FETCH_FILE}
echo "Provenance datasets installed!"
provenance-index-dataset.sh: |
#!/usr/bin/env bash
[ -z "${WITNESS_DATASETS_FILE}" ] && \
@@ -212,14 +225,17 @@ data:
[ -f ${WITNESS_INDEX_FILE} ] && echo "Provenance already indexed, do nothing." && \
exit 0
set -eux
set -eu
# Let's wait for the dataset installation
while [ ! -f "${WITNESS_SOURCE_FILE}" ]; do
echo "${WITNESS_SOURCE_FILE} missing, waiting provenance dataset installation..."
while [ ! -f "${WITNESS_DATASETS_FILE}" ]; do
echo "${WITNESS_DATASETS_FILE} missing, waiting provenance dataset installation..."
sleep $PERIOD
done
echo "Datasets file installed, build provenance dataset indexes..."
set -x
# To make the query faster, the provenance needs to build index out of the
# current dataset files. We store the output indexes in the same path as
# the dataset.
@@ -227,6 +243,9 @@ data:
--database ${PROVENANCE_PATH} \
--indexes ${PROVENANCE_PATH} && \
touch "${WITNESS_INDEX_FILE}"
set +x
echo "Provenance indexes built!"
initialize-search-backend.sh: |
#!/usr/bin/env bash
Loading