Skip to content
Snippets Groups Projects

deployment/provenance: Adapt template to manage grpc or rpc service

Closed Antoine R. Dumont requested to merge mr/adapt-provenance-deployment into production
1 file
+ 28
9
Compare changes
  • Side-by-side
  • Inline
@@ -173,31 +173,44 @@ data:
echo "Datasets graph & provenance <${DATASET_VERSION}> already present. Skip." && \
exit 0
set -eux
set -e
echo "Fetching datasets..."
URL_PROVENANCE="s3://softwareheritage/derived_datasets/${DATASET_VERSION}/provenance/all/"
CMD_GET="aws s3 cp --no-sign-request"
# Retrieve the provenance dataset parquet files
$CMD_GET --recursive "${URL_PROVENANCE}" "${PROVENANCE_PATH}"
echo "Fetching provenance dataset (parquet files)..."
CMD="${CMD_GET} --recursive '${URL_PROVENANCE}' '${PROVENANCE_PATH}'"
echo $CMD
$CMD
echo "Provenance datasets installed!"
# Retrieve the required graph files
echo "Fetching extra graph files..."
URL_GRAPH="s3://softwareheritage/graph/${DATASET_VERSION}/compressed"
for filename in graph.pthash graph.pthash.order graph.node2swhid.bin.zst graph.node2type.bin.zst; do
$CMD_GET "${URL_GRAPH}/${filename}" "${GRAPH_PATH}"
CMD="${CMD_GET} '${URL_GRAPH}/${filename}' '${GRAPH_PATH}'"
echo $CMD
$CMD
done
echo "Extra graph files installed!"
echo "Uncompressing graph files..."
# Uncompress the compressed graph *.zst files
for filename in graph.node2type.bin.zst graph.node2swhid.bin.zst; do
filepath="${GRAPH_PATH}/${filename}"
CMD="unzstd --rm '${filepath}'"
# Uncompress and delete the .zst file
[ -f "${filepath}" ] && unzstd --rm "${filepath}"
[ -f "${filepath}" ] && echo $CMD && $CMD
done
echo "Graph files uncompressed!"
# Make explicit the provenance datasets are fetched
touch ${WITNESS_FETCH_FILE}
echo "Provenance datasets installed!"
provenance-index-dataset.sh: |
#!/usr/bin/env bash
[ -z "${WITNESS_DATASETS_FILE}" ] && \
@@ -212,14 +225,17 @@ data:
[ -f ${WITNESS_INDEX_FILE} ] && echo "Provenance already indexed, do nothing." && \
exit 0
set -eux
set -eu
# Let's wait for the dataset installation
while [ ! -f "${WITNESS_SOURCE_FILE}" ]; do
echo "${WITNESS_SOURCE_FILE} missing, waiting provenance dataset installation..."
while [ ! -f "${WITNESS_DATASETS_FILE}" ]; do
echo "${WITNESS_DATASETS_FILE} missing, waiting provenance dataset installation..."
sleep $PERIOD
done
echo "Datasets file installed, build provenance dataset indexes..."
set -x
# To make the query faster, the provenance needs to build index out of the
# current dataset files. We store the output indexes in the same path as
# the dataset.
@@ -227,6 +243,9 @@ data:
--database ${PROVENANCE_PATH} \
--indexes ${PROVENANCE_PATH} && \
touch "${WITNESS_INDEX_FILE}"
set +x
echo "Provenance indexes built!"
initialize-search-backend.sh: |
#!/usr/bin/env bash
Loading