Skip to content

staging: Deploy vault in elastic infra

Antoine R. Dumont requested to merge migrate-staging-vault into production

Extra work is required:

  • On the Puppet side, at least change the DNS record for vault.internal.staging.swh.network so that it becomes an alias of the staging ingress record.
  • Probably check some firewall rules too.
make swh-helm-diff
[swh] Comparing changes between branches production and migrate-staging-vault (per environment)...
Switched to branch 'production'
Your branch is up to date with 'origin/production'.
[swh] Generate config in production branch for environment staging...
Switched to branch 'migrate-staging-vault'
[swh] Generate config in migrate-staging-vault branch for environment staging...
Switched to branch 'production'
Your branch is up to date with 'origin/production'.
[swh] Generate config in production branch for environment production...
Switched to branch 'migrate-staging-vault'
[swh] Generate config in migrate-staging-vault branch for environment production...


------------- diff for environment staging -------------

--- /tmp/swh-chart.swh.nC79Ue3k/staging.before  2023-10-16 14:32:23.895270058 +0200
+++ /tmp/swh-chart.swh.nC79Ue3k/staging.after   2023-10-16 14:32:24.551269267 +0200
@@ -294,21 +294,21 @@
   config.yml.template: |

     storage:
       cls: pipeline
       steps:
       - cls: retry
       - cls: remote
         url: http://storage1.internal.staging.swh.network:5002
     vault:
       cls: remote
-      url: http://vault.internal.staging.swh.network:5005/
+      url: http://vault.internal.staging.swh.network:80/
     max_bundle_size: 1073741824

     celery:
       task_broker: amqp://swhconsumer:${AMQP_PASSWORD}@scheduler0.internal.staging.swh.network:5672/%2f
       task_modules:
         - swh.vault.cooking_tasks
       task_queues:
       - swh.vault.cooking_tasks.SWHBatchCookingTask

       sentry_settings_for_celery_tasks:
@@ -378,21 +378,21 @@
   config.yml.template: |

     storage:
       cls: pipeline
       steps:
       - cls: retry
       - cls: remote
         url: http://storage1.internal.staging.swh.network:5002
     vault:
       cls: remote
-      url: http://vault.internal.staging.swh.network:5005/
+      url: http://vault.internal.staging.swh.network:80/
     max_bundle_size: 1073741824

     celery:
       task_broker: amqp://swhconsumer:${AMQP_PASSWORD}@scheduler0.internal.staging.swh.network:5672/%2f
       task_modules:
         - swh.vault.cooking_tasks
       task_queues:
       - swh.vault.cooking_tasks.SWHCookingTask

       sentry_settings_for_celery_tasks:
@@ -4285,20 +4285,56 @@
     if [ -e "${DB_VERSION}" ]; then
       echo "Unable to find the code version"
       exit 1
     fi

     if [ "$DB_VERSION" -ne "$CODE_VERSION" ]; then
       echo "code and DB versions are different. Blocking the deployment"
       exit 1
     fi
 ---
+# Source: swh/templates/vault/rpc-configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  namespace: swh
+  name: vault-rpc-configuration-template
+data:
+  config.yml.template: |
+    vault:
+      cls: postgresql
+      db: host=db1.internal.staging.swh.network port=5432 user=swh-vault dbname=swh password=${POSTGRESQL_PASSWORD}
+    storage:
+      cls: pipeline
+      steps:
+      - cls: retry
+      - cls: remote
+        url: http://storage1.internal.staging.swh.network:5002
+    scheduler:
+      cls: remote
+      url: http://scheduler.internal.staging.swh.network
+    objstorage:
+      cls: filtered
+      filters_conf:
+      - type: readonly
+      storage_conf:
+        cls: remote
+        url: http://storage1.internal.staging.swh.network:5003
+    cache:
+      account_name: swhvaultstoragestaging
+      api_secret_key: ${API_SECRET_KEY}
+      cls: azure
+      container_name: contents
+    smtp:
+      host: smtp.inria.fr
+      port: 25
+---
 # Source: swh/charts/keda/templates/crds/crd-clustertriggerauthentications.yaml
 apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
     controller-gen.kubebuilder.io/version: v0.12.0
   labels:
     app.kubernetes.io/name: keda-operator
     helm.sh/chart: keda-2.11.0
     app.kubernetes.io/component: operator
@@ -13920,20 +13956,35 @@
     app: prometheus-statsd-exporter
   ports:
     - name: statsd
       port: 9125
       targetPort: 9125
       protocol: UDP
     - name: http
       port: 9102
       targetPort: 9102
 ---
+# Source: swh/templates/vault/rpc-service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: vault-rpc
+  namespace: swh
+spec:
+  type: ClusterIP
+  selector:
+    app: vault-rpc
+  ports:
+    - port: 5005
+      targetPort: 5005
+      name: rpc
+---
 # Source: swh/charts/keda/templates/manager/deployment.yaml
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: keda-operator
   namespace: default
   annotations:
     {}
   labels:
     app: keda-operator
@@ -14429,21 +14480,21 @@
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: cooker-batch
       annotations:
         # Force a rollout upgrade if the configuration changes
-        checksum/config: b8640eee471339fd8888636ddfa22e03e3ee34ef3509ab0577c1396f6663f163
+        checksum/config: d62e89c2e2264e26ed8fac609fb4c98850304507287c11f0ee25893e42fa7e6f
     spec:
       affinity:

         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
               - key: swh/cooker
                 operator: In
                 values:
@@ -14572,21 +14623,21 @@
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: cooker-simple
       annotations:
         # Force a rollout upgrade if the configuration changes
-        checksum/config: b8640eee471339fd8888636ddfa22e03e3ee34ef3509ab0577c1396f6663f163
+        checksum/config: d62e89c2e2264e26ed8fac609fb4c98850304507287c11f0ee25893e42fa7e6f
     spec:
       affinity:

         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
               - key: swh/cooker
                 operator: In
                 values:
@@ -21861,20 +21912,166 @@
           defaultMode: 0777
           items:
           - key: "config.yml.template"
             path: "config.yml.template"

       - name: toolbox-script-utils
         configMap:
           name: toolbox-script-utils
           defaultMode: 0555
 ---
+# Source: swh/templates/vault/rpc-deployment.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: swh
+  name: vault-rpc
+  labels:
+    app: vault-rpc
+spec:
+  revisionHistoryLimit: 2
+  selector:
+    matchLabels:
+      app: vault-rpc
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxSurge: 1
+  template:
+    metadata:
+      labels:
+        app: vault-rpc
+      annotations:
+        checksum/config: fcd0e620e2b4d860811ac5f3107c86fe2d4863511b518997bc72b32616e5fc27
+    spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: swh/rpc
+                operator: In
+                values:
+                - "true"
+      priorityClassName: swh-frontend-rpc
+
+      initContainers:
+        - name: prepare-configuration
+          image: debian:bullseye
+          imagePullPolicy: IfNotPresent
+          command:
+          - /bin/bash
+          args:
+          - -c
+          - eval echo "\"$(</etc/swh/configuration-template/config.yml.template)\"" > /etc/swh/config.yml
+          env:
+
+
+            - name: POSTGRESQL_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: swh-vault-postgresql-secret
+                  key: postgres-swh-vault-password
+                  # 'name' secret must exist & include that ^ key
+                  optional: false
+
+
+            - name: API_SECRET_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: swh-vault-azure-secret
+                  key: azure-swh-vault-key
+                  # 'name' secret must exist & include that ^ key
+                  optional: false
+          volumeMounts:
+          - name: configuration
+            mountPath: /etc/swh
+          - name: configuration-template
+            mountPath: /etc/swh/configuration-template
+      containers:
+        - name: vault-rpc
+          resources:
+            requests:
+              memory: 512Mi
+              cpu: 500m
+          image: container-registry.softwareheritage.org/swh/infra/swh-apps/vault:20231004.2
+          imagePullPolicy: IfNotPresent
+          ports:
+            - containerPort: 5005
+              name: rpc
+          readinessProbe:
+            httpGet:
+              path: /
+              port: rpc
+            initialDelaySeconds: 15
+            failureThreshold: 30
+            periodSeconds: 5
+          livenessProbe:
+            httpGet:
+              path: /
+              port: rpc
+            initialDelaySeconds: 10
+            periodSeconds: 5
+          command:
+            - /bin/bash
+          args:
+            - -c
+            - /opt/swh/entrypoint.sh
+          env:
+            - name: THREADS
+              value: "5"
+            - name: WORKERS
+              value: "4"
+            - name: TIMEOUT
+              value: "3600"
+            - name: STATSD_HOST
+              value: prometheus-statsd-exporter
+            - name: STATSD_PORT
+              value: "9125"
+            - name: LOG_LEVEL
+              value: INFO
+          volumeMounts:
+          - name: configuration
+            mountPath: /etc/swh
+      volumes:
+      - name: configuration
+        emptyDir: {}
+      - name: configuration-template
+        configMap:
+          name: vault-rpc-configuration-template
+          items:
+          - key: "config.yml.template"
+            path: "config.yml.template"
+---
+# Source: swh/templates/vault/rpc-autoscale.yaml
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  namespace: swh
+  name: vault-rpc
+  labels:
+    app: vault-rpc
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: vault-rpc
+  minReplicas: 2
+  maxReplicas: 4
+  metrics:
+  - type: Resource
+    resource:
+      name: cpu
+      target:
+        type: Utilization
+        averageUtilization: 75
+---
 # Source: swh/templates/scheduler/update-metrics-cronjob.yaml
 apiVersion: batch/v1
 kind: CronJob
 metadata:
   name: scheduler-update-metrics-cronjob
 spec:
   # By default, every 4h at midnight
   schedule: "0/10 * * * *"
   jobTemplate:
     spec:
@@ -22039,20 +22236,49 @@
     http:
       paths:
       - path: /scheduler_metrics/get
         pathType: Prefix
         backend:
           service:
             name: scheduler-rpc
             port:
               number: 5008
 ---
+# Source: swh/templates/vault/rpc-ingress.yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  namespace: swh
+  name: vault-rpc-ingress-default
+  annotations:
+    nginx.ingress.kubernetes.io/whitelist-source-range: 10.42.0.0/16,10.43.0.0/16,127.0.0.0/8,192.168.101.0/24,192.168.130.0/24
+
+    nginx.ingress.kubernetes.io/proxy-body-size: 4G
+    nginx.ingress.kubernetes.io/proxy-connect-timeout: "90"
+    nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
+    nginx.ingress.kubernetes.io/proxy-request-buffering: "on"
+    nginx.ingress.kubernetes.io/proxy-send-timeout: "90"
+
+
+spec:
+  rules:
+  - host: vault.internal.staging.swh.network
+    http:
+      paths:
+      - path: /
+        pathType: Prefix
+        backend:
+          service:
+            name: vault-rpc
+            port:
+              number: 5005
+---
 # Source: swh/charts/keda/templates/metrics-server/apiservice.yaml
 apiVersion: apiregistration.k8s.io/v1
 kind: APIService
 metadata:
   annotations:
   labels:
     app.kubernetes.io/name: v1beta1.external.metrics.k8s.io
     helm.sh/chart: keda-2.11.0
     app.kubernetes.io/component: operator
     app.kubernetes.io/managed-by: Helm
@@ -27925,21 +28151,21 @@
       cls: remote
       url: http://storage:5002
     search:
       cls: remote
       url: http://search0.internal.staging.swh.network:5010
     scheduler:
       cls: remote
       url: http://scheduler.internal.staging.swh.network
     vault:
       cls: remote
-      url: http://vault.internal.staging.swh.network:5005/
+      url: http://vault.internal.staging.swh.network:80/
     indexer_storage:
       cls: remote
       url: http://storage1.internal.staging.swh.network:5007/
     counters_backend: swh-counters
     counters:
       cls: remote
       url: http://counters0.internal.staging.swh.network:5011/

     secret_key: ${DJANGO_SECRET_KEY}
     production_db:
@@ -44131,21 +44357,21 @@
       app: web
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: web
       annotations:
-        checksum/config: 7c245afcc511931f2646b79af339f99774d660ac164cf9a0a32940c09ecc57b4
+        checksum/config: 11c217e4cc8445244dc4501b88f469abd6625529de43a447781dcc82f416e2f6
     spec:
       affinity:
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
               - key: swh/web
                 operator: In
                 values:
                 - "true"
@@ -50007,21 +50233,21 @@
       cls: remote
       url: http://storage:5002
     search:
       cls: remote
       url: http://search0.internal.staging.swh.network:5010
     scheduler:
       cls: remote
       url: http://scheduler.internal.staging.swh.network
     vault:
       cls: remote
-      url: http://vault.internal.staging.swh.network:5005/
+      url: http://vault.internal.staging.swh.network:80/
     indexer_storage:
       cls: remote
       url: http://storage1.internal.staging.swh.network:5007/
     counters_backend: swh-counters
     counters:
       cls: remote
       url: http://counters0.internal.staging.swh.network:5011/

     secret_key: ${DJANGO_SECRET_KEY}
     production_db:
@@ -66213,21 +66439,21 @@
       app: web
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: web
       annotations:
-        checksum/config: a25a590b55dd983296d96090b518a463dcac5e24c9485e2b142853735e3bee73
+        checksum/config: 09b04557bb85ac38d35e479d6de76ffaf3633299a4d8caf8319321f19084661b
     spec:
       affinity:
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
               - key: swh/web
                 operator: In
                 values:
                 - "true"


------------- diff for environment production -------------

No differences

Refs. swh/infra/sysadm-environment#4780 (closed)

Edited by Antoine R. Dumont

Merge request reports