Skip to content

indexer config improvements

Nicolas Dandrimont requested to merge mr/indexer-config-improvements into staging

This refactors the indexer configuration to match recent improvements in the chart templates:

  • use a helper template for the configmap (so that each instance gets its own config hash)
  • use the swh_utils initContainer for config generation
  • use the journalConfigurationRef + journalConfigurationOverrides pattern for the journal client config
  • move the autoscaling config to the common helper template
helm-diff output
------------- diff for environment staging namespace swh -------------

--- /tmp/swh-chart.swh.Vrczf0mE/staging-swh.before	2024-01-16 12:19:39.592116003 +0100
+++ /tmp/swh-chart.swh.Vrczf0mE/staging-swh.after	2024-01-16 12:19:39.996115392 +0100
@@ -1,33 +1,31 @@
 ---
-# Source: swh/templates/indexers/keda-secrets.yaml
+# Source: swh/templates/indexers/keda-autoscaling.yaml
 apiVersion: v1
 kind: Secret
 metadata:
-  name: keda-indexers-kafka-secrets-extrinsic
+  name: keda-indexers-extrinsic-secrets
   namespace: swh
 type: Opaque
 stringData:
   sasl: "scram_sha512"
-  username: swh-archive-stg
   tls: "enable"
 ---
-# Source: swh/templates/indexers/keda-secrets.yaml
+# Source: swh/templates/indexers/keda-autoscaling.yaml
 apiVersion: v1
 kind: Secret
 metadata:
-  name: keda-indexers-kafka-secrets-origin-intrinsic
+  name: keda-indexers-origin-intrinsic-secrets
   namespace: swh
 type: Opaque
 stringData:
   sasl: "scram_sha512"
-  username: swh-archive-stg
   tls: "enable"
 ---
 # Source: swh/templates/storage-replayer/keda-secrets.yaml
 apiVersion: v1
 kind: Secret
 metadata:
   name: keda-storage-replayer-kafka-secrets
   namespace: swh
 type: Opaque
 stringData:
@@ -223,125 +221,93 @@
     if [ ! -e $WITNESS_FILE ]; then
       touch $WITNESS_FILE
       # journal clients expect a SIGINT, not a SIGTERM
       kill -INT 1
     fi
 ---
 # Source: swh/templates/indexers/configmap.yaml
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: indexer-extrinsic-template
+  name: indexer-extrinsic-configuration-template
   namespace: swh
 data:
   config.yml.template: |
     storage:
       cls: pipeline
       steps:
       - cls: retry
       - cls: remote
         url: http://storage-postgresql-read-only-rpc-ingress
     scheduler:
       cls: remote
       url: http://scheduler.internal.staging.swh.network
     indexer_storage:
       cls: remote
       url: http://indexer-storage-rpc-ingress
     objstorage:
       cls: remote
       url: http://objstorage-read-only-rpc-ingress
     journal:
-      brokers: 
+      brokers:
         - journal2.internal.staging.swh.network:9094
-      
+      batch_size: 200
+      cls: kafka
       group_id: swh-archive-stg-swh.indexer.journal_client.extrinsic_metadata
       prefix: swh.journal.objects
-      
-      batch_size: 200
-      
       sasl.mechanism: SCRAM-SHA-512
-      security.protocol: SASL_SSL
-      sasl.username: swh-archive-stg
       sasl.password: ${BROKER_USER_PASSWORD}
-      
+      sasl.username: ${BROKER_USER}
+      security.protocol: SASL_SSL
     tools: 
       configuration: {}
       name: swh-metadata-detector
       version: 0.0.2
-
-  init-container-entrypoint.sh: |
-    #!/bin/bash
-
-    set -e
-
-    CONFIG_FILE=/etc/swh/config.yml
-
-    # substitute environment variables when creating the default config.yml
-    eval echo \""$(</etc/swh/configuration-template/config.yml.template)"\" \
-      > $CONFIG_FILE
-
-    exit 0
 ---
 # Source: swh/templates/indexers/configmap.yaml
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: indexer-origin-intrinsic-template
+  name: indexer-origin-intrinsic-configuration-template
   namespace: swh
 data:
   config.yml.template: |
     storage:
       cls: pipeline
       steps:
       - cls: retry
       - cls: remote
         url: http://storage-postgresql-read-only-rpc-ingress
     scheduler:
       cls: remote
       url: http://scheduler.internal.staging.swh.network
     indexer_storage:
       cls: remote
       url: http://indexer-storage-rpc-ingress
     objstorage:
       cls: remote
       url: http://objstorage-read-only-rpc-ingress
     journal:
-      brokers: 
+      brokers:
         - journal2.internal.staging.swh.network:9094
-      
+      batch_size: 200
+      cls: kafka
       group_id: swh-archive-stg-swh.indexer.journal_client.origin_intrinsic_metadata
       prefix: swh.journal.objects
-      
-      batch_size: 200
-      
       sasl.mechanism: SCRAM-SHA-512
-      security.protocol: SASL_SSL
-      sasl.username: swh-archive-stg
       sasl.password: ${BROKER_USER_PASSWORD}
-      
+      sasl.username: ${BROKER_USER}
+      security.protocol: SASL_SSL
     tools: 
       configuration: {}
       name: swh-metadata-detector
       version: 0.0.2
-
-  init-container-entrypoint.sh: |
-    #!/bin/bash
-
-    set -e
-
-    CONFIG_FILE=/etc/swh/config.yml
-
-    # substitute environment variables when creating the default config.yml
-    eval echo \""$(</etc/swh/configuration-template/config.yml.template)"\" \
-      > $CONFIG_FILE
-
-    exit 0
 ---
 # Source: swh/templates/listers/configmap.yaml
 apiVersion: v1
 kind: ConfigMap
 metadata:
   name: lister-utils
   namespace: swh
 data:
   pre-stop-idempotent.sh: |
     #!/bin/bash
@@ -5284,59 +5250,66 @@
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: indexer-extrinsic
       annotations:
         # Force a rollout upgrade if the configuration changes
-        checksum/config: aa380ec7d9e8b614159529e34e895a5e839c9b3c3b31c3852bcbc19f1602dca6
+        checksum/config: f1eb1ea4c3682edbf29c9b33f79e24327ffed49466d094a62400c38cb3f92bfd
+        checksum/config-utils: 0cce256531366ac8ea2a0bde2a8f10937284a0f599f45fabe6babbbc096e179d
     spec:
       affinity:
         
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
               - key: swh/indexer
                 operator: In
                 values:
                 - "true"
       priorityClassName: swh-frontend-rpc
       
       terminationGracePeriodSeconds: 3600
       initContainers:
         - name: prepare-configuration
-          image: debian:bullseye
+          image: container-registry.softwareheritage.org/swh/infra/swh-apps/utils:20231211.1
           imagePullPolicy: IfNotPresent
-          
+          command:
+          - /entrypoints/prepare-configuration.sh
           env:
+            
+          
+          - name: BROKER_USER
+            valueFrom:
+              secretKeyRef:
+                name: swh-archive-broker-secret
+                key: BROKER_USER
+                # 'name' secret must exist & include that ^ key
+                optional: false
           - name: BROKER_USER_PASSWORD
             valueFrom:
               secretKeyRef:
                 name: swh-archive-broker-secret
                 key: BROKER_USER_PASSWORD
+                # 'name' secret must exist & include that ^ key
                 optional: false
-          
-          command:
-            - /entrypoint.sh
           volumeMounts:
-          - name: configuration-template
-            mountPath: /entrypoint.sh
-            subPath: "init-container-entrypoint.sh"
-            readOnly: true
           - name: configuration
             mountPath: /etc/swh
           - name: configuration-template
             mountPath: /etc/swh/configuration-template
+          - name: config-utils
+            mountPath: /entrypoints
       containers:
       - name: indexers
         image: container-registry.softwareheritage.org/swh/infra/swh-apps/indexer:20240111.1
         imagePullPolicy: IfNotPresent
         command:
           - /opt/swh/entrypoint.sh
         resources:
           requests:
             memory: 100Mi
             cpu: 50m
@@ -5371,43 +5344,45 @@
         volumeMounts:
           - name: indexer-utils
             mountPath: /pre-stop.sh
             subPath: "pre-stop.sh"
           - name: configuration
             mountPath: /etc/swh
           - name: localstorage
             mountPath: /tmp
       volumes:
       - name: localstorage
+        
         ephemeral:
           volumeClaimTemplate:
             metadata:
               labels:
                 type: ephemeral-volume
             spec:
               accessModes:
               - ReadWriteOnce
               resources:
                 requests:
                   storage: 100Gi
               storageClassName: local-path
       - name: configuration
         emptyDir: {}
       - name: configuration-template
         configMap:
-          name: indexer-extrinsic-template
-          defaultMode: 0777
+          name: indexer-extrinsic-configuration-template
           items:
           - key: "config.yml.template"
             path: "config.yml.template"
-          - key: "init-container-entrypoint.sh"
-            path: "init-container-entrypoint.sh"
+      - name: config-utils
+        configMap:
+          name: config-utils
+          defaultMode: 0555
       - name: indexer-utils
         configMap:
           name: indexer-utils
           defaultMode: 0777
           items:
           - key: "pre-stop-idempotent.sh"
             path: "pre-stop.sh"
 ---
 # Source: swh/templates/indexers/deployment.yaml
 apiVersion: apps/v1
@@ -5425,59 +5400,66 @@
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: indexer-origin-intrinsic
       annotations:
         # Force a rollout upgrade if the configuration changes
-        checksum/config: aa380ec7d9e8b614159529e34e895a5e839c9b3c3b31c3852bcbc19f1602dca6
+        checksum/config: fc5370d3e5b63b9d1c9ac657aa1c781d17834e46da0efc0ecb190280451fc110
+        checksum/config-utils: 0cce256531366ac8ea2a0bde2a8f10937284a0f599f45fabe6babbbc096e179d
     spec:
       affinity:
         
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
               - key: swh/indexer
                 operator: In
                 values:
                 - "true"
       priorityClassName: swh-frontend-rpc
       
       terminationGracePeriodSeconds: 3600
       initContainers:
         - name: prepare-configuration
-          image: debian:bullseye
+          image: container-registry.softwareheritage.org/swh/infra/swh-apps/utils:20231211.1
           imagePullPolicy: IfNotPresent
-          
+          command:
+          - /entrypoints/prepare-configuration.sh
           env:
+            
+          
+          - name: BROKER_USER
+            valueFrom:
+              secretKeyRef:
+                name: swh-archive-broker-secret
+                key: BROKER_USER
+                # 'name' secret must exist & include that ^ key
+                optional: false
           - name: BROKER_USER_PASSWORD
             valueFrom:
               secretKeyRef:
                 name: swh-archive-broker-secret
                 key: BROKER_USER_PASSWORD
+                # 'name' secret must exist & include that ^ key
                 optional: false
-          
-          command:
-            - /entrypoint.sh
           volumeMounts:
-          - name: configuration-template
-            mountPath: /entrypoint.sh
-            subPath: "init-container-entrypoint.sh"
-            readOnly: true
           - name: configuration
             mountPath: /etc/swh
           - name: configuration-template
             mountPath: /etc/swh/configuration-template
+          - name: config-utils
+            mountPath: /entrypoints
       containers:
       - name: indexers
         image: container-registry.softwareheritage.org/swh/infra/swh-apps/indexer:20240111.1
         imagePullPolicy: IfNotPresent
         command:
           - /opt/swh/entrypoint.sh
         resources:
           requests:
             memory: 100Mi
             cpu: 50m
@@ -5512,43 +5494,45 @@
         volumeMounts:
           - name: indexer-utils
             mountPath: /pre-stop.sh
             subPath: "pre-stop.sh"
           - name: configuration
             mountPath: /etc/swh
           - name: localstorage
             mountPath: /tmp
       volumes:
       - name: localstorage
+        
         ephemeral:
           volumeClaimTemplate:
             metadata:
               labels:
                 type: ephemeral-volume
             spec:
               accessModes:
               - ReadWriteOnce
               resources:
                 requests:
                   storage: 100Gi
               storageClassName: local-path
       - name: configuration
         emptyDir: {}
       - name: configuration-template
         configMap:
-          name: indexer-origin-intrinsic-template
-          defaultMode: 0777
+          name: indexer-origin-intrinsic-configuration-template
           items:
           - key: "config.yml.template"
             path: "config.yml.template"
-          - key: "init-container-entrypoint.sh"
-            path: "init-container-entrypoint.sh"
+      - name: config-utils
+        configMap:
+          name: config-utils
+          defaultMode: 0555
       - name: indexer-utils
         configMap:
           name: indexer-utils
           defaultMode: 0777
           items:
           - key: "pre-stop-idempotent.sh"
             path: "pre-stop.sh"
 ---
 # Source: swh/templates/listers/deployment.yaml
 apiVersion: apps/v1
@@ -15544,59 +15528,80 @@
       vhostName: /                   # Optional. If not specified, use the vhost in the
                                      # `host` connection string. Alternatively, you can
                                      # use existing environment variables to read
                                      # configuration from: See details in "Parameter
                                      # list" section hostFromEnv: RABBITMQ_HOST%
 ---
 # Source: swh/templates/indexers/keda-autoscaling.yaml
 apiVersion: keda.sh/v1alpha1
 kind: ScaledObject
 metadata:
-  name: indexer-extrinsic-scaledobject
+  name: indexers-extrinsic-scaledobject
   namespace: swh
 spec:
   scaleTargetRef:
-    name: indexer-extrinsic
+    name: indexers-extrinsic
   pollingInterval: 120
   minReplicaCount: 1
   maxReplicaCount: 6
+  idleReplicaCount: 0
   triggers:
   - type: kafka
     metadata:
       bootstrapServers: journal2.internal.staging.swh.network:9094
       consumerGroup: swh-archive-stg-swh.indexer.journal_client.extrinsic_metadata
       lagThreshold: "1000"
       offsetResetPolicy: earliest
     authenticationRef:
-      name: keda-indexers-trigger-authentication-extrinsic
+      name: keda-indexers-extrinsic-authentication
 ---
 # Source: swh/templates/indexers/keda-autoscaling.yaml
 apiVersion: keda.sh/v1alpha1
+kind: TriggerAuthentication
+metadata:
+  name: keda-indexers-extrinsic-authentication
+  namespace: swh
+spec:
+  secretTargetRef:
+  - parameter: username
+    name: swh-archive-broker-secret
+    key: BROKER_USER
+  - parameter: password
+    name: swh-archive-broker-secret
+    key: BROKER_USER_PASSWORD
+  - parameter: sasl
+    name: keda-indexers-extrinsic-secrets
+    key: sasl
+  - parameter: tls
+    name: keda-indexers-extrinsic-secrets
+    key: tls---
+apiVersion: keda.sh/v1alpha1
 kind: ScaledObject
 metadata:
-  name: indexer-origin-intrinsic-scaledobject
+  name: indexers-origin-intrinsic-scaledobject
   namespace: swh
 spec:
   scaleTargetRef:
-    name: indexer-origin-intrinsic
+    name: indexers-origin-intrinsic
   pollingInterval: 120
   minReplicaCount: 1
   maxReplicaCount: 6
+  idleReplicaCount: 0
   triggers:
   - type: kafka
     metadata:
       bootstrapServers: journal2.internal.staging.swh.network:9094
       consumerGroup: swh-archive-stg-swh.indexer.journal_client.origin_intrinsic_metadata
       lagThreshold: "1000"
       offsetResetPolicy: earliest
     authenticationRef:
-      name: keda-indexers-trigger-authentication-origin-intrinsic
+      name: keda-indexers-origin-intrinsic-authentication
 ---
 # Source: swh/templates/listers/keda-autoscaling.yaml
 apiVersion: keda.sh/v1alpha1
 kind: ScaledObject
 metadata:
   name: lister-arch-operators
   namespace: swh
 spec:
   scaleTargetRef:
     apiVersion:    apps/v1     # Optional. Default: apps/v1
@@ -17256,61 +17261,40 @@
 kind: TriggerAuthentication
 metadata:
   name: amqp-authentication-checker-deposit
   namespace: swh
 spec:
   secretTargetRef:
   - parameter: host            # "host" is required by the scalerObject trigger metadata
     name: common-secrets
     key: rabbitmq-http-host
 ---
-# Source: swh/templates/indexers/keda-secrets.yaml
+# Source: swh/templates/indexers/keda-autoscaling.yaml
 apiVersion: keda.sh/v1alpha1
 kind: TriggerAuthentication
 metadata:
-  name: keda-indexers-trigger-authentication-extrinsic
+  name: keda-indexers-origin-intrinsic-authentication
   namespace: swh
 spec:
   secretTargetRef:
-  - parameter: sasl
-    name: keda-indexers-kafka-secrets-extrinsic
-    key: sasl
   - parameter: username
-    name: keda-indexers-kafka-secrets-extrinsic
-    key: username
-  - parameter: tls
-    name: keda-indexers-kafka-secrets-extrinsic
-    key: tls
+    name: swh-archive-broker-secret
+    key: BROKER_USER
   - parameter: password
     name: swh-archive-broker-secret
     key: BROKER_USER_PASSWORD
----
-# Source: swh/templates/indexers/keda-secrets.yaml
-apiVersion: keda.sh/v1alpha1
-kind: TriggerAuthentication
-metadata:
-  name: keda-indexers-trigger-authentication-origin-intrinsic
-  namespace: swh
-spec:
-  secretTargetRef:
   - parameter: sasl
-    name: keda-indexers-kafka-secrets-origin-intrinsic
+    name: keda-indexers-origin-intrinsic-secrets
     key: sasl
-  - parameter: username
-    name: keda-indexers-kafka-secrets-origin-intrinsic
-    key: username
   - parameter: tls
-    name: keda-indexers-kafka-secrets-origin-intrinsic
+    name: keda-indexers-origin-intrinsic-secrets
     key: tls
-  - parameter: password
-    name: swh-archive-broker-secret
-    key: BROKER_USER_PASSWORD
 ---
 # Source: swh/templates/listers/keda-autoscaling.yaml
 apiVersion: keda.sh/v1alpha1
 kind: TriggerAuthentication
 metadata:
   name: amqp-authentication-lister-arch
   namespace: swh
 spec:
   secretTargetRef:
   - parameter: host            # "host" is required by the scalerObject trigger metadata
Edited by Antoine R. Dumont

Merge request reports

Loading