Skip to content

staging/search: Migrate search journal client to elastic infra

Antoine R. Dumont requested to merge migrate-search-journal-client into staging

The search journal clients were missed during the inventory.

swh-helm-diff
[swh] Comparing changes between branches production and migrate-search-journal-client (per environment)...
Switched to branch 'production'
Your branch is behind 'origin/production' by 1 commit, and can be fast-forwarded.
  (use "git pull" to update your local branch)
[swh] Generate config in production branch for environment staging, namespace swh...
[swh] Generate config in production branch for environment staging, namespace swh-cassandra...
[swh] Generate config in production branch for environment staging, namespace swh-cassandra-next-version...
Switched to branch 'migrate-search-journal-client'
[swh] Generate config in migrate-search-journal-client branch for environment staging...
[swh] Generate config in migrate-search-journal-client branch for environment staging...
[swh] Generate config in migrate-search-journal-client branch for environment staging...
Switched to branch 'production'
Your branch is behind 'origin/production' by 1 commit, and can be fast-forwarded.
  (use "git pull" to update your local branch)
[swh] Generate config in production branch for environment production, namespace swh...
[swh] Generate config in production branch for environment production, namespace swh-cassandra...
[swh] Generate config in production branch for environment production, namespace swh-cassandra-next-version...
Switched to branch 'migrate-search-journal-client'
[swh] Generate config in migrate-search-journal-client branch for environment production...
[swh] Generate config in migrate-search-journal-client branch for environment production...
[swh] Generate config in migrate-search-journal-client branch for environment production...


------------- diff for environment staging namespace swh -------------

--- /tmp/swh-chart.swh.WFYcaO4u/staging-swh.before      2023-10-26 16:52:24.497116582 +0200
+++ /tmp/swh-chart.swh.WFYcaO4u/staging-swh.after       2023-10-26 16:52:25.201115826 +0200
@@ -2970,21 +2970,20 @@
   name: scheduler-journal-client-configuration-template
   namespace: swh
 data:
   config.yml.template: |
     scheduler:
       cls: remote
       url: http://scheduler.internal.staging.swh.network
     celery:
       task_broker: amqp://swhproducer:${AMQP_PASSWORD}@scheduler0.internal.staging.swh.network:5672/%2f
     journal:
-      cls: kafka
       brokers:
         - journal1.internal.staging.swh.network
         - journal2.internal.staging.swh.network
       group_id: swh.scheduler.journal_client
 ---
 # Source: swh/templates/scheduler/recurrent-configmap.yaml
 apiVersion: v1
 kind: ConfigMap
 metadata:
   namespace: swh
@@ -3026,20 +3025,68 @@
 kind: ConfigMap
 metadata:
   name: scheduler-update-metrics-configuration-template
   namespace: swh
 data:
   config.yml.template: |
     scheduler:
       cls: remote
       url: http://scheduler.internal.staging.swh.network
 ---
+# Source: swh/templates/search/journal-client-configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: search-journal-client-indexed-configuration-template
+  namespace: swh
+data:
+  config.yml.template: |
+    search:
+      cls: remote
+      url: http://search-rpc-ingress
+    storage:
+      cls: remote
+      url: http://storage1.internal.staging.swh.network:5002
+    journal:
+      brokers:
+        - journal1.internal.staging.swh.network
+        - journal2.internal.staging.swh.network
+      group_id: swh.search.journal_client.indexed-v0.11
+      object_types:
+      - origin_intrinsic_metadata
+      - origin_extrinsic_metadata
+      prefix: swh.journal.indexed
+---
+# Source: swh/templates/search/journal-client-configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: search-journal-client-objects-configuration-template
+  namespace: swh
+data:
+  config.yml.template: |
+    search:
+      cls: remote
+      url: http://search-rpc-ingress
+    storage:
+      cls: remote
+      url: http://storage1.internal.staging.swh.network:5002
+    journal:
+      brokers:
+        - journal1.internal.staging.swh.network
+        - journal2.internal.staging.swh.network
+      group_id: swh.search.journal_client-v0.11
+      object_types:
+      - origin
+      - origin_visit_status
+      prefix: swh.journal.objects
+---
 # Source: swh/templates/search/rpc-configmap.yaml
 apiVersion: v1
 kind: ConfigMap
 metadata:
   namespace: swh
   name: search-rpc-configuration-template
 data:
   config.yml.template: |
     search:

@@ -3780,25 +3827,24 @@
 data:
   config.yml.template: |

     scrubber:
       cls: postgresql
       db: host=db1.internal.staging.swh.network port=5432 user=swh-scrubber dbname=swh-scrubber password=${SCRUBBER_POSTGRESQL_PASSWORD}
     storage:
       cls: postgresql
       db: host=db1.internal.staging.swh.network port=5432 user=swh dbname=swh password=${POSTGRESQL_PASSWORD}
     journal:
-      cls: kafka
       brokers:
         - journal1.internal.staging.swh.network
         - journal2.internal.staging.swh.network
-      group_id: swh.scheduler.journal_client
+      group_id: changeme
 ---
 # Source: swh/templates/toolbox/configmap.yaml
 apiVersion: v1
 kind: ConfigMap
 metadata:
   name: toolbox-storage-template
   namespace: swh
 data:
   config.yml.template: |

@@ -19671,21 +19717,21 @@
       app: scheduler-journal-client
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: scheduler-journal-client
       annotations:
-        checksum/config: cab65250c5d1b0a92c0b8380e1d8913e7e655b21a28a0718af3270f6136d5e72
+        checksum/config: efa36577497adfaee71777769fb7b12fd677b8e9a32af70aa1cbfa3a3efef42f
     spec:
       affinity:
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
               - key: swh/scheduler
                 operator: In
                 values:
                 - "true"
@@ -19999,20 +20045,208 @@
       volumes:
       - name: configuration
         emptyDir: {}
       - name: configuration-template
         configMap:
           name: scheduler-rpc-configuration-template
           items:
           - key: "config.yml.template"
             path: "config.yml.template"
 ---
+# Source: swh/templates/search/journal-client-deployment.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: swh
+  name: search-journal-client-indexed
+  labels:
+    app: search-journal-client-indexed
+spec:
+  revisionHistoryLimit: 2
+  selector:
+    matchLabels:
+      app: search-journal-client-indexed
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxSurge: 1
+  template:
+    metadata:
+      labels:
+        app: search-journal-client-indexed
+      annotations:
+        checksum/config: 57e1233536eb50aecbb7352bbac845092fd4f7acad31380d31a8086e5aa968e6
+    spec:
+      priorityClassName: swh-normal-workload
+
+      initContainers:
+        - name: prepare-configuration
+          image: debian:bullseye
+          imagePullPolicy: IfNotPresent
+          command:
+          - /bin/bash
+          args:
+          - -c
+          - eval echo "\"$(</etc/swh/configuration-template/config.yml.template)\"" > /etc/swh/config.yml
+          volumeMounts:
+          - name: configuration
+            mountPath: /etc/swh
+          - name: configuration-template
+            mountPath: /etc/swh/configuration-template
+      containers:
+        - name: search-journal-client-indexed
+          resources:
+            requests:
+              memory: 512Mi
+              cpu: 500m
+          image: container-registry.softwareheritage.org/swh/infra/swh-apps/search:20231026.1
+          command:
+          - /opt/swh/entrypoint.sh
+          args:
+          - swh
+          - --log-level
+          - INFO
+          - search
+          - --config-file
+          - /etc/swh/config.yml
+          - journal-client
+          - objects
+          env:
+            - name: STATSD_HOST
+              value: prometheus-statsd-exporter
+            - name: STATSD_PORT
+              value: "9125"
+            - name: SWH_CONFIG_FILENAME
+              value: /etc/swh/config.yml
+            - name: LOG_LEVEL
+              value: INFO
+            - name: SWH_SENTRY_ENVIRONMENT
+              value: staging
+            - name: SWH_MAIN_PACKAGE
+              value: swh.search
+            - name: SWH_SENTRY_DSN
+              valueFrom:
+                secretKeyRef:
+                  name: swh-search-sentry-secret
+                  key: sentry-dsn
+                  # if the setting doesn't exist, sentry issue pushes will be disabled
+                  optional: false
+            - name: SWH_SENTRY_DISABLE_LOGGING_EVENTS
+              value: "true"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+          - name: configuration
+            mountPath: /etc/swh
+      volumes:
+      - name: configuration
+        emptyDir: {}
+      - name: configuration-template
+        configMap:
+          name: search-journal-client-indexed-configuration-template
+          items:
+          - key: "config.yml.template"
+            path: "config.yml.template"
+---
+# Source: swh/templates/search/journal-client-deployment.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: swh
+  name: search-journal-client-objects
+  labels:
+    app: search-journal-client-objects
+spec:
+  revisionHistoryLimit: 2
+  selector:
+    matchLabels:
+      app: search-journal-client-objects
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxSurge: 1
+  template:
+    metadata:
+      labels:
+        app: search-journal-client-objects
+      annotations:
+        checksum/config: 69c39885eae3d61b9f2700879ec1b1333d9f2a4be92ca8e22402a384bbd2cef5
+    spec:
+      priorityClassName: swh-normal-workload
+
+      initContainers:
+        - name: prepare-configuration
+          image: debian:bullseye
+          imagePullPolicy: IfNotPresent
+          command:
+          - /bin/bash
+          args:
+          - -c
+          - eval echo "\"$(</etc/swh/configuration-template/config.yml.template)\"" > /etc/swh/config.yml
+          volumeMounts:
+          - name: configuration
+            mountPath: /etc/swh
+          - name: configuration-template
+            mountPath: /etc/swh/configuration-template
+      containers:
+        - name: search-journal-client-objects
+          resources:
+            requests:
+              memory: 512Mi
+              cpu: 500m
+          image: container-registry.softwareheritage.org/swh/infra/swh-apps/search:20231026.1
+          command:
+          - /opt/swh/entrypoint.sh
+          args:
+          - swh
+          - --log-level
+          - INFO
+          - search
+          - --config-file
+          - /etc/swh/config.yml
+          - journal-client
+          - objects
+          env:
+            - name: STATSD_HOST
+              value: prometheus-statsd-exporter
+            - name: STATSD_PORT
+              value: "9125"
+            - name: SWH_CONFIG_FILENAME
+              value: /etc/swh/config.yml
+            - name: LOG_LEVEL
+              value: INFO
+            - name: SWH_SENTRY_ENVIRONMENT
+              value: staging
+            - name: SWH_MAIN_PACKAGE
+              value: swh.search
+            - name: SWH_SENTRY_DSN
+              valueFrom:
+                secretKeyRef:
+                  name: swh-search-sentry-secret
+                  key: sentry-dsn
+                  # if the setting doesn't exist, sentry issue pushes will be disabled
+                  optional: false
+            - name: SWH_SENTRY_DISABLE_LOGGING_EVENTS
+              value: "true"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+          - name: configuration
+            mountPath: /etc/swh
+      volumes:
+      - name: configuration
+        emptyDir: {}
+      - name: configuration-template
+        configMap:
+          name: search-journal-client-objects-configuration-template
+          items:
+          - key: "config.yml.template"
+            path: "config.yml.template"
+---
 # Source: swh/templates/search/rpc-deployment.yaml
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   namespace: swh
   name: search-rpc
   labels:
     app: search-rpc
 spec:
   revisionHistoryLimit: 2
@@ -21665,21 +21899,21 @@
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: swh-toolbox
       annotations:
         # Force a rollout upgrade if the configuration changes
-        checksum/config: 7e75f30ebf38e5a2889256fd0cdc3f4a89e09dbbfab7853747f3fd154b099322
+        checksum/config: f4e75f1b506f491d70472bfae3a7fa9150bcb6b96e1a275c2fb283b9507a9629
         checksum/configScript: 6ea1b2f2870da707a7699ac13460639d19dda6c6c89cf114c75286dede149823
     spec:
       priorityClassName: swh-tools

       initContainers:
         - name: prepare-configuration-indexer-storage
           image: debian:bullseye
           imagePullPolicy: IfNotPresent
           command:
           - /bin/bash


------------- diff for environment staging namespace swh-cassandra -------------

No differences


------------- diff for environment staging namespace swh-cassandra-next-version -------------

No differences


------------- diff for environment production namespace swh -------------

--- /tmp/swh-chart.swh.WFYcaO4u/production-swh.before   2023-10-26 16:52:25.877115100 +0200
+++ /tmp/swh-chart.swh.WFYcaO4u/production-swh.after    2023-10-26 16:52:26.269114679 +0200
@@ -4987,21 +4987,20 @@
   name: scheduler-journal-client-configuration-template
   namespace: swh
 data:
   config.yml.template: |
     scheduler:
       cls: remote
       url: http://scheduler.internal.softwareheritage.org
     celery:
       task_broker: amqp://swhproducer:${AMQP_PASSWORD}@rabbitmq.internal.softwareheritage.org:5672/%2f
     journal:
-      cls: kafka
       brokers:
         - kafka1.internal.softwareheritage.org
         - kafka2.internal.softwareheritage.org
         - kafka3.internal.softwareheritage.org
         - kafka4.internal.softwareheritage.org
       group_id: swh.scheduler.journal_client
 ---
 # Source: swh/templates/scheduler/recurrent-configmap.yaml
 apiVersion: v1
 kind: ConfigMap
@@ -5153,27 +5152,26 @@
 data:
   config.yml.template: |

     scrubber:
       cls: postgresql
       db: host=db.internal.softwareheritage.org port=5432 user=swh-scrubber dbname=swh-scrubber password=${SCRUBBER_POSTGRESQL_PASSWORD}
     storage:
       cls: postgresql
       db: host=db.internal.softwareheritage.org port=5432 user=swhstorage dbname=softwareheritage password=${POSTGRESQL_PASSWORD}
     journal:
-      cls: kafka
       brokers:
         - kafka1.internal.softwareheritage.org
         - kafka2.internal.softwareheritage.org
         - kafka3.internal.softwareheritage.org
         - kafka4.internal.softwareheritage.org
-      group_id: swh.scheduler.journal_client
+      group_id: changeme
 ---
 # Source: swh/templates/toolbox/configmap.yaml
 apiVersion: v1
 kind: ConfigMap
 metadata:
   name: toolbox-storage-template
   namespace: swh
 data:
   config.yml.template: |

@@ -23835,21 +23833,21 @@
       app: scheduler-journal-client
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: scheduler-journal-client
       annotations:
-        checksum/config: dd654fd74f168987ce84a19328031dab0ba2266f958b269c8271d312771f3239
+        checksum/config: af1f0ba8454e5a663ab3c3e915f9198dd52f403e9550887fa81e97bc1a2cefe9
     spec:
       affinity:
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
               - key: swh/scheduler
                 operator: In
                 values:
                 - "true"
@@ -24220,21 +24218,21 @@
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: swh-toolbox
       annotations:
         # Force a rollout upgrade if the configuration changes
-        checksum/config: 1c03cc66af5f38f0f57323396602918d5a94531b8a3b51f08262aee2f2d2dead
+        checksum/config: d2d405cacbacf84365ecfc2c01986becd13c2daaa2ea0c02c424ccfd5db0bdbf
         checksum/configScript: 3303cfe671f82f98541336ae9e53a936f3dff2016116784815730206e6114797
     spec:
       priorityClassName: swh-tools

       initContainers:
         - name: prepare-configuration-indexer-storage
           image: debian:bullseye
           imagePullPolicy: IfNotPresent
           command:
           - /bin/bash


------------- diff for environment production namespace swh-cassandra -------------

No differences

Refs. swh/infra/sysadm-environment#4780 (closed)

Edited by Antoine R. Dumont

Merge request reports