Skip to content

staging/search: Activate search in elastic infra

Antoine R. Dumont requested to merge migrate-search into staging
make swh-helm-diff
[swh] Comparing changes between branches production and migrate-search (per environment)...
Switched to branch 'production'
Your branch is up to date with 'origin/production'.
[swh] Generate config in production branch for environment staging, namespace swh...
[swh] Generate config in production branch for environment staging, namespace swh-cassandra...
[swh] Generate config in production branch for environment staging, namespace swh-cassandra-next-version...
Switched to branch 'migrate-search'
[swh] Generate config in migrate-search branch for environment staging...
[swh] Generate config in migrate-search branch for environment staging...
[swh] Generate config in migrate-search branch for environment staging...
Switched to branch 'production'
Your branch is up to date with 'origin/production'.
[swh] Generate config in production branch for environment production, namespace swh...
[swh] Generate config in production branch for environment production, namespace swh-cassandra...
[swh] Generate config in production branch for environment production, namespace swh-cassandra-next-version...
Switched to branch 'migrate-search'
[swh] Generate config in migrate-search branch for environment production...
[swh] Generate config in migrate-search branch for environment production...
[swh] Generate config in migrate-search branch for environment production...


------------- diff for environment staging namespace swh -------------

--- /tmp/swh-chart.swh.4VSsv23r/staging-swh.before      2023-10-25 17:19:09.586845524 +0200
+++ /tmp/swh-chart.swh.4VSsv23r/staging-swh.after       2023-10-25 17:19:10.274845026 +0200
@@ -263,21 +263,21 @@
   config.yml: |
     storage:
       cls: pipeline
       steps:
       - cls: retry
       - cls: remote
         url: http://storage1.internal.staging.swh.network:5002

     search:
       cls: remote
-      url: http://search0.internal.staging.swh.network:5010
+      url: http://search-rpc-ingress

     debug: true

     introspection: true

     max_raw_content_size: 10000

     max_query_cost:
       anonymous: 50
       user: 500
@@ -3003,20 +3003,41 @@
 kind: ConfigMap
 metadata:
   name: scheduler-update-metrics-configuration-template
   namespace: swh
 data:
   config.yml.template: |
     scheduler:
       cls: remote
       url: http://scheduler.internal.staging.swh.network
 ---
+# Source: swh/templates/search/rpc-configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  namespace: swh
+  name: search-rpc-configuration-template  # mounted by the search-rpc Deployment as its configuration-template volume
+data:  # the template below is expanded into /etc/swh/config.yml by the Deployment's init container
+  config.yml.template: |
+    search:
+
+      cls: elasticsearch
+      indexes:
+        origin:
+          index: origin-v0.11
+          read_alias: origin-read
+          write_alias: origin-write
+      hosts:
+
+        - host: search-esnode0.internal.staging.swh.network
+          port: 9200
+---
 # Source: swh/templates/statsd-exporter/configmap.yaml
 apiVersion: v1
 kind: ConfigMap
 metadata:
   name: prometheus-statsd-exporter
   namespace: swh
 data:
   config.yml: |
     defaults:
       timer_type: histogram
@@ -4057,21 +4078,21 @@
   config.yml.template: |
     instance_name: webapp.staging.swh.network
     allowed_hosts:
       - webapp.staging.swh.network
       - webapp-postgresql.internal.staging.swh.network
     storage:
       cls: remote
       url: http://storage1.internal.staging.swh.network:5002
     search:
       cls: remote
-      url: http://search0.internal.staging.swh.network:5010
+      url: http://search-rpc-ingress
     scheduler:
       cls: remote
       url: http://scheduler.internal.staging.swh.network
     vault:
       cls: remote
       url: http://vault-rpc-ingress
     indexer_storage:
       cls: remote
       url: http://storage1.internal.staging.swh.network:5007/
     counters_backend: swh-counters
@@ -13710,20 +13731,30 @@
     port: 443
     protocol: TCP
     targetPort: 9443
   selector:
     app: keda-admission-webhooks
 ---
 # Source: swh/templates/external-services/cname.yaml
 apiVersion: v1
 kind: Service
 metadata:
+  name: search-rpc-ingress  # hostname used by in-namespace clients (search url: http://search-rpc-ingress)
+  namespace: swh
+spec:
+  type: ExternalName  # DNS alias to the target below; no selector or ports are defined
+  externalName: archive-staging-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
+---
+# Source: swh/templates/external-services/cname.yaml
+apiVersion: v1
+kind: Service
+metadata:
   name: vault-rpc-ingress
   namespace: swh
 spec:
   type: ExternalName
   externalName: archive-staging-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
 ---
 # Source: swh/templates/graphql/service.yaml
 apiVersion: v1
 kind: Service
 metadata:
@@ -13767,20 +13798,35 @@
   namespace: swh
 spec:
   type: ClusterIP
   selector:
     app: scheduler-rpc
   ports:
     - port: 5008
       targetPort: 5008
       name: rpc
 ---
+# Source: swh/templates/search/rpc-service.yaml
+apiVersion: v1
+kind: Service  # in-cluster endpoint in front of the search RPC pods
+metadata:
+  name: search-rpc  # referenced as backend by the search-rpc-ingress-default Ingress
+  namespace: swh
+spec:
+  type: ClusterIP
+  selector:
+    app: search-rpc  # same label as the pod template of the search-rpc Deployment
+  ports:
+    - port: 5010  # port exposed by the Service (the Ingress backend uses this number)
+      targetPort: 5010  # port on the selected pods that receives the traffic
+      name: rpc
+---
 # Source: swh/templates/statsd-exporter/service.yaml
 apiVersion: v1
 kind: Service
 metadata:
   name: prometheus-statsd-exporter
   namespace: swh
   labels:
     app: prometheus-statsd-exporter
 spec:
   type: ClusterIP
@@ -14317,21 +14363,21 @@
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: graphql
       annotations:
         # Force a rollout upgrade if the configuration changes
-        checksum/config: 56fadf3e5e04b05f59eed1134799dcea2c077b69301c5373203f89b6f37f948b
+        checksum/config: f7dda209d6bb07512e26a523854bf434d895e9b3c2a07a92dd0db48d0b00eef6
     spec:
       priorityClassName: swh-frontend-rpc

       containers:
         - name: graphql
           image: container-registry.softwareheritage.org/swh/infra/swh-apps/graphql:20231025.1
           imagePullPolicy: IfNotPresent
           resources:
             requests:
               memory: 150Mi
@@ -19722,20 +19768,137 @@
       volumes:
       - name: configuration
         emptyDir: {}
       - name: configuration-template
         configMap:
           name: scheduler-rpc-configuration-template
           items:
           - key: "config.yml.template"
             path: "config.yml.template"
 ---
+# Source: swh/templates/search/rpc-deployment.yaml
+apiVersion: apps/v1
+kind: Deployment  # runs the swh.search RPC server pods behind the search-rpc Service
+metadata:
+  namespace: swh
+  name: search-rpc
+  labels:
+    app: search-rpc
+spec:  # no replicas field here; the search-rpc HorizontalPodAutoscaler sets the replica count
+  revisionHistoryLimit: 2
+  selector:
+    matchLabels:
+      app: search-rpc
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxSurge: 1
+  template:
+    metadata:
+      labels:
+        app: search-rpc
+      annotations:  # the checksum is part of the pod template, so a new value triggers a rollout
+        checksum/config: 297c85b03623e02096ab74ad9bc630ddae22ff5783a51bf664b9dd854a418ce6
+    spec:
+      affinity:
+        nodeAffinity:  # hard requirement: only nodes labelled swh/rpc=true are eligible
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: swh/rpc
+                operator: In
+                values:
+                - "true"
+      priorityClassName: swh-frontend-rpc
+
+      initContainers:
+        - name: prepare-configuration  # expands the mounted template into /etc/swh/config.yml
+          image: debian:bullseye
+          imagePullPolicy: IfNotPresent
+          command:
+          - /bin/bash
+          args:  # bash evaluates the template text (shell expansion) and writes the result
+          - -c
+          - eval echo "\"$(</etc/swh/configuration-template/config.yml.template)\"" > /etc/swh/config.yml
+          volumeMounts:
+          - name: configuration
+            mountPath: /etc/swh
+          - name: configuration-template
+            mountPath: /etc/swh/configuration-template
+      containers:
+        - name: search-rpc
+          resources:
+            requests:
+              memory: 512Mi
+              cpu: 500m
+          image: container-registry.softwareheritage.org/swh/infra/swh-apps/search:20231013.1
+          imagePullPolicy: IfNotPresent
+          ports:
+            - containerPort: 5010  # was 5005; must equal the Service targetPort (5010) because the probes resolve "rpc" to this number
+              name: rpc
+          readinessProbe:
+            httpGet:
+              path: /
+              port: rpc  # named port declared above
+            initialDelaySeconds: 15
+            failureThreshold: 30
+            periodSeconds: 5
+          livenessProbe:
+            httpGet:
+              path: /
+              port: rpc  # named port declared above
+            initialDelaySeconds: 10
+            periodSeconds: 5
+          command:
+            - /bin/bash
+          args:
+            - -c
+            - /opt/swh/entrypoint.sh
+          env:
+            - name: THREADS
+              value: "5"
+            - name: WORKERS
+              value: "4"
+            - name: TIMEOUT
+              value: "3600"
+            - name: STATSD_HOST
+              value: prometheus-statsd-exporter
+            - name: STATSD_PORT
+              value: "9125"
+            - name: LOG_LEVEL
+              value: INFO
+            - name: SWH_SENTRY_ENVIRONMENT
+              value: staging
+            - name: SWH_MAIN_PACKAGE
+              value: swh.search
+            - name: SWH_SENTRY_DSN
+              valueFrom:
+                secretKeyRef:
+                  name: swh-search-sentry-secret
+                  key: sentry-dsn
+                  # optional: false makes this secret mandatory (the container cannot start without it); only optional: true would merely disable sentry pushes
+                  optional: false
+            - name: SWH_SENTRY_DISABLE_LOGGING_EVENTS
+              value: "true"
+          volumeMounts:
+          - name: configuration
+            mountPath: /etc/swh
+      volumes:
+      - name: configuration  # scratch volume shared by the init container (writer) and search-rpc (reader)
+        emptyDir: {}
+      - name: configuration-template  # read-only template coming from the ConfigMap below
+        configMap:
+          name: search-rpc-configuration-template
+          items:
+          - key: "config.yml.template"
+            path: "config.yml.template"
+---
 # Source: swh/templates/statsd-exporter/deployment.yaml
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: prometheus-statsd-exporter
   namespace: swh
   labels:
     app: prometheus-statsd-exporter
 spec:
   replicas: 1
@@ -21485,21 +21648,21 @@
       app: web
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: web
       annotations:
-        checksum/config: eff72d25b88f78efc4d7fd2bebc98cffceff5af247dd83ab2ea9dd151d563900
+        checksum/config: 611c25cd3ce9c62073631abfd236ad0509df1e66b98a0bf833a493f97c4abaed
     spec:
       affinity:
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
               - key: swh/web
                 operator: In
                 values:
                 - "true"
@@ -21708,20 +21871,43 @@
         emptyDir: {}
       - name: configuration-template
         configMap:
          name: web-configuration-template
          items:
          - key: "config.yml.template"
            path: "config.yml.template"
       - name: static
         emptyDir: {}
 ---
+# Source: swh/templates/search/rpc-autoscale.yaml
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler  # CPU-based autoscaling for the search RPC pods
+metadata:
+  namespace: swh
+  name: search-rpc
+  labels:
+    app: search-rpc
+spec:
+  scaleTargetRef:  # workload whose replica count is adjusted
+    apiVersion: apps/v1
+    kind: Deployment
+    name: search-rpc
+  minReplicas: 2  # lower bound on the replica count
+  maxReplicas: 4  # upper bound on the replica count
+  metrics:
+  - type: Resource
+    resource:
+      name: cpu
+      target:
+        type: Utilization
+        averageUtilization: 75  # percent of the pods' CPU request, averaged over the pods
+---
 # Source: swh/templates/web/autoscaling.yaml
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
   namespace: swh
   name: web
   labels:
     app: web
 spec:
   scaleTargetRef:
@@ -22223,20 +22409,48 @@
     http:
       paths:
       - path: /scheduler_metrics/get
         pathType: Prefix
         backend:
           service:
             name: scheduler-rpc
             port:
               number: 5008
 ---
+# Source: swh/templates/search/rpc-ingress.yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress  # routes requests addressed to host search-rpc-ingress to the search-rpc Service
+metadata:
+  namespace: swh
+  name: search-rpc-ingress-default
+  annotations:
+
+    nginx.ingress.kubernetes.io/proxy-body-size: 4G  # maximum request body size accepted by the proxy
+    nginx.ingress.kubernetes.io/proxy-connect-timeout: "90"  # seconds
+    nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"  # seconds; same value as the TIMEOUT env of the search-rpc container
+    nginx.ingress.kubernetes.io/proxy-request-buffering: "on"
+    nginx.ingress.kubernetes.io/proxy-send-timeout: "90"  # seconds
+
+
+spec:
+  rules:
+  - host: search-rpc-ingress  # same name as the ExternalName Service that clients resolve
+    http:
+      paths:
+      - path: /  # Prefix match on "/" covers every request path
+        pathType: Prefix
+        backend:
+          service:
+            name: search-rpc
+            port:
+              number: 5010  # the port exposed by the search-rpc Service
+---
 # Source: swh/templates/web/ingress.yaml
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
   namespace: swh
   name: web-ingress-authenticated
   annotations:

     cert-manager.io/cluster-issuer: letsencrypt-production-gandi
     kubernetes.io/ingress.class: nginx


------------- diff for environment staging namespace swh-cassandra -------------

--- /tmp/swh-chart.swh.4VSsv23r/staging-swh-cassandra.before    2023-10-25 17:19:09.798845371 +0200
+++ /tmp/swh-chart.swh.4VSsv23r/staging-swh-cassandra.after     2023-10-25 17:19:10.490844870 +0200
@@ -349,21 +349,21 @@
   namespace: swh-cassandra
 data:
   # TODO: rename to not have a dot in the name to allow testing
   config.yml: |
     storage:
       cls: remote
       url: http://storage:5002

     search:
       cls: remote
-      url: http://search0.internal.staging.swh.network:5010
+      url: http://search-rpc-ingress

     debug: true

     introspection: true

     max_raw_content_size: 10000

     max_query_cost:
       anonymous: 50
       user: 500
@@ -3885,21 +3885,21 @@
 data:
   config.yml.template: |
     instance_name: webapp-cassandra.internal.staging.swh.network
     allowed_hosts:
       - webapp-cassandra.internal.staging.swh.network
     storage:
       cls: remote
       url: http://storage:5002
     search:
       cls: remote
-      url: http://search0.internal.staging.swh.network:5010
+      url: http://search-rpc-ingress
     scheduler:
       cls: remote
       url: http://scheduler.internal.staging.swh.network
     vault:
       cls: remote
       url: http://vault-rpc-ingress
     indexer_storage:
       cls: remote
       url: http://storage1.internal.staging.swh.network:5007/
     counters_backend: swh-counters
@@ -13534,20 +13534,30 @@
     port: 443
     protocol: TCP
     targetPort: 9443
   selector:
     app: keda-admission-webhooks
 ---
 # Source: swh/templates/external-services/cname.yaml
 apiVersion: v1
 kind: Service
 metadata:
+  name: search-rpc-ingress  # hostname used by in-namespace clients (search url: http://search-rpc-ingress)
+  namespace: swh-cassandra
+spec:
+  type: ExternalName  # DNS alias to the target below; no selector or ports are defined
+  externalName: archive-staging-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
+---
+# Source: swh/templates/external-services/cname.yaml
+apiVersion: v1
+kind: Service
+metadata:
   name: vault-rpc-ingress
   namespace: swh-cassandra
 spec:
   type: ExternalName
   externalName: archive-staging-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
 ---
 # Source: swh/templates/graphql/service.yaml
 apiVersion: v1
 kind: Service
 metadata:
@@ -14291,21 +14301,21 @@
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: graphql
       annotations:
         # Force a rollout upgrade if the configuration changes
-        checksum/config: ed48f035f1d36e1d58a01239c5d99127e646be0c1acb440fcef7ceb84ae71c7d
+        checksum/config: 69cb90bb582e0f8187da4170bdff4dd975bc9656604a8d4004b553f9d7be13cd
     spec:
       priorityClassName: swh-cassandra-frontend-rpc

       containers:
         - name: graphql
           image: container-registry.softwareheritage.org/swh/infra/swh-apps/graphql:20231025.1
           imagePullPolicy: IfNotPresent
           resources:
             requests:
               memory: 150Mi
@@ -20540,21 +20550,21 @@
       app: web
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: web
       annotations:
-        checksum/config: 09a3336045d642f75d9eb60ab89121c0ab2e8ca3fbe8e85c832b805fb789da09
+        checksum/config: a0671b8184225e41ab72a57b7d0b6079b93dc604c300f5ad1033f50bd9fd6fa6
     spec:
       affinity:
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
               - key: swh/web
                 operator: In
                 values:
                 - "true"


------------- diff for environment staging namespace swh-cassandra-next-version -------------

--- /tmp/swh-chart.swh.4VSsv23r/staging-swh-cassandra-next-version.before       2023-10-25 17:19:10.038845198 +0200
+++ /tmp/swh-chart.swh.4VSsv23r/staging-swh-cassandra-next-version.after        2023-10-25 17:19:10.702844716 +0200
@@ -349,21 +349,21 @@
   namespace: swh-cassandra-next-version
 data:
   # TODO: rename to not have a dot in the name to allow testing
   config.yml: |
     storage:
       cls: remote
       url: http://storage:5002

     search:
       cls: remote
-      url: http://search0.internal.staging.swh.network:5010
+      url: http://search-rpc-ingress

     debug: true

     introspection: true

     max_raw_content_size: 10000

     max_query_cost:
       anonymous: 50
       user: 500
@@ -3679,21 +3679,21 @@
 data:
   config.yml.template: |
     instance_name: webapp-cassandra-next-version.internal.staging.swh.network
     allowed_hosts:
       - webapp-cassandra-next-version.internal.staging.swh.network
     storage:
       cls: remote
       url: http://storage:5002
     search:
       cls: remote
-      url: http://search0.internal.staging.swh.network:5010
+      url: http://search-rpc-ingress
     scheduler:
       cls: remote
       url: http://scheduler.internal.staging.swh.network
     vault:
       cls: remote
       url: http://vault-rpc-ingress-next-version
     indexer_storage:
       cls: remote
       url: http://storage1.internal.staging.swh.network:5007/
     counters_backend: swh-counters
@@ -13328,20 +13328,30 @@
     port: 443
     protocol: TCP
     targetPort: 9443
   selector:
     app: keda-admission-webhooks
 ---
 # Source: swh/templates/external-services/cname.yaml
 apiVersion: v1
 kind: Service
 metadata:
+  name: search-rpc-ingress  # hostname used by in-namespace clients (search url: http://search-rpc-ingress)
+  namespace: swh-cassandra-next-version
+spec:
+  type: ExternalName  # DNS alias to the target below; no selector or ports are defined
+  externalName: archive-staging-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
+---
+# Source: swh/templates/external-services/cname.yaml
+apiVersion: v1
+kind: Service
+metadata:
   name: vault-rpc-ingress
   namespace: swh-cassandra-next-version
 spec:
   type: ExternalName
   externalName: archive-staging-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
 ---
 # Source: swh/templates/graphql/service.yaml
 apiVersion: v1
 kind: Service
 metadata:
@@ -14085,21 +14095,21 @@
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: graphql
       annotations:
         # Force a rollout upgrade if the configuration changes
-        checksum/config: e8915e19ab005fce017f48bc6caf6ea9a5195f54fc04186ca94bc08b33b9efba
+        checksum/config: f742254ee78919836f79985a254aa097320cdcea0341b2816fe17610a4bbaf24
     spec:
       priorityClassName: swh-cassandra-next-version-frontend-rpc

       containers:
         - name: graphql
           image: container-registry.softwareheritage.org/swh/infra/swh-apps/graphql:20231025.1
           imagePullPolicy: IfNotPresent
           resources:
             requests:
               memory: 150Mi
@@ -20004,21 +20014,21 @@
       app: web
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: web
       annotations:
-        checksum/config: f63342dcc16ae9ec1d6c764d085678aac2fa8ba6597ae180517edaf75993353f
+        checksum/config: d533eda0d13732e58af19f577304d19acac1eab285b6b1d2451fe71f515f0a67
     spec:
       affinity:
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
               - key: swh/web
                 operator: In
                 values:
                 - "true"


------------- diff for environment production namespace swh -------------

No differences


------------- diff for environment production namespace swh-cassandra -------------

No differences

Refs. swh/infra/sysadm-environment#4780 (closed)

Edited by Antoine R. Dumont

Merge request reports