Skip to content

production: Migrate rpc counters to dynamic infra

Antoine R. Dumont requested to merge migrate-rpc-counters into migrate-counters

This is intended to replace the current RPC service running on counters1.i.s.o [1]. Its configuration matches the one currently deployed on that host [2].

In terms of resources, I've used the same configuration (2 workers with 1 thread each), but I've doubled the number of replicas, so we have a bit more capacity than we currently do in the static infra.

[2] production configuration
root@counters1:~# cat /etc/softwareheritage/counters/server.yml
---
counters:
  cls: redis
  host: localhost:6379
history:
  cls: prometheus
  prometheus_host: thanos.internal.admin.swh.network
  prometheus_port: 19191
  live_data_start: 1618415227
  cache_base_directory: "/srv/softwareheritage/counters"
  interval: 12h
  labels:
    environment: production
[1] helm diff
[swh] Comparing changes between branches production and migrate-rpc-counters (per environment)...
Your branch is ahead of 'origin/production' by 1 commit.
  (use "git push" to publish your local commits)
[swh] Generate config in production branch for environment staging, namespace swh...
[swh] Generate config in production branch for environment staging, namespace swh-cassandra...
[swh] Generate config in production branch for environment staging, namespace swh-cassandra-next-version...
[swh] Generate config in migrate-rpc-counters branch for environment staging...
[swh] Generate config in migrate-rpc-counters branch for environment staging...
[swh] Generate config in migrate-rpc-counters branch for environment staging...
Your branch is ahead of 'origin/production' by 1 commit.
  (use "git push" to publish your local commits)
[swh] Generate config in production branch for environment production, namespace swh...
[swh] Generate config in production branch for environment production, namespace swh-cassandra...
[swh] Generate config in production branch for environment production, namespace swh-cassandra-next-version...
[swh] Generate config in migrate-rpc-counters branch for environment production...
[swh] Generate config in migrate-rpc-counters branch for environment production...
[swh] Generate config in migrate-rpc-counters branch for environment production...


------------- diff for environment staging namespace swh -------------

No differences


------------- diff for environment staging namespace swh-cassandra -------------

No differences


------------- diff for environment staging namespace swh-cassandra-next-version -------------

No differences


------------- diff for environment production namespace swh -------------

--- /tmp/swh-chart.swh.0IXfXlzL/production-swh.before   2024-02-07 14:54:53.476307371 +0100
+++ /tmp/swh-chart.swh.0IXfXlzL/production-swh.after    2024-02-07 14:54:54.000306413 +0100
@@ -646,20 +646,41 @@
       - directory
       - origin
       - origin_visit
       - origin_visit_status
       - release
       - revision
       - skipped_content
       - snapshot
       prefix: swh.journal.objects
 ---
+# Source: swh/templates/counters/rpc-configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  namespace: swh
+  name: counters-rpc-configuration-template
+data:
+  config.yml.template: |
+    counters:
+      cls: redis
+      host: counters1.internal.softwareheritage.org:6379
+    history:
+      cls: prometheus
+      interval: 12h
+      labels:
+        environment: production
+      live_data_start: 1618415227
+      prometheus_host: thanos.internal.admin.swh.network
+      prometheus_port: 19191
+      cache_base_directory: /srv/softwareheritage/counters
+---
 # Source: swh/templates/counters/script-utils.yaml
 apiVersion: v1
 kind: ConfigMap
 metadata:
   name: script-utils
   namespace: swh
 data:
   refresh-counters-cache.sh: |
     #!/bin/bash

@@ -17396,20 +17417,37 @@
   namespace: default
 spec:
   ports:
   - name: http
     port: 443
     protocol: TCP
     targetPort: 9443
   selector:
     app: keda-admission-webhooks
 ---
+# Source: swh/templates/counters/rpc-service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: counters-rpc
+  namespace: swh
+  labels:
+    app: counters-rpc
+spec:
+  type: ClusterIP
+  selector:
+    app: counters-rpc
+  ports:
+    - port: 5011
+      targetPort: 5011
+      name: rpc
+---
 # Source: swh/templates/deposit/service.yaml
 apiVersion: v1
 kind: Service
 metadata:
   name: deposit
   namespace: swh
   labels:
     app: deposit
 spec:
   type: ClusterIP
@@ -17421,20 +17459,30 @@
       name: rpc

     - port: 80
       targetPort: 80
       name: webstatic
 ---
 # Source: swh/templates/external-services/cname.yaml
 apiVersion: v1
 kind: Service
 metadata:
+  name: counters-rpc-ingress
+  namespace: swh
+spec:
+  type: ExternalName
+  externalName: archive-production-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
+---
+# Source: swh/templates/external-services/cname.yaml
+apiVersion: v1
+kind: Service
+metadata:
   name: indexer-storage-read-only-rpc-ingress
   namespace: swh
 spec:
   type: ExternalName
   externalName: archive-production-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
 ---
 # Source: swh/templates/external-services/cname.yaml
 apiVersion: v1
 kind: Service
 metadata:
@@ -19116,20 +19164,171 @@
         configMap:
           name: counters-journal-client-configuration-template
           items:
           - key: "config.yml.template"
             path: "config.yml.template"
       - name: config-utils
         configMap:
           name: config-utils
           defaultMode: 0555
 ---
+# Source: swh/templates/counters/rpc-deployment.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: swh
+  name: counters-rpc
+  labels:
+    app: counters-rpc
+spec:
+  revisionHistoryLimit: 2
+  replicas: 2
+  selector:
+    matchLabels:
+      app: counters-rpc
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxSurge: 1
+  template:
+    metadata:
+      labels:
+        app: counters-rpc
+      annotations:
+        checksum/configmap: 598be262c2cfe9c36bf0fe7e2b9195bd93e318b1af8b8be246dead7ad2d2eef8
+        checksum/script-utils: 790f42f6e7c10aa1b8c44024b4f7b5525af8871e533b5c378f9cdbf34e4d015c
+    spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: swh/rpc
+                operator: In
+                values:
+                - "true"
+      priorityClassName: swh-frontend-rpc
+
+      initContainers:
+        - name: prepare-configuration
+          image: container-registry.softwareheritage.org/swh/infra/swh-apps/utils:20231211.1
+          imagePullPolicy: IfNotPresent
+          command:
+          - /entrypoints/prepare-configuration.sh
+          volumeMounts:
+          - name: configuration
+            mountPath: /etc/swh
+          - name: configuration-template
+            mountPath: /etc/swh/configuration-template
+          - name: config-utils
+            mountPath: /entrypoints
+            readOnly: true
+          env:
+
+
+        - name: fetch-static-history
+          image: container-registry.softwareheritage.org/swh/infra/swh-apps/utils:20231211.1
+          imagePullPolicy: IfNotPresent
+          command:
+          - /entrypoints/fetch-static-history.sh
+          args:
+          - /srv/softwareheritage/counters/static_history.json
+          volumeMounts:
+          - name: script-utils
+            mountPath: /entrypoints
+            readOnly: true
+          - name: staticdata
+            mountPath: /srv/softwareheritage/counters
+      containers:
+        - name: counters-rpc
+          # Workaround until we have a way to define distributed volumes
+          # This initializes the history cache file
+          lifecycle:
+            postStart:
+              exec:
+                command:
+                  - /entrypoints/refresh-counters-cache.sh
+                  - history.json
+                  - static_history.json
+                  - localhost:5011
+          resources:
+            requests:
+              memory: 512Mi
+              cpu: 500m
+          image: container-registry.softwareheritage.org/swh/infra/swh-apps/counters:20240202.1
+          imagePullPolicy: IfNotPresent
+          ports:
+            - containerPort: 5011
+              name: rpc
+          readinessProbe:
+            httpGet:
+              path: /
+              port: rpc
+            initialDelaySeconds: 15
+            failureThreshold: 30
+            periodSeconds: 5
+          livenessProbe:
+            tcpSocket:
+              port: rpc
+            initialDelaySeconds: 10
+            periodSeconds: 5
+          command:
+          - /bin/bash
+          args:
+          - -c
+          - /opt/swh/entrypoint.sh
+          env:
+            - name: THREADS
+              value: "1"
+            - name: WORKERS
+              value: "2"
+            - name: TIMEOUT
+              value: "10"
+            - name: STATSD_HOST
+              value: prometheus-statsd-exporter
+            - name: STATSD_PORT
+              value: "9125"
+            - name: LOG_LEVEL
+              value: INFO
+          volumeMounts:
+          - name: configuration
+            mountPath: /etc/swh
+          - name: staticdata
+            mountPath: /srv/softwareheritage/counters
+          - name: script-utils
+            mountPath: /entrypoints
+            readOnly: true
+      volumes:
+      - name: configuration
+        emptyDir: {}
+      - name: staticdata
+        emptyDir: {}
+      - name: configuration-template
+        configMap:
+          name: counters-rpc-configuration-template
+          items:
+          - key: "config.yml.template"
+            path: "config.yml.template"
+      - name: config-utils
+        configMap:
+          name: config-utils
+          defaultMode: 0555
+      - name: script-utils
+        configMap:
+          name: script-utils
+          defaultMode: 0555
+          items:
+          - key: "fetch-static-history.sh"
+            path: "fetch-static-history.sh"
+          - key: "refresh-counters-cache.sh"
+            path: "refresh-counters-cache.sh"
+---
 # Source: swh/templates/deposit/deployment.yaml
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   namespace: swh
   name: deposit
   labels:
     app: deposit
 spec:
   revisionHistoryLimit: 2
@@ -34681,20 +34880,62 @@
                 path: "config.yml.template"
           - name: pgservice-configuration-template
             configMap:
               name: pgservice-archive-configuration-template
               items:
               - key: "pg-service-conf"
                 path: "pg_service.conf"

           restartPolicy: OnFailure
 ---
+# Source: swh/templates/counters/rpc-ingress.yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  namespace: swh
+  name: counters-rpc-ingress-default
+  labels:
+    app: counters-rpc
+    endpoint-definition: default
+  annotations:
+    nginx.ingress.kubernetes.io/service-upstream: "true"
+    nginx.ingress.kubernetes.io/whitelist-source-range: 10.42.0.0/16,10.43.0.0/16,127.0.0.0/8,192.168.100.0/24,192.168.101.0/24,192.168.200.0/22
+    nginx.ingress.kubernetes.io/proxy-body-size: 4G
+    nginx.ingress.kubernetes.io/proxy-connect-timeout: "90"
+    nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
+    nginx.ingress.kubernetes.io/proxy-request-buffering: "on"
+    nginx.ingress.kubernetes.io/proxy-send-timeout: "90"
+
+spec:
+  rules:
+  - host: counters-rpc-ingress
+    http:
+      paths:
+      - path: /
+        pathType: Prefix
+        backend:
+          service:
+            name: counters-rpc
+            port:
+              number: 5011
+
+  - host: counters.internal.softwareheritage.org
+    http:
+      paths:
+      - path: /
+        pathType: Prefix
+        backend:
+          service:
+            name: counters-rpc
+            port:
+              number: 5011
+---
 # Source: swh/templates/deposit/ingress.yaml
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
   namespace: swh
   name: deposit-ingress-authenticated
   labels:
     app: deposit
     endpoint-definition: authenticated
   annotations:


------------- diff for environment production namespace swh-cassandra -------------

--- /tmp/swh-chart.swh.0IXfXlzL/production-swh-cassandra.before 2024-02-07 14:54:53.652307049 +0100
+++ /tmp/swh-chart.swh.0IXfXlzL/production-swh-cassandra.after  2024-02-07 14:54:54.172306098 +0100
@@ -11198,20 +11198,30 @@
     port: 443
     protocol: TCP
     targetPort: 9443
   selector:
     app: keda-admission-webhooks
 ---
 # Source: swh/templates/external-services/cname.yaml
 apiVersion: v1
 kind: Service
 metadata:
+  name: counters-rpc-ingress
+  namespace: swh-cassandra
+spec:
+  type: ExternalName
+  externalName: archive-production-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
+---
+# Source: swh/templates/external-services/cname.yaml
+apiVersion: v1
+kind: Service
+metadata:
   name: indexer-storage-read-only-rpc-ingress
   namespace: swh-cassandra
 spec:
   type: ExternalName
   externalName: archive-production-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
 ---
 # Source: swh/templates/external-services/cname.yaml
 apiVersion: v1
 kind: Service
 metadata:

Depends on !335 (merged)

Refs. swh/infra/sysadm-environment#5237 (closed)

Edited by Antoine R. Dumont

Merge request reports