Skip to content

Activate RabbitMQ metrics scraping in the production & staging clusters

Antoine R. Dumont requested to merge scrape-rabbitmq-metrics into production

The first commit also deactivates the scraping test on test-staging-rke2.

This works hand-in-hand with [1], which deactivates the scraping from the static Prometheus.

[1] swh/infra/puppet/puppet-swh-site!692 (merged)

make cc-helm-diff | colordiff
[cluster-components] Comparing changes between branches production and scrape-rabbitmq-metrics...
Your branch is up to date with 'origin/production'.
[cluster-components] Generate config in production branch for cluster-components/values/admin-rke2.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/archive-production-rke2.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/archive-staging-rke2.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/gitlab-production.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/gitlab-staging.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/minikube.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/rancher.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/test-staging-rke2.yaml...
[cluster-components] Generate config in scrape-rabbitmq-metrics branch for cluster-components/values/admin-rke2.yaml...
[cluster-components] Generate config in scrape-rabbitmq-metrics branch for cluster-components/values/archive-production-rke2.yaml...
[cluster-components] Generate config in scrape-rabbitmq-metrics branch for cluster-components/values/archive-staging-rke2.yaml...
[cluster-components] Generate config in scrape-rabbitmq-metrics branch for cluster-components/values/gitlab-production.yaml...
[cluster-components] Generate config in scrape-rabbitmq-metrics branch for cluster-components/values/gitlab-staging.yaml...
[cluster-components] Generate config in scrape-rabbitmq-metrics branch for cluster-components/values/minikube.yaml...
[cluster-components] Generate config in scrape-rabbitmq-metrics branch for cluster-components/values/rancher.yaml...
[cluster-components] Generate config in scrape-rabbitmq-metrics branch for cluster-components/values/test-staging-rke2.yaml...


------------- diff for cluster-components/values/admin-rke2.yaml -------------

No differences


------------- diff for cluster-components/values/archive-production-rke2.yaml -------------

--- /tmp/swh-chart.cluster-components.AAGp8DlT/archive-production-rke2.yaml.before      2024-02-08 12:05:10.411115287 +0100
+++ /tmp/swh-chart.cluster-components.AAGp8DlT/archive-production-rke2.yaml.after       2024-02-08 12:05:11.699126184 +0100
@@ -18,20 +18,35 @@
   name: cassandra-servers-svc
   labels:
     app: cassandra
 spec:
   type: ExternalName
   externalName: "fake-url"
   ports:
     - name: jmx-exporter
       port: 7070
       targetPort: 7070
+      protocol: TCP---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: rabbitmq
+  name: rabbitmq-servers-svc
+  labels:
+    app: rabbitmq
+spec:
+  type: ExternalName
+  externalName: "fake-url"
+  ports:
+    - name: rabbitmq-metrics
+      port: 9419
+      targetPort: 9419
       protocol: TCP
 ---
 # Source: cluster-config/templates/alertmanager-config/config.yaml
 apiVersion: monitoring.coreos.com/v1alpha1
 kind: AlertmanagerConfig
 metadata:
   name: irc-relay-config
   namespace: cattle-monitoring-system
 spec:
   route:
@@ -80,20 +95,34 @@
     - ip: 192.168.100.184
     - ip: 192.168.100.185
     - ip: 192.168.100.186
     - ip: 192.168.100.187
     - ip: 192.168.100.188
     - ip: 192.168.100.189
     - ip: 192.168.100.190
     ports:
       - name: jmx-exporter
         port: 7070
+        protocol: TCP---
+apiVersion: v1
+kind: Endpoints
+metadata:
+  namespace: rabbitmq
+  name: rabbitmq-servers-svc
+  labels:
+    app: rabbitmq
+subsets:
+  - addresses:
+    - ip: 192.168.100.104
+    ports:
+      - name: rabbitmq-metrics
+        port: 9419
         protocol: TCP
 ---
 # Source: cluster-config/templates/alerting/swh-alerting.yaml
 apiVersion: monitoring.coreos.com/v1
 kind: PrometheusRule
 metadata:
   name: swh-production.rules
   namespace: cattle-monitoring-system
 spec:
   groups:
@@ -161,11 +190,35 @@
           regex: 192.168.100.18(\d)(.*)
           replacement: cassandra0$1.internal.softwareheritage.org$2
           sourceLabels:
           - __address__
           targetLabel: __address__
         - action: replace
           regex: 192.168.100.19(\d)(.*)
           replacement: cassandra1$1.internal.softwareheritage.org$2
           sourceLabels:
           - __address__
+          targetLabel: __address__---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  namespace: rabbitmq
+  name: rabbitmq-servers-svc
+  labels:
+    app: rabbitmq
+spec:
+  selector:
+    matchLabels:
+      app: rabbitmq
+  namespaceSelector:
+    any: true
+  endpoints:
+    - port: rabbitmq-metrics
+      interval: 30s
+      honorLabels: true
+      relabelings:
+        - action: replace
+          regex: 192.168.100.104(.*)
+          replacement: saatchi.internal.softwareheritage.org$1
+          sourceLabels:
+          - __address__
           targetLabel: __address__


------------- diff for cluster-components/values/archive-staging-rke2.yaml -------------

--- /tmp/swh-chart.cluster-components.AAGp8DlT/archive-staging-rke2.yaml.before 2024-02-08 12:05:10.487115930 +0100
+++ /tmp/swh-chart.cluster-components.AAGp8DlT/archive-staging-rke2.yaml.after  2024-02-08 12:05:11.767126759 +0100
@@ -74,20 +74,35 @@
   name: cassandra-servers-svc
   labels:
     app: cassandra
 spec:
   type: ExternalName
   externalName: "fake-url"
   ports:
     - name: jmx-exporter
       port: 7070
       targetPort: 7070
+      protocol: TCP---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: rabbitmq
+  name: rabbitmq-servers-svc
+  labels:
+    app: rabbitmq
+spec:
+  type: ExternalName
+  externalName: "fake-url"
+  ports:
+    - name: rabbitmq-metrics
+      port: 9419
+      targetPort: 9419
       protocol: TCP
 ---
 # Source: cluster-config/templates/svix/services.yaml
 apiVersion: v1
 kind: Service
 metadata:
   name: svix
   namespace: svix-server
 spec:
   type: ClusterIP
@@ -253,20 +268,34 @@
   labels:
     app: cassandra
 subsets:
   - addresses:
     - ip: 192.168.130.181
     - ip: 192.168.130.182
     - ip: 192.168.130.183
     ports:
       - name: jmx-exporter
         port: 7070
+        protocol: TCP---
+apiVersion: v1
+kind: Endpoints
+metadata:
+  namespace: rabbitmq
+  name: rabbitmq-servers-svc
+  labels:
+    app: rabbitmq
+subsets:
+  - addresses:
+    - ip: 192.168.130.50
+    ports:
+      - name: rabbitmq-metrics
+        port: 9419
         protocol: TCP
 ---
 # Source: cluster-config/templates/alerting/swh-alerting.yaml
 apiVersion: monitoring.coreos.com/v1
 kind: PrometheusRule
 metadata:
   name: swh-staging.rules
   namespace: cattle-monitoring-system
 spec:
   groups:
@@ -328,11 +357,35 @@
   endpoints:
     - port: jmx-exporter
       interval: 30s
       honorLabels: true
       relabelings:
         - action: replace
           regex: 192.168.130.18(\d)(.*)
           replacement: cassandra$1.internal.staging.swh.network$2
           sourceLabels:
           - __address__
+          targetLabel: __address__---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  namespace: rabbitmq
+  name: rabbitmq-servers-svc
+  labels:
+    app: rabbitmq
+spec:
+  selector:
+    matchLabels:
+      app: rabbitmq
+  namespaceSelector:
+    any: true
+  endpoints:
+    - port: rabbitmq-metrics
+      interval: 30s
+      honorLabels: true
+      relabelings:
+        - action: replace
+          regex: 192.168.130.50(.*)
+          replacement: scheduler0.internal.staging.swh.network$1
+          sourceLabels:
+          - __address__
           targetLabel: __address__


------------- diff for cluster-components/values/gitlab-production.yaml -------------

No differences


------------- diff for cluster-components/values/gitlab-staging.yaml -------------

No differences


------------- diff for cluster-components/values/minikube.yaml -------------

No differences


------------- diff for cluster-components/values/rancher.yaml -------------

No differences


------------- diff for cluster-components/values/test-staging-rke2.yaml -------------

--- /tmp/swh-chart.cluster-components.AAGp8DlT/test-staging-rke2.yaml.before    2024-02-08 12:05:11.547124897 +0100
+++ /tmp/swh-chart.cluster-components.AAGp8DlT/test-staging-rke2.yaml.after     2024-02-08 12:05:12.807135556 +0100
@@ -67,38 +67,20 @@
       app: svix-postgres
   ingress:
     - from:
         - podSelector:
             matchLabels:
              app: svix-server
       ports:
         - protocol: TCP
           port: 5432
 ---
-# Source: cluster-config/templates/scrape-external-metrics/service.yaml
-# This defines a service to be monitored by the service monitor---
-apiVersion: v1
-kind: Service
-metadata:
-  namespace: rabbitmq
-  name: rabbitmq-servers-svc
-  labels:
-    app: rabbitmq
-spec:
-  type: ExternalName
-  externalName: "fake-url"
-  ports:
-    - name: rabbitmq-metrics
-      port: 9419
-      targetPort: 9419
-      protocol: TCP
----
 # Source: cluster-config/templates/svix/services.yaml
 apiVersion: v1
 kind: Service
 metadata:
   name: postgres
   namespace: svix-server
 spec:
   type: ClusterIP
   selector:
     app: svix-postgres
@@ -352,20 +334,30 @@
         backend:
           service:
             name: svix
             port:
               number: 8071
   tls:
   - hosts:
     - svix-test.internal.staging.swh.network
     secretName: swh-svix-crt
 ---
+# Source: cluster-config/templates/scrape-external-metrics/endpoints.yaml
+# This defines the external endpoints ips to connect to scrape metrics
+---
+# Source: cluster-config/templates/scrape-external-metrics/service-monitor.yaml
+# This defines the service-monitor to monitor the service which scrapes external metrics
+# This may redefine some metrics, see the relabeling configuration dict key
+---
+# Source: cluster-config/templates/scrape-external-metrics/service.yaml
+# This defines a service to be monitored by the service monitor
+---
 # Source: cluster-config/templates/alertmanager-config/config.yaml
 apiVersion: monitoring.coreos.com/v1alpha1
 kind: AlertmanagerConfig
 metadata:
   name: irc-relay-config
   namespace: cattle-monitoring-system
 spec:
   route:
     groupBy: ['...']
     groupWait: 31s
@@ -387,55 +379,10 @@
         - sendResolved: true
           url: https://alertmanager-irc-relay.internal.admin.swh.network/swh-sysadm
           httpConfig:
             basicAuth:
               username:
                 key: user
                 name: alertmanager-irc-relay-config
               password:
                 key: password
                 name: alertmanager-irc-relay-config
----
-# Source: cluster-config/templates/scrape-external-metrics/endpoints.yaml
-# This defines the external endpoints ips to connect to scrape metrics---
-apiVersion: v1
-kind: Endpoints
-metadata:
-  namespace: rabbitmq
-  name: rabbitmq-servers-svc
-  labels:
-    app: rabbitmq
-subsets:
-  - addresses:
-    - ip: 192.168.130.50
-    ports:
-      - name: rabbitmq-metrics
-        port: 9419
-        protocol: TCP
----
-# Source: cluster-config/templates/scrape-external-metrics/service-monitor.yaml
-# This defines the service-monitor to monitor the service which scrapes external metrics
-# This may redefine some metrics, see the relabeling configuration dict key---
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
-  namespace: rabbitmq
-  name: rabbitmq-servers-svc
-  labels:
-    app: rabbitmq
-spec:
-  selector:
-    matchLabels:
-      app: rabbitmq
-  namespaceSelector:
-    any: true
-  endpoints:
-    - port: rabbitmq-metrics
-      interval: 30s
-      honorLabels: true
-      relabelings:
-        - action: replace
-          regex: 192.168.130.50(.*)
-          replacement: scheduler0.internal.staging.swh.network$1
-          sourceLabels:
-          - __address__
-          targetLabel: __address__

Refs. swh/infra/sysadm-environment#5213 (closed)

Edited by Antoine R. Dumont

Merge request reports