From df94f805f3dfbada70951aae314cb132d7fc5a44 Mon Sep 17 00:00:00 2001
From: Vincent SELLIER <vincent.sellier@softwareheritage.org>
Date: Thu, 7 Mar 2024 14:38:31 +0100
Subject: [PATCH 1/2] cluster-component: refactor the alertmanager irc relay
 configuration

- Respect the tacit rule of not deploying anything unless it is explicitly
  requested
- Improve the configuration of the AlertManagerConfig and irc deployment
  by adding more configuration points

Related to swh/infra/sysadm-environment#5281
---
 .gitignore                                    |  3 +--
 cluster-components/templates/NOTES.txt        | 11 +++++---
 .../templates/alertmanager-config/config.yaml | 16 ++++++-----
 cluster-components/values.yaml                | 27 +++++++++----------
 cluster-components/values/admin-rke2.yaml     |  2 +-
 .../values/archive-production-rke2.yaml       |  3 +++
 .../values/archive-staging-rke2.yaml          |  3 +++
 .../values/test-staging-rke2.yaml             |  2 +-
 8 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/.gitignore b/.gitignore
index 26df1d9a9..3ffe516d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,3 @@
 __snapshot__
-/cluster-components/charts/
 /swh/Chart.lock
-/swh/charts/
+charts
diff --git a/cluster-components/templates/NOTES.txt b/cluster-components/templates/NOTES.txt
index 90f96eae9..862d81adf 100644
--- a/cluster-components/templates/NOTES.txt
+++ b/cluster-components/templates/NOTES.txt
@@ -6,12 +6,15 @@ This installed/upgraded:
   http://alertmanager-irc-relay:{{ .Values.alertmanagerIrcRelay.http_port }}/{{ .Values.alertmanagerIrcRelay.room }}
 {{ end }}
 {{- if and .Values.alertmanagerIrcRelay.enabled .Values.alertmanagerIrcRelay.ingress.enabled }}
-- An alert manager irc relay ingress exposed at {{ .Values.alertmanager.ircrelay.host
-  }}. Any alerts sent from a cluster with access to this ingress will be propagated to
+- An alert manager irc relay ingress exposed at :
+{{- range $host := .Values.alertmanagerIrcRelay.ingress.hosts }}
+  - {{ $host }}
+{{- end }}{{/* left-trim so hosts render without blank lines between them */}}
+  Any alerts sent from a cluster with access to this ingress will be propagated to
   irc #{{ .Values.alertmanagerIrcRelay.room }} room
 {{ end }}
-{{- if and .Values.alertmanager.enabled .Values.alertmanager.ircrelay.enabled }}
-- Allows to relay alertmanager's alerts to the relay exposed at {{ .Values.alertmanager.ircrelay.host }}
+{{- if .Values.alertmanagerConfig.enabled }}
+- Allows to relay alertmanager's alerts to the relay exposed at {{ .Values.alertmanagerConfig.ircRelayHost }}
 {{ end }}
 {{- if .Values.blackboxExporter.enabled }}
 - blackbox exporter. This installs probes to expand monitoring to http(s), DNS, ...
diff --git a/cluster-components/templates/alertmanager-config/config.yaml b/cluster-components/templates/alertmanager-config/config.yaml
index 14fb5ca35..e326426d6 100644
--- a/cluster-components/templates/alertmanager-config/config.yaml
+++ b/cluster-components/templates/alertmanager-config/config.yaml
@@ -1,10 +1,10 @@
-{{- if and .Values.alertmanagerConfig.enabled }}
+{{- if .Values.alertmanagerConfig.enabled }}
 ---
 apiVersion: monitoring.coreos.com/v1alpha1
 kind: AlertmanagerConfig
 metadata:
   name: irc-relay-config
-  namespace: cattle-monitoring-system
+  namespace: {{ .Values.alertmanagerConfig.namespace }}
 spec:
   route:
     groupBy: ['...']
@@ -25,13 +25,15 @@ spec:
     - name: ircrelay
       webhookConfigs:
         - sendResolved: true
-          url: {{ .Values.alertmanager.ircrelay.host }}
+          url: {{ .Values.alertmanagerConfig.ircRelayHost }}
+          {{ if .Values.alertmanagerConfig.authentication.enabled -}}
           httpConfig:
             basicAuth:
               username:
-                key: user
-                name: alertmanager-irc-relay-config
+                key: {{ .Values.alertmanagerConfig.authentication.userKeyRef }}
+                name: {{ .Values.alertmanagerConfig.authentication.secretRef }}
               password:
-                key: password
-                name: alertmanager-irc-relay-config
+                key: {{ .Values.alertmanagerConfig.authentication.passwordKeyRef }}
+                name: {{ .Values.alertmanagerConfig.authentication.secretRef }}
+          {{- end -}}
 {{ end }}
diff --git a/cluster-components/values.yaml b/cluster-components/values.yaml
index 618054917..f88f42134 100644
--- a/cluster-components/values.yaml
+++ b/cluster-components/values.yaml
@@ -10,19 +10,6 @@ cert-manager:
   # Supported in the chart, not seen on the pods...
   priorityClassName: cluster-components-system
 
-# This configuration is swh specific (and independent from the prometheus configuration
-# already done during terraform provisioning). When activated, this allows to relay the
-# cluster's prometheus alerts to the cluster admin's alertmanager ingress irc relay
-alertmanager:
-  enabled: false
-  # Supported in the chart, not seen on the pods...
-  priorityClassName: cluster-components-system
-  ircrelay:
-    enabled: true
-    host: https://alertmanager-irc-relay.internal.admin.swh.network/swh-sysadm
-    # .htaccess or authentication credentials
-    # secret:
-
 prometheus:
   enabled: false
   # Not working somehow... Charts reference it but it's not seen in minikube
@@ -35,9 +22,19 @@ prometheus:
   kube-state-metrics:
     namespaceOverride: cattle-monitoring-system
 
+# This configuration is swh specific (and independent from the prometheus configuration
+# already done during terraform provisioning). When activated, this allows to relay the
+# cluster's prometheus alerts to the cluster admin's alertmanager ingress irc relay
 alertmanagerConfig:
-  enabled: true
-  host: https://alertmanager-irc-relay.admin.swh.network/swh-sysadm
+  enabled: false
+  namespace: cattle-monitoring-system
+  ircRelayHost: https://alertmanager-irc-relay.internal.admin.swh.network/swh-sysadm
+  # .htaccess or authentication credentials
+  authentication:
+    enabled: true
+    secretRef: alertmanager-irc-relay-config
+    userKeyRef: user
+    passwordKeyRef: password
 
 alertmanagerIrcRelay:
   enabled: false
diff --git a/cluster-components/values/admin-rke2.yaml b/cluster-components/values/admin-rke2.yaml
index a51888ef0..a507b4d06 100644
--- a/cluster-components/values/admin-rke2.yaml
+++ b/cluster-components/values/admin-rke2.yaml
@@ -8,7 +8,7 @@ alertmanagerIrcRelay:
       clusterIssuer: letsencrypt-production
 
 alertmanagerConfig:
-  host: https://alertmanager-irc-relay.internal.admin.swh.network/swh-sysadm
+  enabled: true
 
 blackboxExporter:
   enabled: true
diff --git a/cluster-components/values/archive-production-rke2.yaml b/cluster-components/values/archive-production-rke2.yaml
index 885e0f318..7290674b3 100644
--- a/cluster-components/values/archive-production-rke2.yaml
+++ b/cluster-components/values/archive-production-rke2.yaml
@@ -5,6 +5,9 @@ alerting:
   enabled: true
   environment: production
 
+alertmanagerConfig:
+  enabled: true
+
 scrapeExternalMetrics:
   enabled: true
   deployments:
diff --git a/cluster-components/values/archive-staging-rke2.yaml b/cluster-components/values/archive-staging-rke2.yaml
index 57e5b7a4c..f44c65fa0 100644
--- a/cluster-components/values/archive-staging-rke2.yaml
+++ b/cluster-components/values/archive-staging-rke2.yaml
@@ -2,6 +2,9 @@
 alertmanager:
   enabled: true
 
+alertmanagerConfig:
+  enabled: true
+
 podPriority:
   enabled: true
 
diff --git a/cluster-components/values/test-staging-rke2.yaml b/cluster-components/values/test-staging-rke2.yaml
index eb2817b2a..7ecf05f3f 100644
--- a/cluster-components/values/test-staging-rke2.yaml
+++ b/cluster-components/values/test-staging-rke2.yaml
@@ -1,5 +1,5 @@
 # Relay prometheus alerts to the admin cluster's ingress relay
-alertmanager:
+alertmanagerConfig:
   enabled: true
 
 svix:
-- 
GitLab


From 0d601505cf0251e0edc3fb1597331c0b368b23b1 Mon Sep 17 00:00:00 2001
From: Vincent SELLIER <vincent.sellier@softwareheritage.org>
Date: Fri, 8 Mar 2024 00:19:02 +0100
Subject: [PATCH 2/2] cluster-component: Activate irc relay on gitlab staging

This is a test to evaluate the behavior and the monitoring noise it
will generate

Related to swh/infra/sysadm-environment#5281
---
 cluster-components/values/gitlab-staging.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cluster-components/values/gitlab-staging.yaml b/cluster-components/values/gitlab-staging.yaml
index e69de29bb..60b9c53f1 100644
--- a/cluster-components/values/gitlab-staging.yaml
+++ b/cluster-components/values/gitlab-staging.yaml
@@ -0,0 +1,4 @@
+alertmanagerConfig:
+  enabled: true
+  namespace: monitoring  # overrides the chart default (cattle-monitoring-system)
+  ircRelayHost: https://alertmanager-irc-relay.admin.swh.network/swh-sysadm
-- 
GitLab