staging-next-version: Enable metrics
This activates metrics (through configuration) on the Elasticsearch instance running in the next-version environment.
It then adds the configuration needed for those metrics to be scraped.
With a sync window on the impacted cluster and an adaptation of the impacted Elasticsearch instance, it is working [1].
[1]
bash-4.4# curl -s http://localhost:9200/_prometheus/metrics | wc -l
1072
bash-4.4# curl -s http://localhost:9200/_prometheus/metrics | head
# HELP es_transport_rx_bytes_count Bytes received
# TYPE es_transport_rx_bytes_count gauge
es_transport_rx_bytes_count{cluster="search-next-version",node="search-next-version-es-node-0",nodeid="YjF3DRM6Q9-iqGcxlfjR1Q",} 0.0
# HELP es_index_warmer_time_seconds Time spent during warmers
# TYPE es_index_warmer_time_seconds gauge
es_index_warmer_time_seconds{cluster="search-next-version",index="origin-v0.11",context="primaries",} 32.0
es_index_warmer_time_seconds{cluster="search-next-version",index="origin-v0.11",context="total",} 32.0
# HELP es_index_translog_uncommitted_operations_number Current number of uncommitted translog operations
# TYPE es_index_translog_uncommitted_operations_number gauge
es_index_translog_uncommitted_operations_number{cluster="search-next-version",index="origin-v0.11",context="primaries",} 0.0
helm diff
[cluster-components] Comparing changes between branches production and mr/es-scrape-metrics...
Your branch is up to date with 'origin/production'.
[cluster-components] Generate config in production branch for cluster-components/values/admin-rke2.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/archive-production-rke2.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/archive-staging-rke2.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/default.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/gitlab-production.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/gitlab-staging.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/local-cluster.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/rancher.yaml...
[cluster-components] Generate config in production branch for cluster-components/values/test-staging-rke2.yaml...
[cluster-components] Generate config in mr/es-scrape-metrics branch for cluster-components/values/admin-rke2.yaml...
[cluster-components] Generate config in mr/es-scrape-metrics branch for cluster-components/values/archive-production-rke2.yaml...
[cluster-components] Generate config in mr/es-scrape-metrics branch for cluster-components/values/archive-staging-rke2.yaml...
[cluster-components] Generate config in mr/es-scrape-metrics branch for cluster-components/values/default.yaml...
[cluster-components] Generate config in mr/es-scrape-metrics branch for cluster-components/values/gitlab-production.yaml...
[cluster-components] Generate config in mr/es-scrape-metrics branch for cluster-components/values/gitlab-staging.yaml...
[cluster-components] Generate config in mr/es-scrape-metrics branch for cluster-components/values/local-cluster.yaml...
[cluster-components] Generate config in mr/es-scrape-metrics branch for cluster-components/values/rancher.yaml...
[cluster-components] Generate config in mr/es-scrape-metrics branch for cluster-components/values/test-staging-rke2.yaml...
------------- diff for cluster-components/values/admin-rke2.yaml -------------
No differences
------------- diff for cluster-components/values/archive-production-rke2.yaml -------------
No differences
------------- diff for cluster-components/values/archive-staging-rke2.yaml -------------
--- /tmp/swh-chart.cluster-components.IYj5qLW5/archive-staging-rke2.yaml.before 2024-12-05 16:34:13.154832454 +0100
+++ /tmp/swh-chart.cluster-components.IYj5qLW5/archive-staging-rke2.yaml.after 2024-12-05 16:34:13.674811779 +0100
@@ -837,20 +837,31 @@
metadata:
name: search-next-version
namespace: swh-cassandra-next-version
spec:
version: 7.15.2
nodeSets:
- name: node
count: 1
podTemplate:
spec:
+ initContainers:
+ - name: sysctl
+ securityContext:
+ privileged: true
+ command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
+ - name: install-plugins
+ command:
+ - sh
+ - -c
+ - |
+ bin/elasticsearch-plugin install -b repository-s3 https://github.com/vvanholl/elasticsearch-prometheus-exporter/releases/download/7.15.2.0/prometheus-exporter-7.15.2.0.zip
containers:
- name: elasticsearch
env:
- name: ES_JAVA_OPTS
value: -Xms1g -Xmx1g
volumeClaimTemplates:
- metadata:
name: elasticsearch-data
spec:
@@ -1852,20 +1863,41 @@
matchLabels:
app.kubernetes.io/instance: cassandra-cassandra-cluster
app.kubernetes.io/name: cassandra
namespaceSelector:
any: true
endpoints:
- port: metrics
interval: 30s
honorLabels: true
---
+# Source: cluster-config/templates/scrape-cluster-metrics/service-monitor.yaml
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ namespace: swh-cassandra-next-version
+ name: elasticsearch-next-version-internal-servers-sm
+ labels:
+ app: elasticsearch-next-version
+spec:
+ selector:
+ matchLabels:
+ common.k8s.elastic.co/type: elasticsearch
+ elasticsearch.k8s.elastic.co/cluster-name: search-next-version
+ namespaceSelector:
+ any: true
+ endpoints:
+ - port: metrics
+ path: /_prometheus/metrics
+ interval: 30s
+ honorLabels: true
+---
# Source: cluster-config/templates/scrape-external-metrics/service-monitor.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
namespace: cassandra
name: cassandra-servers-svc
labels:
app: cassandra
spec:
selector:
------------- diff for cluster-components/values/default.yaml -------------
No differences
------------- diff for cluster-components/values/gitlab-production.yaml -------------
No differences
------------- diff for cluster-components/values/gitlab-staging.yaml -------------
No differences
------------- diff for cluster-components/values/local-cluster.yaml -------------
No differences
------------- diff for cluster-components/values/rancher.yaml -------------
No differences
------------- diff for cluster-components/values/test-staging-rke2.yaml -------------
No differences
Edited by Antoine R. Dumont