Skip to content

Update status.io metrics upgrade script to use the dynamic infra metrics

Vincent Sellier requested to merge statusio_metrics into production
  • Use Thanos instead of the old Prometheus
  • Several webapps now report the same metric in Thanos, so the query needs to be updated to avoid summing the values reported by every webapp
  • Support the future 'pending' status

Related to swh/infra/sysadm-environment#5227 (closed)

octocatalog diff
*** Running octocatalog-diff on host pergamon.softwareheritage.org
I, [2024-01-29T09:48:20.639931 #673046]  INFO -- : Catalogs compiled for pergamon.softwareheritage.org
W, [2024-01-29T09:48:21.559760 #673046]  WARN -- : Resource File[/tmp/ocd-ipc-20240129-673046-ct93lx/ocd-builddir-20240129-673053-1ncikmc/routes.yaml] appears to depend on catalog compilation directory. Suppressed from results.
W, [2024-01-29T09:48:21.559820 #673046]  WARN -- : Resource File[/tmp/ocd-ipc-20240129-673046-ct93lx/ocd-builddir-20240129-673052-16c54jm/routes.yaml] appears to depend on catalog compilation directory. Suppressed from results.
W, [2024-01-29T09:48:21.559966 #673046]  WARN -- : Resource key Ini_setting[puppetdbserver_urls] parameters => path may depend on catalog compilation directory, but there may be differences. This is included in results for now, but please verify.
W, [2024-01-29T09:48:21.559981 #673046]  WARN -- : Resource key Ini_setting[puppetdbserver_urls] parameters => path appears to depend on catalog compilation directory. Suppressed from results.
W, [2024-01-29T09:48:21.560011 #673046]  WARN -- : Resource key Ini_setting[soft_write_failure] parameters => path may depend on catalog compilation directory, but there may be differences. This is included in results for now, but please verify.
W, [2024-01-29T09:48:21.560021 #673046]  WARN -- : Resource key Ini_setting[soft_write_failure] parameters => path appears to depend on catalog compilation directory. Suppressed from results.
I, [2024-01-29T09:48:21.560137 #673046]  INFO -- : Diffs computed for pergamon.softwareheritage.org
diff origin/production/pergamon.softwareheritage.org current/pergamon.softwareheritage.org
*******************************************
  Concat_fragment[profile::cron::statusio_scn_metrics] =>
   parameters =>
     content =>
      @@ -1,2 +1,2 @@
       # Cron snippet statusio_scn_metrics
      -*/5 * * * * root chronic sh -c '/usr/local/bin/statusio_export_archive_counters.py -m swh_web_accepted_save_requests --api-id status_io::api_id --api-key status_io::api_key --status-page-id status_io::status_page --metric-id status_io::metrics::scn -f environment=production -f "load_task_status=~scheduled|not_yet_scheduled" -f instance=moma.internal.softwareheritage.org'__
      +*/5 * * * * root chronic sh -c '/usr/local/bin/statusio_export_archive_counters.py --api-id status_io::api_id --api-key status_io::api_key --status-page-id status_io::status_page --metric-id status_io::metrics::scn -q "sum (max by (load_task_status) (swh_web_accepted_save_requests{environment="production", load_task_status=~"pending|scheduled|not_yet_scheduled"}))" -s thanos.internal.admin.swh.network -p 19191'__
*******************************************
  File[/etc/bind/keys/local-update] =>
   parameters =>
     content =>
      @@ -2,4 +2,4 @@
       key local-update {
        algorithm hmac-sha256;
      - secret "0nYmgHmmAjpiH96vPDV8/PujaHgyVGQ/3yN/4QZeDGJvO5Gh8xQTwr+IXwVelCqPqDnfvhF37LkEAPrwxutT7w==";
      + secret "jUU2ArBP0s0yfm06aGkvx2E7OLpfrOtgrsHJ3DMUzneCpbzwBRBNIfI05GHli2hcnjsnyrRJrhD2yChYdXm+eg==";
       };
*******************************************
  File[/etc/bind/rndc.key] =>
   parameters =>
     content =>
      @@ -2,4 +2,4 @@
       key rndc-key {
        algorithm hmac-md5;
      - secret "6WCLMWnCIcaIf0HbKESBEPX6IMBaDWHbwL4f0dzlRyuvS2oZXr7bEAyCclg8esqC+3ctOVN5tBDJRKfOgWdGWA==";
      + secret "q79E6IvRibFbRWpfSSuA0FlmkncQtP6QX9P6utydjUrskZZK7bPtSB+mo0YbVobMzqyGNlCwn6qVRZuaeGQzDg==";
       };
*******************************************
  File[/usr/local/bin/statusio_export_archive_counters.py] =>
   parameters =>
     content =>
      @@ -3,5 +3,5 @@
      _
       # python3 update_metrics.py -m swh_web_accepted_save_requests --api-id 1234 --api-key 456 --status-page-id 123 \
      -#   --metric-id 456 -f environment="production" -f "load_task_status=~scheduled|not_yet_scheduled" -f instance=moma.internal.softwareheritage.org
      +#   --metric-id 456 -f environment="production" -f "load_task_status=~scheduled|not_yet_scheduled|pending" -f instance=thanos.internal.softwareheritage.org
       import statusio
       import requests
      @@ -23,30 +23,17 @@
      _
      _
      -def escape_filter(filter: str) -> str:
      -    if "=~" in filter:
      -        separator = "=~"
      -    else:
      -        separator = "="
      -
      -    terms = filter.split(separator)
      -
      -    return f'{terms[0]}{separator}"{terms[1]}"'
      -
      _
       def get_prometheus_values(
           prometheus_url: str,
      -    metric: str,
      -    filters: List[str],
      +    query: str,
           start: int,
           end: int,
           interval: int,
       ) -> List[List]:
      -    escaped_filters = [escape_filter(filter) for filter in filters]
      -
      -    metric_filters = ",".join(escaped_filters)
      _
      -    url = f"{prometheus_url}?query=sum({metric}{{{metric_filters}}})&start={start}&end={end}&step={interval}"
      +    url = f"{prometheus_url}?query={query}&start={start}&end={end}&step={interval}"
      _
           response = requests.get(url)
      +
           if response.ok == False:
               raise ValueError(f"Unable to get prometheus metrics: {response.text}")
      @@ -83,14 +70,8 @@
       )
       @click.option(
      -    "--prometheus-metric",
      -    "-m",
      +    "--prometheus-query",
      +    "-q",
           required=True,
      -    help="Prometheus metric to query",
      -)
      -@click.option(
      -    "--prometheus-filter",
      -    "-f",
      -    multiple=True,
      -    help="Prometheus metric to query",
      +    help="Prometheus query to select the metrics",
       )
       @click.option(
      @@ -117,6 +98,5 @@
           prometheus_server: str,
           prometheus_port: int,
      -    prometheus_metric: str,
      -    prometheus_filter: List[str],
      +    prometheus_query: str,
           api_id: str,
           api_key: str,
      @@ -140,6 +120,5 @@
           raw_values = get_prometheus_values(
               prometheus_url,
      -        prometheus_metric,
      -        prometheus_filter,
      +        prometheus_query,
               day_start.timestamp(),
               current_time.timestamp(),
      @@ -152,6 +131,5 @@
           raw_values = get_prometheus_values(
               prometheus_url,
      -        prometheus_metric,
      -        prometheus_filter,
      +        prometheus_query,
               week_start.timestamp(),
               current_time.timestamp(),
      @@ -164,6 +142,5 @@
           raw_values = get_prometheus_values(
               prometheus_url,
      -        prometheus_metric,
      -        prometheus_filter,
      +        prometheus_query,
               month_start.timestamp(),
               current_time.timestamp(),
      @@ -191,5 +168,5 @@
           )
      _
      -    # this line will be sent by email via cron_
      +    # this line will be sent by email via cron
           # if the return code is not 0
           print(result)
*******************************************
  Profile::Cron::D[statusio_scn_metrics] =>
   parameters =>
     command =>
      - chronic sh -c '/usr/local/bin/statusio_export_archive_counters.py -m swh_web_accepted_save_requests --api-id status_io::api_id --api-key status_io::api_key --status-page-id status_io::status_page --metric-id status_io::metrics::scn -f environment=production -f "load_task_status=~scheduled|not_yet_scheduled" -f instance=moma.internal.softwareheritage.org'__
      + chronic sh -c '/usr/local/bin/statusio_export_archive_counters.py --api-id status_io::api_id --api-key status_io::api_key --status-page-id status_io::status_page --metric-id status_io::metrics::scn -q "sum (max by (load_task_status) (swh_web_accepted_save_requests{environment="production", load_task_status=~"pending|scheduled|not_yet_scheduled"}))" -s thanos.internal.admin.swh.network -p 19191'__
*******************************************
*** End octocatalog-diff on pergamon.softwareheritage.org

Merge request reports