Add alerts on stale recurrent lister tasks in the scheduler
Recurring lister tasks in production run with one of 3 periods, depending on their kind:
- incremental: 1 day
- full: 7 days or 90 days
This is the equivalent of what was attempted in swh-charts [1]. That approach could not be made functional without a significant amount of work on the metrics scraping done in the cluster. That work will be done eventually, but not today.
[1] swh/infra/ci-cd/swh-charts!341 (closed)
octo-diff pergamon
diff origin/production/pergamon.softwareheritage.org current/pergamon.softwareheritage.org
*******************************************
+ Concat::Fragment[icinga2::object::Service::production: Scheduler recurrent lister stale tasks (period: 1 day)] =>
parameters =>
"content": "\nobject Service \"production: Scheduler recurrent lister stale ...
"order": 60,
"target": "/etc/icinga2/conf.d/static-checks.conf"
*******************************************
+ Concat::Fragment[icinga2::object::Service::production: Scheduler recurrent lister stale tasks (period: 7 days)] =>
parameters =>
"content": "\nobject Service \"production: Scheduler recurrent lister stale ...
"order": 60,
"target": "/etc/icinga2/conf.d/static-checks.conf"
*******************************************
+ Concat::Fragment[icinga2::object::Service::production: Scheduler recurrent lister stale tasks (period: 90 days)] =>
parameters =>
"content": "\nobject Service \"production: Scheduler recurrent lister stale ...
"order": 60,
"target": "/etc/icinga2/conf.d/static-checks.conf"
*******************************************
+ Concat::Fragment[icinga2::object::Service::staging: Scheduler recurrent lister stale tasks (period: 1 day)] =>
parameters =>
"content": "\nobject Service \"staging: Scheduler recurrent lister stale tas...
"order": 60,
"target": "/etc/icinga2/conf.d/static-checks.conf"
*******************************************
+ Concat::Fragment[icinga2::object::Service::staging: Scheduler recurrent lister stale tasks (period: 7 days)] =>
parameters =>
"content": "\nobject Service \"staging: Scheduler recurrent lister stale tas...
"order": 60,
"target": "/etc/icinga2/conf.d/static-checks.conf"
*******************************************
+ Concat::Fragment[icinga2::object::Service::staging: Scheduler recurrent lister stale tasks (period: 90 days)] =>
parameters =>
"content": "\nobject Service \"staging: Scheduler recurrent lister stale tas...
"order": 60,
"target": "/etc/icinga2/conf.d/static-checks.conf"
*******************************************
+ Concat_fragment[icinga2::object::Service::production: Scheduler recurrent lister stale tasks (period: 1 day)] =>
parameters =>
"content": "\nobject Service \"production: Scheduler recurrent lister stale ...
"order": 60,
"tag": "_etc_icinga2_conf.d_static-checks.conf",
"target": "/etc/icinga2/conf.d/static-checks.conf"
*******************************************
+ Concat_fragment[icinga2::object::Service::production: Scheduler recurrent lister stale tasks (period: 7 days)] =>
parameters =>
"content": "\nobject Service \"production: Scheduler recurrent lister stale ...
"order": 60,
"tag": "_etc_icinga2_conf.d_static-checks.conf",
"target": "/etc/icinga2/conf.d/static-checks.conf"
*******************************************
+ Concat_fragment[icinga2::object::Service::production: Scheduler recurrent lister stale tasks (period: 90 days)] =>
parameters =>
"content": "\nobject Service \"production: Scheduler recurrent lister stale ...
"order": 60,
"tag": "_etc_icinga2_conf.d_static-checks.conf",
"target": "/etc/icinga2/conf.d/static-checks.conf"
*******************************************
+ Concat_fragment[icinga2::object::Service::staging: Scheduler recurrent lister stale tasks (period: 1 day)] =>
parameters =>
"content": "\nobject Service \"staging: Scheduler recurrent lister stale tas...
"order": 60,
"tag": "_etc_icinga2_conf.d_static-checks.conf",
"target": "/etc/icinga2/conf.d/static-checks.conf"
*******************************************
+ Concat_fragment[icinga2::object::Service::staging: Scheduler recurrent lister stale tasks (period: 7 days)] =>
parameters =>
"content": "\nobject Service \"staging: Scheduler recurrent lister stale tas...
"order": 60,
"tag": "_etc_icinga2_conf.d_static-checks.conf",
"target": "/etc/icinga2/conf.d/static-checks.conf"
*******************************************
+ Concat_fragment[icinga2::object::Service::staging: Scheduler recurrent lister stale tasks (period: 90 days)] =>
parameters =>
"content": "\nobject Service \"staging: Scheduler recurrent lister stale tas...
"order": 60,
"tag": "_etc_icinga2_conf.d_static-checks.conf",
"target": "/etc/icinga2/conf.d/static-checks.conf"
*******************************************
File[/etc/bind/keys/local-update] =>
parameters =>
content =>
@@ -2,4 +2,4 @@
key local-update {
algorithm hmac-sha256;
- secret "UBnxzvjkp8jYHlPA1MUYv2xRnPQ7JRdWEm3jt+4orACsBzQPX2UFw92rN5JxeYt19x0o+fE8sujqU8fPHn04EA==";
+ secret "2V8hs2cGlOaZp35bCCLa9u7OnM434pI7QltDj3YYIGKWz5c5v2R/KVIJRfJJRvLXPiu+csCtun5fBder/y+UDw==";
};
*******************************************
File[/etc/bind/rndc.key] =>
parameters =>
content =>
@@ -2,4 +2,4 @@
key rndc-key {
algorithm hmac-md5;
- secret "TO+MWC8mJvDTYW3qgtrs1rArWjDMTdDjXxf/a2+VoXvdPqnx0HLHfrrwIAFp4xZyKyM9F5f7Ak6G2JbAra/Imw==";
+ secret "Tp6y4jWOeylH+YxZBr+zqgXc33QdM0QMUwPmDHoPlSiT5ICjmG7StnsEB8e9rg3AaHxpvs7mrRU2bX/cNbjFaw==";
};
*******************************************
+ Icinga2::Object::Service[production: Scheduler recurrent lister stale tasks (period: 1 day)] =>
parameters =>
"apply": false,
"assign": [
],
"check_command": "check_prometheus_metric",
"ensure": "present",
"export_to": [
],
"host_name": "albertina.internal.softwareheritage.org",
"ignore": [
],
"import": [
],
"order": 60,
"prefix": false,
"service_name": "production: Scheduler recurrent lister stale tasks (period:...
"target": "/etc/icinga2/conf.d/static-checks.conf",
"template": false,
"vars": {
"prometheus_metric_name": "production: Scheduler recurrent lister stale ta...
"prometheus_query": "-:\"histogram_quantile(0.1, sum(sql_swh_scheduler_del...
"prometheus_query_type": "vector",
"prometheus_metric_warning": 172800,
"prometheus_metric_critical": 259200
}
*******************************************
+ Icinga2::Object::Service[production: Scheduler recurrent lister stale tasks (period: 7 days)] =>
parameters =>
"apply": false,
"assign": [
],
"check_command": "check_prometheus_metric",
"ensure": "present",
"export_to": [
],
"host_name": "albertina.internal.softwareheritage.org",
"ignore": [
],
"import": [
],
"order": 60,
"prefix": false,
"service_name": "production: Scheduler recurrent lister stale tasks (period:...
"target": "/etc/icinga2/conf.d/static-checks.conf",
"template": false,
"vars": {
"prometheus_metric_name": "production: Scheduler recurrent lister stale ta...
"prometheus_query": "-:\"histogram_quantile(0.1, sum(sql_swh_scheduler_del...
"prometheus_query_type": "vector",
"prometheus_metric_warning": 691200,
"prometheus_metric_critical": 777600
}
*******************************************
+ Icinga2::Object::Service[production: Scheduler recurrent lister stale tasks (period: 90 days)] =>
parameters =>
"apply": false,
"assign": [
],
"check_command": "check_prometheus_metric",
"ensure": "present",
"export_to": [
],
"host_name": "albertina.internal.softwareheritage.org",
"ignore": [
],
"import": [
],
"order": 60,
"prefix": false,
"service_name": "production: Scheduler recurrent lister stale tasks (period:...
"target": "/etc/icinga2/conf.d/static-checks.conf",
"template": false,
"vars": {
"prometheus_metric_name": "production: Scheduler recurrent lister stale ta...
"prometheus_query": "-:\"histogram_quantile(0.1, sum(sql_swh_scheduler_del...
"prometheus_query_type": "vector",
"prometheus_metric_warning": 7862400,
"prometheus_metric_critical": 7948800
}
*******************************************
+ Icinga2::Object::Service[staging: Scheduler recurrent lister stale tasks (period: 1 day)] =>
parameters =>
"apply": false,
"assign": [
],
"check_command": "check_prometheus_metric",
"ensure": "present",
"export_to": [
],
"host_name": "db1.internal.staging.swh.network",
"ignore": [
],
"import": [
],
"order": 60,
"prefix": false,
"service_name": "staging: Scheduler recurrent lister stale tasks (period: 1 ...
"target": "/etc/icinga2/conf.d/static-checks.conf",
"template": false,
"vars": {
"prometheus_metric_name": "staging: Scheduler recurrent lister stale tasks...
"prometheus_query": "-:\"histogram_quantile(0.1, sum(sql_swh_scheduler_del...
"prometheus_query_type": "vector",
"prometheus_metric_warning": 172800,
"prometheus_metric_critical": 259200
}
*******************************************
+ Icinga2::Object::Service[staging: Scheduler recurrent lister stale tasks (period: 7 days)] =>
parameters =>
"apply": false,
"assign": [
],
"check_command": "check_prometheus_metric",
"ensure": "present",
"export_to": [
],
"host_name": "db1.internal.staging.swh.network",
"ignore": [
],
"import": [
],
"order": 60,
"prefix": false,
"service_name": "staging: Scheduler recurrent lister stale tasks (period: 7 ...
"target": "/etc/icinga2/conf.d/static-checks.conf",
"template": false,
"vars": {
"prometheus_metric_name": "staging: Scheduler recurrent lister stale tasks...
"prometheus_query": "-:\"histogram_quantile(0.1, sum(sql_swh_scheduler_del...
"prometheus_query_type": "vector",
"prometheus_metric_warning": 691200,
"prometheus_metric_critical": 777600
}
*******************************************
+ Icinga2::Object::Service[staging: Scheduler recurrent lister stale tasks (period: 90 days)] =>
parameters =>
"apply": false,
"assign": [
],
"check_command": "check_prometheus_metric",
"ensure": "present",
"export_to": [
],
"host_name": "db1.internal.staging.swh.network",
"ignore": [
],
"import": [
],
"order": 60,
"prefix": false,
"service_name": "staging: Scheduler recurrent lister stale tasks (period: 90...
"target": "/etc/icinga2/conf.d/static-checks.conf",
"template": false,
"vars": {
"prometheus_metric_name": "staging: Scheduler recurrent lister stale tasks...
"prometheus_query": "-:\"histogram_quantile(0.1, sum(sql_swh_scheduler_del...
"prometheus_query_type": "vector",
"prometheus_metric_warning": 7862400,
"prometheus_metric_critical": 7948800
}
*******************************************
+ Icinga2::Object[icinga2::object::Service::production: Scheduler recurrent lister stale tasks (period: 1 day)] =>
parameters =>
"apply": false,
"assign": [
],
"attrs": {
"host_name": "albertina.internal.softwareheritage.org",
"check_command": "check_prometheus_metric",
"vars": {
"prometheus_metric_name": "production: Scheduler recurrent lister stale ...
"prometheus_query": "-:\"histogram_quantile(0.1, sum(sql_swh_scheduler_d...
"prometheus_query_type": "vector",
"prometheus_metric_warning": 172800,
"prometheus_metric_critical": 259200
}
},
"attrs_list": [
"display_name",
"host_name",
"check_command",
"check_timeout",
"check_interval",
"check_period",
"retry_interval",
"max_check_attempts",
"groups",
"enable_notifications",
"enable_active_checks",
"enable_passive_checks",
"enable_event_handler",
"enable_flapping",
"enable_perfdata",
"event_command",
"flapping_threshold_low",
"flapping_threshold_high",
"volatile",
"zone",
"command_endpoint",
"notes",
"notes_url",
"action_url",
"icon_image",
"icon_image_alt",
"vars"
],
"ensure": "present",
"ignore": [
],
"import": [
],
"object_name": "production: Scheduler recurrent lister stale tasks (period: ...
"object_type": "Service",
"order": 60,
"prefix": false,
"target": "/etc/icinga2/conf.d/static-checks.conf",
"template": false
*******************************************
+ Icinga2::Object[icinga2::object::Service::production: Scheduler recurrent lister stale tasks (period: 7 days)] =>
parameters =>
"apply": false,
"assign": [
],
"attrs": {
"host_name": "albertina.internal.softwareheritage.org",
"check_command": "check_prometheus_metric",
"vars": {
"prometheus_metric_name": "production: Scheduler recurrent lister stale ...
"prometheus_query": "-:\"histogram_quantile(0.1, sum(sql_swh_scheduler_d...
"prometheus_query_type": "vector",
"prometheus_metric_warning": 691200,
"prometheus_metric_critical": 777600
}
},
"attrs_list": [
"display_name",
"host_name",
"check_command",
"check_timeout",
"check_interval",
"check_period",
"retry_interval",
"max_check_attempts",
"groups",
"enable_notifications",
"enable_active_checks",
"enable_passive_checks",
"enable_event_handler",
"enable_flapping",
"enable_perfdata",
"event_command",
"flapping_threshold_low",
"flapping_threshold_high",
"volatile",
"zone",
"command_endpoint",
"notes",
"notes_url",
"action_url",
"icon_image",
"icon_image_alt",
"vars"
],
"ensure": "present",
"ignore": [
],
"import": [
],
"object_name": "production: Scheduler recurrent lister stale tasks (period: ...
"object_type": "Service",
"order": 60,
"prefix": false,
"target": "/etc/icinga2/conf.d/static-checks.conf",
"template": false
*******************************************
+ Icinga2::Object[icinga2::object::Service::production: Scheduler recurrent lister stale tasks (period: 90 days)] =>
parameters =>
"apply": false,
"assign": [
],
"attrs": {
"host_name": "albertina.internal.softwareheritage.org",
"check_command": "check_prometheus_metric",
"vars": {
"prometheus_metric_name": "production: Scheduler recurrent lister stale ...
"prometheus_query": "-:\"histogram_quantile(0.1, sum(sql_swh_scheduler_d...
"prometheus_query_type": "vector",
"prometheus_metric_warning": 7862400,
"prometheus_metric_critical": 7948800
}
},
"attrs_list": [
"display_name",
"host_name",
"check_command",
"check_timeout",
"check_interval",
"check_period",
"retry_interval",
"max_check_attempts",
"groups",
"enable_notifications",
"enable_active_checks",
"enable_passive_checks",
"enable_event_handler",
"enable_flapping",
"enable_perfdata",
"event_command",
"flapping_threshold_low",
"flapping_threshold_high",
"volatile",
"zone",
"command_endpoint",
"notes",
"notes_url",
"action_url",
"icon_image",
"icon_image_alt",
"vars"
],
"ensure": "present",
"ignore": [
],
"import": [
],
"object_name": "production: Scheduler recurrent lister stale tasks (period: ...
"object_type": "Service",
"order": 60,
"prefix": false,
"target": "/etc/icinga2/conf.d/static-checks.conf",
"template": false
*******************************************
+ Icinga2::Object[icinga2::object::Service::staging: Scheduler recurrent lister stale tasks (period: 1 day)] =>
parameters =>
"apply": false,
"assign": [
],
"attrs": {
"host_name": "db1.internal.staging.swh.network",
"check_command": "check_prometheus_metric",
"vars": {
"prometheus_metric_name": "staging: Scheduler recurrent lister stale tas...
"prometheus_query": "-:\"histogram_quantile(0.1, sum(sql_swh_scheduler_d...
"prometheus_query_type": "vector",
"prometheus_metric_warning": 172800,
"prometheus_metric_critical": 259200
}
},
"attrs_list": [
"display_name",
"host_name",
"check_command",
"check_timeout",
"check_interval",
"check_period",
"retry_interval",
"max_check_attempts",
"groups",
"enable_notifications",
"enable_active_checks",
"enable_passive_checks",
"enable_event_handler",
"enable_flapping",
"enable_perfdata",
"event_command",
"flapping_threshold_low",
"flapping_threshold_high",
"volatile",
"zone",
"command_endpoint",
"notes",
"notes_url",
"action_url",
"icon_image",
"icon_image_alt",
"vars"
],
"ensure": "present",
"ignore": [
],
"import": [
],
"object_name": "staging: Scheduler recurrent lister stale tasks (period: 1 d...
"object_type": "Service",
"order": 60,
"prefix": false,
"target": "/etc/icinga2/conf.d/static-checks.conf",
"template": false
*******************************************
+ Icinga2::Object[icinga2::object::Service::staging: Scheduler recurrent lister stale tasks (period: 7 days)] =>
parameters =>
"apply": false,
"assign": [
],
"attrs": {
"host_name": "db1.internal.staging.swh.network",
"check_command": "check_prometheus_metric",
"vars": {
"prometheus_metric_name": "staging: Scheduler recurrent lister stale tas...
"prometheus_query": "-:\"histogram_quantile(0.1, sum(sql_swh_scheduler_d...
"prometheus_query_type": "vector",
"prometheus_metric_warning": 691200,
"prometheus_metric_critical": 777600
}
},
"attrs_list": [
"display_name",
"host_name",
"check_command",
"check_timeout",
"check_interval",
"check_period",
"retry_interval",
"max_check_attempts",
"groups",
"enable_notifications",
"enable_active_checks",
"enable_passive_checks",
"enable_event_handler",
"enable_flapping",
"enable_perfdata",
"event_command",
"flapping_threshold_low",
"flapping_threshold_high",
"volatile",
"zone",
"command_endpoint",
"notes",
"notes_url",
"action_url",
"icon_image",
"icon_image_alt",
"vars"
],
"ensure": "present",
"ignore": [
],
"import": [
],
"object_name": "staging: Scheduler recurrent lister stale tasks (period: 7 d...
"object_type": "Service",
"order": 60,
"prefix": false,
"target": "/etc/icinga2/conf.d/static-checks.conf",
"template": false
*******************************************
+ Icinga2::Object[icinga2::object::Service::staging: Scheduler recurrent lister stale tasks (period: 90 days)] =>
parameters =>
"apply": false,
"assign": [
],
"attrs": {
"host_name": "db1.internal.staging.swh.network",
"check_command": "check_prometheus_metric",
"vars": {
"prometheus_metric_name": "staging: Scheduler recurrent lister stale tas...
"prometheus_query": "-:\"histogram_quantile(0.1, sum(sql_swh_scheduler_d...
"prometheus_query_type": "vector",
"prometheus_metric_warning": 7862400,
"prometheus_metric_critical": 7948800
}
},
"attrs_list": [
"display_name",
"host_name",
"check_command",
"check_timeout",
"check_interval",
"check_period",
"retry_interval",
"max_check_attempts",
"groups",
"enable_notifications",
"enable_active_checks",
"enable_passive_checks",
"enable_event_handler",
"enable_flapping",
"enable_perfdata",
"event_command",
"flapping_threshold_low",
"flapping_threshold_high",
"volatile",
"zone",
"command_endpoint",
"notes",
"notes_url",
"action_url",
"icon_image",
"icon_image_alt",
"vars"
],
"ensure": "present",
"ignore": [
],
"import": [
],
"object_name": "staging: Scheduler recurrent lister stale tasks (period: 90 ...
"object_type": "Service",
"order": 60,
"prefix": false,
"target": "/etc/icinga2/conf.d/static-checks.conf",
"template": false
*******************************************
*** End octocatalog-diff on pergamon.softwareheritage.org
Edited by Antoine R. Dumont