From 0ec2231d69292b2a0a522396f72d6777d0fd7dc0 Mon Sep 17 00:00:00 2001 From: Vincent SELLIER <vincent.sellier@softwareheritage.org> Date: Wed, 10 Nov 2021 12:36:21 +0100 Subject: [PATCH] elk: upgrade journalbeat and filebeat - Remove the journalbeat 5.5.0 service configuration - remove the journalbeat user not needed anymore - deploy the elasticsearch packages (journalbeat and filebeat) - cleanup the temporary configuration - cleanup the old cursor position, when the version will be updated the beginning of the journal will be reingested - update the logstash configuration to support the messages from the old journalbeat version and the new version in parallel - allow to override the beat version for node with an elasticsearch version specified (swh-search) - update the `check_journal` script to use the new registry file to check the cursor position - Automatically declare the index template from the logstash host Related to T3705 --- data/common/common.yaml | 2 +- data/deployments/staging/common.yaml | 2 - ...esnode1.internal.softwareheritage.org.yaml | 1 - ...esnode2.internal.softwareheritage.org.yaml | 1 - ...esnode3.internal.softwareheritage.org.yaml | 1 - ...-esnode0.internal.staging.swh.network.yaml | 3 + ...esnode4.internal.softwareheritage.org.yaml | 1 + ...esnode5.internal.softwareheritage.org.yaml | 1 + ...esnode6.internal.softwareheritage.org.yaml | 1 + .../files/icinga2/plugins/check_journal | 42 ++++++++-- .../journalbeat/manage_index_template.sh | 40 ++++++++++ site-modules/profile/manifests/filebeat.pp | 3 +- .../journalbeat/index_template_manager.pp | 41 ++++++++++ .../manifests/systemd_journal/journalbeat.pp | 80 ++++++++++--------- .../templates/logstash/filter.conf.erb | 20 ++++- .../journalbeat/journalbeat.conf.erb | 6 ++ .../journalbeat/journalbeat.service.erb | 19 ----- .../journalbeat/journalbeat.yml.erb | 14 ++-- .../role/manifests/swh_logstash_instance.pp | 2 + 19 files changed, 201 insertions(+), 79 deletions(-) create mode 100644 
site-modules/profile/files/journalbeat/manage_index_template.sh create mode 100644 site-modules/profile/manifests/journalbeat/index_template_manager.pp create mode 100644 site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.conf.erb delete mode 100644 site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.service.erb diff --git a/data/common/common.yaml b/data/common/common.yaml index 2c36a2c60..0fa319455 100644 --- a/data/common/common.yaml +++ b/data/common/common.yaml @@ -3251,7 +3251,7 @@ swh::postgres::service::dbs: port: "%{hiera('swh::deploy::db::pgbouncer::port')}" user: guest -elastic::elk_version: '7.8.0' +elastic::elk_version: '7.15.2' elasticsearch::hosts: - http://esnode1.internal.softwareheritage.org:9200 diff --git a/data/deployments/staging/common.yaml b/data/deployments/staging/common.yaml index 81f9d630d..2420175e1 100644 --- a/data/deployments/staging/common.yaml +++ b/data/deployments/staging/common.yaml @@ -289,8 +289,6 @@ apache::default_vhost: false # Elasticsearch -elastic::elk_version: '7.9.3' - elasticsearch::config::cluster::name: swh-search elasticsearch::config::discovery::seed_hosts: diff --git a/data/hostname/esnode1.internal.softwareheritage.org.yaml b/data/hostname/esnode1.internal.softwareheritage.org.yaml index 82bde7ba9..784b5caf3 100644 --- a/data/hostname/esnode1.internal.softwareheritage.org.yaml +++ b/data/hostname/esnode1.internal.softwareheritage.org.yaml @@ -11,6 +11,5 @@ swh::apt_config::backported_packages: - zfsutils-linux - zfs-zed -elastic::elk_version: '7.15.2' elasticsearch::config::extras: xpack.security.enabled: false diff --git a/data/hostname/esnode2.internal.softwareheritage.org.yaml b/data/hostname/esnode2.internal.softwareheritage.org.yaml index 82bde7ba9..784b5caf3 100644 --- a/data/hostname/esnode2.internal.softwareheritage.org.yaml +++ b/data/hostname/esnode2.internal.softwareheritage.org.yaml @@ -11,6 +11,5 @@ swh::apt_config::backported_packages: - zfsutils-linux - 
zfs-zed -elastic::elk_version: '7.15.2' elasticsearch::config::extras: xpack.security.enabled: false diff --git a/data/hostname/esnode3.internal.softwareheritage.org.yaml b/data/hostname/esnode3.internal.softwareheritage.org.yaml index 82bde7ba9..784b5caf3 100644 --- a/data/hostname/esnode3.internal.softwareheritage.org.yaml +++ b/data/hostname/esnode3.internal.softwareheritage.org.yaml @@ -11,6 +11,5 @@ swh::apt_config::backported_packages: - zfsutils-linux - zfs-zed -elastic::elk_version: '7.15.2' elasticsearch::config::extras: xpack.security.enabled: false diff --git a/data/hostname/search-esnode0.internal.staging.swh.network.yaml b/data/hostname/search-esnode0.internal.staging.swh.network.yaml index dc38a4d0c..39bbe90e7 100644 --- a/data/hostname/search-esnode0.internal.staging.swh.network.yaml +++ b/data/hostname/search-esnode0.internal.staging.swh.network.yaml @@ -4,6 +4,9 @@ networks: netmask: 255.255.255.0 gateway: 192.168.130.1 +elastic::elk_version: '7.9.3' +elastic::beat_version: '7.15.2' + swh::apt_config::enable_non_free: true swh::apt_config::backported_packages: buster: diff --git a/data/hostname/search-esnode4.internal.softwareheritage.org.yaml b/data/hostname/search-esnode4.internal.softwareheritage.org.yaml index bd6e1446e..713164673 100644 --- a/data/hostname/search-esnode4.internal.softwareheritage.org.yaml +++ b/data/hostname/search-esnode4.internal.softwareheritage.org.yaml @@ -12,6 +12,7 @@ swh::apt_config::backported_packages: - zfs-zed elastic::elk_version: '7.9.3' +elastic::beat_version: '7.15.2' # Elasticsearch elasticsearch::config::cluster::name: swh-search diff --git a/data/hostname/search-esnode5.internal.softwareheritage.org.yaml b/data/hostname/search-esnode5.internal.softwareheritage.org.yaml index bd6e1446e..713164673 100644 --- a/data/hostname/search-esnode5.internal.softwareheritage.org.yaml +++ b/data/hostname/search-esnode5.internal.softwareheritage.org.yaml @@ -12,6 +12,7 @@ swh::apt_config::backported_packages: - zfs-zed 
elastic::elk_version: '7.9.3' +elastic::beat_version: '7.15.2' # Elasticsearch elasticsearch::config::cluster::name: swh-search diff --git a/data/hostname/search-esnode6.internal.softwareheritage.org.yaml b/data/hostname/search-esnode6.internal.softwareheritage.org.yaml index bd6e1446e..713164673 100644 --- a/data/hostname/search-esnode6.internal.softwareheritage.org.yaml +++ b/data/hostname/search-esnode6.internal.softwareheritage.org.yaml @@ -12,6 +12,7 @@ swh::apt_config::backported_packages: - zfs-zed elastic::elk_version: '7.9.3' +elastic::beat_version: '7.15.2' # Elasticsearch elasticsearch::config::cluster::name: swh-search diff --git a/site-modules/profile/files/icinga2/plugins/check_journal b/site-modules/profile/files/icinga2/plugins/check_journal index af2cc8aab..d848a10e8 100644 --- a/site-modules/profile/files/icinga2/plugins/check_journal +++ b/site-modules/profile/files/icinga2/plugins/check_journal @@ -25,6 +25,7 @@ import argparse import logging +import yaml import nagiosplugin from nagiosplugin import ScalarContext @@ -61,8 +62,10 @@ class BooleanContext(nagiosplugin.Context): class JournalLag(nagiosplugin.Resource): """Check journal lag""" - def __init__(self, cursorfile): + def __init__(self, cursorfile, registryfile, registryentry): self.cursorfile = cursorfile + self.registryfile = registryfile + self.registryentry = registryentry def parse_cursor(self, cursor): """Parse a journald cursor entry""" @@ -79,17 +82,42 @@ class JournalLag(nagiosplugin.Resource): return ret - def get_file_journal_cursor(self): - _log.info("querying the journal cursor cache file %s" % self.cursorfile) + def get_legacy_journal_cursor(self): + _log.info("querying the journal cursor cache file %s", self.cursorfile) try: with open(self.cursorfile, 'r') as f: ret = f.read().strip() except OSError as e: - raise nagiosplugin.CheckError("failed to read journal cursor file: %s" % e) + _log.warning("failed to read the legacy journal cursor file %s", self.cursorfile) else: 
_log.debug("current journal cursor: %s" % ret) return ret + def get_journal_cursor_from_registry(self): + _log.info("querying the journal cursor %s from registry file %s", self.registryentry, self.registryfile) + try: + with open(self.registryfile, 'r') as f: + registrycontent = yaml.safe_load(f) + except (OSError, yaml.YAMLError) as e: + _log.warning("failed to read the journal registry file %s", self.registryfile) + else: + entry = [e for e in (registrycontent or {}).get('journal_entries') or [] if e.get('path') == self.registryentry] + cursor = entry[0].get("cursor") if entry else None + _log.debug("current journal cursor: %s" % cursor) + return cursor + def get_file_journal_cursor(self): + cursor = self.get_journal_cursor_from_registry() + + if not cursor: + _log.debug("Unable to read cursor position from the registry, fallback on the legacy file") + cursor = self.get_legacy_journal_cursor() + + if not cursor: + raise nagiosplugin.CheckError("failed to get cursor from registry or legacy file") + + return cursor + def get_system_journal_cursor(self): _log.info("querying the system journal for the current cursor") reader = systemd.journal.Reader() @@ -136,6 +164,10 @@ def main(): help='increase output verbosity (use up to 3 times)') argp.add_argument('-f', '--file', metavar='FILE', default='/var/lib/journalbeat/cursor-state', help='read journald cursor state from this file') + argp.add_argument('-r', '--registry', metavar='FILE', default='/var/lib/journalbeat/registry', + help='read journald cursor state from this journalbeat registry file') + argp.add_argument('-e', '--entry', default='LOCAL_SYSTEM_JOURNAL', + help='Check this registry entry') argp.add_argument('-w', '--warning', metavar='RANGE', default='1200', help='return warning if temporal lag is outside RANGE') argp.add_argument('-c', '--critical', metavar='RANGE', default='3600', @@ -148,7 +180,7 @@ def main(): args = argp.parse_args() check = nagiosplugin.Check( - JournalLag(args.file), + JournalLag(args.file, args.registry, args.entry), 
BooleanContext('sameboot'), ScalarContext('lag_time', args.warning, args.critical), ScalarContext('lag_entries', args.warning_entries, args.critical_entries), diff --git a/site-modules/profile/files/journalbeat/manage_index_template.sh b/site-modules/profile/files/journalbeat/manage_index_template.sh new file mode 100644 index 000000000..0969d7814 --- /dev/null +++ b/site-modules/profile/files/journalbeat/manage_index_template.sh @@ -0,0 +1,40 @@ +#!/bin/bash -x +# +# File managed by puppet (class ::profile::journalbeat::index_template_manager), changes will be lost. + +# Generate the journalbeat index template and create it in elasticsearch +# Save the json in the /var/lib/journalbeat directory +# Params: +# - ES HOST +# - template name +# - index pattern +# Output: +# - /var/lib/journalbeat/<template name>.json +set -e + +if [ $# -ne 3 ]; then + echo "Usage: $0 <ES_URL> <template name> <index pattern>" + echo "ex: $0 http://esnode1:9200 swh_workers-7.15.2 'swh_workers-7.15.2-*'" + exit 1 +fi + +ES_HOST=$1 +TEMPLATE_NAME=$2 +INDEX_PATTERN=$3 + +TEMPLATE_FILE="${TEMPLATE_NAME}.json" +JOURNAL_BEAT_HOME=/var/lib/journalbeat + +# generating +journalbeat export template \ + -E setup.ilm.enabled=false \ + -E setup.template.name="${TEMPLATE_NAME}" \ + -E setup.template.pattern="${INDEX_PATTERN}" > "/tmp/${TEMPLATE_FILE}" + + +curl --fail -XPOST -H 'Content-Type: application/json' \ + "${ES_HOST}/_template/${TEMPLATE_NAME}" -d@"/tmp/${TEMPLATE_FILE}" + +mv "/tmp/${TEMPLATE_FILE}" "${JOURNAL_BEAT_HOME}/" + +exit 0 diff --git a/site-modules/profile/manifests/filebeat.pp b/site-modules/profile/manifests/filebeat.pp index fb158fa01..fcaf2fe80 100644 --- a/site-modules/profile/manifests/filebeat.pp +++ b/site-modules/profile/manifests/filebeat.pp @@ -7,7 +7,8 @@ class profile::filebeat { include ::profile::elastic::apt_config - $version = lookup('elastic::elk_version') + $default_elk_version = lookup('elastic::elk_version') + $version = lookup('elastic::beat_version', { default_value => 
$default_elk_version }) package { 'filebeat': ensure => $version, diff --git a/site-modules/profile/manifests/journalbeat/index_template_manager.pp b/site-modules/profile/manifests/journalbeat/index_template_manager.pp new file mode 100644 index 000000000..ad5879e7c --- /dev/null +++ b/site-modules/profile/manifests/journalbeat/index_template_manager.pp @@ -0,0 +1,41 @@ +# Ensure the index template of the current +# journalbeat version is declared in elasticsearch +class profile::journalbeat::index_template_manager { + $default_elk_version = lookup('elastic::elk_version') + $version = lookup('elastic::beat_version', { default_value => $default_elk_version }) + + $template_management_script = '/usr/local/bin/manage_index_template.sh' + $journalbeat_home = '/var/lib/journalbeat' + $es_node = lookup('swh::elasticsearch::storage_nodes')[0] + $es_node_url = "${es_node['host']}:${es_node['port']}" + + $journalbeat_indexes = [ + 'systemlogs', + 'swh_workers', + ] + + file { $template_management_script: + ensure => present, + owner => 'root', + group => 'root', + mode => '0544', + source => 'puppet:///modules/profile/journalbeat/manage_index_template.sh', + } + + each($journalbeat_indexes) |$index| { + $template_name = "${index}-${version}" + $index_template = "${template_name}-*" + + exec {"check ${index} template": + command => "${template_management_script} ${es_node_url} ${template_name} ${index_template}", + cwd => '/usr/local/bin', + creates => "${journalbeat_home}/${index}-${version}.json", + user => 'root', + require => [ + Package['journalbeat'], + File[$template_management_script], + ], + before => [Service['journalbeat']] + } + } +} diff --git a/site-modules/profile/manifests/systemd_journal/journalbeat.pp b/site-modules/profile/manifests/systemd_journal/journalbeat.pp index 19b7412e1..dd2b44b85 100644 --- a/site-modules/profile/manifests/systemd_journal/journalbeat.pp +++ b/site-modules/profile/manifests/systemd_journal/journalbeat.pp @@ -1,51 +1,61 @@ # 
Journalbeat: a systemd journal collection beater for the ELK stack class profile::systemd_journal::journalbeat { $package = 'journalbeat' - $user = 'journalbeat' - $group = 'nogroup' - $homedir = '/var/lib/journalbeat' $configdir = '/etc/journalbeat' $configfile = "${configdir}/journalbeat.yml" $service = 'journalbeat' + $default_elk_version = lookup('elastic::elk_version') + $version = lookup('elastic::beat_version', { default_value => $default_elk_version }) $logstash_hosts = lookup('systemd_journal::logstash_hosts') - package {$package: - ensure => present + include ::profile::elastic::apt_config + + # cleanup + ::apt::pin {'swh-journalbeat': + ensure => absent, + } + -> ::apt::pin {'journalbeat': + explanation => 'Use the elk stack version for journalbeat', + packages => ['journalbeat'], + version => $version, + priority => 1001, + } + -> package {$package: + ensure => $version, } - user {$user: - ensure => present, - gid => $group, - groups => 'systemd-journal', - home => $homedir, - managehome => true, - system => true, + # To remove after complete migration to 7.15 + -> user {'journalbeat': # journalbeat needs to be stopped before trying to remove the user + ensure => absent, + managehome => false, } - # Uses variables - # - $user - # - $homedir - # - $configfile - # - ::systemd::unit_file {"${service}.service": + # cleanup pre 7.15 version + file {"/etc/systemd/system/${service}.service": + ensure => absent, + } + file {'/var/lib/journalbeat/cursor-state': + ensure => absent, + } + ::systemd::dropin_file { "${service}.conf": ensure => present, - content => template('profile/systemd_journal/journalbeat/journalbeat.service.erb'), + unit => "${service}.service", + content => template('profile/systemd_journal/journalbeat/journalbeat.conf.erb'), } ~> service {$service: - ensure => running, - enable => true, - require => [ + ensure => running, + enable => true, + require => [ Package[$package], File[$configfile], + ::Systemd::Dropin_file["${service}.conf"], + ], 
+ subscribe => [ + Package[$package], + File[$configfile], + ::Systemd::Dropin_file["${service}.conf"], ], - } - - file {$configdir: - ensure => directory, - owner => 'root', - group => 'root', - mode => '0644', } # Uses variables @@ -57,16 +67,8 @@ class profile::systemd_journal::journalbeat { group => 'root', mode => '0644', content => template('profile/systemd_journal/journalbeat/journalbeat.yml.erb'), - notify => [ - Service[$service], - ], - } - - ::apt::pin {'swh-journalbeat': - explanation => 'Use journalbeat packages from Software Heritage', - packages => ['journalbeat'], - originator => 'softwareheritage', - priority => 990, + require => [Package[$package]], + notify => [Service[$service]], } profile::cron::d {'logrotate-journal': diff --git a/site-modules/profile/templates/logstash/filter.conf.erb b/site-modules/profile/templates/logstash/filter.conf.erb index afae8a2f6..238081665 100644 --- a/site-modules/profile/templates/logstash/filter.conf.erb +++ b/site-modules/profile/templates/logstash/filter.conf.erb @@ -17,15 +17,31 @@ filter { } } } else if "swh-worker@" in [systemd_unit] { + # Temporary rule to delete after complete migration to 7.15 mutate { add_field => { "[@metadata][target_index]" => "swh_workers-%{+YYYY.MM.dd}" } } - } else { + } else if "swh-worker@" in [systemd][unit] { mutate { add_field => { - "[@metadata][target_index]" => "systemlogs-%{+YYYY.MM.dd}" + "[@metadata][target_index]" => "swh_workers-%{[@metadata][version]}-%{+YYYY.MM.dd}" + } + } + } else { + if [@metadata][version] { + mutate { + add_field => { + "[@metadata][target_index]" => "systemlogs-%{[@metadata][version]}-%{+YYYY.MM.dd}" + } + } + } else { + # Temporary rule to delete after complete migration to 7.15 + mutate { + add_field => { + "[@metadata][target_index]" => "systemlogs-%{+YYYY.MM.dd}" + } } } } diff --git a/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.conf.erb 
b/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.conf.erb new file mode 100644 index 000000000..6049a5ccd --- /dev/null +++ b/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.conf.erb @@ -0,0 +1,6 @@ +# Managed by puppet (class profile::systemd_journal::journalbeat), changes will be lost + +[Service] +ReadOnlyDirectories=/ +ReadWriteDirectories=-/var/lib/journalbeat +WorkingDirectory=/var/lib/journalbeat diff --git a/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.service.erb b/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.service.erb deleted file mode 100644 index c9db293fa..000000000 --- a/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.service.erb +++ /dev/null @@ -1,19 +0,0 @@ -# Managed by puppet (class profile::systemd_journal::journalbeat), changes will be lost - -[Unit] -Description=Send systemd journal messages to logstash -After=nss-lookup.target - -[Service] -Type=simple -Restart=always -RestartSec=20s -ExecStart=/usr/bin/journalbeat -e -c <%= @configfile %> -User=<%= @user %> -Group=systemd-journal -ReadOnlyDirectories=/ -ReadWriteDirectories=-<%= @homedir %> -WorkingDirectory=<%= @homedir %> - -[Install] -WantedBy=multi-user.target diff --git a/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.yml.erb b/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.yml.erb index c82584b2c..497b5f863 100644 --- a/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.yml.erb +++ b/site-modules/profile/templates/systemd_journal/journalbeat/journalbeat.yml.erb @@ -1,6 +1,12 @@ #======================== Journalbeat Configuration ============================ journalbeat: + inputs: + # Paths that should be crawled and fetched. Possible values files and directories. + # When setting a directory, all journals under it are merged. + # When empty starts to read from local journal. 
+ - paths: [] + # What position in journald to seek to at start up # options: cursor, tail, head (defaults to tail) seek_position: cursor @@ -10,12 +16,6 @@ journalbeat: # options: tail, head, none (defaults to tail) cursor_seek_fallback: head - # Store the cursor of the successfully published events - write_cursor_state: true - - # Path to the file to store the cursor (defaults to ".journalbeat-cursor-state") - cursor_state_file: cursor-state - # How frequently should we save the cursor to disk (defaults to 5s) #cursor_flush_period: 5s @@ -241,7 +241,7 @@ output.logstash: # distribution (for example, the sample dashboards). # If not set by a CLI flag or in the configuration file, the default for the # home path is the location of the binary. -path.home: <%= @homedir %> +# path.home: /var/lib/journalbeat # The configuration path for the beatname installation. This is the default # base path for configuration files, including the main YAML configuration file diff --git a/site-modules/role/manifests/swh_logstash_instance.pp b/site-modules/role/manifests/swh_logstash_instance.pp index 02ccadd65..b68166801 100644 --- a/site-modules/role/manifests/swh_logstash_instance.pp +++ b/site-modules/role/manifests/swh_logstash_instance.pp @@ -2,4 +2,6 @@ class role::swh_logstash_instance inherits role::swh_base { include profile::logstash # Logstash node elected to close indices to avoid unbalance the cluster include profile::elasticsearch::index_janitor + # manage the journalbeat index templates + include profile::journalbeat::index_template_manager } -- GitLab