From be6825729a4b5635e906dc6a3cf0a1d475a74388 Mon Sep 17 00:00:00 2001 From: Julian Jacobi Date: Sun, 17 Nov 2019 14:15:18 +0100 Subject: [PATCH 1/2] add collectd plugin for collecting running transcoder stats --- .../monitoring-client/tasks/collectd.yml | 16 +++ .../roles/monitoring-client/tasks/main.yml | 2 +- .../collectd/collectd.client.conf.j2 | 7 +- .../collectd/plugins/TranscodingUnits.py | 116 ++++++++++++++++++ 4 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 ansible/roles/monitoring-client/templates/collectd/plugins/TranscodingUnits.py diff --git a/ansible/roles/monitoring-client/tasks/collectd.yml b/ansible/roles/monitoring-client/tasks/collectd.yml index c5f72e522..3d2975d2a 100644 --- a/ansible/roles/monitoring-client/tasks/collectd.yml +++ b/ansible/roles/monitoring-client/tasks/collectd.yml @@ -17,6 +17,15 @@ - install - config + - name: install pip packages for systemd exporter + pip: + name: + - pydbus + - collectd + state: present + when: transcoder_streams is defined or transcoder_lounges is defined + tags: install + - name: install collectd perl packages apt: name={{ packages|flatten(levels=1) }} state=latest vars: @@ -72,6 +81,13 @@ when: haproxy is defined and haproxy tags: install + - name: add systemd unit collectd plugin + template: + src: collectd/plugins/TranscodingUnits.py + dest: /opt/voc/collectd/plugins/TranscodingUnits.py + when: transcoder_streams is defined or transcoder_lounges is defined + tags: install + - name: adding entries for curl_json php to types.db lineinfile: dest=/opt/voc/collectd/types.db regexp="{{ item.regex }}" diff --git a/ansible/roles/monitoring-client/tasks/main.yml b/ansible/roles/monitoring-client/tasks/main.yml index 473bea469..6df2993a0 100644 --- a/ansible/roles/monitoring-client/tasks/main.yml +++ b/ansible/roles/monitoring-client/tasks/main.yml @@ -6,7 +6,7 @@ # Install packages - name: install packages needed to monitor host apt: - name: ['nagios-plugins-basic', 'collectd'] + name: 
DBUS_SYSTEMD = '.systemd1'


class NoSuchUnitException(Exception):
    """Raised by SystemdCollector.add_unit when systemd reports NoSuchUnit."""


class SystemdCollector:
    """
    Collector class to collect statistics about systemd units.

    Units are selected either by exact name (add_unit) or by a regular
    expression matched against the names returned by ListUnits
    (add_units_regex). The two collectd_* methods are the callbacks
    registered with collectd.
    """

    def __init__(self):
        # Unit paths resolved once via add_unit().
        self.__monitored_units = []
        # Regex strings, re-evaluated against ListUnits() on every collection.
        self.__monitored_regexes = []

        self.__dbus = SystemBus()
        self.__systemd = self.__dbus.get(DBUS_SYSTEMD)

    def add_unit(self, unit):
        """
        add unit to scraping

        :param unit: str name of a systemd unit
        :raises NoSuchUnitException: if the GetUnit error mentions NoSuchUnit
        :raises GLib.Error: any other D-Bus error is propagated unchanged
        """
        try:
            unit_path = self.__systemd.GetUnit(unit)
        except GLib.Error as e:
            message = getattr(e, 'message', None) or str(e)
            if 'NoSuchUnit' in message:
                raise NoSuchUnitException('No Such Unit {}'.format(unit))
            # Do not hide unrelated failures: the caller would otherwise
            # believe the unit is being monitored.
            raise

        if unit_path not in self.__monitored_units:
            self.__monitored_units.append(unit_path)

    def add_units_regex(self, regex):
        """
        add all units to scraping, which names match the regex

        The pattern is applied with re.match, i.e. anchored at the start of
        the unit name. Adding the same pattern twice has no effect.

        :param regex: str regex
        """
        if regex not in self.__monitored_regexes:
            self.__monitored_regexes.append(regex)

    def collectd_config_callback(self, config):
        """
        collectd plugin api compatible configuration callback

        Handles the child keys 'UnitName' (passed to add_unit) and
        'UnitRegex' (passed to add_units_regex); other keys are ignored.

        :param config: configuration node whose children expose key/values
        """
        for sub_conf in config.children:
            if sub_conf.key == 'UnitName':
                self.add_unit(sub_conf.values[0])
            elif sub_conf.key == 'UnitRegex':
                self.add_units_regex(sub_conf.values[0])

    def collect_unit_stats(self):
        """
        Collect the current state of every monitored unit.

        :return: dict mapping unit Id to a dict with the keys "enabled" and
                 "active" (0 or 1) and "uptime" and "downtime" (difference
                 between now and the unit's ActiveEnterTimestamp /
                 InactiveEnterTimestamp, computed in microseconds; 0 for
                 the one that does not apply)
        """
        units = list(self.__monitored_units)

        for unit_tuple in self.__systemd.ListUnits():
            unit_name = unit_tuple[0]
            if not any(re.match(regex, unit_name)
                       for regex in self.__monitored_regexes):
                continue
            try:
                unit_path = self.__systemd.GetUnit(unit_name)
            except GLib.Error:
                # Best effort: a listed unit that cannot be resolved is
                # skipped instead of aborting the whole collection.
                continue
            if unit_path not in units:
                units.append(unit_path)

        # One timestamp for the whole run so all units share a reference.
        now = int(time.time() * 1000000)
        stats = {}

        for unit_path in units:
            unit = self.__dbus.get(DBUS_SYSTEMD, unit_path)

            enabled = unit.UnitFileState == 'enabled'
            active = unit.ActiveState == 'active'

            stats[unit.Id] = {
                "enabled": 1 if enabled else 0,
                "active": 1 if active else 0,
                "uptime": now - unit.ActiveEnterTimestamp if active else 0,
                "downtime": now - unit.InactiveEnterTimestamp if not active else 0,
            }

        return stats

    def collectd_read_callback(self, data=None):
        """
        collectd compatible read callback

        Dispatches one 'gauge' value per unit and statistic under the plugin
        name 'systemd_units', with type_instance "<unit id>/<stat name>".

        :param data: unused
        """
        for unit_name, stats in self.collect_unit_stats().items():
            for stat_name, value in stats.items():
                vl = collectd.Values(
                    plugin='systemd_units',
                    type='gauge',
                    type_instance="{}/{}".format(unit_name, stat_name),
                    values=[value],
                )
                vl.dispatch()
# Module entry point. When the imported collectd module offers the plugin
# registration API, hook the collector into collectd. Otherwise (the
# registration attributes are missing) run once standalone and print the
# collected statistics, which is useful for manual debugging.
try:
    collector = SystemdCollector()

    collectd.register_read(collector.collectd_read_callback)
    collectd.register_config(collector.collectd_config_callback)
except (ImportError, AttributeError):
    collector = SystemdCollector()

    # Raw string so that "\." is a regex escape rather than an invalid string
    # escape; the pattern matches the UnitRegex used in the collectd config.
    collector.add_units_regex(r'transcode_(h264|vpx|audio)@[a-zA-Z0-9]+\.service')

    print(collector.collect_unit_stats())