diff --git a/README.md b/README.md index 4ae89b3..9f9c352 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,7 @@ default: vsphere_user: "user" vsphere_password: "password" ignore_ssl: False + timeout: 120 specs_size: 5000 fetch_custom_attributes: True fetch_tags: True @@ -78,11 +79,12 @@ default: hosts: True snapshots: True -esx: +vcenter01: vsphere_host: vc.example2.com vsphere_user: 'root' vsphere_password: 'password' ignore_ssl: True + timeout: 120 specs_size: 5000 fetch_custom_attributes: True fetch_tags: True @@ -94,11 +96,12 @@ esx: hosts: True snapshots: True -limited: +vcenter02: vsphere_host: slowvc.example.com vsphere_user: 'administrator@vsphere.local' vsphere_password: 'password' ignore_ssl: True + timeout: 120 specs_size: 5000 fetch_custom_attributes: True fetch_tags: True @@ -114,39 +117,41 @@ limited: Switching sections can be done by adding ?section=limited to the URL. #### Environment Variables -| Variable | Precedence | Defaults | Description | -| --------------------------------------| ---------------------- | -------- | --------------------------------------------------------------------------| -| `VSPHERE_HOST` | config, env, get_param | n/a | vsphere server to connect to | -| `VSPHERE_USER` | config, env | n/a | User for connecting to vsphere | -| `VSPHERE_PASSWORD` | config, env | n/a | Password for connecting to vsphere | -| `VSPHERE_SPECS_SIZE` | config, env | 5000 | Size of specs list for query stats function | -| `VSPHERE_IGNORE_SSL` | config, env | False | Ignore the ssl cert on the connection to vsphere host | -| `VSPHERE_FETCH_CUSTOM_ATTRIBUTES` | config, env | False | Set to true to collect objects custom attributes as metric labels | -| `VSPHERE_FETCH_TAGS` | config, env | False | Set to true to collect objects tags as metric labels | -| `VSPHERE_FETCH_ALARMS` | config, env | False | Fetch objects triggered alarms, and in case of hosts hdw alarms as well | -| `VSPHERE_COLLECT_HOSTS` | config, env | True | Set to false to disable 
collection of host metrics | -| `VSPHERE_COLLECT_DATASTORES` | config, env | True | Set to false to disable collection of datastore metrics | -| `VSPHERE_COLLECT_VMS` | config, env | True | Set to false to disable collection of virtual machine metrics | -| `VSPHERE_COLLECT_VMGUESTS` | config, env | True | Set to false to disable collection of virtual machine guest metrics | -| `VSPHERE_COLLECT_SNAPSHOTS` | config, env | True | Set to false to disable collection of snapshot metrics | +| Variable | Precedence | Defaults | Description | +| ---------------------------------------------| ------------| -------- | ---------------------------------------------------------------------------------| +| `VSPHERE_HOST` | config, env | n/a | vsphere server to connect to | +| `VSPHERE_USER` | config, env | n/a | User for connecting to vsphere | +| `VSPHERE_PASSWORD` | config, env | n/a | Password for connecting to vsphere | +| `VSPHERE_SPECS_SIZE` | config, env | 5000 | Size of specs list for query stats function | +| `VSPHERE_IGNORE_SSL` | config, env | False | Ignore the ssl cert on the connection to vsphere host | +| `VSPHERE_TIMEOUT` | config, env | 120 | Seconds to wait for a collection to finish before failing it | +| `VSPHERE_FETCH_CUSTOM_ATTRIBUTES` | config, env | False | Set to true to collect objects custom attributes as metric labels | +| `VSPHERE_FETCH_TAGS` | config, env | False | Set to true to collect objects tags as metric labels | +| `VSPHERE_FETCH_ALARMS` | config, env | False | Fetch objects triggered alarms, and in case of hosts hdw alarms as well | +| `VSPHERE_COLLECT_HOSTS` | config, env | True | Set to false to disable collection of host metrics | +| `VSPHERE_COLLECT_DATASTORES` | config, env | True | Set to false to disable collection of datastore metrics | +| `VSPHERE_COLLECT_VMS` | config, env | True | Set to false to disable collection of virtual machine metrics | +| `VSPHERE_COLLECT_VMGUESTS` | config, env | True | Set to false to disable collection of virtual
machine guest metrics | +| `VSPHERE_COLLECT_SNAPSHOTS` | config, env | True | Set to false to disable collection of snapshot metrics | You can create new sections as well, with very similiar variables. For example, to create a `limited` section you can set: -| Variable | Precedence | Defaults | Description | -| ----------------------------------------------| ---------------------- | -------- | --------------------------------------------------------------------------| -| `VSPHERE_LIMITED_HOST` | config, env, get_param | n/a | vsphere server to connect to | -| `VSPHERE_LIMITED_USER` | config, env | n/a | User for connecting to vsphere | -| `VSPHERE_LIMITED_PASSWORD` | config, env | n/a | Password for connecting to vsphere | -| `VSPHERE_LIMITED_SPECS_SIZE` | config, env | 5000 | Size of specs list for query stats function | -| `VSPHERE_LIMITED_IGNORE_SSL` | config, env | False | Ignore the ssl cert on the connection to vsphere host | -| `VSPHERE_LIMITED_FETCH_CUSTOM_ATTRIBUTES` | config, env | False | Set to true to collect objects custom attributes as metric labels | -| `VSPHERE_LIMITED_FETCH_TAGS` | config, env | False | Set to true to collect objects tags as metric labels | -| `VSPHERE_LIMITED_FETCH_ALARMS` | config, env | False | Fetch objects triggered alarms, and in case of hosts hdw alarms as well | -| `VSPHERE_LIMITED_COLLECT_HOSTS` | config, env | True | Set to false to disable collection of host metrics | -| `VSPHERE_LIMITED_COLLECT_DATASTORES` | config, env | True | Set to false to disable collection of datastore metrics | -| `VSPHERE_LIMITED_COLLECT_VMS` | config, env | True | Set to false to disable collection of virtual machine metrics | -| `VSPHERE_LIMITED_COLLECT_VMGUESTS` | config, env | True | Set to false to disable collection of virtual machine guest metrics | -| `VSPHERE_LIMITED_COLLECT_SNAPSHOTS` | config, env | True | Set to false to disable collection of snapshot metrics | +| Variable | Precedence | Defaults | Description | +| 
---------------------------------------------| ----------- | -------- | ---------------------------------------------------------------------------------| +| `VSPHERE_LIMITED_HOST` | config, env | n/a | vsphere server to connect to | +| `VSPHERE_LIMITED_USER` | config, env | n/a | User for connecting to vsphere | +| `VSPHERE_LIMITED_PASSWORD` | config, env | n/a | Password for connecting to vsphere | +| `VSPHERE_LIMITED_SPECS_SIZE` | config, env | 5000 | Size of specs list for query stats function | +| `VSPHERE_LIMITED_IGNORE_SSL` | config, env | False | Ignore the ssl cert on the connection to vsphere host | +| `VSPHERE_LIMITED_TIMEOUT` | config, env | 120 | Seconds to wait for a collection to finish before failing it | +| `VSPHERE_LIMITED_FETCH_CUSTOM_ATTRIBUTES` | config, env | False | Set to true to collect objects custom attributes as metric labels | +| `VSPHERE_LIMITED_FETCH_TAGS` | config, env | False | Set to true to collect objects tags as metric labels | +| `VSPHERE_LIMITED_FETCH_ALARMS` | config, env | False | Fetch objects triggered alarms, and in case of hosts hdw alarms as well | +| `VSPHERE_LIMITED_COLLECT_HOSTS` | config, env | True | Set to false to disable collection of host metrics | +| `VSPHERE_LIMITED_COLLECT_DATASTORES` | config, env | True | Set to false to disable collection of datastore metrics | +| `VSPHERE_LIMITED_COLLECT_VMS` | config, env | True | Set to false to disable collection of virtual machine metrics | +| `VSPHERE_LIMITED_COLLECT_VMGUESTS` | config, env | True | Set to false to disable collection of virtual machine guest metrics | +| `VSPHERE_LIMITED_COLLECT_SNAPSHOTS` | config, env | True | Set to false to disable collection of snapshot metrics | You need to set at least `VSPHERE_SECTIONNAME_USER` for the section to be detected. @@ -155,18 +160,24 @@ You need to set at least `VSPHERE_SECTIONNAME_USER` for the section to be detect You can use the following parameters in the Prometheus configuration file.
The `params` section is used to manage multiple login/passwords. ``` +# Example of Multiple vCenter usage per #23 + - job_name: 'vmware_vcenter' metrics_path: '/metrics' static_configs: - targets: - - 'vcenter.company.com' + - default + - vcenter01 + - vcenter02 relabel_configs: - source_labels: [__address__] - target_label: __param_target - - source_labels: [__param_target] + target_label: __param_section + - source_labels: [__param_section] target_label: instance - target_label: __address__ - replacement: localhost:9272 + replacement: exporter_ip:9272 + +# Example using file service discovery - job_name: 'vmware_esx' metrics_path: '/metrics' @@ -177,28 +188,12 @@ You can use the following parameters in the Prometheus configuration file. The ` section: [esx] relabel_configs: - source_labels: [__address__] - target_label: __param_target - - source_labels: [__param_target] + target_label: __param_section + - source_labels: [__param_section] target_label: instance - target_label: __address__ replacement: localhost:9272 -# Example of Multiple vCenter usage per #23 - -- job_name: vmware_export - metrics_path: /metrics - static_configs: - - targets: - - vcenter01 - - vcenter02 - - vcenter03 - relabel_configs: - - source_labels: [__address__] - target_label: __param_target - - source_labels: [__param_target] - target_label: instance - - target_label: __address__ - replacement: exporter_ip:9272 ``` ## Current Status diff --git a/tests/unit/test_helpers.py b/tests/unit/test_helpers.py index 14d98ba..2d3fef9 100644 --- a/tests/unit/test_helpers.py +++ b/tests/unit/test_helpers.py @@ -4,7 +4,7 @@ from pyVmomi import vim -from vmware_exporter.helpers import batch_fetch_properties, get_bool_env +from vmware_exporter.helpers import batch_fetch_properties, get_bool_env, serialize, deserialize class FakeView(vim.ManagedObject): @@ -16,6 +16,26 @@ def Destroy(self): pass +def test_serialize(): + + # Test basic usage + assert serialize('abc', 'def', g='1', h='2') == 
'abc:def:g=1:h=2' + + # Test escaping + assert serialize('\\\n\r,:=') == '\\\\\\n\\r\\,\\:\\=' + + +def test_deserialize(): + + tests = [ + 'abc:def:g=1:h=2', + '\\\\\\n\\r\\,\\:\\=:\\\\\\n\\r\\,\\:\\==\\\\\\n\\r\\,\\:\\=' + ] + for value in tests: + arg, kwarg = deserialize(value) + assert serialize(*arg, **kwarg) == value + + def test_get_bool_env(): # Expected behaviour assert get_bool_env('NON_EXISTENT_ENV', True) diff --git a/vmware_exporter/defer.py b/vmware_exporter/defer.py index 724bde3..aadb4b2 100644 --- a/vmware_exporter/defer.py +++ b/vmware_exporter/defer.py @@ -43,7 +43,7 @@ class BranchingDeferred(defer.Deferred): ''' def __init__(self): - self.callbacks = [] + super().__init__() self.result = None def callback(self, result): diff --git a/vmware_exporter/helpers.py b/vmware_exporter/helpers.py index cb5fc98..5eb2f39 100644 --- a/vmware_exporter/helpers.py +++ b/vmware_exporter/helpers.py @@ -1,6 +1,65 @@ # autopep8'd import os from pyVmomi import vmodl +import re + + +def serialize(*arg, **kwarg): + """ + Serialize into the format + item1:item2:key1=value1:key2=value2 + """ + escape_dict = {'\\': '\\\\', '\n': '\\n', '\r': '\\r', ',': '\\,', ':': '\\:', '=': '\\='} + pattern = re.compile("|".join([re.escape(k) for k in sorted(escape_dict, key=len, reverse=True)]), flags=re.DOTALL) + items = [] + for item in arg: + items.append(pattern.sub(lambda x: escape_dict[x.group(0)], str(item))) + for k, v in kwarg.items(): + items.append("{}={}".format( + pattern.sub(lambda x: escape_dict[x.group(0)], str(k)), + pattern.sub(lambda x: escape_dict[x.group(0)], str(v)) + )) + return ':'.join(items) + + +def deserialize(s): + """ + Deserialize the format into a list and dict + item1:item2:key1=value1:key2=value2 + """ + escape_dict = {'\\r': '\r', '\\\\': '\\', '\\=': '=', '\\:': ':', '\\n': '\n', '\\,': ','} + pattern = re.compile("|".join([re.escape(k) for k in sorted(escape_dict, key=len, reverse=True)]), flags=re.DOTALL) + arg = [] + kwarg = {} + for name,
eq, value in re.findall(r'((?:\\.|[^=:\\]+)+)(?:(=)((?:\\.|[^:\\]+)+))?', s): + if eq: + kwarg[pattern.sub(lambda x: escape_dict[x.group(0)], name)] = pattern.sub( + lambda x: escape_dict[x.group(0)], value) + else: + arg.append(pattern.sub(lambda x: escape_dict[x.group(0)], name)) + return (arg, kwarg) + + +class TriggeredAlarm(object): + def __init__(self, name, status): + self.name = name + self.sensorStatus = status + + def __str__(self): + return serialize('triggeredAlarm', self.name, self.sensorStatus) + + +class NumericSensorInfo(object): + def __init__(self, name, status, type="n/a", value="n/a", unitModifier="n/a", unit="n/a"): + self.name = name + self.type = type + self.sensorStatus = status + self.value = value + self.unitModifier = unitModifier + self.unit = unit + + def __str__(self): + return serialize('numericSensorInfo', name=self.name, type=self.type, sensorStatus=self.sensorStatus, value=self.value, unitModifier=self.unitModifier, unit=self.unit) def get_bool_env(key: str, default: bool): @@ -26,13 +85,9 @@ def batch_fetch_properties(content, obj_type, properties): if content.customFieldsManager and content.customFieldsManager.field: allCustomAttributesNames.update( - dict( - [ - (f.key, f.name) - for f in content.customFieldsManager.field - if f.managedObjectType in (obj_type, None) - ] - ) + (f.key, f.name) + for f in content.customFieldsManager.field + if f.managedObjectType in (obj_type, None) ) try: @@ -95,32 +150,37 @@ def batch_fetch_properties(content, obj_type, properties): """ triggered alarms """ + try: - alarms = list( - 'triggeredAlarm:{}:{}'.format(item.alarm.info.systemName.split('.')[1], item.overallStatus) - for item in prop.val - ) + properties[prop.name] = [ + TriggeredAlarm( + name=item.alarm.info.systemName.split('.')[1], + status=item.overallStatus + ) for item in prop.val + ] except Exception: - alarms = ['triggeredAlarm:AlarmsUnavailable:yellow'] - - properties[prop.name] = ','.join(alarms) + properties[prop.name] = [ + 
TriggeredAlarm( + name='AlarmsUnavailable', + status='yellow' + ) + ] elif 'runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo' == prop.name: """ handle numericSensorInfo """ - sensors = list( - 'numericSensorInfo:name={}:type={}:sensorStatus={}:value={}:unitModifier={}:unit={}'.format( - item.name, - item.sensorType, - item.healthState.key, - item.currentReading, - item.unitModifier, - item.baseUnits.lower() - ) - for item in prop.val - ) - properties[prop.name] = ','.join(sensors) + + properties[prop.name] = [ + NumericSensorInfo( + name=item.name, + type=item.sensorType, + status=item.healthState.key, + value=item.currentReading, + unitModifier=item.unitModifier, + unit=item.baseUnits.lower() + ) for item in prop.val + ] elif prop.name in [ 'runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo', @@ -129,18 +189,13 @@ def batch_fetch_properties(content, obj_type, properties): """ handle hardwareStatusInfo """ - sensors = list( - 'numericSensorInfo:name={}:type={}:sensorStatus={}:value={}:unitModifier={}:unit={}'.format( - item.name, - "n/a", - item.status.key, - "n/a", - "n/a", - "n/a", - ) - for item in prop.val - ) - properties[prop.name] = ','.join(sensors) + + properties[prop.name] = [ + NumericSensorInfo( + name=item.name, + status=item.status.key + ) for item in prop.val + ] else: properties[prop.name] = prop.val diff --git a/vmware_exporter/vmware_exporter.py b/vmware_exporter/vmware_exporter.py index e497fa2..fbea980 100755 --- a/vmware_exporter/vmware_exporter.py +++ b/vmware_exporter/vmware_exporter.py @@ -64,7 +64,8 @@ def __init__( fetch_custom_attributes=False, ignore_ssl=False, fetch_tags=False, - fetch_alarms=False + fetch_alarms=False, + timeout=None ): self.host = host @@ -73,6 +74,7 @@ def __init__( self.ignore_ssl = ignore_ssl self.collect_only = collect_only self.specs_size = int(specs_size) + self.timeout = int(timeout) if timeout else None self._session = None @@ -314,6 +316,16 @@ def _create_metric_containers(self): 
'vmware_host_red_alarms', 'A metric with the amount of host red alarms and labeled with the list of alarm names', labels=self._labelNames['hosts'] + ['alarms'] + ), + 'vmware_host_yellow_alarm': GaugeMetricFamily( + 'vmware_host_yellow_alarm', + 'A metric with the host and yellow alarm name', + labels=self._labelNames['hosts'] + ['alarm'] + ), + 'vmware_host_red_alarm': GaugeMetricFamily( + 'vmware_host_red_alarm', + 'A metric with the host and red alarm name', + labels=self._labelNames['hosts'] + ['alarm'] ) } ) @@ -332,6 +344,16 @@ def _create_metric_containers(self): 'vmware_datastore_red_alarms', 'A metric with the amount of datastore red alarms and labeled with the list of alarm names', labels=self._labelNames['datastores'] + ['alarms'] + ), + 'vmware_datastore_yellow_alarm': GaugeMetricFamily( + 'vmware_datastore_yellow_alarm', + 'A metric with the datastore and yellow alarm name', + labels=self._labelNames['datastores'] + ['alarm'] + ), + 'vmware_datastore_red_alarm': GaugeMetricFamily( + 'vmware_datastore_red_alarm', + 'A metric with datastore and red alarm name', + labels=self._labelNames['datastores'] + ['alarm'] ) } ) @@ -352,38 +374,16 @@ def _create_metric_containers(self): 'A metric with the amount of virtual machine red alarms and \ labeled with the list of alarm names', labels=self._labelNames['vms'] + ['alarms'] - ) - } - ) - metric_list['vmguests'].update( - { - 'vmware_vm_yellow_alarms': GaugeMetricFamily( - 'vmware_vm_yellow_alarms', - 'A metric with the amount of virtual machine yellow alarms and \ - labeled with the list of alarm names', - labels=self._labelNames['vms'] + ['alarms'] ), - 'vmware_vm_red_alarms': GaugeMetricFamily( - 'vmware_vm_red_alarms', - 'A metric with the amount of virtual machine red alarms and \ - labeled with the list of alarm names', - labels=self._labelNames['vms'] + ['alarms'] - ) - } - ) - metric_list['snapshots'].update( - { - 'vmware_vm_yellow_alarms': GaugeMetricFamily( - 'vmware_vm_yellow_alarms', - 'A metric 
with the amount of virtual machine yellow alarms and \ - labeled with the list of alarm names', - labels=self._labelNames['vms'] + ['alarms'] + 'vmware_vm_yellow_alarm': GaugeMetricFamily( + 'vmware_vm_yellow_alarm', + 'A metric with the virtual machine and yellow alarm name', + labels=self._labelNames['vms'] + ['alarm'] ), - 'vmware_vm_red_alarms': GaugeMetricFamily( - 'vmware_vm_red_alarms', - 'A metric with the amount of virtual machine red alarms and \ - labeled with the list of alarm names', - labels=self._labelNames['vms'] + ['alarms'] + 'vmware_vm_red_alarm': GaugeMetricFamily( + 'vmware_vm_red_alarm', + 'A metric with the virtual machine and red alarm name', + labels=self._labelNames['vms'] + ['alarm'] ) } ) @@ -397,8 +397,14 @@ def _create_metric_containers(self): return metrics - @defer.inlineCallbacks def collect(self): + d_collect = self._collect() + if self.timeout is not None: + d_collect.addTimeout(self.timeout, reactor) + return d_collect + + @defer.inlineCallbacks + def _collect(self): """ collects metrics """ vsphere_host = self.host @@ -427,9 +433,10 @@ def collect(self): tasks.append(self._vmware_get_hosts(metrics)) tasks.append(self._vmware_get_host_perf_manager_metrics(metrics)) - yield parallelize(*tasks) - - yield self._vmware_disconnect() + try: + yield parallelize(*tasks) + finally: + yield self._vmware_disconnect() logging.info("Finished collecting metrics from {vsphere_host}".format(vsphere_host=vsphere_host)) @@ -1263,11 +1270,24 @@ def _vmware_get_datastores(self, ds_metrics): filter red and yellow alarms """ if self.fetch_alarms: - alarms = datastore.get('triggeredAlarmState').split(',') - alarms = [a for a in alarms if ':' in a] + alarms = datastore.get('triggeredAlarmState', []) + + for alarm in alarms: + if alarm.sensorStatus == 'red': + # Red alarms + ds_metrics['vmware_datastore_red_alarm'].add_metric( + labels + [alarm.name], + 1 + ) + elif alarm.sensorStatus == 'yellow': + # Yellow alarms + 
ds_metrics['vmware_datastore_yellow_alarm'].add_metric( + labels + [alarm.name], + 1 + ) # Red alarms - red_alarms = [':'.join(a.split(':')[:-1]) for a in alarms if a.split(':')[-1] == 'red'] + red_alarms = ['triggeredAlarm:{}'.format(alarm.name) for alarm in alarms if alarm.sensorStatus == 'red'] red_alarms_label = ','.join(red_alarms) if red_alarms else 'n/a' ds_metrics['vmware_datastore_red_alarms'].add_metric( labels + [red_alarms_label], @@ -1275,7 +1295,8 @@ def _vmware_get_datastores(self, ds_metrics): ) # Yellow alarms - yellow_alarms = [':'.join(a.split(':')[:-1]) for a in alarms if a.split(':')[-1] == 'yellow'] + yellow_alarms = ['triggeredAlarm:{}'.format(alarm.name) + for alarm in alarms if alarm.sensorStatus == 'yellow'] yellow_alarms_label = ','.join(yellow_alarms) if yellow_alarms else 'n/a' ds_metrics['vmware_datastore_yellow_alarms'].add_metric( labels + [yellow_alarms_label], @@ -1543,11 +1564,24 @@ def _vmware_get_vms(self, metrics): filter red and yellow alarms """ if self.fetch_alarms and ('triggeredAlarmState' in row): - alarms = row.get('triggeredAlarmState').split(',') - alarms = [a for a in alarms if ':' in a] + alarms = row.get('triggeredAlarmState', []) + + for alarm in alarms: + if alarm.sensorStatus == 'red': + # Red alarms + metrics['vmware_vm_red_alarm'].add_metric( + labels + [alarm.name], + 1 + ) + elif alarm.sensorStatus == 'yellow': + # Yellow alarms + metrics['vmware_vm_yellow_alarm'].add_metric( + labels + [alarm.name], + 1 + ) # Red alarms - red_alarms = [':'.join(a.split(':')[:-1]) for a in alarms if a.split(':')[-1] == 'red'] + red_alarms = ['triggeredAlarm:{}'.format(alarm.name) for alarm in alarms if alarm.sensorStatus == 'red'] red_alarms_label = ','.join(red_alarms) if red_alarms else 'n/a' metrics['vmware_vm_red_alarms'].add_metric( labels + [red_alarms_label], @@ -1555,7 +1589,8 @@ def _vmware_get_vms(self, metrics): ) # Yellow alarms - yellow_alarms = [':'.join(a.split(':')[:-1]) for a in alarms if a.split(':')[-1] == 
'yellow'] + yellow_alarms = ['triggeredAlarm:{}'.format(alarm.name) + for alarm in alarms if alarm.sensorStatus == 'yellow'] yellow_alarms_label = ','.join(yellow_alarms) if yellow_alarms else 'n/a' metrics['vmware_vm_yellow_alarms'].add_metric( labels + [yellow_alarms_label], @@ -1681,10 +1716,24 @@ def _vmware_get_hosts(self, host_metrics): filter red and yellow alarms """ if self.fetch_alarms: - alarms = [a for a in host.get('triggeredAlarmState', '').split(',') if ':' in a] + alarms = host.get('triggeredAlarmState', []) + + for alarm in alarms: + if alarm.sensorStatus == 'red': + # Red alarms + host_metrics['vmware_host_red_alarm'].add_metric( + labels + [alarm.name], + 1 + ) + elif alarm.sensorStatus == 'yellow': + # Yellow alarms + host_metrics['vmware_host_yellow_alarm'].add_metric( + labels + [alarm.name], + 1 + ) # Red alarms - red_alarms = [':'.join(a.split(':')[:-1]) for a in alarms if a.split(':')[-1] == 'red'] + red_alarms = ['triggeredAlarm:{}'.format(alarm.name) for alarm in alarms if alarm.sensorStatus == 'red'] red_alarms_label = ','.join(red_alarms) if red_alarms else 'n/a' host_metrics['vmware_host_red_alarms'].add_metric( labels + [red_alarms_label], @@ -1692,7 +1741,8 @@ def _vmware_get_hosts(self, host_metrics): ) # Yellow alarms - yellow_alarms = [':'.join(a.split(':')[:-1]) for a in alarms if a.split(':')[-1] == 'yellow'] + yellow_alarms = ['triggeredAlarm:{}'.format(alarm.name) + for alarm in alarms if alarm.sensorStatus == 'yellow'] yellow_alarms_label = ','.join(yellow_alarms) if yellow_alarms else 'n/a' host_metrics['vmware_host_yellow_alarms'].add_metric( labels + [yellow_alarms_label], @@ -1700,70 +1750,64 @@ def _vmware_get_hosts(self, host_metrics): ) # Numeric Sensor Info - sensors = host.get('runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo', '').split(',') + \ - host.get('runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo', '').split(',') + \ - 
host.get('runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo', '').split(',') - - sensors = [s for s in sensors if ':' in s] - - for s in sensors: - sensor = dict(item.split("=") for item in re.split(r':(?=\w+=)', s)[1:]) + sensors = host.get('runtime.healthSystemRuntime.systemHealthInfo.numericSensorInfo', []) + \ + host.get('runtime.healthSystemRuntime.hardwareStatusInfo.cpuStatusInfo', []) + \ + host.get('runtime.healthSystemRuntime.hardwareStatusInfo.memoryStatusInfo', []) - if not all(key in sensor for key in ['sensorStatus', 'name', 'type', 'unit', 'value']): - continue + for sensor in sensors: sensor_status = { 'red': 0, 'yellow': 1, 'green': 2, - 'unknown': 3, - }[sensor['sensorStatus'].lower()] + # 'unknown': 3, + }.get(sensor.sensorStatus.lower(), 3) host_metrics['vmware_host_sensor_state'].add_metric( - labels + [sensor['name'], sensor['type']], + labels + [sensor.name, sensor.type], sensor_status ) # FAN speed - if sensor["unit"] == 'rpm': + if sensor.unit == 'rpm': host_metrics['vmware_host_sensor_fan'].add_metric( - labels + [sensor['name']], - int(sensor['value']) * (10 ** (int(sensor['unitModifier']))) + labels + [sensor.name], + int(sensor.value) * (10 ** (int(sensor.unitModifier))) ) # Temperature - if sensor["unit"] == 'degrees c': + if sensor.unit == 'degrees c': host_metrics['vmware_host_sensor_temperature'].add_metric( - labels + [sensor['name']], - int(sensor['value']) * (10 ** (int(sensor['unitModifier']))) + labels + [sensor.name], + int(sensor.value) * (10 ** (int(sensor.unitModifier))) ) # Power Voltage - if sensor["unit"] == 'volts': + if sensor.unit == 'volts': host_metrics['vmware_host_sensor_power_voltage'].add_metric( - labels + [sensor['name']], - int(sensor['value']) * (10 ** (int(sensor['unitModifier']))) + labels + [sensor.name], + int(sensor.value) * (10 ** (int(sensor.unitModifier))) ) # Power Current - if sensor["unit"] == 'amps': + if sensor.unit == 'amps': 
host_metrics['vmware_host_sensor_power_current'].add_metric( - labels + [sensor['name']], - int(sensor['value']) * (10 ** (int(sensor['unitModifier']))) + labels + [sensor.name], + int(sensor.value) * (10 ** (int(sensor.unitModifier))) ) # Power Watt - if sensor["unit"] == 'watts': + if sensor.unit == 'watts': host_metrics['vmware_host_sensor_power_watt'].add_metric( - labels + [sensor['name']], - int(sensor['value']) * (10 ** (int(sensor['unitModifier']))) + labels + [sensor.name], + int(sensor.value) * (10 ** (int(sensor.unitModifier))) ) # Redundancy - if sensor["unit"] == 'redundancy-discrete': + if sensor.unit == 'redundancy-discrete': host_metrics['vmware_host_sensor_redundancy'].add_metric( - labels + [sensor['name']], - int(sensor['value']) + labels + [sensor.name], + int(sensor.value) ) # Standby Mode @@ -1872,16 +1916,13 @@ def configure(self, args): try: with open(args.config_file) as cf: self.config = yaml.load(cf, Loader=yaml.FullLoader) - - if 'default' not in self.config.keys(): - logging.error("Error, you must have a default section in config file (for now)") - exit(1) return except Exception as exception: raise SystemExit("Error while reading configuration file: {0}".format(exception.message)) - self.config = { - 'default': { + self.config = {} + if 'VSPHERE_USER' in os.environ: + self.config['default'] = { 'vsphere_host': os.environ.get('VSPHERE_HOST'), 'vsphere_user': os.environ.get('VSPHERE_USER'), 'vsphere_password': os.environ.get('VSPHERE_PASSWORD'), @@ -1896,9 +1937,9 @@ def configure(self, args): 'datastores': get_bool_env('VSPHERE_COLLECT_DATASTORES', True), 'hosts': get_bool_env('VSPHERE_COLLECT_HOSTS', True), 'snapshots': get_bool_env('VSPHERE_COLLECT_SNAPSHOTS', True), - } + }, + 'timeout': get_bool_env('VSPHERE_TIMEOUT', 120), } - } for key in os.environ.keys(): if key == 'VSPHERE_USER': @@ -1923,7 +1964,8 @@ def configure(self, args): 'datastores': get_bool_env('VSPHERE_{}_COLLECT_DATASTORES'.format(section), True), 'hosts': 
get_bool_env('VSPHERE_{}_COLLECT_HOSTS'.format(section), True), 'snapshots': get_bool_env('VSPHERE_{}_COLLECT_SNAPSHOTS'.format(section), True), - } + }, + 'timeout': get_bool_env('VSPHERE_{}_TIMEOUT'.format(section), 120), } def render_GET(self, request): @@ -1948,26 +1990,22 @@ def _async_render_GET(self, request): @defer.inlineCallbacks def generate_latest_metrics(self, request): """ gets the latest metrics """ - section = request.args.get(b'section', [b'default'])[0].decode('utf-8') - if section not in self.config.keys(): - logging.info("{} is not a valid section, using default".format(section)) - section = 'default' - - if self.config[section].get('vsphere_host') and self.config[section].get('vsphere_host') != "None": - vsphere_host = self.config[section].get('vsphere_host') - elif request.args.get(b'target', [None])[0]: - vsphere_host = request.args.get(b'target', [None])[0].decode('utf-8') - elif request.args.get(b'vsphere_host', [None])[0]: - vsphere_host = request.args.get(b'vsphere_host')[0].decode('utf-8') - else: - request.setResponseCode(500) - logging.info("No vsphere_host or target defined") - request.write(b'No vsphere_host or target defined!\n') - request.finish() - return + section = None + if b'section' in request.args: + section = request.args[b'section'][0].decode('utf-8') + if section not in self.config: + if 'default' in self.config: + logging.info("{} is not a valid section, using default".format(section)) + section = 'default' + else: + request.setResponseCode(400) + logging.info("Invalid section and no default defined") + request.write(b'Invalid section defined!\n') + request.finish() + return collector = VmwareCollector( - vsphere_host, + self.config[section]['vsphere_host'], self.config[section]['vsphere_user'], self.config[section]['vsphere_password'], self.config[section]['collect_only'], @@ -1976,6 +2014,7 @@ def generate_latest_metrics(self, request): self.config[section]['ignore_ssl'], self.config[section]['fetch_tags'], 
self.config[section]['fetch_alarms'], + self.config[section]['timeout'], ) metrics = yield collector.collect()