From da3464eddef3736b8fb3c38d6537fa7accc88b84 Mon Sep 17 00:00:00 2001 From: Filip Balak Date: Thu, 23 May 2024 15:12:28 +0200 Subject: [PATCH] Fix test_change_client_ocs_version_and_stop_heartbeat test (#9395) * update message of StorageClientHeartbeatMissed alert Signed-off-by: fbalak * remove dot from the alert message Signed-off-by: fbalak * update alert data Signed-off-by: fbalak * increase alert collecting time Signed-off-by: fbalak * update alert messages Signed-off-by: fbalak * update check_alert_list to reflect multiple messages for one alert Signed-off-by: fbalak * specify namespace in patch command Signed-off-by: fbalak * fix alert dictionary keys Signed-off-by: fbalak * fix severity level Signed-off-by: fbalak --------- Signed-off-by: fbalak --- ocs_ci/ocs/resources/storageconsumer.py | 2 + ocs_ci/utility/prometheus.py | 47 +++++++------------ tests/functional/monitoring/conftest.py | 2 +- .../prometheus/alerts/test_provider_client.py | 34 +++++++++----- 4 files changed, 43 insertions(+), 42 deletions(-) diff --git a/ocs_ci/ocs/resources/storageconsumer.py b/ocs_ci/ocs/resources/storageconsumer.py index 5d63f7f55a9..94fd8ea0b00 100644 --- a/ocs_ci/ocs/resources/storageconsumer.py +++ b/ocs_ci/ocs/resources/storageconsumer.py @@ -77,6 +77,8 @@ def set_ocs_version(self, version): + "'", "--subresource", "status", + "--namespace", + config.cluster_ctx.ENV_DATA["cluster_namespace"], ] exec_cmd(" ".join(cmd)) diff --git a/ocs_ci/utility/prometheus.py b/ocs_ci/utility/prometheus.py index d49b8bd753b..ba3d3c6f18c 100644 --- a/ocs_ci/utility/prometheus.py +++ b/ocs_ci/utility/prometheus.py @@ -41,38 +41,27 @@ def check_alert_list( target_alerts = [ alert for alert in alerts if alert.get("labels").get("alertname") == label ] - logger.info(f"Checking properties of found {label} alerts") - if ignore_more_occurences: - for state in states: - delete = False - for key, alert in reversed(list(enumerate(target_alerts))): - if alert.get("state") == state: - if delete: - d_msg = f"Ignoring {alert} as alert already appeared." - logger.debug(d_msg) - target_alerts.pop(key) - else: - delete = True - assert_msg = ( - f"Incorrect number of {label} alerts ({len(target_alerts)} " - f"instead of {len(states)} with states: {states})." - f"\nAlerts: {target_alerts}" - ) - assert len(target_alerts) == len(states), assert_msg for key, state in enumerate(states): - - assert_msg = "Alert message for alert {label} is not correct" - assert target_alerts[key]["annotations"]["message"] == msg, assert_msg - - assert_msg = f"Alert {label} doesn't have {severity} severity" - assert ( - target_alerts[key]["annotations"]["severity_level"] == severity - ), assert_msg - - assert_msg = f"Alert {label} is not in {state} state" - assert target_alerts[key]["state"] == state, assert_msg + target_alerts = [ + alert + for alert in target_alerts + if alert["annotations"]["message"] == msg + and alert["annotations"]["severity_level"] == severity + and alert["state"] == state + ] + assert_msg = ( + f"There was not found alert {label} with message: {msg}, " + f"severity: {severity} in state: {state}" + ) + assert target_alerts, assert_msg + if not ignore_more_occurences: + assert_msg = ( + f"There are multiple instances of alert {label} with " + f"message: {msg}, severity: {severity} in state: {state}" + ) + assert len(target_alerts) == 1, assert_msg logger.info("Alerts were triggered correctly during utilization") diff --git a/tests/functional/monitoring/conftest.py b/tests/functional/monitoring/conftest.py index d70e3278ed6..1aecc197742 100644 --- a/tests/functional/monitoring/conftest.py +++ b/tests/functional/monitoring/conftest.py @@ -1177,7 +1177,7 @@ def change_client_version(): nonlocal client nonlocal original_cluster # run_time of operation - run_time = 60 * 3 + run_time = 60 * 7 client.stop_heartbeat() client.set_ocs_version("4.13.0") logger.info(f"Waiting for {run_time} seconds") diff --git a/tests/functional/monitoring/prometheus/alerts/test_provider_client.py b/tests/functional/monitoring/prometheus/alerts/test_provider_client.py index d51c65832c7..9a6c42b4fff 100644 --- a/tests/functional/monitoring/prometheus/alerts/test_provider_client.py +++ b/tests/functional/monitoring/prometheus/alerts/test_provider_client.py @@ -1,6 +1,7 @@ import logging import pytest +from ocs_ci.framework import config from ocs_ci.framework.pytest_customization.marks import blue_squad from ocs_ci.framework.testlib import ( tier4c, @@ -41,16 +42,32 @@ def test_change_client_ocs_version_and_stop_heartbeat( client_name = measure_change_client_ocs_version_and_stop_heartbeat.get( "metadata" ).get("client_name") + cluster_namespace = config.ENV_DATA["cluster_namespace"] + cluster_name = config.ENV_DATA["storage_cluster_name"] target_alerts = [ { "label": constants.ALERT_STORAGECLIENTHEARTBEATMISSED, - "msg": f"Storage Client ({client_name}) heartbeat missed for more than 120 (s). " - "Lossy network connectivity might exist", + "msg": ( + f"Storage Client ({client_name}) heartbeat missed for more than 120 (s) " + f"in namespace:cluster {cluster_namespace}:{cluster_name}." + ), + "severity": "warning", + }, + { + "label": constants.ALERT_STORAGECLIENTHEARTBEATMISSED, + "msg": ( + f"Storage Client ({client_name}) heartbeat missed for more than 300 (s) " + f"in namespace:cluster {cluster_namespace}:{cluster_name}." + ), + "severity": "critical", }, { "label": constants.ALERT_STORAGECLIENTINCOMPATIBLEOPERATORVERSION, - "msg": f"Storage Client Operator ({client_name}) differs by more " - "than 1 minor version. Client configuration may be incompatible and unsupported", + "msg": ( + f"Storage Client Operator ({client_name}) differs by more than 1 minor " + f"version in namespace:cluster {cluster_namespace}:{cluster_name}." + ), + "severity": "critical", }, ] states = ["firing"] @@ -61,14 +78,7 @@ def test_change_client_ocs_version_and_stop_heartbeat( msg=target_alert["msg"], alerts=alerts, states=states, - severity="error", - ) - prometheus.check_alert_list( - label=target_alert["label"], - msg=target_alert["msg"], - alerts=alerts, - states=states, - severity="warning", + severity=target_alert["severity"], ) api.check_alert_cleared( label=target_alert["label"],