Skip to content

Commit

Permalink
Fix test_change_client_ocs_version_and_stop_heartbeat test (#9395)
Browse files: browse the repository at this point in the history.
* update message of StorageClientHeartbeatMissed alert

Signed-off-by: fbalak <[email protected]>

* remove dot from the alert message

Signed-off-by: fbalak <[email protected]>

* update alert data

Signed-off-by: fbalak <[email protected]>

* increase alert collecting time

Signed-off-by: fbalak <[email protected]>

* update alert messages

Signed-off-by: fbalak <[email protected]>

* update check_alert_list to reflect multiple messages for one alert

Signed-off-by: fbalak <[email protected]>

* specify namespace in patch command

Signed-off-by: fbalak <[email protected]>

* fix alert dictionary keys

Signed-off-by: fbalak <[email protected]>

* fix severity level

Signed-off-by: fbalak <[email protected]>

---------

Signed-off-by: fbalak <[email protected]>
  • Loading branch information
fbalak authored May 23, 2024
1 parent 16c9c5f commit da3464e
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 42 deletions.
2 changes: 2 additions & 0 deletions ocs_ci/ocs/resources/storageconsumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ def set_ocs_version(self, version):
+ "'",
"--subresource",
"status",
"--namespace",
config.cluster_ctx.ENV_DATA["cluster_namespace"],
]
exec_cmd(" ".join(cmd))

Expand Down
47 changes: 18 additions & 29 deletions ocs_ci/utility/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,38 +41,27 @@ def check_alert_list(
target_alerts = [
alert for alert in alerts if alert.get("labels").get("alertname") == label
]

logger.info(f"Checking properties of found {label} alerts")
if ignore_more_occurences:
for state in states:
delete = False
for key, alert in reversed(list(enumerate(target_alerts))):
if alert.get("state") == state:
if delete:
d_msg = f"Ignoring {alert} as alert already appeared."
logger.debug(d_msg)
target_alerts.pop(key)
else:
delete = True
assert_msg = (
f"Incorrect number of {label} alerts ({len(target_alerts)} "
f"instead of {len(states)} with states: {states})."
f"\nAlerts: {target_alerts}"
)
assert len(target_alerts) == len(states), assert_msg

for key, state in enumerate(states):

assert_msg = "Alert message for alert {label} is not correct"
assert target_alerts[key]["annotations"]["message"] == msg, assert_msg

assert_msg = f"Alert {label} doesn't have {severity} severity"
assert (
target_alerts[key]["annotations"]["severity_level"] == severity
), assert_msg

assert_msg = f"Alert {label} is not in {state} state"
assert target_alerts[key]["state"] == state, assert_msg
target_alerts = [
alert
for alert in target_alerts
if alert["annotations"]["message"] == msg
and alert["annotations"]["severity_level"] == severity
and alert["state"] == state
]
assert_msg = (
f"There was not found alert {label} with message: {msg}, "
f"severity: {severity} in state: {state}"
)
assert target_alerts, assert_msg
if not ignore_more_occurences:
assert_msg = (
f"There are multiple instances of alert {label} with "
f"message: {msg}, severity: {severity} in state: {state}"
)
assert len(target_alerts) == 1, assert_msg

logger.info("Alerts were triggered correctly during utilization")

Expand Down
2 changes: 1 addition & 1 deletion tests/functional/monitoring/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1177,7 +1177,7 @@ def change_client_version():
nonlocal client
nonlocal original_cluster
# run_time of operation
run_time = 60 * 3
run_time = 60 * 7
client.stop_heartbeat()
client.set_ocs_version("4.13.0")
logger.info(f"Waiting for {run_time} seconds")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import pytest

from ocs_ci.framework import config
from ocs_ci.framework.pytest_customization.marks import blue_squad
from ocs_ci.framework.testlib import (
tier4c,
Expand Down Expand Up @@ -41,16 +42,32 @@ def test_change_client_ocs_version_and_stop_heartbeat(
client_name = measure_change_client_ocs_version_and_stop_heartbeat.get(
"metadata"
).get("client_name")
cluster_namespace = config.ENV_DATA["cluster_namespace"]
cluster_name = config.ENV_DATA["storage_cluster_name"]
target_alerts = [
{
"label": constants.ALERT_STORAGECLIENTHEARTBEATMISSED,
"msg": f"Storage Client ({client_name}) heartbeat missed for more than 120 (s). "
"Lossy network connectivity might exist",
"msg": (
f"Storage Client ({client_name}) heartbeat missed for more than 120 (s) "
f"in namespace:cluster {cluster_namespace}:{cluster_name}."
),
"severity": "warning",
},
{
"label": constants.ALERT_STORAGECLIENTHEARTBEATMISSED,
"msg": (
f"Storage Client ({client_name}) heartbeat missed for more than 300 (s) "
f"in namespace:cluster {cluster_namespace}:{cluster_name}."
),
"severity": "critical",
},
{
"label": constants.ALERT_STORAGECLIENTINCOMPATIBLEOPERATORVERSION,
"msg": f"Storage Client Operator ({client_name}) differs by more "
"than 1 minor version. Client configuration may be incompatible and unsupported",
"msg": (
f"Storage Client Operator ({client_name}) differs by more than 1 minor "
f"version in namespace:cluster {cluster_namespace}:{cluster_name}."
),
"severity": "critical",
},
]
states = ["firing"]
Expand All @@ -61,14 +78,7 @@ def test_change_client_ocs_version_and_stop_heartbeat(
msg=target_alert["msg"],
alerts=alerts,
states=states,
severity="error",
)
prometheus.check_alert_list(
label=target_alert["label"],
msg=target_alert["msg"],
alerts=alerts,
states=states,
severity="warning",
severity=target_alert["severity"],
)
api.check_alert_cleared(
label=target_alert["label"],
Expand Down

0 comments on commit da3464e

Please sign in to comment.