diff --git a/roles/kube_prometheus_stack/files/jsonnet/mixins.libsonnet b/roles/kube_prometheus_stack/files/jsonnet/mixins.libsonnet index 64e83b343..3a31daa94 100644 --- a/roles/kube_prometheus_stack/files/jsonnet/mixins.libsonnet +++ b/roles/kube_prometheus_stack/files/jsonnet/mixins.libsonnet @@ -147,25 +147,32 @@ local mixins = { }, { alert: 'MysqlClusterDown', - 'for': '1m', - expr: 'count(mysql_up==0) != count(mysql_up)', - labels: { - severity: 'info', + 'for': '5m', + expr: 'mysql_up == 0', + labels: { severity: 'info' }, + annotations: { + summary: 'Percona XtraDB Cluster replica is down', + description: "{{ $labels.instance }} replica is down.", }, + }, + { + alert: 'MysqlClusterDown', + 'for': '5m', + expr: 'round(count(mysql_up==1) / count(mysql_up) * 100) <= 50', + labels: { severity: 'warning' }, annotations: { - summary: '{{ $value }} percona-xtradb replication down', + summary: 'Percona XtraDB Cluster replicas are down', + description: "{{ $value }}% of replicas are online.", }, }, { alert: 'MysqlClusterDown', 'for': '1m', - expr: 'round(count(mysql_up==1)/count(mysql_up) * 100) <= 50', - labels: { - severity: 'warning', - }, + expr: 'count(mysql_up==0) == count(mysql_up)', + labels: { severity: 'critical' }, annotations: { - summary: 'Only {{ $value }}% percona-xtradb cluster are online', - description: "percona-xtradb cluster less than minimum replication, please check with kubectl get pods -n openstack -l app.kubernetes.io/component=pxc", + summary: 'Percona XtraDB Cluster is down', + description: "All replicas are down.", }, }, ], diff --git a/roles/kube_prometheus_stack/files/jsonnet/tests.yml b/roles/kube_prometheus_stack/files/jsonnet/tests.yml index 3faf14a6b..6ed198da2 100644 --- a/roles/kube_prometheus_stack/files/jsonnet/tests.yml +++ b/roles/kube_prometheus_stack/files/jsonnet/tests.yml @@ -112,16 +112,32 @@ tests: values: '1' - series: 'mysql_up{instance="percona-xtradb-pxc-1", job="pxc"}' values: '1' - - series: 'mysql_up{instance="percona-xtradb-pxc-3", job="pxc"}' - values: '0' + - series: 'mysql_up{instance="percona-xtradb-pxc-2", job="pxc"}' + values: '1' alert_rule_test: - eval_time: 1m + alertname: MysqlClusterDown + exp_alerts: [] + + - interval: 1m + input_series: + - series: 'mysql_up{instance="percona-xtradb-pxc-0", job="pxc"}' + values: '1' + - series: 'mysql_up{instance="percona-xtradb-pxc-1", job="pxc"}' + values: '1' + - series: 'mysql_up{instance="percona-xtradb-pxc-2", job="pxc"}' + values: '0' + alert_rule_test: + - eval_time: 5m alertname: MysqlClusterDown exp_alerts: - exp_labels: severity: P5 + instance: percona-xtradb-pxc-2 + job: pxc exp_annotations: - summary: "1 percona-xtradb replication down" + summary: Percona XtraDB Cluster replica is down + description: percona-xtradb-pxc-2 replica is down. - interval: 1m input_series: @@ -129,31 +145,46 @@ tests: values: '1' - series: 'mysql_up{instance="percona-xtradb-pxc-1", job="pxc"}' values: '0' - - series: 'mysql_up{instance="percona-xtradb-pxc-3", job="pxc"}' - values: '0' + - series: 'mysql_up{instance="percona-xtradb-pxc-2", job="pxc"}' + values: '0' alert_rule_test: - - eval_time: 1m + - eval_time: 5m alertname: MysqlClusterDown exp_alerts: - exp_labels: severity: P3 exp_annotations: - summary: 'Only 33% percona-xtradb cluster are online' - description: "percona-xtradb cluster less than minimum replication, please check with kubectl get pods -n openstack -l app.kubernetes.io/component=pxc" + summary: Percona XtraDB Cluster replicas are down + description: 33% of replicas are online. - exp_labels: severity: P5 + instance: percona-xtradb-pxc-1 + job: pxc exp_annotations: - summary: "2 percona-xtradb replication down" + summary: Percona XtraDB Cluster replica is down + description: percona-xtradb-pxc-1 replica is down. + - exp_labels: + severity: P5 + instance: percona-xtradb-pxc-2 + job: pxc + exp_annotations: + summary: Percona XtraDB Cluster replica is down + description: percona-xtradb-pxc-2 replica is down. - interval: 1m input_series: - series: 'mysql_up{instance="percona-xtradb-pxc-0", job="pxc"}' - values: '1' + values: '0' - series: 'mysql_up{instance="percona-xtradb-pxc-1", job="pxc"}' - values: '1' + values: '0' - series: 'mysql_up{instance="percona-xtradb-pxc-3", job="pxc"}' - values: '1' + values: '0' alert_rule_test: - eval_time: 1m alertname: MysqlClusterDown - exp_alerts: [] \ No newline at end of file + exp_alerts: + - exp_labels: + severity: P1 + exp_annotations: + summary: Percona XtraDB Cluster is down + description: All replicas are down.