giantswarm · QuentinBisson · Mar 5, 2024 · Mar 5, 2024 · Mar 5, 2024 · marieroque
# Review note (free text outside the hunks; not part of the patch content):
# The two hunks below add a Helm conditional around the elasticsearch
# PrometheusRule template. The `if` is inserted as the new first line and the
# matching `end` is appended after the last context line shown, so the
# manifest is rendered only when .Values.managementCluster.provider.flavor
# equals "vintage".
diff --git a/helm/prometheus-rules/templates/alerting-rules/elasticsearch.rules.yml b/helm/prometheus-rules/templates/alerting-rules/elasticsearch.rules.yml
@@ -1,3 +1,4 @@
+{{- if eq .Values.managementCluster.provider.flavor "vintage" }}
 apiVersion: monitoring.coreos.com/v1
 kind: PrometheusRule
 metadata:
@@ -86,3 +87,4 @@ spec:
         severity: page
         team: atlas
         topic: logging
+{{- end }}
# Review note (free text outside the hunks; not part of the patch content):
# Same gating as applied to the elasticsearch rules: an `if` on
# .Values.managementCluster.provider.flavor == "vintage" is inserted as the
# new first line of the managed-logging PrometheusRule template and the
# matching `end` is appended after the last context line shown.
diff --git a/helm/prometheus-rules/templates/alerting-rules/managed-logging.rules.yml b/helm/prometheus-rules/templates/alerting-rules/managed-logging.rules.yml
@@ -1,3 +1,4 @@
+{{- if eq .Values.managementCluster.provider.flavor "vintage" }}
 apiVersion: monitoring.coreos.com/v1
 kind: PrometheusRule
 metadata:
@@ -35,3 +36,4 @@ spec:
         severity: page
         team: atlas
         topic: logging
+{{- end }}
# Review note (free text outside the hunks; not part of the patch content):
# Two changes to the promtail PrometheusRule template:
#   1. The `cluster_type: "management_cluster"` entry is dropped from
#      metadata.labels, leaving only the labels from "labels.common".
#   2. The expression of the rule with opsrecipe `promtail-is-not-running/`
#      now selects `up` series by container="promtail" instead of
#      app="promtail"; the comparison and the `for: 5m` window are unchanged.
diff --git a/helm/prometheus-rules/templates/alerting-rules/promtail.rules.yml b/helm/prometheus-rules/templates/alerting-rules/promtail.rules.yml
@@ -3,7 +3,6 @@ kind: PrometheusRule
 metadata:
   labels:
     {{- include "labels.common" . | nindent 4 }}
-    cluster_type: "management_cluster"
   name: promtail.rules
   namespace: {{ .Values.namespace  }}
 spec:
@@ -14,7 +13,7 @@ spec:
           annotations:
             description: '{{`Scraping of all promtail pods to check if one failed every 5 minutes.`}}'
             opsrecipe: promtail-is-not-running/
-          expr: count(up{app="promtail"} == 0) > 0
+          expr: count(up{container="promtail"} == 0) > 0
           for: 5m
           labels:
             area: "empowerment"

# Review note (free text outside the hunks; not part of the patch content):
# Test-side counterpart of the promtail rule change, as far as the hunks
# shown here go:
#   - First hunk: the three `up` input series are relabelled from
#     app="promtail" to container="promtail" so they match the new rule
#     selector; their sample values are untouched.
#   - Remaining hunks: inside each `exp_labels` mapping the severity/team/topic
#     entries are moved below the cancel_if_cluster_status_* entries. The keys
#     and values are identical before and after, so this is an ordering-only
#     change.
diff --git a/test/tests/providers/global/promtail.rules.test.yml b/test/tests/providers/global/promtail.rules.test.yml
@@ -6,12 +6,12 @@ tests:
   - interval: 1m
     input_series:
       # For the first 60min: test with 1 pod: none, up, down
-      - series: 'up{app="promtail",cluster_type="management_cluster", cluster_id="gauss", installation="gauss", node="ip-10-0-5-35.eu-west-1.compute.internal"}'
+      - series: 'up{container="promtail",cluster_type="management_cluster", cluster_id="gauss", installation="gauss", node="ip-10-0-5-35.eu-west-1.compute.internal"}'
         values: "_x20 1+0x20 0+0x20" 
       # From 60min: test with 2 pods: 1 up and 1 down, 2 up, 2 down.
-      - series: 'up{app="promtail",cluster_type="management_cluster", cluster_id="gauss", installation="gauss", node="ip-10-0-5-145.eu-west-1.compute.internal"}'
+      - series: 'up{container="promtail",cluster_type="management_cluster", cluster_id="gauss", installation="gauss", node="ip-10-0-5-145.eu-west-1.compute.internal"}'
         values: "_x60 1+0x20 1+0x20 0+0x20"
-      - series: 'up{app="promtail",cluster_type="management_cluster", cluster_id="gauss", installation="gauss", node="ip-10-0-5-76.eu-west-1.compute.internal"}'
+      - series: 'up{container="promtail",cluster_type="management_cluster", cluster_id="gauss", installation="gauss", node="ip-10-0-5-76.eu-west-1.compute.internal"}'
         values: "_x60 0+0x20 1+0x20 0+0x20"
     alert_rule_test:
       - alertname: PromtailDown
@@ -23,12 +23,12 @@ tests:
         exp_alerts:
           - exp_labels:
               area: empowerment
-              severity: page
-              team: atlas
-              topic: observability
               cancel_if_cluster_status_creating: "true"
               cancel_if_cluster_status_deleting: "true"
               cancel_if_cluster_status_updating: "true"
+              severity: page
+              team: atlas
+              topic: observability
             exp_annotations:
               description: "Scraping of all promtail pods to check if one failed every 5 minutes."
               opsrecipe: "promtail-is-not-running/"
@@ -38,12 +38,12 @@ tests:
         exp_alerts:
           - exp_labels:
               area: empowerment
-              severity: page
-              team: atlas
-              topic: observability
               cancel_if_cluster_status_creating: "true"
               cancel_if_cluster_status_deleting: "true"
               cancel_if_cluster_status_updating: "true"
+              severity: page
+              team: atlas
+              topic: observability
             exp_annotations:
               description: "Scraping of all promtail pods to check if one failed every 5 minutes."
               opsrecipe: "promtail-is-not-running/"
@@ -54,12 +54,12 @@ tests:
         exp_alerts:
           - exp_labels:
               area: empowerment
-              severity: page
-              team: atlas
-              topic: observability
               cancel_if_cluster_status_creating: "true"
               cancel_if_cluster_status_deleting: "true"
               cancel_if_cluster_status_updating: "true"
+              severity: page
+              team: atlas
+              topic: observability
             exp_annotations:
               description: "Scraping of all promtail pods to check if one failed every 5 minutes."
               opsrecipe: "promtail-is-not-running/"
@@ -84,12 +84,12 @@ tests:
         exp_alerts:
           - exp_labels:
               area: empowerment
-              severity: page
-              team: atlas
-              topic: observability
               cancel_if_cluster_status_creating: "true"
               cancel_if_cluster_status_deleting: "true"
               cancel_if_cluster_status_updating: "true"
+              severity: page
+              team: atlas
+              topic: observability
             exp_annotations:
               description: "This alert checks if that the amount of failed requests is below 10% for promtail"
               opsrecipe: "promtail-requests-are-failing/"
@@ -98,12 +98,12 @@ tests:
         exp_alerts:
           - exp_labels:
               area: empowerment
-              severity: page
-              team: atlas
-              topic: observability
               cancel_if_cluster_status_creating: "true"
               cancel_if_cluster_status_deleting: "true"
               cancel_if_cluster_status_updating: "true"
+              severity: page
+              team: atlas
+              topic: observability
             exp_annotations:
               description: "This alert checks if that the amount of failed requests is below 10% for promtail"
               opsrecipe: "promtail-requests-are-failing/"