From a1161ea3737d40c4ea30361b003c3b650723b002 Mon Sep 17 00:00:00 2001
From: Quentin Bisson
Date: Tue, 11 Jun 2024 14:49:44 +0200
Subject: [PATCH] Sort out shared alerts ownership (#1232)

Signed-off-by: QuentinBisson
---
 CHANGELOG.md | 11 +--
 Makefile.custom.mk | 1 -
 .../dns-operator-azure.rules.yml | 0
 .../alerting-rules/certificate.all.rules.yml | 2 +-
 .../certificate.management-cluster.rules.yml | 0
 .../certificate.workload-cluster.rules.yml | 0
 .../management-cluster.rules.yml | 19 ++---
 .../kubernetes-mixins.rules.yml | 0
 .../deployment.management-cluster.rules.yml | 24 +++----
 .../deployment.workload-cluster.rules.yml | 12 ++--
 .../alerting-rules/operatorkit.rules.yml | 5 +-
 .../recording-rules/grafana-cloud.rules.yml | 0
 .../recording-rules/service-level.rules.yml | 0
 .../alerting-rules/microendpoint.rules.yml | 72 -------------------
 scripts/sync-kube-mixin.sh | 2 +-
 test/conf/promtool_ignore | 24 ++++---
 test/hack/bin/template-chart.sh | 2 +
 17 files changed, 52 insertions(+), 122 deletions(-)
 rename helm/prometheus-rules/templates/{shared => kaas/phoenix}/alerting-rules/dns-operator-azure.rules.yml (100%)
 rename helm/prometheus-rules/templates/{shared => kaas/turtles}/alerting-rules/certificate.all.rules.yml (99%)
 rename helm/prometheus-rules/templates/{shared => kaas/turtles}/alerting-rules/certificate.management-cluster.rules.yml (100%)
 rename helm/prometheus-rules/templates/{shared => kaas/turtles}/alerting-rules/certificate.workload-cluster.rules.yml (100%)
 rename helm/prometheus-rules/templates/{shared => kaas/turtles}/alerting-rules/management-cluster.rules.yml (88%)
 rename helm/prometheus-rules/templates/{shared => kaas/turtles}/recording-rules/kubernetes-mixins.rules.yml (100%)
 rename helm/prometheus-rules/templates/{shared => platform/atlas}/alerting-rules/deployment.management-cluster.rules.yml (97%)
 rename helm/prometheus-rules/templates/{shared => platform/atlas}/alerting-rules/deployment.workload-cluster.rules.yml (96%)
 rename helm/prometheus-rules/templates/{shared => platform/atlas}/alerting-rules/operatorkit.rules.yml (98%)
 rename helm/prometheus-rules/templates/{shared => platform/atlas}/recording-rules/grafana-cloud.rules.yml (100%)
 rename helm/prometheus-rules/templates/{shared => platform/atlas}/recording-rules/service-level.rules.yml (100%)
 delete mode 100644 helm/prometheus-rules/templates/shared/alerting-rules/microendpoint.rules.yml

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ddfd70779..399918773 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,20 +23,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Moves cluster-autoscaler and vpa alerts to turtles.
 - Reviewed turtles alerts labels.
 - Use `ready` replicas for Kyverno webhooks alert.
-- Moves ownership of alerts for shared components to turtles.
+- Sort out shared alerts ownership by distributing them all to teams.
 
 ### Fixed
 
 - Fixed usage of yq, and jq in check-opsrecipes.sh
 - Fetch jq with make install-tools
-- Fix and improve the check-opsrecipes.sh script so support /_index.md based ops-recipes.
-- Fix cabbage alerts for multi-provider wcs.
-- Fix a few area labels in alerts.
-- Fix `cert-exporter` alerting.
+- Fixed and improved the check-opsrecipes.sh script to support /_index.md based ops-recipes.
+- Fixed cabbage alerts for multi-provider MCs.
+- Fixed all area alert labels.
+- Fixed `cert-exporter` alerts to page on all providers.
 - Fix `ManagementClusterDexAppMissing` use of absent for mimir.
 
 ### Removed
 
+- cleanup: get rid of microendpoint alerts as they never fired and probably never will
 - cleanup: remove scrape timeout inhibition leftovers (documentation and labels)
 
 ## [4.1.2] - 2024-05-31
diff --git a/Makefile.custom.mk b/Makefile.custom.mk
index deaad8df3..f7147898d 100644
--- a/Makefile.custom.mk
+++ b/Makefile.custom.mk
@@ -16,7 +16,6 @@ install-tools:
 	./test/hack/bin/fetch-tools.sh
 
 template-chart: install-tools ## prepare the helm chart
-	test/hack/bin/architect helm template --dir helm/prometheus-rules --dry-run
 	bash ./test/hack/bin/template-chart.sh
 
 test-rules: install-tools template-chart ## run unit tests for alerting rules
diff --git a/helm/prometheus-rules/templates/shared/alerting-rules/dns-operator-azure.rules.yml b/helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/dns-operator-azure.rules.yml
similarity index 100%
rename from helm/prometheus-rules/templates/shared/alerting-rules/dns-operator-azure.rules.yml
rename to helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/dns-operator-azure.rules.yml
diff --git a/helm/prometheus-rules/templates/shared/alerting-rules/certificate.all.rules.yml b/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/certificate.all.rules.yml
similarity index 99%
rename from helm/prometheus-rules/templates/shared/alerting-rules/certificate.all.rules.yml
rename to helm/prometheus-rules/templates/kaas/turtles/alerting-rules/certificate.all.rules.yml
index a04fda923..2ed94092d 100644
--- a/helm/prometheus-rules/templates/shared/alerting-rules/certificate.all.rules.yml
+++ b/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/certificate.all.rules.yml
@@ -42,7 +42,7 @@ spec:
         opsrecipe: kyverno-certificate-secret-will-expire-in-less-than-two-days/
       expr: (cert_exporter_secret_not_after{name=~".*kyverno.*"} - time()) < 2 * 24 * 60 * 60
       labels:
-        area: managedservices
+        area: kaas
         cancel_if_outside_working_hours: "true"
         severity: notify
         team: shield
diff --git a/helm/prometheus-rules/templates/shared/alerting-rules/certificate.management-cluster.rules.yml b/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/certificate.management-cluster.rules.yml
similarity index 100%
rename from helm/prometheus-rules/templates/shared/alerting-rules/certificate.management-cluster.rules.yml
rename to helm/prometheus-rules/templates/kaas/turtles/alerting-rules/certificate.management-cluster.rules.yml
diff --git a/helm/prometheus-rules/templates/shared/alerting-rules/certificate.workload-cluster.rules.yml b/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/certificate.workload-cluster.rules.yml
similarity index 100%
rename from helm/prometheus-rules/templates/shared/alerting-rules/certificate.workload-cluster.rules.yml
rename to helm/prometheus-rules/templates/kaas/turtles/alerting-rules/certificate.workload-cluster.rules.yml
diff --git a/helm/prometheus-rules/templates/shared/alerting-rules/management-cluster.rules.yml b/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/management-cluster.rules.yml
similarity index 88%
rename from helm/prometheus-rules/templates/shared/alerting-rules/management-cluster.rules.yml
rename to helm/prometheus-rules/templates/kaas/turtles/alerting-rules/management-cluster.rules.yml
index 0576aacd3..dd3e05eed 100644
--- a/helm/prometheus-rules/templates/shared/alerting-rules/management-cluster.rules.yml
+++ b/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/management-cluster.rules.yml
@@ -13,28 +13,27 @@ spec:
   groups:
   - name: management-cluster
     rules:
-    {{- if (eq .Values.managementCluster.provider.kind "aws") }}
     - alert: ManagementClusterHasLessThanThreeNodes
       annotations:
         description: '{{`Management cluster {{ $labels.cluster_id }} has less than 3 nodes.`}}'
         opsrecipe: management-cluster-less-than-three-workers/
-      expr: sum(kubelet_node_name{cluster_type="management_cluster"} * on (cluster_id, node) kube_node_role{role="worker", cluster_type="management_cluster"}) by (cluster_id) < 3
+      expr: sum(kubelet_node_name{cluster_type="management_cluster"} * on (cluster_id, node) kube_node_role{role="worker", cluster_type="management_cluster"}) by (cluster_id, installation, pipeline, provider) < 3
       for: 1h
       labels:
         area: kaas
         cancel_if_outside_working_hours: {{ include "workingHoursOnly" . }}
         severity: page
-        team: phoenix
+        team: {{ include "providerTeam" . }}
         topic: managementcluster
     - alert: ManagementClusterMissingNodes
       annotations:
         description: '{{`Management cluster {{ $labels.cluster_id }} has less than 4 minimum nodes.`}}'
-      expr: sum(kube_node_status_condition{cluster_type="management_cluster", condition="Ready", status="true"}) by (cluster_id) < 4
+      expr: sum(kube_node_status_condition{cluster_type="management_cluster", condition="Ready", status="true"}) by (cluster_id, installation, pipeline, provider) < 4
       for: 15m
       labels:
         area: kaas
         severity: notify
-        team: phoenix
+        team: {{ include "providerTeam" . }}
         topic: managementcluster
     - alert: ManagementClusterCPUUsageTooHigh
       annotations:
@@ -46,7 +45,7 @@ spec:
         area: kass
         cancel_if_outside_working_hours: "true"
         severity: page
-        team: phoenix
+        team: {{ include "providerTeam" . }}
         topic: managementcluster
     - alert: ManagementClusterMemoryUsageTooHigh
       annotations:
@@ -58,12 +57,12 @@ spec:
         area: kass
         cancel_if_outside_working_hours: "true"
         severity: page
-        team: phoenix
+        team: {{ include "providerTeam" . }}
         topic: managementcluster
     - alert: ManagementClusterPodLimitAlmostReached
       annotations:
         description: '{{`Cluster {{ $labels.cluster_id }} is almost exceeding its pod limit.`}}'
-      expr: (sum(kube_pod_info{cluster_type="management_cluster"}) by (cluster_id) / sum(kube_node_status_capacity{resource="pods", cluster_type="management_cluster"}) by (cluster_id)) > 0.8
+      expr: (sum(kube_pod_info{cluster_type="management_cluster"}) by (cluster_id, installation, pipeline, provider) / sum(kube_node_status_capacity{resource="pods", cluster_type="management_cluster"}) by (cluster_id, installation, pipeline, provider)) > 0.8
       for: 5m
       labels:
         area: kaas
@@ -72,8 +71,10 @@ spec:
         cancel_if_cluster_status_updating: "true"
         cancel_if_outside_working_hours: {{ include "workingHoursOnly" . }}
         severity: notify
-        team: phoenix
+        team: {{ include "providerTeam" . }}
         topic: managementcluster
+    {{- if (eq .Values.managementCluster.provider.kind "aws") }}
+    ## TODO Remove when all vintage clusters are gone
     - alert: ManagementClusterCriticalPodNotRunning
       annotations:
         description: '{{`Critical pod {{ $labels.namespace }}/{{ $labels.pod }} is not running.`}}'
diff --git a/helm/prometheus-rules/templates/shared/recording-rules/kubernetes-mixins.rules.yml b/helm/prometheus-rules/templates/kaas/turtles/recording-rules/kubernetes-mixins.rules.yml
similarity index 100%
rename from helm/prometheus-rules/templates/shared/recording-rules/kubernetes-mixins.rules.yml
rename to helm/prometheus-rules/templates/kaas/turtles/recording-rules/kubernetes-mixins.rules.yml
diff --git a/helm/prometheus-rules/templates/shared/alerting-rules/deployment.management-cluster.rules.yml b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/deployment.management-cluster.rules.yml
similarity index 97%
rename from helm/prometheus-rules/templates/shared/alerting-rules/deployment.management-cluster.rules.yml
rename to helm/prometheus-rules/templates/platform/atlas/alerting-rules/deployment.management-cluster.rules.yml
index 5a587e527..36ac26281 100644
--- a/helm/prometheus-rules/templates/shared/alerting-rules/deployment.management-cluster.rules.yml
+++ b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/deployment.management-cluster.rules.yml
@@ -20,7 +20,7 @@ spec:
       expr: kube_deployment_status_replicas_unavailable{cluster_type="management_cluster", deployment=~"alertmanager.*|grafana.*|prometheus.*|promxy.*|mimir.*|loki.*|object-storage.*|logging-operator.*|silence-operator.*|sloth.*"} > 0
       for: 30m
       labels:
-        area: kaas
+        area: platform
         cancel_if_cluster_status_creating: "true"
         cancel_if_cluster_status_deleting: "true"
         cancel_if_cluster_status_updating: "true"
@@ -35,7 +35,7 @@ spec:
       expr: kube_deployment_status_replicas_unavailable{cluster_type="management_cluster", deployment=~"app-admission-controller-.+|app-operator-.+|chart-operator-.+", cluster_id!~"argali|giraffe"} > 0
       for: 30m
       labels:
-        area: managedservices
+        area: platform
         cancel_if_cluster_status_creating: "true"
         cancel_if_cluster_status_deleting: "true"
         cancel_if_cluster_status_updating: "true"
@@ -50,7 +50,7 @@ spec:
       expr: kube_deployment_status_replicas_unavailable{cluster_type="management_cluster", deployment=~"app-admission-controller-.+|app-operator-.+|chart-operator-.+", cluster_id=~"argali|giraffe"} > 0
       for: 3h
       labels:
-        area: managedservices
+        area: platform
         severity: page
         team: honeybadger
         topic: managementcluster
@@ -61,7 +61,7 @@ spec:
       expr: kube_deployment_status_replicas_unavailable{cluster_type="management_cluster", deployment=~"kyverno.*", cluster_id!~"argali|giraffe"} > 0
       for: 30m
       labels:
-        area: managedservices
+        area: platform
         cancel_if_cluster_status_creating: "true"
         cancel_if_cluster_status_deleting: "true"
         cancel_if_cluster_status_updating: "true"
@@ -76,7 +76,7 @@ spec:
       expr: kube_deployment_status_replicas_unavailable{cluster_type="management_cluster", deployment=~"kyverno.*", cluster_id=~"argali|giraffe"} > 0
       for: 30m
       labels:
-        area: managedservices
+        area: platform
         cancel_if_cluster_status_creating: "true"
         cancel_if_cluster_status_deleting: "true"
         cancel_if_cluster_status_updating: "true"
@@ -91,7 +91,7 @@ spec:
       expr: label_join(kube_deployment_status_replicas_unavailable{cluster_type="management_cluster", deployment=~"aws-admission-controller.*|aws-operator-.+|cluster-operator-.+|cluster-api-core-webhook.*|event-exporter-.*|etcd-kubernetes-resources-count-exporter-.*|upgrade-schedule-operator.*|worker-.+|master-.+", cluster_id!~"argali|giraffe"}, "service", "/", "namespace", "deployment") > 0
deployment=~"aws-admission-controller.*|aws-operator-.+|cluster-operator-.+|cluster-api-core-webhook.*|event-exporter-.*|etcd-kubernetes-resources-count-exporter-.*|upgrade-schedule-operator.*|worker-.+|master-.+", cluster_id!~"argali|giraffe"}, "service", "/", "namespace", "deployment") > 0 for: 30m labels: - area: kaas + area: platform cancel_if_cluster_status_creating: "true" cancel_if_cluster_status_deleting: "true" cancel_if_cluster_status_updating: "true" @@ -105,7 +105,7 @@ spec: expr: kube_deployment_status_replicas_unavailable{cluster_type="management_cluster", deployment=~"aws-operator-.+|cluster-operator-.+|coredns-.+|event-exporter-.+|etcd-kubernetes-resources-count-exporter.*", cluster_id=~"argali|giraffe"} > 0 for: 3h labels: - area: kaas + area: platform severity: page team: {{ include "providerTeam" . }} topic: managementcluster @@ -116,7 +116,7 @@ spec: expr: kube_deployment_status_replicas_available{cluster_type="management_cluster", deployment=~"([a-z]*)-operator([a-z,-]*)",provider="aws"} + kube_deployment_status_replicas_unavailable{cluster_type="management_cluster", deployment=~"([a-z]*)-operator([a-z,-]*)",provider="aws"} == 0 for: 4h labels: - area: kaas + area: platform severity: notify team: phoenix topic: managementcluster @@ -127,7 +127,7 @@ spec: expr: kube_deployment_status_replicas_unavailable{cluster_type="management_cluster", deployment=~"aws-admission-controller.*|aws-operator.*|cluster-operator.*|cluster-api-core-webhook.*|event-exporter-.*|upgrade-schedule-operator.*|event-exporter-app.*", cluster_id=~"argali|giraffe"} > 0 for: 3h labels: - area: kaas + area: platform cancel_if_cluster_status_creating: "true" cancel_if_cluster_status_deleting: "true" cancel_if_cluster_status_updating: "true" @@ -142,7 +142,7 @@ spec: expr: kube_deployment_status_replicas_unavailable{cluster_type="management_cluster", deployment=~"(ingress-nginx|nginx-ingress-controller)-.+", cluster_id!~"argali|giraffe"} > 0 for: 30m labels: - area: kaas + area: platform cancel_if_cluster_status_creating: "true" cancel_if_cluster_status_deleting: "true" cancel_if_cluster_status_updating: "true" @@ -156,7 +156,7 @@ spec: expr: kube_deployment_status_replicas_unavailable{cluster_type="management_cluster", deployment=~"(ingress-nginx|nginx-ingress-controller|coredns)-.+", cluster_id=~"argali|giraffe"} > 0 for: 3h labels: - area: kaas + area: platform severity: page team: cabbage topic: managementcluster @@ -171,7 +171,7 @@ spec: {{- end }} for: 30m labels: - area: kaas + area: platform cancel_if_cluster_status_creating: "true" cancel_if_cluster_status_deleting: "true" cancel_if_cluster_status_updating: "true" diff --git a/helm/prometheus-rules/templates/shared/alerting-rules/deployment.workload-cluster.rules.yml b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/deployment.workload-cluster.rules.yml similarity index 96% rename from helm/prometheus-rules/templates/shared/alerting-rules/deployment.workload-cluster.rules.yml rename to helm/prometheus-rules/templates/platform/atlas/alerting-rules/deployment.workload-cluster.rules.yml index e0c8f0dcb..776df0011 100644 --- a/helm/prometheus-rules/templates/shared/alerting-rules/deployment.workload-cluster.rules.yml +++ b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/deployment.workload-cluster.rules.yml @@ -20,7 +20,7 @@ spec: expr: kube_deployment_status_replicas_unavailable{cluster_type="workload_cluster", deployment="chart-operator"} > 0 for: 30m labels: - area: managedservices + area: platform 
         cancel_if_outside_working_hours: {{ include "workingHoursOnly" . }}
         severity: page
         team: honeybadger
@@ -32,7 +32,7 @@ spec:
       expr: label_join(kube_deployment_status_replicas_unavailable{cluster_type="workload_cluster", deployment=~"metrics-server|vertical-pod-autoscaler(-app)?-admission-controller|vertical-pod-autoscaler(-app)?-recommender|vertical-pod-autoscaler(-app)?-updater|aws-pod-identity-webhook.*|cluster-autoscaler|aws-load-balancer-controller"}, "service", "/", "namespace", "deployment") > 0
       for: 30m
       labels:
-        area: kaas
+        area: platform
         cancel_if_outside_working_hours: {{ include "workingHoursOnly" . }}
         severity: page
         team: {{ include "providerTeam" . }}
@@ -44,7 +44,7 @@ spec:
       expr: label_join(kube_deployment_status_replicas_unavailable{cluster_type="workload_cluster", deployment="etcd-kubernetes-resources-count-exporter"}, "service", "/", "namespace", "deployment") > 0
       for: 30m
       labels:
-        area: kaas
+        area: platform
         cancel_if_prometheus_agent_down: "true"
         cancel_if_outside_working_hours: "true"
         severity: page
@@ -56,7 +56,7 @@ spec:
       expr: kube_deployment_status_replicas_available{cluster_type="workload_cluster", deployment="chart-operator"} + kube_deployment_status_replicas_unavailable{cluster_type="workload_cluster", deployment="chart-operator"} == 0
       for: 4h
       labels:
-        area: managedservices
+        area: platform
         cancel_if_outside_working_hours: {{ include "workingHoursOnly" . }}
         severity: notify
         team: honeybadger
@@ -67,7 +67,7 @@ spec:
      expr: kube_deployment_spec_replicas{cluster_type="workload_cluster", deployment=~"trivy-operator|starboard-exporter|jiralert"} == 0
       for: 4h
       labels:
-        area: managedservices
+        area: platform
         cancel_if_outside_working_hours: "true"
         severity: notify
         team: shield
@@ -79,7 +79,7 @@ spec:
       expr: kube_deployment_status_replicas_unavailable{cluster_type="workload_cluster", deployment=~"cert-manager-*|teleport-*|dex*|athena*|rbac-operator|credentiald"} > 0
       for: 30m
       labels:
-        area: kaas
+        area: platform
         cancel_if_cluster_status_creating: "true"
         cancel_if_cluster_status_deleting: "true"
         cancel_if_cluster_status_updating: "true"
diff --git a/helm/prometheus-rules/templates/shared/alerting-rules/operatorkit.rules.yml b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/operatorkit.rules.yml
similarity index 98%
rename from helm/prometheus-rules/templates/shared/alerting-rules/operatorkit.rules.yml
rename to helm/prometheus-rules/templates/platform/atlas/alerting-rules/operatorkit.rules.yml
index 4e1805ff2..a571c99af 100644
--- a/helm/prometheus-rules/templates/shared/alerting-rules/operatorkit.rules.yml
+++ b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/operatorkit.rules.yml
@@ -1,3 +1,4 @@
+# Atlas is the team responsible for operatorkit
 apiVersion: monitoring.coreos.com/v1
 kind: PrometheusRule
 metadata:
@@ -17,7 +18,7 @@ spec:
       expr: operatorkit_controller_error_total{pod=~"app-operator.*|chart-operator.*"} > 5
       for: 1m
       labels:
-        area: kaas
+        area: platform
         severity: notify
         team: honeybadger
         topic: qa
@@ -27,7 +28,7 @@ spec:
       expr: (time() - operatorkit_controller_last_reconciled{pod=~"app-operator.*|chart-operator.*"}) / 60 > 30
       for: 10m
       labels:
-        area: managedservices
+        area: platform
         severity: notify
         team: honeybadger
         topic: releng
diff --git a/helm/prometheus-rules/templates/shared/recording-rules/grafana-cloud.rules.yml b/helm/prometheus-rules/templates/platform/atlas/recording-rules/grafana-cloud.rules.yml
similarity index 100%
rename from helm/prometheus-rules/templates/shared/recording-rules/grafana-cloud.rules.yml
rename to helm/prometheus-rules/templates/platform/atlas/recording-rules/grafana-cloud.rules.yml
diff --git a/helm/prometheus-rules/templates/shared/recording-rules/service-level.rules.yml b/helm/prometheus-rules/templates/platform/atlas/recording-rules/service-level.rules.yml
similarity index 100%
rename from helm/prometheus-rules/templates/shared/recording-rules/service-level.rules.yml
rename to helm/prometheus-rules/templates/platform/atlas/recording-rules/service-level.rules.yml
diff --git a/helm/prometheus-rules/templates/shared/alerting-rules/microendpoint.rules.yml b/helm/prometheus-rules/templates/shared/alerting-rules/microendpoint.rules.yml
deleted file mode 100644
index 4577db4d5..000000000
--- a/helm/prometheus-rules/templates/shared/alerting-rules/microendpoint.rules.yml
+++ /dev/null
@@ -1,72 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: PrometheusRule
-metadata:
-  creationTimestamp: null
-  labels:
-    {{- include "labels.common" . | nindent 4 }}
-  name: microendpoint.rules
-  namespace: {{ .Values.namespace }}
-spec:
-  groups:
-  - name: microendpoint
-    rules:
-    # replacing `version` with `reconciled_version` is only done if the latter
-    # is non-empty and is done to work with old operator versions using
-    # microendpoint < 0.1.0 (i.e. before VOO)
-    - alert: CollidingOperatorsAtlas
-      annotations:
-        description: '{{`CR version {{ $labels.version }} in cluster {{ $labels.cluster_id }} is reconciled by multiple apps including {{ $labels.app }}.`}}'
-        opsrecipe: multiple-operators-running-same-version/
-      expr: sum(label_replace(giantswarm_build_info{app=~"prometheus-meta-operator.*"}, "version", "$1", "reconciled_version", "(.+)")) by (app, cluster_id, installation, provider, pipeline, version) > 1
-      for: 5m
-      labels:
-        area: empowerment
-        cancel_if_outside_working_hours: {{ include "workingHoursOnly" . }}
-        severity: page
-        team: atlas
-        topic: releng
-    # replacing `version` with `reconciled_version` is only done if the latter
-    # is non-empty and is done to work with old operator versions using
-    # microendpoint < 0.1.0 (i.e. before VOO)
-    - alert: CollidingOperatorsHoneybadger
-      annotations:
-        description: '{{`CR version {{ $labels.version }} in cluster {{ $labels.cluster_id }} is reconciled by multiple apps including {{ $labels.app }}.`}}'
-        opsrecipe: multiple-operators-running-same-version/
-      expr: sum(label_replace(giantswarm_build_info{app=~"app-operator.*|chart-operator.*"}, "version", "$1", "reconciled_version", "(.+)")) by (app, cluster_id, installation, provider, pipeline, version) > 1
-      for: 5m
-      labels:
-        area: managedservices
-        cancel_if_outside_working_hours: {{ include "workingHoursOnly" . }}
-        severity: page
-        team: honeybadger
-        topic: releng
-    # replacing `version` with `reconciled_version` is only done if the latter
-    # is non-empty and is done to work with old operator versions using
-    # microendpoint < 0.1.0 (i.e. before VOO)
-    - alert: CollidingOperatorsAWS
-      annotations:
-        description: '{{`CR version {{ $labels.version }} in cluster {{ $labels.cluster_id }} is reconciled by multiple apps including {{ $labels.app }}.`}}'
-        opsrecipe: multiple-operators-running-same-version/
-      expr: sum(label_replace(giantswarm_build_info{app=~"aws-operator.*|cluster-operator.*"}, "version", "$1", "reconciled_version", "(.+)")) by (app, cluster_id, installation, provider, pipeline, version) > 1
-      for: 5m
-      labels:
-        area: kaas
-        cancel_if_outside_working_hours: {{ include "workingHoursOnly" . }}
-        severity: page
-        team: phoenix
-        topic: releng
-    # replacing `version` with `reconciled_version` is only done if the latter
-    # is non-empty and is done to work with old operator versions using
-    # microendpoint < 0.1.0 (i.e. before VOO)
-    - alert: CollidingOperatorsRocket
-      annotations:
-        description: '{{`CR version {{ $labels.version }} in cluster {{ $labels.cluster_id }} is reconciled by multiple apps including {{ $labels.app }}.`}}'
-        opsrecipe: multiple-operators-running-same-version/
-      expr: sum(label_replace(giantswarm_build_info{app=~"ignition-operator|cert-operator|node-operator"}, "version", "$1", "reconciled_version", "(.+)")) by (app, cluster_id, installation, provider, pipeline, version) > 1
-      for: 5m
-      labels:
-        area: kaas
-        cancel_if_outside_working_hours: {{ include "workingHoursOnly" . }}
-        severity: page
-        team: rocket
-        topic: releng
diff --git a/scripts/sync-kube-mixin.sh b/scripts/sync-kube-mixin.sh
index 85bf5490e..5850ab3c4 100755
--- a/scripts/sync-kube-mixin.sh
+++ b/scripts/sync-kube-mixin.sh
@@ -5,7 +5,7 @@ set -o nounset
 set -o pipefail
 
 TMPDIR="$(mktemp -d -t 'tmp.XXXXXXXXXX')"
-RULESFILE="helm/prometheus-rules/templates/shared/recording-rules/kubernetes-mixins.rules.yml"
+RULESFILE="helm/prometheus-rules/templates/kaas/turtles/recording-rules/kubernetes-mixins.rules.yml"
 
 trap 'cleanup' EXIT
diff --git a/test/conf/promtool_ignore b/test/conf/promtool_ignore
index 71a57dac8..2d3d2a071 100644
--- a/test/conf/promtool_ignore
+++ b/test/conf/promtool_ignore
@@ -9,6 +9,7 @@ kaas/phoenix/alerting-rules/calico.rules.yml
 kaas/phoenix/alerting-rules/capa.management-cluster.rules.yml
 kaas/phoenix/alerting-rules/cluster-service.rules.yml
 kaas/phoenix/alerting-rules/credentiald.rules.yml
+kaas/phoenix/alerting-rules/dns-operator-azure.rules.yml
 kaas/phoenix/alerting-rules/inhibit.aws.management-cluster.rules.yml
 kaas/phoenix/alerting-rules/inhibit.kiam.rules.yml
 kaas/phoenix/alerting-rules/kiam.rules.yml
@@ -23,6 +24,9 @@ kaas/turtles/alerting-rules/capi-machinedeployment.rules.yml
 kaas/turtles/alerting-rules/capi-machinepool.rules.yml
 kaas/turtles/alerting-rules/capi-machineset.rules.yml
 kaas/turtles/alerting-rules/capi.management-cluster.rules.yml
+kaas/turtles/alerting-rules/certificate.all.rules.yml
+kaas/turtles/alerting-rules/certificate.management-cluster.rules.yml
+kaas/turtles/alerting-rules/certificate.workload-cluster.rules.yml
 kaas/turtles/alerting-rules/cluster-autoscaler.rules.yml
 kaas/turtles/alerting-rules/docker.rules.yml
 kaas/turtles/alerting-rules/etcd.management-cluster.rules.yml
@@ -33,6 +37,7 @@ kaas/turtles/alerting-rules/inhibit.capi.rules.yml
 kaas/turtles/alerting-rules/inhibit.kubelet.rules.yml
 kaas/turtles/alerting-rules/job.rules.yml
 kaas/turtles/alerting-rules/kubelet.rules.yml
+kaas/turtles/alerting-rules/management-cluster.rules.yml
 kaas/turtles/alerting-rules/net-exporter.rules.yml
 kaas/turtles/alerting-rules/node-exporter.rules.yml
 kaas/turtles/alerting-rules/node.management-cluster.rules.yml
@@ -42,16 +47,22 @@ kaas/turtles/alerting-rules/storage.workload-cluster.rules.yml
 kaas/turtles/alerting-rules/systemd.rules.yml
 kaas/turtles/alerting-rules/timesync.rules.yml
 kaas/turtles/alerting-rules/vertical-pod-autoscaler.rules.yml
+kaas/turtles/recording-rules/kubernetes-mixins.rules.yml
+platform/atlas/alerting-rules/deployment.management-cluster.rules.yml
+platform/atlas/alerting-rules/deployment.workload-cluster.rules.yml
 platform/atlas/alerting-rules/fluentbit.rules.yml
 platform/atlas/alerting-rules/inhibit.oncall.rules.yml
 platform/atlas/alerting-rules/keda.rules.yml
 platform/atlas/alerting-rules/kube-state-metrics.rules.yml
+platform/atlas/alerting-rules/operatorkit.rules.yml
 platform/atlas/alerting-rules/prometheus-meta-operator.rules.yml
 platform/atlas/alerting-rules/prometheus-operator.rules.yml
 platform/atlas/alerting-rules/service-level.rules.yml
 platform/atlas/alerting-rules/storage.rules.yml
+platform/atlas/recording-rules/grafana-cloud.rules.yml
 platform/atlas/recording-rules/loki-mixins.rules.yml
 platform/atlas/recording-rules/mimir-mixins.rules.yml
+platform/atlas/recording-rules/service-level.rules.yml
 platform/cabbage/alerting-rules/coredns.rules.yml
 platform/cabbage/alerting-rules/external-dns.rules.yml
 platform/cabbage/alerting-rules/ingress-controller.rules.yml
@@ -64,16 +75,3 @@ platform/honeybadger/alerting-rules/secret.rules.yml
 platform/honeybadger/recording-rules/helm-operations.rules.yml
 platform/honeybadger/recording-rules/helm-operations.rules.yml
 platform/shield/alerting-rules/falco.rules.yml
-shared/alerting-rules/certificate.all.rules.yml
-shared/alerting-rules/certificate.management-cluster.rules.yml
-shared/alerting-rules/certificate.workload-cluster.rules.yml
-shared/alerting-rules/deployment.management-cluster.rules.yml
-shared/alerting-rules/deployment.workload-cluster.rules.yml
-shared/alerting-rules/dns-operator-azure.rules.yml
-shared/alerting-rules/management-cluster.rules.yml
-shared/alerting-rules/microendpoint.rules.yml
-shared/alerting-rules/operatorkit.rules.yml
-shared/alerting-rules/service-level.rules.yml
-shared/recording-rules/grafana-cloud.rules.yml
-shared/recording-rules/kubernetes-mixins.rules.yml
-shared/recording-rules/service-level.rules.yml
diff --git a/test/hack/bin/template-chart.sh b/test/hack/bin/template-chart.sh
index ab8edf5f8..155e858bf 100755
--- a/test/hack/bin/template-chart.sh
+++ b/test/hack/bin/template-chart.sh
@@ -7,6 +7,8 @@ main() {
   local -a providers
   mapfile -t providers <"$GIT_WORKDIR/test/conf/providers"
 
+  rm -rf "$GIT_WORKDIR"/test/hack/output/helm-chart/
+
   for provider in "${providers[@]}"; do
     echo "Templating chart for provider: $provider"