From ff29140f9346e953f3774bed02faa6ce18052b65 Mon Sep 17 00:00:00 2001 From: Quentin Bisson Date: Tue, 11 Jun 2024 12:23:54 +0200 Subject: [PATCH] Fix dex absent query for mimir (#1231) * Fix dex absent query for mimir * Fix some shared alert ownership (#1228) * Fix some shared alert ownership Signed-off-by: QuentinBisson * Update helm/prometheus-rules/templates/kaas/turtles/alerting-rules/kubelet.rules.yml --------- Signed-off-by: QuentinBisson * fix tests Signed-off-by: QuentinBisson --------- Signed-off-by: QuentinBisson --- CHANGELOG.md | 4 +- CODEOWNERS | 19 +++-- .../kaas/bigmac/alerting-rules/dex.rules.yml | 4 + test/conf/promtool_ignore | 79 +++++++++++++------ 4 files changed, 70 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c33c5dbc..ddfd70779 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,15 +25,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Use `ready` replicas for Kyverno webhooks alert. - Moves ownership of alerts for shared components to turtles. - ### Fixed - Fixed usage of yq, and jq in check-opsrecipes.sh - Fetch jq with make install-tools - Fix and improve the check-opsrecipes.sh script so support /_index.md based ops-recipes. - Fix cabbage alerts for multi-provider wcs. -- Fix a few area labels. +- Fix a few area labels in alerts. - Fix `cert-exporter` alerting. +- Fix `ManagementClusterDexAppMissing` use of absent for mimir. ### Removed diff --git a/CODEOWNERS b/CODEOWNERS index 28f07ef17..bd081f495 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,9 +1,12 @@ * @giantswarm/team-atlas -/helm/prometheus-rules/templates/kaas/bigmac/ @team-bigmac -/helm/prometheus-rules/templates/kaas/phoenix/ @team-phoenix -/helm/prometheus-rules/templates/kaas/rocket/ @team-rocket -/helm/prometheus-rules/templates/kaas/turtles/ @team-turtles -/helm/prometheus-rules/templates/platform/atlas/ @team-atlas -/helm/prometheus-rules/templates/platform/cabbage/ @team-cabbage -/helm/prometheus-rules/templates/platform/honeybadger/ @team-honeybadger -/helm/prometheus-rules/templates/platform/shield/ @team-shield +/helm/prometheus-rules/templates/kaas/bigmac/ @giantswarm/team-bigmac +/helm/prometheus-rules/templates/kaas/phoenix/ @giantswarm/team-phoenix +/helm/prometheus-rules/templates/kaas/rocket/ @giantswarm/team-rocket +/helm/prometheus-rules/templates/kaas/turtles/ @giantswarm/team-turtles +/helm/prometheus-rules/templates/platform/atlas/ @giantswarm/team-atlas +/helm/prometheus-rules/templates/platform/cabbage/ @giantswarm/team-cabbage +/helm/prometheus-rules/templates/platform/honeybadger/ @giantswarm/team-honeybadger +/helm/prometheus-rules/templates/platform/shield/ @giantswarm/team-shield + +# No owners for changelog +/CHANGELOG.md diff --git a/helm/prometheus-rules/templates/kaas/bigmac/alerting-rules/dex.rules.yml b/helm/prometheus-rules/templates/kaas/bigmac/alerting-rules/dex.rules.yml index 4c6cbccca..7a0656d3d 100644 --- a/helm/prometheus-rules/templates/kaas/bigmac/alerting-rules/dex.rules.yml +++ b/helm/prometheus-rules/templates/kaas/bigmac/alerting-rules/dex.rules.yml @@ -41,7 +41,11 @@ spec: annotations: description: '{{`dex-operator did not register a dex-app in giantswarm namespace.`}}' opsrecipe: dex-operator/ + {{- if .Values.mimir.enabled }} + expr: absent(dex_operator_idp_secret_expiry_time{app_namespace="giantswarm", cluster_type="management_cluster", cluster_id="{{ .Values.managementCluster.name }}", installation="{{ .Values.managementCluster.name }}", provider="{{ .Values.managementCluster.provider.kind }}", pipeline="{{ .Values.managementCluster.pipeline }}"}) + {{- else }} expr: absent(dex_operator_idp_secret_expiry_time{app_namespace="giantswarm", cluster_type="management_cluster"}) == 1 + {{- end }} for: 30m labels: area: kaas diff --git a/test/conf/promtool_ignore b/test/conf/promtool_ignore index fd715736d..71a57dac8 100644 --- a/test/conf/promtool_ignore +++ b/test/conf/promtool_ignore @@ -1,52 +1,79 @@ +kaas/bigmac/alerting-rules/cert-manager.rules.yml kaas/bigmac/alerting-rules/dex.rules.yml +kaas/bigmac/alerting-rules/teleport.rules.yml +kaas/phoenix/alerting-rules/aws-load-balancer-controller.rules.yml +kaas/phoenix/alerting-rules/aws.job.rules.yml kaas/phoenix/alerting-rules/aws.management-cluster.rules.yml kaas/phoenix/alerting-rules/aws.workload-cluster.rules.yml -kaas/phoenix/alerting-rules/certificate.management-cluster.rules.yml -kaas/phoenix/alerting-rules/cluster-autoscaler.rules.yml +kaas/phoenix/alerting-rules/calico.rules.yml +kaas/phoenix/alerting-rules/capa.management-cluster.rules.yml +kaas/phoenix/alerting-rules/cluster-service.rules.yml kaas/phoenix/alerting-rules/credentiald.rules.yml -kaas/phoenix/alerting-rules/inhibit.all.rules.yml -kaas/phoenix/alerting-rules/inhibit.management-cluster.rules.yml -kaas/phoenix/alerting-rules/job.rules.yml +kaas/phoenix/alerting-rules/inhibit.aws.management-cluster.rules.yml +kaas/phoenix/alerting-rules/inhibit.kiam.rules.yml kaas/phoenix/alerting-rules/kiam.rules.yml -kaas/rocket/alerting-rules/falco.rules.yml +kaas/phoenix/alerting-rules/vault.rules.yml +kaas/turtles/alerting-rules/apiserver.management-cluster.rules.yml +kaas/turtles/alerting-rules/apiserver.workload-cluster.rules.yml +kaas/turtles/alerting-rules/bastions.rules.yml +kaas/turtles/alerting-rules/capi-cluster.rules.yml +kaas/turtles/alerting-rules/capi-kubeadmcontrolplane.rules.yml +kaas/turtles/alerting-rules/capi-machine.rules.yml +kaas/turtles/alerting-rules/capi-machinedeployment.rules.yml +kaas/turtles/alerting-rules/capi-machinepool.rules.yml +kaas/turtles/alerting-rules/capi-machineset.rules.yml +kaas/turtles/alerting-rules/capi.management-cluster.rules.yml +kaas/turtles/alerting-rules/cluster-autoscaler.rules.yml +kaas/turtles/alerting-rules/docker.rules.yml +kaas/turtles/alerting-rules/etcd.management-cluster.rules.yml +kaas/turtles/alerting-rules/etcd.workload-cluster.rules.yml +kaas/turtles/alerting-rules/etcdbackup.rules.yml +kaas/turtles/alerting-rules/fairness.rules.yml +kaas/turtles/alerting-rules/inhibit.capi.rules.yml +kaas/turtles/alerting-rules/inhibit.kubelet.rules.yml +kaas/turtles/alerting-rules/job.rules.yml +kaas/turtles/alerting-rules/kubelet.rules.yml +kaas/turtles/alerting-rules/net-exporter.rules.yml +kaas/turtles/alerting-rules/node-exporter.rules.yml +kaas/turtles/alerting-rules/node.management-cluster.rules.yml +kaas/turtles/alerting-rules/node.workload-cluster.rules.yml +kaas/turtles/alerting-rules/storage.management-cluster.rules.yml +kaas/turtles/alerting-rules/storage.workload-cluster.rules.yml +kaas/turtles/alerting-rules/systemd.rules.yml +kaas/turtles/alerting-rules/timesync.rules.yml +kaas/turtles/alerting-rules/vertical-pod-autoscaler.rules.yml platform/atlas/alerting-rules/fluentbit.rules.yml +platform/atlas/alerting-rules/inhibit.oncall.rules.yml +platform/atlas/alerting-rules/keda.rules.yml platform/atlas/alerting-rules/kube-state-metrics.rules.yml platform/atlas/alerting-rules/prometheus-meta-operator.rules.yml platform/atlas/alerting-rules/prometheus-operator.rules.yml +platform/atlas/alerting-rules/service-level.rules.yml +platform/atlas/alerting-rules/storage.rules.yml +platform/atlas/recording-rules/loki-mixins.rules.yml +platform/atlas/recording-rules/mimir-mixins.rules.yml platform/cabbage/alerting-rules/coredns.rules.yml platform/cabbage/alerting-rules/external-dns.rules.yml platform/cabbage/alerting-rules/ingress-controller.rules.yml -platform/cabbage/alerting-rules/network.all.rules.yml +platform/cabbage/alerting-rules/network.rules.yml +platform/cabbage/recording-rules/gs-managed-app-deployment-status.rules.yml platform/honeybadger/alerting-rules/chart.rules.yml platform/honeybadger/alerting-rules/helm.rules.yml platform/honeybadger/alerting-rules/release.rules.yml platform/honeybadger/alerting-rules/secret.rules.yml -platform/shield/alerting-rules/kyverno.rules.yml -shared/alerting-rules/apiserver.management-cluster.rules.yml -shared/alerting-rules/apiserver.workload-cluster.rules.yml -shared/alerting-rules/calico.rules.yml +platform/honeybadger/recording-rules/helm-operations.rules.yml +platform/honeybadger/recording-rules/helm-operations.rules.yml +platform/shield/alerting-rules/falco.rules.yml +shared/alerting-rules/certificate.all.rules.yml +shared/alerting-rules/certificate.management-cluster.rules.yml shared/alerting-rules/certificate.workload-cluster.rules.yml -shared/alerting-rules/cluster-service.rules.yml shared/alerting-rules/deployment.management-cluster.rules.yml shared/alerting-rules/deployment.workload-cluster.rules.yml -shared/alerting-rules/disk.management-cluster.rules.yml -shared/alerting-rules/disk.workload-cluster.rules.yml -shared/alerting-rules/etcd.management-cluster.rules.yml -shared/alerting-rules/etcd.workload-cluster.rules.yml -shared/alerting-rules/etcdbackup.rules.yml -shared/alerting-rules/fairness.rules.yml +shared/alerting-rules/dns-operator-azure.rules.yml shared/alerting-rules/management-cluster.rules.yml shared/alerting-rules/microendpoint.rules.yml -shared/alerting-rules/node.management_cluster.rules.yml -shared/alerting-rules/node.workload_cluster.rules.yml shared/alerting-rules/operatorkit.rules.yml shared/alerting-rules/service-level.rules.yml -shared/alerting-rules/timesync.rules.yml -shared/alerting-rules/up.rules.yml -shared/alerting-rules/vault.rules.yml shared/recording-rules/grafana-cloud.rules.yml -shared/recording-rules/gs-managed-app-deployment-status.rules.yml shared/recording-rules/kubernetes-mixins.rules.yml shared/recording-rules/service-level.rules.yml -shared/recording-rules/mimir-mixins.rules.yml -shared/recording-rules/loki-mixins.rules.yml