Skip to content

Commit

Permalink
remove cluster_id label from 2 loki alerting rules (#1317)
Browse files Browse the repository at this point in the history
* remove cluster_id label from 2 loki alerting rules

* refactor

* update changelog
  • Loading branch information
QuantumEnigmaa authored Aug 6, 2024
1 parent 160ac69 commit d2cd806
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 8 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed

- Restricted the scope of the `LokiHpaReachedMaxReplicas` and `LokiNeedsToBeScaledDown` alerting rules to management clusters.

## [4.9.0] - 2024-08-01

### Added
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,14 @@ spec:
description: '{{`Loki component {{ $labels.labelpod }} is consuming very few resources and needs to be scaled down.`}}'
opsrecipe: loki/
expr: |-
sum by (cluster_id, installation, namespace, pipeline, provider, labelpod) (label_replace(container_memory_working_set_bytes{container="loki", namespace="loki"}, "labelpod", "$1", "pod", "(loki-[[:alnum:]]*)-.*"))
sum by (cluster_id, installation, namespace, pipeline, provider, labelpod) (label_replace(container_memory_working_set_bytes{container="loki", namespace="loki", cluster_type="management_cluster"}, "labelpod", "$1", "pod", "(loki-[[:alnum:]]*)-.*"))
/
sum by(cluster_id, installation, namespace, pipeline, provider, labelpod) (label_replace(kube_pod_container_resource_requests{container="loki", namespace="loki", unit="byte"}, "labelpod", "$1", "pod", "(loki-[[:alnum:]]*)-.*"))
sum by(cluster_id, installation, namespace, pipeline, provider, labelpod) (label_replace(kube_pod_container_resource_requests{container="loki", namespace="loki", unit="byte", cluster_type="management_cluster"}, "labelpod", "$1", "pod", "(loki-[[:alnum:]]*)-.*"))
<= 0.30
and
sum(label_replace(rate(container_cpu_usage_seconds_total{container="loki", namespace="loki"}[5m]), "labelpod", "$1", "pod", "(loki-[[:alnum:]]*)-.*")) by (cluster_id, installation, namespace, pipeline, provider, labelpod)
sum(label_replace(rate(container_cpu_usage_seconds_total{container="loki", namespace="loki", cluster_type="management_cluster"}[5m]), "labelpod", "$1", "pod", "(loki-[[:alnum:]]*)-.*")) by (cluster_id, installation, namespace, pipeline, provider, labelpod)
/
sum by(cluster_id, installation, namespace, pipeline, provider, labelpod) (label_replace(kube_pod_container_resource_requests{container="loki", namespace="loki", unit="core"}, "labelpod", "$1", "pod", "(loki-[[:alnum:]]*)-.*"))
sum by(cluster_id, installation, namespace, pipeline, provider, labelpod) (label_replace(kube_pod_container_resource_requests{container="loki", namespace="loki", unit="core", cluster_type="management_cluster"}, "labelpod", "$1", "pod", "(loki-[[:alnum:]]*)-.*"))
<= 0.30
for: 1d
labels:
Expand All @@ -113,9 +113,9 @@ spec:
description: '{{`Loki component {{ $labels.horizontalpodautoscaler }} has reached its maxReplicas number but still needs to be scaled up.`}}'
opsrecipe: loki/
expr: |
sum by (cluster_id, installation, namespace, pipeline, provider, horizontalpodautoscaler) (kube_horizontalpodautoscaler_status_desired_replicas{namespace="loki", horizontalpodautoscaler=~"loki-backend|loki-write|loki-read"})
sum by (cluster_id, installation, namespace, pipeline, provider, horizontalpodautoscaler) (kube_horizontalpodautoscaler_status_desired_replicas{namespace="loki", horizontalpodautoscaler=~"loki-backend|loki-write|loki-read", cluster_type="management_cluster"})
!=
sum by (cluster_id, installation, namespace, pipeline, provider, horizontalpodautoscaler) (kube_horizontalpodautoscaler_status_current_replicas{namespace="loki", horizontalpodautoscaler=~"loki-backend|loki-write|loki-read"})
sum by (cluster_id, installation, namespace, pipeline, provider, horizontalpodautoscaler) (kube_horizontalpodautoscaler_status_current_replicas{namespace="loki", horizontalpodautoscaler=~"loki-backend|loki-write|loki-read", cluster_type="management_cluster"})
for: 4h
labels:
area: platform
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,8 @@ tests:
cancel_if_cluster_status_deleting: "true"
cancel_if_cluster_status_updating: "true"
cancel_if_outside_working_hours: "true"
cluster_id: golem
installation: "golem"
cluster_id: "golem"
labelpod: "loki-backend"
pipeline: "testing"
provider: "capa"
Expand Down Expand Up @@ -215,9 +215,9 @@ tests:
team: atlas
topic: observability
namespace: loki
cluster_id: golem
horizontalpodautoscaler: loki-backend
installation: golem
cluster_id: golem
pipeline: testing
provider: capa
exp_annotations:
Expand Down

0 comments on commit d2cd806

Please sign in to comment.