Skip to content

Commit

Permalink
Merge branch 'main' into send-slo-aggregations-to-grafana-cloud
Browse files Browse the repository at this point in the history
  • Loading branch information
QuentinBisson authored Sep 4, 2024
2 parents 4edb344 + 7197d3b commit a1f5c3b
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 10 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Add aggregations for SLO metrics to export them to Grafana Cloud

## [4.13.2] - 2024-09-03

### Changed

- Updated `LokiHpaReachedMaxReplicas` alert.

## [4.13.1] - 2024-09-03

### Fixed
Expand Down Expand Up @@ -3052,7 +3058,8 @@ Fix `PromtailRequestsErrors` alerts as promtail retries after some backoff so ac

- Add existing rules from https://github.com/giantswarm/prometheus-meta-operator/pull/637/commits/bc6a26759eb955de92b41ed5eb33fa37980660f2

[Unreleased]: https://github.com/giantswarm/prometheus-rules/compare/v4.13.1...HEAD
[Unreleased]: https://github.com/giantswarm/prometheus-rules/compare/v4.13.2...HEAD
[4.13.2]: https://github.com/giantswarm/prometheus-rules/compare/v4.13.1...v4.13.2
[4.13.1]: https://github.com/giantswarm/prometheus-rules/compare/v4.13.0...v4.13.1
[4.13.0]: https://github.com/giantswarm/prometheus-rules/compare/v4.12.0...v4.13.0
[4.12.0]: https://github.com/giantswarm/prometheus-rules/compare/v4.11.0...v4.12.0
Expand Down
2 changes: 1 addition & 1 deletion helm/prometheus-rules/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ home: https://github.com/giantswarm/prometheus-rules
icon: https://s.giantswarm.io/app-icons/1/png/default-app-light.png
name: prometheus-rules
appVersion: '0.1.0'
version: '4.13.1'
version: '4.13.2'
annotations:
application.giantswarm.io/team: "atlas"
config.giantswarm.io/version: 1.x.x
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,18 @@ spec:
annotations:
description: '{{`Loki component {{ $labels.horizontalpodautoscaler }} has reached its maxReplicas number but still needs to be scaled up.`}}'
opsrecipe: loki/
expr: |
sum by (cluster_id, installation, namespace, pipeline, provider, horizontalpodautoscaler) (kube_horizontalpodautoscaler_status_desired_replicas{namespace="loki", horizontalpodautoscaler=~"loki-backend|loki-write|loki-read", cluster_type="management_cluster"})
!=
sum by (cluster_id, installation, namespace, pipeline, provider, horizontalpodautoscaler) (kube_horizontalpodautoscaler_status_current_replicas{namespace="loki", horizontalpodautoscaler=~"loki-backend|loki-write|loki-read", cluster_type="management_cluster"})
expr: |-
(
kube_horizontalpodautoscaler_status_desired_replicas{namespace="loki"} >=
on(cluster_id, customer, installation, namespace, horizontalpodautoscaler)
kube_horizontalpodautoscaler_spec_max_replicas{namespace="loki"}
)
and on(cluster_id, customer, installation, namespace, horizontalpodautoscaler)
(
kube_horizontalpodautoscaler_status_target_metric{namespace="loki"} >
on(cluster_id, customer, installation, namespace, horizontalpodautoscaler, metric_name, metric_target_type)
kube_horizontalpodautoscaler_spec_target_metric{namespace="loki"}
)
for: 4h
labels:
area: platform
Expand Down
2 changes: 1 addition & 1 deletion test/hack/checkLabels/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ toolchain go1.23.0
require (
// Try to keep version in sync with our prometheus rule CRD version.
// see https://github.com/giantswarm/prometheus-operator-crd/blob/master/helm/prometheus-operator-crd/Chart.yaml#L11
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.0
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.1
sigs.k8s.io/yaml v1.4.0
)

Expand Down
2 changes: 2 additions & 0 deletions test/hack/checkLabels/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,8 @@ github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.2 h
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.2/go.mod h1:XYrdZw5dW12Cjkt4ndbeNZZTBp4UCHtW0ccR9+sTtPU=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.0 h1:tRwEFYFg+To2TGnibGl8dHBCh8Z/BVNKnXj2O5Za/2M=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.0/go.mod h1:Rd8YnCqz+2FYsiGmE2DMlaLjQRB4v2jFNnzCt9YY4IM=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.1 h1:QU2cs0xxKYvF1JfibP/8vs+pFy6OvIpqNR2lYC4jYNU=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.1/go.mod h1:Rd8YnCqz+2FYsiGmE2DMlaLjQRB4v2jFNnzCt9YY4IM=
github.com/prometheus/alertmanager v0.22.2 h1:JrDZalSEMb2/2bqGAhls6ZnvOxbC5jMIu29JV+uWTC0=
github.com/prometheus/alertmanager v0.22.2/go.mod h1:rYinOWxFuCnNssc3iOjn2oMTlhLaPcUuqV5yk5JKUAE=
github.com/prometheus/alertmanager v0.25.0 h1:vbXKUR6PYRiZPRIKfmXaG+dmCKG52RtPL4Btl8hQGvg=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,12 @@ tests:
# loki-backend HPA desired replicas stay at or above maxReplicas (2) while the target metric exceeds its spec target (90) only in the middle window.
- series: 'kube_horizontalpodautoscaler_status_desired_replicas{horizontalpodautoscaler="loki-backend", namespace="loki", cluster_type="management_cluster", cluster_id="golem", installation="golem", pipeline="testing", provider="capa", region="eu-west-2"}'
values: "2+0x20 3+0x250 2+0x250"
- series: 'kube_horizontalpodautoscaler_status_current_replicas{horizontalpodautoscaler="loki-backend", namespace="loki", cluster_type="management_cluster", cluster_id="golem", installation="golem", pipeline="testing", provider="capa", region="eu-west-2"}'
- series: 'kube_horizontalpodautoscaler_spec_max_replicas{horizontalpodautoscaler="loki-backend", namespace="loki", cluster_type="management_cluster", cluster_id="golem", installation="golem", pipeline="testing", provider="capa", region="eu-west-2"}'
values: "2+0x520"
- series: 'kube_horizontalpodautoscaler_status_target_metric{horizontalpodautoscaler="loki-backend", namespace="loki", cluster_type="management_cluster", cluster_id="golem", installation="golem", pipeline="testing", provider="capa", region="eu-west-2"}'
values: "60+0x20 120+0x250 60+0x250"
- series: 'kube_horizontalpodautoscaler_spec_target_metric{horizontalpodautoscaler="loki-backend", namespace="loki", cluster_type="management_cluster", cluster_id="golem", installation="golem", pipeline="testing", provider="capa", region="eu-west-2"}'
values: "90+0x520"
alert_rule_test:
- alertname: LokiHpaReachedMaxReplicas
eval_time: 15m
Expand All @@ -218,8 +222,6 @@ tests:
horizontalpodautoscaler: loki-backend
installation: golem
cluster_id: golem
pipeline: testing
provider: capa
exp_annotations:
description: Loki component loki-backend has reached its maxReplicas number but still needs to be scaled up.
opsrecipe: loki/
Expand Down

0 comments on commit a1f5c3b

Please sign in to comment.