Skip to content

Commit

Permalink
Add alert LokiMissingLogs (#1386)
Browse files Browse the repository at this point in the history
Co-authored-by: Herve Nicol <[email protected]>
  • Loading branch information
hervenicol and hervenicol authored Oct 8, 2024
1 parent b6a05ac commit acebcab
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Alerting rule `LokiMissingLogs` for Loki missing logs at ingestion

## [4.17.0] - 2024-10-03

### Removed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,27 @@ spec:
severity: page
team: atlas
topic: observability
# LokiMissingLogs: matches when the loki-canary missing-entries counter has grown
# over the last 5m relative to the canary entries counter (see expr), and fires
# once that condition has held continuously for 30m (`for`).
- alert: LokiMissingLogs
annotations:
dashboard: loki-canary/loki-canary
description: This alert checks that loki is not missing canary logs
# NOTE(review): opsrecipe is the bare `loki/` path with no per-alert anchor — confirm this is intended.
opsrecipe: loki/
# Ratio of the 5m increase of loki_canary_missing_entries_total to the 5m increase of
# loki_canary_entries_total, each summed by (cluster_id, pod, installation, pipeline, provider)
# and restricted to cluster_type="management_cluster", namespace="loki".
# NOTE(review): with a `> 0` threshold the division sets no tolerance — any positive
# increase in missing entries appears to match; confirm no minimum ratio is wanted.
expr: |
(
sum by (cluster_id, pod, installation, pipeline, provider)
(increase(loki_canary_missing_entries_total{cluster_type="management_cluster",namespace="loki"}[5m]))
/
sum by (cluster_id, pod, installation, pipeline, provider)
(increase(loki_canary_entries_total{cluster_type="management_cluster",namespace="loki"}[5m]))
) > 0
for: 30m
# Static labels attached to the alert.
# NOTE(review): the cancel_if_* labels are presumably consumed by inhibition rules
# defined elsewhere — not visible here; verify against the Alertmanager configuration.
labels:
area: platform
cancel_if_cluster_control_plane_unhealthy: "true"
cancel_if_cluster_status_creating: "true"
cancel_if_cluster_status_deleting: "true"
cancel_if_cluster_status_updating: "true"
cancel_if_outside_working_hours: "true"
severity: page
team: atlas
topic: observability
Original file line number Diff line number Diff line change
Expand Up @@ -289,3 +289,38 @@ tests:
opsrecipe: "loki#lokicompactorfailedcompaction"
- alertname: LokiCompactorFailedCompaction
eval_time: 300m

# Test for LokiMissingLogs alert
# Scenario (one sample per minute): the canary entries counter grows steadily while
# the missing-entries counter is flat, then grows, then is flat again. The alert is
# therefore expected to be silent, firing, and silent at the three eval times below.
- interval: 1m
input_series:
# Total canary entries: starts at 0 and increases by 1 per sample.
- series: 'loki_canary_entries_total{app="loki", cluster="loki", cluster_id="grizzly", cluster_type="management_cluster", container="loki-canary", customer="giantswarm", endpoint="http-metrics", installation="grizzly", namespace="loki", pod="loki-canary-5649fbcb65-lkdkq", pipeline="testing", provider="capz", service="loki-canary", service_priority="highest"}'
values: 0+1x1000
# Missing entries: flat at 0, then rising by 1 per sample up to 120, then flat at 120.
- series: 'loki_canary_missing_entries_total{app="loki", cluster="loki", cluster_id="grizzly", cluster_type="management_cluster", container="loki-canary", customer="giantswarm", endpoint="http-metrics", installation="grizzly", namespace="loki", pod="loki-canary-5649fbcb65-lkdkq", pipeline="testing", provider="capz", service="loki-canary", service_priority="highest"}'
values: "0+0x120 0+1x120 120+0x120"
alert_rule_test:
# 60m: the missing-entries counter is still flat at 0; no exp_alerts are listed,
# i.e. the alert is expected not to be firing.
- alertname: LokiMissingLogs
eval_time: 60m
# 200m: the missing-entries counter has been rising for longer than the rule's
# 30m `for` duration, so the alert is expected to be firing.
- alertname: LokiMissingLogs
eval_time: 200m
exp_alerts:
# Expected labels: the rule's static labels plus the `sum by` grouping labels
# (cluster_id, installation, pipeline, pod, provider) taken from the input series.
- exp_labels:
area: platform
cancel_if_cluster_control_plane_unhealthy: "true"
cancel_if_cluster_status_creating: "true"
cancel_if_cluster_status_deleting: "true"
cancel_if_cluster_status_updating: "true"
cancel_if_outside_working_hours: "true"
cluster_id: grizzly
installation: "grizzly"
pipeline: "testing"
pod: "loki-canary-5649fbcb65-lkdkq"
provider: "capz"
severity: page
team: atlas
topic: observability
# Expected annotations must match the rule's annotations exactly.
exp_annotations:
dashboard: loki-canary/loki-canary
description: This alert checks that loki is not missing canary logs
opsrecipe: "loki/"
# 300m: the missing-entries counter has been flat again for longer than the 5m
# increase window; no exp_alerts are listed, i.e. the alert is expected to have resolved.
- alertname: LokiMissingLogs
eval_time: 300m

0 comments on commit acebcab

Please sign in to comment.