Skip to content

Commit

Permalink
Add CiliumNetworkPolicyFailed alert (#1100)
Browse files Browse the repository at this point in the history
  • Loading branch information
kopiczko authored Apr 15, 2024
1 parent 969dc6e commit b248db6
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 6 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Add CiliumNetworkPolicyFailed alert.

## [3.10.1] - 2024-04-12

### Fixed
Expand Down
15 changes: 15 additions & 0 deletions helm/prometheus-rules/templates/alerting-rules/cilium.rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,18 @@ spec:
severity: page
team: cabbage
topic: cilium
# Fires when Cilium reports network policy failures: the expression below has
# to stay above zero for 10 minutes before the alert becomes active.
- alert: CiliumNetworkPolicyFailed
annotations:
# The backtick-quoted text inside the template braces looks like a Go-template
# raw string, presumably so Helm emits it verbatim - confirm against the
# chart's other rules.
description: '{{`Too many Cilium Network Policy errors.`}}'
opsrecipe: unsupported-cilium-network-policy/
# cilium_policy_change_total - for cilium >=1.15
# cilium_policy_import_errors_total - for cilium <1.15
# `OR` unions the two rate() vectors so series from either metric are
# considered; max() then collapses them into one value compared against 0.
# Only outcomes matching the regex "fail.*" count for the >=1.15 metric.
expr: max(rate(cilium_policy_change_total{outcome=~"fail.*"}[20m]) OR rate(cilium_policy_import_errors_total[20m])) > 0
for: 10m
labels:
area: managedservices
# Quoted so the label value is the string "true" rather than a YAML boolean.
cancel_if_outside_working_hours: "true"
severity: page
team: cabbage
topic: cilium

64 changes: 58 additions & 6 deletions test/tests/providers/global/cilium.rules.test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ tests:
- series: 'cilium_bpf_map_pressure{map_name="policy_00001"}'
values: "_x20 20+0x20 90+0x20"
alert_rule_test:
- alertname: CiliumBPFMapAlmostFull
- alertname: CiliumBPFMapAlmostFull
eval_time: 10m
- alertname: CiliumBPFMapAlmostFull
- alertname: CiliumBPFMapAlmostFull
eval_time: 30m
- alertname: CiliumBPFMapAlmostFull
- alertname: CiliumBPFMapAlmostFull
eval_time: 50m
exp_alerts:
- exp_labels:
Expand All @@ -31,11 +31,11 @@ tests:
- series: 'cilium_bpf_map_pressure{map_name="policy_00001"}'
values: "_x20 20+0x20 90+0x20 98+0x20"
alert_rule_test:
- alertname: CiliumBPFMapFull
- alertname: CiliumBPFMapFull
eval_time: 10m
- alertname: CiliumBPFMapFull
- alertname: CiliumBPFMapFull
eval_time: 30m
- alertname: CiliumBPFMapFull
- alertname: CiliumBPFMapFull
eval_time: 70m
exp_alerts:
- exp_labels:
Expand All @@ -47,3 +47,55 @@ tests:
exp_annotations:
description: "Cilium BPF map is about filled up."
opsrecipe: "cilium-bpf-map/"
# CiliumNetworkPolicyFailed: covers both metrics used by the rule -
# cilium_policy_change_total{outcome=~"fail.*"} (cilium >=1.15) and
# cilium_policy_import_errors_total (cilium <1.15).
- interval: 1m
input_series:
# Failure counter: no samples, then flat at 0, then growing by 100 per
# interval, then no samples again.
- series: 'cilium_policy_change_total{outcome="fail"}'
values: "_x20 0+0x20 0+100x30 _x1000"
# Success counter: grows quickly but only starts after the failure series has
# ended; it does not match outcome=~"fail.*".
- series: 'cilium_policy_change_total{outcome="success"}'
values: "_x120 1+10000x50 _x1000"
# Legacy error counter: same shape as the failure counter, shifted so it
# starts after the success series window.
- series: 'cilium_policy_import_errors_total{}'
values: "_x220 0+0x20 0+100x30 _x1000"
alert_rule_test:
# Phase 1 - cilium_policy_change_total{outcome="fail"}:
# no data (10m) and a flat counter (30m) expect no alert; entries without
# exp_alerts assert that nothing fires. The growing counter (60m) must fire.
- alertname: CiliumNetworkPolicyFailed
eval_time: 10m
- alertname: CiliumNetworkPolicyFailed
eval_time: 30m
- alertname: CiliumNetworkPolicyFailed
eval_time: 60m
exp_alerts:
- exp_labels:
area: managedservices
severity: page
team: cabbage
topic: cilium
cancel_if_outside_working_hours: "true"
exp_annotations:
description: "Too many Cilium Network Policy errors."
opsrecipe: "unsupported-cilium-network-policy/"
# Phase 2 - cilium_policy_change_total{outcome="success"}:
# successful policy changes must never trigger the alert.
- alertname: CiliumNetworkPolicyFailed
eval_time: 110m
- alertname: CiliumNetworkPolicyFailed
eval_time: 130m
- alertname: CiliumNetworkPolicyFailed
eval_time: 160m
# Phase 3 - cilium_policy_import_errors_total{}:
# same pattern as phase 1 for the pre-1.15 metric - no alert while the
# counter is absent (210m) or flat (230m), alert once it grows (260m).
- alertname: CiliumNetworkPolicyFailed
eval_time: 210m
- alertname: CiliumNetworkPolicyFailed
eval_time: 230m
- alertname: CiliumNetworkPolicyFailed
eval_time: 260m
exp_alerts:
- exp_labels:
area: managedservices
severity: page
team: cabbage
topic: cilium
cancel_if_outside_working_hours: "true"
exp_annotations:
description: "Too many Cilium Network Policy errors."
opsrecipe: "unsupported-cilium-network-policy/"

0 comments on commit b248db6

Please sign in to comment.