From faf1d041e3798bf13aeb5a62cd4c3ac8fcd1e278 Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Wed, 31 Mar 2021 12:59:13 +0200 Subject: [PATCH] pkg/rules: fix deduplication of equal alerts with different labels (#3960) (#3999) Currently, if an alerting rule having the same name with different severity labels is being returned from different replicas then they are being treated as separate alerts. Given the following alerts a1,a2 with severities s1,s2 returned from replicas r1,2: a1[s1,r1] a1[s2,r1] a1[s1,r2] a1[s2,r2] Then, currently, the algorithm deduplicates to: a1[s1] a1[s2] a1[s1] a1[s2] Instead of the intendet result: a1[s1] a1[s2] This fixes it by removing replica labels before sorting labels for deduplication. Signed-off-by: Sergiusz Urbaniak # Conflicts: # CHANGELOG.md Co-authored-by: Sergiusz Urbaniak --- CHANGELOG.md | 3 +- pkg/rules/rules.go | 9 ++-- pkg/rules/rules_test.go | 108 +++++++++++++++++++++++++++++++++++++--- 3 files changed, 106 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2c5e8aa1f..d55e080ba8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,8 @@ We use _breaking :warning:_ to mark changes that are not backward compatible (re - [#3815](https://github.com/thanos-io/thanos/pull/3815) Receive: Improve handling of empty time series from clients - [#3795](https://github.com/thanos-io/thanos/pull/3795) s3: A truncated "get object" response is reported as error. - [#3899](https://github.com/thanos-io/thanos/pull/3899) Receive: Correct the inference of client gRPC configuration. -- [#3943](https://github.com/thanos-io/thanos/pull/3943): Receive: Fixed memory regression introduced in v0.17.0. +- [#3943](https://github.com/thanos-io/thanos/pull/3943) Receive: Fixed memory regression introduced in v0.17.0. +- [#3960](https://github.com/thanos-io/thanos/pull/3960) Query: Fixed deduplication of equal alerts with different labels. ### Changed diff --git a/pkg/rules/rules.go b/pkg/rules/rules.go index 9d40ee2403..422d03297c 100644 --- a/pkg/rules/rules.go +++ b/pkg/rules/rules.go @@ -68,23 +68,22 @@ func dedupRules(rules []*rulespb.Rule, replicaLabels map[string]struct{}) []*rul return rules } - // Sort each rule's label names such that they are comparable. + // Remove replica labels and sort each rule's label names such that they are comparable. for _, r := range rules { + removeReplicaLabels(r, replicaLabels) sort.Slice(r.GetLabels(), func(i, j int) bool { return r.GetLabels()[i].Name < r.GetLabels()[j].Name }) } - // Sort rules globally based on synthesized deduplication labels, also considering replica labels and their values. + // Sort rules globally. sort.Slice(rules, func(i, j int) bool { return rules[i].Compare(rules[j]) < 0 }) - // Remove rules based on synthesized deduplication labels, this time ignoring replica labels and last evaluation. + // Remove rules based on synthesized deduplication labels. i := 0 - removeReplicaLabels(rules[i], replicaLabels) for j := 1; j < len(rules); j++ { - removeReplicaLabels(rules[j], replicaLabels) if rules[i].Compare(rules[j]) != 0 { // Effectively retain rules[j] in the resulting slice. i++ diff --git a/pkg/rules/rules_test.go b/pkg/rules/rules_test.go index ada7bf8ebf..b6e67be19e 100644 --- a/pkg/rules/rules_test.go +++ b/pkg/rules/rules_test.go @@ -448,14 +448,14 @@ func TestDedupRules(t *testing.T) { rulespb.NewAlertingRule(&rulespb.Alert{ Name: "a1", Query: "up", - DurationSeconds: 2.0, + DurationSeconds: 1.0, Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ {Name: "a", Value: "1"}, }}}), rulespb.NewAlertingRule(&rulespb.Alert{ Name: "a1", Query: "up", - DurationSeconds: 1.0, + DurationSeconds: 2.0, Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ {Name: "a", Value: "1"}, }}}), @@ -505,13 +505,13 @@ func TestDedupRules(t *testing.T) { }}}), }, want: []*rulespb.Rule{ + rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1"}), rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1", Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ {Name: "a", Value: "1"}, }}}), rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1", Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ {Name: "a", Value: "2"}, }}}), - rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1"}), }, replicaLabels: []string{"replica"}, }, @@ -613,6 +613,11 @@ func TestDedupRules(t *testing.T) { }), }, want: []*rulespb.Rule{ + rulespb.NewAlertingRule(&rulespb.Alert{ + State: rulespb.AlertState_FIRING, + Name: "a1", + LastEvaluation: time.Unix(2, 0), + }), rulespb.NewAlertingRule(&rulespb.Alert{ State: rulespb.AlertState_FIRING, Name: "a1", @@ -621,11 +626,6 @@ func TestDedupRules(t *testing.T) { }}, LastEvaluation: time.Unix(1, 0), }), - rulespb.NewAlertingRule(&rulespb.Alert{ - State: rulespb.AlertState_FIRING, - Name: "a1", - LastEvaluation: time.Unix(2, 0), - }), rulespb.NewAlertingRule(&rulespb.Alert{ State: rulespb.AlertState_PENDING, Name: "a2", @@ -634,6 +634,98 @@ func TestDedupRules(t *testing.T) { }, replicaLabels: []string{"replica"}, }, + { + name: "alerts with different severity", + rules: []*rulespb.Rule{ + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "replica", Value: "1"}, + {Name: "severity", Value: "warning"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "replica", Value: "1"}, + {Name: "severity", Value: "critical"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "replica", Value: "2"}, + {Name: "severity", Value: "warning"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "replica", Value: "2"}, + {Name: "severity", Value: "critical"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + }, + want: []*rulespb.Rule{ + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "severity", Value: "critical"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "severity", Value: "warning"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + }, + replicaLabels: []string{"replica"}, + }, + { + name: "alerts with missing replica labels", + rules: []*rulespb.Rule{ + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "replica", Value: "1"}, + {Name: "label", Value: "foo"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "replica", Value: "2"}, + {Name: "label", Value: "foo"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "label", Value: "foo"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + }, + want: []*rulespb.Rule{ + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "label", Value: "foo"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + }, + replicaLabels: []string{"replica"}, + }, } { t.Run(tc.name, func(t *testing.T) { replicaLabels := make(map[string]struct{})