Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(WIP) Fetch and apply Mimir and Grafana Alertmanager configurations #7962

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions development/mimir-read-write-mode/config/mimir.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ common:
storage:
backend: s3
s3:
endpoint: minio:9000
access_key_id: mimir
endpoint: minio:9000
access_key_id: mimir
secret_access_key: supersecret
insecure: true
insecure: true

blocks_storage:
s3:
bucket_name: mimir-blocks
bucket_name: mimir-blocks
tsdb:
dir: /data/ingester

Expand All @@ -31,10 +31,9 @@ blocks_storage:
memcached:
addresses: dns+memcached:11211


memberlist:
# Use write replicas as seed nodes.
join_members: [ mimir-write-1, mimir-write-2, mimir-write-3 ]
join_members: [mimir-write-1, mimir-write-2, mimir-write-3]

ruler:
rule_path: /data/ruler
Expand All @@ -43,7 +42,7 @@ ruler:

ruler_storage:
s3:
bucket_name: mimir-ruler
bucket_name: mimir-ruler
cache:
backend: memcached
memcached:
Expand All @@ -56,6 +55,7 @@ alertmanager:
data_dir: /data/alertmanager
fallback_config_file: ./config/alertmanager-fallback-config.yaml
external_url: http://localhost:8006/alertmanager
grafana_alertmanager_compatibility_enabled: true

alertmanager_storage:
s3:
Expand Down
30 changes: 15 additions & 15 deletions development/mimir-read-write-mode/docker-compose.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ std.manifestYamlDoc({
self.backend +
self.nginx +
self.minio +
self.grafana +
self.grafana_agent +
// self.grafana +
// self.grafana_agent +
self.memcached +
{},

Expand Down Expand Up @@ -94,19 +94,19 @@ std.manifestYamlDoc({
},
},

grafana:: {
grafana: {
image: 'grafana/grafana:9.4.3',
environment: [
'GF_AUTH_ANONYMOUS_ENABLED=true',
'GF_AUTH_ANONYMOUS_ORG_ROLE=Admin',
],
volumes: [
'./config/datasource-mimir.yaml:/etc/grafana/provisioning/datasources/mimir.yaml',
],
ports: ['3000:3000'],
},
},
// grafana:: {
// grafana: {
// image: 'grafana/grafana:9.4.3',
// environment: [
// 'GF_AUTH_ANONYMOUS_ENABLED=true',
// 'GF_AUTH_ANONYMOUS_ORG_ROLE=Admin',
// ],
// volumes: [
// './config/datasource-mimir.yaml:/etc/grafana/provisioning/datasources/mimir.yaml',
// ],
// ports: ['3000:3000'],
// },
// },

grafana_agent:: {
// Scrape the metrics also with the Grafana agent (useful to test metadata ingestion
Expand Down
19 changes: 0 additions & 19 deletions development/mimir-read-write-mode/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,23 +1,4 @@
"services":
"grafana":
"environment":
- "GF_AUTH_ANONYMOUS_ENABLED=true"
- "GF_AUTH_ANONYMOUS_ORG_ROLE=Admin"
"image": "grafana/grafana:9.4.3"
"ports":
- "3000:3000"
"volumes":
- "./config/datasource-mimir.yaml:/etc/grafana/provisioning/datasources/mimir.yaml"
"grafana-agent":
"command":
- "-config.file=/etc/agent-config/grafana-agent.yaml"
- "-metrics.wal-directory=/tmp"
- "-server.http.address=127.0.0.1:9091"
"image": "grafana/agent:v0.37.3"
"ports":
- "9091:9091"
"volumes":
- "./config:/etc/agent-config"
"memcached":
"image": "memcached:1.6.19-alpine"
"mimir-backend-1":
Expand Down
7 changes: 5 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ require (
github.com/google/go-github/v57 v57.0.0
github.com/google/uuid v1.6.0
github.com/grafana-tools/sdk v0.0.0-20220919052116-6562121319fc
github.com/grafana/alerting v0.0.0-20240424080142-bb4f4f429d36
github.com/grafana/alerting v0.0.0-20240516100902-0cf0ef264288
github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/hashicorp/vault/api v1.10.0
Expand Down Expand Up @@ -253,7 +253,7 @@ require (
gopkg.in/telebot.v3 v3.2.1 // indirect
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
sigs.k8s.io/yaml v1.4.0
)

// Using a fork of Prometheus with Mimir-specific changes.
Expand Down Expand Up @@ -282,3 +282,6 @@ replace github.com/opentracing-contrib/go-stdlib => github.com/grafana/opentraci

// Replace opentracing-contrib/go-grpc with a fork until https://github.com/opentracing-contrib/go-grpc/pull/16 is merged.
replace github.com/opentracing-contrib/go-grpc => github.com/charleskorn/go-grpc v0.0.0-20231024023642-e9298576254f

// Replacing prometheus/alertmanager with our fork.
replace github.com/prometheus/alertmanager => github.com/grafana/prometheus-alertmanager v0.25.1-0.20240422145632-c33c6b5b6e6b
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -505,8 +505,8 @@ github.com/gosimple/slug v1.1.1 h1:fRu/digW+NMwBIP+RmviTK97Ho/bEj/C9swrCspN3D4=
github.com/gosimple/slug v1.1.1/go.mod h1:ER78kgg1Mv0NQGlXiDe57DpCyfbNywXXZ9mIorhxAf0=
github.com/grafana-tools/sdk v0.0.0-20220919052116-6562121319fc h1:PXZQA2WCxe85Tnn+WEvr8fDpfwibmEPgfgFEaC87G24=
github.com/grafana-tools/sdk v0.0.0-20220919052116-6562121319fc/go.mod h1:AHHlOEv1+GGQ3ktHMlhuTUwo3zljV3QJbC0+8o2kn+4=
github.com/grafana/alerting v0.0.0-20240424080142-bb4f4f429d36 h1:v4aQ0cde8SCzNRrD2RczzmFolEkXWriSY9tKakAD0ng=
github.com/grafana/alerting v0.0.0-20240424080142-bb4f4f429d36/go.mod h1:8nOsn7PWmttOmWiR7bvYIl3VLl+tIq72ZF+1y54w36M=
github.com/grafana/alerting v0.0.0-20240516100902-0cf0ef264288 h1:cnaJxI2rhiJcyDxebP2zKGNlzK4KVpU1uznanv0VJlg=
github.com/grafana/alerting v0.0.0-20240516100902-0cf0ef264288/go.mod h1:8nOsn7PWmttOmWiR7bvYIl3VLl+tIq72ZF+1y54w36M=
github.com/grafana/dskit v0.0.0-20240509115328-a1bba1277f06 h1:/QUlscuctksoAF335nhWJtNtDO2KB+p7I6C2GmI76lM=
github.com/grafana/dskit v0.0.0-20240509115328-a1bba1277f06/go.mod h1:HvSf3uf8Ps2vPpzHeAFyZTdUcbVr+Rxpq1xcx7J/muc=
github.com/grafana/e2e v0.1.2-0.20240118170847-db90b84177fc h1:BW+LjKJDz0So5LI8UZfW5neWeKpSkWqhmGjQFzcFfLM=
Expand All @@ -521,6 +521,8 @@ github.com/grafana/mimir-prometheus v0.0.0-20240515135245-e5b85c151ba8 h1:XmqfG3
github.com/grafana/mimir-prometheus v0.0.0-20240515135245-e5b85c151ba8/go.mod h1:ZlD3SoAHSwXK5VGLHv78Jh5kOpgSLaQAzt9gxq76fLM=
github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956 h1:em1oddjXL8c1tL0iFdtVtPloq2hRPen2MJQKoAWpxu0=
github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956/go.mod h1:qtI1ogk+2JhVPIXVc6q+NHziSmy2W5GbdQZFUHADCBU=
github.com/grafana/prometheus-alertmanager v0.25.1-0.20240422145632-c33c6b5b6e6b h1:HCbWyVL6vi7gxyO76gQksSPH203oBJ1MJ3JcG1OQlsg=
github.com/grafana/prometheus-alertmanager v0.25.1-0.20240422145632-c33c6b5b6e6b/go.mod h1:01sXtHoRwI8W324IPAzuxDFOmALqYLCOhvSC2fUHWXc=
github.com/grafana/pyroscope-go/godeltaprof v0.1.6 h1:nEdZ8louGAplSvIJi1HVp7kWvFvdiiYg3COLlTwJiFo=
github.com/grafana/pyroscope-go/godeltaprof v0.1.6/go.mod h1:Tk376Nbldo4Cha9RgiU7ik8WKFkNpfds98aUzS8omLE=
github.com/grafana/regexp v0.0.0-20221005093135-b4c2bcb0a4b6 h1:A3dhViTeFDSQcGOXuUi6ukCQSMyDtDISBp2z6OOo2YM=
Expand Down Expand Up @@ -783,8 +785,6 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=
github.com/prometheus/alertmanager v0.27.0 h1:V6nTa2J5V4s8TG4C4HtrBP/WNSebCCTYGGv4qecA/+I=
github.com/prometheus/alertmanager v0.27.0/go.mod h1:8Ia/R3urPmbzJ8OsdvmZvIprDwvwmYCmUbwBL+jlPOE=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
Expand Down
98 changes: 85 additions & 13 deletions pkg/alertmanager/alertmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"context"
"crypto/md5"
"encoding/binary"
"encoding/json"
"fmt"
"net/http"
"net/url"
Expand All @@ -20,6 +21,11 @@ import (

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/alerting/definition"
"github.com/grafana/alerting/images"
alertingLogging "github.com/grafana/alerting/logging"
alertingNotify "github.com/grafana/alerting/notify"
alertingReceivers "github.com/grafana/alerting/receivers"
"github.com/grafana/dskit/flagext"
"github.com/pkg/errors"
"github.com/prometheus/alertmanager/api"
Expand Down Expand Up @@ -56,6 +62,7 @@ import (
"github.com/prometheus/common/model"
"github.com/prometheus/common/route"
"golang.org/x/time/rate"
yaml "sigs.k8s.io/yaml/goyaml.v2"

"github.com/grafana/mimir/pkg/alertmanager/alertstore"
util_net "github.com/grafana/mimir/pkg/util/net"
Expand Down Expand Up @@ -313,7 +320,7 @@ func clusterWait(position func() int, timeout time.Duration) func() time.Duratio
}

// ApplyConfig applies a new configuration to an Alertmanager.
func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config, rawCfg string) error {
func (am *Alertmanager) ApplyConfig(userID string, conf *definition.PostableApiAlertingConfig, rawCfg string) error {
templateFiles := make([]string, len(conf.Templates))
for i, t := range conf.Templates {
templateFilepath, err := safeTemplateFilepath(filepath.Join(am.cfg.TenantDataDir, templatesDir), t)
Expand All @@ -330,7 +337,16 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config, rawCfg s
}
tmpl.ExternalURL = am.cfg.ExternalURL

am.api.Update(conf, func(_ model.LabelSet) {})
// TODO: revisit this conversion — conf is marshalled to JSON and then unmarshalled as YAML into a config.Config.
rawConf, err := json.Marshal(conf)
if err != nil {
return err
}

var cfg config.Config
if err := yaml.Unmarshal(rawConf, &cfg); err != nil {
return err
}

// Ensure inhibitor is set before being called
if am.inhibitor != nil {
Expand All @@ -356,7 +372,7 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config, rawCfg s
// Create a firewall bound to the per-tenant config.
firewallDialer := util_net.NewFirewallDialer(newFirewallDialerConfigProvider(userID, am.cfg.Limits))

integrationsMap, err := buildIntegrationsMap(conf.Receivers, tmpl, firewallDialer, am.logger, func(integrationName string, notifier notify.Notifier) notify.Notifier {
integrationsMap, err := buildIntegrationsMap(am.logger, conf.Receivers, tmpl, firewallDialer, am.logger, func(integrationName string, notifier notify.Notifier) notify.Notifier {
if am.cfg.Limits != nil {
rl := &tenantRateLimits{
tenant: userID,
Expand All @@ -372,6 +388,14 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config, rawCfg s
return err
}

activeReceivers := alertingNotify.GetActiveReceiversMap(dispatch.NewRoute(cfg.Route, nil))
var receivers []*notify.Receiver
for k, v := range integrationsMap {
_, active := activeReceivers[k]
receivers = append(receivers, notify.NewReceiver(k, active, v))
}
am.api.Update(&cfg, receivers, func(_ model.LabelSet) {})

timeIntervals := make(map[string][]timeinterval.TimeInterval, len(conf.MuteTimeIntervals)+len(conf.TimeIntervals))
for _, ti := range conf.MuteTimeIntervals {
timeIntervals[ti.Name] = ti.TimeIntervals
Expand All @@ -383,7 +407,7 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config, rawCfg s
intervener := timeinterval.NewIntervener(timeIntervals)

pipeline := am.pipelineBuilder.New(
integrationsMap,
receivers,
waitFunc,
am.inhibitor,
silence.NewSilencer(am.silences, am.marker, am.logger),
Expand All @@ -394,7 +418,7 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config, rawCfg s
am.lastPipeline = pipeline
am.dispatcher = dispatch.NewDispatcher(
am.alerts,
dispatch.NewRoute(conf.Route, nil),
dispatch.NewRoute(cfg.Route, nil),
pipeline,
am.marker,
timeoutFunc,
Expand Down Expand Up @@ -451,25 +475,58 @@ func (am *Alertmanager) getFullState() (*clusterpb.FullState, error) {

// buildIntegrationsMap builds a map of name to the list of integration notifiers off of a
// list of receiver config.
func buildIntegrationsMap(nc []config.Receiver, tmpl *template.Template, firewallDialer *util_net.FirewallDialer, logger log.Logger, notifierWrapper func(string, notify.Notifier) notify.Notifier) (map[string][]notify.Integration, error) {
integrationsMap := make(map[string][]notify.Integration, len(nc))
func buildIntegrationsMap(l log.Logger, nc []*definition.PostableApiReceiver, tmpl *template.Template, firewallDialer *util_net.FirewallDialer, logger log.Logger, notifierWrapper func(string, notify.Notifier) notify.Notifier) (map[string][]*notify.Integration, error) {
integrationsMap := make(map[string][]*notify.Integration, len(nc))

for _, rcv := range nc {
integrations, err := buildReceiverIntegrations(rcv, tmpl, firewallDialer, logger, notifierWrapper)
if err != nil {
return nil, err
if rcv.Type() == definition.GrafanaReceiverType {
integrationConfigs := make([]*alertingNotify.GrafanaIntegrationConfig, 0, len(rcv.GrafanaManagedReceivers))
for _, r := range rcv.GrafanaManagedReceivers {
integrationConfigs = append(integrationConfigs, &alertingNotify.GrafanaIntegrationConfig{
// TODO: unify into one struct?
UID: r.UID,
Name: r.Name,
Type: r.Type,
DisableResolveMessage: r.DisableResolveMessage,
Settings: json.RawMessage(r.Settings),
SecureSettings: r.SecureSettings,
})
}
api := alertingNotify.APIReceiver{
GrafanaIntegrations: alertingNotify.GrafanaIntegrations{
Integrations: integrationConfigs,
},
}
receiverCfg, err := alertingNotify.BuildReceiverConfiguration(context.Background(), &api, noopDecryptFn)
if err != nil {
return nil, err
}

// TODO: tenantID
integrations, err := alertingNotify.BuildReceiverIntegrations(receiverCfg, tmpl, &images.UnavailableProvider{}, newLoggerFactory(l), whSenderFn, emailSenderFn, 0, "test")
if err != nil {
return nil, err
}
integrationsMap[rcv.Name] = integrations
} else {
integrations, err := buildReceiverIntegrations(rcv, tmpl, firewallDialer, logger, notifierWrapper)
if err != nil {
return nil, err
}
integrationsMap[rcv.Name] = integrations
}
integrationsMap[rcv.Name] = integrations
}
return integrationsMap, nil
}

// buildReceiverIntegrations builds a list of integration notifiers off of a
// receiver config.
// Taken from https://github.com/prometheus/alertmanager/blob/94d875f1227b29abece661db1a68c001122d1da5/cmd/alertmanager/main.go#L112-L159.
func buildReceiverIntegrations(nc config.Receiver, tmpl *template.Template, firewallDialer *util_net.FirewallDialer, logger log.Logger, wrapper func(string, notify.Notifier) notify.Notifier) ([]notify.Integration, error) {
func buildReceiverIntegrations(nc *definition.PostableApiReceiver, tmpl *template.Template, firewallDialer *util_net.FirewallDialer, logger log.Logger, wrapper func(string, notify.Notifier) notify.Notifier) ([]*notify.Integration, error) {
fmt.Println("buildReceiverIntegrations got receiver with PostableGrafanaReceivers:", nc.PostableGrafanaReceivers.GrafanaManagedReceivers)
var (
errs types.MultiError
integrations []notify.Integration
integrations []*notify.Integration
add = func(name string, i int, rs notify.ResolvedSender, f func(l log.Logger) (notify.Notifier, error)) {
n, err := f(log.With(logger, "integration", name))
if err != nil {
Expand Down Expand Up @@ -532,6 +589,21 @@ func buildReceiverIntegrations(nc config.Receiver, tmpl *template.Template, fire
return integrations, nil
}

// noopDecryptFn implements alertingNotify.DecryptFn without performing any
// decryption: the bytes stored under key in sjd are returned verbatim as a
// string. When key is not present in sjd, fallback is returned instead.
// The context argument is ignored.
// TODO: make part of alerting package.
func noopDecryptFn(_ context.Context, sjd map[string][]byte, key string, fallback string) string {
	raw, found := sjd[key]
	if !found {
		return fallback
	}
	// A key that is present with an empty or nil value yields "", not fallback.
	return string(raw)
}
// whSenderFn returns the webhook sender produced by NewSender, constructed
// with an alertingLogging.FakeLogger. The receiver metadata argument is
// ignored and the returned error is always nil.
func whSenderFn(_ alertingReceivers.Metadata) (alertingReceivers.WebhookSender, error) {
	sender := NewSender(alertingLogging.FakeLogger{})
	return sender, nil
}
// emailSenderFn returns the email sender produced by NewSender, constructed
// with an alertingLogging.FakeLogger. The receiver metadata argument is
// ignored and the returned error is always nil.
func emailSenderFn(_ alertingReceivers.Metadata) (alertingReceivers.EmailSender, error) {
	sender := NewSender(alertingLogging.FakeLogger{})
	return sender, nil
}

func md5HashAsMetricValue(data []byte) float64 {
sum := md5.Sum(data)
// We only want 48 bits as a float64 only has a 53 bit mantissa.
Expand Down
5 changes: 2 additions & 3 deletions pkg/alertmanager/alertmanager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (
"github.com/grafana/dskit/concurrency"
"github.com/grafana/dskit/test"
"github.com/prometheus/alertmanager/cluster/clusterpb"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/featurecontrol"
"github.com/prometheus/alertmanager/types"
"github.com/prometheus/client_golang/prometheus"
Expand Down Expand Up @@ -85,7 +84,7 @@ route:
group_interval: 10ms
receiver: 'prod'`

cfg, err := config.Load(cfgRaw)
cfg, err := loadConfig(cfgRaw)
require.NoError(t, err)
require.NoError(t, am.ApplyConfig(user, cfg, cfgRaw))

Expand Down Expand Up @@ -169,7 +168,7 @@ route:
group_interval: 10ms
receiver: 'prod'`

cfg, err := config.Load(cfgRaw)
cfg, err := loadConfig(cfgRaw)
require.NoError(t, err)
require.NoError(t, am.ApplyConfig(user, cfg, cfgRaw))

Expand Down
Loading
Loading