From b004dc4c61a51aafe713723023282d5fb35d1aea Mon Sep 17 00:00:00 2001 From: yushuangyu <yushuangyu@flashcat.cloud> Date: Sun, 3 Jul 2022 22:01:41 +0800 Subject: [PATCH 1/6] 1. add kafka metrics collector 2. change Sample funcs from package inputs to types to avoid cycle imports 3. add runtimex.Stack to print stack detail when panic --- agent/agent.go | 4 +- agent/logs_agent.go | 11 +- agent/metrics_agent.go | 8 +- agent/metrics_reader.go | 6 +- conf/input.kafka/kafka.toml | 12 + go.mod | 11 +- go.sum | 7 + inputs/collector.go | 76 ++ inputs/conntrack/conntrack.go | 2 +- inputs/cpu/cpu.go | 3 +- inputs/disk/disk.go | 3 +- inputs/diskio/diskio.go | 3 +- inputs/docker/docker.go | 33 +- inputs/elasticsearch/elasticsearch.go | 24 +- inputs/http_response/http_response.go | 2 +- inputs/inputs.go | 51 -- inputs/kafka/README.md | 18 + inputs/kafka/alerts.json | 0 inputs/kafka/dashboard.json | 0 inputs/kafka/exporter/exporter.go | 860 ++++++++++++++++++ inputs/kafka/exporter/interpolation_map.go | 115 +++ inputs/kafka/exporter/scram_client.go | 36 + inputs/kafka/kafka.go | 260 ++++++ inputs/kernel/kernel.go | 2 +- inputs/kernel_vmstat/kernel_vmstat.go | 2 +- inputs/kubernetes/kubernetes.go | 14 +- .../linux_sysctl_fs/linux_sysctl_fs_linux.go | 2 +- inputs/mem/mem.go | 3 +- inputs/mysql/binlog.go | 8 +- inputs/mysql/custom_queries.go | 6 +- inputs/mysql/engine_innodb.go | 8 +- inputs/mysql/engine_innodb_compute.go | 16 +- inputs/mysql/global_status.go | 22 +- inputs/mysql/global_variables.go | 12 +- inputs/mysql/mysql.go | 8 +- inputs/mysql/processlist.go | 4 +- inputs/mysql/processlist_by_user.go | 4 +- inputs/mysql/schema_size.go | 4 +- inputs/mysql/slave_status.go | 4 +- inputs/mysql/table_size.go | 6 +- inputs/net/net.go | 3 +- inputs/net_response/net_response.go | 2 +- inputs/netstat/netstat.go | 3 +- .../nginx_upstream_check.go | 2 +- inputs/ntp/ntp.go | 2 +- inputs/nvidia_smi/nvidia_smi.go | 11 +- inputs/oracle/README.md | 4 +- inputs/oracle/oracle_linux_amd64.go | 10 
+- inputs/ping/ping.go | 2 +- inputs/processes/processes_notwindows.go | 3 +- inputs/procstat/README.md | 6 +- inputs/procstat/procstat.go | 50 +- inputs/prometheus/prometheus.go | 8 +- inputs/rabbitmq/rabbitmq.go | 12 +- inputs/redis/redis.go | 18 +- inputs/switch_legacy/switch_legacy.go | 47 +- inputs/system/system.go | 3 +- inputs/tomcat/tomcat.go | 20 +- inputs/zookeeper/zookeeper.go | 28 +- parser/prometheus/parser.go | 29 +- pkg/runtimex/stack.go | 76 ++ types/sample.go | 67 ++ types/types.go | 10 - 63 files changed, 1788 insertions(+), 298 deletions(-) create mode 100644 conf/input.kafka/kafka.toml create mode 100644 inputs/collector.go create mode 100644 inputs/kafka/README.md create mode 100644 inputs/kafka/alerts.json create mode 100644 inputs/kafka/dashboard.json create mode 100644 inputs/kafka/exporter/exporter.go create mode 100644 inputs/kafka/exporter/interpolation_map.go create mode 100644 inputs/kafka/exporter/scram_client.go create mode 100644 inputs/kafka/kafka.go create mode 100644 pkg/runtimex/stack.go create mode 100644 types/sample.go delete mode 100644 types/types.go diff --git a/agent/agent.go b/agent/agent.go index 4dccf7df..5de7b5a9 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -3,6 +3,8 @@ package agent import ( "log" + "flashcat.cloud/categraf/traces" + // auto registry _ "flashcat.cloud/categraf/inputs/conntrack" _ "flashcat.cloud/categraf/inputs/cpu" @@ -12,6 +14,7 @@ import ( _ "flashcat.cloud/categraf/inputs/elasticsearch" _ "flashcat.cloud/categraf/inputs/exec" _ "flashcat.cloud/categraf/inputs/http_response" + _ "flashcat.cloud/categraf/inputs/kafka" _ "flashcat.cloud/categraf/inputs/kernel" _ "flashcat.cloud/categraf/inputs/kernel_vmstat" _ "flashcat.cloud/categraf/inputs/kubernetes" @@ -35,7 +38,6 @@ import ( _ "flashcat.cloud/categraf/inputs/system" _ "flashcat.cloud/categraf/inputs/tomcat" _ "flashcat.cloud/categraf/inputs/zookeeper" - "flashcat.cloud/categraf/traces" ) type Agent struct { diff --git 
a/agent/logs_agent.go b/agent/logs_agent.go index 7aff2d19..1629430e 100644 --- a/agent/logs_agent.go +++ b/agent/logs_agent.go @@ -12,9 +12,6 @@ import ( "log" "time" - coreconfig "flashcat.cloud/categraf/config" - - logsconfig "flashcat.cloud/categraf/config/logs" "flashcat.cloud/categraf/logs/auditor" "flashcat.cloud/categraf/logs/client" "flashcat.cloud/categraf/logs/diagnostic" @@ -23,9 +20,11 @@ import ( "flashcat.cloud/categraf/logs/input/listener" "flashcat.cloud/categraf/logs/pipeline" "flashcat.cloud/categraf/logs/restart" - "flashcat.cloud/categraf/logs/service" - logService "flashcat.cloud/categraf/logs/service" "flashcat.cloud/categraf/logs/status" + + coreconfig "flashcat.cloud/categraf/config" + logsconfig "flashcat.cloud/categraf/config/logs" + logService "flashcat.cloud/categraf/logs/service" ) // LogAgent represents the data pipeline that collects, decodes, @@ -44,7 +43,7 @@ type LogAgent struct { } // NewAgent returns a new Logs LogAgent -func NewLogAgent(sources *logsconfig.LogSources, services *service.Services, processingRules []*logsconfig.ProcessingRule, endpoints *logsconfig.Endpoints) *LogAgent { +func NewLogAgent(sources *logsconfig.LogSources, services *logService.Services, processingRules []*logsconfig.ProcessingRule, endpoints *logsconfig.Endpoints) *LogAgent { // setup the auditor // We pass the health handle to the auditor because it's the end of the pipeline and the most // critical part. Arguably it could also be plugged to the destination. 
diff --git a/agent/metrics_agent.go b/agent/metrics_agent.go index 5eb8f157..a9dad818 100644 --- a/agent/metrics_agent.go +++ b/agent/metrics_agent.go @@ -42,18 +42,18 @@ func (a *Agent) startMetricsAgent() error { } // construct input instance - instance := creator() + inp := creator() // set configurations for input instance - cfg.LoadConfigs(path.Join(config.Config.ConfigDir, inputFilePrefix+name), instance) + cfg.LoadConfigs(path.Join(config.Config.ConfigDir, inputFilePrefix+name), inp) - if err = instance.Init(); err != nil { + if err = inp.Init(); err != nil { if !errors.Is(err, types.ErrInstancesEmpty) { log.Println("E! failed to init input:", name, "error:", err) } continue } - reader := NewInputReader(instance) + reader := NewInputReader(inp) reader.Start() a.InputReaders[name] = reader diff --git a/agent/metrics_reader.go b/agent/metrics_reader.go index a5f83182..c99f72d0 100644 --- a/agent/metrics_reader.go +++ b/agent/metrics_reader.go @@ -9,6 +9,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/house" "flashcat.cloud/categraf/inputs" + "flashcat.cloud/categraf/pkg/runtimex" "flashcat.cloud/categraf/types" "flashcat.cloud/categraf/writer" "github.com/toolkits/pkg/container/list" @@ -69,7 +70,7 @@ func (r *InputReader) gatherOnce() { if strings.Contains(fmt.Sprint(r), "closed channel") { return } else { - log.Println("E! gather metrics panic:", r) + log.Println("E! gather metrics panic:", r, string(runtimex.Stack(3))) } } }() @@ -92,6 +93,9 @@ func (r *InputReader) gatherOnce() { } s := samples[i].(*types.Sample) + if s == nil { + continue + } if s.Timestamp.IsZero() { s.Timestamp = now diff --git a/conf/input.kafka/kafka.toml b/conf/input.kafka/kafka.toml new file mode 100644 index 00000000..8b4e5132 --- /dev/null +++ b/conf/input.kafka/kafka.toml @@ -0,0 +1,12 @@ +# # collect interval +# interval = 15 + +############################################################################ +# !!! 
uncomment [[instances]] to enable this plugin +[[instances]] +# # interval = global.interval * interval_times +# interval_times = 1 + +# append some labels to metrics +labels = { cluster="cloud-n9e-kafka" } +kafka_uris = ["127.0.0.1:9092","127.0.0.1:9092","127.0.0.1:9092"] \ No newline at end of file diff --git a/go.mod b/go.mod index d738912c..957f4e8b 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf github.com/docker/docker v20.10.16+incompatible github.com/gaochao1/sw v1.0.0 + github.com/go-kit/kit v0.11.0 github.com/go-ping/ping v0.0.0-20211130115550-779d1e919534 github.com/go-redis/redis/v8 v8.11.5 github.com/go-sql-driver/mysql v1.6.0 @@ -22,6 +23,7 @@ require ( github.com/jmoiron/sqlx v1.3.5 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 github.com/koding/multiconfig v0.0.0-20171124222453-69c27309b2d7 + github.com/krallistic/kazoo-go v0.0.0-20170526135507-a15279744f4e github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 github.com/open-telemetry/opentelemetry-collector-contrib/exporter/alibabacloudlogserviceexporter v0.54.0 github.com/open-telemetry/opentelemetry-collector-contrib/exporter/jaegerexporter v0.54.0 @@ -46,6 +48,7 @@ require ( github.com/stretchr/testify v1.7.4 github.com/toolkits/pkg v1.3.0 github.com/ulricqin/gosnmp v0.0.1 + github.com/xdg/scram v1.0.5 go.opentelemetry.io/collector v0.54.0 go.opentelemetry.io/otel/metric v0.30.0 go.opentelemetry.io/otel/trace v1.7.0 @@ -90,7 +93,6 @@ require ( github.com/felixge/httpsnoop v1.0.2 // indirect github.com/freedomkk-qfeng/go-fastping v0.0.0-20160109021039-d7bb493dee3e // indirect github.com/fsnotify/fsnotify v1.5.4 // indirect - github.com/go-kit/kit v0.11.0 // indirect github.com/go-kit/log v0.2.0 // indirect github.com/go-logfmt/logfmt v0.5.1 // indirect github.com/go-logr/logr v1.2.3 // indirect @@ -158,6 +160,7 @@ require ( github.com/prometheus/statsd_exporter 
v0.21.0 // indirect github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 // indirect github.com/rs/cors v1.8.2 // indirect + github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da // indirect github.com/shopspring/decimal v1.3.1 // indirect github.com/sirupsen/logrus v1.8.1 // indirect github.com/spf13/afero v1.8.2 // indirect @@ -176,6 +179,7 @@ require ( github.com/xdg-go/pbkdf2 v1.0.0 // indirect github.com/xdg-go/scram v1.1.1 // indirect github.com/xdg-go/stringprep v1.0.3 // indirect + github.com/xdg/stringprep v1.0.3 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect go.opencensus.io v0.23.0 // indirect go.opentelemetry.io/collector/pdata v0.54.0 // indirect @@ -199,4 +203,7 @@ require ( gotest.tools/v3 v3.2.0 // indirect ) -replace go.opentelemetry.io/collector => github.com/flashcatcloud/opentelemetry-collector v0.54.1-0.20220628041301-3b8dabd1bcd0 +replace ( + go.opentelemetry.io/collector => github.com/flashcatcloud/opentelemetry-collector v0.54.1-0.20220628041301-3b8dabd1bcd0 + github.com/prometheus/client_golang => ../../flashcatcloud/client_golang +) diff --git a/go.sum b/go.sum index 919c121b..41e17a51 100644 --- a/go.sum +++ b/go.sum @@ -582,6 +582,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/krallistic/kazoo-go v0.0.0-20170526135507-a15279744f4e h1:IWiVY66Xy9YrDZ28qJMt1UTlh6x9UGW0aDH/o58CSnA= +github.com/krallistic/kazoo-go v0.0.0-20170526135507-a15279744f4e/go.mod h1:Rq6003vCNoJNrT6ol0hMebQ3GWLWXSHrD/QcMlXt0EE= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.2.0 
h1:LXpIM/LZ5xGFhOpXAQUIMM1HdyqzVYM13zNdjCEEcA0= @@ -854,6 +856,7 @@ github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= +github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da h1:p3Vo3i64TCLY7gIfzeQaUJ+kppEO5WQG3cL8iE8tGHU= github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= github.com/sanity-io/litter v1.2.0/go.mod h1:JF6pZUFgu2Q0sBZ+HSV35P8TVPI1TTzEwyu9FXAw2W4= github.com/schollz/progressbar/v2 v2.13.2/go.mod h1:6YZjqdthH6SCZKv2rqGryrxPtfmRB/DWZxSMfCXPyD8= @@ -945,6 +948,10 @@ github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23n github.com/xdg-go/stringprep v1.0.2/go.mod h1:8F9zXuvzgwmyT5DUm4GUfZGDdT3W+LCvS6+da4O5kxM= github.com/xdg-go/stringprep v1.0.3 h1:kdwGpVNwPFtjs98xCGkHjQtGKh86rDcRZN17QEMCOIs= github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= +github.com/xdg/scram v1.0.5 h1:TuS0RFmt5Is5qm9Tm2SoD89OPqe4IRiFtyFY4iwWXsw= +github.com/xdg/scram v1.0.5/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= +github.com/xdg/stringprep v1.0.3 h1:cmL5Enob4W83ti/ZHuZLuKD/xqJfus4fVPwE+/BDm+4= +github.com/xdg/stringprep v1.0.3/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= diff --git a/inputs/collector.go b/inputs/collector.go new file mode 100644 index 00000000..f4e4ed67 --- /dev/null +++ 
b/inputs/collector.go @@ -0,0 +1,76 @@ +package inputs + +import ( + "errors" + "log" + + "flashcat.cloud/categraf/types" + "github.com/prometheus/client_golang/prometheus" + "github.com/toolkits/pkg/container/list" + + pp "flashcat.cloud/categraf/parser/prometheus" + dto "github.com/prometheus/client_model/go" +) + +const capMetricChan = 1000 + +var parser = new(pp.Parser) + +func Collect(e prometheus.Collector, slist *list.SafeList) error { + if e == nil { + return errors.New("exporter must not be nil") + } + + metricChan := make(chan prometheus.Metric, capMetricChan) + go func() { + e.Collect(metricChan) + close(metricChan) + }() + + for metric := range metricChan { + if metric == nil { + continue + } + + desc := metric.Desc() + if desc.Err() != nil { + log.Println("E! got invalid metric:", desc.Name(), desc.Err()) + continue + } + + dtoMetric := &dto.Metric{} + err := metric.Write(dtoMetric) + if err != nil { + log.Println("E! failed to write metric:", desc.String()) + continue + } + + labels := map[string]string{} + for _, kv := range desc.ConstLabels() { + labels[*kv.Name] = *kv.Value + } + + for _, kv := range dtoMetric.Label { + labels[*kv.Name] = *kv.Value + } + + switch { + case dtoMetric.Counter != nil: + _ = slist.PushFront(types.NewSample(desc.Name(), *dtoMetric.Counter.Value, labels)) + + case dtoMetric.Gauge != nil: + _ = slist.PushFront(types.NewSample(desc.Name(), *dtoMetric.Gauge.Value, labels)) + + case dtoMetric.Summary != nil: + parser.HandleSummary(dtoMetric, nil, desc.Name(), slist) + + case dtoMetric.Histogram != nil: + parser.HandleHistogram(dtoMetric, nil, desc.Name(), slist) + + default: + _ = slist.PushFront(types.NewSample(desc.Name(), *dtoMetric.Untyped.Value, labels)) + } + } + + return nil +} diff --git a/inputs/conntrack/conntrack.go b/inputs/conntrack/conntrack.go index 30e8c460..36bad1b0 100644 --- a/inputs/conntrack/conntrack.go +++ b/inputs/conntrack/conntrack.go @@ -100,5 +100,5 @@ func (c *Conntrack) Gather(slist 
*list.SafeList) { log.Println("E! Conntrack input failed to collect metrics. Is the conntrack kernel module loaded?") } - inputs.PushSamples(slist, fields) + types.PushSamples(slist, fields) } diff --git a/inputs/cpu/cpu.go b/inputs/cpu/cpu.go index d93f978c..b6ce99a5 100644 --- a/inputs/cpu/cpu.go +++ b/inputs/cpu/cpu.go @@ -9,6 +9,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/inputs/system" + "flashcat.cloud/categraf/types" ) const inputName = "cpu" @@ -95,7 +96,7 @@ func (c *CPUStats) Gather(slist *list.SafeList) { "usage_active": 100 * (active - lastActive) / totalDelta, } - inputs.PushSamples(slist, fields, tags) + types.PushSamples(slist, fields, tags) } c.lastStats = make(map[string]cpuUtil.TimesStat) diff --git a/inputs/disk/disk.go b/inputs/disk/disk.go index c1897027..dc7903d6 100644 --- a/inputs/disk/disk.go +++ b/inputs/disk/disk.go @@ -8,6 +8,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/inputs/system" "flashcat.cloud/categraf/pkg/choice" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -84,7 +85,7 @@ func (s *DiskStats) Gather(slist *list.SafeList) { "inodes_used": du.InodesUsed, } - inputs.PushSamples(slist, fields, tags) + types.PushSamples(slist, fields, tags) } } diff --git a/inputs/diskio/diskio.go b/inputs/diskio/diskio.go index 57b7c06a..750e5bc1 100644 --- a/inputs/diskio/diskio.go +++ b/inputs/diskio/diskio.go @@ -8,6 +8,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/inputs/system" "flashcat.cloud/categraf/pkg/filter" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -81,6 +82,6 @@ func (d *DiskIO) Gather(slist *list.SafeList) { "merged_writes": io.MergedWriteCount, } - inputs.PushSamples(slist, fields, map[string]string{"name": io.Name}) + types.PushSamples(slist, fields, map[string]string{"name": io.Name}) } } diff --git a/inputs/docker/docker.go b/inputs/docker/docker.go 
index edfad9e5..f59839fe 100644 --- a/inputs/docker/docker.go +++ b/inputs/docker/docker.go @@ -17,12 +17,13 @@ import ( "flashcat.cloud/categraf/pkg/choice" "flashcat.cloud/categraf/pkg/dock" "flashcat.cloud/categraf/pkg/filter" - tlsx "flashcat.cloud/categraf/pkg/tls" - itypes "flashcat.cloud/categraf/types" "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/filters" "github.com/docker/docker/api/types/swarm" "github.com/toolkits/pkg/container/list" + + tlsx "flashcat.cloud/categraf/pkg/tls" + itypes "flashcat.cloud/categraf/types" ) const inputName = "docker" @@ -167,7 +168,7 @@ func (ins *Instance) gatherOnce(slist *list.SafeList) { if ins.client == nil { c, err := ins.getNewClient() if err != nil { - slist.PushFront(inputs.NewSample("docker_up", 0, ins.Labels)) + slist.PushFront(itypes.NewSample("docker_up", 0, ins.Labels)) log.Println("E! failed to new docker client:", err) return } @@ -177,12 +178,12 @@ func (ins *Instance) gatherOnce(slist *list.SafeList) { defer ins.client.Close() if err := ins.gatherInfo(slist); err != nil { - slist.PushFront(inputs.NewSample("docker_up", 0, ins.Labels)) + slist.PushFront(itypes.NewSample("docker_up", 0, ins.Labels)) log.Println("E! 
failed to gather docker info:", err) return } - slist.PushFront(inputs.NewSample("docker_up", 1, ins.Labels)) + slist.PushFront(itypes.NewSample("docker_up", 1, ins.Labels)) if ins.GatherServices { ins.gatherSwarmInfo(slist) @@ -346,10 +347,10 @@ func (ins *Instance) gatherContainerInspect(container types.Container, slist *li statefields["docker_container_status_uptime"] = uptime.Seconds() } - inputs.PushSamples(slist, statefields, tags, ins.Labels) + itypes.PushSamples(slist, statefields, tags, ins.Labels) if info.State.Health != nil { - slist.PushFront(inputs.NewSample("docker_container_health_failing_streak", info.ContainerJSONBase.State.Health.FailingStreak, tags, ins.Labels)) + slist.PushFront(itypes.NewSample("docker_container_health_failing_streak", info.ContainerJSONBase.State.Health.FailingStreak, tags, ins.Labels)) } ins.parseContainerStats(v, slist, tags, daemonOSType) @@ -429,7 +430,7 @@ func (ins *Instance) parseContainerStats(stat *types.StatsJSON, slist *list.Safe memfields["docker_container_mem_private_working_set"] = stat.MemoryStats.PrivateWorkingSet } - inputs.PushSamples(slist, memfields, tags, ins.Labels) + itypes.PushSamples(slist, memfields, tags, ins.Labels) // cpu @@ -454,7 +455,7 @@ func (ins *Instance) parseContainerStats(stat *types.StatsJSON, slist *list.Safe cpufields["docker_container_cpu_usage_percent"] = cpuPercent } - inputs.PushSamples(slist, cpufields, map[string]string{"cpu": "cpu-total"}, tags, ins.Labels) + itypes.PushSamples(slist, cpufields, map[string]string{"cpu": "cpu-total"}, tags, ins.Labels) } if choice.Contains("cpu", ins.PerDeviceInclude) && len(stat.CPUStats.CPUUsage.PercpuUsage) > 0 { @@ -466,7 +467,7 @@ func (ins *Instance) parseContainerStats(stat *types.StatsJSON, slist *list.Safe } for i, percpu := range percpuusage { - slist.PushFront(inputs.NewSample( + slist.PushFront(itypes.NewSample( "docker_container_cpu_usage_total", percpu, map[string]string{"cpu": fmt.Sprintf("cpu%d", i)}, @@ -492,7 +493,7 @@ func (ins 
*Instance) parseContainerStats(stat *types.StatsJSON, slist *list.Safe } if choice.Contains("network", ins.PerDeviceInclude) { - inputs.PushSamples(slist, netfields, map[string]string{"network": network}, tags, ins.Labels) + itypes.PushSamples(slist, netfields, map[string]string{"network": network}, tags, ins.Labels) } if choice.Contains("network", ins.TotalInclude) { @@ -519,7 +520,7 @@ func (ins *Instance) parseContainerStats(stat *types.StatsJSON, slist *list.Safe // totalNetworkStatMap could be empty if container is running with --net=host. if choice.Contains("network", ins.TotalInclude) && len(totalNetworkStatMap) != 0 { - inputs.PushSamples(slist, totalNetworkStatMap, map[string]string{"network": "total"}, tags, ins.Labels) + itypes.PushSamples(slist, totalNetworkStatMap, map[string]string{"network": "total"}, tags, ins.Labels) } ins.gatherBlockIOMetrics(slist, stat, tags) @@ -535,7 +536,7 @@ func (ins *Instance) gatherBlockIOMetrics(slist *list.SafeList, stat *types.Stat totalStatMap := make(map[string]interface{}) for device, fields := range deviceStatMap { if perDeviceBlkio { - inputs.PushSamples(slist, fields, map[string]string{"device": device}, tags, ins.Labels) + itypes.PushSamples(slist, fields, map[string]string{"device": device}, tags, ins.Labels) } if totalBlkio { for field, value := range fields { @@ -560,7 +561,7 @@ func (ins *Instance) gatherBlockIOMetrics(slist *list.SafeList, stat *types.Stat } if totalBlkio { - inputs.PushSamples(slist, totalStatMap, map[string]string{"device": "total"}, tags, ins.Labels) + itypes.PushSamples(slist, totalStatMap, map[string]string{"device": "total"}, tags, ins.Labels) } } @@ -691,7 +692,7 @@ func (ins *Instance) gatherSwarmInfo(slist *list.SafeList) { log.Println("E! 
Unknown replica mode") } - inputs.PushSamples(slist, fields, tags, ins.Labels) + itypes.PushSamples(slist, fields, tags, ins.Labels) } } @@ -719,7 +720,7 @@ func (ins *Instance) gatherInfo(slist *list.SafeList) error { "docker_memory_total": info.MemTotal, } - inputs.PushSamples(slist, fields, ins.Labels) + itypes.PushSamples(slist, fields, ins.Labels) return nil } diff --git a/inputs/elasticsearch/elasticsearch.go b/inputs/elasticsearch/elasticsearch.go index 7105403d..c615056c 100644 --- a/inputs/elasticsearch/elasticsearch.go +++ b/inputs/elasticsearch/elasticsearch.go @@ -242,7 +242,7 @@ func (ins *Instance) gatherOnce(slist *list.SafeList) { // Gather node ID if info.nodeID, err = ins.gatherNodeID(s + "/_nodes/_local/name"); err != nil { - slist.PushFront(inputs.NewSample("up", 0, ins.Labels)) + slist.PushFront(types.NewSample("up", 0, ins.Labels)) log.Println("E! failed to gather node id:", err) return } @@ -250,12 +250,12 @@ func (ins *Instance) gatherOnce(slist *list.SafeList) { // get cat/master information here so NodeStats can determine // whether this node is the Master if info.masterID, err = ins.getCatMaster(s + "/_cat/master"); err != nil { - slist.PushFront(inputs.NewSample("up", 0, ins.Labels)) + slist.PushFront(types.NewSample("up", 0, ins.Labels)) log.Println("E! 
failed to get cat master:", err) return } - slist.PushFront(inputs.NewSample("up", 1, ins.Labels)) + slist.PushFront(types.NewSample("up", 1, ins.Labels)) ins.serverInfoMutex.Lock() ins.serverInfo[s] = info ins.serverInfoMutex.Unlock() @@ -328,7 +328,7 @@ func (ins *Instance) gatherIndicesStats(url string, slist *list.SafeList) error // Total Shards Stats for k, v := range indicesStats.Shards { - slist.PushFront(inputs.NewSample("indices_stats_shards_total_"+k, v, ins.Labels)) + slist.PushFront(types.NewSample("indices_stats_shards_total_"+k, v, ins.Labels)) } // All Stats @@ -340,7 +340,7 @@ func (ins *Instance) gatherIndicesStats(url string, slist *list.SafeList) error return err } for key, val := range jsonParser.Fields { - slist.PushFront(inputs.NewSample("indices_stats_"+m+"_"+key, val, map[string]string{"index_name": "_all"}, ins.Labels)) + slist.PushFront(types.NewSample("indices_stats_"+m+"_"+key, val, map[string]string{"index_name": "_all"}, ins.Labels)) } } @@ -393,7 +393,7 @@ func (ins *Instance) gatherSingleIndexStats(name string, index indexStat, slist return err } for key, val := range f.Fields { - slist.PushFront(inputs.NewSample("indices_stats_"+m+"_"+key, val, indexTag, ins.Labels)) + slist.PushFront(types.NewSample("indices_stats_"+m+"_"+key, val, indexTag, ins.Labels)) } } @@ -436,7 +436,7 @@ func (ins *Instance) gatherSingleIndexStats(name string, index indexStat, slist } for key, val := range flattened.Fields { - slist.PushFront(inputs.NewSample("indices_stats_shards_"+key, val, shardTags, ins.Labels)) + slist.PushFront(types.NewSample("indices_stats_shards_"+key, val, shardTags, ins.Labels)) } } } @@ -501,7 +501,7 @@ func (ins *Instance) gatherClusterStats(url string, slist *list.SafeList) error } for key, val := range f.Fields { - slist.PushFront(inputs.NewSample("clusterstats_"+p+"_"+key, val, tags, ins.Labels)) + slist.PushFront(types.NewSample("clusterstats_"+p+"_"+key, val, tags, ins.Labels)) } } @@ -531,7 +531,7 @@ func (ins *Instance) 
gatherClusterHealth(url string, slist *list.SafeList) error "cluster_health_unassigned_shards": healthStats.UnassignedShards, } - inputs.PushSamples(slist, clusterFields, map[string]string{"name": healthStats.ClusterName}, ins.Labels) + types.PushSamples(slist, clusterFields, map[string]string{"name": healthStats.ClusterName}, ins.Labels) for name, health := range healthStats.Indices { indexFields := map[string]interface{}{ @@ -544,7 +544,7 @@ func (ins *Instance) gatherClusterHealth(url string, slist *list.SafeList) error "cluster_health_indices_status_code": mapHealthStatusToCode(health.Status), "cluster_health_indices_unassigned_shards": health.UnassignedShards, } - inputs.PushSamples(slist, indexFields, map[string]string{"index": name, "name": healthStats.ClusterName}, ins.Labels) + types.PushSamples(slist, indexFields, map[string]string{"index": name, "name": healthStats.ClusterName}, ins.Labels) } return nil @@ -571,7 +571,7 @@ func (ins *Instance) gatherNodeStats(url string, slist *list.SafeList) error { } for k, v := range n.Attributes { - slist.PushFront(inputs.NewSample("node_attribute_"+k, v, tags, ins.Labels)) + slist.PushFront(types.NewSample("node_attribute_"+k, v, tags, ins.Labels)) } stats := map[string]interface{}{ @@ -600,7 +600,7 @@ func (ins *Instance) gatherNodeStats(url string, slist *list.SafeList) error { } for key, val := range f.Fields { - slist.PushFront(inputs.NewSample(p+"_"+key, val, tags, ins.Labels)) + slist.PushFront(types.NewSample(p+"_"+key, val, tags, ins.Labels)) } } } diff --git a/inputs/http_response/http_response.go b/inputs/http_response/http_response.go index 0406d04d..e2089aa2 100644 --- a/inputs/http_response/http_response.go +++ b/inputs/http_response/http_response.go @@ -221,7 +221,7 @@ func (ins *Instance) gather(slist *list.SafeList, target string) { defer func() { for field, value := range fields { - slist.PushFront(inputs.NewSample(field, value, labels)) + slist.PushFront(types.NewSample(field, value, labels)) } }() 
diff --git a/inputs/inputs.go b/inputs/inputs.go index 29187c06..3ae468ab 100644 --- a/inputs/inputs.go +++ b/inputs/inputs.go @@ -2,8 +2,6 @@ package inputs import ( "flashcat.cloud/categraf/config" - "flashcat.cloud/categraf/pkg/conv" - "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -22,52 +20,3 @@ var InputCreators = map[string]Creator{} func Add(name string, creator Creator) { InputCreators[name] = creator } - -func NewSample(metric string, value interface{}, labels ...map[string]string) *types.Sample { - floatValue, err := conv.ToFloat64(value) - if err != nil { - return nil - } - - s := &types.Sample{ - Metric: metric, - Value: floatValue, - Labels: make(map[string]string), - } - - for i := 0; i < len(labels); i++ { - for k, v := range labels[i] { - if v == "-" { - continue - } - s.Labels[k] = v - } - } - - return s -} - -func NewSamples(fields map[string]interface{}, labels ...map[string]string) []*types.Sample { - count := len(fields) - samples := make([]*types.Sample, 0, count) - - for metric, value := range fields { - floatValue, err := conv.ToFloat64(value) - if err != nil { - continue - } - samples = append(samples, NewSample(metric, floatValue, labels...)) - } - - return samples -} - -func PushSamples(slist *list.SafeList, fields map[string]interface{}, labels ...map[string]string) { - for metric, value := range fields { - floatValue, err := conv.ToFloat64(value) - if err != nil { - continue - } - slist.PushFront(NewSample(metric, floatValue, labels...)) - } -} diff --git a/inputs/kafka/README.md b/inputs/kafka/README.md new file mode 100644 index 00000000..9dfbba26 --- /dev/null +++ b/inputs/kafka/README.md @@ -0,0 +1,18 @@ +# kafka + +kafka 监控采集插件,封装kafka-exporter(https://github.com/davidmparrott/kafka_exporter)而来 + +## Configuration + +```toml +# # collect interval +# interval = 15 + +# 要监控 Kafka,首先要给出要监控的 Kafka 集群的 broker 地址(kafka_uris) +[[instances]] + +``` + +## 监控大盘和告警规则 + +本 README 的同级目录,大家可以看到 dashboard.json
就是监控大盘,导入夜莺就可以使用,alerts.json 是告警规则,也是导入夜莺就可以使用。 \ No newline at end of file diff --git a/inputs/kafka/alerts.json b/inputs/kafka/alerts.json new file mode 100644 index 00000000..e69de29b diff --git a/inputs/kafka/dashboard.json b/inputs/kafka/dashboard.json new file mode 100644 index 00000000..e69de29b diff --git a/inputs/kafka/exporter/exporter.go b/inputs/kafka/exporter/exporter.go new file mode 100644 index 00000000..558a2cdf --- /dev/null +++ b/inputs/kafka/exporter/exporter.go @@ -0,0 +1,860 @@ +package exporter + +import ( + "crypto/tls" + "crypto/x509" + "fmt" + "io/ioutil" + "os" + "regexp" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/Shopify/sarama" + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/krallistic/kazoo-go" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + namespace = "kafka" + clientID = "kafka_exporter" +) + +var ( + clusterBrokers *prometheus.Desc + topicPartitions *prometheus.Desc + topicCurrentOffset *prometheus.Desc + topicOldestOffset *prometheus.Desc + topicPartitionLeader *prometheus.Desc + topicPartitionReplicas *prometheus.Desc + topicPartitionInSyncReplicas *prometheus.Desc + topicPartitionUsesPreferredReplica *prometheus.Desc + topicUnderReplicatedPartition *prometheus.Desc + consumergroupCurrentOffset *prometheus.Desc + consumergroupCurrentOffsetSum *prometheus.Desc + consumergroupUncomittedOffsets *prometheus.Desc + consumergroupUncommittedOffsetsSum *prometheus.Desc + consumergroupUncommittedOffsetsZookeeper *prometheus.Desc + consumergroupMembers *prometheus.Desc + topicPartitionLagMillis *prometheus.Desc + lagDatapointUsedInterpolation *prometheus.Desc + lagDatapointUsedExtrapolation *prometheus.Desc +) + +// Exporter collects Kafka stats from the given server and exports them using +// the prometheus metrics package. 
+type Exporter struct { + client sarama.Client + topicFilter *regexp.Regexp + groupFilter *regexp.Regexp + mu sync.Mutex + useZooKeeperLag bool + zookeeperClient *kazoo.Kazoo + nextMetadataRefresh time.Time + metadataRefreshInterval time.Duration + allowConcurrent bool + sgMutex sync.Mutex + sgWaitCh chan struct{} + sgChans []chan<- prometheus.Metric + consumerGroupFetchAll bool + consumerGroupLagTable interpolationMap + kafkaOpts Options + saramaConfig *sarama.Config + logger log.Logger +} + +type Options struct { + Uri []string + UseSASL bool + UseSASLHandshake bool + SaslUsername string + SaslPassword string + SaslMechanism string + UseTLS bool + TlsCAFile string + TlsCertFile string + TlsKeyFile string + TlsInsecureSkipTLSVerify bool + KafkaVersion string + UseZooKeeperLag bool + UriZookeeper []string + Labels string + MetadataRefreshInterval string + AllowConcurrent bool + MaxOffsets int + PruneIntervalSeconds int +} + +// CanReadCertAndKey returns true if both the certificate and key files exist and are readable, +// and false if neither is. If only one of the pair is readable, it returns an error. +func CanReadCertAndKey(certPath, keyPath string) (bool, error) { + certReadable := canReadFile(certPath) + keyReadable := canReadFile(keyPath) + + if certReadable == false && keyReadable == false { + return false, nil + } + + if certReadable == false { + return false, fmt.Errorf("error reading %s, certificate and key must be supplied as a pair", certPath) + } + + if keyReadable == false { + return false, fmt.Errorf("error reading %s, certificate and key must be supplied as a pair", keyPath) + } + + return true, nil +} + +// canReadFile returns true if the file at path exists and +// can be opened for reading; otherwise it returns false. +func canReadFile(path string) bool { + f, err := os.Open(path) + if err != nil { + return false + } + + defer f.Close() + + return true +} + +// New returns an initialized Exporter.
+func New(logger log.Logger, opts Options, topicFilter, groupFilter string) (*Exporter, error) { + var zookeeperClient *kazoo.Kazoo + config := sarama.NewConfig() + config.ClientID = clientID + kafkaVersion, err := sarama.ParseKafkaVersion(opts.KafkaVersion) + if err != nil { + return nil, err + } + config.Version = kafkaVersion + + if opts.UseSASL { + // Convert to lowercase so that SHA512 and SHA256 is still valid + opts.SaslMechanism = strings.ToLower(opts.SaslMechanism) + switch opts.SaslMechanism { + case "scram-sha512": + config.Net.SASL.SCRAMClientGeneratorFunc = func() sarama.SCRAMClient { return &XDGSCRAMClient{HashGeneratorFcn: SHA512} } + config.Net.SASL.Mechanism = sarama.SASLMechanism(sarama.SASLTypeSCRAMSHA512) + case "scram-sha256": + config.Net.SASL.SCRAMClientGeneratorFunc = func() sarama.SCRAMClient { return &XDGSCRAMClient{HashGeneratorFcn: SHA256} } + config.Net.SASL.Mechanism = sarama.SASLMechanism(sarama.SASLTypeSCRAMSHA256) + + case "plain": + default: + level.Error(logger).Log("msg", "invalid sasl mechanism. 
can only be \"scram-sha256\", \"scram-sha512\" or \"plain\"", "SaslMechanism", opts.SaslMechanism) + return nil, fmt.Errorf("invalid sasl mechanism \"%s\": can only be \"scram-sha256\", \"scram-sha512\" or \"plain\"", opts.SaslMechanism) + } + + config.Net.SASL.Enable = true + config.Net.SASL.Handshake = opts.UseSASLHandshake + + if opts.SaslUsername != "" { + config.Net.SASL.User = opts.SaslUsername + } + + if opts.SaslPassword != "" { + config.Net.SASL.Password = opts.SaslPassword + } + } + + if opts.UseTLS { + config.Net.TLS.Enable = true + + config.Net.TLS.Config = &tls.Config{ + RootCAs: x509.NewCertPool(), + InsecureSkipVerify: opts.TlsInsecureSkipTLSVerify, + } + + if opts.TlsCAFile != "" { + if ca, err := ioutil.ReadFile(opts.TlsCAFile); err == nil { + config.Net.TLS.Config.RootCAs.AppendCertsFromPEM(ca) + } else { + level.Error(logger).Log("msg", "unable to open TlsCAFile", "TlsCAFile", opts.TlsCAFile) + return nil, fmt.Errorf("UseTLS is true but unable to open TlsCAFile: %s", opts.TlsCAFile) + } + } + + canReadCertAndKey, err := CanReadCertAndKey(opts.TlsCertFile, opts.TlsKeyFile) + if err != nil { + level.Error(logger).Log("msg", "Error attempting to read TlsCertFile or TlsKeyFile", "err", err.Error()) + return nil, err + } + if canReadCertAndKey { + cert, err := tls.LoadX509KeyPair(opts.TlsCertFile, opts.TlsKeyFile) + if err == nil { + config.Net.TLS.Config.Certificates = []tls.Certificate{cert} + } else { + level.Error(logger).Log("msg", "Error attempting to load X509KeyPair", "err", err.Error()) + return nil, err + } + } + } + + if opts.UseZooKeeperLag { + zookeeperClient, err = kazoo.NewKazoo(opts.UriZookeeper, nil) + } + + interval, err := time.ParseDuration(opts.MetadataRefreshInterval) + if err != nil { + level.Error(logger).Log("msg", "Error parsing metadata refresh interval", "err", err.Error()) + return nil, err + } + + config.Metadata.RefreshFrequency = interval + + client, err := sarama.NewClient(opts.Uri, config) + + if err != nil { + 
level.Error(logger).Log("msg", "Error initiating kafka client: %s", "err", err.Error()) + return nil, err + } + level.Debug(logger).Log("msg", "Done with kafka client initialization") + + // Init our exporter. + newExporter := &Exporter{ + client: client, + topicFilter: regexp.MustCompile(topicFilter), + groupFilter: regexp.MustCompile(groupFilter), + useZooKeeperLag: opts.UseZooKeeperLag, + zookeeperClient: zookeeperClient, + nextMetadataRefresh: time.Now(), + metadataRefreshInterval: interval, + allowConcurrent: opts.AllowConcurrent, + sgMutex: sync.Mutex{}, + sgWaitCh: nil, + sgChans: []chan<- prometheus.Metric{}, + consumerGroupFetchAll: config.Version.IsAtLeast(sarama.V2_0_0_0), + consumerGroupLagTable: interpolationMap{mu: sync.Mutex{}}, + kafkaOpts: opts, + saramaConfig: config, + logger: logger, + } + + level.Debug(logger).Log("msg", "Initializing metrics") + newExporter.initializeMetrics() + return newExporter, nil +} + +func (e *Exporter) fetchOffsetVersion() int16 { + version := e.client.Config().Version + if e.client.Config().Version.IsAtLeast(sarama.V2_0_0_0) { + return 4 + } else if version.IsAtLeast(sarama.V0_10_2_0) { + return 2 + } else if version.IsAtLeast(sarama.V0_8_2_2) { + return 1 + } + return 0 +} + +// Describe describes all the metrics ever exported by the Kafka exporter. It +// implements prometheus.Collector. 
+func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { + ch <- clusterBrokers + ch <- topicCurrentOffset + ch <- topicOldestOffset + ch <- topicPartitions + ch <- topicPartitionLeader + ch <- topicPartitionReplicas + ch <- topicPartitionInSyncReplicas + ch <- topicPartitionUsesPreferredReplica + ch <- topicUnderReplicatedPartition + ch <- consumergroupCurrentOffset + ch <- consumergroupCurrentOffsetSum + ch <- consumergroupUncomittedOffsets + ch <- consumergroupUncommittedOffsetsZookeeper + ch <- consumergroupUncommittedOffsetsSum + ch <- topicPartitionLagMillis + ch <- lagDatapointUsedInterpolation + ch <- lagDatapointUsedExtrapolation +} + +func (e *Exporter) Collect(ch chan<- prometheus.Metric) { + if e.allowConcurrent { + e.collect(ch) + return + } + // Locking to avoid race add + e.sgMutex.Lock() + e.sgChans = append(e.sgChans, ch) + // Safe to compare length since we own the Lock + if len(e.sgChans) == 1 { + e.sgWaitCh = make(chan struct{}) + go e.collectChans(e.sgWaitCh) + } else { + level.Info(e.logger).Log("msg", "concurrent calls detected, waiting for first to finish") + } + // Put in another variable to ensure not overwriting it in another Collect once we wait + waiter := e.sgWaitCh + e.sgMutex.Unlock() + // Released lock, we have insurance that our chan will be part of the collectChan slice + <-waiter + // collectChan finished +} + +// Collect fetches the stats from configured Kafka location and delivers them +// as Prometheus metrics. It implements prometheus.Collector. 
+func (e *Exporter) collectChans(quit chan struct{}) { + original := make(chan prometheus.Metric) + container := make([]prometheus.Metric, 0, 100) + go func() { + for metric := range original { + container = append(container, metric) + } + }() + e.collect(original) + close(original) + // Lock to avoid modification on the channel slice + e.sgMutex.Lock() + for _, ch := range e.sgChans { + for _, metric := range container { + ch <- metric + } + } + // Reset the slice + e.sgChans = e.sgChans[:0] + // Notify remaining waiting Collect they can return + close(quit) + // Release the lock so Collect can append to the slice again + e.sgMutex.Unlock() +} + +func (e *Exporter) collect(ch chan<- prometheus.Metric) { + var wg = sync.WaitGroup{} + ch <- prometheus.MustNewConstMetric( + clusterBrokers, prometheus.GaugeValue, float64(len(e.client.Brokers())), + ) + + now := time.Now() + + if now.After(e.nextMetadataRefresh) { + level.Info(e.logger).Log("msg", "Refreshing client metadata") + if err := e.client.RefreshMetadata(); err != nil { + level.Error(e.logger).Log("msg", "Error refreshing topics. Using cached topic data", "err", err.Error()) + } + + e.nextMetadataRefresh = now.Add(e.metadataRefreshInterval) + } + + topics, err := e.client.Topics() + if err != nil { + level.Error(e.logger).Log("msg", "Error getting topics: %s. 
Skipping metric generation", "err", err.Error()) + return + } + + level.Info(e.logger).Log("msg", "Generating topic metrics") + for _, topic := range topics { + wg.Add(1) + topic := topic + go func() { + defer wg.Done() + e.metricsForTopic(topic, ch) + }() + } + + level.Debug(e.logger).Log("msg", "waiting for topic metric generation to complete") + wg.Wait() + + level.Info(e.logger).Log("msg", "Generating consumergroup metrics") + if len(e.client.Brokers()) > 0 { + for _, broker := range e.client.Brokers() { + wg.Add(1) + + broker := broker + go func() { + defer wg.Done() + e.metricsForConsumerGroup(broker, ch) + }() + } + level.Debug(e.logger).Log("msg", "waiting for consumergroup metric generation to complete") + wg.Wait() + } else { + level.Error(e.logger).Log("msg", "No brokers found. Unable to generate topic metrics") + } + + level.Info(e.logger).Log("msg", "Calculating consumergroup lag") + wg.Add(1) + go func() { + defer wg.Done() + e.metricsForLag(ch) + }() + level.Debug(e.logger).Log("msg", "waiting for consumergroup lag estimation metric generation to complete") + wg.Wait() +} + +func (e *Exporter) metricsForTopic(topic string, ch chan<- prometheus.Metric) { + level.Debug(e.logger).Log("msg", "Fetching topic metrics", "topic", topic) + if e.topicFilter.MatchString(topic) { + partitions, err := e.client.Partitions(topic) + if err != nil { + level.Error(e.logger).Log("msg", "Error getting partitions for topic", "topic", topic, "err", err.Error()) + return + } + ch <- prometheus.MustNewConstMetric( + topicPartitions, prometheus.GaugeValue, float64(len(partitions)), topic, + ) + e.mu.Lock() + offset := make(map[int32]int64, len(partitions)) + e.mu.Unlock() + for _, partition := range partitions { + broker, err := e.client.Leader(topic, partition) + if err != nil { + level.Error(e.logger).Log("msg", "Error getting leader for topic/partition", "topic", topic, "partition", partition, "err", err.Error()) + } else { + ch <- prometheus.MustNewConstMetric( + 
topicPartitionLeader, prometheus.GaugeValue, float64(broker.ID()), topic, strconv.FormatInt(int64(partition), 10), + ) + } + + currentOffset, err := e.client.GetOffset(topic, partition, sarama.OffsetNewest) + if err != nil { + level.Error(e.logger).Log("msg", "Error getting offset for topic/partition", "topic", topic, "partition", partition, "err", err.Error()) + } else { + e.mu.Lock() + offset[partition] = currentOffset + e.mu.Unlock() + ch <- prometheus.MustNewConstMetric( + topicCurrentOffset, prometheus.GaugeValue, float64(currentOffset), topic, strconv.FormatInt(int64(partition), 10), + ) + } + + oldestOffset, err := e.client.GetOffset(topic, partition, sarama.OffsetOldest) + if err != nil { + level.Error(e.logger).Log("msg", "Error getting oldest offset for topic/partition", "topic", topic, "partition", partition, "err", err.Error()) + } else { + ch <- prometheus.MustNewConstMetric( + topicOldestOffset, prometheus.GaugeValue, float64(oldestOffset), topic, strconv.FormatInt(int64(partition), 10), + ) + } + + replicas, err := e.client.Replicas(topic, partition) + if err != nil { + level.Error(e.logger).Log("msg", "Error getting replicas for topic/partition", "topic", topic, "partition", partition, "err", err.Error()) + } else { + ch <- prometheus.MustNewConstMetric( + topicPartitionReplicas, prometheus.GaugeValue, float64(len(replicas)), topic, strconv.FormatInt(int64(partition), 10), + ) + } + + inSyncReplicas, err := e.client.InSyncReplicas(topic, partition) + if err != nil { + level.Error(e.logger).Log("msg", "Error getting in-sync replicas for topic/partition", "topic", topic, "partition", partition, "err", err.Error()) + } else { + ch <- prometheus.MustNewConstMetric( + topicPartitionInSyncReplicas, prometheus.GaugeValue, float64(len(inSyncReplicas)), topic, strconv.FormatInt(int64(partition), 10), + ) + } + + if broker != nil && replicas != nil && len(replicas) > 0 && broker.ID() == replicas[0] { + ch <- prometheus.MustNewConstMetric( + 
topicPartitionUsesPreferredReplica, prometheus.GaugeValue, float64(1), topic, strconv.FormatInt(int64(partition), 10), + ) + } else { + ch <- prometheus.MustNewConstMetric( + topicPartitionUsesPreferredReplica, prometheus.GaugeValue, float64(0), topic, strconv.FormatInt(int64(partition), 10), + ) + } + + if replicas != nil && inSyncReplicas != nil && len(inSyncReplicas) < len(replicas) { + ch <- prometheus.MustNewConstMetric( + topicUnderReplicatedPartition, prometheus.GaugeValue, float64(1), topic, strconv.FormatInt(int64(partition), 10), + ) + } else { + ch <- prometheus.MustNewConstMetric( + topicUnderReplicatedPartition, prometheus.GaugeValue, float64(0), topic, strconv.FormatInt(int64(partition), 10), + ) + } + + if e.useZooKeeperLag { + ConsumerGroups, err := e.zookeeperClient.Consumergroups() + + if err != nil { + level.Error(e.logger).Log("msg", "Error getting consumergroups from ZooKeeper", "err", err.Error()) + } + + for _, group := range ConsumerGroups { + offset, _ := group.FetchOffset(topic, partition) + if offset > 0 { + + consumerGroupLag := currentOffset - offset + ch <- prometheus.MustNewConstMetric( + consumergroupUncommittedOffsetsZookeeper, prometheus.GaugeValue, float64(consumerGroupLag), group.Name, topic, strconv.FormatInt(int64(partition), 10), + ) + } + } + } + } + } +} + +func (e *Exporter) metricsForConsumerGroup(broker *sarama.Broker, ch chan<- prometheus.Metric) { + level.Debug(e.logger).Log("msg", "Fetching consumer group metrics for broker", "broker", broker.ID()) + if err := broker.Open(e.client.Config()); err != nil && err != sarama.ErrAlreadyConnected { + level.Error(e.logger).Log("msg", "Error connecting to broker", "broker", broker.ID(), "err", err.Error()) + return + } + defer broker.Close() + + level.Debug(e.logger).Log("msg", "listing consumergroups for broker", "broker", broker.ID()) + groups, err := broker.ListGroups(&sarama.ListGroupsRequest{}) + if err != nil { + level.Error(e.logger).Log("msg", "Error listing 
consumergroups for broker", "broker", broker.ID(), "err", err.Error()) + return + } + groupIds := make([]string, 0) + for groupId := range groups.Groups { + if e.groupFilter.MatchString(groupId) { + groupIds = append(groupIds, groupId) + } + } + level.Debug(e.logger).Log("msg", "describing consumergroups for broker", "broker", broker.ID()) + describeGroups, err := broker.DescribeGroups(&sarama.DescribeGroupsRequest{Groups: groupIds}) + if err != nil { + level.Error(e.logger).Log("msg", "Error from broker.DescribeGroups()", "err", err.Error()) + return + } + for _, group := range describeGroups.Groups { + offsetFetchRequest := sarama.OffsetFetchRequest{ConsumerGroup: group.GroupId, Version: e.fetchOffsetVersion()} + if !e.consumerGroupFetchAll { + //TODO: currently this will never add partitions to the request since the only place insertions to the table are done is further down in this method + for topic, partitions := range e.consumerGroupLagTable.iMap[group.GroupId] { + for partition := range partitions { + offsetFetchRequest.AddPartition(topic, partition) + } + } + } + ch <- prometheus.MustNewConstMetric( + consumergroupMembers, prometheus.GaugeValue, float64(len(group.Members)), group.GroupId, + ) + level.Debug(e.logger).Log("msg", "fetching offsets for broker/group", "broker", broker.ID(), "group", group.GroupId) + if offsetFetchResponse, err := broker.FetchOffset(&offsetFetchRequest); err != nil { + level.Error(e.logger).Log("msg", "Error fetching offset for consumergroup", "group", group.GroupId, "err", err.Error()) + } else { + for topic, partitions := range offsetFetchResponse.Blocks { + if !e.topicFilter.MatchString(topic) { + continue + } + // If the topic is not consumed by that consumer group, skip it + topicConsumed := false + for _, offsetFetchResponseBlock := range partitions { + // Kafka will return -1 if there is no offset associated with a topic-partition under that consumer group + if offsetFetchResponseBlock.Offset != -1 { + topicConsumed = 
true + break + } + } + if topicConsumed { + var currentOffsetSum int64 + var lagSum int64 + for partition, offsetFetchResponseBlock := range partitions { + kerr := offsetFetchResponseBlock.Err + if kerr != sarama.ErrNoError { + level.Error(e.logger).Log("msg", "Error in response block for topic/partition", "topic", topic, "partition", partition, "err", kerr.Error()) + continue + } + currentOffset := offsetFetchResponseBlock.Offset + currentOffsetSum += currentOffset + + ch <- prometheus.MustNewConstMetric( + consumergroupCurrentOffset, prometheus.GaugeValue, float64(currentOffset), group.GroupId, topic, strconv.FormatInt(int64(partition), 10), + ) + e.mu.Lock() + // Get and insert the next offset to be produced into the interpolation map + nextOffset, err := e.client.GetOffset(topic, partition, sarama.OffsetNewest) + if err != nil { + level.Error(e.logger).Log("msg", "Error getting next offset for topic/partition", "topic", topic, "partition", partition, "err", err.Error()) + } + e.consumerGroupLagTable.createOrUpdate(group.GroupId, topic, partition, nextOffset) + + // If the topic is consumed by that consumer group, but no offset associated with the partition + // forcing lag to -1 to be able to alert on that + var lag int64 + if currentOffset == -1 { + lag = -1 + } else { + lag = nextOffset - currentOffset + lagSum += lag + } + e.mu.Unlock() + ch <- prometheus.MustNewConstMetric( + consumergroupUncomittedOffsets, prometheus.GaugeValue, float64(lag), group.GroupId, topic, strconv.FormatInt(int64(partition), 10), + ) + } + ch <- prometheus.MustNewConstMetric( + consumergroupCurrentOffsetSum, prometheus.GaugeValue, float64(currentOffsetSum), group.GroupId, topic, + ) + ch <- prometheus.MustNewConstMetric( + consumergroupUncommittedOffsetsSum, prometheus.GaugeValue, float64(lagSum), group.GroupId, topic, + ) + } + } + } + } +} + +func (e *Exporter) metricsForLag(ch chan<- prometheus.Metric) { + + admin, err := sarama.NewClusterAdminFromClient(e.client) + if err != 
nil { + level.Error(e.logger).Log("msg", "Error creating cluster admin", "err", err.Error()) + return + } + if admin == nil { + level.Error(e.logger).Log("msg", "Failed to create cluster admin") + return + } + + // Iterate over all consumergroup/topic/partitions + e.consumerGroupLagTable.mu.Lock() + for group, topics := range e.consumerGroupLagTable.iMap { + for topic, partitionMap := range topics { + var partitionKeys []int32 + // Collect partitions to create ListConsumerGroupOffsets request + for key := range partitionMap { + partitionKeys = append(partitionKeys, key) + } + + // response.Blocks is a map of topic to partition to offset + response, err := admin.ListConsumerGroupOffsets(group, map[string][]int32{ + topic: partitionKeys, + }) + if err != nil { + level.Error(e.logger).Log("msg", "Error listing offsets for", "group", group, "err", err.Error()) + } + if response == nil { + level.Error(e.logger).Log("msg", "Got nil response from ListConsumerGroupOffsets for group", "group", group) + continue + } + + for partition, offsets := range partitionMap { + if len(offsets) < 2 { + level.Debug(e.logger).Log("msg", "Insufficient data for lag calculation for group: continuing", "group", group) + continue + } + if latestConsumedOffset, ok := response.Blocks[topic][partition]; ok { + /* + Sort offset keys so we know if we have an offset to use as a left bound in our calculation + If latestConsumedOffset < smallestMappedOffset then extrapolate + Else Find two offsets that bound latestConsumedOffset + */ + var producedOffsets []int64 + for offsetKey := range offsets { + producedOffsets = append(producedOffsets, offsetKey) + } + sort.Slice(producedOffsets, func(i, j int) bool { return producedOffsets[i] < producedOffsets[j] }) + if latestConsumedOffset.Offset < producedOffsets[0] { + level.Debug(e.logger).Log("msg", "estimating lag for group/topic/partition", "group", group, "topic", topic, "partition", partition, "method", "extrapolation") + // Because we do not have 
data points that bound the latestConsumedOffset we must use extrapolation + highestOffset := producedOffsets[len(producedOffsets)-1] + lowestOffset := producedOffsets[0] + + px := float64(offsets[highestOffset].UnixNano()/1000000) - + float64(highestOffset-latestConsumedOffset.Offset)* + float64((offsets[highestOffset].Sub(offsets[lowestOffset])).Milliseconds())/float64(highestOffset-lowestOffset) + lagMillis := float64(time.Now().UnixNano()/1000000) - px + level.Debug(e.logger).Log("msg", "estimated lag for group/topic/partition (in ms)", "group", group, "topic", topic, "partition", partition, "lag", lagMillis) + + ch <- prometheus.MustNewConstMetric(lagDatapointUsedExtrapolation, prometheus.CounterValue, 1, group, topic, strconv.FormatInt(int64(partition), 10)) + ch <- prometheus.MustNewConstMetric(topicPartitionLagMillis, prometheus.GaugeValue, lagMillis, group, topic, strconv.FormatInt(int64(partition), 10)) + + } else { + level.Debug(e.logger).Log("msg", "estimating lag for group/topic/partition", "group", group, "topic", topic, "partition", partition, "method", "interpolation") + nextHigherOffset := getNextHigherOffset(producedOffsets, latestConsumedOffset.Offset) + nextLowerOffset := getNextLowerOffset(producedOffsets, latestConsumedOffset.Offset) + px := float64(offsets[nextHigherOffset].UnixNano()/1000000) - + float64(nextHigherOffset-latestConsumedOffset.Offset)* + float64((offsets[nextHigherOffset].Sub(offsets[nextLowerOffset])).Milliseconds())/float64(nextHigherOffset-nextLowerOffset) + lagMillis := float64(time.Now().UnixNano()/1000000) - px + level.Debug(e.logger).Log("msg", "estimated lag for group/topic/partition (in ms)", "group", group, "topic", topic, "partition", partition, "lag", lagMillis) + ch <- prometheus.MustNewConstMetric(lagDatapointUsedInterpolation, prometheus.CounterValue, 1, group, topic, strconv.FormatInt(int64(partition), 10)) + ch <- prometheus.MustNewConstMetric(topicPartitionLagMillis, prometheus.GaugeValue, lagMillis, group, 
topic, strconv.FormatInt(int64(partition), 10)) + } + } else { + level.Error(e.logger).Log("msg", "Could not get latest latest consumed offset", "group", group, "topic", topic, "partition", partition) + } + } + } + } + e.consumerGroupLagTable.mu.Unlock() +} + +func getNextHigherOffset(offsets []int64, k int64) int64 { + index := len(offsets) - 1 + max := offsets[index] + + for max >= k && index > 0 { + if offsets[index-1] < k { + return max + } + max = offsets[index] + index-- + } + return max +} + +func getNextLowerOffset(offsets []int64, k int64) int64 { + index := 0 + min := offsets[index] + for min <= k && index < len(offsets)-1 { + if offsets[index+1] > k { + return min + } + min = offsets[index] + index++ + } + return min +} + +//Run iMap.Prune() on an interval (default 30 seconds). A new client is created +//to avoid an issue where the client may be closed before Prune attempts to +//use it. +func (e *Exporter) RunPruner(quit chan struct{}) { + ticker := time.NewTicker(time.Duration(e.kafkaOpts.PruneIntervalSeconds) * time.Second) + + for { + select { + case <-ticker.C: + client, err := sarama.NewClient(e.kafkaOpts.Uri, e.saramaConfig) + if err != nil { + level.Error(e.logger).Log("msg", "Error initializing kafka client for RunPruner", "err", err.Error()) + return + } + e.consumerGroupLagTable.Prune(e.logger, client, e.kafkaOpts.MaxOffsets) + client.Close() + case <-quit: + ticker.Stop() + return + } + } +} + +func (e *Exporter) Close() { + e.client.Close() +} + +func (e *Exporter) initializeMetrics() { + labels := make(map[string]string) + + // Protect against empty labels + if e.kafkaOpts.Labels != "" { + for _, label := range strings.Split(e.kafkaOpts.Labels, ",") { + splitLabels := strings.Split(label, "=") + if len(splitLabels) >= 2 { + labels[splitLabels[0]] = splitLabels[1] + } + } + } + + clusterBrokers = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "brokers"), + "Number of Brokers in the Kafka Cluster.", + nil, labels, + ) + 
topicPartitions = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "topic", "partitions"), + "Number of partitions for this Topic", + []string{"topic"}, labels, + ) + topicCurrentOffset = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "topic", "partition_current_offset"), + "Current Offset of a Broker at Topic/Partition", + []string{"topic", "partition"}, labels, + ) + topicOldestOffset = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "topic", "partition_oldest_offset"), + "Oldest Offset of a Broker at Topic/Partition", + []string{"topic", "partition"}, labels, + ) + + topicPartitionLeader = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "topic", "partition_leader"), + "Leader Broker ID of this Topic/Partition", + []string{"topic", "partition"}, labels, + ) + + topicPartitionReplicas = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "topic", "partition_replicas"), + "Number of Replicas for this Topic/Partition", + []string{"topic", "partition"}, labels, + ) + + topicPartitionInSyncReplicas = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "topic", "partition_in_sync_replica"), + "Number of In-Sync Replicas for this Topic/Partition", + []string{"topic", "partition"}, labels, + ) + + topicPartitionUsesPreferredReplica = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "topic", "partition_leader_is_preferred"), + "1 if Topic/Partition is using the Preferred Broker", + []string{"topic", "partition"}, labels, + ) + + topicUnderReplicatedPartition = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "topic", "partition_under_replicated_partition"), + "1 if Topic/Partition is under Replicated", + []string{"topic", "partition"}, labels, + ) + + consumergroupCurrentOffset = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "consumergroup", "current_offset"), + "Current Offset of a ConsumerGroup at Topic/Partition", + []string{"consumergroup", "topic", "partition"}, labels, + ) + + 
consumergroupCurrentOffsetSum = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "consumergroup", "current_offset_sum"), + "Current Offset of a ConsumerGroup at Topic for all partitions", + []string{"consumergroup", "topic"}, labels, + ) + + consumergroupUncomittedOffsets = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "consumergroup", "uncommitted_offsets"), + "Current Approximate count of uncommitted offsets for a ConsumerGroup at Topic/Partition", + []string{"consumergroup", "topic", "partition"}, labels, + ) + + consumergroupUncommittedOffsetsZookeeper = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "consumergroupzookeeper", "uncommitted_offsets_zookeeper"), + "Current Approximate count of uncommitted offsets(zookeeper) for a ConsumerGroup at Topic/Partition", + []string{"consumergroup", "topic", "partition"}, nil, + ) + + consumergroupUncommittedOffsetsSum = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "consumergroup", "uncommitted_offsets_sum"), + "Current Approximate count of uncommitted offsets for a ConsumerGroup at Topic for all partitions", + []string{"consumergroup", "topic"}, labels, + ) + + consumergroupMembers = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "consumergroup", "members"), + "Amount of members in a consumer group", + []string{"consumergroup"}, labels, + ) + + topicPartitionLagMillis = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "consumer_lag", "millis"), + "Current approximation of consumer lag for a ConsumerGroup at Topic/Partition", + []string{"consumergroup", "topic", "partition"}, + labels, + ) + + lagDatapointUsedInterpolation = prometheus.NewDesc(prometheus.BuildFQName(namespace, "consumer_lag", "interpolation"), + "Indicates that a consumer group lag estimation used interpolation", + []string{"consumergroup", "topic", "partition"}, + labels, + ) + + lagDatapointUsedExtrapolation = prometheus.NewDesc(prometheus.BuildFQName(namespace, "consumer_lag", "extrapolation"), + 
"Indicates that a consumer group lag estimation used extrapolation", + []string{"consumergroup", "topic", "partition"}, + labels, + ) +} diff --git a/inputs/kafka/exporter/interpolation_map.go b/inputs/kafka/exporter/interpolation_map.go new file mode 100644 index 00000000..c014914e --- /dev/null +++ b/inputs/kafka/exporter/interpolation_map.go @@ -0,0 +1,115 @@ +package exporter + +import ( + "sort" + "sync" + "time" + + "github.com/Shopify/sarama" + "github.com/go-kit/log" + "github.com/go-kit/log/level" +) + +type interpolationMap struct { + iMap map[string]map[string]map[int32]map[int64]time.Time + mu sync.Mutex +} + +// Prune removes any entries from the Interpolation map that are not returned by the +// ClusterAdmin. An example would be when a consumer group or topic has been deleted +// from the cluster, the Interpolation map may still have cached offsets. Any partition +// that contains more offset entries than maxNumberOfOffsets will have the oldest +// offsets pruned +func (i *interpolationMap) Prune(logger log.Logger, client sarama.Client, maxOffsets int) { + level.Debug(logger).Log("msg", "pruning iMap data", "maxOffsets", maxOffsets) + if i.iMap == nil { + level.Info(logger).Log("msg", "Interpolation map is nil, nothing to prune") + return + } + admin, err := sarama.NewClusterAdminFromClient(client) + if err != nil { + level.Error(logger).Log("msg", "Error creating cluster admin", "err", err.Error()) + } + if admin == nil { + level.Error(logger).Log("msg", "Failed to create cluster admin") + return + } + + defer admin.Close() + + groupsMap, err := admin.ListConsumerGroups() + groupKeys := make([]string, len(groupsMap)) + for group, _ := range groupsMap { + groupKeys = append(groupKeys, group) + } + + topicsMap, err := admin.ListTopics() + topicKeys := make([]string, len(topicsMap)) + for topic, _ := range topicsMap { + topicKeys = append(topicKeys, topic) + } + + i.mu.Lock() + level.Debug(logger).Log("msg", "iMap locked for pruning") + start := 
time.Now() + + for group, _ := range i.iMap { + if !contains(groupKeys, group) { + delete(i.iMap, group) + continue + } + for topic, partitions := range i.iMap[group] { + if !contains(topicKeys, topic) { + delete(i.iMap[group], topic) + continue + } + for partition, offsets := range partitions { + if len(offsets) > maxOffsets { + offsetKeys := make([]int64, len(offsets)) + for offset, _ := range offsets { + offsetKeys = append(offsetKeys, offset) + } + sort.Slice(offsetKeys, func(i, j int) bool { return offsetKeys[i] < offsetKeys[j] }) + offsetKeys = offsetKeys[0 : len(offsetKeys)-maxOffsets] + level.Debug(logger).Log("msg", "pruning offsets", "count", len(offsetKeys), "group", group, "topic", topic, "partition", partition) + for _, offsetToRemove := range offsetKeys { + delete(i.iMap[group][topic][partition], offsetToRemove) + } + } + } + } + } + level.Debug(logger).Log("msg", "pruning complete", "duration", time.Since(start).String()) + i.mu.Unlock() +} + +// Lazily create the interpolation map as we see new group/topic/partition/offset +func (i *interpolationMap) createOrUpdate(group, topic string, partition int32, offset int64) { + i.mu.Lock() + if i.iMap == nil { + i.iMap = make(map[string]map[string]map[int32]map[int64]time.Time) + } + if fetchedGroup, ok := i.iMap[group]; ok { + if fetchedTopic, ok := fetchedGroup[topic]; ok { + if fetchedPartition, ok := fetchedTopic[partition]; ok { + fetchedPartition[offset] = time.Now() + } else { + fetchedTopic[partition] = make(map[int64]time.Time) + } + } else { + fetchedGroup[topic] = make(map[int32]map[int64]time.Time) + } + } else { + i.iMap[group] = make(map[string]map[int32]map[int64]time.Time) + } + i.mu.Unlock() +} + +func contains(keys []string, v string) bool { + for _, k := range keys { + if k == v { + return true + } + } + return false +} diff --git a/inputs/kafka/exporter/scram_client.go b/inputs/kafka/exporter/scram_client.go new file mode 100644 index 00000000..8404c235 --- /dev/null +++ 
b/inputs/kafka/exporter/scram_client.go @@ -0,0 +1,36 @@ +package exporter + +import ( + "crypto/sha256" + "crypto/sha512" + "hash" + + "github.com/xdg/scram" +) + +var SHA256 scram.HashGeneratorFcn = func() hash.Hash { return sha256.New() } +var SHA512 scram.HashGeneratorFcn = func() hash.Hash { return sha512.New() } + +type XDGSCRAMClient struct { + *scram.Client + *scram.ClientConversation + scram.HashGeneratorFcn +} + +func (x *XDGSCRAMClient) Begin(userName, password, authzID string) (err error) { + x.Client, err = x.HashGeneratorFcn.NewClient(userName, password, authzID) + if err != nil { + return err + } + x.ClientConversation = x.Client.NewConversation() + return nil +} + +func (x *XDGSCRAMClient) Step(challenge string) (response string, err error) { + response, err = x.ClientConversation.Step(challenge) + return +} + +func (x *XDGSCRAMClient) Done() bool { + return x.ClientConversation.Done() +} diff --git a/inputs/kafka/kafka.go b/inputs/kafka/kafka.go new file mode 100644 index 00000000..8ccc0c48 --- /dev/null +++ b/inputs/kafka/kafka.go @@ -0,0 +1,260 @@ +package tpl + +import ( + "fmt" + "log" + "os" + "strings" + "sync" + "sync/atomic" + + "flashcat.cloud/categraf/config" + "flashcat.cloud/categraf/inputs" + "flashcat.cloud/categraf/inputs/kafka/exporter" + "flashcat.cloud/categraf/types" + "github.com/Shopify/sarama" + "github.com/go-kit/log/level" + "github.com/toolkits/pkg/container/list" + + klog "github.com/go-kit/log" +) + +const inputName = "kafka" + +type Kafka struct { + config.Interval + counter uint64 + waitgrp sync.WaitGroup + Instances []*Instance `toml:"instances"` +} + +func init() { + inputs.Add(inputName, func() inputs.Input { + return &Kafka{} + }) +} + +func (r *Kafka) Prefix() string { + return "" +} + +func (r *Kafka) Init() error { + if len(r.Instances) == 0 { + return types.ErrInstancesEmpty + } + + for i := 0; i < len(r.Instances); i++ { + if err := r.Instances[i].Init(); err != nil { + return err + } + } + + return nil +} + 
+func (r *Kafka) Drop() { + for _, i := range r.Instances { + if i == nil { + continue + } + + if i.e != nil { + i.e.Close() + } + } +} + +func (r *Kafka) Gather(slist *list.SafeList) { + atomic.AddUint64(&r.counter, 1) + + for i := range r.Instances { + ins := r.Instances[i] + + r.waitgrp.Add(1) + go func(slist *list.SafeList, ins *Instance) { + defer r.waitgrp.Done() + + if ins.IntervalTimes > 0 { + counter := atomic.LoadUint64(&r.counter) + if counter%uint64(ins.IntervalTimes) != 0 { + return + } + } + + ins.gatherOnce(slist) + }(slist, ins) + } + + r.waitgrp.Wait() +} + +type Instance struct { + Labels map[string]string `toml:"labels"` + IntervalTimes int64 `toml:"interval_times"` + LogLevel string `toml:"log_level"` + + // Address (host:port) of Kafka server. + KafkaURIs []string `toml:"kafka_uris,omitempty"` + + // Connect using SASL/PLAIN + UseSASL bool `toml:"use_sasl,omitempty"` + + // Only set this to false if using a non-Kafka SASL proxy + UseSASLHandshake *bool `toml:"use_sasl_handshake,omitempty"` + + // SASL user name + SASLUsername string `toml:"sasl_username,omitempty"` + + // SASL user password + SASLPassword string `toml:"sasl_password,omitempty"` + + // The SASL SCRAM SHA algorithm sha256 or sha512 as mechanism + SASLMechanism string `toml:"sasl_mechanism,omitempty"` + + // Connect using TLS + UseTLS bool `toml:"use_tls,omitempty"` + + // The optional certificate authority file for TLS client authentication + CAFile string `toml:"ca_file,omitempty"` + + // The optional certificate file for TLS client authentication + CertFile string `toml:"cert_file,omitempty"` + + // The optional key file for TLS client authentication + KeyFile string `toml:"key_file,omitempty"` + + // If true, the server's certificate will not be checked for validity. 
This will make your HTTPS connections insecure + InsecureSkipVerify bool `toml:"insecure_skip_verify,omitempty"` + + // Kafka broker version + KafkaVersion string `toml:"kafka_version,omitempty"` + + // if you need to use a group from zookeeper + UseZooKeeperLag bool `toml:"use_zookeeper_lag,omitempty"` + + // Address array (hosts) of zookeeper server. + ZookeeperURIs []string `toml:"zookeeper_uris,omitempty"` + + // Metadata refresh interval + MetadataRefreshInterval string `toml:"metadata_refresh_interval,omitempty"` + + // If true, all scrapes will trigger kafka operations otherwise, they will share results. WARN: This should be disabled on large clusters + AllowConcurrent *bool `toml:"allow_concurrency,omitempty"` + + // Maximum number of offsets to store in the interpolation table for a partition + MaxOffsets int `toml:"max_offsets,omitempty"` + + // How frequently should the interpolation table be pruned, in seconds + PruneIntervalSeconds int `toml:"prune_interval_seconds,omitempty"` + + // Regex filter for topics to be monitored + TopicsFilter string `toml:"topics_filter_regex,omitempty"` + + // Regex filter for consumer groups to be monitored + GroupFilter string `toml:"groups_filter_regex,omitempty"` + + l klog.Logger `toml:"-"` + e *exporter.Exporter `toml:"-"` +} + +func (ins *Instance) Init() error { + if len(ins.KafkaURIs) == 0 || ins.KafkaURIs[0] == "" { + return fmt.Errorf("kafka_uris must be specified") + } + if ins.UseTLS && (ins.CertFile == "" || ins.KeyFile == "") { + return fmt.Errorf("tls is enabled but key pair was not provided") + } + if ins.UseSASL && (ins.SASLPassword == "" || ins.SASLUsername == "") { + return fmt.Errorf("SASL is enabled but username or password was not provided") + } + if ins.UseZooKeeperLag && (len(ins.ZookeeperURIs) == 0 || ins.ZookeeperURIs[0] == "") { + return fmt.Errorf("zookeeper lag is enabled but no zookeeper uri was provided") + } + + // default options + if ins.UseSASLHandshake == nil { + flag := true + 
ins.UseSASLHandshake = &flag + } + if len(ins.KafkaVersion) == 0 { + ins.KafkaVersion = sarama.V2_0_0_0.String() + } + if len(ins.MetadataRefreshInterval) == 0 { + ins.MetadataRefreshInterval = "1s" + } + if ins.AllowConcurrent == nil { + flag := true + ins.AllowConcurrent = &flag + } + if ins.MaxOffsets <= 0 { + ins.MaxOffsets = 1000 + } + if ins.PruneIntervalSeconds <= 0 { + ins.PruneIntervalSeconds = 30 + } + if len(ins.TopicsFilter) == 0 { + ins.TopicsFilter = ".*" + } + if len(ins.GroupFilter) == 0 { + ins.GroupFilter = ".*" + } + + options := exporter.Options{ + Uri: ins.KafkaURIs, + UseSASL: ins.UseSASL, + UseSASLHandshake: *ins.UseSASLHandshake, + SaslUsername: ins.SASLUsername, + SaslPassword: string(ins.SASLPassword), + SaslMechanism: ins.SASLMechanism, + UseTLS: ins.UseTLS, + TlsCAFile: ins.CAFile, + TlsCertFile: ins.CertFile, + TlsKeyFile: ins.KeyFile, + TlsInsecureSkipTLSVerify: ins.InsecureSkipVerify, + KafkaVersion: ins.KafkaVersion, + UseZooKeeperLag: ins.UseZooKeeperLag, + UriZookeeper: ins.ZookeeperURIs, + MetadataRefreshInterval: ins.MetadataRefreshInterval, + AllowConcurrent: *ins.AllowConcurrent, + MaxOffsets: ins.MaxOffsets, + PruneIntervalSeconds: ins.PruneIntervalSeconds, + } + + encLabels := []string{} + for k, v := range ins.Labels { + encLabels = append(encLabels, fmt.Sprintf("%s=%s", k, v)) + } + options.Labels = strings.Join(encLabels, ",") + + ins.l = level.NewFilter(klog.NewLogfmtLogger(klog.NewSyncWriter(os.Stderr)), levelFilter(ins.LogLevel)) + + e, err := exporter.New(ins.l, options, ins.TopicsFilter, ins.GroupFilter) + if err != nil { + return fmt.Errorf("could not instantiate kafka lag exporter: %w", err) + } + + ins.e = e + return nil +} + +func (ins *Instance) gatherOnce(slist *list.SafeList) { + err := inputs.Collect(ins.e, slist) + if err != nil { + log.Println("E! 
failed to collect metrics:", err) + } +} + +func levelFilter(l string) level.Option { + switch l { + case "debug": + return level.AllowDebug() + case "info": + return level.AllowInfo() + case "warn": + return level.AllowWarn() + case "error": + return level.AllowError() + default: + return level.AllowAll() + } +} diff --git a/inputs/kernel/kernel.go b/inputs/kernel/kernel.go index 33177db6..2090f9ba 100644 --- a/inputs/kernel/kernel.go +++ b/inputs/kernel/kernel.go @@ -117,7 +117,7 @@ func (s *KernelStats) Gather(slist *list.SafeList) { } } - inputs.PushSamples(slist, fields) + types.PushSamples(slist, fields) } func (s *KernelStats) getProcStat() ([]byte, error) { diff --git a/inputs/kernel_vmstat/kernel_vmstat.go b/inputs/kernel_vmstat/kernel_vmstat.go index 693e6071..12463168 100644 --- a/inputs/kernel_vmstat/kernel_vmstat.go +++ b/inputs/kernel_vmstat/kernel_vmstat.go @@ -73,7 +73,7 @@ func (s *KernelVmstat) Gather(slist *list.SafeList) { } } - inputs.PushSamples(slist, fields) + types.PushSamples(slist, fields) } func (s *KernelVmstat) getProcVmstat() ([]byte, error) { diff --git a/inputs/kubernetes/kubernetes.go b/inputs/kubernetes/kubernetes.go index 0bddaef5..875f937b 100644 --- a/inputs/kubernetes/kubernetes.go +++ b/inputs/kubernetes/kubernetes.go @@ -146,11 +146,11 @@ func (ins *Instance) gatherOnce(slist *list.SafeList) { err := ins.LoadJSON(urlpath, summaryMetrics) if err != nil { log.Println("E! 
failed to load", urlpath, "error:", err) - slist.PushFront(inputs.NewSample("kubelet_up", 0, ins.Labels)) + slist.PushFront(types.NewSample("kubelet_up", 0, ins.Labels)) return } - slist.PushFront(inputs.NewSample("kubelet_up", 1, ins.Labels)) + slist.PushFront(types.NewSample("kubelet_up", 1, ins.Labels)) podInfos, err := ins.gatherPodInfo(ins.URL) if err != nil { @@ -207,7 +207,7 @@ func (ins *Instance) buildPodMetrics(summaryMetrics *SummaryMetrics, podInfo []M fields["pod_container_logsfs_available_bytes"] = container.LogsFS.AvailableBytes fields["pod_container_logsfs_capacity_bytes"] = container.LogsFS.CapacityBytes fields["pod_container_logsfs_used_bytes"] = container.LogsFS.UsedBytes - inputs.PushSamples(slist, fields, tags, ins.Labels) + types.PushSamples(slist, fields, tags, ins.Labels) } } @@ -226,7 +226,7 @@ func (ins *Instance) buildPodMetrics(summaryMetrics *SummaryMetrics, podInfo []M fields["pod_volume_available_bytes"] = volume.AvailableBytes fields["pod_volume_capacity_bytes"] = volume.CapacityBytes fields["pod_volume_used_bytes"] = volume.UsedBytes - inputs.PushSamples(slist, fields, tags, ins.Labels) + types.PushSamples(slist, fields, tags, ins.Labels) } } @@ -244,7 +244,7 @@ func (ins *Instance) buildPodMetrics(summaryMetrics *SummaryMetrics, podInfo []M fields["pod_network_rx_errors"] = pod.Network.RXErrors fields["pod_network_tx_bytes"] = pod.Network.TXBytes fields["pod_network_tx_errors"] = pod.Network.TXErrors - inputs.PushSamples(slist, fields, tags, ins.Labels) + types.PushSamples(slist, fields, tags, ins.Labels) } } } @@ -269,7 +269,7 @@ func (ins *Instance) buildSystemContainerMetrics(summaryMetrics *SummaryMetrics, fields["system_container_logsfs_available_bytes"] = container.LogsFS.AvailableBytes fields["system_container_logsfs_capacity_bytes"] = container.LogsFS.CapacityBytes - inputs.PushSamples(slist, fields, tags, ins.Labels) + types.PushSamples(slist, fields, tags, ins.Labels) } } @@ -297,7 +297,7 @@ func (ins *Instance) 
buildNodeMetrics(summaryMetrics *SummaryMetrics, slist *lis fields["node_runtime_image_fs_capacity_bytes"] = summaryMetrics.Node.Runtime.ImageFileSystem.CapacityBytes fields["node_runtime_image_fs_used_bytes"] = summaryMetrics.Node.Runtime.ImageFileSystem.UsedBytes - inputs.PushSamples(slist, fields, tags, ins.Labels) + types.PushSamples(slist, fields, tags, ins.Labels) } func (ins *Instance) gatherPodInfo(baseURL string) ([]Metadata, error) { diff --git a/inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go b/inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go index 3412ed2e..2a1c1273 100644 --- a/inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go +++ b/inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go @@ -67,7 +67,7 @@ func (s *SysctlFS) Gather(slist *list.SafeList) { log.Println("E! failed to gather file-nr:", err) } - inputs.PushSamples(slist, fields) + types.PushSamples(slist, fields) } func (s *SysctlFS) gatherOne(name string, fields map[string]interface{}) error { diff --git a/inputs/mem/mem.go b/inputs/mem/mem.go index 536d7838..9e477721 100644 --- a/inputs/mem/mem.go +++ b/inputs/mem/mem.go @@ -7,6 +7,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/inputs/system" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -109,5 +110,5 @@ func (s *MemStats) Gather(slist *list.SafeList) { } } - inputs.PushSamples(slist, fields) + types.PushSamples(slist, fields) } diff --git a/inputs/mysql/binlog.go b/inputs/mysql/binlog.go index b569e8e5..94d31f8f 100644 --- a/inputs/mysql/binlog.go +++ b/inputs/mysql/binlog.go @@ -6,8 +6,8 @@ import ( "strconv" "strings" - "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tagx" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -66,11 +66,11 @@ func (m *MySQL) gatherBinlog(slist *list.SafeList, ins *Instance, db *sql.DB, gl } tags := tagx.Copy(globalTags) - slist.PushFront(inputs.NewSample("binlog_size_bytes", size, 
tags)) - slist.PushFront(inputs.NewSample("binlog_file_count", count, tags)) + slist.PushFront(types.NewSample("binlog_size_bytes", size, tags)) + slist.PushFront(types.NewSample("binlog_file_count", count, tags)) value, err := strconv.ParseFloat(strings.Split(filename, ".")[1], 64) if err == nil { - slist.PushFront(inputs.NewSample("binlog_file_number", value, tags)) + slist.PushFront(types.NewSample("binlog_file_number", value, tags)) } } diff --git a/inputs/mysql/custom_queries.go b/inputs/mysql/custom_queries.go index 433b2775..ff833724 100644 --- a/inputs/mysql/custom_queries.go +++ b/inputs/mysql/custom_queries.go @@ -8,9 +8,9 @@ import ( "sync" "time" - "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/conv" "flashcat.cloud/categraf/pkg/tagx" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -93,10 +93,10 @@ func (m *MySQL) parseRow(row map[string]string, query QueryConfig, slist *list.S } if query.FieldToAppend == "" { - slist.PushFront(inputs.NewSample(query.Mesurement+"_"+column, value, labels)) + slist.PushFront(types.NewSample(query.Mesurement+"_"+column, value, labels)) } else { suffix := cleanName(row[query.FieldToAppend]) - slist.PushFront(inputs.NewSample(query.Mesurement+"_"+suffix+"_"+column, value, labels)) + slist.PushFront(types.NewSample(query.Mesurement+"_"+suffix+"_"+column, value, labels)) } } diff --git a/inputs/mysql/engine_innodb.go b/inputs/mysql/engine_innodb.go index e05e84b5..46f47e8d 100644 --- a/inputs/mysql/engine_innodb.go +++ b/inputs/mysql/engine_innodb.go @@ -7,8 +7,8 @@ import ( "strconv" "strings" - "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tagx" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -43,19 +43,19 @@ func (m *MySQL) gatherEngineInnodbStatus(slist *list.SafeList, ins *Instance, db if err != nil { continue } - slist.PushFront(inputs.NewSample("engine_innodb_queries_inside_innodb", value, tags)) + 
slist.PushFront(types.NewSample("engine_innodb_queries_inside_innodb", value, tags)) value, err = strconv.ParseFloat(data[2], 64) if err != nil { continue } - slist.PushFront(inputs.NewSample("engine_innodb_queries_in_queue", value, tags)) + slist.PushFront(types.NewSample("engine_innodb_queries_in_queue", value, tags)) } else if data := rViews.FindStringSubmatch(line); data != nil { value, err := strconv.ParseFloat(data[1], 64) if err != nil { continue } - slist.PushFront(inputs.NewSample("engine_innodb_read_views_open_inside_innodb", value, tags)) + slist.PushFront(types.NewSample("engine_innodb_read_views_open_inside_innodb", value, tags)) } } } diff --git a/inputs/mysql/engine_innodb_compute.go b/inputs/mysql/engine_innodb_compute.go index 0853adaa..a4616653 100644 --- a/inputs/mysql/engine_innodb_compute.go +++ b/inputs/mysql/engine_innodb_compute.go @@ -3,8 +3,8 @@ package mysql import ( "database/sql" - "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tagx" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -22,14 +22,14 @@ func (m *MySQL) gatherEngineInnodbStatusCompute(slist *list.SafeList, ins *Insta pageUtil = pageUsed / cache["innodb_buffer_pool_pages_total"] * 100 } - slist.PushFront(inputs.NewSample("global_status_buffer_pool_bytes", byteUsed, tags, map[string]string{"state": "used"})) - slist.PushFront(inputs.NewSample("global_status_buffer_pool_bytes", byteData, tags, map[string]string{"state": "data"})) - slist.PushFront(inputs.NewSample("global_status_buffer_pool_bytes", byteFree, tags, map[string]string{"state": "free"})) - slist.PushFront(inputs.NewSample("global_status_buffer_pool_bytes", byteTotal, tags, map[string]string{"state": "total"})) - slist.PushFront(inputs.NewSample("global_status_buffer_pool_bytes", byteDirty, tags, map[string]string{"state": "dirty"})) - slist.PushFront(inputs.NewSample("global_status_buffer_pool_pages_utilization", pageUtil, tags)) + 
slist.PushFront(types.NewSample("global_status_buffer_pool_bytes", byteUsed, tags, map[string]string{"state": "used"})) + slist.PushFront(types.NewSample("global_status_buffer_pool_bytes", byteData, tags, map[string]string{"state": "data"})) + slist.PushFront(types.NewSample("global_status_buffer_pool_bytes", byteFree, tags, map[string]string{"state": "free"})) + slist.PushFront(types.NewSample("global_status_buffer_pool_bytes", byteTotal, tags, map[string]string{"state": "total"})) + slist.PushFront(types.NewSample("global_status_buffer_pool_bytes", byteDirty, tags, map[string]string{"state": "dirty"})) + slist.PushFront(types.NewSample("global_status_buffer_pool_pages_utilization", pageUtil, tags)) if ins.ExtraInnodbMetrics { - slist.PushFront(inputs.NewSample("global_status_buffer_pool_pages", pageUsed, tags, map[string]string{"state": "used"})) + slist.PushFront(types.NewSample("global_status_buffer_pool_pages", pageUsed, tags, map[string]string{"state": "used"})) } } diff --git a/inputs/mysql/global_status.go b/inputs/mysql/global_status.go index 0f55ab37..ffba6f83 100644 --- a/inputs/mysql/global_status.go +++ b/inputs/mysql/global_status.go @@ -8,8 +8,8 @@ import ( "strings" "time" - "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tagx" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -62,42 +62,42 @@ func (m *MySQL) gatherGlobalStatus(slist *list.SafeList, ins *Instance, db *sql. match := globalStatusRE.FindStringSubmatch(key) if match == nil { - slist.PushFront(inputs.NewSample("global_status_"+key, floatVal, tags)) + slist.PushFront(types.NewSample("global_status_"+key, floatVal, tags)) continue } switch match[1] { case "com": // Total number of executed MySQL commands. 
- slist.PushFront(inputs.NewSample("global_status_commands_total", floatVal, tags, map[string]string{"command": match[2]})) + slist.PushFront(types.NewSample("global_status_commands_total", floatVal, tags, map[string]string{"command": match[2]})) case "handler": // Total number of executed MySQL handlers. - slist.PushFront(inputs.NewSample("global_status_handlers_total", floatVal, tags, map[string]string{"handler": match[2]})) + slist.PushFront(types.NewSample("global_status_handlers_total", floatVal, tags, map[string]string{"handler": match[2]})) case "connection_errors": // Total number of MySQL connection errors. - slist.PushFront(inputs.NewSample("global_status_connection_errors_total", floatVal, tags, map[string]string{"error": match[2]})) + slist.PushFront(types.NewSample("global_status_connection_errors_total", floatVal, tags, map[string]string{"error": match[2]})) case "innodb_buffer_pool_pages": switch match[2] { case "data", "free", "misc", "old", "total", "dirty": // Innodb buffer pool pages by state. - slist.PushFront(inputs.NewSample("global_status_buffer_pool_pages", floatVal, tags, map[string]string{"state": match[2]})) + slist.PushFront(types.NewSample("global_status_buffer_pool_pages", floatVal, tags, map[string]string{"state": match[2]})) default: // Innodb buffer pool page state changes. - slist.PushFront(inputs.NewSample("global_status_buffer_pool_page_changes_total", floatVal, tags, map[string]string{"operation": match[2]})) + slist.PushFront(types.NewSample("global_status_buffer_pool_page_changes_total", floatVal, tags, map[string]string{"operation": match[2]})) } case "innodb_rows": // Total number of MySQL InnoDB row operations. 
- slist.PushFront(inputs.NewSample("global_status_innodb_row_ops_total", floatVal, tags, map[string]string{"operation": match[2]})) + slist.PushFront(types.NewSample("global_status_innodb_row_ops_total", floatVal, tags, map[string]string{"operation": match[2]})) case "performance_schema": // Total number of MySQL instrumentations that could not be loaded or created due to memory constraints. - slist.PushFront(inputs.NewSample("global_status_performance_schema_lost_total", floatVal, tags, map[string]string{"instrumentation": match[2]})) + slist.PushFront(types.NewSample("global_status_performance_schema_lost_total", floatVal, tags, map[string]string{"instrumentation": match[2]})) } } } // mysql_galera_variables_info metric. if textItems["wsrep_local_state_uuid"] != "" { - slist.PushFront(inputs.NewSample("galera_status_info", 1, tags, map[string]string{ + slist.PushFront(types.NewSample("galera_status_info", 1, tags, map[string]string{ "wsrep_local_state_uuid": textItems["wsrep_local_state_uuid"], "wsrep_cluster_state_uuid": textItems["wsrep_cluster_state_uuid"], "wsrep_provider_version": textItems["wsrep_provider_version"], @@ -134,7 +134,7 @@ func (m *MySQL) gatherGlobalStatus(slist *list.SafeList, ins *Instance, db *sql. 
if evsParsingSuccess { for _, v := range evsMap { - slist.PushFront(inputs.NewSample("galera_evs_repl_latency_"+v.name, v.value, tags)) + slist.PushFront(types.NewSample("galera_evs_repl_latency_"+v.name, v.value, tags)) } } } diff --git a/inputs/mysql/global_variables.go b/inputs/mysql/global_variables.go index 89c9f288..29a85bd3 100644 --- a/inputs/mysql/global_variables.go +++ b/inputs/mysql/global_variables.go @@ -7,8 +7,8 @@ import ( "strconv" "strings" - "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tagx" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -59,12 +59,12 @@ func (m *MySQL) gatherGlobalVariables(slist *list.SafeList, ins *Instance, db *s continue } - slist.PushFront(inputs.NewSample("global_variables_"+key, floatVal, tags)) + slist.PushFront(types.NewSample("global_variables_"+key, floatVal, tags)) continue } } - slist.PushFront(inputs.NewSample("version_info", 1, tags, map[string]string{ + slist.PushFront(types.NewSample("version_info", 1, tags, map[string]string{ "version": textItems["version"], "innodb_version": textItems["innodb_version"], "version_comment": textItems["version_comment"], @@ -73,14 +73,14 @@ func (m *MySQL) gatherGlobalVariables(slist *list.SafeList, ins *Instance, db *s // mysql_galera_variables_info metric. // PXC/Galera variables information. if textItems["wsrep_cluster_name"] != "" { - slist.PushFront(inputs.NewSample("galera_variables_info", 1, tags, map[string]string{ + slist.PushFront(types.NewSample("galera_variables_info", 1, tags, map[string]string{ "wsrep_cluster_name": textItems["wsrep_cluster_name"], })) } // mysql_galera_gcache_size_bytes metric. 
if textItems["wsrep_provider_options"] != "" { - slist.PushFront(inputs.NewSample("galera_gcache_size_bytes", parseWsrepProviderOptions(textItems["wsrep_provider_options"]), tags)) + slist.PushFront(types.NewSample("galera_gcache_size_bytes", parseWsrepProviderOptions(textItems["wsrep_provider_options"]), tags)) } if textItems["transaction_isolation"] != "" || textItems["tx_isolation"] != "" { @@ -89,7 +89,7 @@ func (m *MySQL) gatherGlobalVariables(slist *list.SafeList, ins *Instance, db *s level = textItems["tx_isolation"] } - slist.PushFront(inputs.NewSample("transaction_isolation", 1, tags, map[string]string{"level": level})) + slist.PushFront(types.NewSample("transaction_isolation", 1, tags, map[string]string{"level": level})) } } diff --git a/inputs/mysql/mysql.go b/inputs/mysql/mysql.go index 09e4761e..49135206 100644 --- a/inputs/mysql/mysql.go +++ b/inputs/mysql/mysql.go @@ -215,12 +215,12 @@ func (m *MySQL) gatherOnce(slist *list.SafeList, ins *Instance) { // scrape use seconds defer func(begun time.Time) { use := time.Since(begun).Seconds() - slist.PushFront(inputs.NewSample("scrape_use_seconds", use, tags)) + slist.PushFront(types.NewSample("scrape_use_seconds", use, tags)) }(begun) db, err := sql.Open("mysql", ins.dsn) if err != nil { - slist.PushFront(inputs.NewSample("up", 0, tags)) + slist.PushFront(types.NewSample("up", 0, tags)) log.Println("E! failed to open mysql:", err) return } @@ -232,12 +232,12 @@ func (m *MySQL) gatherOnce(slist *list.SafeList, ins *Instance) { db.SetConnMaxLifetime(time.Minute) if err = db.Ping(); err != nil { - slist.PushFront(inputs.NewSample("up", 0, tags)) + slist.PushFront(types.NewSample("up", 0, tags)) log.Println("E! 
failed to ping mysql:", err) return } - slist.PushFront(inputs.NewSample("up", 1, tags)) + slist.PushFront(types.NewSample("up", 1, tags)) cache := make(map[string]float64) diff --git a/inputs/mysql/processlist.go b/inputs/mysql/processlist.go index 2fde3aa6..e95bcf1a 100644 --- a/inputs/mysql/processlist.go +++ b/inputs/mysql/processlist.go @@ -5,8 +5,8 @@ import ( "log" "strings" - "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tagx" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -131,7 +131,7 @@ func (m *MySQL) gatherProcesslistByState(slist *list.SafeList, ins *Instance, db } for s, c := range stateCounts { - slist.PushFront(inputs.NewSample("processlist_processes_by_state", c, labels, map[string]string{"state": s})) + slist.PushFront(types.NewSample("processlist_processes_by_state", c, labels, map[string]string{"state": s})) } } diff --git a/inputs/mysql/processlist_by_user.go b/inputs/mysql/processlist_by_user.go index bb4692ce..f56aca3e 100644 --- a/inputs/mysql/processlist_by_user.go +++ b/inputs/mysql/processlist_by_user.go @@ -4,8 +4,8 @@ import ( "database/sql" "log" - "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tagx" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -34,6 +34,6 @@ func (m *MySQL) gatherProcesslistByUser(slist *list.SafeList, ins *Instance, db return } - slist.PushFront(inputs.NewSample("processlist_processes_by_user", connections, labels, map[string]string{"user": user})) + slist.PushFront(types.NewSample("processlist_processes_by_user", connections, labels, map[string]string{"user": user})) } } diff --git a/inputs/mysql/schema_size.go b/inputs/mysql/schema_size.go index b37468a7..91edad0c 100644 --- a/inputs/mysql/schema_size.go +++ b/inputs/mysql/schema_size.go @@ -4,8 +4,8 @@ import ( "database/sql" "log" - "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tagx" + "flashcat.cloud/categraf/types" 
"github.com/toolkits/pkg/container/list" ) @@ -34,6 +34,6 @@ func (m *MySQL) gatherSchemaSize(slist *list.SafeList, ins *Instance, db *sql.DB return } - slist.PushFront(inputs.NewSample("schema_size_bytes", size, labels, map[string]string{"schema": schema})) + slist.PushFront(types.NewSample("schema_size_bytes", size, labels, map[string]string{"schema": schema})) } } diff --git a/inputs/mysql/slave_status.go b/inputs/mysql/slave_status.go index 2cbd29d6..fb82f505 100644 --- a/inputs/mysql/slave_status.go +++ b/inputs/mysql/slave_status.go @@ -6,7 +6,7 @@ import ( "log" "strings" - "flashcat.cloud/categraf/inputs" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -88,7 +88,7 @@ func (m *MySQL) gatherSlaveStatus(slist *list.SafeList, ins *Instance, db *sql.D } if value, ok := parseStatus(*scanArgs[i].(*sql.RawBytes)); ok { - slist.PushFront(inputs.NewSample("slave_status_"+key, value, globalTags, map[string]string{ + slist.PushFront(types.NewSample("slave_status_"+key, value, globalTags, map[string]string{ "master_host": masterHost, "master_uuid": masterUUID, "channel_name": channelName, diff --git a/inputs/mysql/table_size.go b/inputs/mysql/table_size.go index 62a8c6a8..9ffd5f19 100644 --- a/inputs/mysql/table_size.go +++ b/inputs/mysql/table_size.go @@ -4,8 +4,8 @@ import ( "database/sql" "log" - "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/tagx" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -42,7 +42,7 @@ func (m *MySQL) gatherTableSize(slist *list.SafeList, ins *Instance, db *sql.DB, return } - slist.PushFront(inputs.NewSample("table_size_index_bytes", indexSize, labels, map[string]string{"schema": schema, "table": table})) - slist.PushFront(inputs.NewSample("table_size_data_bytes", dataSize, labels, map[string]string{"schema": schema, "table": table})) + slist.PushFront(types.NewSample("table_size_index_bytes", indexSize, labels, map[string]string{"schema": schema, "table": table})) 
+ slist.PushFront(types.NewSample("table_size_data_bytes", dataSize, labels, map[string]string{"schema": schema, "table": table})) } } diff --git a/inputs/net/net.go b/inputs/net/net.go index 30aa5110..b70800f6 100644 --- a/inputs/net/net.go +++ b/inputs/net/net.go @@ -9,6 +9,7 @@ import ( "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/inputs/system" "flashcat.cloud/categraf/pkg/filter" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -113,6 +114,6 @@ func (s *NetIOStats) Gather(slist *list.SafeList) { "drop_out": io.Dropout, } - inputs.PushSamples(slist, fields, tags) + types.PushSamples(slist, fields, tags) } } diff --git a/inputs/net_response/net_response.go b/inputs/net_response/net_response.go index fc0277ce..0820e2c6 100644 --- a/inputs/net_response/net_response.go +++ b/inputs/net_response/net_response.go @@ -168,7 +168,7 @@ func (ins *Instance) gather(slist *list.SafeList, target string) { defer func() { for field, value := range fields { - slist.PushFront(inputs.NewSample(field, value, labels)) + slist.PushFront(types.NewSample(field, value, labels)) } }() diff --git a/inputs/netstat/netstat.go b/inputs/netstat/netstat.go index c8eeeb8b..8f2caf54 100644 --- a/inputs/netstat/netstat.go +++ b/inputs/netstat/netstat.go @@ -7,6 +7,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/inputs/system" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -76,5 +77,5 @@ func (s *NetStats) Gather(slist *list.SafeList) { "udp_socket": counts["UDP"], } - inputs.PushSamples(slist, fields, tags) + types.PushSamples(slist, fields, tags) } diff --git a/inputs/nginx_upstream_check/nginx_upstream_check.go b/inputs/nginx_upstream_check/nginx_upstream_check.go index 59bc6b7f..accf59dc 100644 --- a/inputs/nginx_upstream_check/nginx_upstream_check.go +++ b/inputs/nginx_upstream_check/nginx_upstream_check.go @@ -247,7 +247,7 @@ func (ins *Instance) gather(slist 
*list.SafeList, target string) { "fall": server.Fall, } - inputs.PushSamples(slist, fields, tags, labels) + types.PushSamples(slist, fields, tags, labels) } } diff --git a/inputs/ntp/ntp.go b/inputs/ntp/ntp.go index e44f44e4..87413215 100644 --- a/inputs/ntp/ntp.go +++ b/inputs/ntp/ntp.go @@ -58,7 +58,7 @@ func (n *NTPStat) Gather(slist *list.SafeList) { duration := ((serverReciveTime.UnixNano() - orgTime.UnixNano()) + (serverTransmitTime.UnixNano() - dstTime.UnixNano())) / 2 delta := duration / 1e6 // convert to ms - slist.PushFront(inputs.NewSample("offset_ms", delta)) + slist.PushFront(types.NewSample("offset_ms", delta)) break } } diff --git a/inputs/nvidia_smi/nvidia_smi.go b/inputs/nvidia_smi/nvidia_smi.go index c7b91fe4..5efcd7e5 100644 --- a/inputs/nvidia_smi/nvidia_smi.go +++ b/inputs/nvidia_smi/nvidia_smi.go @@ -9,6 +9,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -63,16 +64,16 @@ func (s *GPUStats) Gather(slist *list.SafeList) { // scrape use seconds defer func(begun time.Time) { use := time.Since(begun).Seconds() - slist.PushFront(inputs.NewSample("scrape_use_seconds", use)) + slist.PushFront(types.NewSample("scrape_use_seconds", use)) }(begun) currentTable, err := scrape(s.qFields, s.NvidiaSmiCommand) if err != nil { - slist.PushFront(inputs.NewSample("scraper_up", 0)) + slist.PushFront(types.NewSample("scraper_up", 0)) return } - slist.PushFront(inputs.NewSample("scraper_up", 1)) + slist.PushFront(types.NewSample("scraper_up", 1)) for _, currentRow := range currentTable.rows { uuid := strings.TrimPrefix(strings.ToLower(currentRow.qFieldToCells[uuidQField].rawValue), "gpu-") @@ -82,7 +83,7 @@ func (s *GPUStats) Gather(slist *list.SafeList) { vBiosVersion := currentRow.qFieldToCells[vBiosVersionQField].rawValue driverVersion := currentRow.qFieldToCells[driverVersionQField].rawValue - slist.PushFront(inputs.NewSample("gpu_info", 1, 
map[string]string{ + slist.PushFront(types.NewSample("gpu_info", 1, map[string]string{ "uuid": uuid, "name": name, "driver_model_current": driverModelCurrent, @@ -102,7 +103,7 @@ func (s *GPUStats) Gather(slist *list.SafeList) { continue } - slist.PushFront(inputs.NewSample(metricInfo.metricName, num, map[string]string{"uuid": uuid})) + slist.PushFront(types.NewSample(metricInfo.metricName, num, map[string]string{"uuid": uuid})) } } } diff --git a/inputs/oracle/README.md b/inputs/oracle/README.md index 6b3dbf14..c11605d4 100644 --- a/inputs/oracle/README.md +++ b/inputs/oracle/README.md @@ -35,10 +35,10 @@ func (o *Oracle) parseRow(row map[string]string, metricConf MetricConfig, slist } if metricConf.FieldToAppend == "" { - slist.PushFront(inputs.NewSample(metricConf.Mesurement+"_"+column, value, labels)) + slist.PushFront(types.NewSample(metricConf.Mesurement+"_"+column, value, labels)) } else { suffix := cleanName(row[metricConf.FieldToAppend]) - slist.PushFront(inputs.NewSample(metricConf.Mesurement+"_"+suffix+"_"+column, value, labels)) + slist.PushFront(types.NewSample(metricConf.Mesurement+"_"+suffix+"_"+column, value, labels)) } } diff --git a/inputs/oracle/oracle_linux_amd64.go b/inputs/oracle/oracle_linux_amd64.go index dde0c7e4..ef69baf8 100644 --- a/inputs/oracle/oracle_linux_amd64.go +++ b/inputs/oracle/oracle_linux_amd64.go @@ -128,16 +128,16 @@ func (o *Oracle) gatherOnce(slist *list.SafeList, ins Instance) { defer func(begun time.Time) { use := time.Since(begun).Seconds() - slist.PushFront(inputs.NewSample("scrape_use_seconds", use, tags)) + slist.PushFront(types.NewSample("scrape_use_seconds", use, tags)) }(time.Now()) db := o.dbconnpool[ins.Address] if err := db.Ping(); err != nil { - slist.PushFront(inputs.NewSample("up", 0, tags)) + slist.PushFront(types.NewSample("up", 0, tags)) log.Println("E! 
failed to ping oracle:", ins.Address, "error:", err) } else { - slist.PushFront(inputs.NewSample("up", 1, tags)) + slist.PushFront(types.NewSample("up", 1, tags)) } waitMetrics := new(sync.WaitGroup) @@ -244,10 +244,10 @@ func (ins *Instance) parseRow(row map[string]string, metricConf MetricConfig, sl } if metricConf.FieldToAppend == "" { - slist.PushFront(inputs.NewSample(metricConf.Mesurement+"_"+column, value, labels)) + slist.PushFront(types.NewSample(metricConf.Mesurement+"_"+column, value, labels)) } else { suffix := cleanName(row[metricConf.FieldToAppend]) - slist.PushFront(inputs.NewSample(metricConf.Mesurement+"_"+suffix+"_"+column, value, labels)) + slist.PushFront(types.NewSample(metricConf.Mesurement+"_"+suffix+"_"+column, value, labels)) } } diff --git a/inputs/ping/ping.go b/inputs/ping/ping.go index dcd3e64c..0169de83 100644 --- a/inputs/ping/ping.go +++ b/inputs/ping/ping.go @@ -160,7 +160,7 @@ func (ins *Instance) gather(slist *list.SafeList, target string) { defer func() { for field, value := range fields { - slist.PushFront(inputs.NewSample(field, value, labels)) + slist.PushFront(types.NewSample(field, value, labels)) } }() diff --git a/inputs/processes/processes_notwindows.go b/inputs/processes/processes_notwindows.go index 4f85d8ea..1bbe2a04 100644 --- a/inputs/processes/processes_notwindows.go +++ b/inputs/processes/processes_notwindows.go @@ -17,6 +17,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/osx" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) @@ -70,7 +71,7 @@ func (p *Processes) Gather(slist *list.SafeList) { } } - inputs.PushSamples(slist, fields) + types.PushSamples(slist, fields) } // Gets empty fields of metrics based on the OS diff --git a/inputs/procstat/README.md b/inputs/procstat/README.md index f7134156..b4e2414e 100644 --- a/inputs/procstat/README.md +++ b/inputs/procstat/README.md @@ -35,16 +35,16 @@ func (ins *Instance) 
gatherCPU(slist *list.SafeList, procs map[PID]Process, tags if err == nil { if solarisMode { value += v / float64(runtime.NumCPU()) - slist.PushFront(inputs.NewSample("cpu_usage", v/float64(runtime.NumCPU()), map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("cpu_usage", v/float64(runtime.NumCPU()), map[string]string{"pid": fmt.Sprint(pid)}, tags)) } else { value += v - slist.PushFront(inputs.NewSample("cpu_usage", v, map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("cpu_usage", v, map[string]string{"pid": fmt.Sprint(pid)}, tags)) } } } if ins.GatherTotal { - slist.PushFront(inputs.NewSample("cpu_usage_total", value, tags)) + slist.PushFront(types.NewSample("cpu_usage_total", value, tags)) } } ``` diff --git a/inputs/procstat/procstat.go b/inputs/procstat/procstat.go index 878d2732..e914a0d2 100644 --- a/inputs/procstat/procstat.go +++ b/inputs/procstat/procstat.go @@ -139,11 +139,11 @@ func (s *Procstat) gatherOnce(slist *list.SafeList, ins *Instance) { if err != nil { log.Println("E! 
procstat: failed to lookup pids, search string:", ins.searchString, "error:", err) - slist.PushFront(inputs.NewSample("lookup_count", 0, tags)) + slist.PushFront(types.NewSample("lookup_count", 0, tags)) return } - slist.PushFront(inputs.NewSample("lookup_count", len(pids), tags)) + slist.PushFront(types.NewSample("lookup_count", len(pids), tags)) if len(pids) == 0 { return } @@ -210,13 +210,13 @@ func (ins *Instance) gatherThreads(slist *list.SafeList, procs map[PID]Process, if err == nil { val += v if ins.GatherPerPid { - slist.PushFront(inputs.NewSample("num_threads", val, map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("num_threads", val, map[string]string{"pid": fmt.Sprint(pid)}, tags)) } } } if ins.GatherTotal { - slist.PushFront(inputs.NewSample("num_threads_total", val, tags)) + slist.PushFront(types.NewSample("num_threads_total", val, tags)) } } @@ -227,13 +227,13 @@ func (ins *Instance) gatherFD(slist *list.SafeList, procs map[PID]Process, tags if err == nil { val += v if ins.GatherPerPid { - slist.PushFront(inputs.NewSample("num_fds", val, map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("num_fds", val, map[string]string{"pid": fmt.Sprint(pid)}, tags)) } } } if ins.GatherTotal { - slist.PushFront(inputs.NewSample("num_fds_total", val, tags)) + slist.PushFront(types.NewSample("num_fds_total", val, tags)) } } @@ -253,19 +253,19 @@ func (ins *Instance) gatherIO(slist *list.SafeList, procs map[PID]Process, tags readBytes += io.ReadBytes writeBytes += io.WriteBytes if ins.GatherPerPid { - slist.PushFront(inputs.NewSample("read_count", readCount, map[string]string{"pid": fmt.Sprint(pid)}, tags)) - slist.PushFront(inputs.NewSample("write_count", writeCount, map[string]string{"pid": fmt.Sprint(pid)}, tags)) - slist.PushFront(inputs.NewSample("read_bytes", readBytes, map[string]string{"pid": fmt.Sprint(pid)}, tags)) - slist.PushFront(inputs.NewSample("write_bytes", writeBytes, 
map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("read_count", readCount, map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("write_count", writeCount, map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("read_bytes", readBytes, map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("write_bytes", writeBytes, map[string]string{"pid": fmt.Sprint(pid)}, tags)) } } } if ins.GatherTotal { - slist.PushFront(inputs.NewSample("read_count_total", readCount, tags)) - slist.PushFront(inputs.NewSample("write_count_total", writeCount, tags)) - slist.PushFront(inputs.NewSample("read_bytes_total", readBytes, tags)) - slist.PushFront(inputs.NewSample("write_bytes_total", writeBytes, tags)) + slist.PushFront(types.NewSample("read_count_total", readCount, tags)) + slist.PushFront(types.NewSample("write_count_total", writeCount, tags)) + slist.PushFront(types.NewSample("read_bytes_total", readBytes, tags)) + slist.PushFront(types.NewSample("write_bytes_total", writeBytes, tags)) } } @@ -276,7 +276,7 @@ func (ins *Instance) gatherUptime(slist *list.SafeList, procs map[PID]Process, t v, err := procs[pid].CreateTime() // returns epoch in ms if err == nil { if ins.GatherPerPid { - slist.PushFront(inputs.NewSample("uptime", value, map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("uptime", value, map[string]string{"pid": fmt.Sprint(pid)}, tags)) } if value == -1 { value = v @@ -290,7 +290,7 @@ func (ins *Instance) gatherUptime(slist *list.SafeList, procs map[PID]Process, t } if ins.GatherTotal { - slist.PushFront(inputs.NewSample("uptime_minimum", value, tags)) + slist.PushFront(types.NewSample("uptime_minimum", value, tags)) } } @@ -301,16 +301,16 @@ func (ins *Instance) gatherCPU(slist *list.SafeList, procs map[PID]Process, tags if err == nil { if solarisMode { value += v / float64(runtime.NumCPU()) - 
slist.PushFront(inputs.NewSample("cpu_usage", v/float64(runtime.NumCPU()), map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("cpu_usage", v/float64(runtime.NumCPU()), map[string]string{"pid": fmt.Sprint(pid)}, tags)) } else { value += v - slist.PushFront(inputs.NewSample("cpu_usage", v, map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("cpu_usage", v, map[string]string{"pid": fmt.Sprint(pid)}, tags)) } } } if ins.GatherTotal { - slist.PushFront(inputs.NewSample("cpu_usage_total", value, tags)) + slist.PushFront(types.NewSample("cpu_usage_total", value, tags)) } } @@ -321,13 +321,13 @@ func (ins *Instance) gatherMem(slist *list.SafeList, procs map[PID]Process, tags if err == nil { value += v if ins.GatherPerPid { - slist.PushFront(inputs.NewSample("mem_usage", v, map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("mem_usage", v, map[string]string{"pid": fmt.Sprint(pid)}, tags)) } } } if ins.GatherTotal { - slist.PushFront(inputs.NewSample("mem_usage_total", value, tags)) + slist.PushFront(types.NewSample("mem_usage_total", value, tags)) } } @@ -339,8 +339,8 @@ func (ins *Instance) gatherLimit(slist *list.SafeList, procs map[PID]Process, ta for _, rlim := range rlims { if rlim.Resource == process.RLIMIT_NOFILE { if ins.GatherPerPid { - slist.PushFront(inputs.NewSample("rlimit_num_fds_soft", rlim.Soft, map[string]string{"pid": fmt.Sprint(pid)}, tags)) - slist.PushFront(inputs.NewSample("rlimit_num_fds_hard", rlim.Hard, map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("rlimit_num_fds_soft", rlim.Soft, map[string]string{"pid": fmt.Sprint(pid)}, tags)) + slist.PushFront(types.NewSample("rlimit_num_fds_hard", rlim.Hard, map[string]string{"pid": fmt.Sprint(pid)}, tags)) } if softMin == 0 { @@ -362,8 +362,8 @@ func (ins *Instance) gatherLimit(slist *list.SafeList, procs map[PID]Process, ta } if ins.GatherTotal { - 
slist.PushFront(inputs.NewSample("rlimit_num_fds_soft_minimum", softMin, tags)) - slist.PushFront(inputs.NewSample("rlimit_num_fds_hard_minimum", hardMin, tags)) + slist.PushFront(types.NewSample("rlimit_num_fds_soft_minimum", softMin, tags)) + slist.PushFront(types.NewSample("rlimit_num_fds_hard_minimum", hardMin, tags)) } } diff --git a/inputs/prometheus/prometheus.go b/inputs/prometheus/prometheus.go index 0913edcd..791c1e6c 100644 --- a/inputs/prometheus/prometheus.go +++ b/inputs/prometheus/prometheus.go @@ -237,13 +237,13 @@ func (p *Prometheus) gatherUrl(urlwg *sync.WaitGroup, slist *list.SafeList, ins res, err := ins.client.Do(req) if err != nil { - slist.PushFront(inputs.NewSample("up", 0, labels)) + slist.PushFront(types.NewSample("up", 0, labels)) log.Println("E! failed to query url:", u.String(), "error:", err) return } if res.StatusCode != http.StatusOK { - slist.PushFront(inputs.NewSample("up", 0, labels)) + slist.PushFront(types.NewSample("up", 0, labels)) log.Println("E! failed to query url:", u.String(), "status code:", res.StatusCode) return } @@ -252,12 +252,12 @@ func (p *Prometheus) gatherUrl(urlwg *sync.WaitGroup, slist *list.SafeList, ins body, err := io.ReadAll(res.Body) if err != nil { - slist.PushFront(inputs.NewSample("up", 0, labels)) + slist.PushFront(types.NewSample("up", 0, labels)) log.Println("E! 
failed to read response body, error:", err) return } - slist.PushFront(inputs.NewSample("up", 1, labels)) + slist.PushFront(types.NewSample("up", 1, labels)) parser := prometheus.NewParser(ins.NamePrefix, labels, res.Header, ins.ignoreMetricsFilter, ins.ignoreLabelKeysFilter) if err = parser.Parse(body, slist); err != nil { diff --git a/inputs/rabbitmq/rabbitmq.go b/inputs/rabbitmq/rabbitmq.go index a5c7f6d9..aff21a91 100644 --- a/inputs/rabbitmq/rabbitmq.go +++ b/inputs/rabbitmq/rabbitmq.go @@ -390,7 +390,7 @@ func (ins *Instance) gatherOnce(slist *list.SafeList) { // scrape use seconds defer func(begun time.Time) { use := time.Since(begun).Seconds() - slist.PushFront(inputs.NewSample("scrape_use_seconds", use, tags, ins.Labels)) + slist.PushFront(types.NewSample("scrape_use_seconds", use, tags, ins.Labels)) }(begun) var wg sync.WaitGroup @@ -509,7 +509,7 @@ func gatherOverview(ins *Instance, slist *list.SafeList) { "overview_return_unroutable_rate": overview.MessageStats.ReturnUnroutableDetails.Rate, } - inputs.PushSamples(slist, fields, tags) + types.PushSamples(slist, fields, tags) } func gatherExchanges(ins *Instance, slist *list.SafeList) { @@ -546,7 +546,7 @@ func gatherExchanges(ins *Instance, slist *list.SafeList) { "exchange_messages_publish_out_rate": exchange.MessageStats.PublishOutDetails.Rate, } - inputs.PushSamples(slist, fields, tags) + types.PushSamples(slist, fields, tags) } } @@ -604,7 +604,7 @@ func gatherFederationLinks(ins *Instance, slist *list.SafeList) { "federation_messages_return_unroutable": link.LocalChannel.MessageStats.ReturnUnroutable, } - inputs.PushSamples(slist, fields, tags, ins.Labels) + types.PushSamples(slist, fields, tags, ins.Labels) } } @@ -734,7 +734,7 @@ func gatherNodes(ins *Instance, slist *list.SafeList) { } } - inputs.PushSamples(slist, fields, tags, ins.Labels) + types.PushSamples(slist, fields, tags, ins.Labels) }(node) } @@ -818,6 +818,6 @@ func gatherQueues(ins *Instance, slist *list.SafeList) { 
"queue_messages_redeliver_rate": queue.MessageStats.RedeliverDetails.Rate, } - inputs.PushSamples(slist, fields, tags, ins.Labels) + types.PushSamples(slist, fields, tags, ins.Labels) } } diff --git a/inputs/redis/redis.go b/inputs/redis/redis.go index b04ddaaf..9f3a4ffa 100644 --- a/inputs/redis/redis.go +++ b/inputs/redis/redis.go @@ -133,18 +133,18 @@ func (r *Redis) gatherOnce(slist *list.SafeList, ins *Instance) { // scrape use seconds defer func(begun time.Time) { use := time.Since(begun).Seconds() - slist.PushFront(inputs.NewSample("scrape_use_seconds", use, tags)) + slist.PushFront(types.NewSample("scrape_use_seconds", use, tags)) }(begun) // ping err := ins.client.Ping(context.Background()).Err() - slist.PushFront(inputs.NewSample("ping_use_seconds", time.Since(begun).Seconds(), tags)) + slist.PushFront(types.NewSample("ping_use_seconds", time.Since(begun).Seconds(), tags)) if err != nil { - slist.PushFront(inputs.NewSample("up", 0, tags)) + slist.PushFront(types.NewSample("up", 0, tags)) log.Println("E! 
failed to ping redis:", ins.Address, "error:", err) return } else { - slist.PushFront(inputs.NewSample("up", 1, tags)) + slist.PushFront(types.NewSample("up", 1, tags)) } r.gatherInfoAll(slist, ins, tags) @@ -170,7 +170,7 @@ func (r *Redis) gatherCommandValues(slist *list.SafeList, ins *Instance, tags ma } for k, v := range fields { - slist.PushFront(inputs.NewSample("exec_result_"+k, v, tags)) + slist.PushFront(types.NewSample("exec_result_"+k, v, tags)) } } @@ -293,7 +293,7 @@ func (r *Redis) gatherInfoAll(slist *list.SafeList, ins *Instance, tags map[stri fields["keyspace_hitrate"] = keyspaceHitrate for k, v := range fields { - slist.PushFront(inputs.NewSample(k, v, tags)) + slist.PushFront(types.NewSample(k, v, tags)) } } @@ -324,7 +324,7 @@ func gatherKeyspaceLine( } for k, v := range fields { - slist.PushFront(inputs.NewSample("keyspace_"+k, v, tags)) + slist.PushFront(types.NewSample("keyspace_"+k, v, tags)) } } } @@ -373,7 +373,7 @@ func gatherCommandstateLine( } for k, v := range fields { - slist.PushFront(inputs.NewSample("cmdstat_"+k, v, tags)) + slist.PushFront(types.NewSample("cmdstat_"+k, v, tags)) } } @@ -419,6 +419,6 @@ func gatherReplicationLine( } for k, v := range fields { - slist.PushFront(inputs.NewSample("replication_"+k, v, tags)) + slist.PushFront(types.NewSample("replication_"+k, v, tags)) } } diff --git a/inputs/switch_legacy/switch_legacy.go b/inputs/switch_legacy/switch_legacy.go index 2a80510c..71f72b56 100644 --- a/inputs/switch_legacy/switch_legacy.go +++ b/inputs/switch_legacy/switch_legacy.go @@ -11,6 +11,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/conv" + "flashcat.cloud/categraf/pkg/runtimex" "flashcat.cloud/categraf/types" "github.com/gaochao1/sw" "github.com/toolkits/pkg/concurrent/semaphore" @@ -201,7 +202,7 @@ func (ins *Instance) custstat(wg *sync.WaitGroup, ip string, slist *list.SafeLis defer func() { if r := recover(); r != nil { - log.Println("E! 
recovered in custstat, ip:", ip, "oid:", cust.OID, "error:", r) + log.Println("E! recovered in custstat, ip:", ip, "oid:", cust.OID, "error:", r, "stack:", runtimex.Stack(3)) } }() @@ -213,7 +214,7 @@ func (ins *Instance) custstat(wg *sync.WaitGroup, ip string, slist *list.SafeLis if len(snmpPDUs) > 0 && err == nil { value, err = conv.ToFloat64(snmpPDUs[0].Value) if err == nil { - slist.PushFront(inputs.NewSample(cust.Metric, value, cust.Tags, ins.Labels)) + slist.PushFront(types.NewSample(cust.Metric, value, cust.Tags, ins.Labels)) } else { log.Println("E! failed to convert to float64, ip:", ip, "oid:", cust.OID, "value:", snmpPDUs[0].Value) } @@ -243,7 +244,7 @@ func (ins *Instance) gatherMemMetrics(ips []string, slist *list.SafeList) { if utilPercent == -1 { continue } - slist.PushFront(inputs.NewSample("mem_util", utilPercent, map[string]string{ins.parent.SwitchIdLabel: ip}, ins.Labels)) + slist.PushFront(types.NewSample("mem_util", utilPercent, map[string]string{ins.parent.SwitchIdLabel: ip}, ins.Labels)) } } @@ -282,7 +283,7 @@ func (ins *Instance) gatherCpuMetrics(ips []string, slist *list.SafeList) { if utilPercent == -1 { continue } - slist.PushFront(inputs.NewSample("cpu_util", utilPercent, map[string]string{ins.parent.SwitchIdLabel: ip}, ins.Labels)) + slist.PushFront(types.NewSample("cpu_util", utilPercent, map[string]string{ins.parent.SwitchIdLabel: ip}, ins.Labels)) } } @@ -354,10 +355,10 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *list.SafeList) { } if ins.GatherOperStatus { - slist.PushFront(inputs.NewSample("if_oper_status", ifStat.IfOperStatus, tags)) + slist.PushFront(types.NewSample("if_oper_status", ifStat.IfOperStatus, tags)) } - slist.PushFront(inputs.NewSample("if_speed", ifStat.IfSpeed, tags)) + slist.PushFront(types.NewSample("if_speed", ifStat.IfSpeed, tags)) if lastIfStatList := ins.lastifmap.Get(ip); lastIfStatList != nil { for _, lastifStat := range lastIfStatList { @@ -373,18 +374,18 @@ func (ins *Instance) 
gatherFlowMetrics(ips []string, slist *list.SafeList) { IfHCOutOctets := 8 * (float64(ifStat.IfHCOutOctets) - float64(lastifStat.IfHCOutOctets)) / float64(interval) if limitCheck(IfHCInOctets, speedlimit) { - slist.PushFront(inputs.NewSample("if_in", IfHCInOctets, tags)) + slist.PushFront(types.NewSample("if_in", IfHCInOctets, tags)) if ifStat.IfSpeed > 0 { - slist.PushFront(inputs.NewSample("if_in_speed_percent", 100*IfHCInOctets/float64(ifStat.IfSpeed), tags)) + slist.PushFront(types.NewSample("if_in_speed_percent", 100*IfHCInOctets/float64(ifStat.IfSpeed), tags)) } } else { log.Println("W! if_in out of range, current:", ifStat.IfHCInOctets, "lasttime:", lastifStat.IfHCInOctets, "tags:", tags) } if limitCheck(IfHCOutOctets, speedlimit) { - slist.PushFront(inputs.NewSample("if_out", IfHCOutOctets, tags)) + slist.PushFront(types.NewSample("if_out", IfHCOutOctets, tags)) if ifStat.IfSpeed > 0 { - slist.PushFront(inputs.NewSample("if_out_speed_percent", 100*IfHCOutOctets/float64(ifStat.IfSpeed), tags)) + slist.PushFront(types.NewSample("if_out_speed_percent", 100*IfHCOutOctets/float64(ifStat.IfSpeed), tags)) } } else { log.Println("W! if_out out of range, current:", ifStat.IfHCOutOctets, "lasttime:", lastifStat.IfHCOutOctets, "tags:", tags) @@ -403,13 +404,13 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *list.SafeList) { IfHCOutBroadcastPkts := (float64(ifStat.IfHCOutBroadcastPkts) - float64(lastifStat.IfHCOutBroadcastPkts)) / float64(interval) if limitCheck(IfHCInBroadcastPkts, ins.BroadcastPktLimit) { - slist.PushFront(inputs.NewSample("if_in_broadcast_pkt", IfHCInBroadcastPkts, tags)) + slist.PushFront(types.NewSample("if_in_broadcast_pkt", IfHCInBroadcastPkts, tags)) } else { log.Println("W! 
if_in_broadcast_pkt out of range, current:", ifStat.IfHCInBroadcastPkts, "lasttime:", lastifStat.IfHCInBroadcastPkts, "tags:", tags) } if limitCheck(IfHCOutBroadcastPkts, ins.BroadcastPktLimit) { - slist.PushFront(inputs.NewSample("if_out_broadcast_pkt", IfHCOutBroadcastPkts, tags)) + slist.PushFront(types.NewSample("if_out_broadcast_pkt", IfHCOutBroadcastPkts, tags)) } else { log.Println("W! if_out_broadcast_pkt out of range, current:", ifStat.IfHCOutBroadcastPkts, "lasttime:", lastifStat.IfHCOutBroadcastPkts, "tags:", tags) } @@ -428,13 +429,13 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *list.SafeList) { IfHCOutMulticastPkts := (float64(ifStat.IfHCOutMulticastPkts) - float64(lastifStat.IfHCOutMulticastPkts)) / float64(interval) if limitCheck(IfHCInMulticastPkts, ins.MulticastPktLimit) { - slist.PushFront(inputs.NewSample("if_in_multicast_pkt", IfHCInMulticastPkts, tags)) + slist.PushFront(types.NewSample("if_in_multicast_pkt", IfHCInMulticastPkts, tags)) } else { log.Println("W! if_in_multicast_pkt out of range, current:", ifStat.IfHCInMulticastPkts, "lasttime:", lastifStat.IfHCInMulticastPkts, "tags:", tags) } if limitCheck(IfHCOutMulticastPkts, ins.MulticastPktLimit) { - slist.PushFront(inputs.NewSample("if_out_multicast_pkt", IfHCOutMulticastPkts, tags)) + slist.PushFront(types.NewSample("if_out_multicast_pkt", IfHCOutMulticastPkts, tags)) } else { log.Println("W! if_out_multicast_pkt out of range, current:", ifStat.IfHCOutMulticastPkts, "lasttime:", lastifStat.IfHCOutMulticastPkts, "tags:", tags) } @@ -453,13 +454,13 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *list.SafeList) { IfOutDiscards := (float64(ifStat.IfOutDiscards) - float64(lastifStat.IfOutDiscards)) / float64(interval) if limitCheck(IfInDiscards, ins.DiscardsPktLimit) { - slist.PushFront(inputs.NewSample("if_in_discards", IfInDiscards, tags)) + slist.PushFront(types.NewSample("if_in_discards", IfInDiscards, tags)) } else { log.Println("W! 
if_in_discards out of range, current:", ifStat.IfInDiscards, "lasttime:", lastifStat.IfInDiscards, "tags:", tags) } if limitCheck(IfOutDiscards, ins.DiscardsPktLimit) { - slist.PushFront(inputs.NewSample("if_out_discards", IfOutDiscards, tags)) + slist.PushFront(types.NewSample("if_out_discards", IfOutDiscards, tags)) } else { log.Println("W! if_out_discards out of range, current:", ifStat.IfOutDiscards, "lasttime:", lastifStat.IfOutDiscards, "tags:", tags) } @@ -478,13 +479,13 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *list.SafeList) { IfOutErrors := (float64(ifStat.IfOutErrors) - float64(lastifStat.IfOutErrors)) / float64(interval) if limitCheck(IfInErrors, ins.ErrorsPktLimit) { - slist.PushFront(inputs.NewSample("if_in_errors", IfInErrors, tags)) + slist.PushFront(types.NewSample("if_in_errors", IfInErrors, tags)) } else { log.Println("W! if_in_errors out of range, current:", ifStat.IfInErrors, "lasttime:", lastifStat.IfInErrors, "tags:", tags) } if limitCheck(IfOutErrors, ins.ErrorsPktLimit) { - slist.PushFront(inputs.NewSample("if_out_errors", IfOutErrors, tags)) + slist.PushFront(types.NewSample("if_out_errors", IfOutErrors, tags)) } else { log.Println("W! if_out_errors out of range, current:", ifStat.IfOutErrors, "lasttime:", lastifStat.IfOutErrors, "tags:", tags) } @@ -500,7 +501,7 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *list.SafeList) { interval := ifStat.TS - lastifStat.TS IfInUnknownProtos := (float64(ifStat.IfInUnknownProtos) - float64(lastifStat.IfInUnknownProtos)) / float64(interval) if limitCheck(IfInUnknownProtos, ins.UnknownProtosPktLimit) { - slist.PushFront(inputs.NewSample("if_in_unknown_protos", IfInUnknownProtos, tags)) + slist.PushFront(types.NewSample("if_in_unknown_protos", IfInUnknownProtos, tags)) } else { log.Println("W! 
if_in_unknown_protos out of range, current:", ifStat.IfInUnknownProtos, "lasttime:", lastifStat.IfInUnknownProtos, "tags:", tags) } @@ -516,7 +517,7 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *list.SafeList) { interval := ifStat.TS - lastifStat.TS IfOutQLen := (float64(ifStat.IfOutQLen) - float64(lastifStat.IfOutQLen)) / float64(interval) if limitCheck(IfOutQLen, ins.OutQlenPktLimit) { - slist.PushFront(inputs.NewSample("if_out_qlen", IfOutQLen, tags)) + slist.PushFront(types.NewSample("if_out_qlen", IfOutQLen, tags)) } else { log.Println("W! if_out_qlen out of range, current:", ifStat.IfOutQLen, "lasttime:", lastifStat.IfOutQLen, "tags:", tags) } @@ -535,13 +536,13 @@ func (ins *Instance) gatherFlowMetrics(ips []string, slist *list.SafeList) { IfHCOutUcastPkts := (float64(ifStat.IfHCOutUcastPkts) - float64(lastifStat.IfHCOutUcastPkts)) / float64(interval) if limitCheck(IfHCInUcastPkts, ins.PktLimit) { - slist.PushFront(inputs.NewSample("if_in_pkts", IfHCInUcastPkts, tags)) + slist.PushFront(types.NewSample("if_in_pkts", IfHCInUcastPkts, tags)) } else { log.Println("W! if_in_pkts out of range, current:", ifStat.IfHCInUcastPkts, "lasttime:", lastifStat.IfHCInUcastPkts, "tags:", tags) } if limitCheck(IfHCOutUcastPkts, ins.PktLimit) { - slist.PushFront(inputs.NewSample("if_out_pkts", IfHCOutUcastPkts, tags)) + slist.PushFront(types.NewSample("if_out_pkts", IfHCOutUcastPkts, tags)) } else { log.Println("W! 
if_out_pkts out of range, current:", ifStat.IfHCOutUcastPkts, "lasttime:", lastifStat.IfHCOutUcastPkts, "tags:", tags) } @@ -619,7 +620,7 @@ func (ins *Instance) gatherPing(ips []string, slist *list.SafeList) []string { } if ins.GatherPingMetrics { - slist.PushFront(inputs.NewSample("ping_up", val, map[string]string{ins.parent.SwitchIdLabel: ip}, ins.Labels)) + slist.PushFront(types.NewSample("ping_up", val, map[string]string{ins.parent.SwitchIdLabel: ip}, ins.Labels)) } } diff --git a/inputs/system/system.go b/inputs/system/system.go index 1679164e..d4fb5749 100644 --- a/inputs/system/system.go +++ b/inputs/system/system.go @@ -7,6 +7,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" + "flashcat.cloud/categraf/types" "github.com/shirou/gopsutil/v3/cpu" "github.com/shirou/gopsutil/v3/host" "github.com/shirou/gopsutil/v3/load" @@ -77,5 +78,5 @@ func (s *SystemStats) Gather(slist *list.SafeList) { } } - inputs.PushSamples(slist, fields) + types.PushSamples(slist, fields) } diff --git a/inputs/tomcat/tomcat.go b/inputs/tomcat/tomcat.go index 287c37dc..16bdb277 100644 --- a/inputs/tomcat/tomcat.go +++ b/inputs/tomcat/tomcat.go @@ -196,19 +196,19 @@ func (t *Tomcat) gatherOnce(slist *list.SafeList, ins *Instance) { // scrape use seconds defer func(begun time.Time) { use := time.Since(begun).Seconds() - slist.PushFront(inputs.NewSample("scrape_use_seconds", use, tags)) + slist.PushFront(types.NewSample("scrape_use_seconds", use, tags)) }(begun) // url cannot connect? up = 0 resp, err := ins.client.Do(ins.request) if err != nil { - slist.PushFront(inputs.NewSample("up", 0, tags)) + slist.PushFront(types.NewSample("up", 0, tags)) log.Println("E! failed to query tomcat url:", err) return } if resp.StatusCode != http.StatusOK { - slist.PushFront(inputs.NewSample("up", 0, tags)) + slist.PushFront(types.NewSample("up", 0, tags)) log.Println("E! 
received HTTP status code:", resp.StatusCode, "expected: 200") return } @@ -217,16 +217,16 @@ func (t *Tomcat) gatherOnce(slist *list.SafeList, ins *Instance) { var status TomcatStatus if err := xml.NewDecoder(resp.Body).Decode(&status); err != nil { - slist.PushFront(inputs.NewSample("up", 0, tags)) + slist.PushFront(types.NewSample("up", 0, tags)) log.Println("E! failed to decode response body:", err) return } - slist.PushFront(inputs.NewSample("up", 1, tags)) + slist.PushFront(types.NewSample("up", 1, tags)) - slist.PushFront(inputs.NewSample("jvm_memory_free", status.TomcatJvm.JvmMemory.Free, tags)) - slist.PushFront(inputs.NewSample("jvm_memory_total", status.TomcatJvm.JvmMemory.Total, tags)) - slist.PushFront(inputs.NewSample("jvm_memory_max", status.TomcatJvm.JvmMemory.Max, tags)) + slist.PushFront(types.NewSample("jvm_memory_free", status.TomcatJvm.JvmMemory.Free, tags)) + slist.PushFront(types.NewSample("jvm_memory_total", status.TomcatJvm.JvmMemory.Total, tags)) + slist.PushFront(types.NewSample("jvm_memory_max", status.TomcatJvm.JvmMemory.Max, tags)) // add tomcat_jvm_memorypool measurements for _, mp := range status.TomcatJvm.JvmMemoryPools { @@ -242,7 +242,7 @@ func (t *Tomcat) gatherOnce(slist *list.SafeList, ins *Instance) { "jvm_memorypool_used": mp.UsageUsed, } - inputs.PushSamples(slist, tcmpFields, tags, tcmpTags) + types.PushSamples(slist, tcmpFields, tags, tcmpTags) } // add tomcat_connector measurements @@ -268,6 +268,6 @@ func (t *Tomcat) gatherOnce(slist *list.SafeList, ins *Instance) { "connector_bytes_sent": c.RequestInfo.BytesSent, } - inputs.PushSamples(slist, tccFields, tags, tccTags) + types.PushSamples(slist, tccFields, tags, tccTags) } } diff --git a/inputs/zookeeper/zookeeper.go b/inputs/zookeeper/zookeeper.go index 70c8eb8d..57896c6a 100644 --- a/inputs/zookeeper/zookeeper.go +++ b/inputs/zookeeper/zookeeper.go @@ -140,13 +140,13 @@ func (ins *Instance) gatherOneHost(wg *sync.WaitGroup, slist *list.SafeList, zkH // scrape use 
seconds defer func(begun time.Time) { use := time.Since(begun).Seconds() - slist.PushFront(inputs.NewSample("zk_scrape_use_seconds", use, tags)) + slist.PushFront(types.NewSample("zk_scrape_use_seconds", use, tags)) }(begun) // zk_up mntrConn, err := ins.ZkConnect(zkHost) if err != nil { - slist.PushFront(inputs.NewSample("zk_up", 0, tags)) + slist.PushFront(types.NewSample("zk_up", 0, tags)) log.Println("E! failed to connect zookeeper:", zkHost, "error:", err) return } @@ -157,7 +157,7 @@ func (ins *Instance) gatherOneHost(wg *sync.WaitGroup, slist *list.SafeList, zkH // zk_ruok ruokConn, err := ins.ZkConnect(zkHost) if err != nil { - slist.PushFront(inputs.NewSample("zk_ruok", 0, tags)) + slist.PushFront(types.NewSample("zk_ruok", 0, tags)) log.Println("E! failed to connect zookeeper:", zkHost, "error:", err) return } @@ -174,16 +174,16 @@ func (ins *Instance) gatherMntrResult(conn net.Conn, slist *list.SafeList, globa // 'mntr' command isn't allowed in zk config, log as warning if strings.Contains(lines[0], cmdNotExecutedSffx) { - slist.PushFront(inputs.NewSample("zk_up", 0, globalTags)) + slist.PushFront(types.NewSample("zk_up", 0, globalTags)) log.Printf(commandNotAllowedTmpl, "mntr", conn.RemoteAddr().String()) return } - slist.PushFront(inputs.NewSample("zk_up", 1, globalTags)) + slist.PushFront(types.NewSample("zk_up", 1, globalTags)) // skip instance if it in a leader only state and doesnt serving client requests if lines[0] == instanceNotServingMessage { - slist.PushFront(inputs.NewSample("zk_server_leader", 1, globalTags)) + slist.PushFront(types.NewSample("zk_server_leader", 1, globalTags)) return } @@ -204,17 +204,17 @@ func (ins *Instance) gatherMntrResult(conn net.Conn, slist *list.SafeList, globa switch key { case "zk_server_state": if value == "leader" { - slist.PushFront(inputs.NewSample("zk_server_leader", 1, globalTags)) + slist.PushFront(types.NewSample("zk_server_leader", 1, globalTags)) } else { - 
slist.PushFront(inputs.NewSample("zk_server_leader", 0, globalTags)) + slist.PushFront(types.NewSample("zk_server_leader", 0, globalTags)) } case "zk_version": version := versionRE.ReplaceAllString(value, "$1") - slist.PushFront(inputs.NewSample("zk_version", 1, globalTags, map[string]string{"version": version})) + slist.PushFront(types.NewSample("zk_version", 1, globalTags, map[string]string{"version": version})) case "zk_peer_state": - slist.PushFront(inputs.NewSample("zk_peer_state", 1, globalTags, map[string]string{"state": value})) + slist.PushFront(types.NewSample("zk_peer_state", 1, globalTags, map[string]string{"state": value})) default: var k string @@ -226,9 +226,9 @@ func (ins *Instance) gatherMntrResult(conn net.Conn, slist *list.SafeList, globa k = metricNameReplacer.Replace(key) if strings.Contains(k, "{") { labels := parseLabels(k) - slist.PushFront(inputs.NewSample(k, value, globalTags, labels)) + slist.PushFront(types.NewSample(k, value, globalTags, labels)) } else { - slist.PushFront(inputs.NewSample(k, value, globalTags)) + slist.PushFront(types.NewSample(k, value, globalTags)) } } } @@ -237,12 +237,12 @@ func (ins *Instance) gatherMntrResult(conn net.Conn, slist *list.SafeList, globa func (ins *Instance) gatherRuokResult(conn net.Conn, slist *list.SafeList, globalTags map[string]string) { res := sendZookeeperCmd(conn, "ruok") if res == "imok" { - slist.PushFront(inputs.NewSample("zk_ruok", 1, globalTags)) + slist.PushFront(types.NewSample("zk_ruok", 1, globalTags)) } else { if strings.Contains(res, cmdNotExecutedSffx) { log.Printf(commandNotAllowedTmpl, "ruok", conn.RemoteAddr().String()) } - slist.PushFront(inputs.NewSample("zk_ruok", 0, globalTags)) + slist.PushFront(types.NewSample("zk_ruok", 0, globalTags)) } } diff --git a/parser/prometheus/parser.go b/parser/prometheus/parser.go index 5e83a380..393d7c73 100644 --- a/parser/prometheus/parser.go +++ b/parser/prometheus/parser.go @@ -9,13 +9,14 @@ import ( "mime" "net/http" - 
"flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/filter" "flashcat.cloud/categraf/pkg/prom" + "flashcat.cloud/categraf/types" "github.com/matttproud/golang_protobuf_extensions/pbutil" - dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" "github.com/toolkits/pkg/container/list" + + dto "github.com/prometheus/client_model/go" ) type Parser struct { @@ -79,12 +80,12 @@ func (p *Parser) Parse(buf []byte, slist *list.SafeList) error { tags := p.makeLabels(m) if mf.GetType() == dto.MetricType_SUMMARY { - p.handleSummary(m, tags, metricName, slist) + p.HandleSummary(m, tags, metricName, slist) } else if mf.GetType() == dto.MetricType_HISTOGRAM { - p.handleHistogram(m, tags, metricName, slist) + p.HandleHistogram(m, tags, metricName, slist) } else { fields := getNameAndValue(m, metricName) - inputs.PushSamples(slist, fields, tags) + types.PushSamples(slist, fields, tags) } } } @@ -92,24 +93,24 @@ func (p *Parser) Parse(buf []byte, slist *list.SafeList) error { return nil } -func (p *Parser) handleSummary(m *dto.Metric, tags map[string]string, metricName string, slist *list.SafeList) { - slist.PushFront(inputs.NewSample(prom.BuildMetric(p.NamePrefix, metricName, "count"), float64(m.GetSummary().GetSampleCount()), tags)) - slist.PushFront(inputs.NewSample(prom.BuildMetric(p.NamePrefix, metricName, "sum"), m.GetSummary().GetSampleSum(), tags)) +func (p *Parser) HandleSummary(m *dto.Metric, tags map[string]string, metricName string, slist *list.SafeList) { + slist.PushFront(types.NewSample(prom.BuildMetric(p.NamePrefix, metricName, "count"), float64(m.GetSummary().GetSampleCount()), tags)) + slist.PushFront(types.NewSample(prom.BuildMetric(p.NamePrefix, metricName, "sum"), m.GetSummary().GetSampleSum(), tags)) for _, q := range m.GetSummary().Quantile { - slist.PushFront(inputs.NewSample(prom.BuildMetric(p.NamePrefix, metricName), q.GetValue(), tags, map[string]string{"quantile": fmt.Sprint(q.GetQuantile())})) + 
slist.PushFront(types.NewSample(prom.BuildMetric(p.NamePrefix, metricName), q.GetValue(), tags, map[string]string{"quantile": fmt.Sprint(q.GetQuantile())})) } } -func (p *Parser) handleHistogram(m *dto.Metric, tags map[string]string, metricName string, slist *list.SafeList) { - slist.PushFront(inputs.NewSample(prom.BuildMetric(p.NamePrefix, metricName, "count"), float64(m.GetHistogram().GetSampleCount()), tags)) - slist.PushFront(inputs.NewSample(prom.BuildMetric(p.NamePrefix, metricName, "sum"), m.GetHistogram().GetSampleSum(), tags)) - slist.PushFront(inputs.NewSample(prom.BuildMetric(p.NamePrefix, metricName, "bucket"), float64(m.GetHistogram().GetSampleCount()), tags, map[string]string{"le": "+Inf"})) +func (p *Parser) HandleHistogram(m *dto.Metric, tags map[string]string, metricName string, slist *list.SafeList) { + slist.PushFront(types.NewSample(prom.BuildMetric(p.NamePrefix, metricName, "count"), float64(m.GetHistogram().GetSampleCount()), tags)) + slist.PushFront(types.NewSample(prom.BuildMetric(p.NamePrefix, metricName, "sum"), m.GetHistogram().GetSampleSum(), tags)) + slist.PushFront(types.NewSample(prom.BuildMetric(p.NamePrefix, metricName, "bucket"), float64(m.GetHistogram().GetSampleCount()), tags, map[string]string{"le": "+Inf"})) for _, b := range m.GetHistogram().Bucket { le := fmt.Sprint(b.GetUpperBound()) value := float64(b.GetCumulativeCount()) - slist.PushFront(inputs.NewSample(prom.BuildMetric(p.NamePrefix, metricName, "bucket"), value, tags, map[string]string{"le": le})) + slist.PushFront(types.NewSample(prom.BuildMetric(p.NamePrefix, metricName, "bucket"), value, tags, map[string]string{"le": le})) } } diff --git a/pkg/runtimex/stack.go b/pkg/runtimex/stack.go new file mode 100644 index 00000000..8b90fe91 --- /dev/null +++ b/pkg/runtimex/stack.go @@ -0,0 +1,76 @@ +package runtimex + +import ( + "bytes" + "fmt" + "io/ioutil" + "runtime" +) + +var ( + dunno = []byte("???") + centerDot = []byte("·") + dot = []byte(".") + slash = []byte("/") +) 
+ +// stack returns a nicely formatted stack frame, skipping skip frames. +func Stack(skip int) []byte { + buf := new(bytes.Buffer) // the returned data + // As we loop, we open files and read them. These variables record the currently + // loaded file. + var lines [][]byte + var lastFile string + for i := skip; ; i++ { // Skip the expected number of frames + pc, file, line, ok := runtime.Caller(i) + if !ok { + break + } + // Print this much at least. If we can't find the source, it won't show. + fmt.Fprintf(buf, "%s:%d (0x%x)\n", file, line, pc) + if file != lastFile { + data, err := ioutil.ReadFile(file) + if err != nil { + continue + } + lines = bytes.Split(data, []byte{'\n'}) + lastFile = file + } + fmt.Fprintf(buf, "\t%s: %s\n", function(pc), source(lines, line)) + } + return buf.Bytes() +} + +// source returns a space-trimmed slice of the n'th line. +func source(lines [][]byte, n int) []byte { + n-- // in stack trace, lines are 1-indexed but our array is 0-indexed + if n < 0 || n >= len(lines) { + return dunno + } + return bytes.TrimSpace(lines[n]) +} + +// function returns, if possible, the name of the function containing the PC. +func function(pc uintptr) []byte { + fn := runtime.FuncForPC(pc) + if fn == nil { + return dunno + } + name := []byte(fn.Name()) + // The name includes the path name to the package, which is unnecessary + // since the file name is already included. Plus, it has center dots. + // That is, we see + // runtime/debug.*T·ptrmethod + // and want + // *T.ptrmethod + // Also the package path might contains dot (e.g. 
code.google.com/...), + // so first eliminate the path prefix + if lastslash := bytes.LastIndex(name, slash); lastslash >= 0 { + name = name[lastslash+1:] + } + if period := bytes.Index(name, dot); period >= 0 { + name = name[period+1:] + } + name = bytes.Replace(name, centerDot, dot, -1) + return name +} diff --git a/types/sample.go b/types/sample.go new file mode 100644 index 00000000..41f17a6d --- /dev/null +++ b/types/sample.go @@ -0,0 +1,67 @@ +package types + +import ( + "log" + "reflect" + "time" + + "flashcat.cloud/categraf/pkg/conv" + "github.com/toolkits/pkg/container/list" +) + +type Sample struct { + Metric string `json:"metric"` + Timestamp time.Time `json:"timestamp"` + Value float64 `json:"value"` + Labels map[string]string `json:"labels"` +} + +func NewSample(metric string, value interface{}, labels ...map[string]string) *Sample { + floatValue, err := conv.ToFloat64(value) + if err != nil { + log.Printf("E! can not convert value type %v to float: %v\n", reflect.TypeOf(value), err) + return nil + } + + s := &Sample{ + Metric: metric, + Value: floatValue, + Labels: make(map[string]string), + } + + for i := 0; i < len(labels); i++ { + for k, v := range labels[i] { + if v == "-" { + continue + } + s.Labels[k] = v + } + } + + return s +} + +func NewSamples(fields map[string]interface{}, labels ...map[string]string) []*Sample { + count := len(fields) + samples := make([]*Sample, 0, count) + + for metric, value := range fields { + floatValue, err := conv.ToFloat64(value) + if err != nil { + continue + } + samples = append(samples, NewSample(metric, floatValue, labels...)) + } + + return samples +} + +func PushSamples(slist *list.SafeList, fields map[string]interface{}, labels ...map[string]string) { + for metric, value := range fields { + floatValue, err := conv.ToFloat64(value) + if err != nil { + continue + } + slist.PushFront(NewSample(metric, floatValue, labels...)) + } +} diff --git a/types/types.go b/types/types.go deleted file mode 100644 index 
170f7dd7..00000000 --- a/types/types.go +++ /dev/null @@ -1,10 +0,0 @@ -package types - -import "time" - -type Sample struct { - Metric string `json:"metric"` - Timestamp time.Time `json:"timestamp"` - Value float64 `json:"value"` - Labels map[string]string `json:"labels"` -} From 870b6be01a548e7396bb949c0b7abc9f39e90795 Mon Sep 17 00:00:00 2001 From: yushuangyu <yushuangyu@flashcat.cloud> Date: Mon, 4 Jul 2022 15:37:31 +0800 Subject: [PATCH 2/6] add kafka alerts and dashboard conf --- README.md | 6 +- conf/input.kafka/kafka.toml | 77 +++++++- inputs/kafka/README.md | 13 +- inputs/kafka/alerts.json | 72 ++++++++ inputs/kafka/dashboard.json | 360 ++++++++++++++++++++++++++++++++++++ inputs/kafka/kafka.go | 12 +- traces/README.md | 13 ++ 7 files changed, 536 insertions(+), 17 deletions(-) create mode 100644 traces/README.md diff --git a/README.md b/README.md index c20660bf..b73f202b 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ Click on the links to see the README of each plugin. - [ ] mongodb - [ ] rocketmq - [ ] activemq -- [ ] kafka +- [x] [kafka](inputs/kafka) - [x] [elasticsearch](inputs/elasticsearch) - [x] windows - [ ] mssql @@ -117,12 +117,12 @@ Click on the links to see the README of each plugin. - [ ] ipmi - [ ] smartctl - [ ] logging -- [ ] trace +- [x] [traces](traces) ## Thanks -Categraf is developed on the basis of Telegraf and Exporters. Thanks to the great open source community. +Categraf is developed on the basis of Telegraf, Exporters and the OpenTelemetry. Thanks to the great open source community. ## Community diff --git a/conf/input.kafka/kafka.toml b/conf/input.kafka/kafka.toml index 8b4e5132..02e16209 100644 --- a/conf/input.kafka/kafka.toml +++ b/conf/input.kafka/kafka.toml @@ -8,5 +8,78 @@ # interval_times = 1 # append some labels to metrics -labels = { cluster="cloud-n9e-kafka" } -kafka_uris = ["127.0.0.1:9092","127.0.0.1:9092","127.0.0.1:9092"] \ No newline at end of file +# instance is a preferred tag with the cluster name. 
If none is provided, the first of kafka_uris will be used +labels = { instance="kafka-cluster-01" } + +# log level only for kafka exporter +log_level = "error" + +# Address (host:port) of Kafka server. +kafka_uris = ["127.0.0.1:9092","127.0.0.1:9092","127.0.0.1:9092"] + +# Connect using SASL/PLAIN +# Default is false +# use_sasl = false + +# Only set this to false if using a non-Kafka SASL proxy +# Default is true +# use_sasl_handshake = false + +# SASL user name +# sasl_username = "username" + +# SASL user password +# sasl_password = "password" + +# The SASL SCRAM SHA algorithm sha256 or sha512 as mechanism +# sasl_mechanism = "" + +# Connect using TLS +# use_tls = false + +# The optional certificate authority file for TLS client authentication +# ca_file = "" + +# The optional certificate file for TLS client authentication +# cert_file = "" + +# The optional key file for TLS client authentication +# key_file = "" + +# If true, the server's certificate will not be checked for validity. This will make your HTTPS connections insecure +# insecure_skip_verify = true + +# Kafka broker version +# Default is 2.0.0 +# kafka_version = "2.0.0" + +# if you need to use a group from zookeeper +# Default is false +# use_zookeeper_lag = false + +# Address array (hosts) of zookeeper server. +# zookeeper_uris = [] + +# Metadata refresh interval +# Default is 1m +# metadata_refresh_interval = "1m" + +# If true, all scrapes will trigger kafka operations otherwise, they will share results. WARN: This should be disabled on large clusters +# Default is false +# allow_concurrency = false + +# Maximum number of offsets to store in the interpolation table for a partition +# Default is 1000 +# max_offsets = 1000 + +# How frequently should the interpolation table be pruned, in seconds. 
+# Default is 30 +# prune_interval_seconds = 30 + +# Regex filter for topics to be monitored +# Default is ".*" +# topics_filter_regex = ".*" + +# Regex filter for consumer groups to be monitored +# Default is ".*" +# groups_filter_regex = ".*" \ No newline at end of file diff --git a/inputs/kafka/README.md b/inputs/kafka/README.md index 9dfbba26..e8a89d37 100644 --- a/inputs/kafka/README.md +++ b/inputs/kafka/README.md @@ -1,18 +1,11 @@ # kafka -kafka 监控采集插件,封装kafka-exporter(https://github.com/davidmparrott/kafka_exporter)而来 +kafka 监控采集插件,由kafka-exporter(https://github.com/davidmparrott/kafka_exporter)封装而来。 ## Configuration -```toml -# # collect interval -# interval = 15 - -# 要监控 MySQL,首先要给出要监控的MySQL的连接地址、用户名、密码 -[[instances]] - -``` +请参考配置[示例](../../conf/input.kafka/kafka.toml) ## 监控大盘和告警规则 -本 README 的同级目录,大家可以看到 dashboard.json 就是监控大盘,导入夜莺就可以使用,alerts.json 是告警规则,也是导入夜莺就可以使用。 \ No newline at end of file +同级目录下的 dashboard.json、alerts.json 可以直接导入夜莺使用。 \ No newline at end of file diff --git a/inputs/kafka/alerts.json b/inputs/kafka/alerts.json index e69de29b..3bd622ec 100644 --- a/inputs/kafka/alerts.json +++ b/inputs/kafka/alerts.json @@ -0,0 +1,72 @@ +[ + { + "name": "数据有丢失风险-副本数小于3", + "note": "", + "prod": "", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 2, + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "sum(kafka_topic_partition_in_sync_replica) by (topic) < 3", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "service=kafka" + ] + }, + { + "name": "消费能力不足-延迟超过5分钟", + "note": "", + "prod": "", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 2, + "disabled": 1, + "prom_for_duration": 60, + 
"prom_ql": "kafka_consumer_lag_millis / 1000 > 300", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "service=kafka" + ] + } + ] \ No newline at end of file diff --git a/inputs/kafka/dashboard.json b/inputs/kafka/dashboard.json index e69de29b..58adf922 100644 --- a/inputs/kafka/dashboard.json +++ b/inputs/kafka/dashboard.json @@ -0,0 +1,360 @@ +{ + "name": "Kafka - 模板", + "tags": "Kafka Prometheus ", + "configs": { + "var": [ + { + "name": "instance", + "definition": "label_values(kafka_brokers, instance)", + "type": "query" + } + ], + "panels": [ + { + "id": "6fac3216-f9e2-45c5-8037-959ab3c98de5", + "type": "row", + "name": "overview", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 0, + "i": "6fac3216-f9e2-45c5-8037-959ab3c98de5", + "isResizable": false + }, + "collapsed": true, + "panels": [] + }, + { + "targets": [ + { + "refId": "A", + "expr": "kafka_brokers{instance=\"$instance\"}" + } + ], + "name": "brokers", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + "value": 50 + } + }, + "options": { + "standardOptions": {} + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 6, + "x": 0, + "y": 1, + "i": "b10ab025-2795-4ea1-b537-d03948324ea8", + "isResizable": true + }, + "id": "b10ab025-2795-4ea1-b537-d03948324ea8" + }, + { + "targets": [ + { + "refId": "A", + "expr": "count(count by (topic) (kafka_topic_partitions{instance=\"$instance\"}))" + } + ], + "name": "topics", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + "value": 50 + } + }, + "options": { + "standardOptions": {} + 
}, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 6, + "x": 6, + "y": 1, + "i": "8845f449-cb6c-4fa6-9930-351f106f9e52", + "isResizable": true + }, + "id": "8845f449-cb6c-4fa6-9930-351f106f9e52" + }, + { + "targets": [ + { + "refId": "A", + "expr": "sum(kafka_topic_partitions{instance=\"$instance\"})", + "legend": "" + } + ], + "name": "partitions", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + "value": 50 + } + }, + "options": { + "standardOptions": {} + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 6, + "x": 12, + "y": 1, + "i": "cc26ea7b-8860-45cd-9f62-90f42bd195f5", + "isResizable": true + }, + "id": "cc26ea7b-8860-45cd-9f62-90f42bd195f5" + }, + { + "targets": [ + { + "refId": "A", + "expr": "sum(kafka_topic_partition_replicas{instance=~\"$instance\"})" + } + ], + "name": "Replicas", + "custom": { + "textMode": "valueAndName", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": {} + }, + "options": { + "standardOptions": {} + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 3, + "w": 6, + "x": 18, + "y": 1, + "i": "98cd9cee-69ad-4533-9eed-e307a24fffa6", + "isResizable": true + }, + "id": "98cd9cee-69ad-4533-9eed-e307a24fffa6" + }, + { + "id": "79a8e48a-fdf0-4c7e-bae4-478f7b294751", + "type": "row", + "name": "throughput", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 4, + "i": "79a8e48a-fdf0-4c7e-bae4-478f7b294751", + "isResizable": false + } + }, + { + "targets": [ + { + "expr": "sum(rate(kafka_topic_partition_current_offset{instance=\"$instance\"}[1m])) by (topic)" + } + ], + "name": "Messages produced per second", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + 
"version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 0, + "y": 5, + "i": "3ceedd68-54d0-44db-9390-ceb2299619e5", + "isResizable": true + }, + "id": "3ceedd68-54d0-44db-9390-ceb2299619e5" + }, + { + "targets": [ + { + "expr": "sum(rate(kafka_consumergroup_current_offset{instance=\"$instance\"}[1m])) by (topic)" + } + ], + "name": "Messages consumed per second", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 8, + "y": 5, + "i": "a43a7752-00e6-41fb-9055-e04d17e22d99", + "isResizable": true + }, + "id": "a43a7752-00e6-41fb-9055-e04d17e22d99" + }, + { + "targets": [ + { + "expr": "sum(kafka_consumer_lag_millis{instance=\"$instance\"}) by (consumergroup, topic) ", + "legend": "{{consumergroup}} (topic: {{topic}})" + } + ], + "name": "Latency by Consumer Group", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "milliseconds" + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 16, + "y": 5, + "i": "422193ca-facf-450c-b7cb-4975447f3ffc", + "isResizable": true + }, + "id": "422193ca-facf-450c-b7cb-4975447f3ffc" + }, + { + "id": "e85fc913-f075-4284-a9bc-75e481039372", + "type": "row", + "name": "patition/replicate", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 12, + "i": "e85fc913-f075-4284-a9bc-75e481039372", + "isResizable": false + } + }, + { + "targets": [ + { + "refId": "A", + "expr": "kafka_topic_partitions{instance=\"$instance\"}", + "legend": 
"{{topic}}" + } + ], + "name": "Partitions per Topic", + "custom": { + "showHeader": true, + "calc": "lastNotNull", + "displayMode": "seriesToRows" + }, + "options": { + "standardOptions": {} + }, + "overrides": [ + {} + ], + "version": "2.0.0", + "type": "table", + "layout": { + "h": 7, + "w": 12, + "x": 0, + "y": 13, + "i": "8d50a6ec-9dde-4239-830d-d3568d0e8748", + "isResizable": true + }, + "id": "8d50a6ec-9dde-4239-830d-d3568d0e8748" + }, + { + "targets": [ + { + "refId": "A", + "expr": "kafka_topic_partition_under_replicated_partition", + "legend": "{{topic}}-{{partition}}" + } + ], + "name": "Partitions Under Replicated", + "description": "副本不同步预案\n1. Restart the Zookeeper leader.\n2. Restart the broker\\brokers that are not replicating some of the partitions.", + "custom": { + "showHeader": true, + "colorMode": "value", + "calc": "lastNotNull", + "displayMode": "seriesToRows" + }, + "options": { + "standardOptions": {} + }, + "overrides": [ + {} + ], + "version": "2.0.0", + "type": "table", + "layout": { + "h": 7, + "w": 12, + "x": 12, + "y": 13, + "i": "9aaf3255-8281-47f3-996b-8585e0f68c05", + "isResizable": true + }, + "id": "9aaf3255-8281-47f3-996b-8585e0f68c05" + } + ] + } +} \ No newline at end of file diff --git a/inputs/kafka/kafka.go b/inputs/kafka/kafka.go index 8ccc0c48..dd9a425f 100644 --- a/inputs/kafka/kafka.go +++ b/inputs/kafka/kafka.go @@ -180,10 +180,10 @@ func (ins *Instance) Init() error { ins.KafkaVersion = sarama.V2_0_0_0.String() } if len(ins.MetadataRefreshInterval) == 0 { - ins.MetadataRefreshInterval = "1s" + ins.MetadataRefreshInterval = "1m" } if ins.AllowConcurrent == nil { - flag := true + flag := false ins.AllowConcurrent = &flag } if ins.MaxOffsets <= 0 { @@ -198,6 +198,13 @@ func (ins *Instance) Init() error { if len(ins.GroupFilter) == 0 { ins.GroupFilter = ".*" } + if ins.Labels == nil { + ins.Labels = make(map[string]string) + } + _, ok := ins.Labels["instance"] + if !ok { + ins.Labels["instance"] = ins.KafkaURIs[0] + } 
options := exporter.Options{ Uri: ins.KafkaURIs, @@ -224,6 +231,7 @@ func (ins *Instance) Init() error { for k, v := range ins.Labels { encLabels = append(encLabels, fmt.Sprintf("%s=%s", k, v)) } + options.Labels = strings.Join(encLabels, ",") ins.l = level.NewFilter(klog.NewLogfmtLogger(klog.NewSyncWriter(os.Stderr)), levelFilter(ins.LogLevel)) diff --git a/traces/README.md b/traces/README.md new file mode 100644 index 00000000..58c96dd7 --- /dev/null +++ b/traces/README.md @@ -0,0 +1,13 @@ +# traces +Categraf simply wraps the OpenTelemetry Collector, which means you get full support for receiving data from and exporting to popular trace vendors, such as Jaeger and Zipkin. + +We only support the common [components](../config/traces/components.go) by default. If you want more, simply add the new ones to [components.go](../config/traces/components.go), +and make sure you configure that in the conf. + +For more details, see the official docs: +- https://opentelemetry.io/docs/collector/getting-started +- https://github.com/open-telemetry/opentelemetry-collector + +## Configuration + +Here is an [example](../conf/traces.yaml). 
\ No newline at end of file From d4970d7d905bbea0ef6a8f7f5d6c84023b0e323d Mon Sep 17 00:00:00 2001 From: yushuangyu <yushuangyu@flashcat.cloud> Date: Mon, 4 Jul 2022 15:47:53 +0800 Subject: [PATCH 3/6] update go mod --- go.mod | 6 +++--- go.sum | 54 ++++-------------------------------------------------- 2 files changed, 7 insertions(+), 53 deletions(-) diff --git a/go.mod b/go.mod index 957f4e8b..38918c8d 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf github.com/docker/docker v20.10.16+incompatible github.com/gaochao1/sw v1.0.0 - github.com/go-kit/kit v0.11.0 + github.com/go-kit/log v0.2.0 github.com/go-ping/ping v0.0.0-20211130115550-779d1e919534 github.com/go-redis/redis/v8 v8.11.5 github.com/go-sql-driver/mysql v1.6.0 @@ -93,7 +93,7 @@ require ( github.com/felixge/httpsnoop v1.0.2 // indirect github.com/freedomkk-qfeng/go-fastping v0.0.0-20160109021039-d7bb493dee3e // indirect github.com/fsnotify/fsnotify v1.5.4 // indirect - github.com/go-kit/log v0.2.0 // indirect + github.com/go-kit/kit v0.11.0 // indirect github.com/go-logfmt/logfmt v0.5.1 // indirect github.com/go-logr/logr v1.2.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -204,6 +204,6 @@ require ( ) replace ( + github.com/prometheus/client_golang => github.com/flashcatcloud/client_golang v1.12.2-0.20220704074148-3b31f0c90903 go.opentelemetry.io/collector => github.com/flashcatcloud/opentelemetry-collector v0.54.1-0.20220628041301-3b8dabd1bcd0 - github.com/prometheus/client_golang => ../../flashcatcloud/client_golang ) diff --git a/go.sum b/go.sum index 41e17a51..6edf4769 100644 --- a/go.sum +++ b/go.sum @@ -90,11 +90,8 @@ github.com/VividCortex/gohistogram v1.0.0 h1:6+hBz+qvs0JOrrNhhmR7lFxo5sINxBCGXrd github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= 
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= github.com/aliyun/aliyun-log-go-sdk v0.1.36 h1:ZWhKl1tBOvRn3/sIrFy8gMX+Hfq2u8mee1DeU96jqjc= github.com/aliyun/aliyun-log-go-sdk v0.1.36/go.mod h1:1QQ59pEJiVVXqKgbHcU6FWIgxT5RKBt+CT8AiQ2bEts= github.com/alouca/gologger v0.0.0-20120904114645-7d4b7291de9c h1:k/7/05/5kPRX7HaKyVYlsGVX6XkFTyYLqkqHzceUVlU= @@ -135,8 +132,6 @@ github.com/aws/smithy-go v1.8.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAm github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= @@ -235,6 +230,8 @@ github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga github.com/felixge/httpsnoop v1.0.1/go.mod 
h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/felixge/httpsnoop v1.0.2 h1:+nS9g82KMXccJ/wp0zyRW9ZBHFETmMGtkk+2CTTrW4o= github.com/felixge/httpsnoop v1.0.2/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/flashcatcloud/client_golang v1.12.2-0.20220704074148-3b31f0c90903 h1:CfWf8xXOpjs1G8xsE2ZAueqtPynok/pS5VHWhlwgRrg= +github.com/flashcatcloud/client_golang v1.12.2-0.20220704074148-3b31f0c90903/go.mod h1:nDOYPpTKRWyFSHGWY5QbDUvjSMBusROfFzxhmDKUNWo= github.com/flashcatcloud/opentelemetry-collector v0.54.1-0.20220628041301-3b8dabd1bcd0 h1:RUknqWUJ7dTtK5rZ602dqYZyDVJiYlY3a8Dep2zuLjU= github.com/flashcatcloud/opentelemetry-collector v0.54.1-0.20220628041301-3b8dabd1bcd0/go.mod h1:FgNzyfb4sAGb5cqusB5znETJ8Pz4OQUBGbOeGIZ2rlQ= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= @@ -267,7 +264,6 @@ github.com/gin-gonic/gin v1.7.4/go.mod h1:jD2toBW3GZUr5UMcdrwQA10I7RuaFOl/SGeDjX github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.10.0/go.mod h1:xUsJbQ/Fp4kEt7AFgCuvyX4a71u8h9jB8tj/ORgOZ7o= github.com/go-kit/kit v0.11.0 h1:IGmIEl7aHTYh6E2HlT+ptILBotjo4xl8PMDl852etiI= @@ -276,7 +272,6 @@ github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vb github.com/go-kit/log v0.2.0 h1:7i2K3eKTos3Vc0enKCfnVcgHh2olr/MyfboYq7cAcFw= github.com/go-kit/log v0.2.0/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= github.com/go-ldap/ldap v3.0.2+incompatible/go.mod 
h1:qfd9rJvER9Q0/D/Sqn1DfHRoBp40uXYvFoEVrNEPqRc= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA= @@ -315,7 +310,6 @@ github.com/godror/knownpb v0.1.0/go.mod h1:4nRFbQo1dDuwKnblRXDxrfCFYeT4hjg3GjMqe github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= github.com/gogo/googleapis v1.4.1 h1:1Yx4Myt7BxzvUr5ldGSbwYiZG6t9wGBZ+8/fX3Wvtq0= github.com/gogo/googleapis v1.4.1/go.mod h1:2lpHqI5OcWCtVElxXnPt+s8oJvMpySlOyM6xDCrzib4= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= @@ -541,11 +535,8 @@ github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc= github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= -github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod 
h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= @@ -570,7 +561,6 @@ github.com/knadh/koanf v1.4.2/go.mod h1:4NCo0q4pmU398vF9vq2jStF9MWQZ8JEDcDMHlDCr github.com/koding/multiconfig v0.0.0-20171124222453-69c27309b2d7 h1:SWlt7BoQNASbhTUD0Oy5yysI2seJ7vWuGUp///OM4TM= github.com/koding/multiconfig v0.0.0-20171124222453-69c27309b2d7/go.mod h1:Y2SaZf2Rzd0pXkLVhLlCiAXFCLSXAIbTKDivVgff/AM= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= @@ -782,7 +772,6 @@ github.com/pierrec/lz4 v2.6.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi github.com/pierrec/lz4/v3 v3.3.4/go.mod h1:280XNCGS8jAcG++AHdd6SeWnzyJ1w9oow2vbORyey8Q= github.com/pierrec/lz4/v4 v4.1.14 h1:+fL8AQEZtz/ijeNnpduH0bROTu0O3NZAlPjQxGn8LwE= github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -796,40 +785,16 @@ github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSg github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= github.com/power-devops/perfstat 
v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= -github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeDPbaTKGT+JTgUa3og= -github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= -github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= -github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= -github.com/prometheus/client_golang v1.12.2 h1:51L9cDoUHVrXx4zWYlcLQIZ+d+VXHgqnYKkIuq4g/34= -github.com/prometheus/client_golang v1.12.2/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= 
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= -github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= -github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.28.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= github.com/prometheus/common v0.32.1/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= +github.com/prometheus/common v0.34.0/go.mod h1:gB3sOl7P0TvJabZpLY5uQMpUqRCPPCyRLCZYc7JZTNE= github.com/prometheus/common v0.35.0 h1:Eyr+Pw2VymWejHqCugNaQXkAi6KayVNxaHeu6khmFBE= github.com/prometheus/common v0.35.0/go.mod h1:phzohg0JFMnBEFGxTDbfu3QyL5GI8gTQJFhYO5B3mfA= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= -github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= -github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.7.3 h1:4jVXhlkAyzOScmCkXBTOLRLTz8EeU+eyjrwB/EPq0VU= github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/prometheus v2.5.0+incompatible h1:7QPitgO2kOFG8ecuRn9O/4L9+10He72rVRJvMXrE9Hg= @@ -870,9 
+835,7 @@ github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4/go.mod h1:qsXQc7+bwAM3Q github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8= github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= @@ -1020,7 +983,6 @@ go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= go.uber.org/zap v1.21.0 h1:WefMeulhovoZ2sYXz7st6K0sLj7bBhpiFaud4r4zST8= go.uber.org/zap v1.21.0/go.mod h1:wjWOCqI0f2ZZrJF/UufIOkiC8ii6tm1iqIsLo76RfJw= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190228161510-8dd112bcdc25/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -1086,7 +1048,6 @@ golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -1170,11 +1131,9 @@ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cO golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190129075346-302c3dd5f1cc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1201,10 +1160,8 @@ golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191220220014-0732a990476f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1219,8 +1176,6 @@ golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1243,7 +1198,6 @@ 
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -1262,6 +1216,7 @@ golang.org/x/sys v0.0.0-20220227234510-4e6760a101f9/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220328115105-d36c6a25d886/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220429233432-b5fbb4746d32/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220610221304-9f5ed59c137d h1:Zu/JngovGLVi6t2J3nmAf3AoTDwuzw85YZ3b9o4yU7s= golang.org/x/sys v0.0.0-20220610221304-9f5ed59c137d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -1563,7 +1518,6 @@ gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod 
h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= From 5eabb7e85ac0d8c8b0282c19c3eddff2e9ec8221 Mon Sep 17 00:00:00 2001 From: yushuangyu <yushuangyu@flashcat.cloud> Date: Mon, 4 Jul 2022 16:09:42 +0800 Subject: [PATCH 4/6] fix build linux --- inputs/conntrack/conntrack.go | 1 + inputs/kernel/kernel.go | 1 + inputs/kernel_vmstat/kernel_vmstat.go | 1 + inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go | 1 + 4 files changed, 4 insertions(+) diff --git a/inputs/conntrack/conntrack.go b/inputs/conntrack/conntrack.go index 36bad1b0..b2d18734 100644 --- a/inputs/conntrack/conntrack.go +++ b/inputs/conntrack/conntrack.go @@ -12,6 +12,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) diff --git a/inputs/kernel/kernel.go b/inputs/kernel/kernel.go index 2090f9ba..00db3cbc 100644 --- a/inputs/kernel/kernel.go +++ b/inputs/kernel/kernel.go @@ -13,6 +13,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) diff --git a/inputs/kernel_vmstat/kernel_vmstat.go b/inputs/kernel_vmstat/kernel_vmstat.go index 12463168..126c1bc0 100644 --- a/inputs/kernel_vmstat/kernel_vmstat.go +++ b/inputs/kernel_vmstat/kernel_vmstat.go @@ -12,6 +12,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) diff --git a/inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go b/inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go index 2a1c1273..15b973c0 100644 --- 
a/inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go +++ b/inputs/linux_sysctl_fs/linux_sysctl_fs_linux.go @@ -14,6 +14,7 @@ import ( "flashcat.cloud/categraf/config" "flashcat.cloud/categraf/inputs" "flashcat.cloud/categraf/pkg/osx" + "flashcat.cloud/categraf/types" "github.com/toolkits/pkg/container/list" ) From 052520bcdba1e79d5c49665500637f053285ddbb Mon Sep 17 00:00:00 2001 From: yushuangyu <yushuangyu@flashcat.cloud> Date: Mon, 4 Jul 2022 19:09:42 +0800 Subject: [PATCH 5/6] update kafka package and cluster label --- conf/input.kafka/kafka.toml | 4 ++-- inputs/kafka/dashboard.json | 20 ++++++++++---------- inputs/kafka/kafka.go | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/conf/input.kafka/kafka.toml b/conf/input.kafka/kafka.toml index 02e16209..9c31420d 100644 --- a/conf/input.kafka/kafka.toml +++ b/conf/input.kafka/kafka.toml @@ -8,8 +8,8 @@ # interval_times = 1 # append some labels to metrics -# instance is a preferred tag with the cluster name. If none is provided, the first of kafka_uris will be used -labels = { instance="kafka-cluster-01" } +# cluster is a preferred tag with the cluster name. 
If none is provided, the first of kafka_uris will be used +labels = { cluster="kafka-cluster-01" } # log level only for kafka exporter log_level = "error" diff --git a/inputs/kafka/dashboard.json b/inputs/kafka/dashboard.json index 58adf922..a9db1011 100644 --- a/inputs/kafka/dashboard.json +++ b/inputs/kafka/dashboard.json @@ -4,8 +4,8 @@ "configs": { "var": [ { - "name": "instance", - "definition": "label_values(kafka_brokers, instance)", + "name": "cluster", + "definition": "label_values(kafka_brokers, cluster)", "type": "query" } ], @@ -29,7 +29,7 @@ "targets": [ { "refId": "A", - "expr": "kafka_brokers{instance=\"$instance\"}" + "expr": "kafka_brokers{cluster=\"$cluster\"}" } ], "name": "brokers", @@ -61,7 +61,7 @@ "targets": [ { "refId": "A", - "expr": "count(count by (topic) (kafka_topic_partitions{instance=\"$instance\"}))" + "expr": "count(count by (topic) (kafka_topic_partitions{cluster=\"$cluster\"}))" } ], "name": "topics", @@ -93,7 +93,7 @@ "targets": [ { "refId": "A", - "expr": "sum(kafka_topic_partitions{instance=\"$instance\"})", + "expr": "sum(kafka_topic_partitions{cluster=\"$cluster\"})", "legend": "" } ], @@ -126,7 +126,7 @@ "targets": [ { "refId": "A", - "expr": "sum(kafka_topic_partition_replicas{instance=~\"$instance\"})" + "expr": "sum(kafka_topic_partition_replicas{cluster=~\"$cluster\"})" } ], "name": "Replicas", @@ -168,7 +168,7 @@ { "targets": [ { - "expr": "sum(rate(kafka_topic_partition_current_offset{instance=\"$instance\"}[1m])) by (topic)" + "expr": "sum(rate(kafka_topic_partition_current_offset{cluster=\"$cluster\"}[1m])) by (topic)" } ], "name": "Messages produced per second", @@ -204,7 +204,7 @@ { "targets": [ { - "expr": "sum(rate(kafka_consumergroup_current_offset{instance=\"$instance\"}[1m])) by (topic)" + "expr": "sum(rate(kafka_consumergroup_current_offset{cluster=\"$cluster\"}[1m])) by (topic)" } ], "name": "Messages consumed per second", @@ -240,7 +240,7 @@ { "targets": [ { - "expr": 
"sum(kafka_consumer_lag_millis{instance=\"$instance\"}) by (consumergroup, topic) ", + "expr": "sum(kafka_consumer_lag_millis{cluster=\"$cluster\"}) by (consumergroup, topic) ", "legend": "{{consumergroup}} (topic: {{topic}})" } ], @@ -293,7 +293,7 @@ "targets": [ { "refId": "A", - "expr": "kafka_topic_partitions{instance=\"$instance\"}", + "expr": "kafka_topic_partitions{cluster=\"$cluster\"}", "legend": "{{topic}}" } ], diff --git a/inputs/kafka/kafka.go b/inputs/kafka/kafka.go index dd9a425f..6da87129 100644 --- a/inputs/kafka/kafka.go +++ b/inputs/kafka/kafka.go @@ -1,4 +1,4 @@ -package tpl +package kafka import ( "fmt" @@ -201,9 +201,9 @@ func (ins *Instance) Init() error { if ins.Labels == nil { ins.Labels = make(map[string]string) } - _, ok := ins.Labels["instance"] + _, ok := ins.Labels["cluster"] if !ok { - ins.Labels["instance"] = ins.KafkaURIs[0] + ins.Labels["cluster"] = ins.KafkaURIs[0] } options := exporter.Options{ From c63c9411030f45c08f071154f8f4cd475f0d08c4 Mon Sep 17 00:00:00 2001 From: yushuangyu <yushuangyu@flashcat.cloud> Date: Mon, 4 Jul 2022 19:38:13 +0800 Subject: [PATCH 6/6] update kafka dashboard --- inputs/kafka/dashboard.json | 78 +++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/inputs/kafka/dashboard.json b/inputs/kafka/dashboard.json index a9db1011..b2666468 100644 --- a/inputs/kafka/dashboard.json +++ b/inputs/kafka/dashboard.json @@ -1,6 +1,6 @@ { "name": "Kafka - 模板", - "tags": "Kafka Prometheus ", + "tags": "Kafka Prometheus", "configs": { "var": [ { @@ -9,9 +9,10 @@ "type": "query" } ], + "version": "2.0.0", "panels": [ { - "id": "6fac3216-f9e2-45c5-8037-959ab3c98de5", + "id": "51502c3a-dd6f-41c7-b8f1-87b88826c96e", "type": "row", "name": "overview", "layout": { @@ -19,11 +20,10 @@ "w": 24, "x": 0, "y": 0, - "i": "6fac3216-f9e2-45c5-8037-959ab3c98de5", + "i": "51502c3a-dd6f-41c7-b8f1-87b88826c96e", "isResizable": false }, - "collapsed": true, - "panels": [] + "collapsed": true 
}, { "targets": [ @@ -52,10 +52,10 @@ "w": 6, "x": 0, "y": 1, - "i": "b10ab025-2795-4ea1-b537-d03948324ea8", + "i": "e2c1d271-ec43-4821-aa19-451e856af755", "isResizable": true }, - "id": "b10ab025-2795-4ea1-b537-d03948324ea8" + "id": "e2c1d271-ec43-4821-aa19-451e856af755" }, { "targets": [ @@ -84,17 +84,16 @@ "w": 6, "x": 6, "y": 1, - "i": "8845f449-cb6c-4fa6-9930-351f106f9e52", + "i": "fd3a0b9f-fd67-4360-a94c-869fee7b5b98", "isResizable": true }, - "id": "8845f449-cb6c-4fa6-9930-351f106f9e52" + "id": "fd3a0b9f-fd67-4360-a94c-869fee7b5b98" }, { "targets": [ { "refId": "A", - "expr": "sum(kafka_topic_partitions{cluster=\"$cluster\"})", - "legend": "" + "expr": "sum(kafka_topic_partitions{cluster=\"$cluster\"})" } ], "name": "partitions", @@ -117,16 +116,16 @@ "w": 6, "x": 12, "y": 1, - "i": "cc26ea7b-8860-45cd-9f62-90f42bd195f5", + "i": "e228d857-746b-41b6-8d2d-0152453c46f4", "isResizable": true }, - "id": "cc26ea7b-8860-45cd-9f62-90f42bd195f5" + "id": "e228d857-746b-41b6-8d2d-0152453c46f4" }, { "targets": [ { "refId": "A", - "expr": "sum(kafka_topic_partition_replicas{cluster=~\"$cluster\"})" + "expr": "sum(kafka_topic_partition_replicas{cluster=\"$cluster\"})" } ], "name": "Replicas", @@ -147,13 +146,13 @@ "w": 6, "x": 18, "y": 1, - "i": "98cd9cee-69ad-4533-9eed-e307a24fffa6", + "i": "85438099-8d6b-4817-b9b9-1d0ed36029cd", "isResizable": true }, - "id": "98cd9cee-69ad-4533-9eed-e307a24fffa6" + "id": "85438099-8d6b-4817-b9b9-1d0ed36029cd" }, { - "id": "79a8e48a-fdf0-4c7e-bae4-478f7b294751", + "id": "0db4aac4-86cf-44cd-950e-6c6a99be8ff4", "type": "row", "name": "throughput", "layout": { @@ -161,9 +160,10 @@ "w": 24, "x": 0, "y": 4, - "i": "79a8e48a-fdf0-4c7e-bae4-478f7b294751", + "i": "0db4aac4-86cf-44cd-950e-6c6a99be8ff4", "isResizable": false - } + }, + "collapsed": true }, { "targets": [ @@ -175,7 +175,7 @@ "options": { "tooltip": { "mode": "all", - "sort": "desc" + "sort": "none" }, "legend": { "displayMode": "hidden" @@ -196,10 +196,10 @@ "w": 8, "x": 0, "y": 
5, - "i": "3ceedd68-54d0-44db-9390-ceb2299619e5", + "i": "c2ec4036-3081-45cc-b672-024c6df93833", "isResizable": true }, - "id": "3ceedd68-54d0-44db-9390-ceb2299619e5" + "id": "c2ec4036-3081-45cc-b672-024c6df93833" }, { "targets": [ @@ -211,7 +211,7 @@ "options": { "tooltip": { "mode": "all", - "sort": "desc" + "sort": "none" }, "legend": { "displayMode": "hidden" @@ -232,15 +232,15 @@ "w": 8, "x": 8, "y": 5, - "i": "a43a7752-00e6-41fb-9055-e04d17e22d99", + "i": "7ad651a6-c12c-4d46-8d01-749fa776faef", "isResizable": true }, - "id": "a43a7752-00e6-41fb-9055-e04d17e22d99" + "id": "7ad651a6-c12c-4d46-8d01-749fa776faef" }, { "targets": [ { - "expr": "sum(kafka_consumer_lag_millis{cluster=\"$cluster\"}) by (consumergroup, topic) ", + "expr": "sum(kafka_consumer_lag_millis{cluster=\"$cluster\"}) by (consumergroup, topic)", "legend": "{{consumergroup}} (topic: {{topic}})" } ], @@ -248,13 +248,13 @@ "options": { "tooltip": { "mode": "all", - "sort": "desc" + "sort": "none" }, "legend": { "displayMode": "hidden" }, "standardOptions": { - "util": "milliseconds" + "util": "humantimeMilliseconds" }, "thresholds": {} }, @@ -271,13 +271,13 @@ "w": 8, "x": 16, "y": 5, - "i": "422193ca-facf-450c-b7cb-4975447f3ffc", + "i": "855aa8f5-0c51-42d4-b9a4-5460b7cd0f5a", "isResizable": true }, - "id": "422193ca-facf-450c-b7cb-4975447f3ffc" + "id": "855aa8f5-0c51-42d4-b9a4-5460b7cd0f5a" }, { - "id": "e85fc913-f075-4284-a9bc-75e481039372", + "id": "20166830-7f85-4665-8f39-bf904267af29", "type": "row", "name": "patition/replicate", "layout": { @@ -285,9 +285,10 @@ "w": 24, "x": 0, "y": 12, - "i": "e85fc913-f075-4284-a9bc-75e481039372", + "i": "20166830-7f85-4665-8f39-bf904267af29", "isResizable": false - } + }, + "collapsed": true }, { "targets": [ @@ -300,6 +301,7 @@ "name": "Partitions per Topic", "custom": { "showHeader": true, + "colorMode": "value", "calc": "lastNotNull", "displayMode": "seriesToRows" }, @@ -316,16 +318,16 @@ "w": 12, "x": 0, "y": 13, - "i": 
"8d50a6ec-9dde-4239-830d-d3568d0e8748", + "i": "8837a52e-c9eb-4afa-acc1-c3a5dac72d3b", "isResizable": true }, - "id": "8d50a6ec-9dde-4239-830d-d3568d0e8748" + "id": "8837a52e-c9eb-4afa-acc1-c3a5dac72d3b" }, { "targets": [ { "refId": "A", - "expr": "kafka_topic_partition_under_replicated_partition", + "expr": "kafka_topic_partition_under_replicated_partition{cluster=\"$cluster\"}", "legend": "{{topic}}-{{partition}}" } ], @@ -350,10 +352,10 @@ "w": 12, "x": 12, "y": 13, - "i": "9aaf3255-8281-47f3-996b-8585e0f68c05", + "i": "dd615767-dda7-4da6-b37f-0d484553aac6", "isResizable": true }, - "id": "9aaf3255-8281-47f3-996b-8585e0f68c05" + "id": "dd615767-dda7-4da6-b37f-0d484553aac6" } ] }