diff --git a/agent/agent.go b/agent/agent.go index 8103eb4f..d34454c7 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -23,6 +23,7 @@ import ( _ "flashcat.cloud/categraf/inputs/linux_sysctl_fs" _ "flashcat.cloud/categraf/inputs/logstash" _ "flashcat.cloud/categraf/inputs/mem" + _ "flashcat.cloud/categraf/inputs/mongodb" _ "flashcat.cloud/categraf/inputs/mysql" _ "flashcat.cloud/categraf/inputs/net" _ "flashcat.cloud/categraf/inputs/net_response" diff --git a/conf/input.mongodb/mongodb.toml b/conf/input.mongodb/mongodb.toml new file mode 100644 index 00000000..888bff0b --- /dev/null +++ b/conf/input.mongodb/mongodb.toml @@ -0,0 +1,56 @@ +[[instances]] +# log level, enum: panic, fatal, error, warn, warning, info, debug, trace, defaults to info. +log_level = "info" +# append some const labels to metrics +# NOTICE! the instance label is required for dashboards +labels = { instance="mongo-cluster-01" } + +# mongodb dsn, see https://www.mongodb.com/docs/manual/reference/connection-string/ +mongodb_uri = "mongodb://127.0.0.1:27017" +# if you don't specify the username or password in the mongodb_uri, you can set them here. +# This will overwrite the dsn; it is helpful when the username or password contains special characters and you don't want to encode them. +# NOTICE! 
this user must be granted enough rights to query the needed stats, see ../../inputs/mongodb/README.md +username = "username@Bj" +password = "password@Bj" +# if set to true, use a direct connection +# direct_connect = true + +# collect_all means all the metrics are collected; if set, all the enable_xxx flags below in this section will be ignored +collect_all = true +# if set to true, collect databases metrics +# enable_db_stats = true +# if set to true, collect getDiagnosticData metrics +# enable_diagnostic_data = true +# if set to true, collect replSetGetStatus metrics +# enable_replicaset_status = true +# if set to true, collect top metrics by admin command +# enable_top_metrics = true +# if set to true, collect index metrics. You should specify one of the coll_stats_namespaces and the discovering_mode flags. +# enable_index_stats = true +# if set to true, collect collections metrics. You should specify one of the coll_stats_namespaces and the discovering_mode flags. +# enable_coll_stats = true + +# Only get stats for the collections matching this list of namespaces. If none is set, discovering_mode will be enabled. +# Example: db1.col1,db.col1 +# coll_stats_namespaces = [] +# Only get index stats for the collections matching this list of namespaces. +# Example: db1.col1,db.col1 +# index_stats_collections = [] +# if set to true, replace -1 with DESC in the key_name label of the descending_index metrics +# enable_override_descending_index = true + +# compatible mode exposes metrics with 0.1x compatible metric names, which simplifies migration from the old version to the current version. 
+# compatible_mode = true + + +# [[instances]] +# # interval = global.interval * interval_times +# interval_times = 1 + +# log_level = "error" + +# append some labels to metrics +# labels = { instance="mongo-cluster-02" } +# mongodb_uri = "mongodb://username:password@127.0.0.1:27017" +# collect_all = true +# compatible_mode = true \ No newline at end of file diff --git a/go.mod b/go.mod index 2f78aa5c..62b2e0a7 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module flashcat.cloud/categraf go 1.18 require ( + github.com/AlekSi/pointer v1.2.0 github.com/ClickHouse/clickhouse-go/v2 v2.0.15 github.com/Shopify/sarama v1.34.1 github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 @@ -42,15 +43,19 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kafkareceiver v0.54.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/zipkinreceiver v0.54.0 github.com/patrickmn/go-cache v2.1.0+incompatible + github.com/percona/percona-toolkit v0.0.0-20211210121818-b2860eee3152 + github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.12.2 github.com/prometheus/client_model v0.2.0 github.com/prometheus/common v0.35.0 github.com/prometheus/prometheus v0.36.2 github.com/shirou/gopsutil/v3 v3.22.5 + github.com/sirupsen/logrus v1.8.1 github.com/stretchr/testify v1.7.4 github.com/toolkits/pkg v1.3.0 github.com/ulricqin/gosnmp v0.0.1 github.com/xdg/scram v1.0.5 + go.mongodb.org/mongo-driver v1.9.1 go.opentelemetry.io/collector v0.54.0 go.opentelemetry.io/otel/metric v0.30.0 go.opentelemetry.io/otel/trace v1.7.0 @@ -206,7 +211,6 @@ require ( github.com/pelletier/go-toml/v2 v2.0.0-beta.8 // indirect github.com/pierrec/lz4 v2.6.1+incompatible // indirect github.com/pierrec/lz4/v4 v4.1.14 // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/prometheus/alertmanager v0.24.0 // indirect @@ -219,9 
+223,9 @@ require ( github.com/rs/cors v1.8.2 // indirect github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da // indirect github.com/scaleway/scaleway-sdk-go v1.0.0-beta.9 // indirect + github.com/shirou/gopsutil v3.21.11+incompatible // indirect github.com/shopspring/decimal v1.3.1 // indirect github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749 // indirect - github.com/sirupsen/logrus v1.8.1 // indirect github.com/spf13/afero v1.8.2 // indirect github.com/spf13/cast v1.5.0 // indirect github.com/spf13/cobra v1.4.0 // indirect @@ -240,8 +244,8 @@ require ( github.com/xdg-go/scram v1.1.1 // indirect github.com/xdg-go/stringprep v1.0.3 // indirect github.com/xdg/stringprep v1.0.3 // indirect + github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect - go.mongodb.org/mongo-driver v1.8.3 // indirect go.opencensus.io v0.23.0 // indirect go.opentelemetry.io/collector/pdata v0.54.0 // indirect go.opentelemetry.io/collector/semconv v0.54.0 // indirect @@ -269,6 +273,7 @@ require ( google.golang.org/protobuf v1.28.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.66.4 // indirect + gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 // indirect gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gotest.tools/v3 v3.2.0 // indirect diff --git a/go.sum b/go.sum index 4f623d7a..85b67c35 100644 --- a/go.sum +++ b/go.sum @@ -57,6 +57,8 @@ code.cloudfoundry.org/bytefmt v0.0.0-20190710193110-1eb035ffe2b6/go.mod h1:wN/zk contrib.go.opencensus.io/exporter/prometheus v0.4.1 h1:oObVeKo2NxpdF/fIfrPsNj6K0Prg0R0mHM+uANlYMiM= contrib.go.opencensus.io/exporter/prometheus v0.4.1/go.mod h1:t9wvfitlUjGXG2IXAZsuFq26mDGid/JwCEXp+gTG/9U= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= +github.com/AlekSi/pointer v1.2.0 h1:glcy/gc4h8HnG2Z3ZECSzZ1IX1x2JxRVuDzaJwQE0+w= 
+github.com/AlekSi/pointer v1.2.0/go.mod h1:gZGfd3dpW4vEc/UlyfKKi1roIqcCgwOIvb0tSNSBle0= github.com/Azure/azure-sdk-for-go v65.0.0+incompatible h1:HzKLt3kIwMm4KeJYTdx9EbjRYTySD/t8i1Ee/W5EGXw= github.com/Azure/azure-sdk-for-go v65.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= @@ -954,6 +956,8 @@ github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhEC github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml/v2 v2.0.0-beta.8 h1:dy81yyLYJDwMTifq24Oi/IslOslRrDSb3jwDggjz3Z0= github.com/pelletier/go-toml/v2 v2.0.0-beta.8/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= +github.com/percona/percona-toolkit v0.0.0-20211210121818-b2860eee3152 h1:3AOGevjw3JfLPqzos6VnF2L9T0UosjHu4IO8+Ogk08w= +github.com/percona/percona-toolkit v0.0.0-20211210121818-b2860eee3152/go.mod h1:CCa6vyT51VeEG5KcJ2smk4/HyxX6Aunt8RxfnwlC85A= github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pierrec/cmdflag v0.0.2/go.mod h1:a3zKGZ3cdQUfxjd0RGMLZr8xI3nvpJOB+m6o/1X5BmU= @@ -1038,6 +1042,7 @@ github.com/schollz/progressbar/v2 v2.13.2/go.mod h1:6YZjqdthH6SCZKv2rqGryrxPtfmR github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/shirou/gopsutil v2.19.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= +github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI= github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= 
github.com/shirou/gopsutil/v3 v3.22.5 h1:atX36I/IXgFiB81687vSiBI5zrMsxcIBkP9cQMJQoJA= github.com/shirou/gopsutil/v3 v3.22.5/go.mod h1:so9G9VzeHt/hsd0YwqprnjHnfARAUktauykSbr+y2gA= @@ -1141,6 +1146,7 @@ github.com/xdg/stringprep v1.0.3 h1:cmL5Enob4W83ti/ZHuZLuKD/xqJfus4fVPwE+/BDm+4= github.com/xdg/stringprep v1.0.3/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xlab/treeprint v1.1.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= +github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d h1:splanxYIlg+5LfHAM6xpdFEAYOk8iySO56hMFq6uLyA= github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -1158,8 +1164,9 @@ go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsX go.etcd.io/etcd/client/v3 v3.5.0/go.mod h1:AIKXXVX/DQXtfTEqBryiLTUXwON+GuvO6Z7lLS/oTh0= go.mongodb.org/mongo-driver v1.7.3/go.mod h1:NqaYOwnXWr5Pm7AOpO5QFxKJ503nbMse/R79oO62zWg= go.mongodb.org/mongo-driver v1.7.5/go.mod h1:VXEWRZ6URJIkUq2SCAyapmhH0ZLRBP+FT4xhp5Zvxng= -go.mongodb.org/mongo-driver v1.8.3 h1:TDKlTkGDKm9kkJVUOAXDK5/fkqKHJVwYQSpoRfB43R4= go.mongodb.org/mongo-driver v1.8.3/go.mod h1:0sQWfOeY63QTntERDJJ/0SuKK0T1uVSgKCuAROlKEPY= +go.mongodb.org/mongo-driver v1.9.1 h1:m078y9v7sBItkt1aaoe2YlvWEXcD263e1a4E1fBrJ1c= +go.mongodb.org/mongo-driver v1.9.1/go.mod h1:0sQWfOeY63QTntERDJJ/0SuKK0T1uVSgKCuAROlKEPY= go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= @@ -1787,6 +1794,8 @@ gopkg.in/inf.v0 v0.9.1 
h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.66.4 h1:SsAcf+mM7mRZo2nJNGt8mZCjG8ZRaNGMURJw7BsIST4= gopkg.in/ini.v1 v1.66.4/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 h1:VpOs+IwYnYBaFnrNAeB8UUWtL3vEUnzSCL1nVjPhqrw= +gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= diff --git a/inputs/collector.go b/inputs/collector.go index f4e4ed67..3a56fe5e 100644 --- a/inputs/collector.go +++ b/inputs/collector.go @@ -16,7 +16,7 @@ const capMetricChan = 1000 var parser = new(pp.Parser) -func Collect(e prometheus.Collector, slist *list.SafeList) error { +func Collect(e prometheus.Collector, slist *list.SafeList, constLabels ...map[string]string) error { if e == nil { return errors.New("exporter must not be nil") } @@ -54,6 +54,12 @@ func Collect(e prometheus.Collector, slist *list.SafeList) error { labels[*kv.Name] = *kv.Value } + for _, kvs := range constLabels { + for k, v := range kvs { + labels[k] = v + } + } + switch { case dtoMetric.Counter != nil: _ = slist.PushFront(types.NewSample(desc.Name(), *dtoMetric.Counter.Value, labels)) diff --git a/inputs/mongodb/README.md b/inputs/mongodb/README.md new file mode 100644 index 00000000..9087d89b --- /dev/null +++ b/inputs/mongodb/README.md @@ -0,0 +1,26 @@ +# mongodb + +mongodb 监控采集插件,由mongodb-exporter(https://github.com/percona/mongodb_exporter)封装而来。 + +## Configuration + + + +- 配置文件,[参考示例](../../conf/input.mongodb/mongodb.toml) +- 配置权限,至少授予以下权限给配置文件中用于连接 MongoDB 的 user 才能收集指标: + ``` + { + "role":"clusterMonitor", + "db":"admin" + }, + { + "role":"read", 
+ "db":"local" + } + + ``` + 更详细的权限配置请参考[官方文档](https://www.mongodb.com/docs/manual/reference/built-in-roles/#mongodb-authrole-clusterMonitor) + +## 监控大盘和告警规则 + +同级目录下的 dashboard.json、alerts.json 可以直接导入夜莺使用。 \ No newline at end of file diff --git a/inputs/mongodb/alerts.json b/inputs/mongodb/alerts.json new file mode 100644 index 00000000..e442038f --- /dev/null +++ b/inputs/mongodb/alerts.json @@ -0,0 +1,282 @@ +[ + { + "name": "Mongo出现Assert错误", + "note": "", + "prod": "", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 3, + "disabled": 0, + "prom_for_duration": 1800, + "prom_ql": "rate(mongodb_ss_asserts{assert_type=~\"regular|message\"}[5m]) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MongoAssertsDetected" + ] + }, + { + "name": "Mongo出现游标超时", + "note": "", + "prod": "", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 2, + "disabled": 0, + "prom_for_duration": 1800, + "prom_ql": "rate(mongodb_ss_metrics_cursor_timedOut[5m]) > 0", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MongoRecurrentCursorTimeout" + ] + }, + { + "name": "Mongo出现页错误中断", + "note": "", + "prod": "", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 2, + "disabled": 0, + "prom_for_duration": 1800, + "prom_ql": "rate(mongodb_ss_extra_info_page_faults[5m]) > 0", 
+ "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MongoRecurrentMemoryPageFaults" + ] + }, + { + "name": "Mongo刚刚有重启,请注意", + "note": "", + "prod": "", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 3, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "mongodb_ss_uptime < 60", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MongoRestarted" + ] + }, + { + "name": "Mongo副本集主从延迟超过30s", + "note": "", + "prod": "", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 1, + "disabled": 0, + "prom_for_duration": 60, + "prom_ql": "mongodb_mongod_replset_member_replication_lag > 30", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MongoSlaveReplicationLag(>30s)" + ] + }, + { + "name": "Mongo实例挂了", + "note": "", + "prod": "", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 1, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "mongodb_up < 1", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": 
"23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MongoServerDown" + ] + }, + { + "name": "Mongo操作平均耗时超过250毫秒", + "note": "", + "prod": "", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 2, + "disabled": 0, + "prom_for_duration": 600, + "prom_ql": "rate(mongodb_ss_opLatencies_latency[5m]) / rate(mongodb_ss_opLatencies_ops[5m]) > 250000", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MongoOperationHighLatency" + ] + }, + { + "name": "Mongo连接数已超过80%", + "note": "", + "prod": "", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 2, + "disabled": 0, + "prom_for_duration": 120, + "prom_ql": "avg by (instance) (mongodb_ss_connections{conn_type=\"current\"}) / avg by (instance) (mongodb_ss_connections{conn_type=\"available\"}) * 100 > 80", + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_etime": "23:59", + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [ + "alertname=MongoTooManyConnections(>80%)" + ] + } +] \ No newline at end of file diff --git a/inputs/mongodb/dashboard.json b/inputs/mongodb/dashboard.json new file mode 100644 index 00000000..d1cc0702 --- /dev/null +++ 
b/inputs/mongodb/dashboard.json @@ -0,0 +1,936 @@ +{ + "name": "MongoDB Overview - 模板", + "tags": "Prometheus MongoDB", + "configs": { + "var": [ + { + "name": "instance", + "definition": "label_values(mongodb_up,instance)" + } + ], + "panels": [ + { + "id": "dd7882d6-9502-4a76-845a-efdbcdb25466", + "type": "row", + "name": "Basic Info", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 0, + "i": "dd7882d6-9502-4a76-845a-efdbcdb25466", + "isResizable": false + } + }, + { + "targets": [ + { + "refId": "A", + "expr": "mongodb_up{instance=\"$instance\"}", + "time": { + "num": 1, + "unit": "hour", + "description": "小时" + } + } + ], + "name": "Up", + "description": "实例数", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": {} + }, + "options": { + "valueMappings": [ + { + "type": "range", + "match": { + "from": 1 + }, + "result": { + "color": "#53b503" + } + }, + { + "type": "range", + "match": { + "special": null, + "from": 0, + "to": 1 + }, + "result": { + "color": "#e70d0d" + } + } + ], + "standardOptions": {} + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 7, + "w": 6, + "x": 0, + "y": 1, + "i": "8ab8a2a8-9545-4e58-b9ba-34d68408fdda", + "isResizable": true + }, + "id": "8ab8a2a8-9545-4e58-b9ba-34d68408fdda" + }, + { + "targets": [ + { + "refId": "A", + "expr": "mongodb_ss_uptime{instance='$instance'}", + "time": { + "num": 1, + "unit": "hour", + "description": "小时" + } + } + ], + "name": "Uptime", + "description": "启用时长", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": { + "title": null + } + }, + "options": { + "valueMappings": [ + { + "type": "range", + "match": { + "to": 1800 + }, + "result": { + "color": "#ec7718" + } + }, + { + "type": "range", + "match": { + "from": 1800 + }, + "result": { + "color": "#53b503" + } + } + ], + "standardOptions": { + "util": "humantimeSeconds" + } + }, + "version": "2.0.0", + 
"type": "stat", + "layout": { + "h": 7, + "w": 6, + "x": 6, + "y": 1, + "i": "7ac43abc-bb8f-4af1-9c62-f24e9c467390", + "isResizable": true + }, + "id": "7ac43abc-bb8f-4af1-9c62-f24e9c467390" + }, + { + "targets": [ + { + "refId": "A", + "expr": "mongodb_ss_mem_resident{instance='$instance'} * 1024 * 1024", + "legend": "resident" + }, + { + "expr": "mongodb_ss_mem_virtual{instance='$instance'} * 1024 * 1024", + "refId": "B", + "legend": "virtual" + } + ], + "name": "Memory", + "description": "内存占用(MiB)", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "bytesIEC" + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 12, + "y": 1, + "i": "0bffd1e3-ca5b-46a8-b7ba-c04c4db74b62", + "isResizable": true + }, + "id": "0bffd1e3-ca5b-46a8-b7ba-c04c4db74b62" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(mongodb_ss_extra_info_page_faults{instance=\"$instance\"}[5m])", + "legend": "total" + } + ], + "name": "Page Faults", + "description": "页缺失中断次数 Page faults indicate that requests are processed from disk either because an index is missing or there is not enough memory for the data set. 
Consider increasing memory or sharding out.", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "none", + "decimals": null + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 18, + "y": 1, + "i": "7d00bb33-1018-4af1-a498-17cd8b517f2f", + "isResizable": true + }, + "id": "7d00bb33-1018-4af1-a498-17cd8b517f2f" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(mongodb_ss_network_bytesOut{instance='$instance'}[5m])", + "legend": "bytesOut" + }, + { + "expr": "rate(mongodb_ss_network_bytesIn{instance='$instance'}[5m])", + "refId": "B", + "legend": "bytesIn" + } + ], + "name": "Network I/O", + "description": "网络流量(byte)", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "bytesSI" + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 0, + "y": 8, + "i": "3efb0ec7-7ebf-4d0c-b6a0-b498ada6b4a5", + "isResizable": true + }, + "id": "3efb0ec7-7ebf-4d0c-b6a0-b498ada6b4a5" + }, + { + "targets": [ + { + "refId": "A", + "expr": "mongodb_ss_connections{instance=\"$instance\", conn_type=\"current\"}", + "legend": "current" + } + ], + "name": "Connections", + "description": "连接数 Keep in mind the hard limit on the maximum number of connections set by your distribution.", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": 
"off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 6, + "y": 8, + "i": "f55e9333-9e0d-4eb0-82e1-0adc38e34316", + "isResizable": true + }, + "id": "f55e9333-9e0d-4eb0-82e1-0adc38e34316" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(mongodb_ss_asserts{instance=\"$instance\"}[5m])", + "legend": "{{assert_type}}" + } + ], + "name": "Assert Events", + "description": "断言错误次数 Asserts are not important by themselves, but you can correlate spikes with other graphs.", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 12, + "y": 8, + "i": "14b7339c-07bd-4798-b127-7db7117bc664", + "isResizable": true + }, + "id": "14b7339c-07bd-4798-b127-7db7117bc664" + }, + { + "targets": [ + { + "refId": "A", + "expr": "mongodb_ss_globalLock_currentQueue{instance=\"$instance\"}", + "legend": "{{count_type}}" + } + ], + "name": "Lock Queue", + "description": "等待获取锁操作数量 Any number of queued operations for long periods of time is an indication of possible issues. 
Find the cause and fix it before requests get stuck in the queue.", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 18, + "y": 8, + "i": "5dc13288-5642-494f-b60b-d12ee568d5bd", + "isResizable": true + }, + "id": "5dc13288-5642-494f-b60b-d12ee568d5bd" + }, + { + "id": "39584c81-90e3-4a89-982a-4881429e5091", + "type": "row", + "name": "Operation Info", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 15, + "i": "39584c81-90e3-4a89-982a-4881429e5091", + "isResizable": false + } + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(mongodb_ss_opcounters{instance=\"$instance\", type!=\"command\"}[5m])", + "legend": "{{legacy_op_type}}" + }, + { + "expr": "rate(mongodb_ss_opcountersRepl{instance=\"$instance\", type!~\"(command|query|getmore)\"}[5m]) ", + "refId": "B", + "legend": "repl_{{legacy_op_type}}" + }, + { + "expr": "rate(mongodb_ss_metrics_ttl_deletedDocuments{instance=\"$instance\"}[5m]) ", + "refId": "C", + "legend": "ttl_delete" + } + ], + "name": "Command Operations", + "description": "接收请求数 Shows how many times a command is executed per second on average during the selected interval.", + "options": { + "tooltip": { + "mode": "all", + "sort": "desc" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 12, + "x": 0, + "y": 16, + "i": "cbce4c2b-c215-4aab-a093-7ae95472ef1f", + "isResizable": true + }, + "id": "cbce4c2b-c215-4aab-a093-7ae95472ef1f" + }, + { + "targets": [ + { + "refId": "A", + "expr": 
"rate(mongodb_ss_metrics_document{instance=\"$instance\"}[5m])", + "legend": "{{doc_op_type}}" + } + ], + "name": "Document Operations", + "description": "文档操作数 When used in combination with 'Command Operations', this graph can help identify write amplification. For example, when one insert or update command actually inserts or updates hundreds, thousands, or even millions of documents.", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 12, + "x": 12, + "y": 16, + "i": "e4db591b-b434-4687-b7f0-a467ef5f6cc1", + "isResizable": true + }, + "id": "e4db591b-b434-4687-b7f0-a467ef5f6cc1" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(mongodb_ss_opLatencies_latency{instance='$instance'}[5m]) / rate(mongodb_ss_opLatencies_ops{instance='$instance'}[5m]) / 1000", + "legend": "{{op_type}}" + } + ], + "name": "Response Time", + "description": "操作详情耗时(毫秒)", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "milliseconds" + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 0, + "y": 23, + "i": "32ca2e82-a2ad-4095-a352-9cd985b5dddc", + "isResizable": true + }, + "id": "32ca2e82-a2ad-4095-a352-9cd985b5dddc" + }, + { + "targets": [ + { + "refId": "A", + "expr": "sum(increase(mongodb_ss_metrics_queryExecutor_scannedObjects{instance=\"$instance\"}[5m])) / sum(increase(mongodb_ss_metrics_document{instance=\"$instance\", doc_op_type=\"returned\"}[5m]))", + "legend": "Document" + }, + { + "expr": 
"sum(increase(mongodb_ss_metrics_queryExecutor_scanned{instance=\"$instance\"}[5m])) / sum(increase(mongodb_ss_metrics_document{instance=\"$instance\", doc_op_type=\"returned\"}[5m]))", + "refId": "B", + "legend": "Index" + } + ], + "name": "Query Efficiency", + "description": "查询效率", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "percentUnit" + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 8, + "y": 23, + "i": "98e954e5-943b-46ee-ad47-bb41c408be36", + "isResizable": true + }, + "id": "98e954e5-943b-46ee-ad47-bb41c408be36" + }, + { + "targets": [ + { + "refId": "A", + "expr": "mongodb_ss_metrics_cursor_open{instance=\"$instance\"}", + "legend": "{{csr_type}}" + } + ], + "name": "Cursors", + "description": "游标数量 Helps identify why connections are increasing. 
Shows active cursors compared to cursors being automatically killed after 10 minutes due to an application not closing the connection.", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 8, + "x": 16, + "y": 23, + "i": "bc58e613-7770-42b8-86e2-3248cfe8e28e", + "isResizable": true + }, + "id": "bc58e613-7770-42b8-86e2-3248cfe8e28e" + }, + { + "id": "96898c4d-9df7-43dd-886e-66640bacb9f2", + "type": "row", + "name": "Cache Info", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 30, + "i": "96898c4d-9df7-43dd-886e-66640bacb9f2", + "isResizable": false + } + }, + { + "targets": [ + { + "refId": "A", + "expr": "mongodb_ss_wt_cache_bytes_currently_in_the_cache{instance='$instance'}", + "legend": "currently_in_the_cache" + }, + { + "expr": "mongodb_ss_wt_cache_bytes_dirty_in_the_cache_cumulative{instance='$instance'}", + "refId": "E", + "legend": "dirty_in_the_cache_cumulative" + }, + { + "expr": "mongodb_ss_wt_cache_bytes_allocated_for_updates{instance='$instance'}", + "refId": "B", + "legend": "allocated_for_updates" + }, + { + "expr": "mongodb_ss_wt_cache_bytes_belonging_to_the_history_store_table_in_the_cache{instance='$instance'}", + "refId": "D", + "legend": "belonging_to_the_history_store_table_in_the_cache" + }, + { + "expr": "mongodb_ss_wt_cache_bytes_belonging_to_page_images_in_the_cache{instance='$instance'}", + "refId": "C", + "legend": "belonging_to_page_images_in_the_cache" + }, + { + "expr": "mongodb_ss_wt_cache_bytes_not_belonging_to_page_images_in_the_cache{instance='$instance'}", + "refId": "F", + "legend": "not_belonging_to_page_images_in_the_cache" + } + ], + "name": "Cache Size", + "description": "缓存大小(byte)", + "options": { + "tooltip": { + "mode": 
"all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "bytesIEC" + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 0, + "y": 31, + "i": "074ec28a-6760-480f-8ae3-9b7a6ed37e45", + "isResizable": true + }, + "id": "074ec28a-6760-480f-8ae3-9b7a6ed37e45" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(mongodb_ss_wt_cache_bytes_read_into_cache{instance='$instance'}[5m])", + "legend": "read_into_cache" + }, + { + "expr": "rate(mongodb_ss_wt_cache_bytes_written_from_cache{instance='$instance'}[5m])", + "refId": "B", + "legend": "written_from_cache" + } + ], + "name": "Cache I/O", + "description": "写入或读取的缓存数据大小(byte)", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "bytesSI" + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 6, + "y": 31, + "i": "67b2e881-9949-4b04-a654-4e78c9e773af", + "isResizable": true + }, + "id": "67b2e881-9949-4b04-a654-4e78c9e773af" + }, + { + "targets": [ + { + "refId": "A", + "expr": "100 * sum(mongodb_ss_wt_cache_tracked_dirty_pages_in_the_cache{instance='$instance'}) / sum(mongodb_ss_wt_cache_pages_read_into_cache{instance='$instance'} + mongodb_ss_wt_cache_pages_written_from_cache{instance='$instance'})", + "legend": "dirty rate" + } + ], + "name": "Cache Dirty Pages Rate", + "description": "缓存脏页占比", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "percent" + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + 
"lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 12, + "y": 31, + "i": "3a8b5e4e-510e-4302-a1b0-f124277b2621", + "isResizable": true + }, + "id": "3a8b5e4e-510e-4302-a1b0-f124277b2621" + }, + { + "targets": [ + { + "refId": "A", + "expr": "rate(mongodb_mongod_wiredtiger_cache_evicted_total{instance='$instance'}[5m])", + "legend": "evicted pages" + } + ], + "name": "Cache Evicted Pages", + "description": "缓存剔除页数量", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": {}, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 6, + "x": 18, + "y": 31, + "i": "8a61ef96-f386-4c58-b574-8cb2d968a8bb", + "isResizable": true + }, + "id": "8a61ef96-f386-4c58-b574-8cb2d968a8bb" + }, + { + "id": "f72662cc-92dd-4112-a5ca-987d7c82766d", + "type": "row", + "name": "ReplSet Info", + "layout": { + "h": 1, + "w": 24, + "x": 0, + "y": 38, + "i": "f72662cc-92dd-4112-a5ca-987d7c82766d", + "isResizable": false + } + }, + { + "targets": [ + { + "refId": "A", + "expr": "time() - max(mongodb_rs_members_electionDate)/1000" + } + ], + "name": "Replset Election", + "description": "副本集选主时间", + "custom": { + "textMode": "value", + "colorMode": "value", + "calc": "lastNotNull", + "colSpan": 1, + "textSize": {} + }, + "options": { + "valueMappings": [ + { + "type": "range", + "match": { + "to": 1800 + }, + "result": { + "color": "#f24526" + } + }, + { + "type": "range", + "match": { + "from": 1800 + }, + "result": { + "color": "#53b503" + } + } + ], + "standardOptions": { + "util": "seconds", + "decimals": 1 + } + }, + "version": "2.0.0", + "type": "stat", + "layout": { + "h": 7, + "w": 12, + "x": 0, + "y": 39, + "i": 
"84df3245-a7fb-4aed-9879-adabbe60abec", + "isResizable": true + }, + "id": "84df3245-a7fb-4aed-9879-adabbe60abec" + }, + { + "targets": [ + { + "refId": "A", + "expr": "mongodb_mongod_replset_member_replication_lag{instance=\"$instance\"}", + "legend": "{{name}}" + } + ], + "name": "Replset Lag Seconds", + "description": "副本集成员主从同步延迟", + "options": { + "tooltip": { + "mode": "all", + "sort": "none" + }, + "legend": { + "displayMode": "hidden" + }, + "standardOptions": { + "util": "seconds" + }, + "thresholds": {} + }, + "custom": { + "drawStyle": "lines", + "lineInterpolation": "smooth", + "fillOpacity": 0.5, + "stack": "off" + }, + "version": "2.0.0", + "type": "timeseries", + "layout": { + "h": 7, + "w": 12, + "x": 12, + "y": 39, + "i": "33453bc0-6b09-4299-8d3c-bd955d72ffee", + "isResizable": true + }, + "id": "33453bc0-6b09-4299-8d3c-bd955d72ffee" + } + ] + } +} \ No newline at end of file diff --git a/inputs/mongodb/exporter/base_collector.go b/inputs/mongodb/exporter/base_collector.go new file mode 100644 index 00000000..8ed3ce5a --- /dev/null +++ b/inputs/mongodb/exporter/base_collector.go @@ -0,0 +1,84 @@ +// mongodb_exporter +// Copyright (C) 2022 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +package exporter + +import ( + "context" + "sync" + + "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "go.mongodb.org/mongo-driver/mongo" +) + +type baseCollector struct { + client *mongo.Client + logger *logrus.Logger + + lock sync.Mutex + metricsCache []prometheus.Metric +} + +// newBaseCollector creates a skeletal collector, which is used to create other collectors. +func newBaseCollector(client *mongo.Client, logger *logrus.Logger) *baseCollector { + return &baseCollector{ + client: client, + logger: logger, + } +} + +// Describe returns immediately when ctx is already done. Otherwise it runs collect, stores every +// metric it produces in metricsCache and sends the descriptor of each metric to ch. +func (d *baseCollector) Describe(ctx context.Context, ch chan<- *prometheus.Desc, collect func(mCh chan<- prometheus.Metric)) { + select { + case <-ctx.Done(): + return + default: + } + + d.lock.Lock() + defer d.lock.Unlock() + + d.metricsCache = make([]prometheus.Metric, 0, defaultCacheSize) + + // This is a copy/paste of prometheus.DescribeByCollect(d, ch) with the added functionality + // to populate the metrics cache. Since on each scrape Prometheus will call Describe and immediately + // after it will call Collect, it is safe to populate the cache here.
+ metrics := make(chan prometheus.Metric) + go func() { + collect(metrics) + close(metrics) + }() + + for m := range metrics { + d.metricsCache = append(d.metricsCache, m) // populate the cache + ch <- m.Desc() + } +} + +// Collect replays the cached metrics to ch when the cache is not empty; otherwise it runs collect +// directly against ch. +// NOTE(review): Collect never empties metricsCache, so once Describe has filled it the same metrics +// are replayed until Describe runs again -- confirm this matches how the exporter is driven. +func (d *baseCollector) Collect(ch chan<- prometheus.Metric, collect func(mCh chan<- prometheus.Metric)) { + d.lock.Lock() + defer d.lock.Unlock() + + if len(d.metricsCache) > 0 { + for _, metric := range d.metricsCache { + ch <- metric + } + + return + } + + collect(ch) +} diff --git a/inputs/mongodb/exporter/collstats_collector.go b/inputs/mongodb/exporter/collstats_collector.go new file mode 100644 index 00000000..1d9daabf --- /dev/null +++ b/inputs/mongodb/exporter/collstats_collector.go @@ -0,0 +1,148 @@ +// mongodb_exporter +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package exporter + +import ( + "context" + "strings" + + "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" +) + +// collstatsCollector gathers $collStats metrics for the configured collections or, in discovering +// mode, for the collections returned by listAllCollections. +type collstatsCollector struct { + ctx context.Context + base *baseCollector + + compatibleMode bool + discoveringMode bool + topologyInfo labelsGetter + + collections []string +} + +// newCollectionStatsCollector creates a collector for statistics about collections.
+func newCollectionStatsCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible, discovery bool, topology labelsGetter, collections []string) *collstatsCollector { + return &collstatsCollector{ + ctx: ctx, + base: newBaseCollector(client, logger), + + compatibleMode: compatible, + discoveringMode: discovery, + topologyInfo: topology, + + collections: collections, + } +} + +func (d *collstatsCollector) Describe(ch chan<- *prometheus.Desc) { + d.base.Describe(d.ctx, ch, d.collect) +} + +func (d *collstatsCollector) Collect(ch chan<- prometheus.Metric) { + d.base.Collect(ch, d.collect) +} + +func (d *collstatsCollector) collect(ch chan<- prometheus.Metric) { + + collections := d.collections + + client := d.base.client + logger := d.base.logger + + if d.discoveringMode { + namespaces, err := listAllCollections(d.ctx, client, d.collections, systemDBs) + if err != nil { + logger.Errorf("cannot auto discover databases and collections: %s", err.Error()) + + return + } + + collections = fromMapToSlice(namespaces) + } + + for _, dbCollection := range collections { + parts := strings.Split(dbCollection, ".") + if len(parts) < 2 { //nolint:gomnd + continue + } + + database := parts[0] + collection := strings.Join(parts[1:], ".") // support collections having a .
+ + aggregation := bson.D{ + { + Key: "$collStats", Value: bson.M{ + // TODO: PMM-9568 : Add support to handle histogram metrics + "latencyStats": bson.M{"histograms": false}, + "storageStats": bson.M{"scale": 1}, + }, + }, + } + // Exclude the wiredTiger and indexDetails sub-documents of storageStats from the result. + project := bson.D{ + { + Key: "$project", Value: bson.M{ + "storageStats.wiredTiger": 0, + "storageStats.indexDetails": 0, + }, + }, + } + + cursor, err := client.Database(database).Collection(collection).Aggregate(d.ctx, mongo.Pipeline{aggregation, project}) + if err != nil { + logger.Errorf("cannot get $collstats cursor for collection %s.%s: %s", database, collection, err) + + continue + } + + var stats []bson.M + if err = cursor.All(d.ctx, &stats); err != nil { + logger.Errorf("cannot get $collstats for collection %s.%s: %s", database, collection, err) + + continue + } + + logger.Debugf("$collStats metrics for %s.%s", database, collection) + debugResult(logger, stats) + + prefix := "collstats" + labels := d.topologyInfo.baseLabels() + labels["database"] = database + labels["collection"] = collection + + for _, metrics := range stats { + for _, metric := range makeMetrics(prefix, metrics, labels, d.compatibleMode) { + ch <- metric + } + } + } +} + +// fromMapToSlice flattens a database -> collections map into "database.collection" strings. +// Databases are visited in Go map iteration order, so the order of the result is not deterministic. +func fromMapToSlice(databases map[string][]string) []string { + var collections []string + for db, cols := range databases { + for _, value := range cols { + collections = append(collections, db+"."+value) + } + } + + return collections +} + +// Compile-time check that collstatsCollector implements prometheus.Collector. +var _ prometheus.Collector = (*collstatsCollector)(nil) diff --git a/inputs/mongodb/exporter/common.go b/inputs/mongodb/exporter/common.go new file mode 100644 index 00000000..353722e1 --- /dev/null +++ b/inputs/mongodb/exporter/common.go @@ -0,0 +1,230 @@ +// mongodb_exporter +// Copyright (C) 2022 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option)
any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package exporter + +import ( + "context" + "sort" + "strings" + + "github.com/AlekSi/pointer" + "github.com/pkg/errors" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" +) + +// systemDBs lists the databases that are excluded when collections are auto-discovered or counted. +var systemDBs = []string{"admin", "config", "local"} //nolint:gochecknoglobals + +// listCollections returns the names of the collections in database. When filterInNamespaces is not +// empty, only collections whose name matches the collection part of one of the namespaces are +// returned; that part is used as a case-insensitive regular expression. +func listCollections(ctx context.Context, client *mongo.Client, database string, filterInNamespaces []string) ([]string, error) { + filter := bson.D{} // Default=empty -> list all collections + + // if there is a filter with the list of collections we want, create a filter like + // $or: { + // {"$regex": "collection1"}, + // {"$regex": "collection2"}, + // } + if len(filterInNamespaces) > 0 { + matchExpressions := []bson.D{} + + for _, namespace := range filterInNamespaces { + parts := strings.Split(namespace, ".") // db.collection.name.with.dots + if len(parts) > 1 { + // The part before the first dot is the database name. + // The rest is the collection name and it can have dots. We need to rebuild it.
+ collection := strings.Join(parts[1:], ".") + matchExpressions = append(matchExpressions, + bson.D{{Key: "name", Value: primitive.Regex{Pattern: collection, Options: "i"}}}) + } + } + + if len(matchExpressions) > 0 { + filter = bson.D{{Key: "$or", Value: matchExpressions}} + } + } + + collections, err := client.Database(database).ListCollectionNames(ctx, filter) + if err != nil { + return nil, errors.Wrap(err, "cannot get the list of collections for discovery") + } + + return collections, nil +} + +// databases returns the list of databases matching the filters. +// - filterInNamespaces: Include only the databases whose name equals the database part (the text +// before the first dot) of any namespace in this list. Empty entries are ignored. +// +// - exclude: List of databases to be excluded. Useful to ignore system databases. +func databases(ctx context.Context, client *mongo.Client, filterInNamespaces []string, exclude []string) ([]string, error) { + opts := &options.ListDatabasesOptions{NameOnly: pointer.ToBool(true), AuthorizedDatabases: pointer.ToBool(true)} + + filter := bson.D{} + + if excludeFilter := makeExcludeFilter(exclude); excludeFilter != nil { + filter = append(filter, *excludeFilter) + } + + if namespacesFilter := makeDBsFilter(filterInNamespaces); namespacesFilter != nil { + filter = append(filter, *namespacesFilter) + } + + dbNames, err := client.ListDatabaseNames(ctx, filter, opts) + if err != nil { + return nil, errors.Wrap(err, "cannot get the database names list") + } + + return dbNames, nil +} + +// makeExcludeFilter builds an $and filter with one "name != dbname" clause per excluded database. +// It returns nil when exclude is empty. +func makeExcludeFilter(exclude []string) *primitive.E { + filterExpressions := []bson.D{} + for _, dbname := range exclude { + filterExpressions = append(filterExpressions, + bson.D{{Key: "name", Value: bson.D{{Key: "$ne", Value: dbname}}}}, + ) + } + + if len(filterExpressions) == 0 { + return nil + } + + return &primitive.E{Key: "$and", Value: filterExpressions} +} + +// makeDBsFilter builds an $or filter that matches the database part of every non-empty namespace. +// It returns nil when no namespace is left after removing the empty ones. +func 
makeDBsFilter(filterInNamespaces []string) *primitive.E { + filterExpressions := []bson.D{} + + nss := removeEmptyStrings(filterInNamespaces) + for _, namespace := range nss { + parts := strings.Split(namespace, ".") + filterExpressions = append(filterExpressions, + bson.D{{Key: "name", Value: bson.D{{Key: "$eq", Value: parts[0]}}}}, + ) + } + + if len(filterExpressions) == 0 { + return nil + } + + return &primitive.E{Key: "$or", Value: filterExpressions} +} + +func removeEmptyStrings(items []string) []string { + cleanList := []string{} + + for _, item := range items { + if item == "" { + continue + } + cleanList = append(cleanList, item) + } + + return cleanList +} + +func unique(slice []string) []string { + keys := make(map[string]bool) + list := []string{} + + for _, entry := range slice { + if _, ok := keys[entry]; !ok { + keys[entry] = true + list = append(list, entry) + } + } + + return list +} + +func listAllCollections(ctx context.Context, client *mongo.Client, filterInNamespaces []string, excludeDBs []string) (map[string][]string, error) { + namespaces := make(map[string][]string) + + dbs, err := databases(ctx, client, filterInNamespaces, excludeDBs) + if err != nil { + return nil, errors.Wrap(err, "cannot make the list of databases to list all collections") + } + + filterNS := removeEmptyStrings(filterInNamespaces) + + // If there are no specified namespaces to search for collections, it means all dbs should be included. + if len(filterNS) == 0 { + filterNS = append(filterNS, dbs...) 
+ } + + for _, db := range dbs { + for _, namespace := range filterNS { + parts := strings.Split(namespace, ".") + dbname := strings.TrimSpace(parts[0]) + + if dbname == "" || dbname != db { + continue + } + + colls, err := listCollections(ctx, client, db, []string{namespace}) + if err != nil { + return nil, errors.Wrapf(err, "cannot list the collections for %q", db) + } + + if _, ok := namespaces[db]; !ok { + namespaces[db] = []string{} + } + + namespaces[db] = append(namespaces[db], colls...) + } + } + + // Deduplicate and sort every collection list so the result is deterministic (makes it testable). + for db, colls := range namespaces { + uc := unique(colls) + sort.Strings(uc) + namespaces[db] = uc + } + + return namespaces, nil +} + +// nonSystemCollectionsCount returns how many collections listCollections reports, summed over every +// database returned by databases for includeNamespaces with the system databases excluded. +func nonSystemCollectionsCount(ctx context.Context, client *mongo.Client, includeNamespaces []string, filterInCollections []string) (int, error) { + databases, err := databases(ctx, client, includeNamespaces, systemDBs) + if err != nil { + return 0, errors.Wrap(err, "cannot retrieve the collection names for count collections") + } + + var count int + + for _, dbname := range databases { + colls, err := listCollections(ctx, client, dbname, filterInCollections) + if err != nil { + return 0, errors.Wrap(err, "cannot get collections count") + } + count += len(colls) + } + + return count, nil +} + +// splitNamespace splits ns at the first dot into database and collection. The collection may itself +// contain dots; it is empty when ns has no dot. +func splitNamespace(ns string) (database, collection string) { + parts := strings.Split(ns, ".") + if len(parts) < 2 { // there is no collection? 
+ return parts[0], "" + } + + return parts[0], strings.Join(parts[1:], ".") +} diff --git a/inputs/mongodb/exporter/dbstats_collector.go b/inputs/mongodb/exporter/dbstats_collector.go new file mode 100644 index 00000000..0e01389e --- /dev/null +++ b/inputs/mongodb/exporter/dbstats_collector.go @@ -0,0 +1,101 @@ +// mongodb_exporter +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package exporter + +import ( + "context" + + "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" +) + +// dbstatsCollector gathers dbStats metrics for the databases selected through databaseFilter. +type dbstatsCollector struct { + ctx context.Context + base *baseCollector + + compatibleMode bool + topologyInfo labelsGetter + + databaseFilter []string +} + +// newDBStatsCollector creates a collector for statistics on database storage. 
+func newDBStatsCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible bool, topology labelsGetter, databaseRegex []string) *dbstatsCollector { + return &dbstatsCollector{ + ctx: ctx, + base: newBaseCollector(client, logger), + + compatibleMode: compatible, + topologyInfo: topology, + + databaseFilter: databaseRegex, + } +} + +func (d *dbstatsCollector) Describe(ch chan<- *prometheus.Desc) { + d.base.Describe(d.ctx, ch, d.collect) +} + +func (d *dbstatsCollector) Collect(ch chan<- prometheus.Metric) { + d.base.Collect(ch, d.collect) +} + +func (d *dbstatsCollector) collect(ch chan<- prometheus.Metric) { + + logger := d.base.logger + client := d.base.client + + dbNames, err := databases(d.ctx, client, d.databaseFilter, nil) + if err != nil { + logger.Errorf("Failed to get database names: %s", err) + + return + } + + logger.Debugf("getting stats for databases: %v", dbNames) + for _, db := range dbNames { + var dbStats bson.M + cmd := bson.D{{Key: "dbStats", Value: 1}, {Key: "scale", Value: 1}} + r := client.Database(db).RunCommand(d.ctx, cmd) + err := r.Decode(&dbStats) + if err != nil { + logger.Errorf("Failed to get $dbstats for database %s: %s", db, err) + + continue + } + + logger.Debugf("$dbStats metrics for %s", db) + debugResult(logger, dbStats) + + prefix := "dbstats" + + labels := d.topologyInfo.baseLabels() + + // Since all dbstats will have the same fields, we need to use a label + // to differentiate metrics between different databases. 
+ labels["database"] = db + + newMetrics := makeMetrics(prefix, dbStats, labels, d.compatibleMode) + for _, metric := range newMetrics { + ch <- metric + } + } +} + +var _ prometheus.Collector = (*dbstatsCollector)(nil) diff --git a/inputs/mongodb/exporter/debug.go b/inputs/mongodb/exporter/debug.go new file mode 100644 index 00000000..f384c4ea --- /dev/null +++ b/inputs/mongodb/exporter/debug.go @@ -0,0 +1,43 @@ +// mongodb_exporter +// Copyright (C) 2022 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package exporter + +import ( + "encoding/json" + "fmt" + "os" + + "github.com/sirupsen/logrus" +) + +// debugResult writes m as indented JSON to standard error when debug logging is enabled on log. +// It does nothing at any other log level; a marshalling failure is reported through log instead. +func debugResult(log *logrus.Logger, m interface{}) { + if !log.IsLevelEnabled(logrus.DebugLevel) { + return + } + + debugStr, err := json.MarshalIndent(m, "", " ") + if err != nil { + log.Errorf("cannot marshal struct for debug: %s", err) + return + } + + // don't use logrus because: + // 1. It will escape new lines and " making it harder to read and to use + // 2. It will add timestamp + // 3. 
This way is easier to copy/paste to put the info in a ticket + fmt.Fprintln(os.Stderr, string(debugStr)) +} diff --git a/inputs/mongodb/exporter/diagnostic_data_collector.go b/inputs/mongodb/exporter/diagnostic_data_collector.go new file mode 100644 index 00000000..bae4d14d --- /dev/null +++ b/inputs/mongodb/exporter/diagnostic_data_collector.go @@ -0,0 +1,112 @@ +// mongodb_exporter +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package exporter + +import ( + "context" + + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" +) + +// diagnosticDataCollector gathers metrics from the getDiagnosticData command run on the admin database. +type diagnosticDataCollector struct { + ctx context.Context + base *baseCollector + + compatibleMode bool + topologyInfo labelsGetter +} + +// newDiagnosticDataCollector creates a collector for diagnostic information. 
+func newDiagnosticDataCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible bool, topology labelsGetter) *diagnosticDataCollector { + return &diagnosticDataCollector{ + ctx: ctx, + base: newBaseCollector(client, logger), + + compatibleMode: compatible, + topologyInfo: topology, + } +} + +func (d *diagnosticDataCollector) Describe(ch chan<- *prometheus.Desc) { + d.base.Describe(d.ctx, ch, d.collect) +} + +func (d *diagnosticDataCollector) Collect(ch chan<- prometheus.Metric) { + d.base.Collect(ch, d.collect) +} + +func (d *diagnosticDataCollector) collect(ch chan<- prometheus.Metric) { + + var m bson.M + + logger := d.base.logger + client := d.base.client + + cmd := bson.D{{Key: "getDiagnosticData", Value: "1"}} + res := client.Database("admin").RunCommand(d.ctx, cmd) + if res.Err() != nil { + if isArbiter, _ := isArbiter(d.ctx, client); isArbiter { + return + } + } + + if err := res.Decode(&m); err != nil { + logger.Errorf("cannot run getDiagnosticData: %s", err) + } + + if m == nil || m["data"] == nil { + logger.Error("cannot run getDiagnosticData: response is empty") + } + + m, ok := m["data"].(bson.M) + if !ok { + err := errors.Wrapf(errUnexpectedDataType, "%T for data field", m["data"]) + logger.Errorf("cannot decode getDiagnosticData: %s", err) + } + + logger.Debug("getDiagnosticData result") + debugResult(logger, m) + + metrics := makeMetrics("", m, d.topologyInfo.baseLabels(), d.compatibleMode) + metrics = append(metrics, locksMetrics(m)...) + metrics = append(metrics, specialMetrics(d.ctx, client, m, logger)...) + // NOTE(review): the Decode, empty-response and type-assertion failures above are only logged, so + // execution reaches this point even when m is nil. The "%T" in the assertion error is also computed + // from m after it has been reassigned by the assertion, not from the original response -- confirm intent.
+ if cem, err := cacheEvictedTotalMetric(m); err == nil { + metrics = append(metrics, cem) + } + + if d.compatibleMode { + nodeType, err := getNodeType(d.ctx, client) + if err != nil { + logger.WithFields(logrus.Fields{ + "component": "diagnosticDataCollector", + }).Errorf("Cannot get node type to check if this is a mongos: %s", err) + } else if nodeType == typeMongos { + metrics = append(metrics, mongosMetrics(d.ctx, client, logger)...) + } + } + + for _, metric := range metrics { + ch <- metric + } +} + +// Compile-time check that diagnosticDataCollector implements prometheus.Collector. +var _ prometheus.Collector = (*diagnosticDataCollector)(nil) diff --git a/inputs/mongodb/exporter/exporter.go b/inputs/mongodb/exporter/exporter.go new file mode 100644 index 00000000..89c62d62 --- /dev/null +++ b/inputs/mongodb/exporter/exporter.go @@ -0,0 +1,269 @@ +// mongodb_exporter +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +// Package exporter implements the collectors and metrics handlers. +package exporter + +import ( + "context" + "fmt" + "sync" + + "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" +) + +// Compile-time check that Exporter implements prometheus.Collector. +var _ prometheus.Collector = (*Exporter)(nil) + +// Exporter holds Exporter methods and attributes. 
+type Exporter struct { + client *mongo.Client + clientMu sync.Mutex + logger *logrus.Logger + opts *Opts + lock *sync.Mutex + totalCollectionsCount int + + cs []prometheus.Collector +} + +// Opts holds new exporter options. +type Opts struct { + URI string + Username string + Password string + + // Only get stats for the collections matching this list of namespaces. + // Example: db1.col1,db.col1 + CollStatsNamespaces []string + IndexStatsCollections []string + CollStatsLimit int + CompatibleMode bool + DirectConnect bool + DiscoveringMode bool + CollectAll bool + EnableDBStats bool + EnableDiagnosticData bool + EnableReplicasetStatus bool + EnableTopMetrics bool + EnableIndexStats bool + EnableCollStats bool + EnableOverrideDescendingIndex bool + + Logger *logrus.Logger +} + +var ( + errCannotHandleType = fmt.Errorf("don't know how to handle data type") + errUnexpectedDataType = fmt.Errorf("unexpected data type") +) + +const ( + // defaultCacheSize is the initial capacity of the metrics cache allocated by baseCollector.Describe. + defaultCacheSize = 1000 +) + +// New connects to the database and returns a new Exporter instance. +// A nil opts is replaced by the zero Opts and a nil Logger by logrus.New(). +// NOTE(review): when initCollectors fails, New returns that error together with the non-nil, still +// connected Exporter, so the caller has to Close it to release the connection. +func New(opts *Opts) (*Exporter, error) { + if opts == nil { + opts = new(Opts) + } + + if opts.Logger == nil { + opts.Logger = logrus.New() + } + + exp := &Exporter{ + logger: opts.Logger, + opts: opts, + lock: &sync.Mutex{}, + totalCollectionsCount: -1, // Not calculated yet; waiting for the db connection. 
+ } + + ctx := context.Background() + _, err := exp.getClient(ctx) + if err != nil { + return nil, fmt.Errorf("cannot connect to mongo: %v", err) + } + + return exp, exp.initCollectors(ctx, exp.client) +} + +func (e *Exporter) Close() { + if e.client != nil { + e.client.Disconnect(context.Background()) + } +} + +func (e *Exporter) Collect(ch chan<- prometheus.Metric) { + wg := new(sync.WaitGroup) + + for idx := range e.cs { + wg.Add(1) + + go func(i int) { + defer wg.Done() + + e.cs[i].Collect(ch) + }(idx) + } + + wg.Wait() +} + +func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { + wg := new(sync.WaitGroup) + wg.Add(len(e.cs)) + + for idx := range e.cs { + go func(i int) { + defer wg.Done() + + e.cs[i].Describe(ch) + }(idx) + } + + wg.Wait() +} + +func (e *Exporter) initCollectors(ctx context.Context, client *mongo.Client) error { + gc := newGeneralCollector(ctx, client, e.opts.Logger) + e.cs = append(e.cs, gc) + + // Enable collectors like collstats and indexstats depending on the number of collections + // present in the database. + // NOTE(review): nothing in this file assigns totalCollectionsCount after New sets it to -1, so with + // a positive CollStatsLimit this check presumably always passes -- confirm. + limitsOk := false + if e.opts.CollStatsLimit <= 0 || // Unlimited + e.getTotalCollectionsCount() <= e.opts.CollStatsLimit { + limitsOk = true + } + + // CollectAll switches on every collector and, when no namespaces were given, discovering mode. + if e.opts.CollectAll { + if len(e.opts.CollStatsNamespaces) == 0 { + e.opts.DiscoveringMode = true + } + e.opts.EnableDiagnosticData = true + e.opts.EnableDBStats = true + e.opts.EnableCollStats = true + e.opts.EnableTopMetrics = true + e.opts.EnableReplicasetStatus = true + e.opts.EnableIndexStats = true + } + + topologyInfo := newTopologyInfo(ctx, client) + if e.opts.EnableDiagnosticData { + ddc := newDiagnosticDataCollector(ctx, client, e.opts.Logger, + e.opts.CompatibleMode, topologyInfo) + e.cs = append(e.cs, ddc) + } + + // If we manually set the collection names we want or auto discovery is set. 
+ if (len(e.opts.CollStatsNamespaces) > 0 || e.opts.DiscoveringMode) && e.opts.EnableCollStats && limitsOk { + cc := newCollectionStatsCollector(ctx, client, e.opts.Logger, + e.opts.CompatibleMode, e.opts.DiscoveringMode, + topologyInfo, e.opts.CollStatsNamespaces) + e.cs = append(e.cs, cc) + } + + // If we manually set the collection names we want or auto discovery is set. + if (len(e.opts.IndexStatsCollections) > 0 || e.opts.DiscoveringMode) && e.opts.EnableIndexStats && limitsOk { + ic := newIndexStatsCollector(ctx, client, e.opts.Logger, + e.opts.DiscoveringMode, e.opts.EnableOverrideDescendingIndex, + topologyInfo, e.opts.IndexStatsCollections) + e.cs = append(e.cs, ic) + } + + if e.opts.EnableDBStats && limitsOk { + cc := newDBStatsCollector(ctx, client, e.opts.Logger, + e.opts.CompatibleMode, topologyInfo, nil) + e.cs = append(e.cs, cc) + } + + nodeType, err := getNodeType(ctx, client) + if err != nil { + return fmt.Errorf("cannot get node type to check if this is a mongos : %s", err) + } + + if e.opts.EnableTopMetrics && nodeType != typeMongos && limitsOk { + tc := newTopCollector(ctx, client, e.opts.Logger, + e.opts.CompatibleMode, topologyInfo) + e.cs = append(e.cs, tc) + } + + // replSetGetStatus is not supported through mongos. + if e.opts.EnableReplicasetStatus && nodeType != typeMongos { + rsgsc := newReplicationSetStatusCollector(ctx, client, e.opts.Logger, + e.opts.CompatibleMode, topologyInfo) + e.cs = append(e.cs, rsgsc) + } + + return nil +} + +// getTotalCollectionsCount returns totalCollectionsCount while holding e.lock. +func (e *Exporter) getTotalCollectionsCount() int { + e.lock.Lock() + defer e.lock.Unlock() + + return e.totalCollectionsCount +} + +// getClient returns the shared client, connecting first when none has been created yet. +// NOTE(review): ctx is not used; the connection attempt runs with context.Background(). +func (e *Exporter) getClient(ctx context.Context) (*mongo.Client, error) { + // Get global client. Maybe it must be initialized first. + // Initialization is retried with every scrape until it succeeds once. + e.clientMu.Lock() + defer e.clientMu.Unlock() + + // If client is already initialized, return it. 
+ if e.client != nil { + return e.client, nil + } + + client, err := connect(context.Background(), e.opts.URI, e.opts.Username, e.opts.Password, e.opts.DirectConnect) + if err != nil { + return nil, err + } + + e.client = client + return client, nil +} + +func connect(ctx context.Context, dsn, username, password string, directConnect bool) (*mongo.Client, error) { + opts := options.Client().ApplyURI(dsn) + opts.SetDirect(directConnect) + opts.SetAppName("mongodb_exporter") + + if len(username) > 0 || len(password) > 0 { + opts.SetAuth(options.Credential{ + Username: username, + Password: password, + }) + } + + client, err := mongo.Connect(ctx, opts) + if err != nil { + return nil, err + } + + if err = client.Ping(ctx, nil); err != nil { + // Ping failed. Close background connections. Error is ignored since the ping error is more relevant. + _ = client.Disconnect(ctx) + + return nil, fmt.Errorf("cannot connect to MongoDB: %w", err) + } + + return client, nil +} diff --git a/inputs/mongodb/exporter/general_collector.go b/inputs/mongodb/exporter/general_collector.go new file mode 100644 index 00000000..355b7b10 --- /dev/null +++ b/inputs/mongodb/exporter/general_collector.go @@ -0,0 +1,71 @@ +// mongodb_exporter +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +package exporter + +import ( + "context" + + "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/readpref" +) + +// This collector is always enabled and it is not directly related to any particular MongoDB +// command to gather stats. +type generalCollector struct { + ctx context.Context + base *baseCollector +} + +// newGeneralCollector creates a collector for MongoDB connectivity status. +func newGeneralCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger) *generalCollector { + return &generalCollector{ + ctx: ctx, + base: newBaseCollector(client, logger), + } +} + +func (d *generalCollector) Describe(ch chan<- *prometheus.Desc) { + d.base.Describe(d.ctx, ch, d.collect) +} + +func (d *generalCollector) Collect(ch chan<- prometheus.Metric) { + d.base.Collect(ch, d.collect) +} + +func (d *generalCollector) collect(ch chan<- prometheus.Metric) { + ch <- mongodbUpMetric(d.ctx, d.base.client, d.base.logger) +} + +func mongodbUpMetric(ctx context.Context, client *mongo.Client, log *logrus.Logger) prometheus.Metric { + var value float64 + + if client != nil { + if err := client.Ping(ctx, readpref.PrimaryPreferred()); err == nil { + value = 1 + } else { + log.Errorf("error while checking mongodb connection: %s. 
mongo_up is set to 0", err) + } + } + + d := prometheus.NewDesc("mongodb_up", "Whether MongoDB is up.", nil, nil) + + return prometheus.MustNewConstMetric(d, prometheus.GaugeValue, value) +} + +var _ prometheus.Collector = (*generalCollector)(nil) diff --git a/inputs/mongodb/exporter/indexstats_collector.go b/inputs/mongodb/exporter/indexstats_collector.go new file mode 100644 index 00000000..aa78a4b3 --- /dev/null +++ b/inputs/mongodb/exporter/indexstats_collector.go @@ -0,0 +1,160 @@ +// mongodb_exporter +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package exporter + +import ( + "context" + "fmt" + "strings" + + "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" +) + +type indexstatsCollector struct { + ctx context.Context + base *baseCollector + + discoveringMode bool + overrideDescendingIndex bool + topologyInfo labelsGetter + + collections []string +} + +// newIndexStatsCollector creates a collector for statistics on index usage. 
+func newIndexStatsCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, discovery, overrideDescendingIndex bool, topology labelsGetter, collections []string) *indexstatsCollector { + return &indexstatsCollector{ + ctx: ctx, + base: newBaseCollector(client, logger), + + discoveringMode: discovery, + topologyInfo: topology, + overrideDescendingIndex: overrideDescendingIndex, + + collections: collections, + } +} + +func (d *indexstatsCollector) Describe(ch chan<- *prometheus.Desc) { + d.base.Describe(d.ctx, ch, d.collect) +} + +func (d *indexstatsCollector) Collect(ch chan<- prometheus.Metric) { + d.base.Collect(ch, d.collect) +} + +func (d *indexstatsCollector) collect(ch chan<- prometheus.Metric) { + + collections := d.collections + + logger := d.base.logger + client := d.base.client + + if d.discoveringMode { + namespaces, err := listAllCollections(d.ctx, client, d.collections, systemDBs) + if err != nil { + logger.Errorf("cannot auto discover databases and collections") + + return + } + + collections = fromMapToSlice(namespaces) + } + + for _, dbCollection := range collections { + parts := strings.Split(dbCollection, ".") + if len(parts) < 2 { //nolint:gomnd + continue + } + + database := parts[0] + collection := strings.Join(parts[1:], ".") + + aggregation := bson.D{ + {Key: "$indexStats", Value: bson.M{}}, + } + + cursor, err := client.Database(database).Collection(collection).Aggregate(d.ctx, mongo.Pipeline{aggregation}) + if err != nil { + logger.Errorf("cannot get $indexStats cursor for collection %s.%s: %s", database, collection, err) + + continue + } + + var stats []bson.M + if err = cursor.All(d.ctx, &stats); err != nil { + logger.Errorf("cannot get $indexStats for collection %s.%s: %s", database, collection, err) + + continue + } + + d.base.logger.Debugf("indexStats for %s.%s", database, collection) + + debugResult(d.base.logger, stats) + + for _, metric := range stats { + indexName := fmt.Sprintf("%s", metric["name"]) + // Override 
the label name + if d.overrideDescendingIndex { + indexName = strings.ReplaceAll(fmt.Sprintf("%s", metric["name"]), "-1", "DESC") + } + + // prefix and labels are needed to avoid duplicated metric names since the metrics are the + // same, for different collections. + prefix := "indexstats" + labels := d.topologyInfo.baseLabels() + labels["database"] = database + labels["collection"] = collection + labels["key_name"] = indexName + + metrics := sanitizeMetrics(metric) + for _, metric := range makeMetrics(prefix, metrics, labels, false) { + ch <- metric + } + } + } +} + +// According to specs, we should expose only this 2 metrics. 'building' might not exist. +func sanitizeMetrics(m bson.M) bson.M { + ops := float64(0) + + if val := walkTo(m, []string{"accesses", "ops"}); val != nil { + if f, err := asFloat64(val); err == nil { + ops = *f + } + } + + filteredMetrics := bson.M{ + "accesses": bson.M{ + "ops": ops, + }, + } + + if val := walkTo(m, []string{"building"}); val != nil { + if f, err := asFloat64(val); err == nil { + filteredMetrics["building"] = *f + } + } + + return filteredMetrics +} + +var _ prometheus.Collector = (*indexstatsCollector)(nil) diff --git a/inputs/mongodb/exporter/metrics.go b/inputs/mongodb/exporter/metrics.go new file mode 100644 index 00000000..bc23166b --- /dev/null +++ b/inputs/mongodb/exporter/metrics.go @@ -0,0 +1,430 @@ +// mongodb_exporter +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. 
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package exporter + +import ( + "regexp" + "strings" + "time" + + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" +) + +const ( + exporterPrefix = "mongodb_" +) + +type rawMetric struct { + // Full Qualified Name + fqName string + // Help string + help string + // Label names + ln []string + // Label values + lv []string + // Metric value as float64 + val float64 + // Value type + vt prometheus.ValueType +} + +//nolint:gochecknoglobals +var ( + // Rules to shrink metric names + // Please do not change the definitions order: rules are sorted by precedence. + prefixes = [][]string{ + {"serverStatus.wiredTiger.transaction", "ss_wt_txn"}, + {"serverStatus.wiredTiger", "ss_wt"}, + {"serverStatus", "ss"}, + {"replSetGetStatus", "rs"}, + {"systemMetrics", "sys"}, + {"local.oplog.rs.stats.wiredTiger", "oplog_stats_wt"}, + {"local.oplog.rs.stats", "oplog_stats"}, + {"collstats_storage.wiredTiger", "collstats_storage_wt"}, + {"collstats_storage.indexDetails", "collstats_storage_idx"}, + {"collStats.storageStats", "collstats_storage"}, + {"collStats.latencyStats", "collstats_latency"}, + } + + // This map is used to add labels to some specific metrics. + // For example, the fields under the serverStatus.opcounters. structure have this + // signature: + // + // "opcounters": primitive.M{ + // "insert": int32(4), + // "query": int32(2118), + // "update": int32(14), + // "delete": int32(22), + // "getmore": int32(9141), + // "command": int32(67923), + // }, + // + // Applying the renaming rules, serverStatus will become ss but instead of having metrics + // with the form ss.opcounters. where operation is each one of the fields inside + // the structure (insert, query, update, etc), those keys will become labels for the same + // metric name. 
The label name is defined as the value for each metric name in the map and + // the value the label will have is the field name in the structure. Example. + // + // mongodb_ss_opcounters{legacy_op_type="insert"} 4 + // mongodb_ss_opcounters{legacy_op_type="query"} 2118 + // mongodb_ss_opcounters{legacy_op_type="update"} 14 + // mongodb_ss_opcounters{legacy_op_type="delete"} 22 + // mongodb_ss_opcounters{legacy_op_type="getmore"} 9141 + // mongodb_ss_opcounters{legacy_op_type="command"} 67923 + // + nodeToPDMetrics = map[string]string{ + "collStats.storageStats.indexDetails.": "index_name", + "globalLock.activeQueue.": "count_type", + "globalLock.locks.": "lock_type", + "serverStatus.asserts.": "assert_type", + "serverStatus.connections.": "conn_type", + "serverStatus.globalLock.currentQueue.": "count_type", + "serverStatus.metrics.commands.": "cmd_name", + "serverStatus.metrics.cursor.open.": "csr_type", + "serverStatus.metrics.document.": "doc_op_type", + "serverStatus.opLatencies.": "op_type", + "serverStatus.opReadConcernCounters.": "concern_type", + "serverStatus.opcounters.": "legacy_op_type", + "serverStatus.opcountersRepl.": "legacy_op_type", + "serverStatus.transactions.commitTypes.": "commit_type", + "serverStatus.wiredTiger.concurrentTransactions.": "txn_rw_type", + "serverStatus.wiredTiger.perf.": "perf_bucket", + "systemMetrics.disks.": "device_name", + } + + // Regular expressions used to make the metric name Prometheus-compatible + // This variables are global to compile the regexps only once. + specialCharsRe = regexp.MustCompile(`[^a-zA-Z0-9_]+`) + repeatedUnderscoresRe = regexp.MustCompile(`__+`) + dollarRe = regexp.MustCompile(`\_$`) +) + +// prometheusize renames metrics by replacing some prefixes with shorter names +// replace special chars to follow Prometheus metric naming rules and adds the +// exporter name prefix. 
+func prometheusize(s string) string { + for _, pair := range prefixes { + if strings.HasPrefix(s, pair[0]+".") { + s = pair[1] + strings.TrimPrefix(s, pair[0]) + break + } + } + + s = specialCharsRe.ReplaceAllString(s, "_") + s = dollarRe.ReplaceAllString(s, "") + s = repeatedUnderscoresRe.ReplaceAllString(s, "_") + s = strings.TrimPrefix(s, "_") + + return exporterPrefix + s +} + +// nameAndLabel checks if there are predefined metric name and label for that metric or +// the standard metrics name should be used in place. +func nameAndLabel(prefix, name string) (string, string) { + if label, ok := nodeToPDMetrics[prefix]; ok { + return prometheusize(prefix), label + } + + return prometheusize(prefix + name), "" +} + +// makeRawMetric creates a Prometheus metric based on the parameters we collected by +// traversing the MongoDB structures returned by the collector functions. +func makeRawMetric(prefix, name string, value interface{}, labels map[string]string) (*rawMetric, error) { + f, err := asFloat64(value) + if err != nil { + return nil, err + } + if f == nil { + return nil, nil + } + + help := metricHelp(prefix, name) + + fqName, label := nameAndLabel(prefix, name) + + metricType := prometheus.UntypedValue + if strings.HasSuffix(strings.ToLower(name), "count") { + metricType = prometheus.CounterValue + } + + rm := &rawMetric{ + fqName: fqName, + help: help, + val: *f, + vt: metricType, + ln: make([]string, 0, len(labels)), + lv: make([]string, 0, len(labels)), + } + + // Add original labels to the metric + for k, v := range labels { + rm.ln = append(rm.ln, k) + rm.lv = append(rm.lv, v) + } + + // Add predefined label, if any + if label != "" { + rm.ln = append(rm.ln, label) + rm.lv = append(rm.lv, name) + } + + return rm, nil +} + +func asFloat64(value interface{}) (*float64, error) { + var f float64 + switch v := value.(type) { + case bool: + if v { + f = 1 + } + case int: + f = float64(v) + case int32: + f = float64(v) + case int64: + f = float64(v) + case 
// metricHelp returns the help text for a metric: the prefix when there is one,
// otherwise the metric name.
//
// Only one of the two is used because two metrics cannot share a name while
// having different help strings. For metrics whose keys are turned into labels
// the real metric name would be rejected by Prometheus, and first-level metrics
// have no prefix, so the name is used to avoid an empty help.
func metricHelp(prefix, name string) string {
	if prefix == "" {
		return name
	}

	return prefix
}
+ case []interface{}: + continue + default: + rm, err := makeRawMetric(prefix, k, v, labels) + if err != nil { + invalidMetric := prometheus.NewInvalidMetric(prometheus.NewInvalidDesc(err), err) + res = append(res, invalidMetric) + continue + } + + // makeRawMetric returns a nil metric for some data types like strings + // because we cannot extract data from all types + if rm == nil { + continue + } + + metrics := []*rawMetric{rm} + + if renamedMetrics := metricRenameAndLabel(rm, specialConversions()); renamedMetrics != nil { + metrics = renamedMetrics + } + + for _, m := range metrics { + metric, err := rawToPrometheusMetric(m) + if err != nil { + invalidMetric := prometheus.NewInvalidMetric(prometheus.NewInvalidDesc(err), err) + res = append(res, invalidMetric) + continue + } + + res = append(res, metric) + + if compatibleMode { + res = appendCompatibleMetric(res, m) + } + } + } + } + + return res +} + +// Extract maps from arrays. Only some structures like replicasets have arrays of members +// and each member is represented by a map[string]interface{}. +func processSlice(prefix, k string, v []interface{}, commonLabels map[string]string, compatibleMode bool) []prometheus.Metric { + metrics := make([]prometheus.Metric, 0) + labels := make(map[string]string) + for name, value := range commonLabels { + labels[name] = value + } + + for _, item := range v { + var s map[string]interface{} + + switch i := item.(type) { + case map[string]interface{}: + s = i + case primitive.M: + s = map[string]interface{}(i) + default: + continue + } + + // use the replicaset or server name as a label + if name, ok := s["name"].(string); ok { + labels["member_idx"] = name + } + if state, ok := s["stateStr"].(string); ok { + labels["member_state"] = state + } + + metrics = append(metrics, makeMetrics(prefix+k, s, labels, compatibleMode)...) 
+ } + + return metrics +} + +type conversion struct { + newName string + oldName string + labelConversions map[string]string // key: current label, value: old exporter (compatible) label + labelValueConversions map[string]string // key: current label, value: old exporter (compatible) label + prefix string + suffixLabel string + suffixMapping map[string]string +} + +func metricRenameAndLabel(rm *rawMetric, convs []conversion) []*rawMetric { + // check if the metric exists in the conversions array. + // if it exists, it should be converted. + var result []*rawMetric + for _, cm := range convs { + switch { + case cm.newName != "" && rm.fqName == cm.newName: // first renaming case. See (1) + result = append(result, newToOldMetric(rm, cm)) + + case cm.prefix != "" && strings.HasPrefix(rm.fqName, cm.prefix): // second renaming case. See (2) + conversionSuffix := strings.TrimPrefix(rm.fqName, cm.prefix) + conversionSuffix = strings.TrimPrefix(conversionSuffix, "_") + + // Check that also the suffix matches. + // In the conversion array, there are metrics with the same prefix but the 'old' name varies + // also depending on the metic suffix + if _, ok := cm.suffixMapping[conversionSuffix]; ok { + om := createOldMetricFromNew(rm, cm) + result = append(result, om) + } + } + } + + return result +} + +// specialConversions returns a list of special conversions we want to implement. 
+// See: https://jira.percona.com/browse/PMM-6506 +func specialConversions() []conversion { + return []conversion{ + { + oldName: "mongodb_ss_opLatencies_ops", + prefix: "mongodb_ss_opLatencies", + suffixLabel: "op_type", + suffixMapping: map[string]string{ + "commands_ops": "commands", + "reads_ops": "reads", + "transactions_ops": "transactions", + "writes_ops": "writes", + }, + }, + { + oldName: "mongodb_ss_opLatencies_latency", + prefix: "mongodb_ss_opLatencies", + suffixLabel: "op_type", + suffixMapping: map[string]string{ + "commands_latency": "commands", + "reads_latency": "reads", + "transactions_latency": "transactions", + "writes_latency": "writes", + }, + }, + // mongodb_ss_wt_concurrentTransactions_read_out + // mongodb_ss_wt_concurrentTransactions_write_out + { + oldName: "mongodb_ss_wt_concurrentTransactions_out", + prefix: "mongodb_ss_wt_concurrentTransactions", + suffixLabel: "txn_rw", + suffixMapping: map[string]string{ + "read_out": "read", + "write_out": "write", + }, + }, + // mongodb_ss_wt_concurrentTransactions_read_available + // mongodb_ss_wt_concurrentTransactions_write_available + { + oldName: "mongodb_ss_wt_concurrentTransactions_available", + prefix: "mongodb_ss_wt_concurrentTransactions", + suffixLabel: "txn_rw", + suffixMapping: map[string]string{ + "read_available": "read", + "write_available": "write", + }, + }, + // mongodb_ss_wt_concurrentTransactions_read_totalTickets + // mongodb_ss_wt_concurrentTransactions_write_totalTickets + { + oldName: "mongodb_ss_wt_concurrentTransactions_totalTickets", + prefix: "mongodb_ss_wt_concurrentTransactions", + suffixLabel: "txn_rw", + suffixMapping: map[string]string{ + "read_totalTickets": "read", + "write_totalTickets": "write", + }, + }, + } +} diff --git a/inputs/mongodb/exporter/replset_status_collector.go b/inputs/mongodb/exporter/replset_status_collector.go new file mode 100644 index 00000000..5c02223d --- /dev/null +++ b/inputs/mongodb/exporter/replset_status_collector.go @@ -0,0 +1,89 @@ 
+// mongodb_exporter +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package exporter + +import ( + "context" + + "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" +) + +const ( + replicationNotEnabled = 76 + replicationNotYetInitialized = 94 +) + +type replSetGetStatusCollector struct { + ctx context.Context + base *baseCollector + + compatibleMode bool + topologyInfo labelsGetter +} + +// newReplicationSetStatusCollector creates a collector for statistics on replication set. 
+func newReplicationSetStatusCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible bool, topology labelsGetter) *replSetGetStatusCollector { + return &replSetGetStatusCollector{ + ctx: ctx, + base: newBaseCollector(client, logger), + + compatibleMode: compatible, + topologyInfo: topology, + } +} + +func (d *replSetGetStatusCollector) Describe(ch chan<- *prometheus.Desc) { + d.base.Describe(d.ctx, ch, d.collect) +} + +func (d *replSetGetStatusCollector) Collect(ch chan<- prometheus.Metric) { + d.base.Collect(ch, d.collect) +} + +func (d *replSetGetStatusCollector) collect(ch chan<- prometheus.Metric) { + + logger := d.base.logger + client := d.base.client + + cmd := bson.D{{Key: "replSetGetStatus", Value: "1"}} + res := client.Database("admin").RunCommand(d.ctx, cmd) + + var m bson.M + + if err := res.Decode(&m); err != nil { + if e, ok := err.(mongo.CommandError); ok { + if e.Code == replicationNotYetInitialized || e.Code == replicationNotEnabled { + return + } + } + logger.Errorf("cannot get replSetGetStatus: %s", err) + + return + } + + logger.Debug("replSetGetStatus result:") + debugResult(logger, m) + + for _, metric := range makeMetrics("", m, d.topologyInfo.baseLabels(), d.compatibleMode) { + ch <- metric + } +} + +var _ prometheus.Collector = (*replSetGetStatusCollector)(nil) diff --git a/inputs/mongodb/exporter/serverstatus_collector.go b/inputs/mongodb/exporter/serverstatus_collector.go new file mode 100644 index 00000000..cbd80c00 --- /dev/null +++ b/inputs/mongodb/exporter/serverstatus_collector.go @@ -0,0 +1,74 @@ +// mongodb_exporter +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package exporter + +import ( + "context" + + "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" +) + +type serverStatusCollector struct { + ctx context.Context + base *baseCollector + + compatibleMode bool + topologyInfo labelsGetter +} + +// newServerStatusCollector creates a collector for statistics on server status. +func newServerStatusCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible bool, topology labelsGetter) *serverStatusCollector { + return &serverStatusCollector{ + ctx: ctx, + base: newBaseCollector(client, logger), + compatibleMode: compatible, + topologyInfo: topology, + } +} + +func (d *serverStatusCollector) Describe(ch chan<- *prometheus.Desc) { + d.base.Describe(d.ctx, ch, d.collect) +} + +func (d *serverStatusCollector) Collect(ch chan<- prometheus.Metric) { + d.base.Collect(ch, d.collect) +} + +func (d *serverStatusCollector) collect(ch chan<- prometheus.Metric) { + + logger := d.base.logger + client := d.base.client + + cmd := bson.D{{Key: "serverStatus", Value: "1"}} + res := client.Database("admin").RunCommand(d.ctx, cmd) + + var m bson.M + if err := res.Decode(&m); err != nil { + ch <- prometheus.NewInvalidMetric(prometheus.NewInvalidDesc(err), err) + return + } + + logrus.Debug("serverStatus result:") + debugResult(logger, m) + + for _, metric := range makeMetrics("", m, d.topologyInfo.baseLabels(), d.compatibleMode) { + ch <- metric + } +} diff --git a/inputs/mongodb/exporter/top_collector.go 
b/inputs/mongodb/exporter/top_collector.go new file mode 100644 index 00000000..3c43913e --- /dev/null +++ b/inputs/mongodb/exporter/top_collector.go @@ -0,0 +1,157 @@ +// mongodb_exporter +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package exporter + +import ( + "context" + "fmt" + + "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" +) + +type topCollector struct { + ctx context.Context + base *baseCollector + + compatibleMode bool + topologyInfo labelsGetter +} + +var ErrInvalidOrMissingTotalsEntry = fmt.Errorf("Invalid or misssing totals entry in top results") + +func newTopCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible bool, + topology labelsGetter, +) *topCollector { + return &topCollector{ + ctx: ctx, + base: newBaseCollector(client, logger), + compatibleMode: compatible, + topologyInfo: topology, + } +} + +func (d *topCollector) Describe(ch chan<- *prometheus.Desc) { + d.base.Describe(d.ctx, ch, d.collect) +} + +func (d *topCollector) Collect(ch chan<- prometheus.Metric) { + d.base.Collect(ch, d.collect) +} + +func (d *topCollector) collect(ch chan<- prometheus.Metric) { + + logger := d.base.logger + client := d.base.client + + cmd := 
bson.D{{Key: "top", Value: "1"}} + res := client.Database("admin").RunCommand(d.ctx, cmd) + + var m primitive.M + if err := res.Decode(&m); err != nil { + ch <- prometheus.NewInvalidMetric(prometheus.NewInvalidDesc(err), err) + return + } + + logrus.Debug("top result:") + debugResult(logger, m) + + totals, ok := m["totals"].(primitive.M) + if !ok { + ch <- prometheus.NewInvalidMetric(prometheus.NewInvalidDesc(ErrInvalidOrMissingTotalsEntry), + ErrInvalidOrMissingTotalsEntry) + } + + /* + The top command will return a structure with a key named totals and it is a map + where the key is the collection namespace and for each collection there are per + collection usage statistics. + Example: rs1:SECONDARY> db.adminCommand({"top": 1}); + + { + "totals" : { + "note" : "all times in microseconds", + "admin.system.roles" : { + "total" : { + "time" : 41, + "count" : 1 + }, + "readLock" : { + "time" : 41, + "count" : 1 + }, + "writeLock" : { + "time" : 0, + "count" : 0 + }, + "queries" : { + "time" : 41, + "count" : 1 + }, + "getmore" : { + "time" : 0, + "count" : 0 + }, + "insert" : { + "time" : 0, + "count" : 0 + }, + "update" : { + "time" : 0, + "count" : 0 + }, + "remove" : { + "time" : 0, + "count" : 0 + }, + "commands" : { + "time" : 0, + "count" : 0 + } + }, + "admin.system.version" : { + "total" : { + "time" : 63541, + "count" : 218 + }, + + If we pass this structure to the makeMetrics function, we will have metric names with the form of + prefix + namespace + metric like mongodb_top_totals_admin.system.role_readlock_count. + Having the namespace as part of the metric is a Prometheus anti pattern and diffucults grouping + metrics in Grafana. For this reason, we need to manually loop through the metric in the totals key + and pass the namespace as a label to the makeMetrics function. 
+ */ + + for namespace, metrics := range totals { + labels := d.topologyInfo.baseLabels() + db, coll := splitNamespace(namespace) + labels["database"] = db + labels["collection"] = coll + + mm, ok := metrics.(primitive.M) // ingore entries like -> "note" : "all times in microseconds" + if !ok { + continue + } + + for _, metric := range makeMetrics("top", mm, labels, d.compatibleMode) { + ch <- metric + } + } +} diff --git a/inputs/mongodb/exporter/topology_info.go b/inputs/mongodb/exporter/topology_info.go new file mode 100644 index 00000000..c7f0d85c --- /dev/null +++ b/inputs/mongodb/exporter/topology_info.go @@ -0,0 +1,193 @@ +// mongodb_exporter +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +package exporter + +import ( + "context" + "fmt" + "sync" + + "github.com/percona/percona-toolkit/src/go/mongolib/proto" + "github.com/percona/percona-toolkit/src/go/mongolib/util" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" +) + +type mongoDBNodeType string + +const ( + labelClusterRole = "cl_role" + labelClusterID = "cl_id" + labelReplicasetName = "rs_nm" + labelReplicasetState = "rs_state" + + typeIsDBGrid = "isdbgrid" + typeMongos mongoDBNodeType = "mongos" + typeMongod mongoDBNodeType = "mongod" + typeShardServer mongoDBNodeType = "shardsvr" + typeOther mongoDBNodeType = "" +) + +type labelsGetter interface { + baseLabels() map[string]string + loadLabels(context.Context) error +} + +// This is an object to make it posible to easily reload the labels in case of +// disconnection from the db. Just call loadLabels when required. +type topologyInfo struct { + // TODO: with https://jira.percona.com/browse/PMM-6435, replace this client pointer + // by a new connector, able to reconnect if needed. In case of reconnection, we should + // call loadLabels to refresh the labels because they might have changed + client *mongo.Client + rw sync.RWMutex + labels map[string]string +} + +// ErrCannotGetTopologyLabels Cannot read topology labels. +var ErrCannotGetTopologyLabels = fmt.Errorf("cannot get topology labels") + +func newTopologyInfo(ctx context.Context, client *mongo.Client) *topologyInfo { + ti := &topologyInfo{ + client: client, + labels: make(map[string]string), + rw: sync.RWMutex{}, + } + + err := ti.loadLabels(ctx) + if err != nil { + logrus.Warnf("cannot load topology labels: %s", err) + } + + return ti +} + +// baseLabels returns a copy of the topology labels because in some collectors like +// collstats collector, we must use these base labels and add the namespace or other labels. 
+func (t *topologyInfo) baseLabels() map[string]string { + c := map[string]string{} + + t.rw.RLock() + for k, v := range t.labels { + c[k] = v + } + t.rw.RUnlock() + + return c +} + +// TopologyLabels reads several values from MongoDB instance like replicaset name, and other +// topology information and returns a map of labels used to better identify the current monitored instance. +func (t *topologyInfo) loadLabels(ctx context.Context) error { + t.rw.Lock() + defer t.rw.Unlock() + + t.labels = make(map[string]string) + + role, err := getClusterRole(ctx, t.client) + if err != nil { + return errors.Wrap(err, "cannot get node type for topology info") + } + + t.labels[labelClusterRole] = role + + // Standalone instances or mongos instances won't have a replicaset name + if rs, err := util.ReplicasetConfig(ctx, t.client); err == nil { + t.labels[labelReplicasetName] = rs.Config.ID + } + + isArbiter, err := isArbiter(ctx, t.client) + if err != nil { + return err + } + + cid, err := util.ClusterID(ctx, t.client) + if err != nil { + if !isArbiter { // arbiters don't have a cluster ID + return errors.Wrapf(ErrCannotGetTopologyLabels, "error getting cluster ID: %s", err) + } + } + t.labels[labelClusterID] = cid + + // Standalone instances or mongos instances won't have a replicaset state + state, err := util.MyState(ctx, t.client) + if err == nil { + t.labels[labelReplicasetState] = fmt.Sprintf("%d", state) + } + + return nil +} + +func isArbiter(ctx context.Context, client *mongo.Client) (bool, error) { + doc := struct { + ArbiterOnly bool `bson:"arbiterOnly"` + }{} + + if err := client.Database("admin").RunCommand(ctx, primitive.M{"isMaster": 1}).Decode(&doc); err != nil { + return false, errors.Wrap(err, "cannot check if the instance is an arbiter") + } + + return doc.ArbiterOnly, nil +} + +func getNodeType(ctx context.Context, client *mongo.Client) (mongoDBNodeType, error) { + md := proto.MasterDoc{} + if err := client.Database("admin").RunCommand(ctx, 
primitive.M{"isMaster": 1}).Decode(&md); err != nil { + return "", err + } + + if md.SetName != nil || md.Hosts != nil { + return typeShardServer, nil + } else if md.Msg == typeIsDBGrid { + // isdbgrid is always the msg value when calling isMaster on a mongos + // see http://docs.mongodb.org/manual/core/sharded-cluster-query-router/ + return typeMongos, nil + } + + return typeMongod, nil +} + +func getClusterRole(ctx context.Context, client *mongo.Client) (string, error) { + cmdOpts := primitive.M{} + // Not always we can get this info. For example, we cannot get this for hidden hosts so + // if there is an error, just ignore it + res := client.Database("admin").RunCommand(ctx, primitive.D{ + {Key: "getCmdLineOpts", Value: 1}, + {Key: "recordStats", Value: 1}, + }) + + if res.Err() != nil { + return "", nil + } + + if err := res.Decode(&cmdOpts); err != nil { + return "", errors.Wrap(err, "cannot decode getCmdLineOpts response") + } + + if walkTo(cmdOpts, []string{"parsed", "sharding", "configDB"}) != nil { + return "mongos", nil + } + + clusterRole := "" + if cr := walkTo(cmdOpts, []string{"parsed", "sharding", "clusterRole"}); cr != nil { + clusterRole, _ = cr.(string) + } + + return clusterRole, nil +} diff --git a/inputs/mongodb/exporter/v1_compatibility.go b/inputs/mongodb/exporter/v1_compatibility.go new file mode 100644 index 00000000..217cf732 --- /dev/null +++ b/inputs/mongodb/exporter/v1_compatibility.go @@ -0,0 +1,1399 @@ +// mongodb_exporter +// Copyright (C) 2022 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package exporter + +import ( + "context" + "fmt" + "math" + "strings" + "time" + + "github.com/percona/percona-toolkit/src/go/mongolib/proto" + "github.com/percona/percona-toolkit/src/go/mongolib/util" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" +) + +const ( + // UnknownState is the values for an unknown rs state. + // From MongoDB documentation: https://docs.mongodb.com/manual/reference/replica-states/ + UnknownState = 6 +) + +// ErrInvalidMetricValue cannot create a new metric due to an invalid value. +var errInvalidMetricValue = fmt.Errorf("invalid metric value") + +/* + This is used to convert a new metric like: mongodb_ss_asserts{assert_type=*} (1) + to the old-compatible metric: mongodb_mongod_asserts_total{type="regular|warning|msg|user|rollovers"}. + In this particular case, conversion would be: + conversion { + newName: "mongodb_ss_asserts", + oldName: "mongodb_mongod_asserts_total", + labels : map[string]string{ "assert_type": "type"}, + }. + + Some other metric renaming are more complex. (2) + In some cases, there is a total renaming, with new labels and the only part we can use to identify a metric + is its prefix. Example: + Metrics like mongodb_ss_metrics_operation _fastmod or + mongodb_ss_metrics_operation_idhack or + mongodb_ss_metrics_operation_scanAndOrder + should use the trim the "prefix" mongodb_ss_metrics_operation from the metric name, and that remaining suffic + is the label value for a new label "suffixLabel". 
+ It means that the metric (current) mongodb_ss_metrics_operation_idhack will become the old equivalent one + mongodb_mongod_metrics_operation_total {"state": "idhack"} as defined in the conversion slice: + { + oldName: "mongodb_mongod_metrics_operation_total", //{state="fastmod|idhack|scan_and_order"} + prefix: "mongodb_ss_metrics_operation", // _[fastmod|idhack|scanAndOrder] + suffixLabel: "state", + }, + + suffixMapping field: + -------------------- + Also, some metric suffixes for the second renaming case need a mapping between the old and new values. + For example, the metric mongodb_ss_wt_cache_bytes_currently_in_the_cache has mongodb_ss_wt_cache_bytes + as the prefix, so the suffix is currently_in_the_cache. It should be converted to a metric named + mongodb_mongod_wiredtiger_cache_bytes, with the suffix currently_in_the_cache mapped to + "total". + + Third renaming form: see (3) below. +*/ + +// For simple metric renaming, only some fields should be updated like the metric name, the help and some +// labels that have 1 to 1 mapping (1).
+func newToOldMetric(rm *rawMetric, c conversion) *rawMetric { + oldMetric := &rawMetric{ + fqName: c.oldName, + help: rm.help, + val: rm.val, + vt: rm.vt, + ln: make([]string, 0, len(rm.ln)), + lv: make([]string, 0, len(rm.lv)), + } + + for _, val := range rm.lv { + if newLabelVal, ok := c.labelValueConversions[val]; ok { + oldMetric.lv = append(oldMetric.lv, newLabelVal) + continue + } + oldMetric.lv = append(oldMetric.lv, val) + } + + // Some label names should be converted from the new (current) name to the + // mongodb_exporter v1 compatible name + for _, newLabelName := range rm.ln { + // if it should be converted, append the old-compatible name + if oldLabel, ok := c.labelConversions[newLabelName]; ok { + oldMetric.ln = append(oldMetric.ln, oldLabel) + continue + } + // otherwise, keep the same label name + oldMetric.ln = append(oldMetric.ln, newLabelName) + } + + return oldMetric +} + +// The second renaming case is not a direct rename. In this case, the new metric name has a common +// prefix and the rest of the metric name is used as the value for a label in tne old metric style. (2) +// In this renaming case, the metric "mongodb_ss_wt_cache_bytes_bytes_currently_in_the_cache +// should be converted to mongodb_mongod_wiredtiger_cache_bytes with label "type": "total". +// For this conversion, we have the suffixMapping field that holds the mapping for all suffixes. 
+// Example definition: +// +// oldName: "mongodb_mongod_wiredtiger_cache_bytes", +// prefix: "mongodb_ss_wt_cache_bytes", +// suffixLabel: "type", +// suffixMapping: map[string]string{ +// "bytes_currently_in_the_cache": "total", +// "tracked_dirty_bytes_in_the_cache": "dirty", +// "tracked_bytes_belonging_to_internal_pages_in_the_cache": "internal_pages", +// "tracked_bytes_belonging_to_leaf_pages_in_the_cache": "internal_pages", +// }, +// }, +func createOldMetricFromNew(rm *rawMetric, c conversion) *rawMetric { + suffix := strings.TrimPrefix(rm.fqName, c.prefix) + suffix = strings.TrimPrefix(suffix, "_") + + if newSuffix, ok := c.suffixMapping[suffix]; ok { + suffix = newSuffix + } + + oldMetric := &rawMetric{ + fqName: c.oldName, + help: c.oldName, + val: rm.val, + vt: rm.vt, + ln: []string{c.suffixLabel}, + lv: []string{suffix}, + } + + return oldMetric +} + +func cacheEvictedTotalMetric(m bson.M) (prometheus.Metric, error) { + s, err := sumMetrics(m, [][]string{ + {"serverStatus", "wiredTiger", "cache", "modified pages evicted"}, + {"serverStatus", "wiredTiger", "cache", "unmodified pages evicted"}, + }) + if err != nil { + return nil, err + } + + d := prometheus.NewDesc("mongodb_mongod_wiredtiger_cache_evicted_total", "wiredtiger cache evicted total", nil, nil) + metric, err := prometheus.NewConstMetric(d, prometheus.GaugeValue, s) + if err != nil { + return nil, err + } + + return metric, nil +} + +func sumMetrics(m bson.M, paths [][]string) (float64, error) { + var total float64 + + for _, path := range paths { + v := walkTo(m, path) + if v == nil { + continue + } + + f, err := asFloat64(v) + if err != nil { + return 0, errors.Wrapf(errInvalidMetricValue, "%v", v) + } + + total += *f + } + + return total, nil +} + +// Converts new metric to the old metric style and append it to the response slice. 
+func appendCompatibleMetric(res []prometheus.Metric, rm *rawMetric) []prometheus.Metric { + compatibleMetrics := metricRenameAndLabel(rm, conversions()) + if compatibleMetrics == nil { + return res + } + + for _, compatibleMetric := range compatibleMetrics { + metric, err := rawToPrometheusMetric(compatibleMetric) + if err != nil { + invalidMetric := prometheus.NewInvalidMetric(prometheus.NewInvalidDesc(err), err) + res = append(res, invalidMetric) + return res + } + + res = append(res, metric) + } + + return res +} + +//nolint:funlen +func conversions() []conversion { + return []conversion{ + { + oldName: "mongodb_asserts_total", + newName: "mongodb_ss_asserts", + labelConversions: map[string]string{"assert_type": "type"}, + }, + { + oldName: "mongodb_connections", + newName: "mongodb_ss_connections", + labelConversions: map[string]string{"conn_type": "state"}, + }, + { + oldName: "mongodb_connections_metrics_created_total", + newName: "mongodb_ss_connections_totalCreated", + }, + { + oldName: "mongodb_extra_info_page_faults_total", + newName: "mongodb_ss_extra_info_page_faults", + }, + { + oldName: "mongodb_mongod_durability_journaled_megabytes", + newName: "mongodb_ss_dur_journaledMB", + }, + { + oldName: "mongodb_mongod_durability_commits", + newName: "mongodb_ss_dur_commits", + }, + { + oldName: "mongodb_mongod_background_flushing_average_milliseconds", + newName: "mongodb_ss_backgroundFlushing_average_ms", + }, + { + oldName: "mongodb_mongod_global_lock_client", + prefix: "mongodb_ss_globalLock_activeClients", + suffixLabel: "type", + suffixMapping: map[string]string{ + "readers": "reader", + "writers": "writer", + "total": "total", + }, + }, + { + oldName: "mongodb_mongod_global_lock_current_queue", + newName: "mongodb_ss_globalLock_currentQueue", + labelConversions: map[string]string{"count_type": "type"}, + labelValueConversions: map[string]string{ + "readers": "reader", + "writers": "writer", + }, + }, + { + oldName: "mongodb_instance_local_time", + 
newName: "mongodb_start", + }, + + { + oldName: "mongodb_mongod_instance_uptime_seconds", + newName: "mongodb_ss_uptime", + }, + { + oldName: "mongodb_instance_uptime_seconds", + newName: "mongodb_ss_uptime", + }, + { + oldName: "mongodb_mongod_locks_time_locked_local_microseconds_total", + newName: "mongodb_ss_locks_Local_acquireCount_[rw]", + }, + { + oldName: "mongodb_memory", + newName: "mongodb_ss_mem_[resident|virtual]", + }, + { + oldName: "mongodb_memory", + prefix: "mongodb_ss_mem", + suffixLabel: "type", + suffixMapping: map[string]string{ + "mapped": "mapped", + "mappedWithJournal": "mapped_with_journal", + }, + }, + { + oldName: "mongodb_mongod_metrics_cursor_open", + newName: "mongodb_ss_metrics_cursor_open", + labelConversions: map[string]string{"csr_type": "state"}, + }, + { + oldName: "mongodb_mongod_metrics_cursor_timed_out_total", + newName: "mongodb_ss_metrics_cursor_timedOut", + }, + { + oldName: "mongodb_mongod_metrics_document_total", + newName: "mongodb_ss_metric_document", + labelConversions: map[string]string{"doc_op_type": "type"}, + }, + { + oldName: "mongodb_mongod_metrics_get_last_error_wtime_num_total", + newName: "mongodb_ss_metrics_getLastError_wtime_num", + }, + { + oldName: "mongodb_mongod_metrics_get_last_error_wtimeouts_total", + newName: "mongodb_ss_metrics_getLastError_wtimeouts", + }, + { + oldName: "mongodb_mongod_metrics_operation_total", + prefix: "mongodb_ss_metrics_operation", + suffixLabel: "state", + suffixMapping: map[string]string{ + "scanAndOrder": "scanAndOrder", + "writeConflicts": "writeConflicts", + }, + }, + { + oldName: "mongodb_mongod_metrics_query_executor_total", + prefix: "mongodb_ss_metrics_query", + suffixLabel: "state", + }, + { + oldName: "mongodb_mongod_metrics_record_moves_total", + newName: "mongodb_ss_metrics_record_moves", + }, + { + oldName: "mongodb_mongod_metrics_repl_apply_batches_num_total", + newName: "mongodb_ss_metrics_repl_apply_batches_num", + }, + { + oldName: 
"mongodb_mongod_metrics_repl_apply_batches_total_milliseconds", + newName: "mongodb_ss_metrics_repl_apply_batches_totalMillis", + }, + { + oldName: "mongodb_mongod_metrics_repl_apply_ops_total", + newName: "mongodb_ss_metrics_repl_apply_ops", + }, + { + oldName: "mongodb_mongod_metrics_repl_buffer_count", + newName: "mongodb_ss_metrics_repl_buffer_count", + }, + { + oldName: "mongodb_mongod_metrics_repl_buffer_max_size_bytes", + newName: "mongodb_ss_metrics_repl_buffer_maxSizeBytes", + }, + { + oldName: "mongodb_mongod_metrics_repl_buffer_size_bytes", + newName: "mongodb_ss_metrics_repl_buffer_sizeBytes", + }, + { + oldName: "mongodb_mongod_metrics_repl_executor_queue", + prefix: "mongodb_ss_metrics_repl_executor_queues", + suffixLabel: "type", + }, + { + oldName: "mongodb_mongod_metrics_repl_executor_unsignaled_events", + newName: "mongodb_ss_metrics_repl_executor_unsignaledEvents", + }, + { + oldName: "mongodb_mongod_metrics_repl_network_bytes_total", + newName: "mongodb_ss_metrics_repl_network_bytes", + }, + { + oldName: "mongodb_mongod_metrics_repl_network_getmores_num_total", + newName: "mongodb_ss_metrics_repl_network_getmores_num", + }, + { + oldName: "mongodb_mongod_metrics_repl_network_getmores_total_milliseconds", + newName: "mongodb_ss_metrics_repl_network_getmores_totalMillis", + }, + { + oldName: "mongodb_mongod_metrics_repl_network_ops_total", + newName: "mongodb_ss_metrics_repl_network_ops", + }, + { + oldName: "mongodb_mongod_metrics_repl_network_readers_created_total", + newName: "mongodb_ss_metrics_repl_network_readersCreated", + }, + { + oldName: "mongodb_mongod_metrics_ttl_deleted_documents_total", + newName: "mongodb_ss_metrics_ttl_deletedDocuments", + }, + { + oldName: "mongodb_mongod_metrics_ttl_passes_total", + newName: "mongodb_ss_metrics_ttl_passes", + }, + { + oldName: "mongodb_network_bytes_total", + prefix: "mongodb_ss_network", + suffixLabel: "state", + }, + { + oldName: "mongodb_network_metrics_num_requests_total", + newName: 
"mongodb_ss_network_numRequests", + }, + { + oldName: "mongodb_mongod_op_counters_repl_total", + newName: "mongodb_ss_opcountersRepl", + labelConversions: map[string]string{"legacy_op_type": "type"}, + }, + { + oldName: "mongodb_op_counters_total", + newName: "mongodb_ss_opcounters", + labelConversions: map[string]string{"legacy_op_type": "type"}, + }, + { + oldName: "mongodb_mongod_wiredtiger_blockmanager_blocks_total", + prefix: "mongodb_ss_wt_block_manager", + suffixLabel: "type", + }, + { + oldName: "mongodb_mongod_wiredtiger_cache_max_bytes", + newName: "mongodb_ss_wt_cache_maximum_bytes_configured", + }, + { + oldName: "mongodb_mongod_wiredtiger_cache_overhead_percent", + newName: "mongodb_ss_wt_cache_percentage_overhead", + }, + { + oldName: "mongodb_mongod_wiredtiger_concurrent_transactions_available_tickets", + newName: "mongodb_ss_wt_concurrentTransactions_available", + }, + { + oldName: "mongodb_mongod_wiredtiger_concurrent_transactions_out_tickets", + newName: "mongodb_ss_wt_concurrentTransactions_out", + }, + { + oldName: "mongodb_mongod_wiredtiger_concurrent_transactions_total_tickets", + newName: "mongodb_ss_wt_concurrentTransactions_totalTickets", + }, + { + oldName: "mongodb_mongod_wiredtiger_log_records_scanned_total", + newName: "mongodb_ss_wt_log_records_processed_by_log_scan", + }, + { + oldName: "mongodb_mongod_wiredtiger_session_open_cursors_total", + newName: "mongodb_ss_wt_session_open_cursor_count", + }, + { + oldName: "mongodb_mongod_wiredtiger_session_open_sessions_total", + newName: "mongodb_ss_wt_session_open_session_count", + }, + { + oldName: "mongodb_mongod_wiredtiger_transactions_checkpoint_milliseconds_total", + newName: "mongodb_ss_wt_txn_transaction_checkpoint_total_time_msecs", + }, + { + oldName: "mongodb_mongod_wiredtiger_transactions_running_checkpoints", + newName: "mongodb_ss_wt_txn_transaction_checkpoint_currently_running", + }, + { + oldName: "mongodb_mongod_wiredtiger_transactions_total", + prefix: 
"mongodb_ss_wt_txn_transactions", + suffixLabel: "type", + suffixMapping: map[string]string{ + "begins": "begins", + "checkpoints": "checkpoints", + "committed": "committed", + "rolled_back": "rolled_back", + }, + }, + { + oldName: "mongodb_mongod_wiredtiger_blockmanager_bytes_total", + prefix: "mongodb_ss_wt_block_manager", + suffixLabel: "type", + suffixMapping: map[string]string{ + "bytes_read": "read", "mapped_bytes_read": "read_mapped", + "bytes_written": "written", + }, + }, + // the 2 metrics bellow have the same prefix. + { + oldName: "mongodb_mongod_wiredtiger_cache_bytes", + prefix: "mongodb_ss_wt_cache_bytes", + suffixLabel: "type", + suffixMapping: map[string]string{ + "currently_in_the_cache": "total", + "tracked_dirty_bytes_in_the_cache": "dirty", + "tracked_bytes_belonging_to_internal_pages_in_the_cache": " internal_pages", + "tracked_bytes_belonging_to_leaf_pages_in_the_cache": "internal_pages", + }, + }, + { + oldName: "mongodb_mongod_wiredtiger_cache_bytes_total", + prefix: "mongodb_ss_wt_cache", + suffixLabel: "type", + suffixMapping: map[string]string{ + "bytes_read_into_cache": "read", + "bytes_written_from_cache": "written", + }, + }, + { + oldName: "mongodb_mongod_wiredtiger_cache_pages", + prefix: "mongodb_ss_wt_cache", + suffixLabel: "type", + suffixMapping: map[string]string{ + "pages_currently_held_in_the_cache": "total", + "tracked_dirty_pages_in_the_cache": "dirty", + }, + }, + { + oldName: "mongodb_mongod_wiredtiger_cache_pages_total", + prefix: "mongodb_ss_wt_cache", + suffixLabel: "type", + suffixMapping: map[string]string{ + "pages_read_into_cache": "read", + "pages_written_from_cache": "written", + }, + }, + { + oldName: "mongodb_mongod_wiredtiger_log_records_total", + prefix: "mongodb_ss_wt_log", + suffixLabel: "type", + suffixMapping: map[string]string{ + "log_records_compressed": "compressed", + "log_records_not_compressed": "uncompressed", + }, + }, + { + oldName: "mongodb_mongod_wiredtiger_log_bytes_total", + prefix: 
"mongodb_ss_wt_log", + suffixLabel: "type", + suffixMapping: map[string]string{ + "log_bytes_of_payload_data": "payload", + "log_bytes_written": "unwritten", + }, + }, + { + oldName: "mongodb_mongod_wiredtiger_log_operations_total", + prefix: "mongodb_ss_wt_log", + suffixLabel: "type", + suffixMapping: map[string]string{ + "log_read_operations": "read", + "log_write_operations": "write", + "log_scan_operations": "scan", + "log_scan_records_requiring_two_reads": "scan_double", + "log_sync_operations": "sync", + "log_sync_dir_operations": "sync_dir", + "log_flush_operations": "flush", + }, + }, + { + oldName: "mongodb_mongod_wiredtiger_transactions_checkpoint_milliseconds", + prefix: "mongodb_ss_wt_txn_transaction_checkpoint", + suffixLabel: "type", + suffixMapping: map[string]string{ + "min_time_msecs": "min", + "max_time_msecs": "max", + }, + }, + { + oldName: "mongodb_mongod_global_lock_current_queue", + prefix: "mongodb_mongod_global_lock_current_queue", + labelConversions: map[string]string{"op_type": "type"}, + }, + { + oldName: "mongodb_mongod_op_latencies_ops_total", + newName: "mongodb_ss_opLatencies_ops", + labelConversions: map[string]string{"op_type": "type"}, + labelValueConversions: map[string]string{ + "commands": "command", + "reads": "read", + "writes": "write", + }, + }, + { + oldName: "mongodb_mongod_op_latencies_latency_total", + newName: "mongodb_ss_opLatencies_latency", + labelConversions: map[string]string{"op_type": "type"}, + labelValueConversions: map[string]string{ + "commands": "command", + "reads": "read", + "writes": "write", + }, + }, + { + oldName: "mongodb_mongod_metrics_document_total", + newName: "mongodb_ss_metrics_document", + labelConversions: map[string]string{"doc_op_type": "state"}, + }, + { + oldName: "mongodb_mongod_metrics_query_executor_total", + prefix: "mongodb_ss_metrics_queryExecutor", + suffixLabel: "state", + suffixMapping: map[string]string{ + "scanned": "scanned", + "scannedObjects": "scanned_objects", + }, + }, + 
{ + oldName: "mongodb_memory", + prefix: "mongodb_ss_mem", + suffixLabel: "type", + suffixMapping: map[string]string{ + "resident": "resident", + "virtual": "virtual", + }, + }, + { + oldName: "mongodb_mongod_metrics_get_last_error_wtime_total_milliseconds", + newName: "mongodb_ss_metrics_getLastError_wtime_totalMillis", + }, + { + oldName: "mongodb_ss_wt_cache_maximum_bytes_configured", + newName: "mongodb_mongod_wiredtiger_cache_max_bytes", + }, + { + oldName: "mongodb_mongod_db_collections_total", + newName: "mongodb_dbstats_collections", + }, + { + oldName: "mongodb_mongod_db_data_size_bytes", + newName: "mongodb_dbstats_dataSize", + }, + { + oldName: "mongodb_mongod_db_index_size_bytes", + newName: "mongodb_dbstats_indexSize", + }, + { + oldName: "mongodb_mongod_db_indexes_total", + newName: "mongodb_dbstats_indexes", + }, + { + oldName: "mongodb_mongod_db_objects_total", + newName: "mongodb_dbstats_objects", + }, + } +} + +// Third metric renaming case (3). +// Lock* metrics don't fit in (1) nor in (2) and since they are just a few, and we know they always exists +// as part of getDiagnosticData, we can just call locksMetrics with getDiagnosticData result as the input +// to get the v1 compatible metrics from the new structure. 
// lockMetric describes one lock statistic to expose: the metric name, the path
// of keys leading to its value inside the getDiagnosticData document, and the
// labels attached to the resulting metric.
type lockMetric struct {
	name   string
	path   []string
	labels map[string]string
}

// lockMetrics returns the human readable list of lock statistics that are
// exposed as individual metrics.
//
// Every entry is self-consistent: the metric name ends with the statistic
// found at path[3], the "resource" label equals path[2] and the "lock_mode"
// label equals path[4]. No two entries share the same name and label values,
// otherwise the same series would be emitted twice.
func lockMetrics() []lockMetric {
	return []lockMetric{
		{
			name:   "mongodb_ss_locks_acquireCount",
			path:   []string{"serverStatus", "locks", "ParallelBatchWriterMode", "acquireCount", "r"},
			labels: map[string]string{"lock_mode": "r", "resource": "ParallelBatchWriterMode"},
		},
		{
			// The resource label used to say ReplicationStateTransition while
			// the value was read from ParallelBatchWriterMode, which also made
			// this entry collide with the next one.
			name:   "mongodb_ss_locks_acquireCount",
			path:   []string{"serverStatus", "locks", "ParallelBatchWriterMode", "acquireCount", "w"},
			labels: map[string]string{"lock_mode": "w", "resource": "ParallelBatchWriterMode"},
		},
		{
			name:   "mongodb_ss_locks_acquireCount",
			path:   []string{"serverStatus", "locks", "ReplicationStateTransition", "acquireCount", "w"},
			labels: map[string]string{"resource": "ReplicationStateTransition", "lock_mode": "w"},
		},
		{
			// The wait counter must be read from acquireWaitCount; it used to
			// read acquireCount under the acquireWaitCount name.
			name:   "mongodb_ss_locks_acquireWaitCount",
			path:   []string{"serverStatus", "locks", "ReplicationStateTransition", "acquireWaitCount", "W"},
			labels: map[string]string{"lock_mode": "W", "resource": "ReplicationStateTransition"},
		},
		{
			name:   "mongodb_ss_locks_timeAcquiringMicros",
			path:   []string{"serverStatus", "locks", "ReplicationStateTransition", "timeAcquiringMicros", "w"},
			labels: map[string]string{"lock_mode": "w", "resource": "ReplicationStateTransition"},
		},
		{
			name:   "mongodb_ss_locks_acquireCount",
			path:   []string{"serverStatus", "locks", "Global", "acquireCount", "r"},
			labels: map[string]string{"lock_mode": "r", "resource": "Global"},
		},
		{
			name:   "mongodb_ss_locks_acquireCount",
			path:   []string{"serverStatus", "locks", "Global", "acquireCount", "w"},
			labels: map[string]string{"lock_mode": "w", "resource": "Global"},
		},
		{
			name:   "mongodb_ss_locks_acquireCount",
			path:   []string{"serverStatus", "locks", "Global", "acquireCount", "W"},
			labels: map[string]string{"lock_mode": "W", "resource": "Global"},
		},
	}
}

// locksMetrics returns the list of lock metrics as a prometheus.Metric slice
// This
function reads the human readable list from lockMetrics() and creates a slice of metrics +// ready to be exposed, taking the value for each metric from th provided bson.M structure from +// getDiagnosticData. +func locksMetrics(m bson.M) []prometheus.Metric { + metrics := lockMetrics() + res := make([]prometheus.Metric, 0, len(metrics)) + + for _, lm := range metrics { + mm, err := makeLockMetric(m, lm) + if mm == nil { + continue + } + if err != nil { + logrus.Errorf("cannot convert lock metric %s to old style: %s", mm.Desc(), err) + continue + } + res = append(res, mm) + } + + return res +} + +func makeLockMetric(m bson.M, lm lockMetric) (prometheus.Metric, error) { + val := walkTo(m, lm.path) + if val == nil { + return nil, nil + } + + f, err := asFloat64(val) + if err != nil { + return prometheus.NewInvalidMetric(prometheus.NewInvalidDesc(err), err), err + } + + if f == nil { + return nil, nil + } + + ln := make([]string, 0, len(lm.labels)) + lv := make([]string, 0, len(lm.labels)) + for labelName, labelValue := range lm.labels { + ln = append(ln, labelName) + lv = append(lv, labelValue) + } + + d := prometheus.NewDesc(lm.name, lm.name, ln, nil) + + return prometheus.NewConstMetric(d, prometheus.UntypedValue, *f, lv...) 
+} + +type specialMetric struct { + paths [][]string + labels map[string]string + name string + help string +} + +func specialMetricDefinitions() []specialMetric { + return []specialMetric{ + { + name: "mongodb_mongod_locks_time_acquiring_global_microseconds_total", + help: "sum of serverStatus.locks.Global.timeAcquiringMicros.[r|w]", + paths: [][]string{ + {"serverStatus", "locks", "Global", "timeAcquiringMicros", "r"}, + {"serverStatus", "locks", "Global", "timeAcquiringMicros", "w"}, + }, + }, + } +} + +func specialMetrics(ctx context.Context, client *mongo.Client, m bson.M, l *logrus.Logger) []prometheus.Metric { + metrics := make([]prometheus.Metric, 0) + + for _, def := range specialMetricDefinitions() { + val, err := sumMetrics(m, def.paths) + if err != nil { + l.Errorf("cannot create metric for path: %v: %s", def.paths, err) + continue + } + + d := prometheus.NewDesc(def.name, def.help, nil, def.labels) + metric, err := prometheus.NewConstMetric(d, prometheus.GaugeValue, val) + if err != nil { + l.Errorf("cannot create metric for path: %v: %s", def.paths, err) + continue + } + + metrics = append(metrics, metric) + } + + metrics = append(metrics, storageEngine(m)) + metrics = append(metrics, serverVersion(m)) + metrics = append(metrics, myState(ctx, client)) + + if mm := replSetMetrics(m); mm != nil { + metrics = append(metrics, mm...) + } + + if opLogMetrics, err := oplogStatus(ctx, client); err != nil { + l.Warnf("cannot create metrics for oplog: %s", err) + } else { + metrics = append(metrics, opLogMetrics...) 
+ } + + return metrics +} + +func storageEngine(m bson.M) prometheus.Metric { + v := walkTo(m, []string{"serverStatus", "storageEngine", "name"}) + name := "mongodb_mongod_storage_engine" + help := "The storage engine used by the MongoDB instance" + + engine, ok := v.(string) + if !ok { + engine = "Engine is unavailable" + } + labels := map[string]string{"engine": engine} + + d := prometheus.NewDesc(name, help, nil, labels) + metric, _ := prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(1)) + + return metric +} + +func serverVersion(m bson.M) prometheus.Metric { + v := walkTo(m, []string{"serverStatus", "version"}) + name := "mongodb_version_info" + help := "The server version" + + serverVersion, ok := v.(string) + if !ok { + serverVersion = "server version is unavailable" + } + labels := map[string]string{"mongodb": serverVersion} + + d := prometheus.NewDesc(name, help, nil, labels) + metric, _ := prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(1)) + + return metric +} + +func myState(ctx context.Context, client *mongo.Client) prometheus.Metric { + state, err := util.MyState(ctx, client) + if err != nil { + state = UnknownState + } + + var id string + rs, err := util.ReplicasetConfig(ctx, client) + if err == nil { + id = rs.Config.ID + } + + name := "mongodb_mongod_replset_my_state" + help := "An integer between 0 and 10 that represents the replica state of the current member" + + labels := map[string]string{"set": id} + + d := prometheus.NewDesc(name, help, nil, labels) + metric, _ := prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(state)) + + return metric +} + +func oplogStatus(ctx context.Context, client *mongo.Client) ([]prometheus.Metric, error) { + oplogRS := client.Database("local").Collection("oplog.rs") + type oplogRSResult struct { + Timestamp primitive.Timestamp `bson:"ts"` + } + var head, tail oplogRSResult + headRes := oplogRS.FindOne(ctx, bson.M{}, options.FindOne().SetSort(bson.M{ + "$natural": -1, + })) + if 
headRes.Err() != nil { + return nil, headRes.Err() + } + + if err := headRes.Decode(&head); err != nil { + return nil, err + } + tailRes := oplogRS.FindOne(ctx, bson.M{}, options.FindOne().SetSort(bson.M{ + "$natural": 1, + })) + if tailRes.Err() != nil { + return nil, tailRes.Err() + } + + if err := tailRes.Decode(&tail); err != nil { + return nil, err + } + + headDesc := prometheus.NewDesc("mongodb_mongod_replset_oplog_head_timestamp", + "The timestamp of the newest change in the oplog", nil, nil) + headMetric := prometheus.MustNewConstMetric(headDesc, prometheus.GaugeValue, float64(head.Timestamp.T)) + + tailDesc := prometheus.NewDesc("mongodb_mongod_replset_oplog_tail_timestamp", + "The timestamp of the oldest change in the oplog", nil, nil) + tailMetric := prometheus.MustNewConstMetric(tailDesc, prometheus.GaugeValue, float64(tail.Timestamp.T)) + + return []prometheus.Metric{headMetric, tailMetric}, nil +} + +func replSetMetrics(m bson.M) []prometheus.Metric { + replSetGetStatus, ok := m["replSetGetStatus"].(bson.M) + if !ok { + return nil + } + var repl proto.ReplicaSetStatus + b, err := bson.Marshal(replSetGetStatus) + if err != nil { + return nil + } + if err := bson.Unmarshal(b, &repl); err != nil { + return nil + } + + var primaryOpTime time.Time + gotPrimary := false + + var metrics []prometheus.Metric + // Find primary + for _, m := range repl.Members { + if m.StateStr == "PRIMARY" { + primaryOpTime = m.OptimeDate.Time() + gotPrimary = true + + break + } + } + + createMetric := func(name, help string, value float64, labels map[string]string) { + const prefix = "mongodb_mongod_replset_" + d := prometheus.NewDesc(prefix+name, help, nil, labels) + metrics = append(metrics, prometheus.MustNewConstMetric(d, prometheus.GaugeValue, value)) + } + + createMetric("number_of_members", + "The number of replica set members.", + float64(len(repl.Members)), map[string]string{ + "set": repl.Set, + }) + + for _, m := range repl.Members { + labels := map[string]string{ + 
"name": m.Name, + "state": m.StateStr, + "set": repl.Set, + } + if m.Self { + createMetric("my_name", "The replica state name of the current member.", 1, map[string]string{ + "name": m.Name, + "set": repl.Set, + }) + } + + if !m.ElectionTime.IsZero() { + createMetric("member_election_date", + "The timestamp the node was elected as replica leader", + float64(m.ElectionTime.T), labels) + } + if t := m.OptimeDate.Time(); gotPrimary && !t.IsZero() && m.StateStr != "PRIMARY" { + val := math.Abs(float64(t.Unix() - primaryOpTime.Unix())) + createMetric("member_replication_lag", + "The replication lag that this member has with the primary.", + val, labels) + } + if m.PingMs != nil { + createMetric("member_ping_ms", + "The pingMs represents the number of milliseconds (ms) that a round-trip packet takes to travel between the remote member and the local instance.", + *m.PingMs, labels) + } + if t := m.LastHeartbeat.Time(); !t.IsZero() { + createMetric("member_last_heartbeat", + "The lastHeartbeat value provides an ISODate formatted date and time of the transmission time of last heartbeat received from this member.", + float64(t.Unix()), labels) + } + if t := m.LastHeartbeatRecv.Time(); !t.IsZero() { + createMetric("member_last_heartbeat_recv", + "The lastHeartbeatRecv value provides an ISODate formatted date and time that the last heartbeat was received from this member.", + float64(t.Unix()), labels) + } + if m.ConfigVersion > 0 { + createMetric("member_config_version", + "The configVersion value is the replica set configuration version.", + m.ConfigVersion, labels) + } + } + return metrics +} + +func mongosMetrics(ctx context.Context, client *mongo.Client, l *logrus.Logger) []prometheus.Metric { + metrics := make([]prometheus.Metric, 0) + + if metric, err := databasesTotalPartitioned(ctx, client); err != nil { + l.Debugf("cannot create metric for database total: %s", err) + } else { + metrics = append(metrics, metric) + } + + if metric, err := 
databasesTotalUnpartitioned(ctx, client); err != nil { + l.Debugf("cannot create metric for database total: %s", err) + } else { + metrics = append(metrics, metric) + } + + if metric, err := shardedCollectionsTotal(ctx, client); err != nil { + l.Debugf("cannot create metric for collections total: %s", err) + } else { + metrics = append(metrics, metric) + } + + metrics = append(metrics, balancerEnabled(ctx, client)) + + metric, err := chunksTotal(ctx, client) + if err != nil { + l.Debugf("cannot create metric for chunks total: %s", err) + } else { + metrics = append(metrics, metric) + } + + ms, err := chunksTotalPerShard(ctx, client) + if err != nil { + l.Debugf("cannot create metric for chunks total per shard: %s", err) + } else { + metrics = append(metrics, ms...) + } + + if metric, err := chunksBalanced(ctx, client); err != nil { + l.Debugf("cannot create metric for chunks balanced: %s", err) + } else { + metrics = append(metrics, metric) + } + + ms, err = changelog10m(ctx, client, l) + if err != nil { + l.Errorf("cannot create metric for changelog: %s", err) + } else { + metrics = append(metrics, ms...) + } + + metrics = append(metrics, dbstatsMetrics(ctx, client, l)...) 
+ + if metric, err := shardingShardsTotal(ctx, client); err != nil { + l.Debugf("cannot create metric for database total: %s", err) + } else { + metrics = append(metrics, metric) + } + + if metric, err := shardingShardsDrainingTotal(ctx, client); err != nil { + l.Debugf("cannot create metric for database total: %s", err) + } else { + metrics = append(metrics, metric) + } + + return metrics +} + +func databasesTotalPartitioned(ctx context.Context, client *mongo.Client) (prometheus.Metric, error) { + n, err := client.Database("config").Collection("databases").CountDocuments(ctx, bson.M{"partitioned": true}) + if err != nil { + return nil, err + } + + name := "mongodb_mongos_sharding_databases_total" + help := "Total number of sharded databases" + labels := map[string]string{"type": "partitioned"} + + d := prometheus.NewDesc(name, help, nil, labels) + return prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(n)) +} + +func databasesTotalUnpartitioned(ctx context.Context, client *mongo.Client) (prometheus.Metric, error) { + n, err := client.Database("config").Collection("databases").CountDocuments(ctx, bson.M{"partitioned": false}) + if err != nil { + return nil, err + } + + name := "mongodb_mongos_sharding_databases_total" + help := "Total number of sharded databases" + labels := map[string]string{"type": "unpartitioned"} + + d := prometheus.NewDesc(name, help, nil, labels) + return prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(n)) +} + +// shardedCollectionsTotal gets total sharded collections. 
+func shardedCollectionsTotal(ctx context.Context, client *mongo.Client) (prometheus.Metric, error) { + collCount, err := client.Database("config").Collection("collections").CountDocuments(ctx, bson.M{"dropped": false}) + if err != nil { + return nil, err + } + name := "mongodb_mongos_sharding_collections_total" + help := "Total # of Collections with Sharding enabled" + + d := prometheus.NewDesc(name, help, nil, nil) + return prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(collCount)) +} + +func chunksBalanced(ctx context.Context, client *mongo.Client) (prometheus.Metric, error) { + var m struct { + InBalancerRound bool `bson:"inBalancerRound"` + } + + cmd := bson.D{{Key: "balancerStatus", Value: "1"}} + res := client.Database("admin").RunCommand(ctx, cmd) + + if err := res.Decode(&m); err != nil { + return nil, err + } + + value := float64(0) + if !m.InBalancerRound { + value = 1 + } + + name := "mongodb_mongos_sharding_chunks_is_balanced" + help := "Shards are balanced" + + d := prometheus.NewDesc(name, help, nil, nil) + return prometheus.NewConstMetric(d, prometheus.GaugeValue, value) +} + +func balancerEnabled(ctx context.Context, client *mongo.Client) prometheus.Metric { + type bss struct { + stopped bool `bson:"stopped"` + } + var bs bss + enabled := 0 + + err := client.Database("config").Collection("settings").FindOne(ctx, bson.M{"_id": "balancer"}).Decode(&bs) + if err != nil { + enabled = 1 + } else if !bs.stopped { + enabled = 1 + } + + name := "mongodb_mongos_sharding_balancer_enabled" + help := "Balancer is enabled" + + d := prometheus.NewDesc(name, help, nil, nil) + metric, _ := prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(enabled)) + + return metric +} + +func chunksTotal(ctx context.Context, client *mongo.Client) (prometheus.Metric, error) { + n, err := client.Database("config").Collection("chunks").CountDocuments(ctx, bson.M{}) + if err != nil { + return nil, err + } + + name := "mongodb_mongos_sharding_chunks_total" + 
help := "Total number of chunks" + + d := prometheus.NewDesc(name, help, nil, nil) + return prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(n)) +} + +func chunksTotalPerShard(ctx context.Context, client *mongo.Client) ([]prometheus.Metric, error) { + aggregation := bson.D{ + {Key: "$group", Value: bson.M{"_id": "$shard", "count": bson.M{"$sum": 1}}}, + } + + cursor, err := client.Database("config").Collection("chunks").Aggregate(ctx, mongo.Pipeline{aggregation}) + if err != nil { + return nil, err + } + + var shards []bson.M + if err = cursor.All(ctx, &shards); err != nil { + return nil, err + } + + metrics := make([]prometheus.Metric, 0, len(shards)) + + for _, shard := range shards { + help := "Total number of chunks per shard" + labels := map[string]string{"shard": shard["_id"].(string)} + + d := prometheus.NewDesc("mongodb_mongos_sharding_shard_chunks_total", help, nil, labels) + val, ok := shard["count"].(int32) + if !ok { + continue + } + + metric, err := prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(val)) + if err != nil { + continue + } + + metrics = append(metrics, metric) + } + + return metrics, nil +} + +func shardingShardsTotal(ctx context.Context, client *mongo.Client) (prometheus.Metric, error) { + n, err := client.Database("config").Collection("shards").CountDocuments(ctx, bson.M{}) + if err != nil { + return nil, err + } + + name := "mongodb_mongos_sharding_shards_total" + help := "Total number of shards" + + d := prometheus.NewDesc(name, help, nil, nil) + return prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(n)) +} + +func shardingShardsDrainingTotal(ctx context.Context, client *mongo.Client) (prometheus.Metric, error) { + n, err := client.Database("config").Collection("shards").CountDocuments(ctx, bson.M{"draining": true}) + if err != nil { + return nil, err + } + + name := "mongodb_mongos_sharding_shards_draining_total" + help := "Total number of drainingshards" + + d := prometheus.NewDesc(name, help, nil, 
nil) + return prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(n)) +} + +// ShardingChangelogSummaryID Sharding Changelog Summary ID. +type ShardingChangelogSummaryID struct { + Event string `bson:"event"` + Note string `bson:"note"` +} + +// ShardingChangelogSummary Sharding Changelog Summary. +type ShardingChangelogSummary struct { + ID *ShardingChangelogSummaryID `bson:"_id"` + Count float64 `bson:"count"` +} + +// ShardingChangelogStats is an array of Sharding changelog stats. +type ShardingChangelogStats struct { + Items *[]ShardingChangelogSummary +} + +func changelog10m(ctx context.Context, client *mongo.Client, l *logrus.Logger) ([]prometheus.Metric, error) { + var metrics []prometheus.Metric + + coll := client.Database("config").Collection("changelog") + match := bson.M{"time": bson.M{"$gt": time.Now().Add(-10 * time.Minute)}} + group := bson.M{"_id": bson.M{"event": "$what", "note": "$details.note"}, "count": bson.M{"$sum": 1}} + + c, err := coll.Aggregate(ctx, []bson.M{{"$match": match}, {"$group": group}}) + if err != nil { + return nil, errors.Wrap(err, "failed to aggregate sharding changelog events") + } + + defer c.Close(ctx) //nolint:errcheck + + for c.Next(ctx) { + s := &ShardingChangelogSummary{} + if err := c.Decode(s); err != nil { + l.Error(err) + continue + } + + name := "mongodb_mongos_sharding_changelog_10min_total" + help := "mongodb_mongos_sharding_changelog_10min_total" + + labelValue := s.ID.Event + if s.ID.Note != "" { + labelValue += "." + s.ID.Note + } + + d := prometheus.NewDesc(name, help, nil, map[string]string{"event": labelValue}) + metric, err := prometheus.NewConstMetric(d, prometheus.GaugeValue, s.Count) + if err != nil { + continue + } + + metrics = append(metrics, metric) + } + + if err := c.Err(); err != nil { + return nil, err + } + + return metrics, nil +} + +// DatabaseStatList contains stats from all databases. 
+type databaseStatList struct { + Members []databaseStatus +} + +// DatabaseStatus represents stats about a database (mongod and raw from mongos). +type databaseStatus struct { + rawStatus // embed to collect top-level attributes + Shards map[string]*rawStatus `bson:"raw,omitempty"` +} + +// RawStatus represents stats about a database from Mongos side. +type rawStatus struct { + Name string `bson:"db,omitempty"` + IndexSize int `bson:"indexSize,omitempty"` + DataSize int `bson:"dataSize,omitempty"` + Collections int `bson:"collections,omitempty"` + Objects int `bson:"objects,omitempty"` + Indexes int `bson:"indexes,omitempty"` +} + +func getDatabaseStatList(ctx context.Context, client *mongo.Client, l *logrus.Logger) *databaseStatList { + dbStatList := &databaseStatList{} + dbNames, err := client.ListDatabaseNames(ctx, bson.M{}) + if err != nil { + l.Errorf("Failed to get database names: %s.", err) + return nil + } + l.Debugf("getting stats for databases: %v", dbNames) + for _, db := range dbNames { + dbStatus := databaseStatus{} + r := client.Database(db).RunCommand(context.TODO(), bson.D{{Key: "dbStats", Value: 1}, {Key: "scale", Value: 1}}) + err := r.Decode(&dbStatus) + if err != nil { + l.Errorf("Failed to get database status: %s.", err) + return nil + } + dbStatList.Members = append(dbStatList.Members, dbStatus) + } + + return dbStatList +} + +func dbstatsMetrics(ctx context.Context, client *mongo.Client, l *logrus.Logger) []prometheus.Metric { + var metrics []prometheus.Metric + + dbStatList := getDatabaseStatList(ctx, client, l) + if dbStatList == nil { + return metrics + } + + for _, member := range dbStatList.Members { + if len(member.Shards) > 0 { + for shard, stats := range member.Shards { + labels := prometheus.Labels{ + "db": stats.Name, + "shard": strings.Split(shard, "/")[0], + } + + name := "mongodb_mongos_db_data_size_bytes" + help := "The total size in bytes of the uncompressed data held in this database" + + d := prometheus.NewDesc(name, help, 
nil, labels) + metric, err := prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(stats.DataSize)) + if err == nil { + metrics = append(metrics, metric) + } + + name = "mongodb_mongos_db_indexes_total" + help = "Contains a count of the total number of indexes across all collections in the database" + + d = prometheus.NewDesc(name, help, nil, labels) + metric, err = prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(stats.Indexes)) + if err == nil { + metrics = append(metrics, metric) + } + + name = "mongodb_mongos_db_index_size_bytes" + help = "The total size in bytes of all indexes created on this database" + + d = prometheus.NewDesc(name, help, nil, labels) + metric, err = prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(stats.IndexSize)) + if err == nil { + metrics = append(metrics, metric) + } + + name = "mongodb_mongos_db_collections_total" + help = "Total number of collections" + + d = prometheus.NewDesc(name, help, nil, labels) + metric, err = prometheus.NewConstMetric(d, prometheus.GaugeValue, float64(stats.Collections)) + if err == nil { + metrics = append(metrics, metric) + } + } + } + } + + return metrics +} + +func walkTo(m primitive.M, path []string) interface{} { + val, ok := m[path[0]] + if !ok { + return nil + } + + if len(path) > 1 { + switch v := val.(type) { + case primitive.M: + val = walkTo(v, path[1:]) + case map[string]interface{}: + val = walkTo(v, path[1:]) + default: + return nil + } + } + + return val +} diff --git a/inputs/mongodb/mongodb.go b/inputs/mongodb/mongodb.go new file mode 100644 index 00000000..a1ff458a --- /dev/null +++ b/inputs/mongodb/mongodb.go @@ -0,0 +1,175 @@ +package mongodb + +import ( + "errors" + "fmt" + "log" + "sync" + "sync/atomic" + + "flashcat.cloud/categraf/config" + "flashcat.cloud/categraf/inputs" + "flashcat.cloud/categraf/inputs/mongodb/exporter" + "flashcat.cloud/categraf/types" + "github.com/sirupsen/logrus" + "github.com/toolkits/pkg/container/list" +) + +const inputName = 
"mongodb" + +type MongoDB struct { + config.Interval + counter uint64 + waitgrp sync.WaitGroup + Instances []*Instance `toml:"instances"` +} + +func init() { + inputs.Add(inputName, func() inputs.Input { + return &MongoDB{} + }) +} + +func (r *MongoDB) Prefix() string { + return "" +} + +func (r *MongoDB) Init() error { + if len(r.Instances) == 0 { + return types.ErrInstancesEmpty + } + + for i := 0; i < len(r.Instances); i++ { + if err := r.Instances[i].Init(); err != nil { + return err + } + } + + return nil +} + +func (r *MongoDB) Drop() { + for _, i := range r.Instances { + if i == nil { + continue + } + + if i.e != nil { + i.e.Close() + } + } +} + +func (r *MongoDB) Gather(slist *list.SafeList) { + atomic.AddUint64(&r.counter, 1) + + for i := range r.Instances { + ins := r.Instances[i] + + if len(ins.MongodbURI) == 0 { + continue + } + + r.waitgrp.Add(1) + go func(slist *list.SafeList, ins *Instance) { + defer r.waitgrp.Done() + + if ins.IntervalTimes > 0 { + counter := atomic.LoadUint64(&r.counter) + if counter%uint64(ins.IntervalTimes) != 0 { + return + } + } + + ins.gatherOnce(slist) + }(slist, ins) + } + + r.waitgrp.Wait() +} + +type Instance struct { + Labels map[string]string `toml:"labels"` + IntervalTimes int64 `toml:"interval_times"` + LogLevel string `toml:"log_level"` + + // Address (host:port) of MongoDB server. 
+ MongodbURI string `toml:"mongodb_uri,omitempty"` + Username string `toml:"username,omitempty"` + Password string `toml:"password,omitempty"` + CollStatsNamespaces []string `toml:"coll_stats_namespaces,omitempty"` + IndexStatsCollections []string `toml:"index_stats_collections,omitempty"` + CollStatsLimit int `toml:"coll_stats_limit,omitempty"` + CompatibleMode bool `toml:"compatible_mode,omitempty"` + DirectConnect bool `toml:"direct_connect,omitempty"` + DiscoveringMode bool `toml:"discovering_mode,omitempty"` + CollectAll bool `toml:"collect_all,omitempty"` + EnableDBStats bool `toml:"enable_db_stats,omitempty"` + EnableDiagnosticData bool `toml:"enable_diagnostic_data,omitempty"` + EnableReplicasetStatus bool `toml:"enable_replicaset_status,omitempty"` + EnableTopMetrics bool `toml:"enable_top_metrics,omitempty"` + EnableIndexStats bool `toml:"enable_index_stats,omitempty"` + EnableCollStats bool `toml:"enable_coll_stats,omitempty"` + EnableOverrideDescendingIndex bool `toml:"enable_override_descending_index,omitempty"` + + e *exporter.Exporter `toml:"-"` +} + +func (ins *Instance) Init() error { + if len(ins.MongodbURI) == 0 { + return nil + } + + if len(ins.LogLevel) == 0 { + ins.LogLevel = "info" + } + level, err := logrus.ParseLevel(ins.LogLevel) + if err != nil { + return err + } + + if ins.Labels == nil { + ins.Labels = make(map[string]string) + } + _, ok := ins.Labels["instance"] + if !ok { + return errors.New("instance must be specified in labels") + } + + l := logrus.New() + l.SetLevel(level) + + e, err := exporter.New(&exporter.Opts{ + URI: string(ins.MongodbURI), + Username: ins.Username, + Password: ins.Password, + CollStatsNamespaces: ins.CollStatsNamespaces, + IndexStatsCollections: ins.IndexStatsCollections, + CollStatsLimit: 0, + CompatibleMode: ins.CompatibleMode, + DirectConnect: ins.DirectConnect, + DiscoveringMode: ins.DiscoveringMode, + CollectAll: ins.CollectAll, + EnableDBStats: ins.EnableDBStats, + EnableDiagnosticData: 
ins.EnableDiagnosticData, + EnableReplicasetStatus: ins.EnableReplicasetStatus, + EnableTopMetrics: ins.EnableTopMetrics, + EnableIndexStats: ins.EnableIndexStats, + EnableCollStats: ins.EnableCollStats, + EnableOverrideDescendingIndex: ins.EnableOverrideDescendingIndex, + Logger: l, + }) + if err != nil { + return fmt.Errorf("could not instantiate mongodb lag exporter: %w", err) + } + + ins.e = e + return nil +} + +func (ins *Instance) gatherOnce(slist *list.SafeList) { + err := inputs.Collect(ins.e, slist, ins.Labels) + if err != nil { + log.Println("E! failed to collect metrics:", err) + } +}