From 1b85fbb71fb70b002687d245b4094360c7ecb5d3 Mon Sep 17 00:00:00 2001 From: Tobias Gesellchen Date: Mon, 11 Mar 2019 09:34:51 +0100 Subject: [PATCH] optionally collect `/_scheduler/jobs` (CouchDB 2.x only) --- couchdb-exporter.go | 7 ++- couchdb-exporter_test.go | 10 ++-- lib/collector-v2.go | 12 +++++ lib/collector.go | 7 +++ lib/couchdb-client.go | 26 ++++++++++ lib/couchdb-stats.go | 27 +++++++++- lib/exporter.go | 11 ++++ testdata/scheduler-jobs-v2.json | 92 +++++++++++++++++++++++++++++++++ 8 files changed, 186 insertions(+), 6 deletions(-) create mode 100644 testdata/scheduler-jobs-v2.json diff --git a/couchdb-exporter.go b/couchdb-exporter.go index 15099cca..98fb632b 100644 --- a/couchdb-exporter.go +++ b/couchdb-exporter.go @@ -23,6 +23,7 @@ type exporterConfigType struct { couchdbInsecure bool databases string databaseViews bool + schedulerJobs bool } var exporterConfig exporterConfigType @@ -37,6 +38,7 @@ func init() { flag.BoolVar(&exporterConfig.couchdbInsecure, "couchdb.insecure", true, "Ignore server certificate if using https") flag.StringVar(&exporterConfig.databases, "databases", "", fmt.Sprintf("Comma separated list of database names, or '%s'", lib.AllDbs)) flag.BoolVar(&exporterConfig.databaseViews, "databases.views", true, "Collect view details of every observed database") + flag.BoolVar(&exporterConfig.schedulerJobs, "scheduler.jobs", false, "Collect active replication jobs (CouchDB 2.x+ only)") flag.BoolVar(&glogadapt.Logging.ToStderr, "logtostderr", false, "log to standard error instead of files") flag.BoolVar(&glogadapt.Logging.AlsoToStderr, "alsologtostderr", false, "log to standard error as well as files") @@ -62,8 +64,9 @@ func main() { Username: *&exporterConfig.couchdbUsername, Password: *&exporterConfig.couchdbPassword}, lib.CollectorConfig{ - Databases: databases, - CollectViews: *&exporterConfig.databaseViews, + Databases: databases, + CollectViews: *&exporterConfig.databaseViews, + CollectSchedulerJobs: *&exporterConfig.schedulerJobs, }, *&exporterConfig.couchdbInsecure) prometheus.MustRegister(exporter) diff --git a/couchdb-exporter_test.go b/couchdb-exporter_test.go index ea1da907..f47ee4b2 100644 --- a/couchdb-exporter_test.go +++ b/couchdb-exporter_test.go @@ -76,6 +76,9 @@ func couchdbResponse(t *testing.T, versionSuffix string) Handler { } else if r.URL.Path == "/_active_tasks" { file := readFile(t, fmt.Sprintf("./testdata/active-tasks-%s.json", versionSuffix)) w.Write([]byte(file)) + } else if r.URL.Path == "/_scheduler/jobs" { + file := readFile(t, fmt.Sprintf("./testdata/scheduler-jobs-%s.json", versionSuffix)) + w.Write([]byte(file)) } else if r.URL.Path == "/example" { file := readFile(t, fmt.Sprintf("./testdata/example-meta-%s.json", versionSuffix)) w.Write([]byte(file)) @@ -113,8 +116,9 @@ func performCouchdbStatsTest(t *testing.T, couchdbVersion string, expectedMetric server := httptest.NewServer(handler) e := lib.NewExporter(server.URL, basicAuth, lib.CollectorConfig{ - Databases: []string{"example", "another-example"}, - CollectViews: true, + Databases: []string{"example", "another-example"}, + CollectViews: true, + CollectSchedulerJobs: true, }, true) ch := make(chan prometheus.Metric) @@ -163,7 +167,7 @@ func TestCouchdbStatsV1(t *testing.T) { } func TestCouchdbStatsV2(t *testing.T) { - performCouchdbStatsTest(t, "v2", 103, 4712, 58570, 14) + performCouchdbStatsTest(t, "v2", 106, 4712, 58570, 15) } func TestCouchdbStatsV1Integration(t *testing.T) { diff --git a/lib/collector-v2.go b/lib/collector-v2.go index 8e9787e8..3529c368 100644 --- a/lib/collector-v2.go +++ b/lib/collector-v2.go @@ -80,6 +80,18 @@ func (e *Exporter) collectV2(stats Stats, exposedHttpStatusCodes []string, colle } } + if collectorConfig.CollectSchedulerJobs { + for _, job := range stats.SchedulerJobsResponse.Jobs { + e.schedulerJobs.WithLabelValues( + job.Node, + job.ID, + job.Database, + job.DocID, + job.Source, + job.Target).Set(float64(len(job.History))) + } + } + activeTasksByNode := make(map[string]ActiveTaskTypes) for _, task := range stats.ActiveTasksResponse { if task.Type == "replication" { diff --git a/lib/collector.go b/lib/collector.go index 4a1c170f..df4fe334 100644 --- a/lib/collector.go +++ b/lib/collector.go @@ -21,6 +21,7 @@ type CollectorConfig struct { Databases []string ObservedDatabases []string CollectViews bool + CollectSchedulerJobs bool } type ActiveTaskTypes struct { @@ -74,6 +75,8 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { e.viewStaleness.Describe(ch) + e.schedulerJobs.Describe(ch) + e.requestCount.Describe(ch) } @@ -114,6 +117,8 @@ func (e *Exporter) resetAllMetrics() { e.activeTasksReplicationLastUpdate, e.viewStaleness, + + e.schedulerJobs, } e.resetMetrics(metrics) } @@ -205,6 +210,8 @@ func (e *Exporter) collect(ch chan<- prometheus.Metric) error { e.viewStaleness.Collect(ch) + e.schedulerJobs.Collect(ch) + e.requestCount.Collect(ch) return nil diff --git a/lib/couchdb-client.go b/lib/couchdb-client.go index 76e74a46..661bf143 100644 --- a/lib/couchdb-client.go +++ b/lib/couchdb-client.go @@ -161,6 +161,10 @@ func (c *CouchdbClient) getStats(config CollectorConfig) (Stats, error) { return Stats{}, err } } + schedulerJobs := SchedulerJobsResponse{} + if config.CollectSchedulerJobs { + schedulerJobs, err = c.getSchedulerJobs() + } activeTasks, err := c.getActiveTasks() if err != nil { return Stats{}, err @@ -174,6 +178,7 @@ func (c *CouchdbClient) getStats(config CollectorConfig) (Stats, error) { DatabasesTotal: len(databasesList), DatabaseStatsByDbName: databaseStats, ActiveTasksResponse: activeTasks, + SchedulerJobsResponse: schedulerJobs, ApiVersion: "2"}, nil } else { urisByNode := map[string]string{ @@ -280,6 +285,27 @@ func (c *CouchdbClient) enhanceWithViewUpdateSeq(dbStatsByDbName map[string]Data return nil } +// CouchDB 2.x+ only +func (c *CouchdbClient) getSchedulerJobs() (SchedulerJobsResponse, error) { + data, err := c.Request("GET", fmt.Sprintf("%s/_scheduler/jobs", c.BaseUri), nil) + if err != nil { + return SchedulerJobsResponse{}, fmt.Errorf("error reading scheduler jobs: %v", err) + } + + var schedulerJobs SchedulerJobsResponse + err = json.Unmarshal(data, &schedulerJobs) + if err != nil { + return SchedulerJobsResponse{}, fmt.Errorf("error unmarshalling scheduler jobs: %v", err) + } + //for _, job := range schedulerJobs.Jobs { + // replDoc, err := c.Request("GET", fmt.Sprintf("%s/%s/%s", c.BaseUri, job.Database, job.DocID), nil) + // if err != nil { + // return SchedulerJobsResponse{}, fmt.Errorf("error reading replication doc '%s/%s': %v", job.Database, job.DocID, err) + // } + //} + return schedulerJobs, nil +} + func (c *CouchdbClient) getActiveTasks() (ActiveTasksResponse, error) { data, err := c.Request("GET", fmt.Sprintf("%s/_active_tasks", c.BaseUri), nil) if err != nil { diff --git a/lib/couchdb-stats.go b/lib/couchdb-stats.go index 915feade..3887df16 100644 --- a/lib/couchdb-stats.go +++ b/lib/couchdb-stats.go @@ -1,6 +1,9 @@ package lib -import "encoding/json" +import ( + "encoding/json" + "time" +) type Counter struct { // v1.x api @@ -152,10 +155,32 @@ type ActiveTask struct { type ActiveTasksResponse []ActiveTask +type SchedulerJobsResponse struct { + TotalRows int `json:"total_rows"` + Offset int `json:"offset"` + Jobs []struct { + Database string `json:"database"` + ID string `json:"id"` + Pid string `json:"pid"` + Source string `json:"source"` + Target string `json:"target"` + User string `json:"user"` + DocID string `json:"doc_id"` + History []struct { + Timestamp time.Time `json:"timestamp"` + Type string `json:"type"` + } `json:"history"` + Node string `json:"node"` + StartTime time.Time `json:"start_time"` + } `json:"jobs"` +} + type Stats struct { StatsByNodeName map[string]StatsResponse DatabasesTotal int DatabaseStatsByDbName DatabaseStatsByDbName ActiveTasksResponse ActiveTasksResponse + // SchedulerJobsResponse: CouchDB 2.x+ only + SchedulerJobsResponse SchedulerJobsResponse ApiVersion string } diff --git a/lib/exporter.go b/lib/exporter.go index 4dac51a8..80d1a40a 100644 --- a/lib/exporter.go +++ b/lib/exporter.go @@ -53,6 +53,8 @@ type Exporter struct { activeTasksReplicationLastUpdate *prometheus.GaugeVec viewStaleness *prometheus.GaugeVec + + schedulerJobs *prometheus.GaugeVec } func NewExporter(uri string, basicAuth BasicAuth, collectorConfig CollectorConfig, insecure bool) *Exporter { @@ -324,5 +326,14 @@ func NewExporter(uri string, basicAuth BasicAuth, collectorConfig CollectorConfi Help: "the view's staleness (the view's update_seq compared to the database's update_seq)", }, []string{"db_name", "design_doc_name", "view_name", "shard_begin", "shard_end"}), + + schedulerJobs: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: "scheduler", + Name: "jobs", + Help: "scheduler jobs", + }, + []string{"node_name", "job_id", "db_name", "doc_id", "source", "target"}), } } diff --git a/testdata/scheduler-jobs-v2.json b/testdata/scheduler-jobs-v2.json new file mode 100644 index 00000000..f86b4d71 --- /dev/null +++ b/testdata/scheduler-jobs-v2.json @@ -0,0 +1,92 @@ +{ + "total_rows": 3, + "offset": 0, + "jobs": [ + { + "database": "_replicator", + "id": "1ebc7c8a11b299a779ffb807e089c17e+create_target", + "pid": "<0.10858.6>", + "source": "http://source:5984/source-db/", + "target": "http://target:5984/target-db/", + "user": "admin", + "doc_id": "replicate-once", + "history": [ + { + "timestamp": "2019-03-10T21:11:07Z", + "type": "started" + }, + { + "timestamp": "2019-03-10T21:11:07Z", + "type": "added" + } + ], + "node": "couchdb@172.16.238.11", + "start_time": "2019-03-10T21:11:07Z" + }, + { + "database": "_replicator", + "id": "2ae1733d6570280350d3e24c4929ebdf+continuous+create_target", + "pid": null, + "source": "http://source:5984/source-db/", + "target": "http://target:5984/target-db/", + "user": "admin", + "doc_id": "replicate-continuous-error", + "history": [ + { + "timestamp": "2019-03-10T22:42:12Z", + "type": "crashed", + "reason": "{replication_auth_error,\n {session_request_failed,\"http://localhost:15984/_session\",\"root\",\n {conn_failed,{error,econnrefused}}}}" + }, + { + "timestamp": "2019-03-10T22:42:07Z", + "type": "started" + }, + { + "timestamp": "2019-03-10T22:40:10Z", + "type": "crashed", + "reason": "{replication_auth_error,\n {session_request_failed,\"http://localhost:15984/_session\",\"root\",\n {conn_failed,{error,econnrefused}}}}" + }, + { + "timestamp": "2019-03-10T22:40:07Z", + "type": "started" + }, + { + "timestamp": "2019-03-10T22:38:45Z", + "type": "crashed", + "reason": "{replication_auth_error,\n {session_request_failed,\"http://localhost:15984/_session\",\"root\",\n {conn_failed,{error,econnrefused}}}}" + }, + { + "timestamp": "2019-03-10T22:38:40Z", + "type": "started" + }, + { + "timestamp": "2019-03-10T22:38:40Z", + "type": "added" + } + ], + "node": "couchdb@172.16.238.12", + "start_time": "2019-03-10T22:38:40Z" + }, + { + "database": "_replicator", + "id": "382782aeedd59468b52add2b0d7389a2+continuous+create_target", + "pid": "<0.25596.0>", + "source": "http://source:5984/source-db/", + "target": "http://target:5984/target-db/", + "user": "admin", + "doc_id": "replicate-continuous", + "history": [ + { + "timestamp": "2019-03-10T21:07:40Z", + "type": "started" + }, + { + "timestamp": "2019-03-10T21:07:40Z", + "type": "added" + } + ], + "node": "couchdb@172.16.238.13", + "start_time": "2019-03-10T21:07:40Z" + } + ] +}