diff --git a/spinnaker-monitoring-third-party/third_party/prometheus/install.sh b/spinnaker-monitoring-third-party/third_party/prometheus/install.sh index ffab768..8ff32cd 100755 --- a/spinnaker-monitoring-third-party/third_party/prometheus/install.sh +++ b/spinnaker-monitoring-third-party/third_party/prometheus/install.sh @@ -26,11 +26,14 @@ DASHBOARDS=true # Variables for Server Configuration # explicit prometheus versions because its not available with apt-get # grafana will be latest version in apt-get -PROMETHEUS_VERSION=prometheus-2.2.1.linux-amd64 -NODE_EXPORTER_VERSION=node_exporter-0.15.2.linux-amd64 -PUSHGATEWAY_VERSION=pushgateway-0.4.0.linux-amd64 +PROMETHEUS_VERSION=prometheus-2.6.0.linux-amd64 +NODE_EXPORTER_VERSION=node_exporter-0.17.0.linux-amd64 +PUSHGATEWAY_VERSION=pushgateway-0.7.0.linux-amd64 +PROMETHEUS_HOST=${PROMETHEUS_HOST:-localhost} PROMETHEUS_PORT=${PROMETHEUS_PORT:-9090} GRAFANA_PORT=${GRAFANA_PORT:-3000} +GRAFANA_HOST=${GRAFANA_HOST:-localhost} + GCE_CONFIG=false OVERWRITE=false @@ -163,7 +166,7 @@ rule_files: scrape_configs: - job_name: 'prometheus' static_configs: - - targets: ['localhost:9090'] + - targets: ['$PROMETHEUS_HOST:$PROMETHEUS_PORT'] EOF ) @@ -328,7 +331,7 @@ function configure_gateway_prometheus() { - job_name: 'pushgateway' honor_labels: true static_configs: - - targets: ['localhost:9091'] + - targets: ['$PROMETHEUS_HOST:9091'] EOF ) @@ -471,9 +474,9 @@ function install_grafana() { function add_grafana_userdata() { echo "Adding datasource" - PAYLOAD="{'name':'Spinnaker','type':'prometheus','url':'http://localhost:${PROMETHEUS_PORT}','access':'direct','isDefault':true}" + PAYLOAD="{'name':'Spinnaker','type':'prometheus','url':'http://${PROMETHEUS_HOST}:${PROMETHEUS_PORT}','access':'direct','isDefault':true}" curl -s -S -u "$GRAFANA_USER:$GRAFANA_PASSWORD" \ - http://localhost:${GRAFANA_PORT}/api/datasources \ + http://$GRAFANA_HOST:${GRAFANA_PORT}/api/datasources \ -H "Content-Type: application/json" \ -X POST \ -d 
"${PAYLOAD//\'/\"}" @@ -486,7 +489,7 @@ function add_grafana_userdata() { temp_file=$(mktemp) echo "{ \"dashboard\": $x, \"overwrite\": $OVERWRITE }" > $temp_file curl -s -S -u "$GRAFANA_USER:$GRAFANA_PASSWORD" \ - http://localhost:${GRAFANA_PORT}/api/dashboards/import \ + http://$GRAFANA_HOST:${GRAFANA_PORT}/api/dashboards/import \ -H "Content-Type: application/json" \ -X POST \ -d @${temp_file} @@ -541,7 +544,7 @@ fi if $DASHBOARDS; then TRIES=0 - until nc -z localhost $GRAFANA_PORT || [[ $TRIES -gt 5 ]]; do + until nc -z $GRAFANA_HOST $GRAFANA_PORT || [[ $TRIES -gt 5 ]]; do sleep 1 let TRIES+=1 done diff --git a/spinnaker-monitoring-third-party/third_party/prometheus/machine-dashboard.json b/spinnaker-monitoring-third-party/third_party/prometheus/machine-dashboard.json index 637354f..31acd68 100644 --- a/spinnaker-monitoring-third-party/third_party/prometheus/machine-dashboard.json +++ b/spinnaker-monitoring-third-party/third_party/prometheus/machine-dashboard.json @@ -81,8 +81,8 @@ "steppedLine": false, "targets": [ { - "EXPR": "sum($Function(node_cpu{mode!=\"idle\",instance~\"$Instance\"}[$SamplePeriod])) / sum($Function(node_cpu[$SamplePeriod])) ", - "expr": "sum($Function(node_cpu{mode!=\"idle\"}[$SamplePeriod])) by (job) / sum($Function(node_cpu[$SamplePeriod])) by (job)", + "EXPR": "sum($Function(node_cpu_seconds_total{mode!=\"idle\",instance~\"$Instance\"}[$SamplePeriod])) / sum($Function(node_cpu_seconds_total[$SamplePeriod])) ", + "expr": "sum($Function(node_cpu_seconds_total{mode!=\"idle\"}[$SamplePeriod])) by (job) / sum($Function(node_cpu_seconds_total[$SamplePeriod])) by (job)", "format": "time_series", "hide": false, "interval": "", @@ -93,7 +93,7 @@ "step": 10 }, { - "expr": "sum($Function(node_cpu{mode!=\"idle\",instance=~\"$Instance\"}[$SamplePeriod])) by (instance) / sum($Function(node_cpu{instance=~\"$Instance\"}[$SamplePeriod])) by (instance)", + "expr": "sum($Function(node_cpu_seconds_total{mode!=\"idle\",instance=~\"$Instance\"}[$SamplePeriod])) by 
(instance) / sum($Function(node_cpu_seconds_total{instance=~\"$Instance\"}[$SamplePeriod])) by (instance)", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -171,7 +171,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum((node_memory_MemTotal{instance=~\"$Instance\"} - node_memory_MemAvailable{instance=~\"$Instance\"}) / node_memory_MemTotal{instance=~\"$Instance\"}) by (instance)", + "expr": "sum((node_memory_MemTotal_bytes{instance=~\"$Instance\"} - node_memory_MemAvailable_bytes{instance=~\"$Instance\"}) / node_memory_MemTotal_bytes{instance=~\"$Instance\"}) by (instance)", "format": "time_series", "hide": false, "interval": "", @@ -259,7 +259,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(sum((node_filesystem_size{mountpoint!~\"/var/lib/docker.*|/run.*\",device=~\"/dev/.*\",instance=~\"$Instance\"} -\n node_filesystem_avail{mountpoint!~\"/var/lib/docker.*/run.*\",device=~\"/dev/.*\",instance=~\"$Instance\"}) / node_filesystem_size{mountpoint!~\"/var/lib/docker.*|/run.*\",device=~\"/dev/.*\",instance=~\"$Instance\"}) by (instance, device) , \"device\", \"$1\", \"device\", \"/dev(.*)\")", + "expr": "label_replace(sum((node_filesystem_size_bytes{mountpoint!~\"/var/lib/docker.*|/run.*\",device=~\"/dev/.*\",instance=~\"$Instance\"} -\n node_filesystem_avail_bytes{mountpoint!~\"/var/lib/docker.*|/run.*\",device=~\"/dev/.*\",instance=~\"$Instance\"}) / node_filesystem_size_bytes{mountpoint!~\"/var/lib/docker.*|/run.*\",device=~\"/dev/.*\",instance=~\"$Instance\"}) by (instance, device) , \"device\", \"$1\", \"device\", \"/dev(.*)\")", "format": "time_series", "hide": false, "interval": "", @@ -357,7 +357,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum($Function(node_network_receive_bytes{device=~\"eth.*\",instance=~\"$Instance\"}[$SamplePeriod])) by (device)", + "expr": "sum($Function(node_network_receive_bytes_total{device=~\"eth.*\",instance=~\"$Instance\"}[$SamplePeriod])) by (device)", "format": "time_series", 
"hide": false, "interval": "", @@ -368,7 +368,7 @@ "step": 4 }, { - "expr": "sum($Function(node_network_transmit_bytes{device=~\"eth.*\",instance=~\"$Instance\"}[$SamplePeriod])) by (device)", + "expr": "sum($Function(node_network_transmit_bytes_total{device=~\"eth.*\",instance=~\"$Instance\"}[$SamplePeriod])) by (device)", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -453,7 +453,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum($Function(node_network_receive_bytes{device=\"lo\",instance=~\"$Instance\"}[$SamplePeriod])) by (device)", + "expr": "sum($Function(node_network_receive_bytes_total{device=\"lo\",instance=~\"$Instance\"}[$SamplePeriod])) by (device)", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -463,7 +463,7 @@ "step": 4 }, { - "expr": "sum($Function(node_network_transmit_bytes{device=\"lo\",instance=~\"$Instance\"}[$SamplePeriod])) by (device)", + "expr": "sum($Function(node_network_transmit_bytes_total{device=\"lo\",instance=~\"$Instance\"}[$SamplePeriod])) by (device)", "format": "time_series", "hide": false, "interval": "", @@ -559,7 +559,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum($Function(node_disk_bytes_written{instance=~\"$Instance\"}[$SamplePeriod])) by (device)", + "expr": "sum($Function(node_disk_written_bytes_total{instance=~\"$Instance\"}[$SamplePeriod])) by (device)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -569,7 +569,7 @@ "step": 4 }, { - "expr": "sum($Function(node_disk_bytes_read{instance=~\"$Instance\"}[$SamplePeriod])) by (device)", + "expr": "sum($Function(node_disk_read_bytes_total{instance=~\"$Instance\"}[$SamplePeriod])) by (device)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}/READ", @@ -653,7 +653,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(sum(node_filesystem_free{mountpoint!~\"/var/lib/docker.*|/run.*\",device=~\"/dev/.*\",instance=~\"$Instance\"}) by (instance, device), \"device\", \"$1\", 
\"device\", \"/dev(.*)\")", + "expr": "label_replace(sum(node_filesystem_free_bytes{mountpoint!~\"/var/lib/docker.*|/run.*\",device=~\"/dev/.*\",instance=~\"$Instance\"}) by (instance, device), \"device\", \"$1\", \"device\", \"/dev(.*)\")", "format": "time_series", "hide": false, "interval": "", @@ -753,7 +753,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(node_memory_MemFree{instance=~\"$Instance\"})", + "expr": "sum(node_memory_MemFree_bytes{instance=~\"$Instance\"})", "format": "time_series", "hide": false, "interval": "", @@ -764,7 +764,7 @@ "step": 4 }, { - "expr": "sum(node_memory_Committed_AS{instance=~\"$Instance\"})", + "expr": "sum(node_memory_Committed_AS_bytes{instance=~\"$Instance\"})", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -774,7 +774,7 @@ "step": 4 }, { - "expr": "sum(node_memory_Dirty{instance=~\"$Instance\"})", + "expr": "sum(node_memory_Dirty_bytes{instance=~\"$Instance\"})", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -784,7 +784,7 @@ "step": 4 }, { - "expr": "sum(node_memory_MemTotal{instance=~\"$Instance\"})", + "expr": "sum(node_memory_MemTotal_bytes{instance=~\"$Instance\"})", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -794,7 +794,7 @@ "step": 4 }, { - "expr": "sum(node_memory_MemAvailable{instance=~\"$Instance\"})", + "expr": "sum(node_memory_MemAvailable_bytes{instance=~\"$Instance\"})", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -872,7 +872,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum($Function(node_network_receive_bytes{device=~\"eth.*|lo\",instance=~\"$Instance\"}[$SamplePeriod])) by (device)", + "expr": "sum($Function(node_network_receive_bytes_total{device=~\"eth.*|lo\",instance=~\"$Instance\"}[$SamplePeriod])) by (device)", "format": "time_series", "hide": false, "interval": "", @@ -883,7 +883,7 @@ "step": 4 }, { - "expr": 
"sum($Function(node_network_transmit_bytes{device=~\"eth.*|lo\",instance=~\"$Instance\"}[$SamplePeriod])) by (device)", + "expr": "sum($Function(node_network_transmit_bytes_total{device=~\"eth.*|lo\",instance=~\"$Instance\"}[$SamplePeriod])) by (device)", "format": "time_series", "hide": false, "interval": "", @@ -974,7 +974,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum($Function(node_cpu{instance=~\"$Instance\",mode!=\"idle\"}[$SamplePeriod])) by (cpu) / sum($Function(node_cpu{instance=~\"$Instance\"}[$SamplePeriod])) by (cpu)", + "expr": "sum($Function(node_cpu_seconds_total{instance=~\"$Instance\",mode!=\"idle\"}[$SamplePeriod])) by (cpu) / sum($Function(node_cpu_seconds_total{instance=~\"$Instance\"}[$SamplePeriod])) by (cpu)", "format": "time_series", "hide": true, "interval": "", @@ -985,7 +985,7 @@ "step": 4 }, { - "expr": "sum($Function(node_cpu{instance=~\"$Instance\",mode!=\"idle\"}[$SamplePeriod])) by (cpu)", + "expr": "sum($Function(node_cpu_seconds_total{instance=~\"$Instance\",mode!=\"idle\"}[$SamplePeriod])) by (cpu)", "format": "time_series", "hide": false, "interval": "", @@ -1064,7 +1064,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum($Function(node_cpu{instance=~\"$Instance\",mode!=\"idle\"}[$SamplePeriod])) by (mode) ", + "expr": "sum($Function(node_cpu_seconds_total{instance=~\"$Instance\",mode!=\"idle\"}[$SamplePeriod])) by (mode) ", "format": "time_series", "hide": false, "interval": "", @@ -1135,7 +1135,7 @@ "multi": false, "name": "Instance", "options": [], - "query": "label_values(node_cpu, instance)", + "query": "label_values(node_cpu_seconds_total, instance)", "refresh": 2, "regex": "", "sort": 1, @@ -1262,6 +1262,6 @@ ] }, "timezone": "browser", - "title": "Machine Stats", - "version": 4 + "title": "VM Platform Stats", + "version": 5 }