From 9bca557ee32f14f1a37c9e194ebde8f5d960b648 Mon Sep 17 00:00:00 2001 From: "Addo.Zhang" Date: Thu, 11 Jul 2024 18:27:05 +0800 Subject: [PATCH] fix grafana dashboard issue (#303) Signed-off-by: Addo.Zhang --- .../pipy/dashboards/fsm-control-plane.json | 26 +- .../fsm-data-plane-performance.json | 8 +- .../dashboards/fsm-mesh-sidecar-details.json | 235 +----------------- charts/fsm/templates/fsm-deployment.yaml | 2 - charts/fsm/templates/grafana-configmap.yaml | 14 -- .../fsm/templates/prometheus-configmap.yaml | 4 - 6 files changed, 19 insertions(+), 270 deletions(-) diff --git a/charts/fsm/grafana/pipy/dashboards/fsm-control-plane.json b/charts/fsm/grafana/pipy/dashboards/fsm-control-plane.json index 32d5a4c5b..f61093de7 100755 --- a/charts/fsm/grafana/pipy/dashboards/fsm-control-plane.json +++ b/charts/fsm/grafana/pipy/dashboards/fsm-control-plane.json @@ -145,7 +145,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(sidecar_cluster_upstream_rq_xx{sidecar_response_code_class=\"2\",source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"}[1m])", + "expr": "irate(sidecar_cluster_upstream_rq_xx{sidecar_response_code_class=\"2\",source_service=\"$source_service\",source_namespace=\"$source_namespace\"}[1m])", "interval": "1m", "legendFormat": "Time (per minute)", "refId": "A" @@ -252,7 +252,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(sidecar_cluster_upstream_rq_xx{sidecar_response_code_class!=\"2\",source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"}[1m])", + "expr": "irate(sidecar_cluster_upstream_rq_xx{sidecar_response_code_class!=\"2\",source_service=\"$source_service\",source_namespace=\"$source_namespace\"}[1m])", "interval": "1m", "legendFormat": "Time (per minute)", "refId": "A" @@ -347,7 +347,7 @@ "pluginVersion": "8.2.2", "targets": [ { - "expr": "sum(sidecar_cluster_upstream_cx_active{source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"})", + "expr": "sum(sidecar_cluster_upstream_cx_active{source_service=\"$source_service\",source_namespace=\"$source_namespace\"})", "legendFormat": "Connections", "refId": "A" } @@ -468,12 +468,12 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(sidecar_cluster_upstream_cx_tx_bytes_total{source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"}[1m]))", + "expr": "sum(irate(sidecar_cluster_upstream_cx_tx_bytes_total{source_service=\"$source_service\",source_namespace=\"$source_namespace\"}[1m]))", "legendFormat": "Bytes sent", "refId": "A" }, { - "expr": "sum(irate(sidecar_cluster_upstream_cx_rx_bytes_total{source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"}[1m]))", + "expr": "sum(irate(sidecar_cluster_upstream_cx_rx_bytes_total{source_service=\"$source_service\",source_namespace=\"$source_namespace\"}[1m]))", "legendFormat": "Bytes received", "refId": "B" } @@ -570,42 +570,42 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(sidecar_cluster_upstream_cx_destroy_remote_with_active_rq{source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"}[1m]))", + "expr": "sum(irate(sidecar_cluster_upstream_cx_destroy_remote_with_active_rq{source_service=\"$source_service\",source_namespace=\"$source_namespace\"}[1m]))", "legendFormat": "Connection destroyed by the client", "refId": "A" }, { - "expr": "sum(irate(sidecar_cluster_upstream_cx_connect_timeout{source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"}[1m]))", + "expr": "sum(irate(sidecar_cluster_upstream_cx_connect_timeout{source_service=\"$source_service\",source_namespace=\"$source_namespace\"}[1m]))", "legendFormat": "Connection timeout", "refId": "B" }, { - "expr": "sum(irate(sidecar_cluster_upstream_cx_destroy_local_with_active_rq{source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"}[1m]))", + "expr": "sum(irate(sidecar_cluster_upstream_cx_destroy_local_with_active_rq{source_service=\"$source_service\",source_namespace=\"$source_namespace\"}[1m]))", "legendFormat": "Connection destroyed by local Sidecar", "refId": "C" }, { - "expr": "sum(irate(sidecar_cluster_upstream_rq_pending_failure_eject{source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"}[1m]))", + "expr": "sum(irate(sidecar_cluster_upstream_rq_pending_failure_eject{source_service=\"$source_service\",source_namespace=\"$source_namespace\"}[1m]))", "legendFormat": "Pending failure ejection", "refId": "D" }, { - "expr": "sum(irate(sidecar_cluster_upstream_rq_pending_overflow{source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"}[1m]))", + "expr": "sum(irate(sidecar_cluster_upstream_rq_pending_overflow{source_service=\"$source_service\",source_namespace=\"$source_namespace\"}[1m]))", "legendFormat": "Pending overflow", "refId": "E" }, { - "expr": "sum(irate(sidecar_cluster_upstream_rq_timeout{source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"}[1m]))", + "expr": "sum(irate(sidecar_cluster_upstream_rq_timeout{source_service=\"$source_service\",source_namespace=\"$source_namespace\"}[1m]))", "legendFormat": "Request timeout", "refId": "F" }, { - "expr": "sum(irate(sidecar_cluster_upstream_rq_rx_reset{source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"}[1m]))", + "expr": "sum(irate(sidecar_cluster_upstream_rq_rx_reset{source_service=\"$source_service\",source_namespace=\"$source_namespace\"}[1m]))", "legendFormat": "Response reset", "refId": "G" }, { - "expr": "sum(irate(sidecar_cluster_upstream_rq_tx_reset{source_service=\"$source_service\",source_namespace=\"$source_namespace\",sidecar_cluster_name=\"fsm-controller\"}[1m]))", + "expr": "sum(irate(sidecar_cluster_upstream_rq_tx_reset{source_service=\"$source_service\",source_namespace=\"$source_namespace\"}[1m]))", "legendFormat": "Request reset", "refId": "H" } diff --git a/charts/fsm/grafana/pipy/dashboards/fsm-data-plane-performance.json b/charts/fsm/grafana/pipy/dashboards/fsm-data-plane-performance.json index e52deb017..41849b087 100644 --- a/charts/fsm/grafana/pipy/dashboards/fsm-data-plane-performance.json +++ b/charts/fsm/grafana/pipy/dashboards/fsm-data-plane-performance.json @@ -132,7 +132,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", pod=~\"$pod\", container=\"sidecar\"}[1m])", + "expr": "irate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", pod=~\"$pod\"}[1m])", "interval": "", "legendFormat": "{{pod}}:{{container}}", "refId": "A" @@ -226,7 +226,7 @@ "steppedLine": false, "targets": [ { - "expr": "container_memory_rss{namespace=~\"$namespace\", pod=~\"$pod\", container=\"sidecar\"}", + "expr": "container_memory_rss{namespace=~\"$namespace\", pod=~\"$pod\"}", "interval": "", "legendFormat": "{{pod}}:{{container}}", "refId": "A" @@ -334,7 +334,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", pod=~\"$pod\", container=~\"$container\"}[1m])", + "expr": "irate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", pod=~\"$pod\"}[1m])", "interval": "", "legendFormat": "{{pod}}:{{container}}", "refId": "A" @@ -428,7 +428,7 @@ "steppedLine": false, "targets": [ { - "expr": "container_memory_rss{namespace=~\"$namespace\", pod=~\"$pod\", container=~\"$container\"}", + "expr": "container_memory_rss{namespace=~\"$namespace\", pod=~\"$pod\"}", "interval": "", "legendFormat": "{{pod}}:{{container}}", "refId": "A" diff --git a/charts/fsm/grafana/pipy/dashboards/fsm-mesh-sidecar-details.json b/charts/fsm/grafana/pipy/dashboards/fsm-mesh-sidecar-details.json index 5c203fdfd..12a47b0d7 100644 --- a/charts/fsm/grafana/pipy/dashboards/fsm-mesh-sidecar-details.json +++ b/charts/fsm/grafana/pipy/dashboards/fsm-mesh-sidecar-details.json @@ -302,7 +302,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(container_cpu_usage_seconds_total{namespace=\"${CONTROL_PLANE_NAMESPACE}\", container!~\"POD\", container!~\"\"}[1m])", + "expr": "irate(container_cpu_usage_seconds_total{namespace=\"${CONTROL_PLANE_NAMESPACE}\"}[1m])", "interval": "", "legendFormat": "{{pod}}", "refId": "A" @@ -420,7 +420,7 @@ "steppedLine": false, "targets": [ { - "expr": "container_memory_rss{namespace=\"${CONTROL_PLANE_NAMESPACE}\", container!=\"\", container!=\"POD\"}", + "expr": "container_memory_rss{namespace=\"${CONTROL_PLANE_NAMESPACE}\"}", "interval": "", "legendFormat": "{{pod}}", "refId": "A" @@ -783,95 +783,6 @@ "timeFrom": null, "timeShift": null }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#73BF69", - "colorScale": "sqrt", - "colorScheme": "interpolateOranges", - "exponent": 0.5, - "mode": "opacity" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": { - "align": null - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 18, - "x": 0, - "y": 36 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 28, - "interval": "10s", - "legend": { - "show": false - }, - "pluginVersion": "8.2.2", - "reverseYBuckets": false, - "targets": [ - { - "expr": "sum(rate(fsm_proxy_config_update_time_bucket{resource_type=\"$xds_path\", source_pod_name=~\"$fsm_controller_instance\"}[1m])) by (le)", - "format": "heatmap", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "xDS Path histogram", - "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": null, - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null - }, { "aliasColors": {}, "bars": true, @@ -988,122 +899,6 @@ "alignLevel": null } }, - { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {}, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 9, - "x": 9, - "y": 46 - }, - "hiddenSeries": false, - "id": 37, - "interval": "10s", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pluginVersion": "8.2.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "idelta(fsm_proxy_config_update_time_count{success=\"true\", resource_type=~\"$xds_path\", source_pod_name=~\"$fsm_controller_instance\"}[1m])\n", - "interval": "", - "legendFormat": "{{resource_type}}-{{source_pod_name}}-Success", - "refId": "A" - }, - { - "expr": "idelta(fsm_proxy_config_update_time_count{success=\"false\", resource_type=~\"$xds_path\", source_pod_name=~\"$fsm_controller_instance\"}[1m])", - "interval": "", - "legendFormat": "{{resource_type}}-{{source_pod_name}}-Failure", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "xDS Updates", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "timeseries", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "count", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, { "collapsed": false, "datasource": null, @@ -1478,32 +1273,6 @@ "type": "query", "useTags": false }, - { - "allValue": null, - "current": { - "selected": false, - "text": "ADS", - "value": "ADS" - }, - "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(fsm_proxy_config_update_time_bucket, resource_type)", - "hide": 0, - "includeAll": false, - "label": "xDS Path", - "multi": false, - "name": "xds_path", - "options": [], - "query": "label_values(fsm_proxy_config_update_time_bucket, resource_type)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, { "allValue": null, "current": { diff --git a/charts/fsm/templates/fsm-deployment.yaml b/charts/fsm/templates/fsm-deployment.yaml index 512b9aa7d..db13f5580 100644 --- a/charts/fsm/templates/fsm-deployment.yaml +++ b/charts/fsm/templates/fsm-deployment.yaml @@ -161,8 +161,6 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name - - name: FSM_DEFAULT_SIDECAR_CLASS - value: "{{ .Values.fsm.sidecarClass }}" volumeMounts: - mountPath: /repo name: shared-repo diff --git a/charts/fsm/templates/grafana-configmap.yaml b/charts/fsm/templates/grafana-configmap.yaml index 5a733a953..9bac6891a 100644 --- a/charts/fsm/templates/grafana-configmap.yaml +++ b/charts/fsm/templates/grafana-configmap.yaml @@ -70,25 +70,15 @@ metadata: app: fsm-grafana data: fsm-pod.json: | -{{- if eq .Values.fsm.sidecarClass "pipy" }} {{ .Files.Get "grafana/pipy/dashboards/fsm-pod-to-service.json" | replace "${DS_PROMETHEUS}" "Prometheus" | indent 4 }} -{{- end }} fsm-workload.json: | -{{- if eq .Values.fsm.sidecarClass "pipy" }} {{ .Files.Get "grafana/pipy/dashboards/fsm-workload-to-service.json" | replace "${DS_PROMETHEUS}" "Prometheus" | indent 4 }} -{{- end }} fsm-service-to-service.json: | -{{- if eq .Values.fsm.sidecarClass "pipy" }} {{ .Files.Get "grafana/pipy/dashboards/fsm-service-to-service.json" | replace "${DS_PROMETHEUS}" "Prometheus" | indent 4 }} -{{- end }} fsm-data-plane-container.json: | -{{- if eq .Values.fsm.sidecarClass "pipy" }} {{ .Files.Get "grafana/pipy/dashboards/fsm-data-plane-performance.json" | replace "${DS_PROMETHEUS}" "Prometheus" | indent 4 }} -{{- end }} fsm-workload-to-workload.json: | -{{- if eq .Values.fsm.sidecarClass "pipy" }} {{ .Files.Get "grafana/pipy/dashboards/fsm-workload-to-workload.json" | replace "${DS_PROMETHEUS}" "Prometheus" | indent 4 }} -{{- end }} --- apiVersion: v1 @@ -101,13 +91,9 @@ metadata: app: fsm-grafana data: fsm-control-plane.json: | -{{- if eq .Values.fsm.sidecarClass "pipy" }} {{ .Files.Get "grafana/pipy/dashboards/fsm-control-plane.json" | replace "${DS_PROMETHEUS}" "Prometheus" | indent 4 }} -{{- end }} fsm-mesh-sidecar-details.json: | -{{- if eq .Values.fsm.sidecarClass "pipy" }} {{ .Files.Get "grafana/pipy/dashboards/fsm-mesh-sidecar-details.json" | replace "${DS_PROMETHEUS}" "Prometheus" | replace "${CONTROL_PLANE_NAMESPACE}" (include "fsm.namespace" .) | indent 4 }} -{{- end }} --- apiVersion: v1 diff --git a/charts/fsm/templates/prometheus-configmap.yaml b/charts/fsm/templates/prometheus-configmap.yaml index befb9006a..e393f4c50 100644 --- a/charts/fsm/templates/prometheus-configmap.yaml +++ b/charts/fsm/templates/prometheus-configmap.yaml @@ -54,11 +54,9 @@ data: kubernetes_sd_configs: - role: pod metric_relabel_configs: - {{- if eq .Values.fsm.sidecarClass "pipy" }} - source_labels: [__name__] regex: '(sidecar_server_live|sidecar_cluster_health_check_.*|sidecar_cluster_upstream_rq_xx|sidecar_cluster_upstream_cx_active|sidecar_cluster_upstream_cx_tx_bytes_total|sidecar_cluster_upstream_cx_rx_bytes_total|sidecar_cluster_upstream_rq_total|sidecar_cluster_upstream_cx_destroy_remote_with_active_rq|sidecar_cluster_upstream_cx_connect_timeout|sidecar_cluster_upstream_cx_destroy_local_with_active_rq|sidecar_cluster_upstream_rq_pending_failure_eject|sidecar_cluster_upstream_rq_pending_overflow|sidecar_cluster_upstream_rq_timeout|sidecar_cluster_upstream_rq_rx_reset|socks_active_connection|socks_total_connection|socks_send_bytes_total|socks_receive_bytes_total|^fsm.*)' action: keep - {{- end }} relabel_configs: - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] action: keep @@ -131,7 +129,6 @@ data: replacement: $1:$2 target_label: __address__ metric_relabel_configs: - {{- if eq .Values.fsm.sidecarClass "pipy" }} - source_labels: [__name__] regex: 'sidecar_.*fsm_request_(total|duration_ms_(bucket|count|sum))' action: keep @@ -171,7 +168,6 @@ data: action: replace regex: sidecar_response_code_\d{3}_source_namespace_.*_source_kind_.*_source_name_.*_source_pod_.*_destination_namespace_.*_destination_kind_.*_destination_name_.*_destination_pod_(.*)_fsm_request_total target_label: destination_pod - {{- end }} - source_labels: [__name__] action: replace regex: .*(fsm_request_total)