From b3fdf08d4e70217968ce5821a4cda6d56959e6b3 Mon Sep 17 00:00:00 2001 From: Dominik Kress Date: Wed, 23 Oct 2024 14:52:41 +0200 Subject: [PATCH] Create observability-platform-cost-estimate (#637) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Create observability-platform-cost-estimate Introduce new dashboard for estimating observability platform costs. * Update helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/observability-platform-cost-estimate Co-authored-by: Hervé Nicol --------- Co-authored-by: Hervé Nicol --- .../observability-platform-cost-estimate | 830 ++++++++++++++++++ 1 file changed, 830 insertions(+) create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/observability-platform-cost-estimate diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/observability-platform-cost-estimate b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/observability-platform-cost-estimate new file mode 100644 index 00000000..f6ef81e6 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/observability-platform-cost-estimate @@ -0,0 +1,830 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 145, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 5, + "panels": [], + "title": "Basic Resource Usage of Observability Platform Components", + "type": "row" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "mimir" + }, + "description": "Number of average RAM used in $range", + "fieldConfig": { + "defaults": 
{ + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": true, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "avg_over_time(\nsum(max(container_memory_usage_bytes{pod=~\"${pods:raw}\", cluster_type=~\"management_cluster\", container!=\"\", image!=\"\"}) by(pod,container))\n[$__range:])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "stat" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "mimir" + }, + "description": "Average number of CPU cores in use over $range", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": true, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": 
"11.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "avg_over_time(\nsum(sum(rate(container_cpu_usage_seconds_total{pod=~\"${pods:raw}\", cluster_type=~\"management_cluster\"}[5m])) by (pod))\n[$__range:])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "stat" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "mimir" + }, + "description": "Incoming network traffic", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 3, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": true, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "avg_over_time(\nsum(irate(container_network_receive_bytes_total{pod=~\"${pods:raw}\"}[5m]))\n[$__range:])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Network IN", + "type": "stat" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "mimir" + }, + "description": "Outgoing network traffic", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 
80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": true, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "avg_over_time(\nsum(irate(container_network_transmit_bytes_total{pod=~\"${pods:raw}\"}[$__rate_interval]))[$__range:])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Network Out", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 6, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Cost Calculation \n\nWith the above numbers you can create a cost estimate by using the [AWS Pricing Calculator](https://calculator.aws/#/).\n\nThe general idea is to get a rough estimate of how much the resources cost that are needed to run all the components of the observability platform. 
\n\nThe above numbers already include the typical replication of the components for their resiliency, so no need to add any overhead for this.\n\nThe machine type our typical CAPA installations use is: **r6i.2xlarge**\n\n", + "mode": "markdown" + }, + "pluginVersion": "11.2.1", + "type": "text" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 7, + "panels": [], + "title": "Additional Interesting Numbers for Comparisons", + "type": "row" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 14 + }, + "id": 9, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(loki_distributor_lines_received_total[$__range]))", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Amount of Log Lines received by Loki over $range", + "type": "stat" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 14 + }, + "id": 10, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(loki_distributor_bytes_received_total[$__range]))", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Amount of Bytes received by Loki over $range", + "type": "stat" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 14 + }, + "id": 11, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum (increase(loki_ingester_chunk_stored_bytes_total[$__range]))", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Bytes stored by Loki ingesters in S3 over $range", + "type": "stat" + }, + { + 
"datasource": { + "default": true, + "type": "prometheus", + "uid": "mimir" + }, + "description": "Average number of time series in mimirs ingesters over $range", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 14, + "x": 0, + "y": 22 + }, + "id": 8, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "avg_over_time(\n sum(cortex_ingester_active_series{cluster_type=~\"management_cluster\"})\n[$__range:])", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Metric Time Series in Mimir Ingesters", + "type": "stat" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 14, + "y": 22 + }, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + 
"wideLayout": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "avg_over_time(\nsum by(cluster_type)(kube_node_info)\n[$__range:])", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Average amount of nodes in cluster type over $range", + "type": "stat" + } + ], + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "allValue": "(mimir|loki|grafana|alertmanager|alloy|prometheus|promtail|metrics-server|logging-operator|oauth2-proxy|object-storage-operator|observability-operator|silence-operator|sloth)-.*", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "hide": 0, + "includeAll": true, + "multi": false, + "name": "pods", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "mimir-.*", + "value": "mimir-.*" + }, + { + "selected": false, + "text": "loki-.*", + "value": "loki-.*" + }, + { + "selected": false, + "text": "grafana-.*", + "value": "grafana-.*" + }, + { + "selected": false, + "text": "alertmanager-.*", + "value": "alertmanager-.*" + }, + { + "selected": false, + "text": "alloy-.*", + "value": "alloy-.*" + }, + { + "selected": false, + "text": "prometheus-.*", + "value": "prometheus-.*" + }, + { + "selected": false, + "text": "promtail-.*", + "value": "promtail-.*" + }, + { + "selected": false, + "text": "metrics-server-.*", + "value": "metrics-server-.*" + }, + { + "selected": false, + "text": "logging-operator-.*", + "value": "logging-operator-.*" + }, + { + "selected": false, + "text": "oauth2-proxy-.*", + "value": "oauth2-proxy-.*" + }, + { + "selected": false, + "text": "object-storage-operator-.*", + "value": "object-storage-operator-.*" + }, + { + "selected": false, + "text": "observability-operator-.*", + "value": "observability-operator-.*" + }, + { + "selected": 
false, + "text": "silence-operator-.*", + "value": "silence-operator-.*" + }, + { + "selected": false, + "text": "sloth-.*", + "value": "sloth-.*" + } + ], + "query": "mimir-.*,\nloki-.*,\ngrafana-.*,\nalertmanager-.*,\nalloy-.*,\nprometheus-.*,\npromtail-.*,\nmetrics-server-.*,\nlogging-operator-.*,\noauth2-proxy-.*,\nobject-storage-operator-.*,\nobservability-operator-.*,\nsilence-operator-.*,\nsloth-.*,", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Observability Platform: Cost Estimate", + "uid": "o11y-cost-estimate", + "version": 3, + "weekStart": "" +}