From b7c052c2ed77a0e10acf5ed5ef05903760fd16f1 Mon Sep 17 00:00:00 2001 From: chencs Date: Wed, 27 Sep 2023 16:07:25 -0700 Subject: [PATCH] Scale on OOMs for autoscaling components (#5739) * Add memory trigger to ruler-querier and account for OOMs in memory calculation for ruler-querier and ruler-query-frontend * Add use_oom_trigger flag to all autoscaling components * remove use_oom_trigger flag * add memory panel to remote ruler reads dashboard * build mixins * update HPA query * Update changelog * changelog fixes --- CHANGELOG.md | 3 + .../metamonitoring/grafana-dashboards.yaml | 84 ++++++++++++++++++- .../dashboards/mimir-remote-ruler-reads.json | 84 ++++++++++++++++++- .../dashboards/mimir-remote-ruler-reads.json | 84 ++++++++++++++++++- .../dashboards/remote-ruler-reads.libsonnet | 28 +++++++ ...g-custom-target-utilization-generated.yaml | 67 +++++++++++++++ ...oscaling-custom-target-utilization.jsonnet | 1 + .../test-autoscaling-generated.yaml | 67 +++++++++++++++ operations/mimir/autoscaling.libsonnet | 67 ++++++++++++++- 9 files changed, 469 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b555e07b9d7..a04a6a8c3f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -202,6 +202,7 @@ * [BUGFIX] Alerts: fixed `MimirIngesterHasNotShippedBlocks` and `MimirIngesterHasNotShippedBlocksSinceStart` alerts. #5396 * [BUGFIX] Alerts: Fix `MimirGossipMembersMismatch` to include `admin-api` and custom compactor pods. `admin-api` is a GEM component. #5641 #5797 * [BUGFIX] Dashboards: fix autoscaling dashboard panels that could show multiple series for a single component. #5810 +* [BUGFIX] Dashboards: fix ruler-querier scaling metric panel query and split into CPU and memory scaling metric panels. #5739 ### Jsonnet @@ -234,7 +235,9 @@ * [ENHANCEMENT] Add _config.commonConfig to allow adding common configuration parameters for all Mimir components. #5703 * [ENHANCEMENT] Update rollout-operator to `v0.7.0`. #5718 * [ENHANCEMENT] Increase the default rollout speed for store-gateway when lazy loading is disabled. #5823 +* [ENHANCEMENT] Add autoscaling on memory for ruler-queriers. #5739 * [BUGFIX] Fix compilation when index, chunks or metadata caches are disabled. #5710 +* [BUGFIX] Autoscaling: treat OOMing containers as though they are using their full memory request. #5739 ### Mimirtool diff --git a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml index 75d8a706ed8..610341119fa 100644 --- a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml +++ b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml @@ -26967,7 +26967,7 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 3, "stack": false, "steppedLine": false, "targets": [ @@ -27057,7 +27057,7 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 3, "stack": false, "steppedLine": false, "targets": [ @@ -27111,7 +27111,7 @@ data: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler does not work properly.\n\n", + "description": "### Scaling metric (memory): Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", "fill": 1, "id": 13, "legend": { @@ -27133,7 +27133,83 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "keda_metrics_adapter_scaler_metrics_value{metric=~\".*memory.*\"}\n/\non(metric) group_left label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ scaledObject }}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Scaling metric (memory): Desired replicas", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler does not work properly.\n\n", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, "stack": false, "steppedLine": false, "targets": [ diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-remote-ruler-reads.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-remote-ruler-reads.json index 1c4de4b5dbe..2d4146d13ac 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-remote-ruler-reads.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-remote-ruler-reads.json @@ -835,7 +835,7 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 3, "stack": false, "steppedLine": false, "targets": [ @@ -925,7 +925,7 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 3, "stack": false, "steppedLine": false, "targets": [ @@ -979,7 +979,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler does not work properly.\n\n", + "description": "### Scaling metric (memory): Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", "fill": 1, "id": 13, "legend": { @@ -1001,7 +1001,83 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "keda_metrics_adapter_scaler_metrics_value{metric=~\".*memory.*\"}\n/\non(metric) group_left label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ scaledObject }}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Scaling metric (memory): Desired replicas", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler does not work properly.\n\n", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, "stack": false, "steppedLine": false, "targets": [ diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-remote-ruler-reads.json b/operations/mimir-mixin-compiled/dashboards/mimir-remote-ruler-reads.json index e7f9ea6222c..2b83b3e7422 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-remote-ruler-reads.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-remote-ruler-reads.json @@ -835,7 +835,7 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 3, "stack": false, "steppedLine": false, "targets": [ @@ -925,7 +925,7 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 3, "stack": false, "steppedLine": false, "targets": [ @@ -979,7 +979,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler does not work properly.\n\n", + "description": "### Scaling metric (memory): Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", "fill": 1, "id": 13, "legend": { @@ -1001,7 +1001,83 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "keda_metrics_adapter_scaler_metrics_value{metric=~\".*memory.*\"}\n/\non(metric) group_left label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ scaledObject }}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Scaling metric (memory): Desired replicas", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler does not work properly.\n\n", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, "stack": false, "steppedLine": false, "targets": [ diff --git a/operations/mimir-mixin/dashboards/remote-ruler-reads.libsonnet b/operations/mimir-mixin/dashboards/remote-ruler-reads.libsonnet index 8311333182f..9c5ec29852a 100644 --- a/operations/mimir-mixin/dashboards/remote-ruler-reads.libsonnet +++ b/operations/mimir-mixin/dashboards/remote-ruler-reads.libsonnet @@ -150,6 +150,34 @@ local filename = 'mimir-remote-ruler-reads.json'; ||| ), ) + .addPanel( + local title = 'Scaling metric (memory): Desired replicas'; + $.panel(title) + + $.queryPanel( + [ + ||| + keda_metrics_adapter_scaler_metrics_value{metric=~".*memory.*"} + / + on(metric) group_left label_replace( + kube_horizontalpodautoscaler_spec_target_metric{%(namespace)s, horizontalpodautoscaler=~"%(hpa_name)s"}, + "metric", "$1", "metric_name", "(.+)" + ) + ||| % { + hpa_name: $._config.autoscaling.ruler_querier.hpa_name, + namespace: $.namespaceMatcher(), + }, + ], [ + '{{ scaledObject }}', + ] + ) + + $.panelDescription( + title, + ||| + This panel shows the scaling metric exposed by KEDA divided by the target/threshold used. + It should represent the desired number of replicas, ignoring the min/max constraints applied later. + ||| + ), + ) .addPanel( local title = 'Autoscaler failures rate'; $.panel(title) + diff --git a/operations/mimir-tests/test-autoscaling-custom-target-utilization-generated.yaml b/operations/mimir-tests/test-autoscaling-custom-target-utilization-generated.yaml index 1d8fa010de0..120fdd0b468 100644 --- a/operations/mimir-tests/test-autoscaling-custom-target-utilization-generated.yaml +++ b/operations/mimir-tests/test-autoscaling-custom-target-utilization-generated.yaml @@ -1948,6 +1948,15 @@ spec: max by (pod) (up{container="alertmanager",namespace="default"}) > 0 )[15m:] ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="alertmanager", namespace="default", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="alertmanager", namespace="default"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="alertmanager", namespace="default", reason="OOMKilled"}) + or vector(0) + ) serverAddress: http://prometheus.default:9090/prometheus threshold: "9556302233" type: prometheus @@ -1995,6 +2004,15 @@ spec: max by (pod) (up{container="distributor",namespace="default"}) > 0 )[15m:] ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="distributor", namespace="default", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor", namespace="default"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor", namespace="default", reason="OOMKilled"}) + or vector(0) + ) serverAddress: http://prometheus.default:9090/prometheus threshold: "3058016714" type: prometheus @@ -2069,6 +2087,15 @@ spec: max by (pod) (up{container="query-frontend",namespace="default"}) > 0 )[15m:] ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend", namespace="default", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend", namespace="default"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend", namespace="default", reason="OOMKilled"}) + or vector(0) + ) serverAddress: http://prometheus.default:9090/prometheus threshold: "559939584" type: prometheus @@ -2116,6 +2143,15 @@ spec: max by (pod) (up{container="ruler",namespace="default"}) > 0 )[15m:] ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler", namespace="default", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler", namespace="default"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler", namespace="default", reason="OOMKilled"}) + or vector(0) + ) serverAddress: http://prometheus.default:9090/prometheus threshold: "5733781340" type: prometheus @@ -2153,6 +2189,28 @@ spec: serverAddress: http://prometheus.default:9090/prometheus threshold: "178" type: prometheus + - metadata: + metricName: ruler_querier_memory_hpa_default + query: | + max_over_time( + sum( + sum by (pod) (container_memory_working_set_bytes{container="ruler-querier",namespace="default"}) + and + max by (pod) (up{container="ruler-querier",namespace="default"}) > 0 + )[15m:] + ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler-querier", namespace="default", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler-querier", namespace="default"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler-querier", namespace="default", reason="OOMKilled"}) + or vector(0) + ) + serverAddress: http://prometheus.default:9090/prometheus + threshold: "955630223" + type: prometheus --- apiVersion: keda.sh/v1alpha1 kind: ScaledObject @@ -2197,6 +2255,15 @@ spec: max by (pod) (up{container="ruler-query-frontend",namespace="default"}) > 0 )[15m:] ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler-query-frontend", namespace="default", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler-query-frontend", namespace="default"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler-query-frontend", namespace="default", reason="OOMKilled"}) + or vector(0) + ) serverAddress: http://prometheus.default:9090/prometheus threshold: "559939584" type: prometheus diff --git a/operations/mimir-tests/test-autoscaling-custom-target-utilization.jsonnet b/operations/mimir-tests/test-autoscaling-custom-target-utilization.jsonnet index bd975540368..709c7fbd06b 100644 --- a/operations/mimir-tests/test-autoscaling-custom-target-utilization.jsonnet +++ b/operations/mimir-tests/test-autoscaling-custom-target-utilization.jsonnet @@ -5,6 +5,7 @@ autoscaling_querier_target_utilization: targetUtilization, autoscaling_ruler_querier_cpu_target_utilization: targetUtilization, + autoscaling_ruler_querier_memory_target_utilization: targetUtilization, autoscaling_distributor_cpu_target_utilization: targetUtilization, autoscaling_distributor_memory_target_utilization: targetUtilization, autoscaling_ruler_cpu_target_utilization: targetUtilization, diff --git a/operations/mimir-tests/test-autoscaling-generated.yaml b/operations/mimir-tests/test-autoscaling-generated.yaml index bd0c4469d3d..08a181b440e 100644 --- a/operations/mimir-tests/test-autoscaling-generated.yaml +++ b/operations/mimir-tests/test-autoscaling-generated.yaml @@ -1948,6 +1948,15 @@ spec: max by (pod) (up{container="alertmanager",namespace="default"}) > 0 )[15m:] ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="alertmanager", namespace="default", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="alertmanager", namespace="default"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="alertmanager", namespace="default", reason="OOMKilled"}) + or vector(0) + ) serverAddress: http://prometheus.default:9090/prometheus threshold: "10737418240" type: prometheus @@ -1995,6 +2004,15 @@ spec: max by (pod) (up{container="distributor",namespace="default"}) > 0 )[15m:] ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="distributor", namespace="default", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor", namespace="default"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor", namespace="default", reason="OOMKilled"}) + or vector(0) + ) serverAddress: http://prometheus.default:9090/prometheus threshold: "3435973836" type: prometheus @@ -2069,6 +2087,15 @@ spec: max by (pod) (up{container="query-frontend",namespace="default"}) > 0 )[15m:] ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend", namespace="default", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend", namespace="default"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend", namespace="default", reason="OOMKilled"}) + or vector(0) + ) serverAddress: http://prometheus.default:9090/prometheus threshold: "629145600" type: prometheus @@ -2116,6 +2143,15 @@ spec: max by (pod) (up{container="ruler",namespace="default"}) > 0 )[15m:] ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler", namespace="default", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler", namespace="default"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler", namespace="default", reason="OOMKilled"}) + or vector(0) + ) serverAddress: http://prometheus.default:9090/prometheus threshold: "6442450944" type: prometheus @@ -2153,6 +2189,28 @@ spec: serverAddress: http://prometheus.default:9090/prometheus threshold: "200" type: prometheus + - metadata: + metricName: ruler_querier_memory_hpa_default + query: | + max_over_time( + sum( + sum by (pod) (container_memory_working_set_bytes{container="ruler-querier",namespace="default"}) + and + max by (pod) (up{container="ruler-querier",namespace="default"}) > 0 + )[15m:] + ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler-querier", namespace="default", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler-querier", namespace="default"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler-querier", namespace="default", reason="OOMKilled"}) + or vector(0) + ) + serverAddress: http://prometheus.default:9090/prometheus + threshold: "1073741824" + type: prometheus --- apiVersion: keda.sh/v1alpha1 kind: ScaledObject @@ -2197,6 +2255,15 @@ spec: max by (pod) (up{container="ruler-query-frontend",namespace="default"}) > 0 )[15m:] ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler-query-frontend", namespace="default", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler-query-frontend", namespace="default"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler-query-frontend", namespace="default", reason="OOMKilled"}) + or vector(0) + ) serverAddress: http://prometheus.default:9090/prometheus threshold: "629145600" type: prometheus diff --git a/operations/mimir/autoscaling.libsonnet b/operations/mimir/autoscaling.libsonnet index ca5e0556143..77497a34adc 100644 --- a/operations/mimir/autoscaling.libsonnet +++ b/operations/mimir/autoscaling.libsonnet @@ -11,6 +11,7 @@ autoscaling_ruler_querier_min_replicas: error 'you must set autoscaling_ruler_querier_min_replicas in the _config', autoscaling_ruler_querier_max_replicas: error 'you must set autoscaling_ruler_querier_max_replicas in the _config', autoscaling_ruler_querier_cpu_target_utilization: 1, + autoscaling_ruler_querier_memory_target_utilization: 1, autoscaling_distributor_enabled: false, autoscaling_distributor_min_replicas: error 'you must set autoscaling_distributor_min_replicas in the _config', @@ -193,6 +194,8 @@ // The "up" metrics correctly handles the stale marker when the pod is terminated, while it’s not the // case for the cAdvisor metrics. By intersecting these 2 metrics, we only look the memory utilization // of containers there are running at any given time, without suffering the PromQL lookback period. + // If a pod is terminated because it OOMs, we still want to scale up -- add the memory resource request of OOMing + // pods to the memory metric calculation. local memoryHPAQuery = ||| max_over_time( sum( @@ -201,9 +204,27 @@ max by (pod) (up{container="%(container)s",namespace="%(namespace)s"}) > 0 )[15m:] ) + + + sum( + sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="%(container)s", namespace="%(namespace)s", resource="memory"}[15m])) + and + max by (pod) (changes(kube_pod_container_status_restarts_total{container="%(container)s", namespace="%(namespace)s"}[15m]) > 0) + and + max by (pod) (kube_pod_container_status_last_terminated_reason{container="%(container)s", namespace="%(namespace)s", reason="OOMKilled"}) + or vector(0) + ) |||, - newQueryFrontendScaledObject(name, cpu_requests, memory_requests, min_replicas, max_replicas, cpu_target_utilization, memory_target_utilization):: self.newScaledObject( + + newQueryFrontendScaledObject( + name, + cpu_requests, + memory_requests, + min_replicas, + max_replicas, + cpu_target_utilization, + memory_target_utilization, + ):: self.newScaledObject( name, $._config.namespace, { min_replica_count: min_replicas, max_replica_count: max_replicas, @@ -312,7 +333,16 @@ // newRulerQuerierScaledObject will create a scaled object for the ruler-querier component with the given name. // `weight` param works in the same way as in `newQuerierScaledObject`, see docs there. - newRulerQuerierScaledObject(name, querier_cpu_requests, min_replicas, max_replicas, cpu_target_utilization, weight=1):: self.newScaledObject(name, $._config.namespace, { + newRulerQuerierScaledObject( + name, + querier_cpu_requests, + memory_requests, + min_replicas, + max_replicas, + cpu_target_utilization, + memory_target_utilization, + weight=1, + ):: self.newScaledObject(name, $._config.namespace, { min_replica_count: replicasWithWeight(min_replicas, weight), max_replica_count: replicasWithWeight(max_replicas, weight), @@ -326,6 +356,17 @@ // threshold is expected to be a string. threshold: std.toString(std.floor(cpuToMilliCPUInt(querier_cpu_requests) * cpu_target_utilization)), }, + { + metric_name: '%s_memory_hpa_%s' % [std.strReplace(name, '-', '_'), $._config.namespace], + + query: memoryHPAQuery % { + container: name, + namespace: $._config.namespace, + }, + + // Threshold is expected to be a string + threshold: std.toString(std.floor($.util.siToBytes(memory_requests) * memory_target_utilization)), + }, ], }), @@ -333,8 +374,10 @@ $.newRulerQuerierScaledObject( name='ruler-querier', querier_cpu_requests=$.ruler_querier_container.resources.requests.cpu, + memory_requests=$.ruler_querier_container.resources.requests.memory, min_replicas=$._config.autoscaling_ruler_querier_min_replicas, max_replicas=$._config.autoscaling_ruler_querier_max_replicas, + memory_target_utilization=$._config.autoscaling_ruler_querier_memory_target_utilization, cpu_target_utilization=$._config.autoscaling_ruler_querier_cpu_target_utilization, ), @@ -365,7 +408,15 @@ // Distributors // - newDistributorScaledObject(name, distributor_cpu_requests, distributor_memory_requests, min_replicas, max_replicas, cpu_target_utilization, memory_target_utilization):: self.newScaledObject(name, $._config.namespace, { + newDistributorScaledObject( + name, + distributor_cpu_requests, + distributor_memory_requests, + min_replicas, + max_replicas, + cpu_target_utilization, + memory_target_utilization, + ):: self.newScaledObject(name, $._config.namespace, { min_replica_count: min_replicas, max_replica_count: max_replicas, @@ -464,7 +515,15 @@ // Alertmanager - newAlertmanagerScaledObject(name, alertmanager_cpu_requests, alertmanager_memory_requests, min_replicas, max_replicas, cpu_target_utilization, memory_target_utilization):: self.newScaledObject(name, $._config.namespace, { + newAlertmanagerScaledObject( + name, + alertmanager_cpu_requests, + alertmanager_memory_requests, + min_replicas, + max_replicas, + cpu_target_utilization, + memory_target_utilization, + ):: self.newScaledObject(name, $._config.namespace, { min_replica_count: min_replicas, max_replica_count: max_replicas,