From 91c93ef83ce67caa3cd0f4f3343a2f016b97aa8d Mon Sep 17 00:00:00 2001 From: Justin Lei Date: Thu, 30 Nov 2023 07:57:53 -0800 Subject: [PATCH] Remove alert and runbook for bucket scanning --- .../mimir/manage/mimir-runbooks/_index.md | 8 -------- .../templates/metamonitoring/mixin-alerts.yaml | 13 ------------- .../mimir-mixin-compiled-baremetal/alerts.yaml | 13 ------------- operations/mimir-mixin-compiled/alerts.yaml | 13 ------------- operations/mimir-mixin/alerts/blocks.libsonnet | 16 ---------------- 5 files changed, 63 deletions(-) diff --git a/docs/sources/mimir/manage/mimir-runbooks/_index.md b/docs/sources/mimir/manage/mimir-runbooks/_index.md index be88e981db9..cc5ad3d3a23 100644 --- a/docs/sources/mimir/manage/mimir-runbooks/_index.md +++ b/docs/sources/mimir/manage/mimir-runbooks/_index.md @@ -522,14 +522,6 @@ How to **fix** it: - Set the shard size of one or more tenants to `0`; this will shard the given tenant's rule groups across all ingesters. - Decrease the total number of ruler replicas by the number of idle replicas. -### MimirQuerierHasNotScanTheBucket - -This alert fires when a Mimir querier is not successfully scanning blocks in the storage (bucket). A querier is expected to periodically iterate the bucket to find new and deleted blocks (defaults to every 5m) and if it's not successfully synching the bucket since a long time, it may end up querying only a subset of blocks, thus leading to potentially partial results. - -How to **investigate**: - -- Look for any scan error in the querier logs (ie. networking or rate limiting issues) - ### MimirStoreGatewayHasNotSyncTheBucket This alert fires when a Mimir store-gateway is not successfully scanning blocks in the storage (bucket). A store-gateway is expected to periodically iterate the bucket to find new and deleted blocks (defaults to every 5m) and if it's not successfully synching the bucket for a long time, it may end up querying only a subset of blocks, thus leading to potentially partial results. diff --git a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/mixin-alerts.yaml b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/mixin-alerts.yaml index bb355dbf831..e6b3aeed980 100644 --- a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/mixin-alerts.yaml +++ b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/mixin-alerts.yaml @@ -772,19 +772,6 @@ spec: for: 3m labels: severity: critical - - alert: MimirQuerierHasNotScanTheBucket - annotations: - message: Mimir Querier {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not successfully scanned the bucket since {{ $value | humanizeDuration - }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirquerierhasnotscanthebucket - expr: | - (time() - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 60 * 30) - and - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 0 - for: 5m - labels: - severity: critical - alert: MimirStoreGatewayHasNotSyncTheBucket annotations: message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace diff --git a/operations/mimir-mixin-compiled-baremetal/alerts.yaml b/operations/mimir-mixin-compiled-baremetal/alerts.yaml index 3964924c774..a3665d747e0 100644 --- a/operations/mimir-mixin-compiled-baremetal/alerts.yaml +++ b/operations/mimir-mixin-compiled-baremetal/alerts.yaml @@ -746,19 +746,6 @@ groups: for: 3m labels: severity: critical - - alert: MimirQuerierHasNotScanTheBucket - annotations: - message: Mimir Querier {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not successfully scanned the bucket since {{ $value | humanizeDuration - }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirquerierhasnotscanthebucket - expr: | - (time() - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 60 * 30) - and - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 0 - for: 5m - labels: - severity: critical - alert: MimirStoreGatewayHasNotSyncTheBucket annotations: message: Mimir store-gateway {{ $labels.instance }} in {{ $labels.cluster }}/{{ diff --git a/operations/mimir-mixin-compiled/alerts.yaml b/operations/mimir-mixin-compiled/alerts.yaml index 8ed8aa48abe..74448c596f0 100644 --- a/operations/mimir-mixin-compiled/alerts.yaml +++ b/operations/mimir-mixin-compiled/alerts.yaml @@ -760,19 +760,6 @@ groups: for: 3m labels: severity: critical - - alert: MimirQuerierHasNotScanTheBucket - annotations: - message: Mimir Querier {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace - }} has not successfully scanned the bucket since {{ $value | humanizeDuration - }}. - runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirquerierhasnotscanthebucket - expr: | - (time() - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 60 * 30) - and - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 0 - for: 5m - labels: - severity: critical - alert: MimirStoreGatewayHasNotSyncTheBucket annotations: message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace diff --git a/operations/mimir-mixin/alerts/blocks.libsonnet b/operations/mimir-mixin/alerts/blocks.libsonnet index d2404f637e4..2c969bf24d8 100644 --- a/operations/mimir-mixin/alerts/blocks.libsonnet +++ b/operations/mimir-mixin/alerts/blocks.libsonnet @@ -183,22 +183,6 @@ message: '%(product)s Ingester %(alert_instance_variable)s in %(alert_aggregation_variables)s is failing to write to TSDB WAL.' % $._config, }, }, - { - // Alert if the querier is not successfully scanning the bucket. - alert: $.alertName('QuerierHasNotScanTheBucket'), - 'for': '5m', - expr: ||| - (time() - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 60 * 30) - and - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 0 - |||, - labels: { - severity: 'critical', - }, - annotations: { - message: '%(product)s Querier %(alert_instance_variable)s in %(alert_aggregation_variables)s has not successfully scanned the bucket since {{ $value | humanizeDuration }}.' % $._config, - }, - }, { // Alert if the store-gateway is not successfully synching the bucket. alert: $.alertName('StoreGatewayHasNotSyncTheBucket'),