Skip to content

Commit

Permalink
make build-mixin
Browse files Browse the repository at this point in the history
  • Loading branch information
alex5517 committed May 22, 2024
1 parent 32feab0 commit 663aa32
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
10 changes: 5 additions & 5 deletions operations/mimir-mixin-compiled-baremetal/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ groups:
message: Mimir store-gateway {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ $value | humanizePercentage }} errors while doing {{ $labels.operation }} on the object storage.
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewaytoomanyfailedoperations
expr: |
sum by(cluster, namespace, operation) (rate(thanos_objstore_bucket_operation_failures_total{component="store-gateway"}[1m])) > 0
sum by(cluster, namespace, instance, operation) (rate(thanos_objstore_bucket_operation_failures_total{component="store-gateway"}[1m])) > 0
for: 5m
labels:
severity: warning
Expand Down Expand Up @@ -697,10 +697,10 @@ groups:
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted
expr: |
# alert when there is more than one corruption
count by (cluster, namespace) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0) > 1
count by (cluster, namespace, instance) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0) > 1
and
# and there is only one zone
count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) == 1
count by (cluster, namespace, instance) (group by (cluster, namespace, instance, job) (cortex_ingester_tsdb_wal_corruptions_total)) == 1
labels:
deployment: single-zone
severity: critical
Expand All @@ -710,10 +710,10 @@ groups:
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted
expr: |
# alert when there is more than one corruption
count by (cluster, namespace) (sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0)) > 1
count by (cluster, namespace, instance) (sum by (cluster, namespace, instance, job) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0)) > 1
and
# and there are multiple zones
count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) > 1
count by (cluster, namespace, instance) (group by (cluster, namespace, instance, job) (cortex_ingester_tsdb_wal_corruptions_total)) > 1
labels:
deployment: multi-zone
severity: critical
Expand Down
12 changes: 6 additions & 6 deletions operations/mimir-mixin-compiled/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ groups:
message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ $value | humanizePercentage }} errors while doing {{ $labels.operation }} on the object storage.
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewaytoomanyfailedoperations
expr: |
sum by(cluster, namespace, operation) (rate(thanos_objstore_bucket_operation_failures_total{component="store-gateway"}[1m])) > 0
sum by(cluster, namespace, pod, operation) (rate(thanos_objstore_bucket_operation_failures_total{component="store-gateway"}[1m])) > 0
for: 5m
labels:
severity: warning
Expand Down Expand Up @@ -711,10 +711,10 @@ groups:
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted
expr: |
# alert when there is more than one corruption
count by (cluster, namespace) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0) > 1
count by (cluster, namespace, pod) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0) > 1
and
# and there is only one zone
count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) == 1
count by (cluster, namespace, pod) (group by (cluster, namespace, pod, job) (cortex_ingester_tsdb_wal_corruptions_total)) == 1
labels:
deployment: single-zone
severity: critical
Expand All @@ -724,10 +724,10 @@ groups:
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringestertsdbwalcorrupted
expr: |
# alert when there is more than one corruption
count by (cluster, namespace) (sum by (cluster, namespace, job) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0)) > 1
count by (cluster, namespace, pod) (sum by (cluster, namespace, pod, job) (rate(cortex_ingester_tsdb_wal_corruptions_total[5m]) > 0)) > 1
and
# and there are multiple zones
count by (cluster, namespace) (group by (cluster, namespace, job) (cortex_ingester_tsdb_wal_corruptions_total)) > 1
count by (cluster, namespace, pod) (group by (cluster, namespace, pod, job) (cortex_ingester_tsdb_wal_corruptions_total)) > 1
labels:
deployment: multi-zone
severity: critical
Expand Down Expand Up @@ -982,9 +982,9 @@ groups:
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterstuckprocessingrecordsfromkafka
expr: |
# Alert if the reader is not processing any records, but there are buffered records to process in the Kafka client.
# NOTE: the cortex_ingest_storage_reader_buffered_fetch_records_total metric is a gauge showing the current number of buffered records.
(sum by (cluster, namespace, pod) (rate(cortex_ingest_storage_reader_records_total[5m])) == 0)
and
# NOTE: the cortex_ingest_storage_reader_buffered_fetch_records_total metric is a gauge showing the current number of buffered records.
(sum by (cluster, namespace, pod) (cortex_ingest_storage_reader_buffered_fetch_records_total) > 0)
for: 5m
labels:
Expand Down

0 comments on commit 663aa32

Please sign in to comment.