# Patch summary (free text ahead of the first "diff --git" header; not part of any hunk).
#
# Files touched:
#   * CHANGELOG.md - adds an "### Added" section under "## [Unreleased]" with the
#     entry "Recording rules for Tempo".
#   * README.md - drops one blank line before "### Mixin", corrects the heading
#     "kubermetes-mixins" to "kubernetes-mixins", and adds a "#### tempo-mixins"
#     section pointing at the upstream tempo-mixin-compiled directory.
#   * helm/prometheus-rules/templates/recording-rules/tempo-mixins.rules.yml - new
#     file: a monitoring.coreos.com/v1 PrometheusRule named "tempo.recording.rules",
#     namespaced via {{ .Values.namespace }}, labelled through the "labels.common"
#     include.
#
# The new rule group "tempo_rules" records six series, all built from
# rate(...[1m]) over tempo_request_duration_seconds_{bucket,sum,count}:
#   * :99quantile and :50quantile - histogram_quantile over the bucket rate,
#     summed by (le, cluster, namespace, job, route)
#   * :avg                        - sum-rate of _sum divided by sum-rate of _count,
#     both summed by (cluster, namespace, job, route)
#   * _bucket:sum_rate, _sum:sum_rate, _count:sum_rate - the plain summed rates
#
# NOTE(review): in this copy the hunk line breaks are collapsed and the
# `include "labels.common" . | nindent 4` action is split across two physical
# lines - confirm the committed template keeps that action on a single line.
# NOTE(review): the README states the rules come as-is from upstream; verify
# against the upstream compiled mixin before changing any expr/record string.
diff --git a/CHANGELOG.md b/CHANGELOG.md index a13d0955b..5e1e3a6c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Recording rules for Tempo + ## [2.138.2] - 2023-10-23 ### Added diff --git a/README.md b/README.md index c9d55aadd..f682572c7 100644 --- a/README.md +++ b/README.md @@ -95,10 +95,9 @@ Official documentation for inhibit rules can be found here: https://www.promethe The recording rules are located `helm/prometheus-rules/templates/recording-rules` - ### Mixin -#### kubermetes-mixins +#### kubernetes-mixins To Update `kubernetes-mixins` recording rules: @@ -114,6 +113,10 @@ Come as-is from https://github.com/grafana/mimir/tree/main/operations/mimir-mixi Come as-is from https://github.com/grafana/loki/tree/main/production/loki-mixin-compiled-ssd ; just added helm headers (metadata, spec...) +#### tempo-mixins + +Come as-is from https://github.com/grafana/tempo/tree/main/operations/tempo-mixin-compiled ; just added helm headers (metadata, spec...) + ### Testing You can run all tests by running `make test`. diff --git a/helm/prometheus-rules/templates/recording-rules/tempo-mixins.rules.yml b/helm/prometheus-rules/templates/recording-rules/tempo-mixins.rules.yml new file mode 100644 index 000000000..966175ae2 --- /dev/null +++ b/helm/prometheus-rules/templates/recording-rules/tempo-mixins.rules.yml @@ -0,0 +1,23 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + {{- include "labels.common" . 
| nindent 4 }} + name: tempo.recording.rules + namespace: {{ .Values.namespace }} +spec: + groups: + - name: tempo_rules + rules: + - expr: "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route))" + record: "cluster_namespace_job_route:tempo_request_duration_seconds:99quantile" + - expr: "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route))" + record: "cluster_namespace_job_route:tempo_request_duration_seconds:50quantile" + - expr: "sum(rate(tempo_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) / sum(rate(tempo_request_duration_seconds_count[1m])) by (cluster, namespace, job, route)" + record: "cluster_namespace_job_route:tempo_request_duration_seconds:avg" + - expr: "sum(rate(tempo_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)" + record: "cluster_namespace_job_route:tempo_request_duration_seconds_bucket:sum_rate" + - expr: "sum(rate(tempo_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route)" + record: "cluster_namespace_job_route:tempo_request_duration_seconds_sum:sum_rate" + - expr: "sum(rate(tempo_request_duration_seconds_count[1m])) by (cluster, namespace, job, route)" + record: "cluster_namespace_job_route:tempo_request_duration_seconds_count:sum_rate"