From 02bd1a812ab4384acf2f607a9fdfead1a043df8b Mon Sep 17 00:00:00 2001 From: Andrew Seigner Date: Tue, 1 Mar 2022 08:20:49 -0800 Subject: [PATCH] Drop some latency buckets (#54) The majority of metrics sent by the agent are latency buckets. `linkerd-proxy` exports 25-bucket histograms: ``` 1, 2, 3, 4, 5, 10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 1000, 2000, 3000, 4000, 5000, 10000, 20000, 30000, 40000, 50000, ``` Decrease the number of time series the agent sends by reducing the latency buckets from 25 to 10: ``` 1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000, ``` Also, modify the `buoyant-cloud-agent` scrape job to opt in only the specific metrics needed for debugging. Signed-off-by: Andrew Seigner --- .../templates/metrics-agent.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/charts/linkerd-buoyant/templates/metrics-agent.yaml b/charts/linkerd-buoyant/templates/metrics-agent.yaml index f855a25..3e607e7 100644 --- a/charts/linkerd-buoyant/templates/metrics-agent.yaml +++ b/charts/linkerd-buoyant/templates/metrics-agent.yaml @@ -60,6 +60,11 @@ data: - action: labelmap regex: __meta_kubernetes_pod_label_(.+) + metric_relabel_configs: + - source_labels: [__name__] + regex: ^go_memstats_alloc_bytes|go_memstats_alloc_bytes_total|go_memstats_heap_inuse_bytes|go_memstats_stack_inuse_bytes|process_resident_memory_bytes|process_virtual_memory_bytes|up$ + action: keep + # scrape_configs copied from `linkerd install` - job_name: 'kubernetes-nodes-cadvisor' @@ -198,6 +203,12 @@ data: regex: 'response_latency_ms_bucket;outbound' action: drop + # drop some latency buckets + - source_labels: + - le + regex: ^2.*|3.*|4.*$ + action: drop + # drop high-cardinality outbound tcp open connections - source_labels: - __name__ @@ -315,6 +326,12 @@ data: - source_labels: [__name__] regex: ^gateway_alive|gateway_probe_latency_ms_bucket$ action: keep + + # drop some latency buckets + - source_labels: + - le + regex: ^2.*|3.*|4.*$ + action: drop --- # # Metrics Agent