From 58d7d5e563411d45ecab36d0573f7f19f1908b58 Mon Sep 17 00:00:00 2001 From: dkirov-dd <166512750+dkirov-dd@users.noreply.github.com> Date: Tue, 27 Aug 2024 11:08:26 +0200 Subject: [PATCH] Add monitor messages (#18418) --- aws_neuron/assets/monitors/execution_errors.json | 2 +- aws_neuron/assets/monitors/execution_latency.json | 2 +- aws_neuron/assets/monitors/neuron_runtime_vcpu.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/aws_neuron/assets/monitors/execution_errors.json b/aws_neuron/assets/monitors/execution_errors.json index d442b5f62f36b..6f2048b78c13c 100644 --- a/aws_neuron/assets/monitors/execution_errors.json +++ b/aws_neuron/assets/monitors/execution_errors.json @@ -8,7 +8,7 @@ "name": "High amount of execution errors on {{instance_id.name}}", "type": "query alert", "query": "sum(last_5m):sum:aws_neuron.execution.errors.count{*} by {instance_id}.as_count() > 10", - "message": "", + "message": "{{#is_alert}}{{instance_id.name}} has experienced {{value}} execution errors over the last 5 minutes.{{/is_alert}}\n{{#is_recovery}}The number of execution errors on {{instance_id.name}} is back below the threshold of {{threshold}}.{{/is_recovery}}", "tags": [ "integration:aws_neuron" ], diff --git a/aws_neuron/assets/monitors/execution_latency.json b/aws_neuron/assets/monitors/execution_latency.json index 3c64319148fc4..763fba6f72a21 100644 --- a/aws_neuron/assets/monitors/execution_latency.json +++ b/aws_neuron/assets/monitors/execution_latency.json @@ -8,7 +8,7 @@ "name": "Latency is high on {{instance_id.name}} ", "type": "query alert", "query": "avg(last_5m):avg:aws_neuron.execution.latency_seconds{*} by {instance_id} > 2", - "message": "", + "message": "{{#is_alert}}The average execution latency on {{instance_id.name}} has been {{value}} seconds over the past 5 minutes.{{/is_alert}}\n{{#is_recovery}}The execution latency of {{instance_id.name}} has recovered below the threshold of {{threshold}} seconds.{{/is_recovery}}", "tags": [ 
"integration:aws_neuron" ], diff --git a/aws_neuron/assets/monitors/neuron_runtime_vcpu.json b/aws_neuron/assets/monitors/neuron_runtime_vcpu.json index 0de630711279f..6f62ebc1cb15b 100644 --- a/aws_neuron/assets/monitors/neuron_runtime_vcpu.json +++ b/aws_neuron/assets/monitors/neuron_runtime_vcpu.json @@ -8,7 +8,7 @@ "name": "The Neuron Runtime has high vCPU usage on {{instance_id.name}}", "type": "query alert", "query": "avg(last_5m):avg:aws_neuron.neuron_runtime.vcpu_usage_ratio{*} by {instance_id} > 0.8", - "message": "", + "message": "{{#is_alert}}The average vCPU usage on {{instance_id.name}} has been {{value}} over the past 5 minutes.{{/is_alert}}\n{{#is_recovery}}The average vCPU usage of {{instance_id.name}} has recovered below the threshold of {{threshold}}.{{/is_recovery}}", "tags": [ "integration:aws_neuron" ],