Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix shotover_chain_messages_per_batch_count metric #1633

Merged
merged 2 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/src/user-guide/observability.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ This optional interface will serve Prometheus metrics from `/metrics`. It will b
| `shotover_chain_total_count` | `chain` | [counter](#counter) | Counts the number of times `chain` is used |
| `shotover_chain_failures_count` | `chain` | [counter](#counter) | Counts the number of times `chain` fails |
| `shotover_chain_latency_seconds` | `chain` | [histogram](#histogram) | The latency for running `chain` |
| `shotover_chain_messages_per_batch_count` | `chain` | [histogram](#histogram) | The number of messages in each batch passing through `chain`. |
| `shotover_chain_requests_batch_size` | `chain` | [histogram](#histogram) | The number of requests in each request batch passing through `chain`. |
| `shotover_chain_responses_batch_size` | `chain` | [histogram](#histogram) | The number of responses in each response batch passing through `chain`. |
| `shotover_available_connections_count` | `source` | [gauge](#gauge) | How many more connections can be opened to `source` before new connections will be rejected. |
| `connections_opened` | `source` | [counter](#counter) | Counts the total number of connections that clients have opened against this source. |
| `shotover_source_to_sink_latency_seconds` | `sink` | [histogram](#histogram) | The latency in seconds between reading a request from a source TCP connection and writing it to a sink TCP connection |
Expand Down
22 changes: 22 additions & 0 deletions shotover-proxy/tests/runner/observability_int_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ async fn test_metrics() {
# TYPE shotover_available_connections_count gauge
# TYPE shotover_chain_failures_count counter
# TYPE shotover_chain_messages_per_batch_count summary
# TYPE shotover_chain_requests_batch_size summary
# TYPE shotover_chain_responses_batch_size summary
# TYPE shotover_chain_total_count counter
# TYPE shotover_query_count counter
# TYPE shotover_sink_to_source_latency_seconds summary
Expand All @@ -34,6 +36,26 @@ shotover_chain_messages_per_batch_count{chain="redis",quantile="0.95"}
shotover_chain_messages_per_batch_count{chain="redis",quantile="0.99"}
shotover_chain_messages_per_batch_count{chain="redis",quantile="0.999"}
shotover_chain_messages_per_batch_count{chain="redis",quantile="1"}
shotover_chain_requests_batch_size_count{chain="redis"}
shotover_chain_requests_batch_size_sum{chain="redis"}
shotover_chain_requests_batch_size{chain="redis",quantile="0"}
shotover_chain_requests_batch_size{chain="redis",quantile="0.1"}
shotover_chain_requests_batch_size{chain="redis",quantile="0.5"}
shotover_chain_requests_batch_size{chain="redis",quantile="0.9"}
shotover_chain_requests_batch_size{chain="redis",quantile="0.95"}
shotover_chain_requests_batch_size{chain="redis",quantile="0.99"}
shotover_chain_requests_batch_size{chain="redis",quantile="0.999"}
shotover_chain_requests_batch_size{chain="redis",quantile="1"}
shotover_chain_responses_batch_size_count{chain="redis"}
shotover_chain_responses_batch_size_sum{chain="redis"}
shotover_chain_responses_batch_size{chain="redis",quantile="0"}
shotover_chain_responses_batch_size{chain="redis",quantile="0.1"}
shotover_chain_responses_batch_size{chain="redis",quantile="0.5"}
shotover_chain_responses_batch_size{chain="redis",quantile="0.9"}
shotover_chain_responses_batch_size{chain="redis",quantile="0.95"}
shotover_chain_responses_batch_size{chain="redis",quantile="0.99"}
shotover_chain_responses_batch_size{chain="redis",quantile="0.999"}
shotover_chain_responses_batch_size{chain="redis",quantile="1"}
shotover_chain_total_count{chain="redis"}
shotover_query_count{name="redis-chain"}
shotover_sink_to_source_latency_seconds_count{source="redis"}
Expand Down
39 changes: 28 additions & 11 deletions shotover/src/transforms/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ pub struct TransformChain {

chain_total: Counter,
chain_failures: Counter,
chain_batch_size: Histogram,
chain_requests_batch_size: Histogram,
chain_responses_batch_size: Histogram,
chain_latency_seconds: Histogram,
}

Expand Down Expand Up @@ -165,12 +166,21 @@ impl TransformChain {
let start = Instant::now();
chain_state.reset(&mut self.chain);

self.chain_batch_size
.record(chain_state.requests.len() as f64);
if !chain_state.requests.is_empty() {
self.chain_requests_batch_size
.record(chain_state.requests.len() as f64);
}

let result = chain_state.call_next_transform().await;
self.chain_total.increment(1);
if result.is_err() {
self.chain_failures.increment(1);
match &result {
Ok(responses) => {
if !responses.is_empty() {
self.chain_responses_batch_size
.record(responses.len() as f64);
}
}
Err(_) => self.chain_failures.increment(1),
}

self.chain_latency_seconds.record(start.elapsed());
Expand Down Expand Up @@ -221,7 +231,8 @@ pub struct TransformChainBuilder {

chain_total: Counter,
chain_failures: Counter,
chain_batch_size: Histogram,
chain_responses_batch_size: Histogram,
chain_requests_batch_size: Histogram,
}

impl TransformChainBuilder {
Expand All @@ -235,18 +246,23 @@ impl TransformChainBuilder {
}
).collect();

let chain_batch_size =
histogram!("shotover_chain_messages_per_batch_count", "chain" => name);
// This metric is deprecated, but it is still registered to give users some time to migrate to the requests/responses versions that have replaced it.
histogram!("shotover_chain_messages_per_batch_count", "chain" => name).record(0);

let chain_requests_batch_size =
histogram!("shotover_chain_requests_batch_size", "chain" => name);
let chain_responses_batch_size =
histogram!("shotover_chain_responses_batch_size", "chain" => name);
let chain_total = counter!("shotover_chain_total_count", "chain" => name);
let chain_failures = counter!("shotover_chain_failures_count", "chain" => name);
// Can't register shotover_chain_latency_seconds here because a unique one is created for each client IP address

TransformChainBuilder {
name,
chain,
chain_total,
chain_failures,
chain_batch_size,
chain_requests_batch_size,
chain_responses_batch_size,
}
}

Expand Down Expand Up @@ -376,7 +392,8 @@ impl TransformChainBuilder {
chain,
chain_total: self.chain_total.clone(),
chain_failures: self.chain_failures.clone(),
chain_batch_size: self.chain_batch_size.clone(),
chain_requests_batch_size: self.chain_requests_batch_size.clone(),
chain_responses_batch_size: self.chain_responses_batch_size.clone(),
chain_latency_seconds: histogram!(
"shotover_chain_latency_seconds",
"chain" => self.name,
Expand Down
Loading