Skip to content

Commit

Permalink
fix shotover_chain_messages_per_batch_count metric (#1633)
Browse files Browse the repository at this point in the history
  • Loading branch information
rukai authored Sep 23, 2024
1 parent 84189fc commit fca032a
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 12 deletions.
3 changes: 2 additions & 1 deletion docs/src/user-guide/observability.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ This optional interface will serve Prometheus metrics from `/metrics`. It will b
| `shotover_chain_total_count` | `chain` | [counter](#counter) | Counts the number of times `chain` is used |
| `shotover_chain_failures_count` | `chain` | [counter](#counter) | Counts the number of times `chain` fails |
| `shotover_chain_latency_seconds` | `chain` | [histogram](#histogram) | The latency for running `chain` |
| `shotover_chain_messages_per_batch_count` | `chain` | [histogram](#histogram) | The number of messages in each batch passing through `chain`. |
| `shotover_chain_requests_batch_size` | `chain` | [histogram](#histogram) | The number of requests in each request batch passing through `chain`. |
| `shotover_chain_responses_batch_size` | `chain` | [histogram](#histogram) | The number of responses in each response batch passing through `chain`. |
| `shotover_available_connections_count` | `source` | [gauge](#gauge) | How many more connections can be opened to `source` before new connections will be rejected. |
| `connections_opened` | `source` | [counter](#counter) | Counts the total number of connections that clients have opened against this source. |
| `shotover_source_to_sink_latency_seconds` | `sink` | [histogram](#histogram) | The time in seconds between reading a request from a source TCP connection and writing it to a sink TCP connection |
Expand Down
22 changes: 22 additions & 0 deletions shotover-proxy/tests/runner/observability_int_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ async fn test_metrics() {
# TYPE shotover_available_connections_count gauge
# TYPE shotover_chain_failures_count counter
# TYPE shotover_chain_messages_per_batch_count summary
# TYPE shotover_chain_requests_batch_size summary
# TYPE shotover_chain_responses_batch_size summary
# TYPE shotover_chain_total_count counter
# TYPE shotover_query_count counter
# TYPE shotover_sink_to_source_latency_seconds summary
Expand All @@ -34,6 +36,26 @@ shotover_chain_messages_per_batch_count{chain="redis",quantile="0.95"}
shotover_chain_messages_per_batch_count{chain="redis",quantile="0.99"}
shotover_chain_messages_per_batch_count{chain="redis",quantile="0.999"}
shotover_chain_messages_per_batch_count{chain="redis",quantile="1"}
shotover_chain_requests_batch_size_count{chain="redis"}
shotover_chain_requests_batch_size_sum{chain="redis"}
shotover_chain_requests_batch_size{chain="redis",quantile="0"}
shotover_chain_requests_batch_size{chain="redis",quantile="0.1"}
shotover_chain_requests_batch_size{chain="redis",quantile="0.5"}
shotover_chain_requests_batch_size{chain="redis",quantile="0.9"}
shotover_chain_requests_batch_size{chain="redis",quantile="0.95"}
shotover_chain_requests_batch_size{chain="redis",quantile="0.99"}
shotover_chain_requests_batch_size{chain="redis",quantile="0.999"}
shotover_chain_requests_batch_size{chain="redis",quantile="1"}
shotover_chain_responses_batch_size_count{chain="redis"}
shotover_chain_responses_batch_size_sum{chain="redis"}
shotover_chain_responses_batch_size{chain="redis",quantile="0"}
shotover_chain_responses_batch_size{chain="redis",quantile="0.1"}
shotover_chain_responses_batch_size{chain="redis",quantile="0.5"}
shotover_chain_responses_batch_size{chain="redis",quantile="0.9"}
shotover_chain_responses_batch_size{chain="redis",quantile="0.95"}
shotover_chain_responses_batch_size{chain="redis",quantile="0.99"}
shotover_chain_responses_batch_size{chain="redis",quantile="0.999"}
shotover_chain_responses_batch_size{chain="redis",quantile="1"}
shotover_chain_total_count{chain="redis"}
shotover_query_count{name="redis-chain"}
shotover_sink_to_source_latency_seconds_count{source="redis"}
Expand Down
39 changes: 28 additions & 11 deletions shotover/src/transforms/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ pub struct TransformChain {

chain_total: Counter,
chain_failures: Counter,
chain_batch_size: Histogram,
chain_requests_batch_size: Histogram,
chain_responses_batch_size: Histogram,
chain_latency_seconds: Histogram,
}

Expand Down Expand Up @@ -165,12 +166,21 @@ impl TransformChain {
let start = Instant::now();
chain_state.reset(&mut self.chain);

self.chain_batch_size
.record(chain_state.requests.len() as f64);
if !chain_state.requests.is_empty() {
self.chain_requests_batch_size
.record(chain_state.requests.len() as f64);
}

let result = chain_state.call_next_transform().await;
self.chain_total.increment(1);
if result.is_err() {
self.chain_failures.increment(1);
match &result {
Ok(responses) => {
if !responses.is_empty() {
self.chain_responses_batch_size
.record(responses.len() as f64);
}
}
Err(_) => self.chain_failures.increment(1),
}

self.chain_latency_seconds.record(start.elapsed());
Expand Down Expand Up @@ -221,7 +231,8 @@ pub struct TransformChainBuilder {

chain_total: Counter,
chain_failures: Counter,
chain_batch_size: Histogram,
chain_responses_batch_size: Histogram,
chain_requests_batch_size: Histogram,
}

impl TransformChainBuilder {
Expand All @@ -235,18 +246,23 @@ impl TransformChainBuilder {
}
).collect();

let chain_batch_size =
histogram!("shotover_chain_messages_per_batch_count", "chain" => name);
// This metric is deprecated, but it is still registered to give users some time to migrate to the requests/responses versions that have replaced it
histogram!("shotover_chain_messages_per_batch_count", "chain" => name).record(0);

let chain_requests_batch_size =
histogram!("shotover_chain_requests_batch_size", "chain" => name);
let chain_responses_batch_size =
histogram!("shotover_chain_responses_batch_size", "chain" => name);
let chain_total = counter!("shotover_chain_total_count", "chain" => name);
let chain_failures = counter!("shotover_chain_failures_count", "chain" => name);
// Can't register shotover_chain_latency_seconds because a unique one is created for each client ip address

TransformChainBuilder {
name,
chain,
chain_total,
chain_failures,
chain_batch_size,
chain_requests_batch_size,
chain_responses_batch_size,
}
}

Expand Down Expand Up @@ -376,7 +392,8 @@ impl TransformChainBuilder {
chain,
chain_total: self.chain_total.clone(),
chain_failures: self.chain_failures.clone(),
chain_batch_size: self.chain_batch_size.clone(),
chain_requests_batch_size: self.chain_requests_batch_size.clone(),
chain_responses_batch_size: self.chain_responses_batch_size.clone(),
chain_latency_seconds: histogram!(
"shotover_chain_latency_seconds",
"chain" => self.name,
Expand Down

0 comments on commit fca032a

Please sign in to comment.