Skip to content

Commit

Permalink
Feature: Add new metric request_throughput (#619)
Browse files Browse the repository at this point in the history
* Feature: Add new metric `slow_request_server_throughput` to track the throughput of slow queries.

* Refactor variables and config keys, add logic to avoid unnecesary observations

* Remove "Slow" from Throughput related variables.
Extract header parsing into a dedicated function.

* Added testing for `ExtractValueFromMultiValueHeader()` and `ThroughputMetricHistogram()`

* Updated parsing to follow `Server-Timing` syntax format.

* Update config key definitions

Co-authored-by: Dimitar Dimitrov <[email protected]>

* Refactor code, add tests, rename RequestCutoff to LatencyCutoff

* Fixed linting

---------

Co-authored-by: Dimitar Dimitrov <[email protected]>
  • Loading branch information
tinitiuset and dimitarvdimitrov authored Nov 25, 2024
1 parent 22cde92 commit 77bb9dd
Show file tree
Hide file tree
Showing 5 changed files with 274 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## Changelog

* [CHANGE] Add new metric `slow_request_server_throughput` to track the throughput of slow queries. #619
* [CHANGE] Log middleware updated to honor `logRequestHeaders` in all logging scenarios. #615
* [CHANGE] Roll back the gRPC dependency to v1.65.0 to allow downstream projects to avoid a performance regression and maybe a bug in v1.66.0. #581
* [CHANGE] Update the gRPC dependency to v1.66.0 and deprecate the `grpc_server_recv_buffer_pools_enabled` option that is no longer supported by it. #580
Expand Down
36 changes: 36 additions & 0 deletions middleware/instrument.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ package middleware

import (
"context"
"fmt"
"io"
"net/http"
"strconv"
"strings"
"time"

"github.com/felixge/httpsnoop"
"github.com/gorilla/mux"
Expand Down Expand Up @@ -50,6 +52,9 @@ type Instrument struct {
RequestBodySize *prometheus.HistogramVec
ResponseBodySize *prometheus.HistogramVec
InflightRequests *prometheus.GaugeVec
LatencyCutoff time.Duration
ThroughputUnit string
RequestThroughput *prometheus.HistogramVec
}

// IsWSHandshakeRequest returns true if the given request is a websocket handshake request.
Expand Down Expand Up @@ -105,9 +110,40 @@ func (i Instrument) Wrap(next http.Handler) http.Handler {
labelValues = append(labelValues, tenantID)
instrument.ObserveWithExemplar(r.Context(), i.PerTenantDuration.WithLabelValues(labelValues...), respMetrics.Duration.Seconds())
}
if i.LatencyCutoff > 0 && respMetrics.Duration > i.LatencyCutoff {
volume, err := extractValueFromMultiValueHeader(w.Header().Get("Server-Timing"), i.ThroughputUnit, "val")
if err == nil {
instrument.ObserveWithExemplar(r.Context(), i.RequestThroughput.WithLabelValues(r.Method, route), volume/respMetrics.Duration.Seconds())
}
}
})
}

// Extracts a single value from a multi-value header, e.g. "name0;key0=0.0;key1=1.1, name1;key0=1.1"
func extractValueFromMultiValueHeader(h, name string, key string) (float64, error) {
parts := strings.Split(h, ", ")
if len(parts) == 0 {
return 0, fmt.Errorf("not a multi-value header")
}
for _, part := range parts {
if part, found := strings.CutPrefix(part, name); found {
for _, spart := range strings.Split(part, ";") {
if !strings.HasPrefix(spart, key) {
continue
}
var value float64
_, err := fmt.Sscanf(spart, key+"=%f", &value)
if err != nil {
return 0, fmt.Errorf("failed to parse value from header: %w", err)
}
return value, nil
}
}

}
return 0, fmt.Errorf("desired name not found in header")
}

// Return a name identifier for ths request. There are three options:
// 1. The request matches a gorilla mux route, with a name. Use that.
// 2. The request matches an unamed gorilla mux router. Munge the path
Expand Down
213 changes: 213 additions & 0 deletions middleware/instrument_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
package middleware

import (
"net/http"
"net/http/httptest"
"strconv"
"strings"
"testing"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require"

"github.com/grafana/dskit/instrument"
)

func TestThroughputMetricHistogram(t *testing.T) {
tests := []struct {
testName string
sleep bool
header string
observed bool
}{
{
testName: "WithSleep",
sleep: true,
header: "unit;val=0, other_unit;val=2",
observed: true,
},
{
testName: "WithoutSleep",
sleep: false,
header: "unit;val=0, other_unit;val=2",
observed: false,
},
{
testName: "WithSleepEmptyHeader",
sleep: true,
header: "",
observed: false,
},
{
testName: "WithoutSleepEmptyHeader",
sleep: false,
header: "",
observed: false,
},
}

for _, tt := range tests {
t.Run(tt.testName, func(t *testing.T) {

reg := prometheus.NewPedanticRegistry()
i := newInstrument(reg)

wrap := i.Wrap(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if tt.sleep {
time.Sleep(i.LatencyCutoff)
}
w.Header().Set("Server-Timing", tt.header)
}))

req := httptest.NewRequest("GET", "/", nil)
res := httptest.NewRecorder()

wrap.ServeHTTP(res, req)

output := ``
if tt.observed {
output = `
# HELP request_throughput_unit Server throughput running requests.
# TYPE request_throughput_unit histogram
request_throughput_unit_bucket{cutoff_ms="100",method="GET",route="other",le="1"} 1
request_throughput_unit_bucket{cutoff_ms="100",method="GET",route="other",le="5"} 1
request_throughput_unit_bucket{cutoff_ms="100",method="GET",route="other",le="10"} 1
request_throughput_unit_bucket{cutoff_ms="100",method="GET",route="other",le="+Inf"} 1
request_throughput_unit_sum{cutoff_ms="100",method="GET",route="other"} 0
request_throughput_unit_count{cutoff_ms="100",method="GET",route="other"} 1
`
}

require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(output), "request_throughput_"+i.ThroughputUnit))
})
}
}

func newInstrument(registry *prometheus.Registry) Instrument {
reg := promauto.With(registry)

const throughputUnit = "unit"
const LatencyCutoff = 100 * time.Millisecond

return Instrument{
Duration: reg.NewHistogramVec(prometheus.HistogramOpts{
Name: "request_duration_seconds",
Help: "Time (in seconds) spent serving HTTP requests.",
Buckets: instrument.DefBuckets,
}, []string{"method", "route", "status_code", "ws"}),
PerTenantDuration: reg.NewHistogramVec(prometheus.HistogramOpts{
Name: "per_tenant_request_duration_seconds",
Help: "Time (in seconds) spent serving HTTP requests for a particular tenant.",
Buckets: instrument.DefBuckets,
}, []string{"method", "route", "status_code", "ws", "tenant"}),
RequestBodySize: reg.NewHistogramVec(prometheus.HistogramOpts{
Name: "request_message_bytes",
Help: "Size (in bytes) of messages received in the request.",
Buckets: BodySizeBuckets,
}, []string{"method", "route"}),
ResponseBodySize: reg.NewHistogramVec(prometheus.HistogramOpts{
Name: "response_message_bytes",
Help: "Size (in bytes) of messages sent in response.",
Buckets: BodySizeBuckets,
}, []string{"method", "route"}),
InflightRequests: reg.NewGaugeVec(prometheus.GaugeOpts{
Name: "inflight_requests",
Help: "Current number of inflight requests.",
}, []string{"method", "route"}),
LatencyCutoff: LatencyCutoff,
ThroughputUnit: throughputUnit,
RequestThroughput: reg.NewHistogramVec(prometheus.HistogramOpts{
Name: "request_throughput_" + throughputUnit,
Help: "Server throughput running requests.",
ConstLabels: prometheus.Labels{"cutoff_ms": strconv.FormatInt(LatencyCutoff.Milliseconds(), 10)},
Buckets: []float64{1, 5, 10},
}, []string{"method", "route"}),
}
}

func TestExtractValueFromMultiValueHeader(t *testing.T) {
tests := []struct {
testName string
header string
name string
key string
expected float64
err bool
}{
{
testName: "ExistantKeyInName1",
header: "name0;key0=0.0;key1=1.1, name1;key0=1.1",
name: "name0",
key: "key0",
expected: 0.0,
err: false,
},
{
testName: "ExistantKeyInName2",
header: "name0;key0=0.0;key1=1.1, name1;key1=1.1",
name: "name0",
key: "key1",
expected: 1.1,
err: false,
},
{
testName: "NonExistantName1",
header: "name0;key0=0.0;key1=1.1, name1;key0=1.1",
name: "name2",
key: "key0",
expected: 0.0,
err: true,
},
{
testName: "NonExistantName2",
header: "name0;key0=0.0;key1=1.1, name1;key1=1.1",
name: "name2",
key: "key1",
expected: 0.0,
err: true,
},
{
testName: "NonExistantKeyInName",
header: "name0;key0=0.0;key1=1.1",
name: "name0",
key: "key2",
expected: 0,
err: true,
},
{
testName: "StringInKey",
header: "name0;key0=str;key1=1.1",
name: "name0",
key: "key0",
expected: 0,
err: true,
},
{
testName: "EmptyHeader",
header: "",
name: "name0",
key: "key0",
expected: 0,
err: true,
},
{
testName: "IncorrectFormat",
header: "key0=0.0, key1=1.1",
name: "key0",
key: "key0",
expected: 0,
err: true,
},
}

for _, tt := range tests {
t.Run(tt.testName, func(t *testing.T) {
value, err := extractValueFromMultiValueHeader(tt.header, tt.name, tt.key)
require.Equal(t, tt.err, err != nil, "expected error: %v, got: %v", tt.err, err)
require.Equal(t, tt.expected, value, "expected value: %f, got: %f", tt.expected, value)
})
}
}
12 changes: 12 additions & 0 deletions server/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package server

import (
"strconv"
"time"

"github.com/prometheus/client_golang/prometheus"
Expand All @@ -22,6 +23,7 @@ type Metrics struct {
ReceivedMessageSize *prometheus.HistogramVec
SentMessageSize *prometheus.HistogramVec
InflightRequests *prometheus.GaugeVec
RequestThroughput *prometheus.HistogramVec
}

func NewServerMetrics(cfg Config) *Metrics {
Expand Down Expand Up @@ -73,5 +75,15 @@ func NewServerMetrics(cfg Config) *Metrics {
Name: "inflight_requests",
Help: "Current number of inflight requests.",
}, []string{"method", "route"}),
RequestThroughput: reg.NewHistogramVec(prometheus.HistogramOpts{
Namespace: cfg.MetricsNamespace,
Name: "request_throughput_" + cfg.Throughput.Unit,
Help: "Server throughput of running requests.",
ConstLabels: prometheus.Labels{"cutoff_ms": strconv.FormatInt(cfg.Throughput.LatencyCutoff.Milliseconds(), 10)},
Buckets: instrument.DefBuckets,
NativeHistogramBucketFactor: cfg.MetricsNativeHistogramFactor,
NativeHistogramMaxBucketNumber: 100,
NativeHistogramMinResetDuration: time.Hour,
}, []string{"method", "route"}),
}
}
12 changes: 12 additions & 0 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,13 @@ type Config struct {

// This limiter is called for every started and finished gRPC request.
GrpcMethodLimiter GrpcInflightMethodLimiter `yaml:"-"`

Throughput Throughput `yaml:"-"`
}

type Throughput struct {
LatencyCutoff time.Duration `yaml:"throughput_latency_cutoff"`
Unit string `yaml:"throughput_unit"`
}

var infinty = time.Duration(math.MaxInt64)
Expand Down Expand Up @@ -209,6 +216,8 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
f.StringVar(&cfg.LogRequestExcludeHeadersList, "server.log-request-headers-exclude-list", "", "Comma separated list of headers to exclude from loggin. Only used if server.log-request-headers is true.")
f.BoolVar(&cfg.LogRequestAtInfoLevel, "server.log-request-at-info-level-enabled", false, "Optionally log requests at info level instead of debug level. Applies to request headers as well if server.log-request-headers is enabled.")
f.BoolVar(&cfg.ProxyProtocolEnabled, "server.proxy-protocol-enabled", false, "Enables PROXY protocol.")
f.DurationVar(&cfg.Throughput.LatencyCutoff, "server.throughput.latency-cutoff", 0, "Requests taking over the cutoff are be observed to measure throughput. Server-Timing header is used with specified unit as the indicator, for example 'Server-Timing: unit;val=8.2'. If set to 0, the throughput is not calculated.")
f.StringVar(&cfg.Throughput.Unit, "server.throughput.unit", "total_samples", "Unit of the server throughput metric, for example 'processed_bytes' or 'total_samples'. Observed values are gathered from the 'Server-Timing' header with the 'val' key. If set, it is appended to the request_server_throughput metric name.")
}

func (cfg *Config) registererOrDefault() prometheus.Registerer {
Expand Down Expand Up @@ -527,6 +536,9 @@ func BuildHTTPMiddleware(cfg Config, router *mux.Router, metrics *Metrics, logge
RequestBodySize: metrics.ReceivedMessageSize,
ResponseBodySize: metrics.SentMessageSize,
InflightRequests: metrics.InflightRequests,
LatencyCutoff: cfg.Throughput.LatencyCutoff,
ThroughputUnit: cfg.Throughput.Unit,
RequestThroughput: metrics.RequestThroughput,
},
}
var httpMiddleware []middleware.Interface
Expand Down

0 comments on commit 77bb9dd

Please sign in to comment.