From 339736761fac2a639c6a8f8d0866228dcabda2ce Mon Sep 17 00:00:00 2001
From: Andrew Seigner
Date: Thu, 9 Aug 2018 13:14:53 -0700
Subject: [PATCH] Fix handling of latency time units

strest-client allows setting a `--latencyUnit` flag. This setting was
not honored, and in some cases caused invalid calculations:
- prometheus histogram metrics did not adjust buckets based on time unit
- prometheus histogram metrics were always labeled as `latency_ms`,
  regardless of time unit
- maxValue in hdrhistogram was assumed to be in milliseconds
- the `min` cli output was incorrect
- prometheus jitter histogram was never registered/exported

Modify strest-client to correctly honor `--latencyUnit`. Prometheus
histograms are now exported for each time unit. Also includes some
cleanup to prefer native Go time types.

Signed-off-by: Andrew Seigner
---
 README.md        |  30 ++++++++------
 client/client.go | 103 +++++++++++++++++++++++++++++++++++------------
 cmd/client.go    |   2 +-
 3 files changed, 95 insertions(+), 40 deletions(-)

diff --git a/README.md b/README.md
index 6fe2ce88..2f33bb23 100644
--- a/README.md
+++ b/README.md
@@ -14,26 +14,30 @@ starting gRPC server on :11111
 ```
 
 Next run the client. By default, the client will send as many request as it can
-on a single connection for 10 seconds, and exit with a performance report.
+on a single connection. Kill it to see a final performance report.
 
 ```
-$ strest-grpc client --address localhost:11111
-2017-05-12T16:17:40-07:00 98.2KB 354/0 10s L: 0 [ 89 97 ] 102 J: 0 0
+$ strest-grpc client --address localhost:11111 --latencyUnit us
+2018-08-10T10:45:10-07:00 0.0B 39594/0 10s L: 132 [388 629 ] 7383 J: 0 0
 {
-  "good": 354,
+  "good": 42074,
   "bad": 0,
-  "bytes": 100556,
+  "bytes": 0,
   "latency": {
-    "50": 10,
-    "95": 89,
-    "99": 97,
-    "999": 102
+    "p50": 215,
+    "p75": 250,
+    "p90": 317,
+    "p95": 397,
+    "p99": 634,
+    "p999": 7383
   },
   "jitter": {
-    "50": 0,
-    "95": 0,
-    "99": 0,
-    "999": 0
+    "p50": 0,
+    "p75": 0,
+    "p90": 0,
+    "p95": 0,
+    "p99": 0,
+    "p999": 0
   }
 }
 ```

diff --git a/client/client.go b/client/client.go
index 6df82262..6c77718a 100644
--- a/client/client.go
+++ b/client/client.go
@@ -62,15 +62,14 @@ type Report struct {
 }
 
 // Quantiles contains common latency quantiles (p50, p95, p999)
 type Quantiles struct {
-	Quantile50  int64 `json:"50"`
-	Quantile95  int64 `json:"95"`
-	Quantile99  int64 `json:"99"`
-	Quantile999 int64 `json:"999"`
+	Quantile50  int64 `json:"p50"`
+	Quantile75  int64 `json:"p75"`
+	Quantile90  int64 `json:"p90"`
+	Quantile95  int64 `json:"p95"`
+	Quantile99  int64 `json:"p99"`
+	Quantile999 int64 `json:"p999"`
 }
 
-// DayInMillis represents the number of milliseconds in a 24-hour day.
-const DayInMillis int64 = 24 * 60 * 60 * 1000000
-
 var (
 	sizeSuffixes = []string{"B", "KB", "MB", "GB"}
@@ -84,26 +83,62 @@
 		Help: "Number of successful requests",
 	})
 
-	promLatencyHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
+	promLatencyMSHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
 		Name: "latency_ms",
 		Help: "gRPC latency distributions in milliseconds.",
 		// 50 exponential buckets ranging from 0.5 ms to 3 minutes
 		// TODO: make this tunable
 		Buckets: prometheus.ExponentialBuckets(0.5, 1.3, 50),
 	})
-	promJitterHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
+	promJitterMSHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
 		Name: "jitter_ms",
 		Help: "gRPC jitter distributions in milliseconds.",
 		// 50 exponential buckets ranging from 0.5 ms to 3 minutes
 		// TODO: make this tunable
 		Buckets: prometheus.ExponentialBuckets(0.5, 1.3, 50),
 	})
+
+	promLatencyUSHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
+		Name: "latency_us",
+		Help: "gRPC latency distributions in microseconds.",
+		// 50 exponential buckets ranging from 1 us to 2.4 seconds
+		// TODO: make this tunable
+		Buckets: prometheus.ExponentialBuckets(1, 1.35, 50),
+	})
+	promJitterUSHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
+		Name: "jitter_us",
+		Help: "gRPC jitter distributions in microseconds.",
+		// 50 exponential buckets ranging from 1 us to 2.4 seconds
+		// TODO: make this tunable
+		Buckets: prometheus.ExponentialBuckets(1, 1.35, 50),
+	})
+
+	promLatencyNSHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
+		Name: "latency_ns",
+		Help: "gRPC latency distributions in nanoseconds.",
+		// 50 exponential buckets ranging from 1 ns to 0.4 seconds
+		// TODO: make this tunable
+		Buckets: prometheus.ExponentialBuckets(1, 1.5, 50),
+	})
+	promJitterNSHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
+		Name: "jitter_ns",
+		Help: "gRPC jitter distributions in nanoseconds.",
+		// 50 exponential buckets ranging from 1 ns to 0.4 seconds
+		// TODO: make this tunable
+		Buckets: prometheus.ExponentialBuckets(1, 1.5, 50),
+	})
 )
 
 func registerMetrics() {
 	prometheus.MustRegister(promRequests)
 	prometheus.MustRegister(promSuccesses)
-	prometheus.MustRegister(promLatencyHistogram)
+	prometheus.MustRegister(promLatencyMSHistogram)
+	prometheus.MustRegister(promJitterMSHistogram)
+	prometheus.MustRegister(promLatencyUSHistogram)
+	prometheus.MustRegister(promJitterUSHistogram)
+	prometheus.MustRegister(promLatencyNSHistogram)
+	prometheus.MustRegister(promJitterNSHistogram)
+
 }
 
 func formatBytes(bytes uint) string {
@@ -132,7 +167,7 @@ func logIntervalReport(
 		good,
 		bad,
 		interval,
-		min/1000000,
+		min,
 		latencyHist.ValueAtQuantile(95),
 		latencyHist.ValueAtQuantile(99),
 		max,
@@ -144,6 +179,8 @@ func logFinalReport(good, bad, bytes uint, latencies *hdrhistogram.Histogram, jitters *hdrhistogram.Histogram) {
 
 	latency := Quantiles{
 		Quantile50:  latencies.ValueAtQuantile(50),
+		Quantile75:  latencies.ValueAtQuantile(75),
+		Quantile90:  latencies.ValueAtQuantile(90),
 		Quantile95:  latencies.ValueAtQuantile(95),
 		Quantile99:  latencies.ValueAtQuantile(99),
 		Quantile999: latencies.ValueAtQuantile(999),
@@ -151,6 +188,8 @@
 
 	jitter := Quantiles{
 		Quantile50:  jitters.ValueAtQuantile(50),
+		Quantile75:  jitters.ValueAtQuantile(75),
+		Quantile90:  jitters.ValueAtQuantile(90),
 		Quantile95:  jitters.ValueAtQuantile(95),
 		Quantile99:  jitters.ValueAtQuantile(99),
 		Quantile999: jitters.ValueAtQuantile(999),
@@ -436,7 +475,7 @@ func loop(
 	driver chan<- struct{},
 	responses <-chan *MeasuredResponse,
 	shutdownChannels [][]chan struct{},
-	latencyDivisor int64,
+	latencyDur time.Duration,
 ) {
 	defer mainWait.Done()
@@ -447,10 +486,17 @@
 
 	intervalReport := time.Tick(cfg.Interval)
 
-	latencyHist := hdrhistogram.New(0, DayInMillis, 3)
-	globalLatencyHist := hdrhistogram.New(0, DayInMillis, 3)
-	jitterHist := hdrhistogram.New(0, DayInMillis, 3)
-	globalJitterHist := hdrhistogram.New(0, DayInMillis, 3)
+	latencyDurNS := latencyDur.Nanoseconds()
+	msInNS := time.Millisecond.Nanoseconds()
+	usInNS := time.Microsecond.Nanoseconds()
+
+	// dayInTimeUnits represents the number of time units (ms, us, or ns) in a 24-hour day.
+	dayInTimeUnits := int64(24 * time.Hour / latencyDur)
+
+	latencyHist := hdrhistogram.New(0, dayInTimeUnits, 3)
+	globalLatencyHist := hdrhistogram.New(0, dayInTimeUnits, 3)
+	jitterHist := hdrhistogram.New(0, dayInTimeUnits, 3)
+	globalJitterHist := hdrhistogram.New(0, dayInTimeUnits, 3)
 
 	cleanup := make(chan struct{}, 2)
 	interrupt := make(chan os.Signal, 2)
@@ -554,7 +600,8 @@
 			bytes += resp.bytes
 			totalBytes += resp.bytes
 
-			latency := resp.latency.Nanoseconds() / latencyDivisor
+			respLatencyNS := resp.latency.Nanoseconds()
+			latency := respLatencyNS / latencyDurNS
 			if latency < int64(min) {
 				min = uint(latency)
 			}
@@ -563,13 +610,17 @@
 			}
 			latencyHist.RecordValue(latency)
 			globalLatencyHist.RecordValue(latency)
-			promLatencyHistogram.Observe(float64(latency))
-
-			jitter := resp.timeBetweenFrames.Nanoseconds() / latencyDivisor
-			promJitterHistogram.Observe(float64(jitter))
+			promLatencyMSHistogram.Observe(float64(respLatencyNS / msInNS))
+			promLatencyUSHistogram.Observe(float64(respLatencyNS / usInNS))
+			promLatencyNSHistogram.Observe(float64(respLatencyNS))
 
+			respJitterNS := resp.timeBetweenFrames.Nanoseconds()
+			jitter := respJitterNS / latencyDurNS
 			jitterHist.RecordValue(jitter)
 			globalJitterHist.RecordValue(jitter)
+			promJitterMSHistogram.Observe(float64(respJitterNS / msInNS))
+			promJitterUSHistogram.Observe(float64(respJitterNS / usInNS))
+			promJitterNSHistogram.Observe(float64(respJitterNS))
 		}
 
 		if recvCount == cfg.TotalRequests {
@@ -619,13 +670,13 @@ type Config struct {
 
 // TODO: this would be much less ugly if the configuration was either stored in a struct, or used viper...
 func (cfg Config) Run() {
-	latencyDivisor := int64(1000000)
+	latencyDur := time.Millisecond
 	if cfg.LatencyUnit == "ms" {
-		latencyDivisor = 1000000
+		latencyDur = time.Millisecond
 	} else if cfg.LatencyUnit == "us" {
-		latencyDivisor = 1000
+		latencyDur = time.Microsecond
 	} else if cfg.LatencyUnit == "ns" {
-		latencyDivisor = 1
+		latencyDur = time.Nanosecond
 	} else {
 		log.Fatalf("latency unit should be [ms | us | ns].")
 	}
@@ -746,7 +797,7 @@
 		driver,
 		responses,
 		shutdownChannels,
-		latencyDivisor,
+		latencyDur,
 	)
 
 	mainWait.Wait()

diff --git a/cmd/client.go b/cmd/client.go
index 64b9ceaf..e6c23ee8 100644
--- a/cmd/client.go
+++ b/cmd/client.go
@@ -29,7 +29,7 @@ func init() {
 	flags.UintVar(&clientCfg.TotalRequests, "totalRequests", 0, "total number of requests to send. default: infinite")
 	flags.UintVar(&clientCfg.TotalTargetRps, "totalTargetRps", 0, "target requests per second")
 	flags.DurationVar(&clientCfg.Interval, "interval", 10*time.Second, "reporting interval")
-	flags.StringVar(&clientCfg.LatencyPercentiles, "latencyPercentiles", "100=0", "response latency percentile distribution. (e.g. 50=10,100=100)")
+	flags.StringVar(&clientCfg.LatencyPercentiles, "latencyPercentiles", "100=0", "response latency percentile distribution (in ms). (e.g. 50=10,100=100)")
 	flags.StringVar(&clientCfg.LengthPercentiles, "lengthPercentiles", "100=0", "response body length percentile distribution. (e.g. 50=100,100=1000)")
 	flags.Float64Var(&clientCfg.ErrorRate, "errorRate", 0.0, "the chance to return an error")
 	flags.BoolVar(&clientCfg.NoFinalReport, "noFinalReport", false, "do not print a final JSON output report")
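
For readers who want the unit handling outside the context of the diff, here is a minimal, self-contained Go sketch of the logic the patch puts in `Run()` and `loop()`: map the `--latencyUnit` flag onto a native `time.Duration`, derive the histogram upper bound from it, and convert measured nanoseconds into the requested unit. The `latencyUnitToDuration` helper, the `main` wrapper, and the sample values are illustrative, not part of the patch:

```go
package main

import (
	"fmt"
	"log"
	"time"
)

// latencyUnitToDuration mirrors the patch's Run() logic: translate the
// --latencyUnit flag value into a native time.Duration.
func latencyUnitToDuration(unit string) time.Duration {
	switch unit {
	case "ms":
		return time.Millisecond
	case "us":
		return time.Microsecond
	case "ns":
		return time.Nanosecond
	default:
		log.Fatalf("latency unit should be [ms | us | ns].")
		return 0 // unreachable
	}
}

func main() {
	latencyDur := latencyUnitToDuration("us")

	// Number of latency units in a 24-hour day, as computed in the
	// patched loop(); this becomes the hdrhistogram upper bound.
	dayInTimeUnits := int64(24 * time.Hour / latencyDur)
	fmt.Println(dayInTimeUnits) // 86400000000 for "us"

	// Convert one measured response latency into the requested unit,
	// matching `latency := respLatencyNS / latencyDurNS` in the patch.
	respLatencyNS := (1500 * time.Microsecond).Nanoseconds() // sample value
	fmt.Println(respLatencyNS / latencyDur.Nanoseconds())    // 1500
}
```

Using `time.Duration` instead of a bare `int64` divisor means conversions like `24 * time.Hour / latencyDur` read as unit arithmetic rather than magic constants, which is the "prefer native Go time types" cleanup the commit message mentions.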
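The hdrhistogram bound fix is worth isolating: `hdrhistogram.New` takes its `maxValue` in the same unit as the values later recorded into it, so one fixed bound can only be correct for one unit. A short sketch, assuming the `github.com/codahale/hdrhistogram` package the client appears to use (the sample latency is illustrative):

```go
package main

import (
	"fmt"
	"time"

	"github.com/codahale/hdrhistogram"
)

func main() {
	latencyDur := time.Microsecond // as if --latencyUnit us

	// Bound the histogram at one day, expressed in the chosen unit.
	// The removed DayInMillis constant used one fixed bound no matter
	// which unit was actually being recorded.
	dayInTimeUnits := int64(24 * time.Hour / latencyDur)
	hist := hdrhistogram.New(0, dayInTimeUnits, 3)

	// Record a 1.5 ms latency, converted into the chosen unit.
	sample := (1500 * time.Microsecond).Nanoseconds() / latencyDur.Nanoseconds()
	if err := hist.RecordValue(sample); err != nil {
		fmt.Println("value out of range:", err)
	}

	fmt.Println("p99:", hist.ValueAtQuantile(99)) // 1500, in microseconds
}
```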
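Finally, a compact sketch of the per-unit Prometheus export the patch introduces, showing only the latency side (jitter is symmetric). Bucket parameters are copied from the diff; the standalone `main`, the shortened variable names, and the sample observation are illustrative:

```go
package main

import "github.com/prometheus/client_golang/prometheus"

// One histogram per time unit, with bucket ranges matching the patch:
// ms: 50 exponential buckets from 0.5 ms to ~3 minutes
// us: 50 exponential buckets from 1 us to ~2.4 seconds
// ns: 50 exponential buckets from 1 ns to ~0.4 seconds
var (
	latencyMS = prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "latency_ms",
		Help:    "gRPC latency distributions in milliseconds.",
		Buckets: prometheus.ExponentialBuckets(0.5, 1.3, 50),
	})
	latencyUS = prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "latency_us",
		Help:    "gRPC latency distributions in microseconds.",
		Buckets: prometheus.ExponentialBuckets(1, 1.35, 50),
	})
	latencyNS = prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "latency_ns",
		Help:    "gRPC latency distributions in nanoseconds.",
		Buckets: prometheus.ExponentialBuckets(1, 1.5, 50),
	})
)

func main() {
	prometheus.MustRegister(latencyMS, latencyUS, latencyNS)

	// Every observed response is recorded in all three unit-specific
	// histograms, so the exported time series stay complete no matter
	// which --latencyUnit was chosen for console output.
	respLatencyNS := int64(1_500_000) // sample: 1.5 ms in nanoseconds
	latencyMS.Observe(float64(respLatencyNS / 1_000_000))
	latencyUS.Observe(float64(respLatencyNS / 1_000))
	latencyNS.Observe(float64(respLatencyNS))
}
```

As the `TODO` comments in the diff note, the bucket ranges remain hard-coded rather than tunable.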