From 33cdac3d242e22b0efafc45ca3a5dcc1313c0b8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Matczuk?= Date: Wed, 4 Dec 2024 14:39:29 +0100 Subject: [PATCH 1/2] middleware(prometheus): use summaries instead of histograms It is much more efficient to use summaries than histograms. This would give information on p50, p90 and p99. Fixes #925 --- middleware/prometheus.go | 57 +++++++------------ middleware/prometheus_test.go | 17 ++---- .../testdata/TestPrometheusWrap.golden.txt | 49 ++++------------ 3 files changed, 40 insertions(+), 83 deletions(-) diff --git a/middleware/prometheus.go b/middleware/prometheus.go index 641f0cc5..dbcf8036 100644 --- a/middleware/prometheus.go +++ b/middleware/prometheus.go @@ -17,23 +17,10 @@ import ( "github.com/saucelabs/forwarder/internal/martian" ) -const ( - _ = iota // ignore first value by assigning to blank identifier - kb float64 = 1 << (10 * iota) - mb -) - -var sizeBuckets = []float64{ //nolint:gochecknoglobals // this is a global variable by design - 1 * kb, - 2 * kb, - 5 * kb, - 10 * kb, - 100 * kb, - 500 * kb, - 1 * mb, - 2.5 * mb, - 5 * mb, - 10 * mb, +var objectives = map[float64]float64{ + 0.5: 0.01, // Median (50th percentile) with ±1% error + 0.9: 0.01, // 90th percentile with ±1% error + 0.99: 0.001, // 99th percentile with ±0.1% error } type PrometheusOpt func(*Prometheus) @@ -53,9 +40,9 @@ func WithCustomLabeler(label string, labeler PrometheusLabeler) PrometheusOpt { type Prometheus struct { requestsInFlight *prometheus.GaugeVec requestsTotal *prometheus.CounterVec - requestDuration *prometheus.HistogramVec - requestSize *prometheus.HistogramVec - responseSize *prometheus.HistogramVec + requestDuration *prometheus.SummaryVec + requestSize *prometheus.SummaryVec + responseSize *prometheus.SummaryVec label string labeler PrometheusLabeler @@ -90,25 +77,25 @@ func NewPrometheus(r prometheus.Registerer, namespace string, opts ...Prometheus Help: "Total number of HTTP requests processed.", }, labelsWithStatus) - p.requestDuration = f.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: namespace, - Name: "http_request_duration_seconds", - Help: "The HTTP request latencies in seconds.", - Buckets: prometheus.DefBuckets, + p.requestDuration = f.NewSummaryVec(prometheus.SummaryOpts{ + Namespace: namespace, + Name: "http_request_duration_seconds", + Help: "The HTTP request latencies in seconds.", + Objectives: objectives, }, labelsWithStatus) - p.requestSize = f.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: namespace, - Name: "http_request_size_bytes", - Help: "The HTTP request sizes in bytes.", - Buckets: sizeBuckets, + p.requestSize = f.NewSummaryVec(prometheus.SummaryOpts{ + Namespace: namespace, + Name: "http_request_size_bytes", + Help: "The HTTP request sizes in bytes.", + Objectives: objectives, }, labelsWithStatus) - p.responseSize = f.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: namespace, - Name: "http_response_size_bytes", - Help: "The HTTP response sizes in bytes.", - Buckets: sizeBuckets, + p.responseSize = f.NewSummaryVec(prometheus.SummaryOpts{ + Namespace: namespace, + Name: "http_response_size_bytes", + Help: "The HTTP response sizes in bytes.", + Objectives: objectives, }, labelsWithStatus) return p diff --git a/middleware/prometheus_test.go b/middleware/prometheus_test.go index c71ab0c1..73a86034 100644 --- a/middleware/prometheus_test.go +++ b/middleware/prometheus_test.go @@ -15,8 +15,12 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" - dto "github.com/prometheus/client_model/go" - "github.com/saucelabs/forwarder/utils/golden" +) + +const ( + _ = iota // ignore first value by assigning to blank identifier + kb float64 = 1 << (10 * iota) + mb ) func TestPrometheusWrap(t *testing.T) { @@ -60,13 +64,4 @@ func TestPrometheusWrap(t *testing.T) { w := httptest.NewRecorder() s.ServeHTTP(w, r) } - - golden.DiffPrometheusMetrics(t, r, func(mf *dto.MetricFamily) bool { - if int(mf.GetType()) == 4 { - for _, m := range mf.GetMetric() { - m.Histogram.SampleSum = nil - } - } - return true - }) } diff --git a/middleware/testdata/TestPrometheusWrap.golden.txt b/middleware/testdata/TestPrometheusWrap.golden.txt index c8502a12..b9eef9f9 100644 --- a/middleware/testdata/TestPrometheusWrap.golden.txt +++ b/middleware/testdata/TestPrometheusWrap.golden.txt @@ -1,32 +1,15 @@ # HELP test_http_request_duration_seconds The HTTP request latencies in seconds. -# TYPE test_http_request_duration_seconds histogram -test_http_request_duration_seconds_bucket{code="200",method="GET",le="0.005"} 0 -test_http_request_duration_seconds_bucket{code="200",method="GET",le="0.01"} 0 -test_http_request_duration_seconds_bucket{code="200",method="GET",le="0.025"} 1 -test_http_request_duration_seconds_bucket{code="200",method="GET",le="0.05"} 1 -test_http_request_duration_seconds_bucket{code="200",method="GET",le="0.1"} 1 -test_http_request_duration_seconds_bucket{code="200",method="GET",le="0.25"} 2 -test_http_request_duration_seconds_bucket{code="200",method="GET",le="0.5"} 2 -test_http_request_duration_seconds_bucket{code="200",method="GET",le="1"} 3 -test_http_request_duration_seconds_bucket{code="200",method="GET",le="2.5"} 4 -test_http_request_duration_seconds_bucket{code="200",method="GET",le="5"} 4 -test_http_request_duration_seconds_bucket{code="200",method="GET",le="10"} 4 -test_http_request_duration_seconds_bucket{code="200",method="GET",le="+Inf"} 4 +# TYPE test_http_request_duration_seconds summary +test_http_request_duration_seconds{code="200",method="GET",quantile="0.5"} 0.101057833 +test_http_request_duration_seconds{code="200",method="GET",quantile="0.9"} 1.000892875 +test_http_request_duration_seconds{code="200",method="GET",quantile="0.99"} 1.000892875 test_http_request_duration_seconds_sum{code="200",method="GET"} 0 test_http_request_duration_seconds_count{code="200",method="GET"} 4 # HELP test_http_request_size_bytes The HTTP request sizes in bytes. -# TYPE test_http_request_size_bytes histogram -test_http_request_size_bytes_bucket{code="200",method="GET",le="1024"} 0 -test_http_request_size_bytes_bucket{code="200",method="GET",le="2048"} 1 -test_http_request_size_bytes_bucket{code="200",method="GET",le="5120"} 1 -test_http_request_size_bytes_bucket{code="200",method="GET",le="10240"} 2 -test_http_request_size_bytes_bucket{code="200",method="GET",le="102400"} 3 -test_http_request_size_bytes_bucket{code="200",method="GET",le="512000"} 4 -test_http_request_size_bytes_bucket{code="200",method="GET",le="1.048576e+06"} 4 -test_http_request_size_bytes_bucket{code="200",method="GET",le="2.62144e+06"} 4 -test_http_request_size_bytes_bucket{code="200",method="GET",le="5.24288e+06"} 4 -test_http_request_size_bytes_bucket{code="200",method="GET",le="1.048576e+07"} 4 -test_http_request_size_bytes_bucket{code="200",method="GET",le="+Inf"} 4 +# TYPE test_http_request_size_bytes summary +test_http_request_size_bytes{code="200",method="GET",quantile="0.5"} 5171 +test_http_request_size_bytes{code="200",method="GET",quantile="0.9"} 102451 +test_http_request_size_bytes{code="200",method="GET",quantile="0.99"} 102451 test_http_request_size_bytes_sum{code="200",method="GET"} 0 test_http_request_size_bytes_count{code="200",method="GET"} 4 # HELP test_http_requests_in_flight Current number of HTTP requests being served. @@ -36,17 +19,9 @@ test_http_requests_in_flight{method="GET"} 0 # TYPE test_http_requests_total counter test_http_requests_total{code="200",method="GET"} 4 # HELP test_http_response_size_bytes The HTTP response sizes in bytes. -# TYPE test_http_response_size_bytes histogram -test_http_response_size_bytes_bucket{code="200",method="GET",le="1024"} 0 -test_http_response_size_bytes_bucket{code="200",method="GET",le="2048"} 1 -test_http_response_size_bytes_bucket{code="200",method="GET",le="5120"} 1 -test_http_response_size_bytes_bucket{code="200",method="GET",le="10240"} 2 -test_http_response_size_bytes_bucket{code="200",method="GET",le="102400"} 3 -test_http_response_size_bytes_bucket{code="200",method="GET",le="512000"} 4 -test_http_response_size_bytes_bucket{code="200",method="GET",le="1.048576e+06"} 4 -test_http_response_size_bytes_bucket{code="200",method="GET",le="2.62144e+06"} 4 -test_http_response_size_bytes_bucket{code="200",method="GET",le="5.24288e+06"} 4 -test_http_response_size_bytes_bucket{code="200",method="GET",le="1.048576e+07"} 4 -test_http_response_size_bytes_bucket{code="200",method="GET",le="+Inf"} 4 +# TYPE test_http_response_size_bytes summary +test_http_response_size_bytes{code="200",method="GET",quantile="0.5"} 5171 +test_http_response_size_bytes{code="200",method="GET",quantile="0.9"} 102451 +test_http_response_size_bytes{code="200",method="GET",quantile="0.99"} 102451 test_http_response_size_bytes_sum{code="200",method="GET"} 0 test_http_response_size_bytes_count{code="200",method="GET"} 4 From 070255169ccf4115a635630d6fc0f99e3f104510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Matczuk?= Date: Thu, 5 Dec 2024 11:41:08 +0100 Subject: [PATCH 2/2] middleware(prometheus): remove request and response size metrics --- http_proxy.go | 2 - middleware/prometheus.go | 90 +------------------ middleware/prometheus_test.go | 70 ++++++++------- .../testdata/TestPrometheusWrap.golden.txt | 24 ++--- 4 files changed, 48 insertions(+), 138 deletions(-) diff --git a/http_proxy.go b/http_proxy.go index 79a73632..0a0e84a5 100644 --- a/http_proxy.go +++ b/http_proxy.go @@ -392,8 +392,6 @@ func (hp *HTTPProxy) middlewareStack() (martian.RequestResponseModifier, *martia if hp.config.PromRegistry != nil { p := middleware.NewPrometheus(hp.config.PromRegistry, hp.config.PromNamespace, hp.config.PromHTTPOpts...) - stack.AddRequestModifier(p) - stack.AddResponseModifier(p) trace = new(martian.ProxyTrace) trace.ReadRequest = func(info martian.ReadRequestInfo) { diff --git a/middleware/prometheus.go b/middleware/prometheus.go index dbcf8036..829c4a2c 100644 --- a/middleware/prometheus.go +++ b/middleware/prometheus.go @@ -7,7 +7,6 @@ package middleware import ( - "io" "net/http" "strconv" "time" @@ -41,8 +40,9 @@ type Prometheus struct { requestsInFlight *prometheus.GaugeVec requestsTotal *prometheus.CounterVec requestDuration *prometheus.SummaryVec - requestSize *prometheus.SummaryVec - responseSize *prometheus.SummaryVec + // The following metrics are now removed, revert if needed. + // requestSize *prometheus.SummaryVec + // responseSize *prometheus.SummaryVec label string labeler PrometheusLabeler @@ -84,20 +84,6 @@ func NewPrometheus(r prometheus.Registerer, namespace string, opts ...Prometheus Objectives: objectives, }, labelsWithStatus) - p.requestSize = f.NewSummaryVec(prometheus.SummaryOpts{ - Namespace: namespace, - Name: "http_request_size_bytes", - Help: "The HTTP request sizes in bytes.", - Objectives: objectives, - }, labelsWithStatus) - - p.responseSize = f.NewSummaryVec(prometheus.SummaryOpts{ - Namespace: namespace, - Name: "http_response_size_bytes", - Help: "The HTTP response sizes in bytes.", - Objectives: objectives, - }, labelsWithStatus) - return p } @@ -109,8 +95,6 @@ func (p *Prometheus) Wrap(h http.Handler) http.Handler { d := newDelegator(w, nil) - r.Body = bodyCounter(r.Body) - start := time.Now() h.ServeHTTP(d, r) elapsed := time.Since(start).Seconds() @@ -121,27 +105,10 @@ func (p *Prometheus) Wrap(h http.Handler) http.Handler { p.requestsTotal.WithLabelValues(labelsWithStatus...).Inc() p.requestDuration.WithLabelValues(labelsWithStatus...).Observe(elapsed) - reqSize := int64(0) - if c, ok := r.Body.(counter); ok { - reqSize = c.Count() - } - p.requestsInFlight.WithLabelValues(labels...).Dec() - p.requestSize.WithLabelValues(labelsWithStatus...).Observe(float64(reqSize)) - p.responseSize.WithLabelValues(labelsWithStatus...).Observe(float64(d.Written())) }) } -func (p *Prometheus) ModifyRequest(req *http.Request) error { - req.Body = bodyCounter(req.Body) - return nil -} - -func (p *Prometheus) ModifyResponse(res *http.Response) error { - res.Body = bodyCounter(res.Body) - return nil -} - func (p *Prometheus) ReadRequest(req *http.Request) { p.requestsInFlight.WithLabelValues(p.labels(req)...).Inc() } @@ -158,18 +125,6 @@ func (p *Prometheus) WroteResponse(res *http.Response) { p.requestsInFlight.WithLabelValues(labels...).Dec() p.requestsTotal.WithLabelValues(labelsWithStatus...).Inc() p.requestDuration.WithLabelValues(labelsWithStatus...).Observe(elapsed) - - reqSize := int64(0) - if c, ok := req.Body.(counter); ok { - reqSize = c.Count() - } - p.requestSize.WithLabelValues(labelsWithStatus...).Observe(float64(reqSize)) - - resSize := int64(0) - if c, ok := res.Body.(counter); ok { - resSize = c.Count() - } - p.responseSize.WithLabelValues(labelsWithStatus...).Observe(float64(resSize)) } func (p *Prometheus) labels(req *http.Request) []string { @@ -179,42 +134,3 @@ func (p *Prometheus) labels(req *http.Request) []string { } return labels } - -type counter interface { - Count() int64 -} - -func bodyCounter(b io.ReadCloser) io.ReadCloser { - if b == nil || b == http.NoBody { - return b - } - - if _, ok := b.(io.ReadWriteCloser); ok { - return &rwcBody{body{ReadCloser: b}} - } - - return &body{ReadCloser: b} -} - -type body struct { - io.ReadCloser - n int64 -} - -func (b *body) Count() int64 { - return b.n -} - -func (b *body) Read(p []byte) (n int, err error) { - n, err = b.ReadCloser.Read(p) - b.n += int64(n) - return -} - -type rwcBody struct { - body -} - -func (b *rwcBody) Write(p []byte) (int, error) { - return b.ReadCloser.(io.ReadWriteCloser).Write(p) //nolint:forcetypeassert // We know it's a ReadWriteCloser. -} diff --git a/middleware/prometheus_test.go b/middleware/prometheus_test.go index 73a86034..585aa530 100644 --- a/middleware/prometheus_test.go +++ b/middleware/prometheus_test.go @@ -7,33 +7,26 @@ package middleware import ( - "bytes" - "io" + "math" "net/http" "net/http/httptest" + "sync" "testing" "time" "github.com/prometheus/client_golang/prometheus" -) - -const ( - _ = iota // ignore first value by assigning to blank identifier - kb float64 = 1 << (10 * iota) - mb + dto "github.com/prometheus/client_model/go" + "github.com/saucelabs/forwarder/utils/golden" ) func TestPrometheusWrap(t *testing.T) { pages := []struct { path string duration time.Duration - status int - size float64 }{ - {"/1", 10 * time.Millisecond, http.StatusOK, 1.05 * kb}, - {"/2", 100 * time.Millisecond, http.StatusOK, 5.05 * kb}, - {"/3", 500 * time.Millisecond, http.StatusOK, 10.05 * kb}, - {"/4", 1000 * time.Millisecond, http.StatusOK, 100.05 * kb}, + {"/100", 100 * time.Millisecond}, + {"/200", 200 * time.Millisecond}, + {"/1000", 1000 * time.Millisecond}, } h := http.NewServeMux() @@ -41,27 +34,44 @@ func TestPrometheusWrap(t *testing.T) { p := pages[i] h.HandleFunc(p.path, func(w http.ResponseWriter, r *http.Request) { time.Sleep(p.duration) - w.WriteHeader(p.status) - n, err := io.Copy(w, r.Body) - if err != nil { - t.Error(err) - } - if n != int64(p.size) { - t.Errorf("expected %d, got %d", int(p.size), n) - } + w.WriteHeader(http.StatusOK) }) } r := prometheus.NewPedanticRegistry() s := NewPrometheus(r, "test").Wrap(h) - for i := range pages { - p := pages[i] - b := bytes.NewBuffer(make([]byte, int(p.size))) - r := httptest.NewRequest(http.MethodGet, p.path, b) - r.RemoteAddr = "localhost:1234" - r.URL.Host = "saucelabs.com" - w := httptest.NewRecorder() - s.ServeHTTP(w, r) + var wg sync.WaitGroup + for range [100]struct{}{} { + for i := range pages { + wg.Add(1) + go func() { + defer wg.Done() + p := pages[i] + r := httptest.NewRequest(http.MethodGet, p.path, http.NoBody) + r.RemoteAddr = "localhost:1234" + r.URL.Host = "saucelabs.com" + w := httptest.NewRecorder() + s.ServeHTTP(w, r) + }() + } + } + wg.Wait() + + oneDecimalDigit := func(v *float64) { + *v = math.Round(*v*10) / 10 } + + golden.DiffPrometheusMetrics(t, r, func(mf *dto.MetricFamily) bool { + if int(mf.GetType()) == 2 { + for _, m := range mf.GetMetric() { + m.Summary.SampleSum = nil + m.Summary.SampleCount = nil + for _, q := range m.GetSummary().GetQuantile() { + oneDecimalDigit(q.Value) //nolint:protogetter // We want to set the value. + } + } + } + return true + }) } diff --git a/middleware/testdata/TestPrometheusWrap.golden.txt b/middleware/testdata/TestPrometheusWrap.golden.txt index b9eef9f9..f1bf5593 100644 --- a/middleware/testdata/TestPrometheusWrap.golden.txt +++ b/middleware/testdata/TestPrometheusWrap.golden.txt @@ -1,27 +1,13 @@ # HELP test_http_request_duration_seconds The HTTP request latencies in seconds. # TYPE test_http_request_duration_seconds summary -test_http_request_duration_seconds{code="200",method="GET",quantile="0.5"} 0.101057833 -test_http_request_duration_seconds{code="200",method="GET",quantile="0.9"} 1.000892875 -test_http_request_duration_seconds{code="200",method="GET",quantile="0.99"} 1.000892875 +test_http_request_duration_seconds{code="200",method="GET",quantile="0.5"} 0.2 +test_http_request_duration_seconds{code="200",method="GET",quantile="0.9"} 1 +test_http_request_duration_seconds{code="200",method="GET",quantile="0.99"} 1 test_http_request_duration_seconds_sum{code="200",method="GET"} 0 -test_http_request_duration_seconds_count{code="200",method="GET"} 4 -# HELP test_http_request_size_bytes The HTTP request sizes in bytes. -# TYPE test_http_request_size_bytes summary -test_http_request_size_bytes{code="200",method="GET",quantile="0.5"} 5171 -test_http_request_size_bytes{code="200",method="GET",quantile="0.9"} 102451 -test_http_request_size_bytes{code="200",method="GET",quantile="0.99"} 102451 -test_http_request_size_bytes_sum{code="200",method="GET"} 0 -test_http_request_size_bytes_count{code="200",method="GET"} 4 +test_http_request_duration_seconds_count{code="200",method="GET"} 0 # HELP test_http_requests_in_flight Current number of HTTP requests being served. # TYPE test_http_requests_in_flight gauge test_http_requests_in_flight{method="GET"} 0 # HELP test_http_requests_total Total number of HTTP requests processed. # TYPE test_http_requests_total counter -test_http_requests_total{code="200",method="GET"} 4 -# HELP test_http_response_size_bytes The HTTP response sizes in bytes. -# TYPE test_http_response_size_bytes summary -test_http_response_size_bytes{code="200",method="GET",quantile="0.5"} 5171 -test_http_response_size_bytes{code="200",method="GET",quantile="0.9"} 102451 -test_http_response_size_bytes{code="200",method="GET",quantile="0.99"} 102451 -test_http_response_size_bytes_sum{code="200",method="GET"} 0 -test_http_response_size_bytes_count{code="200",method="GET"} 4 +test_http_requests_total{code="200",method="GET"} 300