From 78d7a94e468a715c09f5a2983232aff5017a76e8 Mon Sep 17 00:00:00 2001 From: Ivan Goncharov Date: Mon, 4 Nov 2024 20:26:59 +0100 Subject: [PATCH] add: linear search implementation (+ benchmarks) Signed-off-by: Ivan Goncharov --- prometheus/histogram.go | 38 ++++++++++++---- prometheus/histogram_test.go | 88 ++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 9 deletions(-) diff --git a/prometheus/histogram.go b/prometheus/histogram.go index 08bef3d..c0b6ecd 100644 --- a/prometheus/histogram.go +++ b/prometheus/histogram.go @@ -858,15 +858,35 @@ func (h *histogram) Write(out *dto.Metric) error { // findBucket returns the index of the bucket for the provided value, or // len(h.upperBounds) for the +Inf bucket. func (h *histogram) findBucket(v float64) int { - // TODO(beorn7): For small numbers of buckets (<30), a linear search is - // slightly faster than the binary search. If we really care, we could - // switch from one search strategy to the other depending on the number - // of buckets. - // - // Microbenchmarks (BenchmarkHistogramNoLabels): - // 11 buckets: 38.3 ns/op linear - binary 48.7 ns/op - // 100 buckets: 78.1 ns/op linear - binary 54.9 ns/op - // 300 buckets: 154 ns/op linear - binary 61.6 ns/op + n := len(h.upperBounds) + if n == 0 { + return 0 + } + + // Early exit: if v is less than or equal to the first upper bound, return 0 + if v <= h.upperBounds[0] { + return 0 + } + + // Early exit: if v is greater than the last upper bound, return len(h.upperBounds) + if v > h.upperBounds[n-1] { + return n + } + + // For small arrays, use simple linear search + // "magic number" 35 is result of tests on couple different (AWS and baremetal) servers + // see more details here: https://github.com/prometheus/client_golang/pull/1662 + if n < 35 { + for i, bound := range h.upperBounds { + if v <= bound { + return i + } + } + // If v is greater than all upper bounds, return len(h.upperBounds) + return n + } + + // For larger arrays, use stdlib's binary search return sort.SearchFloat64s(h.upperBounds, v) } diff --git a/prometheus/histogram_test.go b/prometheus/histogram_test.go index c2a14ae..9ed5971 100644 --- a/prometheus/histogram_test.go +++ b/prometheus/histogram_test.go @@ -1455,3 +1455,91 @@ func compareNativeExemplarValues(t *testing.T, exps []*dto.Exemplar, values []fl } } } + +var resultFindBucket int + +func benchmarkFindBucket(b *testing.B, l int) { + h := &histogram{upperBounds: make([]float64, l)} + for i := range h.upperBounds { + h.upperBounds[i] = float64(i) + } + v := float64(l / 2) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + resultFindBucket = h.findBucket(v) + } +} + +func BenchmarkFindBucketShort(b *testing.B) { + benchmarkFindBucket(b, 20) +} + +func BenchmarkFindBucketMid(b *testing.B) { + benchmarkFindBucket(b, 40) +} + +func BenchmarkFindBucketLarge(b *testing.B) { + benchmarkFindBucket(b, 100) +} + +func BenchmarkFindBucketHuge(b *testing.B) { + benchmarkFindBucket(b, 500) +} + +func BenchmarkFindBucketInf(b *testing.B) { + h := &histogram{upperBounds: make([]float64, 500)} + for i := range h.upperBounds { + h.upperBounds[i] = float64(i) + } + v := 1000.5 + + b.ResetTimer() + for i := 0; i < b.N; i++ { + resultFindBucket = h.findBucket(v) + } +} + +func BenchmarkFindBucketLow(b *testing.B) { + h := &histogram{upperBounds: make([]float64, 500)} + for i := range h.upperBounds { + h.upperBounds[i] = float64(i) + } + v := -1.1 + + b.ResetTimer() + for i := 0; i < b.N; i++ { + resultFindBucket = h.findBucket(v) + } +} + +func TestFindBucket(t *testing.T) { + smallHistogram := &histogram{upperBounds: []float64{1, 2, 3, 4, 5}} + largeHistogram := &histogram{upperBounds: make([]float64, 50)} + for i := range largeHistogram.upperBounds { + largeHistogram.upperBounds[i] = float64(i) + } + + tests := []struct { + h *histogram + v float64 + expected int + }{ + {smallHistogram, -1, 0}, + {smallHistogram, 0.5, 0}, + {smallHistogram, 2.5, 2}, + {smallHistogram, 5.5, 5}, + {largeHistogram, -1, 0}, + {largeHistogram, 25.5, 26}, + {largeHistogram, 49.5, 50}, + {largeHistogram, 50.5, 50}, + {largeHistogram, 5000.5, 50}, + } + + for _, tt := range tests { + result := tt.h.findBucket(tt.v) + if result != tt.expected { + t.Errorf("findBucket(%v) = %d; expected %d", tt.v, result, tt.expected) + } + } +}