Merge pull request #1673 from imorph/faster_find_bucket

PERF: faster algorithm to discover the bucket of a histogram observation
2024-11-08 07:22:56 -03:00 · 2024-11-08 07:22:56 -03:00 · 2b11a4ba39
parent fcfad5c0b9 78d7a94e46
commit 2b11a4ba39
2 changed files with 117 additions and 9 deletions
--- a/prometheus/histogram.go
+++ b/prometheus/histogram.go
@ -858,15 +858,35 @@ func (h *histogram) Write(out *dto.Metric) error {
 // findBucket returns the index of the bucket for the provided value, or
 // len(h.upperBounds) for the +Inf bucket.
 //
 // The returned index is that of the first upper bound that is >= v. The
 // early exits and the binary search below only give that answer when
 // h.upperBounds is sorted in ascending order.
 func (h *histogram) findBucket(v float64) int {
-	// TODO(beorn7): For small numbers of buckets (<30), a linear search is
+	n := len(h.upperBounds)
-	// slightly faster than the binary search. If we really care, we could
+	if n == 0 {
-	// switch from one search strategy to the other depending on the number
+		return 0
-	// of buckets.
+	}
-	//
+
-	// Microbenchmarks (BenchmarkHistogramNoLabels):
+	// Early exit: if v is less than or equal to the first upper bound, return 0
-	// 11 buckets: 38.3 ns/op linear - binary 48.7 ns/op
+	if v <= h.upperBounds[0] {
-	// 100 buckets: 78.1 ns/op linear - binary 54.9 ns/op
+		return 0
-	// 300 buckets: 154 ns/op linear - binary 61.6 ns/op
+	}
 	// Early exit: if v is greater than the last upper bound, return len(h.upperBounds)
 	if v > h.upperBounds[n-1] {
 		return n
 	}
 	// For small arrays, use a simple linear search.
 	// The "magic number" 35 is the result of tests on a couple of different (AWS and bare-metal) servers;
 	// see more details here: https://github.com/prometheus/client_golang/pull/1662
 	if n < 35 {
 		for i, bound := range h.upperBounds {
 			if v <= bound {
 				return i
 			}
 		}
 		// No bound satisfied v <= bound. Values above the last bound have
 		// already returned via the early exit, so this looks reachable only
 		// when a comparison involves NaN (NOTE(review): confirm); the result
 		// is then len(h.upperBounds).
 		return n
 	}
 	// For larger arrays, use stdlib's binary search
 	return sort.SearchFloat64s(h.upperBounds, v)
 }
--- a/prometheus/histogram_test.go
+++ b/prometheus/histogram_test.go
@ -1455,3 +1455,91 @@ func compareNativeExemplarValues(t *testing.T, exps []*dto.Exemplar, values []fl
 		}
 	}
 }
 // resultFindBucket receives the result of every benchmarked findBucket call.
 // NOTE(review): presumably a package-level sink so the compiler cannot
 // discard the call inside the benchmark loops — confirm.
 var resultFindBucket int
 // benchmarkFindBucket times findBucket on a histogram whose l upper bounds
 // are 0, 1, ..., l-1, always looking up the value l/2 (integer division).
 func benchmarkFindBucket(b *testing.B, l int) {
 	bounds := make([]float64, l)
 	for idx := 0; idx < l; idx++ {
 		bounds[idx] = float64(idx)
 	}
 	hist := &histogram{upperBounds: bounds}
 	probe := float64(l / 2)
 	// Keep the setup above out of the measured time.
 	b.ResetTimer()
 	for remaining := b.N; remaining > 0; remaining-- {
 		resultFindBucket = hist.findBucket(probe)
 	}
 }
 // BenchmarkFindBucketShort runs the shared findBucket benchmark with 20
 // upper bounds.
 func BenchmarkFindBucketShort(b *testing.B) {
 	const numBounds = 20
 	benchmarkFindBucket(b, numBounds)
 }
 // BenchmarkFindBucketMid runs the shared findBucket benchmark with 40
 // upper bounds.
 func BenchmarkFindBucketMid(b *testing.B) {
 	const numBounds = 40
 	benchmarkFindBucket(b, numBounds)
 }
 // BenchmarkFindBucketLarge runs the shared findBucket benchmark with 100
 // upper bounds.
 func BenchmarkFindBucketLarge(b *testing.B) {
 	const numBounds = 100
 	benchmarkFindBucket(b, numBounds)
 }
 // BenchmarkFindBucketHuge runs the shared findBucket benchmark with 500
 // upper bounds.
 func BenchmarkFindBucketHuge(b *testing.B) {
 	const numBounds = 500
 	benchmarkFindBucket(b, numBounds)
 }
 // BenchmarkFindBucketInf times findBucket on a histogram with the 500 upper
 // bounds 0..499, looking up 1000.5, a value above every bound.
 func BenchmarkFindBucketInf(b *testing.B) {
 	const numBounds = 500
 	bounds := make([]float64, numBounds)
 	for idx := range bounds {
 		bounds[idx] = float64(idx)
 	}
 	hist := &histogram{upperBounds: bounds}
 	const aboveAll = 1000.5
 	// Keep the setup above out of the measured time.
 	b.ResetTimer()
 	for remaining := b.N; remaining > 0; remaining-- {
 		resultFindBucket = hist.findBucket(aboveAll)
 	}
 }
 // BenchmarkFindBucketLow times findBucket on a histogram with the 500 upper
 // bounds 0..499, looking up -1.1, a value below every bound.
 func BenchmarkFindBucketLow(b *testing.B) {
 	const numBounds = 500
 	bounds := make([]float64, numBounds)
 	for idx := range bounds {
 		bounds[idx] = float64(idx)
 	}
 	hist := &histogram{upperBounds: bounds}
 	const belowAll = -1.1
 	// Keep the setup above out of the measured time.
 	b.ResetTimer()
 	for remaining := b.N; remaining > 0; remaining-- {
 		resultFindBucket = hist.findBucket(belowAll)
 	}
 }
 // TestFindBucket checks findBucket against a table of values for three
 // histograms: one without upper bounds, one with 5 bounds (below the
 // 35-bound cutoff, so searched linearly) and one with 50 bounds (searched
 // with sort.SearchFloat64s). Besides values between bounds it probes values
 // exactly equal to a bound, because a bucket includes its upper bound.
 func TestFindBucket(t *testing.T) {
 	emptyHistogram := &histogram{}
 	smallHistogram := &histogram{upperBounds: []float64{1, 2, 3, 4, 5}}
 	largeHistogram := &histogram{upperBounds: make([]float64, 50)}
 	for i := range largeHistogram.upperBounds {
 		largeHistogram.upperBounds[i] = float64(i)
 	}
 	tests := []struct {
 		name     string
 		h        *histogram
 		v        float64
 		expected int
 	}{
 		// No explicit bounds: index 0 == len(upperBounds), the +Inf bucket.
 		{"empty", emptyHistogram, -1, 0},
 		{"empty", emptyHistogram, 0, 0},
 		{"empty", emptyHistogram, 1, 0},
 		{"small", smallHistogram, -1, 0},
 		{"small", smallHistogram, 0.5, 0},
 		{"small", smallHistogram, 1, 0},
 		{"small", smallHistogram, 2.5, 2},
 		{"small", smallHistogram, 3, 2},
 		{"small", smallHistogram, 5, 4},
 		{"small", smallHistogram, 5.5, 5},
 		{"large", largeHistogram, -1, 0},
 		{"large", largeHistogram, 0, 0},
 		{"large", largeHistogram, 25, 25},
 		{"large", largeHistogram, 25.5, 26},
 		{"large", largeHistogram, 49, 49},
 		{"large", largeHistogram, 49.5, 50},
 		{"large", largeHistogram, 50.5, 50},
 		{"large", largeHistogram, 5000.5, 50},
 	}
 	for _, tt := range tests {
 		if result := tt.h.findBucket(tt.v); result != tt.expected {
 			t.Errorf("%s histogram: findBucket(%v) = %d; expected %d", tt.name, tt.v, result, tt.expected)
 		}
 	}
 }