Merge pull request #1673 from imorph/faster_find_bucket
PERF: faster algorithm to find the bucket of a histogram observation
This commit is contained in:
commit
2b11a4ba39
|
@ -858,15 +858,35 @@ func (h *histogram) Write(out *dto.Metric) error {
|
||||||
// findBucket returns the index of the bucket for the provided value, or
|
// findBucket returns the index of the bucket for the provided value, or
|
||||||
// len(h.upperBounds) for the +Inf bucket.
|
// len(h.upperBounds) for the +Inf bucket.
|
||||||
func (h *histogram) findBucket(v float64) int {
|
func (h *histogram) findBucket(v float64) int {
|
||||||
// TODO(beorn7): For small numbers of buckets (<30), a linear search is
|
n := len(h.upperBounds)
|
||||||
// slightly faster than the binary search. If we really care, we could
|
if n == 0 {
|
||||||
// switch from one search strategy to the other depending on the number
|
return 0
|
||||||
// of buckets.
|
}
|
||||||
//
|
|
||||||
// Microbenchmarks (BenchmarkHistogramNoLabels):
|
// Early exit: if v is less than or equal to the first upper bound, return 0
|
||||||
// 11 buckets: 38.3 ns/op linear - binary 48.7 ns/op
|
if v <= h.upperBounds[0] {
|
||||||
// 100 buckets: 78.1 ns/op linear - binary 54.9 ns/op
|
return 0
|
||||||
// 300 buckets: 154 ns/op linear - binary 61.6 ns/op
|
}
|
||||||
|
|
||||||
|
// Early exit: if v is greater than the last upper bound, return len(h.upperBounds)
|
||||||
|
if v > h.upperBounds[n-1] {
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
// For small arrays, use simple linear search
|
||||||
|
// "magic number" 35 is result of tests on couple different (AWS and baremetal) servers
|
||||||
|
// see more details here: https://github.com/prometheus/client_golang/pull/1662
|
||||||
|
if n < 35 {
|
||||||
|
for i, bound := range h.upperBounds {
|
||||||
|
if v <= bound {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If v is greater than all upper bounds, return len(h.upperBounds)
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
// For larger arrays, use stdlib's binary search
|
||||||
return sort.SearchFloat64s(h.upperBounds, v)
|
return sort.SearchFloat64s(h.upperBounds, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1455,3 +1455,91 @@ func compareNativeExemplarValues(t *testing.T, exps []*dto.Exemplar, values []fl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// resultFindBucket receives the return value of findBucket inside the
// benchmark loops in this file. Presumably it exists as a package-level sink
// so the compiler cannot discard the benchmarked call as dead code.
var resultFindBucket int
|
||||||
|
|
||||||
|
func benchmarkFindBucket(b *testing.B, l int) {
|
||||||
|
h := &histogram{upperBounds: make([]float64, l)}
|
||||||
|
for i := range h.upperBounds {
|
||||||
|
h.upperBounds[i] = float64(i)
|
||||||
|
}
|
||||||
|
v := float64(l / 2)
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
resultFindBucket = h.findBucket(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkFindBucketShort(b *testing.B) {
|
||||||
|
benchmarkFindBucket(b, 20)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkFindBucketMid(b *testing.B) {
|
||||||
|
benchmarkFindBucket(b, 40)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkFindBucketLarge(b *testing.B) {
|
||||||
|
benchmarkFindBucket(b, 100)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkFindBucketHuge(b *testing.B) {
|
||||||
|
benchmarkFindBucket(b, 500)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkFindBucketInf(b *testing.B) {
|
||||||
|
h := &histogram{upperBounds: make([]float64, 500)}
|
||||||
|
for i := range h.upperBounds {
|
||||||
|
h.upperBounds[i] = float64(i)
|
||||||
|
}
|
||||||
|
v := 1000.5
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
resultFindBucket = h.findBucket(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkFindBucketLow(b *testing.B) {
|
||||||
|
h := &histogram{upperBounds: make([]float64, 500)}
|
||||||
|
for i := range h.upperBounds {
|
||||||
|
h.upperBounds[i] = float64(i)
|
||||||
|
}
|
||||||
|
v := -1.1
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
resultFindBucket = h.findBucket(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindBucket(t *testing.T) {
|
||||||
|
smallHistogram := &histogram{upperBounds: []float64{1, 2, 3, 4, 5}}
|
||||||
|
largeHistogram := &histogram{upperBounds: make([]float64, 50)}
|
||||||
|
for i := range largeHistogram.upperBounds {
|
||||||
|
largeHistogram.upperBounds[i] = float64(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
h *histogram
|
||||||
|
v float64
|
||||||
|
expected int
|
||||||
|
}{
|
||||||
|
{smallHistogram, -1, 0},
|
||||||
|
{smallHistogram, 0.5, 0},
|
||||||
|
{smallHistogram, 2.5, 2},
|
||||||
|
{smallHistogram, 5.5, 5},
|
||||||
|
{largeHistogram, -1, 0},
|
||||||
|
{largeHistogram, 25.5, 26},
|
||||||
|
{largeHistogram, 49.5, 50},
|
||||||
|
{largeHistogram, 50.5, 50},
|
||||||
|
{largeHistogram, 5000.5, 50},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
result := tt.h.findBucket(tt.v)
|
||||||
|
if result != tt.expected {
|
||||||
|
t.Errorf("findBucket(%v) = %d; expected %d", tt.v, result, tt.expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue