histograms: Add timer to reset ASAP after bucket limiting has happened (#1367)
Fixes #1248. See issue description for all the details. Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
parent
c3e797ebb3
commit
cd8cba2ceb
|
@ -475,6 +475,9 @@ type HistogramOpts struct {
|
||||||
|
|
||||||
// now is for testing purposes, by default it's time.Now.
|
// now is for testing purposes, by default it's time.Now.
|
||||||
now func() time.Time
|
now func() time.Time
|
||||||
|
|
||||||
|
// afterFunc is for testing purposes, by default it's time.AfterFunc.
|
||||||
|
afterFunc func(time.Duration, func()) *time.Timer
|
||||||
}
|
}
|
||||||
|
|
||||||
// HistogramVecOpts bundles the options to create a HistogramVec metric.
|
// HistogramVecOpts bundles the options to create a HistogramVec metric.
|
||||||
|
@ -526,7 +529,9 @@ func newHistogram(desc *Desc, opts HistogramOpts, labelValues ...string) Histogr
|
||||||
if opts.now == nil {
|
if opts.now == nil {
|
||||||
opts.now = time.Now
|
opts.now = time.Now
|
||||||
}
|
}
|
||||||
|
if opts.afterFunc == nil {
|
||||||
|
opts.afterFunc = time.AfterFunc
|
||||||
|
}
|
||||||
h := &histogram{
|
h := &histogram{
|
||||||
desc: desc,
|
desc: desc,
|
||||||
upperBounds: opts.Buckets,
|
upperBounds: opts.Buckets,
|
||||||
|
@ -536,6 +541,7 @@ func newHistogram(desc *Desc, opts HistogramOpts, labelValues ...string) Histogr
|
||||||
nativeHistogramMinResetDuration: opts.NativeHistogramMinResetDuration,
|
nativeHistogramMinResetDuration: opts.NativeHistogramMinResetDuration,
|
||||||
lastResetTime: opts.now(),
|
lastResetTime: opts.now(),
|
||||||
now: opts.now,
|
now: opts.now,
|
||||||
|
afterFunc: opts.afterFunc,
|
||||||
}
|
}
|
||||||
if len(h.upperBounds) == 0 && opts.NativeHistogramBucketFactor <= 1 {
|
if len(h.upperBounds) == 0 && opts.NativeHistogramBucketFactor <= 1 {
|
||||||
h.upperBounds = DefBuckets
|
h.upperBounds = DefBuckets
|
||||||
|
@ -716,9 +722,16 @@ type histogram struct {
|
||||||
nativeHistogramMinResetDuration time.Duration
|
nativeHistogramMinResetDuration time.Duration
|
||||||
// lastResetTime is protected by mtx. It is also used as created timestamp.
|
// lastResetTime is protected by mtx. It is also used as created timestamp.
|
||||||
lastResetTime time.Time
|
lastResetTime time.Time
|
||||||
|
// resetScheduled is protected by mtx. It is true if a reset is
|
||||||
|
// scheduled for a later time (when nativeHistogramMinResetDuration has
|
||||||
|
// passed).
|
||||||
|
resetScheduled bool
|
||||||
|
|
||||||
// now is for testing purposes, by default it's time.Now.
|
// now is for testing purposes, by default it's time.Now.
|
||||||
now func() time.Time
|
now func() time.Time
|
||||||
|
|
||||||
|
// afterFunc is for testing purposes, by default it's time.AfterFunc.
|
||||||
|
afterFunc func(time.Duration, func()) *time.Timer
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *histogram) Desc() *Desc {
|
func (h *histogram) Desc() *Desc {
|
||||||
|
@ -874,21 +887,31 @@ func (h *histogram) limitBuckets(counts *histogramCounts, value float64, bucket
|
||||||
if h.maybeReset(hotCounts, coldCounts, coldIdx, value, bucket) {
|
if h.maybeReset(hotCounts, coldCounts, coldIdx, value, bucket) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
// One of the other strategies will happen. To undo what they will do as
|
||||||
|
// soon as enough time has passed to satisfy
|
||||||
|
// h.nativeHistogramMinResetDuration, schedule a reset at the right time
|
||||||
|
// if we haven't done so already.
|
||||||
|
if h.nativeHistogramMinResetDuration > 0 && !h.resetScheduled {
|
||||||
|
h.resetScheduled = true
|
||||||
|
h.afterFunc(h.nativeHistogramMinResetDuration-h.now().Sub(h.lastResetTime), h.reset)
|
||||||
|
}
|
||||||
|
|
||||||
if h.maybeWidenZeroBucket(hotCounts, coldCounts) {
|
if h.maybeWidenZeroBucket(hotCounts, coldCounts) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
h.doubleBucketWidth(hotCounts, coldCounts)
|
h.doubleBucketWidth(hotCounts, coldCounts)
|
||||||
}
|
}
|
||||||
|
|
||||||
// maybeReset resets the whole histogram if at least h.nativeHistogramMinResetDuration
|
// maybeReset resets the whole histogram if at least
|
||||||
// has been passed. It returns true if the histogram has been reset. The caller
|
// h.nativeHistogramMinResetDuration has passed. It returns true if the
|
||||||
// must have locked h.mtx.
|
// histogram has been reset. The caller must have locked h.mtx.
|
||||||
func (h *histogram) maybeReset(
|
func (h *histogram) maybeReset(
|
||||||
hot, cold *histogramCounts, coldIdx uint64, value float64, bucket int,
|
hot, cold *histogramCounts, coldIdx uint64, value float64, bucket int,
|
||||||
) bool {
|
) bool {
|
||||||
// We are using the possibly mocked h.now() rather than
|
// We are using the possibly mocked h.now() rather than
|
||||||
// time.Since(h.lastResetTime) to enable testing.
|
// time.Since(h.lastResetTime) to enable testing.
|
||||||
if h.nativeHistogramMinResetDuration == 0 ||
|
if h.nativeHistogramMinResetDuration == 0 || // No reset configured.
|
||||||
|
h.resetScheduled || // Do not interfere if a reset is already scheduled.
|
||||||
h.now().Sub(h.lastResetTime) < h.nativeHistogramMinResetDuration {
|
h.now().Sub(h.lastResetTime) < h.nativeHistogramMinResetDuration {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
@ -906,6 +929,29 @@ func (h *histogram) maybeReset(
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// reset unconditionally resets the whole histogram: both the cold and the hot
|
||||||
|
// counts are cleared, lastResetTime is set to h.now(), and resetScheduled is
// set back to false. It locks h.mtx itself, i.e. it has to be called without
// having locked h.mtx. limitBuckets passes it to h.afterFunc as the callback
// of a scheduled reset.
|
||||||
|
func (h *histogram) reset() {
|
||||||
|
h.mtx.Lock()
|
||||||
|
defer h.mtx.Unlock()
|
||||||
|
|
||||||
|
// The most significant bit of countAndHotIdx selects which entry of
// h.counts is currently hot; the cold entry is the other one.
n := atomic.LoadUint64(&h.countAndHotIdx)
|
||||||
|
hotIdx := n >> 63
|
||||||
|
coldIdx := (^n) >> 63
|
||||||
|
hot := h.counts[hotIdx]
|
||||||
|
cold := h.counts[coldIdx]
|
||||||
|
// Completely reset the cold counts first, so that they are empty when they
// become the hot counts below.
|
||||||
|
h.resetCounts(cold)
|
||||||
|
// Make the (now empty) cold counts the new hot counts in a single atomic
// swap that also zeroes the lower 63 bits of countAndHotIdx.
|
||||||
|
n = atomic.SwapUint64(&h.countAndHotIdx, coldIdx<<63)
|
||||||
|
// The lower 63 bits of the previous value are handed to waitForCooldown
// together with the formerly hot counts.
// NOTE(review): presumably this waits for in-flight observations on the
// formerly hot counts to finish; confirm against waitForCooldown.
count := n & ((1 << 63) - 1)
|
||||||
|
waitForCooldown(count, hot)
|
||||||
|
// Finally, reset the formerly hot counts, too.
|
||||||
|
h.resetCounts(hot)
|
||||||
|
// Record the reset time and clear the flag so that a later call to
// limitBuckets may schedule a new reset.
h.lastResetTime = h.now()
|
||||||
|
h.resetScheduled = false
|
||||||
|
}
|
||||||
|
|
||||||
// maybeWidenZeroBucket widens the zero bucket until it includes the existing
|
// maybeWidenZeroBucket widens the zero bucket until it includes the existing
|
||||||
// buckets closest to the zero bucket (which could be two, if an equidistant
|
// buckets closest to the zero bucket (which could be two, if an equidistant
|
||||||
// negative and a positive bucket exists, but usually it's only one bucket to be
|
// negative and a positive bucket exists, but usually it's only one bucket to be
|
||||||
|
|
|
@ -925,16 +925,16 @@ func TestNativeHistogram(t *testing.T) {
|
||||||
maxBuckets: 4,
|
maxBuckets: 4,
|
||||||
minResetDuration: 9 * time.Minute,
|
minResetDuration: 9 * time.Minute,
|
||||||
want: &dto.Histogram{
|
want: &dto.Histogram{
|
||||||
SampleCount: proto.Uint64(2),
|
SampleCount: proto.Uint64(3),
|
||||||
SampleSum: proto.Float64(7),
|
SampleSum: proto.Float64(12.1),
|
||||||
Schema: proto.Int32(2),
|
Schema: proto.Int32(2),
|
||||||
ZeroThreshold: proto.Float64(2.938735877055719e-39),
|
ZeroThreshold: proto.Float64(2.938735877055719e-39),
|
||||||
ZeroCount: proto.Uint64(0),
|
ZeroCount: proto.Uint64(0),
|
||||||
PositiveSpan: []*dto.BucketSpan{
|
PositiveSpan: []*dto.BucketSpan{
|
||||||
{Offset: proto.Int32(7), Length: proto.Uint32(2)},
|
{Offset: proto.Int32(7), Length: proto.Uint32(4)},
|
||||||
},
|
},
|
||||||
PositiveDelta: []int64{1, 0},
|
PositiveDelta: []int64{1, 0, -1, 1},
|
||||||
CreatedTimestamp: timestamppb.New(now.Add(10 * time.Minute)), // We expect reset to happen after 9 minutes.
|
CreatedTimestamp: timestamppb.New(now.Add(9 * time.Minute)), // We expect reset to happen after 8 minutes.
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -945,23 +945,27 @@ func TestNativeHistogram(t *testing.T) {
|
||||||
maxZeroThreshold: 1.2,
|
maxZeroThreshold: 1.2,
|
||||||
minResetDuration: 9 * time.Minute,
|
minResetDuration: 9 * time.Minute,
|
||||||
want: &dto.Histogram{
|
want: &dto.Histogram{
|
||||||
SampleCount: proto.Uint64(2),
|
SampleCount: proto.Uint64(3),
|
||||||
SampleSum: proto.Float64(7),
|
SampleSum: proto.Float64(12.1),
|
||||||
Schema: proto.Int32(2),
|
Schema: proto.Int32(2),
|
||||||
ZeroThreshold: proto.Float64(2.938735877055719e-39),
|
ZeroThreshold: proto.Float64(2.938735877055719e-39),
|
||||||
ZeroCount: proto.Uint64(0),
|
ZeroCount: proto.Uint64(0),
|
||||||
PositiveSpan: []*dto.BucketSpan{
|
PositiveSpan: []*dto.BucketSpan{
|
||||||
{Offset: proto.Int32(7), Length: proto.Uint32(2)},
|
{Offset: proto.Int32(7), Length: proto.Uint32(4)},
|
||||||
},
|
},
|
||||||
PositiveDelta: []int64{1, 0},
|
PositiveDelta: []int64{1, 0, -1, 1},
|
||||||
CreatedTimestamp: timestamppb.New(now.Add(10 * time.Minute)), // We expect reset to happen after 9 minutes.
|
CreatedTimestamp: timestamppb.New(now.Add(9 * time.Minute)), // We expect reset to happen after 8 minutes.
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, s := range scenarios {
|
for _, s := range scenarios {
|
||||||
t.Run(s.name, func(t *testing.T) {
|
t.Run(s.name, func(t *testing.T) {
|
||||||
ts := now
|
var (
|
||||||
|
ts = now
|
||||||
|
funcToCall func()
|
||||||
|
whenToCall time.Duration
|
||||||
|
)
|
||||||
|
|
||||||
his := NewHistogram(HistogramOpts{
|
his := NewHistogram(HistogramOpts{
|
||||||
Name: "name",
|
Name: "name",
|
||||||
|
@ -972,12 +976,22 @@ func TestNativeHistogram(t *testing.T) {
|
||||||
NativeHistogramMinResetDuration: s.minResetDuration,
|
NativeHistogramMinResetDuration: s.minResetDuration,
|
||||||
NativeHistogramMaxZeroThreshold: s.maxZeroThreshold,
|
NativeHistogramMaxZeroThreshold: s.maxZeroThreshold,
|
||||||
now: func() time.Time { return ts },
|
now: func() time.Time { return ts },
|
||||||
|
afterFunc: func(d time.Duration, f func()) *time.Timer {
|
||||||
|
funcToCall = f
|
||||||
|
whenToCall = d
|
||||||
|
return nil
|
||||||
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
ts = ts.Add(time.Minute)
|
ts = ts.Add(time.Minute)
|
||||||
for _, o := range s.observations {
|
for _, o := range s.observations {
|
||||||
his.Observe(o)
|
his.Observe(o)
|
||||||
ts = ts.Add(time.Minute)
|
ts = ts.Add(time.Minute)
|
||||||
|
whenToCall -= time.Minute
|
||||||
|
if funcToCall != nil && whenToCall <= 0 {
|
||||||
|
funcToCall()
|
||||||
|
funcToCall = nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
m := &dto.Metric{}
|
m := &dto.Metric{}
|
||||||
if err := his.Write(m); err != nil {
|
if err := his.Write(m); err != nil {
|
||||||
|
|
Loading…
Reference in New Issue