Save memory on histograms: slightly faster with less code (#536)
Use the highest bit for buffer switch tracking in histograms. Signed-off-by: Pascal S. de Kloe <pascal@quies.net>
This commit is contained in:
parent
df7545b48d
commit
19ff277483
|
@ -224,33 +224,34 @@ type histogramCounts struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type histogram struct {
|
type histogram struct {
|
||||||
// countAndHotIdx is a complicated one. For lock-free yet atomic
|
// countAndHotIdx enables lock-free writes with use of atomic updates.
|
||||||
// observations, we need to save the total count of observations again,
|
// The most significant bit is the hot index [0 or 1] of the count field
|
||||||
// combined with the index of the currently-hot counts struct, so that
|
// below. Writes update the hot one. All remaining bits count the number
|
||||||
// we can perform the operation on both values atomically. The least
|
// of writes initiated. Write transactions start by incrementing this
|
||||||
// significant bit defines the hot counts struct. The remaining 63 bits
|
// counter, and finish by incrementing the count field in the respective
|
||||||
// represent the total count of observations. This happens under the
|
// histogramCounts, as a marker for completion.
|
||||||
// assumption that the 63bit count will never overflow. Rationale: An
|
|
||||||
// observations takes about 30ns. Let's assume it could happen in
|
|
||||||
// 10ns. Overflowing the counter will then take at least (2^63)*10ns,
|
|
||||||
// which is about 3000 years.
|
|
||||||
//
|
//
|
||||||
// This has to be first in the struct for 64bit alignment. See
|
// Reads swap the hot and cold counts under the switchMtx lock. A cooldown is awaited
|
||||||
|
// (in such lock) by comparing the number of writes with the initiation
|
||||||
|
// count. Once they match, then the last write transaction on the now
|
||||||
|
// cool one completed. All cool fields must be merged into the new hot
|
||||||
|
// before switchMtx is unlocked.
|
||||||
|
//
|
||||||
|
// Fields with atomic access first! See alignment constraint:
|
||||||
// http://golang.org/pkg/sync/atomic/#pkg-note-BUG
|
// http://golang.org/pkg/sync/atomic/#pkg-note-BUG
|
||||||
countAndHotIdx uint64
|
countAndHotIdx uint64
|
||||||
|
|
||||||
selfCollector
|
// counts has to be an array of pointers to guarantee 64bit alignment of
|
||||||
desc *Desc
|
// the histogramCounts, see
|
||||||
writeMtx sync.Mutex // Only used in the Write method.
|
|
||||||
|
|
||||||
upperBounds []float64
|
|
||||||
|
|
||||||
// Two counts, one is "hot" for lock-free observations, the other is
|
|
||||||
// "cold" for writing out a dto.Metric. It has to be an array of
|
|
||||||
// pointers to guarantee 64bit alignment of the histogramCounts, see
|
|
||||||
// http://golang.org/pkg/sync/atomic/#pkg-note-BUG.
|
// http://golang.org/pkg/sync/atomic/#pkg-note-BUG.
|
||||||
counts [2]*histogramCounts
|
counts [2]*histogramCounts
|
||||||
hotIdx int // Index of currently-hot counts. Only used within Write.
|
|
||||||
|
switchMtx sync.Mutex
|
||||||
|
|
||||||
|
selfCollector
|
||||||
|
desc *Desc
|
||||||
|
|
||||||
|
upperBounds []float64
|
||||||
|
|
||||||
labelPairs []*dto.LabelPair
|
labelPairs []*dto.LabelPair
|
||||||
}
|
}
|
||||||
|
@ -271,11 +272,11 @@ func (h *histogram) Observe(v float64) {
|
||||||
// 300 buckets: 154 ns/op linear - binary 61.6 ns/op
|
// 300 buckets: 154 ns/op linear - binary 61.6 ns/op
|
||||||
i := sort.SearchFloat64s(h.upperBounds, v)
|
i := sort.SearchFloat64s(h.upperBounds, v)
|
||||||
|
|
||||||
// We increment h.countAndHotIdx by 2 so that the counter in the upper
|
// We increment h.countAndHotIdx so that the counter in the lower
|
||||||
// 63 bits gets incremented by 1. At the same time, we get the new value
|
// 63 bits gets incremented. At the same time, we get the new value
|
||||||
// back, which we can use to find the currently-hot counts.
|
// back, which we can use to find the currently-hot counts.
|
||||||
n := atomic.AddUint64(&h.countAndHotIdx, 2)
|
n := atomic.AddUint64(&h.countAndHotIdx, 1)
|
||||||
hotCounts := h.counts[n%2]
|
hotCounts := h.counts[n>>63]
|
||||||
|
|
||||||
if i < len(h.upperBounds) {
|
if i < len(h.upperBounds) {
|
||||||
atomic.AddUint64(&hotCounts.buckets[i], 1)
|
atomic.AddUint64(&hotCounts.buckets[i], 1)
|
||||||
|
@ -293,72 +294,39 @@ func (h *histogram) Observe(v float64) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *histogram) Write(out *dto.Metric) error {
|
func (h *histogram) Write(out *dto.Metric) error {
|
||||||
var (
|
|
||||||
his = &dto.Histogram{}
|
|
||||||
buckets = make([]*dto.Bucket, len(h.upperBounds))
|
|
||||||
hotCounts, coldCounts *histogramCounts
|
|
||||||
count uint64
|
|
||||||
)
|
|
||||||
|
|
||||||
// For simplicity, we mutex the rest of this method. It is not in the
|
// For simplicity, we mutex the rest of this method. It is not in the
|
||||||
// hot path, i.e. Observe is called much more often than Write. The
|
// hot path, i.e. Observe is called much more often than Write. The
|
||||||
// complication of making Write lock-free isn't worth it.
|
// complication of making Write lock-free isn't worth it.
|
||||||
h.writeMtx.Lock()
|
h.switchMtx.Lock()
|
||||||
defer h.writeMtx.Unlock()
|
defer h.switchMtx.Unlock()
|
||||||
|
|
||||||
// This is a bit arcane, which is why the following spells out this if
|
// Adding 1<<63 switches the hot index (from 0 to 1 or from 1 to 0)
|
||||||
// clause in English:
|
// without touching the count bits. See the struct comments for a full
|
||||||
//
|
// description of the algorithm.
|
||||||
// If the currently-hot counts struct is #0, we atomically increment
|
n := atomic.AddUint64(&h.countAndHotIdx, 1<<63)
|
||||||
// h.countAndHotIdx by 1 so that from now on Observe will use the counts
|
count := n & ((1 << 63) - 1)
|
||||||
// struct #1. Furthermore, the atomic increment gives us the new value,
|
hotCounts := h.counts[n>>63]
|
||||||
// which, in its most significant 63 bits, tells us the count of
|
coldCounts := h.counts[(^n)>>63]
|
||||||
// observations done so far up to and including currently ongoing
|
|
||||||
// observations still using the counts struct just changed from hot to
|
|
||||||
// cold. To have a normal uint64 for the count, we bitshift by 1 and
|
|
||||||
// save the result in count. We also set h.hotIdx to 1 for the next
|
|
||||||
// Write call, and we will refer to counts #1 as hotCounts and to counts
|
|
||||||
// #0 as coldCounts.
|
|
||||||
//
|
|
||||||
// If the currently-hot counts struct is #1, we do the corresponding
|
|
||||||
// things the other way round. We have to _decrement_ h.countAndHotIdx
|
|
||||||
// (which is a bit arcane in itself, as we have to express -1 with an
|
|
||||||
// unsigned int...).
|
|
||||||
if h.hotIdx == 0 {
|
|
||||||
count = atomic.AddUint64(&h.countAndHotIdx, 1) >> 1
|
|
||||||
h.hotIdx = 1
|
|
||||||
hotCounts = h.counts[1]
|
|
||||||
coldCounts = h.counts[0]
|
|
||||||
} else {
|
|
||||||
count = atomic.AddUint64(&h.countAndHotIdx, ^uint64(0)) >> 1 // Decrement.
|
|
||||||
h.hotIdx = 0
|
|
||||||
hotCounts = h.counts[0]
|
|
||||||
coldCounts = h.counts[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now we have to wait for the now-declared-cold counts to actually cool
|
// Await cooldown: spin until every write that started on the now-cold counts has completed.
|
||||||
// down, i.e. wait for all observations still using it to finish. That's
|
for count != atomic.LoadUint64(&coldCounts.count) {
|
||||||
// the case once the count in the cold counts struct is the same as the
|
|
||||||
// one atomically retrieved from the upper 63bits of h.countAndHotIdx.
|
|
||||||
for {
|
|
||||||
if count == atomic.LoadUint64(&coldCounts.count) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
runtime.Gosched() // Let observations get work done.
|
runtime.Gosched() // Let observations get work done.
|
||||||
}
|
}
|
||||||
|
|
||||||
his.SampleCount = proto.Uint64(count)
|
his := &dto.Histogram{
|
||||||
his.SampleSum = proto.Float64(math.Float64frombits(atomic.LoadUint64(&coldCounts.sumBits)))
|
Bucket: make([]*dto.Bucket, len(h.upperBounds)),
|
||||||
|
SampleCount: proto.Uint64(count),
|
||||||
|
SampleSum: proto.Float64(math.Float64frombits(atomic.LoadUint64(&coldCounts.sumBits))),
|
||||||
|
}
|
||||||
var cumCount uint64
|
var cumCount uint64
|
||||||
for i, upperBound := range h.upperBounds {
|
for i, upperBound := range h.upperBounds {
|
||||||
cumCount += atomic.LoadUint64(&coldCounts.buckets[i])
|
cumCount += atomic.LoadUint64(&coldCounts.buckets[i])
|
||||||
buckets[i] = &dto.Bucket{
|
his.Bucket[i] = &dto.Bucket{
|
||||||
CumulativeCount: proto.Uint64(cumCount),
|
CumulativeCount: proto.Uint64(cumCount),
|
||||||
UpperBound: proto.Float64(upperBound),
|
UpperBound: proto.Float64(upperBound),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
his.Bucket = buckets
|
|
||||||
out.Histogram = his
|
out.Histogram = his
|
||||||
out.Label = h.labelPairs
|
out.Label = h.labelPairs
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue