Save memory on histograms: slightly faster with less code (#536)

Use the highest bit for buffer switch tracking in histograms

Signed-off-by: Pascal S. de Kloe <pascal@quies.net>
This commit is contained in:
Pascal S. de Kloe 2019-02-11 18:29:02 +01:00 committed by Björn Rabenstein
parent df7545b48d
commit 19ff277483
1 changed files with 43 additions and 75 deletions

View File

@ -224,33 +224,34 @@ type histogramCounts struct {
}
type histogram struct {
// countAndHotIdx is a complicated one. For lock-free yet atomic
// observations, we need to save the total count of observations again,
// combined with the index of the currently-hot counts struct, so that
// we can perform the operation on both values atomically. The least
// significant bit defines the hot counts struct. The remaining 63 bits
// represent the total count of observations. This happens under the
// assumption that the 63bit count will never overflow. Rationale: An
// observations takes about 30ns. Let's assume it could happen in
// 10ns. Overflowing the counter will then take at least (2^63)*10ns,
// which is about 3000 years.
// CountAndHotIdx enables lock-free writes with use of atomic updates.
// The most significant bit is the hot index [0 or 1] of the count field
// below. Writes update the hot one. All remaining bits count the number
// of writes initiated. Write transactions start by incrementing this
// counter, and finish by incrementing the count field in the respective
// histogramCounts, as a marker for completion.
//
// This has to be first in the struct for 64bit alignment. See
// Reads swap the hotcold in a switchMutex lock. A cooldown is awaited
// (in such lock) by comparing the number of writes with the initiation
// count. Once they match, then the last write transaction on the now
// cool one completed. All cool fields must be merged into the new hot
// before the unlock of switchMutex.
//
// Fields with atomic access first! See alignment constraint:
// http://golang.org/pkg/sync/atomic/#pkg-note-BUG
countAndHotIdx uint64
selfCollector
desc *Desc
writeMtx sync.Mutex // Only used in the Write method.
upperBounds []float64
// Two counts, one is "hot" for lock-free observations, the other is
// "cold" for writing out a dto.Metric. It has to be an array of
// pointers to guarantee 64bit alignment of the histogramCounts, see
// Counts has to be an array of pointers to guarantee 64bit alignment of
// the histogramCounts, see
// http://golang.org/pkg/sync/atomic/#pkg-note-BUG.
counts [2]*histogramCounts
hotIdx int // Index of currently-hot counts. Only used within Write.
switchMtx sync.Mutex
selfCollector
desc *Desc
upperBounds []float64
labelPairs []*dto.LabelPair
}
@ -271,11 +272,11 @@ func (h *histogram) Observe(v float64) {
// 300 buckets: 154 ns/op linear - binary 61.6 ns/op
i := sort.SearchFloat64s(h.upperBounds, v)
// We increment h.countAndHotIdx by 2 so that the counter in the upper
// 63 bits gets incremented by 1. At the same time, we get the new value
// We increment h.countAndHotIdx so that the counter in the lower
// 63 bits gets incremented. At the same time, we get the new value
// back, which we can use to find the currently-hot counts.
n := atomic.AddUint64(&h.countAndHotIdx, 2)
hotCounts := h.counts[n%2]
n := atomic.AddUint64(&h.countAndHotIdx, 1)
hotCounts := h.counts[n>>63]
if i < len(h.upperBounds) {
atomic.AddUint64(&hotCounts.buckets[i], 1)
@ -293,72 +294,39 @@ func (h *histogram) Observe(v float64) {
}
func (h *histogram) Write(out *dto.Metric) error {
var (
his = &dto.Histogram{}
buckets = make([]*dto.Bucket, len(h.upperBounds))
hotCounts, coldCounts *histogramCounts
count uint64
)
// For simplicity, we mutex the rest of this method. It is not in the
// hot path, i.e. Observe is called much more often than Write. The
// complication of making Write lock-free isn't worth it.
h.writeMtx.Lock()
defer h.writeMtx.Unlock()
h.switchMtx.Lock()
defer h.switchMtx.Unlock()
// This is a bit arcane, which is why the following spells out this if
// clause in English:
//
// If the currently-hot counts struct is #0, we atomically increment
// h.countAndHotIdx by 1 so that from now on Observe will use the counts
// struct #1. Furthermore, the atomic increment gives us the new value,
// which, in its most significant 63 bits, tells us the count of
// observations done so far up to and including currently ongoing
// observations still using the counts struct just changed from hot to
// cold. To have a normal uint64 for the count, we bitshift by 1 and
// save the result in count. We also set h.hotIdx to 1 for the next
// Write call, and we will refer to counts #1 as hotCounts and to counts
// #0 as coldCounts.
//
// If the currently-hot counts struct is #1, we do the corresponding
// things the other way round. We have to _decrement_ h.countAndHotIdx
// (which is a bit arcane in itself, as we have to express -1 with an
// unsigned int...).
if h.hotIdx == 0 {
count = atomic.AddUint64(&h.countAndHotIdx, 1) >> 1
h.hotIdx = 1
hotCounts = h.counts[1]
coldCounts = h.counts[0]
} else {
count = atomic.AddUint64(&h.countAndHotIdx, ^uint64(0)) >> 1 // Decrement.
h.hotIdx = 0
hotCounts = h.counts[0]
coldCounts = h.counts[1]
}
// Adding 1<<63 switches the hot index (from 0 to 1 or from 1 to 0)
// without touching the count bits. See the struct comments for a full
// description of the algorithm.
n := atomic.AddUint64(&h.countAndHotIdx, 1<<63)
count := n & ((1 << 63) - 1)
hotCounts := h.counts[n>>63]
coldCounts := h.counts[(^n)>>63]
// Now we have to wait for the now-declared-cold counts to actually cool
// down, i.e. wait for all observations still using it to finish. That's
// the case once the count in the cold counts struct is the same as the
// one atomically retrieved from the upper 63bits of h.countAndHotIdx.
for {
if count == atomic.LoadUint64(&coldCounts.count) {
break
}
// await cooldown
for count != atomic.LoadUint64(&coldCounts.count) {
runtime.Gosched() // Let observations get work done.
}
his.SampleCount = proto.Uint64(count)
his.SampleSum = proto.Float64(math.Float64frombits(atomic.LoadUint64(&coldCounts.sumBits)))
his := &dto.Histogram{
Bucket: make([]*dto.Bucket, len(h.upperBounds)),
SampleCount: proto.Uint64(count),
SampleSum: proto.Float64(math.Float64frombits(atomic.LoadUint64(&coldCounts.sumBits))),
}
var cumCount uint64
for i, upperBound := range h.upperBounds {
cumCount += atomic.LoadUint64(&coldCounts.buckets[i])
buckets[i] = &dto.Bucket{
his.Bucket[i] = &dto.Bucket{
CumulativeCount: proto.Uint64(cumCount),
UpperBound: proto.Float64(upperBound),
}
}
his.Bucket = buckets
out.Histogram = his
out.Label = h.labelPairs