Save memory on histograms: slightly faster with less code (#536)
Use the highest bit for buffer switch tracking in histograms Signed-off-by: Pascal S. de Kloe <pascal@quies.net>
This commit is contained in:
parent
df7545b48d
commit
19ff277483
|
@ -224,33 +224,34 @@ type histogramCounts struct {
|
|||
}
|
||||
|
||||
type histogram struct {
|
||||
// countAndHotIdx is a complicated one. For lock-free yet atomic
|
||||
// observations, we need to save the total count of observations again,
|
||||
// combined with the index of the currently-hot counts struct, so that
|
||||
// we can perform the operation on both values atomically. The least
|
||||
// significant bit defines the hot counts struct. The remaining 63 bits
|
||||
// represent the total count of observations. This happens under the
|
||||
// assumption that the 63bit count will never overflow. Rationale: An
|
||||
// observations takes about 30ns. Let's assume it could happen in
|
||||
// 10ns. Overflowing the counter will then take at least (2^63)*10ns,
|
||||
// which is about 3000 years.
|
||||
// CountAndHotIdx enables lock-free writes with use of atomic updates.
|
||||
// The most significant bit is the hot index [0 or 1] of the count field
|
||||
// below. Writes update the hot one. All remaining bits count the number
|
||||
// of writes initiated. Write transactions start by incrementing this
|
||||
// counter, and finish by incrementing the count field in the respective
|
||||
// histogramCounts, as a marker for completion.
|
||||
//
|
||||
// This has to be first in the struct for 64bit alignment. See
|
||||
// Reads swap the hot–cold in a switchMutex lock. A cooldown is awaited
|
||||
// (in such lock) by comparing the number of writes with the initiation
|
||||
// count. Once they match, then the last write transaction on the now
|
||||
// cool one completed. All cool fields must be merged into the new hot
|
||||
// before the unlock of switchMutex.
|
||||
//
|
||||
// Fields with atomic access first! See alignment constraint:
|
||||
// http://golang.org/pkg/sync/atomic/#pkg-note-BUG
|
||||
countAndHotIdx uint64
|
||||
|
||||
selfCollector
|
||||
desc *Desc
|
||||
writeMtx sync.Mutex // Only used in the Write method.
|
||||
|
||||
upperBounds []float64
|
||||
|
||||
// Two counts, one is "hot" for lock-free observations, the other is
|
||||
// "cold" for writing out a dto.Metric. It has to be an array of
|
||||
// pointers to guarantee 64bit alignment of the histogramCounts, see
|
||||
// Counts has to be an array of pointers to guarantee 64bit alignment of
|
||||
// the histogramCounts, see
|
||||
// http://golang.org/pkg/sync/atomic/#pkg-note-BUG.
|
||||
counts [2]*histogramCounts
|
||||
hotIdx int // Index of currently-hot counts. Only used within Write.
|
||||
|
||||
switchMtx sync.Mutex
|
||||
|
||||
selfCollector
|
||||
desc *Desc
|
||||
|
||||
upperBounds []float64
|
||||
|
||||
labelPairs []*dto.LabelPair
|
||||
}
|
||||
|
@ -271,11 +272,11 @@ func (h *histogram) Observe(v float64) {
|
|||
// 300 buckets: 154 ns/op linear - binary 61.6 ns/op
|
||||
i := sort.SearchFloat64s(h.upperBounds, v)
|
||||
|
||||
// We increment h.countAndHotIdx by 2 so that the counter in the upper
|
||||
// 63 bits gets incremented by 1. At the same time, we get the new value
|
||||
// We increment h.countAndHotIdx so that the counter in the lower
|
||||
// 63 bits gets incremented. At the same time, we get the new value
|
||||
// back, which we can use to find the currently-hot counts.
|
||||
n := atomic.AddUint64(&h.countAndHotIdx, 2)
|
||||
hotCounts := h.counts[n%2]
|
||||
n := atomic.AddUint64(&h.countAndHotIdx, 1)
|
||||
hotCounts := h.counts[n>>63]
|
||||
|
||||
if i < len(h.upperBounds) {
|
||||
atomic.AddUint64(&hotCounts.buckets[i], 1)
|
||||
|
@ -293,72 +294,39 @@ func (h *histogram) Observe(v float64) {
|
|||
}
|
||||
|
||||
func (h *histogram) Write(out *dto.Metric) error {
|
||||
var (
|
||||
his = &dto.Histogram{}
|
||||
buckets = make([]*dto.Bucket, len(h.upperBounds))
|
||||
hotCounts, coldCounts *histogramCounts
|
||||
count uint64
|
||||
)
|
||||
|
||||
// For simplicity, we mutex the rest of this method. It is not in the
|
||||
// hot path, i.e. Observe is called much more often than Write. The
|
||||
// complication of making Write lock-free isn't worth it.
|
||||
h.writeMtx.Lock()
|
||||
defer h.writeMtx.Unlock()
|
||||
h.switchMtx.Lock()
|
||||
defer h.switchMtx.Unlock()
|
||||
|
||||
// This is a bit arcane, which is why the following spells out this if
|
||||
// clause in English:
|
||||
//
|
||||
// If the currently-hot counts struct is #0, we atomically increment
|
||||
// h.countAndHotIdx by 1 so that from now on Observe will use the counts
|
||||
// struct #1. Furthermore, the atomic increment gives us the new value,
|
||||
// which, in its most significant 63 bits, tells us the count of
|
||||
// observations done so far up to and including currently ongoing
|
||||
// observations still using the counts struct just changed from hot to
|
||||
// cold. To have a normal uint64 for the count, we bitshift by 1 and
|
||||
// save the result in count. We also set h.hotIdx to 1 for the next
|
||||
// Write call, and we will refer to counts #1 as hotCounts and to counts
|
||||
// #0 as coldCounts.
|
||||
//
|
||||
// If the currently-hot counts struct is #1, we do the corresponding
|
||||
// things the other way round. We have to _decrement_ h.countAndHotIdx
|
||||
// (which is a bit arcane in itself, as we have to express -1 with an
|
||||
// unsigned int...).
|
||||
if h.hotIdx == 0 {
|
||||
count = atomic.AddUint64(&h.countAndHotIdx, 1) >> 1
|
||||
h.hotIdx = 1
|
||||
hotCounts = h.counts[1]
|
||||
coldCounts = h.counts[0]
|
||||
} else {
|
||||
count = atomic.AddUint64(&h.countAndHotIdx, ^uint64(0)) >> 1 // Decrement.
|
||||
h.hotIdx = 0
|
||||
hotCounts = h.counts[0]
|
||||
coldCounts = h.counts[1]
|
||||
}
|
||||
// Adding 1<<63 switches the hot index (from 0 to 1 or from 1 to 0)
|
||||
// without touching the count bits. See the struct comments for a full
|
||||
// description of the algorithm.
|
||||
n := atomic.AddUint64(&h.countAndHotIdx, 1<<63)
|
||||
count := n & ((1 << 63) - 1)
|
||||
hotCounts := h.counts[n>>63]
|
||||
coldCounts := h.counts[(^n)>>63]
|
||||
|
||||
// Now we have to wait for the now-declared-cold counts to actually cool
|
||||
// down, i.e. wait for all observations still using it to finish. That's
|
||||
// the case once the count in the cold counts struct is the same as the
|
||||
// one atomically retrieved from the upper 63bits of h.countAndHotIdx.
|
||||
for {
|
||||
if count == atomic.LoadUint64(&coldCounts.count) {
|
||||
break
|
||||
}
|
||||
// await cooldown
|
||||
for count != atomic.LoadUint64(&coldCounts.count) {
|
||||
runtime.Gosched() // Let observations get work done.
|
||||
}
|
||||
|
||||
his.SampleCount = proto.Uint64(count)
|
||||
his.SampleSum = proto.Float64(math.Float64frombits(atomic.LoadUint64(&coldCounts.sumBits)))
|
||||
his := &dto.Histogram{
|
||||
Bucket: make([]*dto.Bucket, len(h.upperBounds)),
|
||||
SampleCount: proto.Uint64(count),
|
||||
SampleSum: proto.Float64(math.Float64frombits(atomic.LoadUint64(&coldCounts.sumBits))),
|
||||
}
|
||||
var cumCount uint64
|
||||
for i, upperBound := range h.upperBounds {
|
||||
cumCount += atomic.LoadUint64(&coldCounts.buckets[i])
|
||||
buckets[i] = &dto.Bucket{
|
||||
his.Bucket[i] = &dto.Bucket{
|
||||
CumulativeCount: proto.Uint64(cumCount),
|
||||
UpperBound: proto.Float64(upperBound),
|
||||
}
|
||||
}
|
||||
|
||||
his.Bucket = buckets
|
||||
out.Histogram = his
|
||||
out.Label = h.labelPairs
|
||||
|
||||
|
|
Loading…
Reference in New Issue