diff --git a/prometheus/histogram.go b/prometheus/histogram.go index 20ee1af..98e06bf 100644 --- a/prometheus/histogram.go +++ b/prometheus/histogram.go @@ -224,33 +224,34 @@ type histogramCounts struct { } type histogram struct { - // countAndHotIdx is a complicated one. For lock-free yet atomic - // observations, we need to save the total count of observations again, - // combined with the index of the currently-hot counts struct, so that - // we can perform the operation on both values atomically. The least - // significant bit defines the hot counts struct. The remaining 63 bits - // represent the total count of observations. This happens under the - // assumption that the 63bit count will never overflow. Rationale: An - // observations takes about 30ns. Let's assume it could happen in - // 10ns. Overflowing the counter will then take at least (2^63)*10ns, - // which is about 3000 years. + // CountAndHotIdx enables lock-free writes with use of atomic updates. + // The most significant bit is the hot index [0 or 1] of the count field + // below. Writes update the hot one. All remaining bits count the number + // of writes initiated. Write transactions start by incrementing this + // counter, and finish by incrementing the count field in the respective + // histogramCounts, as a marker for completion. // - // This has to be first in the struct for 64bit alignment. See + // Reads swap the hot–cold in a switchMutex lock. A cooldown is awaited + // (in such lock) by comparing the number of writes with the initiation + // count. Once they match, then the last write transaction on the now + // cool one completed. All cool fields must be merged into the new hot + // before the unlock of switchMutex. + // + // Fields with atomic access first! See alignment constraint: // http://golang.org/pkg/sync/atomic/#pkg-note-BUG countAndHotIdx uint64 - selfCollector - desc *Desc - writeMtx sync.Mutex // Only used in the Write method. - - upperBounds []float64 - - // Two counts, one is "hot" for lock-free observations, the other is - // "cold" for writing out a dto.Metric. It has to be an array of - // pointers to guarantee 64bit alignment of the histogramCounts, see + // Counts has to be an array of pointers to guarantee 64bit alignment of + // the histogramCounts, see // http://golang.org/pkg/sync/atomic/#pkg-note-BUG. counts [2]*histogramCounts - hotIdx int // Index of currently-hot counts. Only used within Write. + + switchMtx sync.Mutex + + selfCollector + desc *Desc + + upperBounds []float64 labelPairs []*dto.LabelPair } @@ -271,11 +272,11 @@ func (h *histogram) Observe(v float64) { // 300 buckets: 154 ns/op linear - binary 61.6 ns/op i := sort.SearchFloat64s(h.upperBounds, v) - // We increment h.countAndHotIdx by 2 so that the counter in the upper - // 63 bits gets incremented by 1. At the same time, we get the new value + // We increment h.countAndHotIdx so that the counter in the lower + // 63 bits gets incremented. At the same time, we get the new value // back, which we can use to find the currently-hot counts. - n := atomic.AddUint64(&h.countAndHotIdx, 2) - hotCounts := h.counts[n%2] + n := atomic.AddUint64(&h.countAndHotIdx, 1) + hotCounts := h.counts[n>>63] if i < len(h.upperBounds) { atomic.AddUint64(&hotCounts.buckets[i], 1) @@ -293,72 +294,39 @@ func (h *histogram) Observe(v float64) { } func (h *histogram) Write(out *dto.Metric) error { - var ( - his = &dto.Histogram{} - buckets = make([]*dto.Bucket, len(h.upperBounds)) - hotCounts, coldCounts *histogramCounts - count uint64 - ) - // For simplicity, we mutex the rest of this method. It is not in the // hot path, i.e. Observe is called much more often than Write. The // complication of making Write lock-free isn't worth it. - h.writeMtx.Lock() - defer h.writeMtx.Unlock() + h.switchMtx.Lock() + defer h.switchMtx.Unlock() - // This is a bit arcane, which is why the following spells out this if - // clause in English: - // - // If the currently-hot counts struct is #0, we atomically increment - // h.countAndHotIdx by 1 so that from now on Observe will use the counts - // struct #1. Furthermore, the atomic increment gives us the new value, - // which, in its most significant 63 bits, tells us the count of - // observations done so far up to and including currently ongoing - // observations still using the counts struct just changed from hot to - // cold. To have a normal uint64 for the count, we bitshift by 1 and - // save the result in count. We also set h.hotIdx to 1 for the next - // Write call, and we will refer to counts #1 as hotCounts and to counts - // #0 as coldCounts. - // - // If the currently-hot counts struct is #1, we do the corresponding - // things the other way round. We have to _decrement_ h.countAndHotIdx - // (which is a bit arcane in itself, as we have to express -1 with an - // unsigned int...). - if h.hotIdx == 0 { - count = atomic.AddUint64(&h.countAndHotIdx, 1) >> 1 - h.hotIdx = 1 - hotCounts = h.counts[1] - coldCounts = h.counts[0] - } else { - count = atomic.AddUint64(&h.countAndHotIdx, ^uint64(0)) >> 1 // Decrement. - h.hotIdx = 0 - hotCounts = h.counts[0] - coldCounts = h.counts[1] - } + // Adding 1<<63 switches the hot index (from 0 to 1 or from 1 to 0) + // without touching the count bits. See the struct comments for a full + // description of the algorithm. + n := atomic.AddUint64(&h.countAndHotIdx, 1<<63) + count := n & ((1 << 63) - 1) + hotCounts := h.counts[n>>63] + coldCounts := h.counts[(^n)>>63] - // Now we have to wait for the now-declared-cold counts to actually cool - // down, i.e. wait for all observations still using it to finish. That's - // the case once the count in the cold counts struct is the same as the - // one atomically retrieved from the upper 63bits of h.countAndHotIdx. - for { - if count == atomic.LoadUint64(&coldCounts.count) { - break - } + // await cooldown + for count != atomic.LoadUint64(&coldCounts.count) { runtime.Gosched() // Let observations get work done. } - his.SampleCount = proto.Uint64(count) - his.SampleSum = proto.Float64(math.Float64frombits(atomic.LoadUint64(&coldCounts.sumBits))) + his := &dto.Histogram{ + Bucket: make([]*dto.Bucket, len(h.upperBounds)), + SampleCount: proto.Uint64(count), + SampleSum: proto.Float64(math.Float64frombits(atomic.LoadUint64(&coldCounts.sumBits))), + } var cumCount uint64 for i, upperBound := range h.upperBounds { cumCount += atomic.LoadUint64(&coldCounts.buckets[i]) - buckets[i] = &dto.Bucket{ + his.Bucket[i] = &dto.Bucket{ CumulativeCount: proto.Uint64(cumCount), UpperBound: proto.Float64(upperBound), } } - his.Bucket = buckets out.Histogram = his out.Label = h.labelPairs