From 2c1dac4e8f621557f2bdaed7faf9101baa2c3ecf Mon Sep 17 00:00:00 2001 From: xieyuschen Date: Wed, 27 Mar 2024 17:58:17 +0800 Subject: [PATCH] improve: enhance overflow and precision losing case --- prometheus/counter.go | 70 ++++++++++++++++++++++++---- prometheus/counter_test.go | 93 ++++++++++++++++++++++++++++++++++---- 2 files changed, 143 insertions(+), 20 deletions(-) diff --git a/prometheus/counter.go b/prometheus/counter.go index 4ce84e7..8ec6033 100644 --- a/prometheus/counter.go +++ b/prometheus/counter.go @@ -106,7 +106,11 @@ type counter struct { // in the struct to guarantee alignment for atomic operations. // http://golang.org/pkg/sync/atomic/#pkg-note-BUG valBits uint64 - valInt uint64 + + // change is used to record the numbers which will lose if it adds with float64 directly, + // due to the rounding error of IEEE754 representation. + // we aggregate the values until it's large enough to conquer the rounding error + change uint64 selfCollector desc *Desc @@ -119,6 +123,20 @@ type counter struct { now func() time.Time } +// addWithRoundingErrorChecking adds the base and addend, +// and returns the result and checks whether the addend is too small that causes a rounding error. +// +// note that if a part of addend is added on base but the left causes a rounding error, +// we don't respect this case +func addWithRoundingErrorChecking(base float64, addend float64) (float64, bool) { + if addend == 0 { + return base, false + } + + sum := base + addend + return sum, sum == base +} + func (c *counter) Desc() *Desc { return c.desc } @@ -128,15 +146,42 @@ func (c *counter) Add(v float64) { panic(errors.New("counter cannot decrease in value")) } - ival := uint64(v) - if float64(ival) == v { - atomic.AddUint64(&c.valInt, ival) - return - } - for { oldBits := atomic.LoadUint64(&c.valBits) - newBits := math.Float64bits(math.Float64frombits(oldBits) + v) + + u := uint64(v) + + // not the full value of v will be added into the float as a part of them might have rounding + // error as well, we don't respect this case(we don't have a proper way to handle it). + newF, hasRoundingErr := addWithRoundingErrorChecking(math.Float64frombits(oldBits), v) + if hasRoundingErr && v == float64(u) { + // we believe the float64(uint64(v)) is equal to v if it's an integer + // because it causes a rounding error, + // it doesn't equal only when v is a quite large number or it's a float + + oldChange := atomic.LoadUint64(&c.change) + newF, isChangeSmall := addWithRoundingErrorChecking(math.Float64frombits(oldBits), float64(oldChange+u)) + + if isChangeSmall { + if atomic.CompareAndSwapUint64(&c.change, oldChange, oldChange+u) { + return + } + continue + } + newBits := math.Float64bits(newF) + if atomic.CompareAndSwapUint64(&c.valBits, oldBits, newBits) { + // todo: here we might lose some small values, but it's acceptable + // otherwise we have no way to avoid this using atomic + // mutex might be too heavy for our case here + atomic.StoreUint64(&c.change, 0) + return + } + // fail to update the change as another go routine changes it + continue + } + + // we have no idea about the precision losing for float number + newBits := math.Float64bits(newF) if atomic.CompareAndSwapUint64(&c.valBits, oldBits, newBits) { return } @@ -149,12 +194,17 @@ func (c *counter) AddWithExemplar(v float64, e Labels) { } func (c *counter) Inc() { - atomic.AddUint64(&c.valInt, 1) + // to keep the efficiency, we still increase the change, + // and ignore the rare overflow case as only Inc and float with rounding error will use it + atomic.AddUint64(&c.change, 1) } func (c *counter) get() float64 { fval := math.Float64frombits(atomic.LoadUint64(&c.valBits)) - ival := atomic.LoadUint64(&c.valInt) + ival := atomic.LoadUint64(&c.change) + // it's tolerated to lose precision for float during collection + // as this is unavoidable. + // what the client could do is try to keep those data for the future when losing precision return fval + float64(ival) } diff --git a/prometheus/counter_test.go b/prometheus/counter_test.go index 3686199..3587865 100644 --- a/prometheus/counter_test.go +++ b/prometheus/counter_test.go @@ -38,22 +38,22 @@ func TestCounterAdd(t *testing.T) { if expected, got := 0.0, math.Float64frombits(counter.valBits); expected != got { t.Errorf("Expected %f, got %f.", expected, got) } - if expected, got := uint64(1), counter.valInt; expected != got { + if expected, got := uint64(1), counter.change; expected != got { t.Errorf("Expected %d, got %d.", expected, got) } counter.Add(42) - if expected, got := 0.0, math.Float64frombits(counter.valBits); expected != got { + if expected, got := 42.0, math.Float64frombits(counter.valBits); expected != got { t.Errorf("Expected %f, got %f.", expected, got) } - if expected, got := uint64(43), counter.valInt; expected != got { + if expected, got := uint64(1), counter.change; expected != got { t.Errorf("Expected %d, got %d.", expected, got) } counter.Add(24.42) - if expected, got := 24.42, math.Float64frombits(counter.valBits); expected != got { + if expected, got := 42+24.42, math.Float64frombits(counter.valBits); expected != got { t.Errorf("Expected %f, got %f.", expected, got) } - if expected, got := uint64(43), counter.valInt; expected != got { + if expected, got := uint64(1), counter.change; expected != got { t.Errorf("Expected %d, got %d.", expected, got) } @@ -157,7 +157,7 @@ func TestCounterAddInf(t *testing.T) { if expected, got := 0.0, math.Float64frombits(counter.valBits); expected != got { t.Errorf("Expected %f, got %f.", expected, got) } - if expected, got := uint64(1), counter.valInt; expected != got { + if expected, got := uint64(1), counter.change; expected != got { t.Errorf("Expected %d, got %d.", expected, got) } @@ -165,7 +165,7 @@ func TestCounterAddInf(t *testing.T) { if expected, got := math.Inf(1), math.Float64frombits(counter.valBits); expected != got { t.Errorf("valBits expected %f, got %f.", expected, got) } - if expected, got := uint64(1), counter.valInt; expected != got { + if expected, got := uint64(1), counter.change; expected != got { t.Errorf("valInts expected %d, got %d.", expected, got) } @@ -173,7 +173,7 @@ func TestCounterAddInf(t *testing.T) { if expected, got := math.Inf(1), math.Float64frombits(counter.valBits); expected != got { t.Errorf("Expected %f, got %f.", expected, got) } - if expected, got := uint64(2), counter.valInt; expected != got { + if expected, got := uint64(2), counter.change; expected != got { t.Errorf("Expected %d, got %d.", expected, got) } @@ -207,7 +207,7 @@ func TestCounterAddLarge(t *testing.T) { if expected, got := large, math.Float64frombits(counter.valBits); expected != got { t.Errorf("valBits expected %f, got %f.", expected, got) } - if expected, got := uint64(0), counter.valInt; expected != got { + if expected, got := uint64(0), counter.change; expected != got { t.Errorf("valInts expected %d, got %d.", expected, got) } @@ -240,7 +240,7 @@ func TestCounterAddSmall(t *testing.T) { if expected, got := small, math.Float64frombits(counter.valBits); expected != got { t.Errorf("valBits expected %f, got %f.", expected, got) } - if expected, got := uint64(0), counter.valInt; expected != got { + if expected, got := uint64(0), counter.change; expected != got { t.Errorf("valInts expected %d, got %d.", expected, got) } @@ -386,3 +386,76 @@ func expectCTsForMetricVecValues(t testing.TB, vec *MetricVec, typ dto.MetricTyp } } } + +func Test_hasRoundingError(t *testing.T) { + // for reviewing, could use this online tool to convert decimal + // https://baseconvert.com/ieee-754-floating-point + tests := []struct { + name string + base float64 + delta float64 + wantRoundingError bool + wantNumber float64 + }{ + { + name: "no rounding error", + base: 0, + delta: 1, + wantRoundingError: false, + wantNumber: 1, + }, + { + name: "no rounding error", + base: 1<<53 - 1, + delta: 1, + wantRoundingError: false, + wantNumber: 1 << 53, + }, + { + name: "rounding error", + base: 1 << 53, + delta: 1, + wantRoundingError: true, + wantNumber: 1 << 53, + }, + { + name: "rounding error", + base: 1 << 54, + delta: 1, + wantRoundingError: true, + wantNumber: 1 << 54, + }, + { + name: "rounding error", + base: 1 << 54, + delta: 3, + wantRoundingError: false, + wantNumber: 18014398509481988, + }, + { + name: "rounding error", + base: 1 << 64, + delta: 1000, + wantRoundingError: true, + wantNumber: 1 << 64, + }, + { + name: "no rounding error", + base: 1 << 64, + delta: 10000, + wantRoundingError: false, + wantNumber: 18446744073709559808, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + expectNumber, hasRoundingErr := addWithRoundingErrorChecking(tt.base, tt.delta) + if expectNumber != tt.wantNumber { + t.Errorf("addWithRoundingErrorChecking() = %f, wantRoundingError %f", expectNumber, tt.wantNumber) + } + if hasRoundingErr != tt.wantRoundingError { + t.Errorf("addWithRoundingErrorChecking() = %v, wantRoundingError %v", hasRoundingErr, tt.wantRoundingError) + } + }) + } +}