Update vendoring of perks to newest (fixed) version.

Adjust the API and usage accordingly.
Make tests stricter.

Since the merging is still faulty, tests are broken now.
The next commit will fix it by avoiding merging.
Bjoern Rabenstein 2015-01-20 18:27:10 +01:00
parent 26e2417d3e
commit 6b9530d72e
11 changed files with 563 additions and 159 deletions
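
For orientation, here is a minimal before/after sketch of the API adjustment described above. It is an illustration only; the import path follows the upstream repository referenced in the import note below, and the target values are examples.

package main

import (
	"fmt"

	"github.com/beorn7/perks/quantile"
)

func main() {
	// Before: quantiles only, with a stream-wide error set via SetEpsilon:
	//   s := quantile.NewTargeted(0.01, 0.5, 0.9, 0.99)
	//   s.SetEpsilon(0.0001)
	// After: each target quantile carries its own absolute error.
	s := quantile.NewTargeted(map[float64]float64{
		0.5:  0.05,
		0.9:  0.01,
		0.99: 0.001,
	})
	for i := 1; i <= 1000; i++ {
		s.Insert(float64(i))
	}
	fmt.Println(s.Query(0.99)) // close to 990, within the 0.001 rank error
}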

View File

@ -1 +1 @@
Imported at 5d903d2c5dc7f55829e36c62ae6c5f5f6d75e70a from https://github.com/u-c-l/perks .
Imported at f15ca8fc2964cb9f291e1cf17bb1bf9a4f9e23d5 from https://github.com/beorn7/perks .

View File

@ -0,0 +1,26 @@
package histogram
import (
"math/rand"
"testing"
)
func BenchmarkInsert10Bins(b *testing.B) {
b.StopTimer()
h := New(10)
b.StartTimer()
for i := 0; i < b.N; i++ {
f := rand.ExpFloat64()
h.Insert(f)
}
}
func BenchmarkInsert100Bins(b *testing.B) {
b.StopTimer()
h := New(100)
b.StartTimer()
for i := 0; i < b.N; i++ {
f := rand.ExpFloat64()
h.Insert(f)
}
}

View File

@ -0,0 +1,108 @@
// Package histogram provides a Go implementation of BigML's histogram package
// for Clojure/Java. It is currently experimental.
package histogram
import (
"container/heap"
"math"
"sort"
)
type Bin struct {
Count int
Sum float64
}
func (b *Bin) Update(x *Bin) {
b.Count += x.Count
b.Sum += x.Sum
}
func (b *Bin) Mean() float64 {
return b.Sum / float64(b.Count)
}
type Bins []*Bin
func (bs Bins) Len() int { return len(bs) }
func (bs Bins) Less(i, j int) bool { return bs[i].Mean() < bs[j].Mean() }
func (bs Bins) Swap(i, j int) { bs[i], bs[j] = bs[j], bs[i] }
func (bs *Bins) Push(x interface{}) {
*bs = append(*bs, x.(*Bin))
}
func (bs *Bins) Pop() interface{} {
return bs.remove(len(*bs) - 1)
}
func (bs *Bins) remove(n int) *Bin {
if n < 0 || len(*bs) < n {
return nil
}
x := (*bs)[n]
*bs = append((*bs)[:n], (*bs)[n+1:]...)
return x
}
type Histogram struct {
res *reservoir
}
func New(maxBins int) *Histogram {
return &Histogram{res: newReservoir(maxBins)}
}
func (h *Histogram) Insert(f float64) {
h.res.insert(&Bin{1, f})
h.res.compress()
}
func (h *Histogram) Bins() Bins {
return h.res.bins
}
type reservoir struct {
n int
maxBins int
bins Bins
}
func newReservoir(maxBins int) *reservoir {
return &reservoir{maxBins: maxBins}
}
func (r *reservoir) insert(bin *Bin) {
r.n += bin.Count
i := sort.Search(len(r.bins), func(i int) bool {
return r.bins[i].Mean() >= bin.Mean()
})
if i < 0 || i == r.bins.Len() {
// TODO(blake): Maybe use an .insert(i, bin) instead of
// performing the extra work of a heap.Push.
heap.Push(&r.bins, bin)
return
}
r.bins[i].Update(bin)
}
func (r *reservoir) compress() {
for r.bins.Len() > r.maxBins {
minGapIndex := -1
minGap := math.MaxFloat64
for i := 0; i < r.bins.Len()-1; i++ {
gap := gapWeight(r.bins[i], r.bins[i+1])
if minGap > gap {
minGap = gap
minGapIndex = i
}
}
prev := r.bins[minGapIndex]
next := r.bins.remove(minGapIndex + 1)
prev.Update(next)
}
}
func gapWeight(prev, next *Bin) float64 {
return next.Mean() - prev.Mean()
}
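
A minimal usage sketch for the new histogram package above (the import path is an assumption; only New, Insert, and Bins from the code above are used):

package main

import (
	"fmt"
	"math/rand"

	"github.com/beorn7/perks/histogram" // assumed import path for the vendored package
)

func main() {
	h := histogram.New(10) // keep at most 10 bins; compress() merges the closest pair beyond that
	for i := 0; i < 10000; i++ {
		h.Insert(rand.ExpFloat64())
	}
	for _, b := range h.Bins() {
		fmt.Printf("mean=%.4f count=%d\n", b.Mean(), b.Count)
	}
}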

View File

@ -0,0 +1,38 @@
package histogram
import (
"math/rand"
"testing"
)
func TestHistogram(t *testing.T) {
const numPoints = 1e6
const maxBins = 3
h := New(maxBins)
for i := 0; i < numPoints; i++ {
f := rand.ExpFloat64()
h.Insert(f)
}
bins := h.Bins()
if g := len(bins); g > maxBins {
t.Fatalf("got %d bins, wanted <= %d", g, maxBins)
}
for _, b := range bins {
t.Logf("%+v", b)
}
if g := count(h.Bins()); g != numPoints {
t.Fatalf("binned %d points, wanted %d", g, numPoints)
}
}
func count(bins Bins) int {
binCounts := 0
for _, b := range bins {
binCounts += b.Count
}
return binCounts
}

View File

@ -7,7 +7,7 @@ import (
func BenchmarkInsertTargeted(b *testing.B) {
b.ReportAllocs()
s := NewTargeted(0.01, 0.5, 0.9, 0.99)
s := NewTargeted(Targets)
b.ResetTimer()
for i := float64(0); i < float64(b.N); i++ {
s.Insert(i)
@ -15,8 +15,7 @@ func BenchmarkInsertTargeted(b *testing.B) {
}
func BenchmarkInsertTargetedSmallEpsilon(b *testing.B) {
s := NewTargeted(0.01, 0.5, 0.9, 0.99)
s.SetEpsilon(0.0001)
s := NewTargeted(TargetsSmallEpsilon)
b.ResetTimer()
for i := float64(0); i < float64(b.N); i++ {
s.Insert(i)
@ -24,7 +23,7 @@ func BenchmarkInsertTargetedSmallEpsilon(b *testing.B) {
}
func BenchmarkInsertBiased(b *testing.B) {
s := NewBiased()
s := NewLowBiased(0.01)
b.ResetTimer()
for i := float64(0); i < float64(b.N); i++ {
s.Insert(i)
@ -32,8 +31,7 @@ func BenchmarkInsertBiased(b *testing.B) {
}
func BenchmarkInsertBiasedSmallEpsilon(b *testing.B) {
s := NewBiased()
s.SetEpsilon(0.0001)
s := NewLowBiased(0.0001)
b.ResetTimer()
for i := float64(0); i < float64(b.N); i++ {
s.Insert(i)
@ -41,7 +39,7 @@ func BenchmarkInsertBiasedSmallEpsilon(b *testing.B) {
}
func BenchmarkQuery(b *testing.B) {
s := NewTargeted(0.01, 0.5, 0.9, 0.99)
s := NewTargeted(Targets)
for i := float64(0); i < 1e6; i++ {
s.Insert(i)
}
@ -53,8 +51,7 @@ func BenchmarkQuery(b *testing.B) {
}
func BenchmarkQuerySmallEpsilon(b *testing.B) {
s := NewTargeted(0.01, 0.5, 0.9, 0.99)
s.SetEpsilon(0.0001)
s := NewTargeted(TargetsSmallEpsilon)
for i := float64(0); i < 1e6; i++ {
s.Insert(i)
}

View File

@ -36,30 +36,56 @@ func (a Samples) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
type invariant func(s *stream, r float64) float64
// NewBiased returns an initialized Stream for high-biased quantiles (e.g.
// 50th, 90th, 99th) not known a priori with finer error guarantees for the
// higher ranks of the data distribution.
// See http://www.cs.rutgers.edu/~muthu/bquant.pdf for time, space, and error properties.
func NewBiased() *Stream {
// NewLowBiased returns an initialized Stream for low-biased quantiles
// (e.g. 0.01, 0.1, 0.5) where the needed quantiles are not known a priori, but
// error guarantees can still be given even for the lower ranks of the data
// distribution.
//
// The provided epsilon is a relative error, i.e. the true quantile of a value
// returned by a query is guaranteed to be within (1±Epsilon)*Quantile.
//
// See http://www.cs.rutgers.edu/~muthu/bquant.pdf for time, space, and error
// properties.
func NewLowBiased(epsilon float64) *Stream {
ƒ := func(s *stream, r float64) float64 {
return 2 * s.epsilon * r
return 2 * epsilon * r
}
return newStream(ƒ)
}
// NewHighBiased returns an initialized Stream for high-biased quantiles
// (e.g. 0.99, 0.9, 0.5) where the needed quantiles are not known a priori, but
// error guarantees can still be given even for the higher ranks of the data
// distribution.
//
// The provided epsilon is a relative error, i.e. the true quantile of a value
// returned by a query is guaranteed to be within 1-(1±Epsilon)*(1-Quantile).
//
// See http://www.cs.rutgers.edu/~muthu/bquant.pdf for time, space, and error
// properties.
func NewHighBiased(epsilon float64) *Stream {
ƒ := func(s *stream, r float64) float64 {
return 2 * epsilon * (s.n - r)
}
return newStream(ƒ)
}
// NewTargeted returns an initialized Stream concerned with a particular set of
// quantile values that are supplied a priori. Knowing these a priori reduces
// space and computation time.
// space and computation time. The targets map maps the desired quantiles to
// their absolute errors, i.e. the true quantile of a value returned by a query
// is guaranteed to be within (Quantile±Epsilon).
//
// See http://www.cs.rutgers.edu/~muthu/bquant.pdf for time, space, and error properties.
func NewTargeted(quantiles ...float64) *Stream {
func NewTargeted(targets map[float64]float64) *Stream {
ƒ := func(s *stream, r float64) float64 {
var m float64 = math.MaxFloat64
var m = math.MaxFloat64
var f float64
for _, q := range quantiles {
if q*s.n <= r {
f = (2 * s.epsilon * r) / q
for quantile, epsilon := range targets {
if quantile*s.n <= r {
f = (2 * epsilon * r) / quantile
} else {
f = (2 * s.epsilon * (s.n - r)) / (1 - q)
f = (2 * epsilon * (s.n - r)) / (1 - quantile)
}
if f < m {
m = f
@ -79,8 +105,7 @@ type Stream struct {
}
func newStream(ƒ invariant) *Stream {
const defaultEpsilon = 0.01
x := &stream{epsilon: defaultEpsilon, ƒ: ƒ}
x := &stream{ƒ: ƒ}
return &Stream{x, make(Samples, 0, 500), true}
}
@ -94,7 +119,6 @@ func (s *Stream) insert(sample Sample) {
s.sorted = false
if len(s.b) == cap(s.b) {
s.flush()
s.compress()
}
}
@ -122,6 +146,9 @@ func (s *Stream) Query(q float64) float64 {
// Merge merges samples into the underlying stream's samples. This is handy when
// merging multiple streams from separate threads, database shards, etc.
//
// ATTENTION: This method is broken and does not yield correct results. The
// underlying algorithm is not capable of merging streams correctly.
func (s *Stream) Merge(samples Samples) {
sort.Sort(samples)
s.stream.merge(samples)
@ -139,7 +166,6 @@ func (s *Stream) Samples() Samples {
return s.b
}
s.flush()
s.compress()
return s.stream.samples()
}
@ -167,20 +193,11 @@ func (s *Stream) flushed() bool {
}
type stream struct {
epsilon float64
n float64
l []Sample
ƒ invariant
}
// SetEpsilon sets the error epsilon for the Stream. The default epsilon is
// 0.01 and is usually satisfactory. If needed, this must be called before all
// Inserts.
// To learn more, see: http://www.cs.rutgers.edu/~muthu/bquant.pdf
func (s *stream) SetEpsilon(epsilon float64) {
s.epsilon = epsilon
}
func (s *stream) reset() {
s.l = s.l[:0]
s.n = 0
@ -191,6 +208,10 @@ func (s *stream) insert(v float64) {
}
func (s *stream) merge(samples Samples) {
// TODO(beorn7): This tries to merge not only individual samples, but
// whole summaries. The paper doesn't mention merging summaries at
// all. Unit tests show that the merging is inaccurate. Find out how to
// do merges properly.
var r float64
i := 0
for _, sample := range samples {
@ -200,7 +221,12 @@ func (s *stream) merge(samples Samples) {
// Insert at position i.
s.l = append(s.l, Sample{})
copy(s.l[i+1:], s.l[i:])
s.l[i] = Sample{sample.Value, sample.Width, math.Floor(s.ƒ(s, r)) - 1}
s.l[i] = Sample{
sample.Value,
sample.Width,
math.Max(sample.Delta, math.Floor(s.ƒ(s, r))-1),
// TODO(beorn7): How to calculate delta correctly?
}
i++
goto inserted
}
@ -210,7 +236,9 @@ func (s *stream) merge(samples Samples) {
i++
inserted:
s.n += sample.Width
r += sample.Width
}
s.compress()
}
func (s *stream) count() int {
@ -221,12 +249,12 @@ func (s *stream) query(q float64) float64 {
t := math.Ceil(q * s.n)
t += math.Ceil(s.ƒ(s, t) / 2)
p := s.l[0]
r := float64(0)
var r float64
for _, c := range s.l[1:] {
r += p.Width
if r+c.Width+c.Delta > t {
return p.Value
}
r += p.Width
p = c
}
return p.Value
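
To make the two error modes introduced above concrete, a small sketch contrasting the absolute guarantee of NewTargeted with the relative guarantee of NewLowBiased (illustrative values; import path as above):

package main

import (
	"fmt"

	"github.com/beorn7/perks/quantile"
)

func main() {
	// Targeted: absolute error. A value returned for q=0.99 has a true
	// quantile within 0.99±0.001, i.e. in [0.989, 0.991].
	t := quantile.NewTargeted(map[float64]float64{0.99: 0.001})

	// Low-biased: relative error. A value returned for q=0.01 has a true
	// quantile within (1±0.01)*0.01, i.e. in [0.0099, 0.0101].
	lb := quantile.NewLowBiased(0.01)

	for i := 1; i <= 100000; i++ {
		v := float64(i)
		t.Insert(v)
		lb.Insert(v)
	}
	fmt.Println(t.Query(0.99), lb.Query(0.01))
}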

View File

@ -1,81 +1,150 @@
package quantile
import (
"math"
"math/rand"
"sort"
"testing"
)
func TestQuantRandQuery(t *testing.T) {
s := NewTargeted(0.5, 0.90, 0.99)
a := make([]float64, 0, 1e5)
rand.Seed(42)
for i := 0; i < cap(a); i++ {
v := rand.NormFloat64()
s.Insert(v)
a = append(a, v)
var (
Targets = map[float64]float64{
0.01: 0.001,
0.10: 0.01,
0.50: 0.05,
0.90: 0.01,
0.99: 0.001,
}
t.Logf("len: %d", s.Count())
TargetsSmallEpsilon = map[float64]float64{
0.01: 0.0001,
0.10: 0.001,
0.50: 0.005,
0.90: 0.001,
0.99: 0.0001,
}
LowQuantiles = []float64{0.01, 0.1, 0.5}
HighQuantiles = []float64{0.99, 0.9, 0.5}
)
const RelativeEpsilon = 0.01
func verifyPercsWithAbsoluteEpsilon(t *testing.T, a []float64, s *Stream) {
sort.Float64s(a)
w, min, max := getPerc(a, 0.50)
if g := s.Query(0.50); g < min || g > max {
t.Errorf("perc50: want %v [%f,%f], got %v", w, min, max, g)
for quantile, epsilon := range Targets {
n := float64(len(a))
k := int(quantile * n)
lower := int((quantile - epsilon) * n)
if lower < 1 {
lower = 1
}
w, min, max = getPerc(a, 0.90)
if g := s.Query(0.90); g < min || g > max {
t.Errorf("perc90: want %v [%f,%f], got %v", w, min, max, g)
upper := int(math.Ceil((quantile + epsilon) * n))
if upper > len(a) {
upper = len(a)
}
w, min, max := a[k-1], a[lower-1], a[upper-1]
if g := s.Query(quantile); g < min || g > max {
t.Errorf("q=%f: want %v [%f,%f], got %v", quantile, w, min, max, g)
}
w, min, max = getPerc(a, 0.99)
if g := s.Query(0.99); g < min || g > max {
t.Errorf("perc99: want %v [%f,%f], got %v", w, min, max, g)
}
}
func TestQuantRandMergeQuery(t *testing.T) {
ch := make(chan float64)
done := make(chan *Stream)
for i := 0; i < 2; i++ {
go func() {
s := NewTargeted(0.5, 0.90, 0.99)
for v := range ch {
s.Insert(v)
}
done <- s
}()
}
func verifyLowPercsWithRelativeEpsilon(t *testing.T, a []float64, s *Stream) {
sort.Float64s(a)
for _, qu := range LowQuantiles {
n := float64(len(a))
k := int(qu * n)
rand.Seed(42)
a := make([]float64, 0, 1e6)
lowerRank := int((1 - RelativeEpsilon) * qu * n)
upperRank := int(math.Ceil((1 + RelativeEpsilon) * qu * n))
w, min, max := a[k-1], a[lowerRank-1], a[upperRank-1]
if g := s.Query(qu); g < min || g > max {
t.Errorf("q=%f: want %v [%f,%f], got %v", qu, w, min, max, g)
}
}
}
func verifyHighPercsWithRelativeEpsilon(t *testing.T, a []float64, s *Stream) {
sort.Float64s(a)
for _, qu := range HighQuantiles {
n := float64(len(a))
k := int(qu * n)
lowerRank := int((1 - (1+RelativeEpsilon)*(1-qu)) * n)
upperRank := int(math.Ceil((1 - (1-RelativeEpsilon)*(1-qu)) * n))
w, min, max := a[k-1], a[lowerRank-1], a[upperRank-1]
if g := s.Query(qu); g < min || g > max {
t.Errorf("q=%f: want %v [%f,%f], got %v", qu, w, min, max, g)
}
}
}
func populateStream(s *Stream) []float64 {
a := make([]float64, 0, 1e5+100)
for i := 0; i < cap(a); i++ {
v := rand.NormFloat64()
// Add 5% asymmetric outliers.
if i%20 == 0 {
v = v*v + 1
}
s.Insert(v)
a = append(a, v)
ch <- v
}
close(ch)
return a
}
s := <-done
o := <-done
s.Merge(o.Samples())
func TestTargetedQuery(t *testing.T) {
rand.Seed(42)
s := NewTargeted(Targets)
a := populateStream(s)
verifyPercsWithAbsoluteEpsilon(t, a, s)
}
t.Logf("len: %d", s.Count())
sort.Float64s(a)
w, min, max := getPerc(a, 0.50)
if g := s.Query(0.50); g < min || g > max {
t.Errorf("perc50: want %v [%f,%f], got %v", w, min, max, g)
}
w, min, max = getPerc(a, 0.90)
if g := s.Query(0.90); g < min || g > max {
t.Errorf("perc90: want %v [%f,%f], got %v", w, min, max, g)
}
w, min, max = getPerc(a, 0.99)
if g := s.Query(0.99); g < min || g > max {
t.Errorf("perc99: want %v [%f,%f], got %v", w, min, max, g)
}
func TestLowBiasedQuery(t *testing.T) {
rand.Seed(42)
s := NewLowBiased(RelativeEpsilon)
a := populateStream(s)
verifyLowPercsWithRelativeEpsilon(t, a, s)
}
func TestHighBiasedQuery(t *testing.T) {
rand.Seed(42)
s := NewHighBiased(RelativeEpsilon)
a := populateStream(s)
verifyHighPercsWithRelativeEpsilon(t, a, s)
}
func TestTargetedMerge(t *testing.T) {
rand.Seed(42)
s1 := NewTargeted(Targets)
s2 := NewTargeted(Targets)
a := populateStream(s1)
a = append(a, populateStream(s2)...)
s1.Merge(s2.Samples())
verifyPercsWithAbsoluteEpsilon(t, a, s1)
}
func TestLowBiasedMerge(t *testing.T) {
rand.Seed(42)
s1 := NewLowBiased(RelativeEpsilon)
s2 := NewLowBiased(RelativeEpsilon)
a := populateStream(s1)
a = append(a, populateStream(s2)...)
s1.Merge(s2.Samples())
verifyLowPercsWithRelativeEpsilon(t, a, s2)
}
func TestHighBiasedMerge(t *testing.T) {
rand.Seed(42)
s1 := NewHighBiased(RelativeEpsilon)
s2 := NewHighBiased(RelativeEpsilon)
a := populateStream(s1)
a = append(a, populateStream(s2)...)
s1.Merge(s2.Samples())
verifyHighPercsWithRelativeEpsilon(t, a, s2)
}
func TestUncompressed(t *testing.T) {
tests := []float64{0.50, 0.90, 0.95, 0.99}
q := NewTargeted(tests...)
q := NewTargeted(Targets)
for i := 100; i > 0; i-- {
q.Insert(float64(i))
}
@ -83,16 +152,16 @@ func TestUncompressed(t *testing.T) {
t.Errorf("want count 100, got %d", g)
}
// Before compression, Query should have 100% accuracy.
for _, v := range tests {
w := v * 100
if g := q.Query(v); g != w {
for quantile := range Targets {
w := quantile * 100
if g := q.Query(quantile); g != w {
t.Errorf("want %f, got %f", w, g)
}
}
}
func TestUncompressedSamples(t *testing.T) {
q := NewTargeted(0.99)
q := NewTargeted(map[float64]float64{0.99: 0.001})
for i := 1; i <= 100; i++ {
q.Insert(float64(i))
}
@ -102,7 +171,7 @@ func TestUncompressedSamples(t *testing.T) {
}
func TestUncompressedOne(t *testing.T) {
q := NewTargeted(0.90)
q := NewTargeted(map[float64]float64{0.99: 0.01})
q.Insert(3.14)
if g := q.Query(0.90); g != 3.14 {
t.Error("want PI, got", g)
@ -110,20 +179,7 @@ func TestUncompressedOne(t *testing.T) {
}
func TestDefaults(t *testing.T) {
if g := NewTargeted(0.99).Query(0.99); g != 0 {
if g := NewTargeted(map[float64]float64{0.99: 0.001}).Query(0.99); g != 0 {
t.Errorf("want 0, got %f", g)
}
}
func getPerc(x []float64, p float64) (want, min, max float64) {
k := int(float64(len(x)) * p)
lower := int(float64(len(x)) * (p - 0.04))
if lower < 0 {
lower = 0
}
upper := int(float64(len(x))*(p+0.04)) + 1
if upper >= len(x) {
upper = len(x) - 1
}
return x[k], x[lower], x[upper]
}

View File

@ -0,0 +1,90 @@
package topk
import (
"sort"
)
// http://www.cs.ucsb.edu/research/tech_reports/reports/2005-23.pdf
type Element struct {
Value string
Count int
}
type Samples []*Element
func (sm Samples) Len() int {
return len(sm)
}
func (sm Samples) Less(i, j int) bool {
return sm[i].Count < sm[j].Count
}
func (sm Samples) Swap(i, j int) {
sm[i], sm[j] = sm[j], sm[i]
}
type Stream struct {
k int
mon map[string]*Element
// the minimum Element
min *Element
}
func New(k int) *Stream {
s := new(Stream)
s.k = k
s.mon = make(map[string]*Element)
s.min = &Element{}
// Track k+1 so that less frequent items contend for that spot,
// resulting in k being more accurate.
return s
}
func (s *Stream) Insert(x string) {
s.insert(&Element{x, 1})
}
func (s *Stream) Merge(sm Samples) {
for _, e := range sm {
s.insert(e)
}
}
func (s *Stream) insert(in *Element) {
e := s.mon[in.Value]
if e != nil {
e.Count++
} else {
if len(s.mon) < s.k+1 {
e = &Element{in.Value, in.Count}
s.mon[in.Value] = e
} else {
e = s.min
delete(s.mon, e.Value)
e.Value = in.Value
e.Count += in.Count
s.min = e
}
}
if e.Count < s.min.Count {
s.min = e
}
}
func (s *Stream) Query() Samples {
var sm Samples
for _, e := range s.mon {
sm = append(sm, e)
}
sort.Sort(sort.Reverse(sm))
if len(sm) < s.k {
return sm
}
return sm[:s.k]
}
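
A short usage sketch for the top-k stream above (the import path is an assumption; New, Insert, and Query are taken from the code above):

package main

import (
	"fmt"

	"github.com/beorn7/perks/topk" // assumed import path for the vendored package
)

func main() {
	s := topk.New(3) // track the three most frequent values
	for _, w := range []string{"a", "b", "a", "c", "a", "b", "d"} {
		s.Insert(w)
	}
	for _, e := range s.Query() {
		fmt.Printf("%s: %d\n", e.Value, e.Count)
	}
}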

View File

@ -0,0 +1,57 @@
package topk
import (
"fmt"
"math/rand"
"sort"
"testing"
)
func TestTopK(t *testing.T) {
stream := New(10)
ss := []*Stream{New(10), New(10), New(10)}
m := make(map[string]int)
for _, s := range ss {
for i := 0; i < 1e6; i++ {
v := fmt.Sprintf("%x", int8(rand.ExpFloat64()))
s.Insert(v)
m[v]++
}
stream.Merge(s.Query())
}
var sm Samples
for x, s := range m {
sm = append(sm, &Element{x, s})
}
sort.Sort(sort.Reverse(sm))
g := stream.Query()
if len(g) != 10 {
t.Fatalf("got %d, want 10", len(g))
}
for i, e := range g {
if sm[i].Value != e.Value {
t.Errorf("at %d: want %q, got %q", i, sm[i].Value, e.Value)
}
}
}
func TestQuery(t *testing.T) {
queryTests := []struct {
value string
expected int
}{
{"a", 1},
{"b", 2},
{"c", 2},
}
stream := New(2)
for _, tt := range queryTests {
stream.Insert(tt.value)
if n := len(stream.Query()); n != tt.expected {
t.Errorf("want %d, got %d", tt.expected, n)
}
}
}

View File

@ -46,7 +46,7 @@ type Summary interface {
// DefObjectives are the default Summary quantile values.
var (
DefObjectives = []float64{0.5, 0.9, 0.99}
DefObjectives = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}
)
// Default values for SummaryOpts.
@ -59,8 +59,6 @@ const (
DefAgeBuckets = 10
// DefBufCap is the standard buffer size for collecting Summary observations.
DefBufCap = 500
// DefEpsilon is the default error epsilon for the quantile rank estimates.
DefEpsilon = 0.001
)
// SummaryOpts bundles the options for creating a Summary metric. It is
@ -101,9 +99,9 @@ type SummaryOpts struct {
// metric name).
ConstLabels Labels
// Objectives defines the quantile rank estimates. The default value is
// DefObjectives.
Objectives []float64
// Objectives defines the quantile rank estimates with their respective
// absolute error. The default value is DefObjectives.
Objectives map[float64]float64
// MaxAge defines the duration for which an observation stays relevant
// for the summary. Must be positive. The default value is DefMaxAge.
@ -164,18 +162,11 @@ func newSummary(desc *Desc, opts SummaryOpts, labelValues ...string) Summary {
opts.BufCap = DefBufCap
}
if opts.Epsilon < 0 {
panic(fmt.Errorf("illegal value for Epsilon=%f", opts.Epsilon))
}
if opts.Epsilon == 0. {
opts.Epsilon = DefEpsilon
}
s := &summary{
desc: desc,
objectives: opts.Objectives,
epsilon: opts.Epsilon,
sortedObjectives: make([]float64, 0, len(opts.Objectives)),
labelPairs: makeLabelPairs(desc, labelValues),
@ -193,6 +184,11 @@ func newSummary(desc *Desc, opts SummaryOpts, labelValues ...string) Summary {
}
s.headStream = s.streams[0]
for qu := range DefObjectives {
s.sortedObjectives = append(s.sortedObjectives, qu)
}
sort.Float64s(s.sortedObjectives)
s.Init(s) // Init self-collection.
return s
}
@ -206,8 +202,8 @@ type summary struct {
desc *Desc
objectives []float64
epsilon float64
objectives map[float64]float64
sortedObjectives []float64
labelPairs []*dto.LabelPair
@ -260,7 +256,7 @@ func (s *summary) Write(out *dto.Metric) error {
sum.SampleCount = proto.Uint64(s.cnt)
sum.SampleSum = proto.Float64(s.sum)
for _, rank := range s.objectives {
for _, rank := range s.sortedObjectives {
qs = append(qs, &dto.Quantile{
Quantile: proto.Float64(rank),
Value: proto.Float64(s.mergedAllStreams.Query(rank)),
@ -281,9 +277,7 @@ func (s *summary) Write(out *dto.Metric) error {
}
func (s *summary) newStream() *quantile.Stream {
stream := quantile.NewTargeted(s.objectives...)
stream.SetEpsilon(s.epsilon)
return stream
return quantile.NewTargeted(s.objectives)
}
// asyncFlush needs bufMtx locked.
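
To show how the reworked Objectives option surfaces to users of client_golang, a hedged sketch (the field set reflects this commit: Epsilon is gone, and each objective carries its own absolute error):

package main

import (
	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	sum := prometheus.NewSummary(prometheus.SummaryOpts{
		Name: "rpc_duration_seconds",
		Help: "RPC latency distribution.",
		// Objectives maps each quantile to its absolute error. Leaving it
		// unset falls back to DefObjectives as defined in the diff above.
		Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
	})
	sum.Observe(0.042)
}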

View File

@ -126,7 +126,6 @@ func TestSummaryConcurrency(t *testing.T) {
mutations := int(n%10000 + 1)
concLevel := int(n%15 + 1)
total := mutations * concLevel
ε := 0.001
var start, end sync.WaitGroup
start.Add(1)
@ -135,7 +134,6 @@ func TestSummaryConcurrency(t *testing.T) {
sum := NewSummary(SummaryOpts{
Name: "test_summary",
Help: "helpless",
Epsilon: ε,
})
allVars := make([]float64, total)
@ -170,14 +168,21 @@ func TestSummaryConcurrency(t *testing.T) {
t.Errorf("got sample sum %f, want %f", got, want)
}
for i, wantQ := range DefObjectives {
objectives := make([]float64, 0, len(DefObjectives))
for qu := range DefObjectives {
objectives = append(objectives, qu)
}
sort.Float64s(objectives)
for i, wantQ := range objectives {
ε := DefObjectives[wantQ]
gotQ := *m.Summary.Quantile[i].Quantile
gotV := *m.Summary.Quantile[i].Value
min, max := getBounds(allVars, wantQ, ε)
if gotQ != wantQ {
t.Errorf("got quantile %f, want %f", gotQ, wantQ)
}
if (gotV < min || gotV > max) && len(allVars) > 500 { // Avoid statistical outliers.
if gotV < min || gotV > max {
t.Errorf("got %f for quantile %f, want [%f,%f]", gotV, gotQ, min, max)
}
}
@ -192,10 +197,15 @@ func TestSummaryConcurrency(t *testing.T) {
func TestSummaryVecConcurrency(t *testing.T) {
rand.Seed(42)
objectives := make([]float64, 0, len(DefObjectives))
for qu := range DefObjectives {
objectives = append(objectives, qu)
}
sort.Float64s(objectives)
it := func(n uint32) bool {
mutations := int(n%10000 + 1)
concLevel := int(n%15 + 1)
ε := 0.001
vecLength := int(n%5 + 1)
var start, end sync.WaitGroup
@ -206,7 +216,6 @@ func TestSummaryVecConcurrency(t *testing.T) {
SummaryOpts{
Name: "test_summary",
Help: "helpless",
Epsilon: ε,
},
[]string{"label"},
)
@ -249,14 +258,15 @@ func TestSummaryVecConcurrency(t *testing.T) {
if got, want := *m.Summary.SampleSum, sampleSums[i]; math.Abs((got-want)/want) > 0.001 {
t.Errorf("got sample sum %f for label %c, want %f", got, 'A'+i, want)
}
for j, wantQ := range DefObjectives {
for j, wantQ := range objectives {
ε := DefObjectives[wantQ]
gotQ := *m.Summary.Quantile[j].Quantile
gotV := *m.Summary.Quantile[j].Value
min, max := getBounds(allVars[i], wantQ, ε)
if gotQ != wantQ {
t.Errorf("got quantile %f for label %c, want %f", gotQ, 'A'+i, wantQ)
}
if (gotV < min || gotV > max) && len(allVars[i]) > 500 { // Avoid statistical outliers.
if gotV < min || gotV > max {
t.Errorf("got %f for quantile %f for label %c, want [%f,%f]", gotV, gotQ, 'A'+i, min, max)
t.Log(len(allVars[i]))
}
@ -276,9 +286,8 @@ func XTestSummaryDecay(t *testing.T) {
sum := NewSummary(SummaryOpts{
Name: "test_summary",
Help: "helpless",
Epsilon: 0.001,
MaxAge: 10 * time.Millisecond,
Objectives: []float64{0.1},
Objectives: map[float64]float64{0.1: 0.001},
})
m := &dto.Metric{}
@ -302,15 +311,16 @@ func XTestSummaryDecay(t *testing.T) {
}
func getBounds(vars []float64, q, ε float64) (min, max float64) {
lower := int((q - 4*ε) * float64(len(vars)))
upper := int((q+4*ε)*float64(len(vars))) + 1
n := float64(len(vars))
lower := int((q - ε) * n)
upper := int(math.Ceil((q + ε) * n))
min = vars[0]
if lower > 0 {
min = vars[lower]
if lower > 1 {
min = vars[lower-1]
}
max = vars[len(vars)-1]
if upper < len(vars)-1 {
max = vars[upper]
if upper < len(vars) {
max = vars[upper-1]
}
return
}