Switch to base-2 buckets

This seems to be what OTel is converging towards, see
https://github.com/open-telemetry/oteps/pull/149 .

I see pros and cons with base-10 vs base-2. They are discussed in
detail in that OTel PR, and the gist of the discussion is pretty much
in line with my design doc. Since the balance is easy to tip here, I
think we should go with base-2 if OTel picks base-2. This also seems
to be in agreement with several proprietary solutions (see again the
discussion on that OTel PR).

The idea to make the number of buckets per power of 2 (or formerly 10)
a power of 2 itself was also sketched out in the design doc
already. It guarantees mergeability of different resolutions. I was
undecided between making it a recommendation or mandatory. Now I think
it should be mandatory as it has the additional benefit of playing
well with OTel's plans.

This commit also addresses a number of outstanding TODOs.

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
beorn7 2021-06-12 00:58:46 +02:00
parent 5aa8534cd0
commit 31318b7523
5 changed files with 312 additions and 68 deletions

View File

@ -57,7 +57,7 @@ var (
Name: "rpc_durations_histogram_seconds", Name: "rpc_durations_histogram_seconds",
Help: "RPC latency distributions.", Help: "RPC latency distributions.",
Buckets: prometheus.LinearBuckets(*normMean-5**normDomain, .5**normDomain, 20), Buckets: prometheus.LinearBuckets(*normMean-5**normDomain, .5**normDomain, 20),
SparseBucketsResolution: 20, SparseBucketsFactor: 1.1,
}) })
) )

2
go.mod
View File

@ -5,7 +5,7 @@ require (
github.com/cespare/xxhash/v2 v2.1.1 github.com/cespare/xxhash/v2 v2.1.1
github.com/golang/protobuf v1.4.3 github.com/golang/protobuf v1.4.3
github.com/json-iterator/go v1.1.11 github.com/json-iterator/go v1.1.11
github.com/prometheus/client_model v0.2.1-0.20210403151606-24db95a3d5d6 github.com/prometheus/client_model v0.2.1-0.20210611125623-bbaf1cc17b15
github.com/prometheus/common v0.26.0 github.com/prometheus/common v0.26.0
github.com/prometheus/procfs v0.6.0 github.com/prometheus/procfs v0.6.0
golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40 golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40

5
go.sum
View File

@ -24,6 +24,7 @@ github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7a
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
@ -77,8 +78,8 @@ github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.2.1-0.20210403151606-24db95a3d5d6 h1:wlZYx9ITBsvMO/wVoi30A36fAdRlBC130JksGGfaYl8= github.com/prometheus/client_model v0.2.1-0.20210611125623-bbaf1cc17b15 h1:l+7cw41KLeOScRk7f9Tg//xT8LAz55Kg+Fg9i0i0Cyw=
github.com/prometheus/client_model v0.2.1-0.20210403151606-24db95a3d5d6/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.1-0.20210611125623-bbaf1cc17b15/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/common v0.26.0 h1:iMAkS2TDoNWnKM+Kopnx/8tnEStIfpYA0ur0xQzzhMQ= github.com/prometheus/common v0.26.0 h1:iMAkS2TDoNWnKM+Kopnx/8tnEStIfpYA0ur0xQzzhMQ=

View File

@ -538,8 +538,8 @@ func ExampleHistogram() {
// cumulative_count: 816 // cumulative_count: 816
// upper_bound: 40 // upper_bound: 40
// > // >
// sb_resolution: 0 // sb_schema: 0
// sb_zero_threshold: 1e-128 // sb_zero_threshold: 0
// > // >
} }

View File

@ -28,6 +28,176 @@ import (
dto "github.com/prometheus/client_model/go" dto "github.com/prometheus/client_model/go"
) )
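// sparseBounds holds, for each schema, the bucket boundaries within [0.5, 1),
// i.e. the range of the frac that math.Frexp returns for an observed value.
// Only relevant for schema > 0. Position in the slice is the schema. (0 is
// never used, just here for convenience of using the schema directly as the
// index.)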
var sparseBounds = [][]float64{
// Schema "0":
[]float64{0.5},
// Schema 1:
[]float64{0.5, 0.7071067811865475},
// Schema 2:
[]float64{0.5, 0.5946035575013605, 0.7071067811865475, 0.8408964152537144},
// Schema 3:
[]float64{0.5, 0.5452538663326288, 0.5946035575013605, 0.6484197773255048,
0.7071067811865475, 0.7711054127039704, 0.8408964152537144, 0.9170040432046711},
// Schema 4:
[]float64{0.5, 0.5221368912137069, 0.5452538663326288, 0.5693943173783458,
0.5946035575013605, 0.620928906036742, 0.6484197773255048, 0.6771277734684463,
0.7071067811865475, 0.7384130729697496, 0.7711054127039704, 0.805245165974627,
0.8408964152537144, 0.8781260801866495, 0.9170040432046711, 0.9576032806985735},
// Schema 5:
[]float64{0.5, 0.5109485743270583, 0.5221368912137069, 0.5335702003384117,
0.5452538663326288, 0.5571933712979462, 0.5693943173783458, 0.5818624293887887,
0.5946035575013605, 0.6076236799902344, 0.620928906036742, 0.6345254785958666,
0.6484197773255048, 0.6626183215798706, 0.6771277734684463, 0.6919549409819159,
0.7071067811865475, 0.7225904034885232, 0.7384130729697496, 0.7545822137967112,
0.7711054127039704, 0.7879904225539431, 0.805245165974627, 0.8228777390769823,
0.8408964152537144, 0.8593096490612387, 0.8781260801866495, 0.8973545375015533,
0.9170040432046711, 0.9370838170551498, 0.9576032806985735, 0.9785720620876999},
// Schema 6:
[]float64{0.5, 0.5054446430258502, 0.5109485743270583, 0.5165124395106142,
0.5221368912137069, 0.5278225891802786, 0.5335702003384117, 0.5393803988785598,
0.5452538663326288, 0.5511912916539204, 0.5571933712979462, 0.5632608093041209,
0.5693943173783458, 0.5755946149764913, 0.5818624293887887, 0.5881984958251406,
0.5946035575013605, 0.6010783657263515, 0.6076236799902344, 0.6142402680534349,
0.620928906036742, 0.6276903785123455, 0.6345254785958666, 0.6414350080393891,
0.6484197773255048, 0.6554806057623822, 0.6626183215798706, 0.6698337620266515,
0.6771277734684463, 0.6845012114872953, 0.6919549409819159, 0.6994898362691555,
0.7071067811865475, 0.7148066691959849, 0.7225904034885232, 0.7304588970903234,
0.7384130729697496, 0.7464538641456323, 0.7545822137967112, 0.762799075372269,
0.7711054127039704, 0.7795022001189185, 0.7879904225539431, 0.7965710756711334,
0.805245165974627, 0.8140137109286738, 0.8228777390769823, 0.8318382901633681,
0.8408964152537144, 0.8500531768592616, 0.8593096490612387, 0.8686669176368529,
0.8781260801866495, 0.8876882462632604, 0.8973545375015533, 0.9071260877501991,
0.9170040432046711, 0.9269895625416926, 0.9370838170551498, 0.9472879907934827,
0.9576032806985735, 0.9680308967461471, 0.9785720620876999, 0.9892280131939752},
// Schema 7:
[]float64{0.5, 0.5027149505564014, 0.5054446430258502, 0.5081891574554764,
0.5109485743270583, 0.5137229745593818, 0.5165124395106142, 0.5193170509806894,
0.5221368912137069, 0.5249720429003435, 0.5278225891802786, 0.5306886136446309,
0.5335702003384117, 0.5364674337629877, 0.5393803988785598, 0.5423091811066545,
0.5452538663326288, 0.5482145409081883, 0.5511912916539204, 0.5541842058618393,
0.5571933712979462, 0.5602188762048033, 0.5632608093041209, 0.5663192597993595,
0.5693943173783458, 0.572486072215902, 0.5755946149764913, 0.5787200368168754,
0.5818624293887887, 0.585021884841625, 0.5881984958251406, 0.5913923554921704,
0.5946035575013605, 0.5978321960199137, 0.6010783657263515, 0.6043421618132907,
0.6076236799902344, 0.6109230164863786, 0.6142402680534349, 0.6175755319684665,
0.620928906036742, 0.6243004885946023, 0.6276903785123455, 0.6310986751971253,
0.6345254785958666, 0.637970889198196, 0.6414350080393891, 0.6449179367033329,
0.6484197773255048, 0.6519406325959679, 0.6554806057623822, 0.659039800633032,
0.6626183215798706, 0.6662162735415805, 0.6698337620266515, 0.6734708931164728,
0.6771277734684463, 0.6808045103191123, 0.6845012114872953, 0.688217985377265,
0.6919549409819159, 0.6957121878859629, 0.6994898362691555, 0.7032879969095076,
0.7071067811865475, 0.7109463010845827, 0.7148066691959849, 0.718687998724491,
0.7225904034885232, 0.7265139979245261, 0.7304588970903234, 0.7344252166684908,
0.7384130729697496, 0.7424225829363761, 0.7464538641456323, 0.7505070348132126,
0.7545822137967112, 0.7586795205991071, 0.762799075372269, 0.7669409989204777,
0.7711054127039704, 0.7752924388424999, 0.7795022001189185, 0.7837348199827764,
0.7879904225539431, 0.7922691326262467, 0.7965710756711334, 0.8008963778413465,
0.805245165974627, 0.8096175675974316, 0.8140137109286738, 0.8184337248834821,
0.8228777390769823, 0.8273458838280969, 0.8318382901633681, 0.8363550898207981,
0.8408964152537144, 0.8454623996346523, 0.8500531768592616, 0.8546688815502312,
0.8593096490612387, 0.8639756154809185, 0.8686669176368529, 0.8733836930995842,
0.8781260801866495, 0.8828942179666361, 0.8876882462632604, 0.8925083056594671,
0.8973545375015533, 0.9022270839033115, 0.9071260877501991, 0.9120516927035263,
0.9170040432046711, 0.9219832844793128, 0.9269895625416926, 0.9320230241988943,
0.9370838170551498, 0.9421720895161669, 0.9472879907934827, 0.9524316709088368,
0.9576032806985735, 0.9628029718180622, 0.9680308967461471, 0.9732872087896164,
0.9785720620876999, 0.9838856116165875, 0.9892280131939752, 0.9945994234836328},
// Schema 8:
[]float64{0.5, 0.5013556375251013, 0.5027149505564014, 0.5040779490592088,
0.5054446430258502, 0.5068150424757447, 0.5081891574554764, 0.509566998038869,
0.5109485743270583, 0.5123338964485679, 0.5137229745593818, 0.5151158188430205,
0.5165124395106142, 0.5179128468009786, 0.5193170509806894, 0.520725062344158,
0.5221368912137069, 0.5235525479396449, 0.5249720429003435, 0.526395386502313,
0.5278225891802786, 0.5292536613972564, 0.5306886136446309, 0.5321274564422321,
0.5335702003384117, 0.5350168559101208, 0.5364674337629877, 0.5379219445313954,
0.5393803988785598, 0.5408428074966075, 0.5423091811066545, 0.5437795304588847,
0.5452538663326288, 0.5467321995364429, 0.5482145409081883, 0.549700901315111,
0.5511912916539204, 0.5526857228508706, 0.5541842058618393, 0.5556867516724088,
0.5571933712979462, 0.5587040757836845, 0.5602188762048033, 0.5617377836665098,
0.5632608093041209, 0.564787964283144, 0.5663192597993595, 0.5678547070789026,
0.5693943173783458, 0.5709381019847808, 0.572486072215902, 0.5740382394200894,
0.5755946149764913, 0.5771552102951081, 0.5787200368168754, 0.5802891060137493,
0.5818624293887887, 0.5834400184762408, 0.585021884841625, 0.5866080400818185,
0.5881984958251406, 0.5897932637314379, 0.5913923554921704, 0.5929957828304968,
0.5946035575013605, 0.5962156912915756, 0.5978321960199137, 0.5994530835371903,
0.6010783657263515, 0.6027080545025619, 0.6043421618132907, 0.6059806996384005,
0.6076236799902344, 0.6092711149137041, 0.6109230164863786, 0.6125793968185725,
0.6142402680534349, 0.6159056423670379, 0.6175755319684665, 0.6192499490999082,
0.620928906036742, 0.622612415087629, 0.6243004885946023, 0.6259931389331581,
0.6276903785123455, 0.6293922197748583, 0.6310986751971253, 0.6328097572894031,
0.6345254785958666, 0.6362458516947014, 0.637970889198196, 0.6397006037528346,
0.6414350080393891, 0.6431741147730128, 0.6449179367033329, 0.6466664866145447,
0.6484197773255048, 0.6501778216898253, 0.6519406325959679, 0.6537082229673385,
0.6554806057623822, 0.6572577939746774, 0.659039800633032, 0.6608266388015788,
0.6626183215798706, 0.6644148621029772, 0.6662162735415805, 0.6680225691020727,
0.6698337620266515, 0.6716498655934177, 0.6734708931164728, 0.6752968579460171,
0.6771277734684463, 0.6789636531064505, 0.6808045103191123, 0.6826503586020058,
0.6845012114872953, 0.6863570825438342, 0.688217985377265, 0.690083933630119,
0.6919549409819159, 0.6938310211492645, 0.6957121878859629, 0.6975984549830999,
0.6994898362691555, 0.7013863456101023, 0.7032879969095076, 0.7051948041086352,
0.7071067811865475, 0.7090239421602076, 0.7109463010845827, 0.7128738720527471,
0.7148066691959849, 0.7167447066838943, 0.718687998724491, 0.7206365595643126,
0.7225904034885232, 0.7245495448210174, 0.7265139979245261, 0.7284837772007218,
0.7304588970903234, 0.7324393720732029, 0.7344252166684908, 0.7364164454346837,
0.7384130729697496, 0.7404151139112358, 0.7424225829363761, 0.7444354947621984,
0.7464538641456323, 0.7484777058836176, 0.7505070348132126, 0.7525418658117031,
0.7545822137967112, 0.7566280937263048, 0.7586795205991071, 0.7607365094544071,
0.762799075372269, 0.7648672334736434, 0.7669409989204777, 0.7690203869158282,
0.7711054127039704, 0.7731960915705107, 0.7752924388424999, 0.7773944698885442,
0.7795022001189185, 0.7816156449856788, 0.7837348199827764, 0.7858597406461707,
0.7879904225539431, 0.7901268813264122, 0.7922691326262467, 0.7944171921585818,
0.7965710756711334, 0.7987307989543135, 0.8008963778413465, 0.8030678282083853,
0.805245165974627, 0.8074284071024302, 0.8096175675974316, 0.8118126635086642,
0.8140137109286738, 0.8162207259936375, 0.8184337248834821, 0.820652723822003,
0.8228777390769823, 0.8251087869603088, 0.8273458838280969, 0.8295890460808079,
0.8318382901633681, 0.8340936325652911, 0.8363550898207981, 0.8386226785089391,
0.8408964152537144, 0.8431763167241966, 0.8454623996346523, 0.8477546807446661,
0.8500531768592616, 0.8523579048290255, 0.8546688815502312, 0.8569861239649629,
0.8593096490612387, 0.8616394738731368, 0.8639756154809185, 0.8663180910111553,
0.8686669176368529, 0.871022112577578, 0.8733836930995842, 0.8757516765159389,
0.8781260801866495, 0.8805069215187917, 0.8828942179666361, 0.8852879870317771,
0.8876882462632604, 0.890095013257712, 0.8925083056594671, 0.8949281411607002,
0.8973545375015533, 0.8997875124702672, 0.9022270839033115, 0.9046732696855155,
0.9071260877501991, 0.909585556079304, 0.9120516927035263, 0.9145245157024483,
0.9170040432046711, 0.9194902933879467, 0.9219832844793128, 0.9244830347552253,
0.9269895625416926, 0.92950288621441, 0.9320230241988943, 0.9345499949706191,
0.9370838170551498, 0.93962450902828, 0.9421720895161669, 0.9447265771954693,
0.9472879907934827, 0.9498563490882775, 0.9524316709088368, 0.9550139751351947,
0.9576032806985735, 0.9601996065815236, 0.9628029718180622, 0.9654133954938133,
0.9680308967461471, 0.9706554947643201, 0.9732872087896164, 0.9759260581154889,
0.9785720620876999, 0.9812252401044634, 0.9838856116165875, 0.9865531961276168,
0.9892280131939752, 0.9919100824251095, 0.9945994234836328, 0.9972960560854698},
}
// The sparseBounds above can be generated with the code below.
// TODO(beorn7): Actually do it via go generate.
//
// var sparseBounds [][]float64 = make([][]float64, 9)
//
// func init() {
// // Populate sparseBounds.
// numBuckets := 1
// for i := range sparseBounds {
// bounds := []float64{0.5}
// factor := math.Exp2(math.Exp2(float64(-i)))
// for j := 0; j < numBuckets-1; j++ {
// var bound float64
// if (j+1)%2 == 0 {
// // Use previously calculated value for increased precision.
// bound = sparseBounds[i-1][j/2+1]
// } else {
// bound = bounds[j] * factor
// }
// bounds = append(bounds, bound)
// }
// numBuckets *= 2
// sparseBounds[i] = bounds
// }
// }
// A Histogram counts individual observations from an event or sample stream in // A Histogram counts individual observations from an event or sample stream in
// configurable buckets. Similar to a summary, it also provides a sum of // configurable buckets. Similar to a summary, it also provides a sum of
// observations and an observation count. // observations and an observation count.
@ -68,7 +238,10 @@ var DefBuckets = []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}
// DefSparseBucketsZeroThreshold is the default value for // DefSparseBucketsZeroThreshold is the default value for
// SparseBucketsZeroThreshold in the HistogramOpts. // SparseBucketsZeroThreshold in the HistogramOpts.
var DefSparseBucketsZeroThreshold = 1e-128 const DefSparseBucketsZeroThreshold = 2.938735877055719e-39
// This is 2^-128 (or 0.5*2^-127 in the actual IEEE 754 representation), which
// is a bucket boundary at all possible resolutions.
var errBucketLabelNotAllowed = fmt.Errorf( var errBucketLabelNotAllowed = fmt.Errorf(
"%q is not allowed as label name in histograms", bucketLabel, "%q is not allowed as label name in histograms", bucketLabel,
@ -162,24 +335,41 @@ type HistogramOpts struct {
// buckets here explicitly.) // buckets here explicitly.)
Buckets []float64 Buckets []float64
// If SparseBucketsResolution is not zero, sparse buckets are used (in // If SparseBucketsFactor is greater than one, sparse buckets are used
// addition to the regular buckets, if defined above). Every power of // (in addition to the regular buckets, if defined above). Sparse
// ten is divided into the given number of exponential buckets. For // buckets are exponential buckets covering the whole float64 range
// example, if set to 3, the bucket boundaries are approximately […, // (with the exception of the “zero” bucket, see
// 0.1, 0.215, 0.464, 1, 2.15, 4,64, 10, 21.5, 46.4, 100, …] Histograms // SparseBucketsZeroThreshold below). From any one bucket to the next,
// can only be properly aggregated if they use the same // the width of the bucket grows by a constant factor.
// resolution. Therefore, it is recommended to use 20 as a resolution, // SparseBucketsFactor provides an upper bound for this factor
// which is generally expected to be a good tradeoff between resource // (exception see below). The smaller SparseBucketsFactor, the more
// usage and accuracy (resulting in a maximum error of quantile values // buckets will be used and thus the more costly the histogram will
// of about 6%). // become. A generally good trade-off between cost and accuracy is a
SparseBucketsResolution uint8 // value of 1.1 (each bucket is at most 10% wider than the previous
// one), which will result in each power of two divided into 8 buckets
// (e.g. there will be 8 buckets between 1 and 2, same as between 2 and
// 4, and 4 and 8, etc.).
//
// Details about the actually used factor: The factor is calculated as
// 2^(2^n), where n is an integer number between (and including) -8 and
// 4. n is chosen so that the resulting factor is the largest that is
// still smaller or equal to SparseBucketsFactor. Note that the smallest
// possible factor is therefore approx. 1.00271 (i.e. 2^(2^-8) ). If
// SparseBucketsFactor is greater than 1 but smaller than 2^(2^-8), then
// the actually used factor is still 2^(2^-8) even though it is larger
// than the provided SparseBucketsFactor.
SparseBucketsFactor float64
// All observations with an absolute value of less or equal // All observations with an absolute value of less or equal
// SparseBucketsZeroThreshold are accumulated into a “zero” bucket. For // SparseBucketsZeroThreshold are accumulated into a “zero” bucket. For
// best results, this should be close to a bucket boundary. This is // best results, this should be close to a bucket boundary. This is
// most easily accomplished by picking a power of ten. If // usually the case if picking a power of two. If
// SparseBucketsZeroThreshold is left at zero (or set to a negative // SparseBucketsZeroThreshold is left at zero (or set to a negative
// value), DefSparseBucketsZeroThreshold is used as the threshold. // value), DefSparseBucketsZeroThreshold is used as the threshold.
SparseBucketsZeroThreshold float64 SparseBucketsZeroThreshold float64
// TODO(beorn7): Need a setting to limit total bucket count and to
// configure a strategy to enforce the limit, e.g. if minimum duration
// after last reset, reset. If not, half the resolution and/or expand
// the zero bucket.
} }
// NewHistogram creates a new Histogram based on the provided HistogramOpts. It // NewHistogram creates a new Histogram based on the provided HistogramOpts. It
@ -219,18 +409,22 @@ func newHistogram(desc *Desc, opts HistogramOpts, labelValues ...string) Histogr
h := &histogram{ h := &histogram{
desc: desc, desc: desc,
upperBounds: opts.Buckets, upperBounds: opts.Buckets,
sparseResolution: uint32(opts.SparseBucketsResolution),
sparseThreshold: opts.SparseBucketsZeroThreshold, sparseThreshold: opts.SparseBucketsZeroThreshold,
labelPairs: MakeLabelPairs(desc, labelValues), labelPairs: MakeLabelPairs(desc, labelValues),
counts: [2]*histogramCounts{{}, {}}, counts: [2]*histogramCounts{{}, {}},
now: time.Now, now: time.Now,
} }
if len(h.upperBounds) == 0 && opts.SparseBucketsResolution == 0 { if len(h.upperBounds) == 0 && opts.SparseBucketsFactor <= 1 {
h.upperBounds = DefBuckets h.upperBounds = DefBuckets
} }
if h.sparseThreshold <= 0 { if h.sparseThreshold <= 0 {
h.sparseThreshold = DefSparseBucketsZeroThreshold h.sparseThreshold = DefSparseBucketsZeroThreshold
} }
if opts.SparseBucketsFactor <= 1 {
h.sparseThreshold = 0 // To mark that there are no sparse buckets.
} else {
h.sparseSchema = pickSparseSchema(opts.SparseBucketsFactor)
}
for i, upperBound := range h.upperBounds { for i, upperBound := range h.upperBounds {
if i < len(h.upperBounds)-1 { if i < len(h.upperBounds)-1 {
if upperBound >= h.upperBounds[i+1] { if upperBound >= h.upperBounds[i+1] {
@ -264,14 +458,14 @@ type histogramCounts struct {
sumBits uint64 sumBits uint64
count uint64 count uint64
buckets []uint64 buckets []uint64
// sparse buckets are implemented with a sync.Map for this PoC. A // sparse buckets are implemented with a sync.Map for now. A dedicated
// dedicated data structure will likely be more efficient. // data structure will likely be more efficient. There are separate maps
// There are separate maps for negative and positive observations. // for negative and positive observations. The map's value is an *int64,
// The map's value is a *uint64, counting observations in that bucket. // counting observations in that bucket. (Note that we don't use uint64
// The map's key is the logarithmic index of the bucket. Index 0 is for an // as an int64 won't overflow in practice, and working with signed
// upper bound of 1. Each increment/decrement by SparseBucketsResolution // numbers from the beginning simplifies the handling of deltas.) The
// multiplies/divides the upper bound by 10. Indices in between are // map's key is the index of the bucket according to the used
// spaced exponentially as defined in spareBounds. // sparseSchema. Index 0 is for an upper bound of 1.
sparseBucketsPositive, sparseBucketsNegative sync.Map sparseBucketsPositive, sparseBucketsNegative sync.Map
// sparseZeroBucket counts all (positive and negative) observations in // sparseZeroBucket counts all (positive and negative) observations in
// the zero bucket (with an absolute value less or equal // the zero bucket (with an absolute value less or equal
@ -312,10 +506,10 @@ func (hc *histogramCounts) observe(v float64, bucket int, doSparse bool, whichSp
atomic.AddUint64(&hc.count, 1) atomic.AddUint64(&hc.count, 1)
} }
func addToSparseBucket(buckets *sync.Map, key int, increment uint64) { func addToSparseBucket(buckets *sync.Map, key int, increment int64) {
if existingBucket, ok := buckets.Load(key); ok { if existingBucket, ok := buckets.Load(key); ok {
// Fast path without allocation. // Fast path without allocation.
atomic.AddUint64(existingBucket.(*uint64), increment) atomic.AddInt64(existingBucket.(*int64), increment)
return return
} }
// Bucket doesn't exist yet. Slow path allocating new counter. // Bucket doesn't exist yet. Slow path allocating new counter.
@ -323,7 +517,7 @@ func addToSparseBucket(buckets *sync.Map, key int, increment uint64) {
if actualBucket, loaded := buckets.LoadOrStore(key, &newBucket); loaded { if actualBucket, loaded := buckets.LoadOrStore(key, &newBucket); loaded {
// The bucket was created concurrently in another goroutine. // The bucket was created concurrently in another goroutine.
// Have to increment after all. // Have to increment after all.
atomic.AddUint64(actualBucket.(*uint64), increment) atomic.AddInt64(actualBucket.(*int64), increment)
} }
} }
@ -339,7 +533,7 @@ type histogram struct {
// perspective of the histogram) swap the hotcold under the writeMtx // perspective of the histogram) swap the hotcold under the writeMtx
// lock. A cooldown is awaited (while locked) by comparing the number of // lock. A cooldown is awaited (while locked) by comparing the number of
// observations with the initiation count. Once they match, then the // observations with the initiation count. Once they match, then the
// last observation on the now cool one has completed. All cool fields must // last observation on the now cool one has completed. All cold fields must
// be merged into the new hot before releasing writeMtx. // be merged into the new hot before releasing writeMtx.
// //
// Fields with atomic access first! See alignment constraint: // Fields with atomic access first! See alignment constraint:
@ -359,8 +553,8 @@ type histogram struct {
upperBounds []float64 upperBounds []float64
labelPairs []*dto.LabelPair labelPairs []*dto.LabelPair
exemplars []atomic.Value // One more than buckets (to include +Inf), each a *dto.Exemplar. exemplars []atomic.Value // One more than buckets (to include +Inf), each a *dto.Exemplar.
sparseResolution uint32 // Instead of uint8 to be ready for protobuf encoding. sparseSchema int32
sparseThreshold float64 sparseThreshold float64 // This is zero iff no sparse buckets are used.
now func() time.Time // To mock out time.Now() for testing. now func() time.Time // To mock out time.Now() for testing.
} }
@ -407,7 +601,7 @@ func (h *histogram) Write(out *dto.Metric) error {
Bucket: make([]*dto.Bucket, len(h.upperBounds)), Bucket: make([]*dto.Bucket, len(h.upperBounds)),
SampleCount: proto.Uint64(count), SampleCount: proto.Uint64(count),
SampleSum: proto.Float64(math.Float64frombits(atomic.LoadUint64(&coldCounts.sumBits))), SampleSum: proto.Float64(math.Float64frombits(atomic.LoadUint64(&coldCounts.sumBits))),
SbResolution: &h.sparseResolution, SbSchema: &h.sparseSchema,
SbZeroThreshold: &h.sparseThreshold, SbZeroThreshold: &h.sparseThreshold,
} }
out.Histogram = his out.Histogram = his
@ -448,7 +642,7 @@ func (h *histogram) Write(out *dto.Metric) error {
atomic.AddUint64(&hotCounts.buckets[i], atomic.LoadUint64(&coldCounts.buckets[i])) atomic.AddUint64(&hotCounts.buckets[i], atomic.LoadUint64(&coldCounts.buckets[i]))
atomic.StoreUint64(&coldCounts.buckets[i], 0) atomic.StoreUint64(&coldCounts.buckets[i], 0)
} }
if h.sparseResolution != 0 { if h.sparseThreshold != 0 {
zeroBucket := atomic.LoadUint64(&coldCounts.sparseZeroBucket) zeroBucket := atomic.LoadUint64(&coldCounts.sparseZeroBucket)
defer func() { defer func() {
@ -478,21 +672,41 @@ func makeSparseBuckets(buckets *sync.Map) *dto.SparseBuckets {
} }
sbs := dto.SparseBuckets{} sbs := dto.SparseBuckets{}
var prevCount uint64 var prevCount int64
var nextI int var nextI int
appendDelta := func(count int64) {
*sbs.Span[len(sbs.Span)-1].Length++
sbs.Delta = append(sbs.Delta, count-prevCount)
prevCount = count
}
for n, i := range ii { for n, i := range ii {
v, _ := buckets.Load(i) v, _ := buckets.Load(i)
count := atomic.LoadUint64(v.(*uint64)) count := atomic.LoadInt64(v.(*int64))
if n == 0 || i-nextI != 0 { // Multiple spans with only small gaps in between are probably
// encoded more efficiently as one larger span with a few empty
// buckets. Needs some research to find the sweet spot. For now,
// we assume that gaps of one or two buckets should not create
// a new span.
iDelta := int32(i - nextI)
if n == 0 || iDelta > 2 {
// We have to create a new span, either because we are
// at the very beginning, or because we have found a gap
// of more than two buckets.
sbs.Span = append(sbs.Span, &dto.SparseBuckets_Span{ sbs.Span = append(sbs.Span, &dto.SparseBuckets_Span{
Offset: proto.Int32(int32(i - nextI)), Offset: proto.Int32(iDelta),
Length: proto.Uint32(1), Length: proto.Uint32(0),
}) })
} else { } else {
*sbs.Span[len(sbs.Span)-1].Length++ // We have found a small gap (or no gap at all).
// Insert empty buckets as needed.
for j := int32(0); j < iDelta; j++ {
appendDelta(0)
} }
sbs.Delta = append(sbs.Delta, int64(count)-int64(prevCount)) // TODO(beorn7): Do proper overflow handling. }
nextI, prevCount = i+1, count appendDelta(count)
nextI = i + 1
} }
return &sbs return &sbs
} }
@ -504,9 +718,9 @@ func makeSparseBuckets(buckets *sync.Map) *dto.SparseBuckets {
// recreated on the next scrape). // recreated on the next scrape).
func addAndReset(hotBuckets *sync.Map) func(k, v interface{}) bool { func addAndReset(hotBuckets *sync.Map) func(k, v interface{}) bool {
return func(k, v interface{}) bool { return func(k, v interface{}) bool {
bucket := v.(*uint64) bucket := v.(*int64)
addToSparseBucket(hotBuckets, k.(int), atomic.LoadUint64(bucket)) addToSparseBucket(hotBuckets, k.(int), atomic.LoadInt64(bucket))
atomic.StoreUint64(bucket, 0) atomic.StoreInt64(bucket, 0)
return true return true
} }
} }
@ -528,7 +742,8 @@ func (h *histogram) findBucket(v float64) int {
// observe is the implementation for Observe without the findBucket part. // observe is the implementation for Observe without the findBucket part.
func (h *histogram) observe(v float64, bucket int) { func (h *histogram) observe(v float64, bucket int) {
doSparse := h.sparseResolution != 0 // Do not add to sparse buckets for NaN observations.
doSparse := h.sparseThreshold != 0 && !math.IsNaN(v)
var whichSparse, sparseKey int var whichSparse, sparseKey int
if doSparse { if doSparse {
switch { switch {
@ -537,13 +752,20 @@ func (h *histogram) observe(v float64, bucket int) {
case v < -h.sparseThreshold: case v < -h.sparseThreshold:
whichSparse = -1 whichSparse = -1
} }
// TODO(beorn7): This sometimes gives inaccurate results for frac, exp := math.Frexp(math.Abs(v))
// floats that are actual powers of 10, e.g. math.Log10(0.1) is switch {
// calculated as -0.9999999999999999 rather than -1 and thus case math.IsInf(v, 0):
// yields a key unexpectedly one off. Maybe special-case precise sparseKey = math.MaxInt32 // Largest possible sparseKey.
// powers of 10. case h.sparseSchema > 0:
// TODO(beorn7): This needs special-casing for ±Inf and NaN. bounds := sparseBounds[h.sparseSchema]
sparseKey = int(math.Ceil(math.Log10(math.Abs(v)) * float64(h.sparseResolution))) sparseKey = sort.SearchFloat64s(bounds, frac) + (exp-1)*len(bounds)
default:
sparseKey = exp
if frac == 0.5 {
sparseKey--
}
sparseKey /= 1 << -h.sparseSchema
}
} }
// We increment h.countAndHotIdx so that the counter in the lower // We increment h.countAndHotIdx so that the counter in the lower
// 63 bits gets incremented. At the same time, we get the new value // 63 bits gets incremented. At the same time, we get the new value
@ -797,3 +1019,24 @@ func (s buckSort) Swap(i, j int) {
func (s buckSort) Less(i, j int) bool { func (s buckSort) Less(i, j int) bool {
return s[i].GetUpperBound() < s[j].GetUpperBound() return s[i].GetUpperBound() < s[j].GetUpperBound()
} }
// pickSparseSchema returns the smallest number n between -4 and 8 such that
// 2^(2^-n) is less than or equal to the provided bucketFactor. In other words,
// it picks the schema whose growth factor is the largest one that does not
// exceed bucketFactor.
//
// Special cases:
//   - bucketFactor <= 1: panics.
//   - bucketFactor is NaN: panics.
//   - bucketFactor < 2^(2^-8) (but > 1): still returns 8, even though the
//     resulting factor is then larger than bucketFactor.
//   - bucketFactor >= 2^(2^4) (including +Inf): returns -4.
func pickSparseSchema(bucketFactor float64) int32 {
	// NaN compares false against everything, so the <=1 check below would
	// let it through. It would then reach the float-to-int conversion at the
	// end of this function, whose result is implementation-dependent for
	// NaN. Reject it explicitly instead.
	if math.IsNaN(bucketFactor) {
		panic(fmt.Errorf("bucketFactor is NaN"))
	}
	if bucketFactor <= 1 {
		panic(fmt.Errorf("bucketFactor %f is <=1", bucketFactor))
	}
	// 2^(2^-n) <= bucketFactor is equivalent to n >= -log2(log2(bucketFactor)),
	// so the smallest valid integer n is the negated floor of the double
	// logarithm.
	floor := math.Floor(math.Log2(math.Log2(bucketFactor)))
	switch {
	case floor <= -8:
		// Finer resolutions than schema 8 are not supported.
		return 8
	case floor >= 4:
		// Coarser resolutions than schema -4 are not supported.
		return -4
	default:
		return -int32(floor)
	}
}