Merge pull request #429 from prometheus/beorn7/summary
Add more checks around summaries and histograms
This commit is contained in:
commit
bcbbc08eb2
|
@ -740,7 +740,7 @@ temperature_kelvin 4.5
|
|||
// temperature_kelvin{location="outside"} 273.14
|
||||
// temperature_kelvin{location="somewhere else"} 4.5
|
||||
// ----------
|
||||
// collected metric temperature_kelvin label:<name:"location" value:"outside" > gauge:<value:265.3 > was collected before with the same name and label values
|
||||
// collected metric "temperature_kelvin" { label:<name:"location" value:"outside" > gauge:<value:265.3 > } was collected before with the same name and label values
|
||||
// # HELP humidity_percent Humidity in %.
|
||||
// # TYPE humidity_percent gauge
|
||||
// humidity_percent{location="inside"} 33.2
|
||||
|
|
|
@ -19,6 +19,7 @@ import (
|
|||
"os"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"unicode/utf8"
|
||||
|
||||
|
@ -542,7 +543,7 @@ func processMetric(
|
|||
return fmt.Errorf("error collecting metric %v: %s", desc, err)
|
||||
}
|
||||
metricFamily, ok := metricFamiliesByName[desc.fqName]
|
||||
if ok {
|
||||
if ok { // Existing name.
|
||||
if metricFamily.GetHelp() != desc.help {
|
||||
return fmt.Errorf(
|
||||
"collected metric %s %s has help %q but should have %q",
|
||||
|
@ -589,7 +590,7 @@ func processMetric(
|
|||
default:
|
||||
panic("encountered MetricFamily with invalid type")
|
||||
}
|
||||
} else {
|
||||
} else { // New name.
|
||||
metricFamily = &dto.MetricFamily{}
|
||||
metricFamily.Name = proto.String(desc.fqName)
|
||||
metricFamily.Help = proto.String(desc.help)
|
||||
|
@ -608,6 +609,9 @@ func processMetric(
|
|||
default:
|
||||
return fmt.Errorf("empty metric collected: %s", dtoMetric)
|
||||
}
|
||||
if err := checkSuffixCollisions(metricFamily, metricFamiliesByName); err != nil {
|
||||
return err
|
||||
}
|
||||
metricFamiliesByName[desc.fqName] = metricFamily
|
||||
}
|
||||
if err := checkMetricConsistency(metricFamily, dtoMetric, metricHashes); err != nil {
|
||||
|
@ -688,6 +692,10 @@ func (gs Gatherers) Gather() ([]*dto.MetricFamily, error) {
|
|||
existingMF.Name = mf.Name
|
||||
existingMF.Help = mf.Help
|
||||
existingMF.Type = mf.Type
|
||||
if err := checkSuffixCollisions(existingMF, metricFamiliesByName); err != nil {
|
||||
errs = append(errs, err)
|
||||
continue
|
||||
}
|
||||
metricFamiliesByName[mf.GetName()] = existingMF
|
||||
}
|
||||
for _, m := range mf.Metric {
|
||||
|
@ -767,6 +775,66 @@ func normalizeMetricFamilies(metricFamiliesByName map[string]*dto.MetricFamily)
|
|||
return result
|
||||
}
|
||||
|
||||
// checkSuffixCollisions checks for collisions with the “magic” suffixes the
|
||||
// Prometheus text format and the internal metric representation of the
|
||||
// Prometheus server add while flattening Summaries and Histograms.
|
||||
func checkSuffixCollisions(mf *dto.MetricFamily, mfs map[string]*dto.MetricFamily) error {
|
||||
var (
|
||||
newName = mf.GetName()
|
||||
newType = mf.GetType()
|
||||
newNameWithoutSuffix = ""
|
||||
)
|
||||
switch {
|
||||
case strings.HasSuffix(newName, "_count"):
|
||||
newNameWithoutSuffix = newName[:len(newName)-6]
|
||||
case strings.HasSuffix(newName, "_sum"):
|
||||
newNameWithoutSuffix = newName[:len(newName)-4]
|
||||
case strings.HasSuffix(newName, "_bucket"):
|
||||
newNameWithoutSuffix = newName[:len(newName)-7]
|
||||
}
|
||||
if newNameWithoutSuffix != "" {
|
||||
if existingMF, ok := mfs[newNameWithoutSuffix]; ok {
|
||||
switch existingMF.GetType() {
|
||||
case dto.MetricType_SUMMARY:
|
||||
if !strings.HasSuffix(newName, "_bucket") {
|
||||
return fmt.Errorf(
|
||||
"collected metric named %q collides with previously collected summary named %q",
|
||||
newName, newNameWithoutSuffix,
|
||||
)
|
||||
}
|
||||
case dto.MetricType_HISTOGRAM:
|
||||
return fmt.Errorf(
|
||||
"collected metric named %q collides with previously collected histogram named %q",
|
||||
newName, newNameWithoutSuffix,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
if newType == dto.MetricType_SUMMARY || newType == dto.MetricType_HISTOGRAM {
|
||||
if _, ok := mfs[newName+"_count"]; ok {
|
||||
return fmt.Errorf(
|
||||
"collected histogram or summary named %q collides with previously collected metric named %q",
|
||||
newName, newName+"_count",
|
||||
)
|
||||
}
|
||||
if _, ok := mfs[newName+"_sum"]; ok {
|
||||
return fmt.Errorf(
|
||||
"collected histogram or summary named %q collides with previously collected metric named %q",
|
||||
newName, newName+"_sum",
|
||||
)
|
||||
}
|
||||
}
|
||||
if newType == dto.MetricType_HISTOGRAM {
|
||||
if _, ok := mfs[newName+"_bucket"]; ok {
|
||||
return fmt.Errorf(
|
||||
"collected histogram named %q collides with previously collected metric named %q",
|
||||
newName, newName+"_bucket",
|
||||
)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkMetricConsistency checks if the provided Metric is consistent with the
|
||||
// provided MetricFamily. It also hashes the Metric labels and the MetricFamily
|
||||
// name. If the resulting hash is already in the provided metricHashes, an error
|
||||
|
@ -783,14 +851,28 @@ func checkMetricConsistency(
|
|||
metricFamily.GetType() == dto.MetricType_HISTOGRAM && dtoMetric.Histogram == nil ||
|
||||
metricFamily.GetType() == dto.MetricType_UNTYPED && dtoMetric.Untyped == nil {
|
||||
return fmt.Errorf(
|
||||
"collected metric %s %s is not a %s",
|
||||
"collected metric %q { %s} is not a %s",
|
||||
metricFamily.GetName(), dtoMetric, metricFamily.GetType(),
|
||||
)
|
||||
}
|
||||
|
||||
for _, labelPair := range dtoMetric.GetLabel() {
|
||||
if !checkLabelName(labelPair.GetName()) {
|
||||
return fmt.Errorf(
|
||||
"collected metric %q { %s} has a label with an invalid name: %s",
|
||||
metricFamily.GetName(), dtoMetric, labelPair.GetName(),
|
||||
)
|
||||
}
|
||||
if dtoMetric.Summary != nil && labelPair.GetName() == quantileLabel {
|
||||
return fmt.Errorf(
|
||||
"collected metric %q { %s} must not have an explicit %q label",
|
||||
metricFamily.GetName(), dtoMetric, quantileLabel,
|
||||
)
|
||||
}
|
||||
if !utf8.ValidString(labelPair.GetValue()) {
|
||||
return fmt.Errorf("collected metric's label %s is not utf8: %#v", labelPair.GetName(), labelPair.GetValue())
|
||||
return fmt.Errorf(
|
||||
"collected metric %q { %s} has a label named %q whose value is not utf8: %#v",
|
||||
metricFamily.GetName(), dtoMetric, labelPair.GetName(), labelPair.GetValue())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -809,7 +891,7 @@ func checkMetricConsistency(
|
|||
}
|
||||
if _, exists := metricHashes[h]; exists {
|
||||
return fmt.Errorf(
|
||||
"collected metric %s %s was collected before with the same name and label values",
|
||||
"collected metric %q { %s} was collected before with the same name and label values",
|
||||
metricFamily.GetName(), dtoMetric,
|
||||
)
|
||||
}
|
||||
|
|
|
@ -249,7 +249,64 @@ metric: <
|
|||
|
||||
expectedMetricFamilyInvalidLabelValueAsText := []byte(`An error has occurred during metrics gathering:
|
||||
|
||||
collected metric's label constname is not utf8: "\xff"
|
||||
collected metric "name" { label:<name:"constname" value:"\377" > label:<name:"labelname" value:"different_val" > counter:<value:42 > } has a label named "constname" whose value is not utf8: "\xff"
|
||||
`)
|
||||
|
||||
summary := prometheus.NewSummary(prometheus.SummaryOpts{
|
||||
Name: "complex",
|
||||
Help: "A metric to check collisions with _sum and _count.",
|
||||
})
|
||||
summaryAsText := []byte(`# HELP complex A metric to check collisions with _sum and _count.
|
||||
# TYPE complex summary
|
||||
complex{quantile="0.5"} NaN
|
||||
complex{quantile="0.9"} NaN
|
||||
complex{quantile="0.99"} NaN
|
||||
complex_sum 0
|
||||
complex_count 0
|
||||
`)
|
||||
histogram := prometheus.NewHistogram(prometheus.HistogramOpts{
|
||||
Name: "complex",
|
||||
Help: "A metric to check collisions with _sun, _count, and _bucket.",
|
||||
})
|
||||
externalMetricFamilyWithBucketSuffix := &dto.MetricFamily{
|
||||
Name: proto.String("complex_bucket"),
|
||||
Help: proto.String("externaldocstring"),
|
||||
Type: dto.MetricType_COUNTER.Enum(),
|
||||
Metric: []*dto.Metric{
|
||||
{
|
||||
Counter: &dto.Counter{
|
||||
Value: proto.Float64(1),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
externalMetricFamilyWithBucketSuffixAsText := []byte(`# HELP complex_bucket externaldocstring
|
||||
# TYPE complex_bucket counter
|
||||
complex_bucket 1
|
||||
`)
|
||||
externalMetricFamilyWithCountSuffix := &dto.MetricFamily{
|
||||
Name: proto.String("complex_count"),
|
||||
Help: proto.String("externaldocstring"),
|
||||
Type: dto.MetricType_COUNTER.Enum(),
|
||||
Metric: []*dto.Metric{
|
||||
{
|
||||
Counter: &dto.Counter{
|
||||
Value: proto.Float64(1),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
bucketCollisionMsg := []byte(`An error has occurred during metrics gathering:
|
||||
|
||||
collected metric named "complex_bucket" collides with previously collected histogram named "complex"
|
||||
`)
|
||||
summaryCountCollisionMsg := []byte(`An error has occurred during metrics gathering:
|
||||
|
||||
collected metric named "complex_count" collides with previously collected summary named "complex"
|
||||
`)
|
||||
histogramCountCollisionMsg := []byte(`An error has occurred during metrics gathering:
|
||||
|
||||
collected metric named "complex_count" collides with previously collected histogram named "complex"
|
||||
`)
|
||||
|
||||
type output struct {
|
||||
|
@ -513,7 +570,7 @@ collected metric's label constname is not utf8: "\xff"
|
|||
},
|
||||
{ // 17
|
||||
headers: map[string]string{
|
||||
"Accept": "application/json",
|
||||
"Accept": "text/plain",
|
||||
},
|
||||
out: output{
|
||||
headers: map[string]string{
|
||||
|
@ -523,6 +580,72 @@ collected metric's label constname is not utf8: "\xff"
|
|||
},
|
||||
collector: uncheckedCollector{metricVec},
|
||||
},
|
||||
{ // 18
|
||||
headers: map[string]string{
|
||||
"Accept": "text/plain",
|
||||
},
|
||||
out: output{
|
||||
headers: map[string]string{
|
||||
"Content-Type": `text/plain; charset=utf-8`,
|
||||
},
|
||||
body: histogramCountCollisionMsg,
|
||||
},
|
||||
collector: histogram,
|
||||
externalMF: []*dto.MetricFamily{
|
||||
externalMetricFamilyWithCountSuffix,
|
||||
},
|
||||
},
|
||||
{ // 19
|
||||
headers: map[string]string{
|
||||
"Accept": "text/plain",
|
||||
},
|
||||
out: output{
|
||||
headers: map[string]string{
|
||||
"Content-Type": `text/plain; charset=utf-8`,
|
||||
},
|
||||
body: bucketCollisionMsg,
|
||||
},
|
||||
collector: histogram,
|
||||
externalMF: []*dto.MetricFamily{
|
||||
externalMetricFamilyWithBucketSuffix,
|
||||
},
|
||||
},
|
||||
{ // 20
|
||||
headers: map[string]string{
|
||||
"Accept": "text/plain",
|
||||
},
|
||||
out: output{
|
||||
headers: map[string]string{
|
||||
"Content-Type": `text/plain; charset=utf-8`,
|
||||
},
|
||||
body: summaryCountCollisionMsg,
|
||||
},
|
||||
collector: summary,
|
||||
externalMF: []*dto.MetricFamily{
|
||||
externalMetricFamilyWithCountSuffix,
|
||||
},
|
||||
},
|
||||
{ // 21
|
||||
headers: map[string]string{
|
||||
"Accept": "text/plain",
|
||||
},
|
||||
out: output{
|
||||
headers: map[string]string{
|
||||
"Content-Type": `text/plain; version=0.0.4; charset=utf-8`,
|
||||
},
|
||||
body: bytes.Join(
|
||||
[][]byte{
|
||||
summaryAsText,
|
||||
externalMetricFamilyWithBucketSuffixAsText,
|
||||
},
|
||||
[]byte{},
|
||||
),
|
||||
},
|
||||
collector: summary,
|
||||
externalMF: []*dto.MetricFamily{
|
||||
externalMetricFamilyWithBucketSuffix,
|
||||
},
|
||||
},
|
||||
}
|
||||
for i, scenario := range scenarios {
|
||||
registry := prometheus.NewPedanticRegistry()
|
||||
|
|
|
@ -105,6 +105,11 @@ type SummaryOpts struct {
|
|||
// with the same fully-qualified name must have the same label names in
|
||||
// their ConstLabels.
|
||||
//
|
||||
// Due to the way a Summary is represented in the Prometheus text format
|
||||
// and how it is handled by the Prometheus server internally, “quantile”
|
||||
// is an illegal label name. Construction of a Summary or SummaryVec
|
||||
// will panic if this label name is used in ConstLabels.
|
||||
//
|
||||
// ConstLabels are only used rarely. In particular, do not use them to
|
||||
// attach the same labels to all your metrics. Those use cases are
|
||||
// better covered by target labels set by the scraping Prometheus
|
||||
|
@ -402,7 +407,16 @@ type SummaryVec struct {
|
|||
|
||||
// NewSummaryVec creates a new SummaryVec based on the provided SummaryOpts and
|
||||
// partitioned by the given label names.
|
||||
//
|
||||
// Due to the way a Summary is represented in the Prometheus text format and how
|
||||
// it is handled by the Prometheus server internally, “quantile” is an illegal
|
||||
// label name. NewSummaryVec will panic if this label name is used.
|
||||
func NewSummaryVec(opts SummaryOpts, labelNames []string) *SummaryVec {
|
||||
for _, ln := range labelNames {
|
||||
if ln == quantileLabel {
|
||||
panic(errQuantileLabelNotAllowed)
|
||||
}
|
||||
}
|
||||
desc := NewDesc(
|
||||
BuildFQName(opts.Namespace, opts.Subsystem, opts.Name),
|
||||
opts.Help,
|
||||
|
|
|
@ -64,6 +64,31 @@ func TestSummaryWithoutObjectives(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestSummaryWithQuantileLabel(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r == nil {
|
||||
t.Error("Attempt to create Summary with 'quantile' label did not panic.")
|
||||
}
|
||||
}()
|
||||
_ = NewSummary(SummaryOpts{
|
||||
Name: "test_summary",
|
||||
Help: "less",
|
||||
ConstLabels: Labels{"quantile": "test"},
|
||||
})
|
||||
}
|
||||
|
||||
func TestSummaryVecWithQuantileLabel(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r == nil {
|
||||
t.Error("Attempt to create SummaryVec with 'quantile' label did not panic.")
|
||||
}
|
||||
}()
|
||||
_ = NewSummaryVec(SummaryOpts{
|
||||
Name: "test_summary",
|
||||
Help: "less",
|
||||
}, []string{"quantile"})
|
||||
}
|
||||
|
||||
func benchmarkSummaryObserve(w int, b *testing.B) {
|
||||
b.StopTimer()
|
||||
|
||||
|
|
Loading…
Reference in New Issue