From e064aa97f19af0dc070387d6a6fa669184640c12 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Fri, 13 Jul 2018 13:43:21 +0200 Subject: [PATCH 1/3] Check quantile label during SummaryVec construction Also, document the existing behavior more clearly. Signed-off-by: beorn7 --- prometheus/summary.go | 14 ++++++++++++++ prometheus/summary_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/prometheus/summary.go b/prometheus/summary.go index f7dc85b..83b403c 100644 --- a/prometheus/summary.go +++ b/prometheus/summary.go @@ -105,6 +105,11 @@ type SummaryOpts struct { // with the same fully-qualified name must have the same label names in // their ConstLabels. // + // Due to the way a Summary is represented in the Prometheus text format + // and how it is handled by the Prometheus server internally, “quantile” + // is an illegal label name. Construction of a Summary or SummaryVec + // will panic if this label name is used in ConstLabels. + // // ConstLabels are only used rarely. In particular, do not use them to // attach the same labels to all your metrics. Those use cases are // better covered by target labels set by the scraping Prometheus @@ -402,7 +407,16 @@ type SummaryVec struct { // NewSummaryVec creates a new SummaryVec based on the provided SummaryOpts and // partitioned by the given label names. +// +// Due to the way a Summary is represented in the Prometheus text format and how +// it is handled by the Prometheus server internally, “quantile” is an illegal +// label name. NewSummaryVec will panic if this label name is used. func NewSummaryVec(opts SummaryOpts, labelNames []string) *SummaryVec { + for _, ln := range labelNames { + if ln == quantileLabel { + panic(errQuantileLabelNotAllowed) + } + } desc := NewDesc( BuildFQName(opts.Namespace, opts.Subsystem, opts.Name), opts.Help, diff --git a/prometheus/summary_test.go b/prometheus/summary_test.go index b162ed9..8b1a62e 100644 --- a/prometheus/summary_test.go +++ b/prometheus/summary_test.go @@ -64,6 +64,31 @@ func TestSummaryWithoutObjectives(t *testing.T) { } } +func TestSummaryWithQuantileLabel(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Error("Attempt to create Summary with 'quantile' label did not panic.") + } + }() + _ = NewSummary(SummaryOpts{ + Name: "test_summary", + Help: "less", + ConstLabels: Labels{"quantile": "test"}, + }) +} + +func TestSummaryVecWithQuantileLabel(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Error("Attempt to create SummaryVec with 'quantile' label did not panic.") + } + }() + _ = NewSummaryVec(SummaryOpts{ + Name: "test_summary", + Help: "less", + }, []string{"quantile"}) +} + func benchmarkSummaryObserve(w int, b *testing.B) { b.StopTimer() From 767a0218dfc546080ef03703a42364218ce962a0 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Fri, 13 Jul 2018 14:14:39 +0200 Subject: [PATCH 2/3] Add more label checksn during gathering Including check for an invalid "quantile" label in summaries. Also, improve error messages. Signed-off-by: beorn7 --- prometheus/examples_test.go | 2 +- prometheus/registry.go | 20 +++++++++++++++++--- prometheus/registry_test.go | 2 +- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/prometheus/examples_test.go b/prometheus/examples_test.go index 9808d5c..07b3929 100644 --- a/prometheus/examples_test.go +++ b/prometheus/examples_test.go @@ -740,7 +740,7 @@ temperature_kelvin 4.5 // temperature_kelvin{location="outside"} 273.14 // temperature_kelvin{location="somewhere else"} 4.5 // ---------- - // collected metric temperature_kelvin label: gauge: was collected before with the same name and label values + // collected metric "temperature_kelvin" { label: gauge: } was collected before with the same name and label values // # HELP humidity_percent Humidity in %. // # TYPE humidity_percent gauge // humidity_percent{location="inside"} 33.2 diff --git a/prometheus/registry.go b/prometheus/registry.go index 5c5cdfe..4fa2567 100644 --- a/prometheus/registry.go +++ b/prometheus/registry.go @@ -783,14 +783,28 @@ func checkMetricConsistency( metricFamily.GetType() == dto.MetricType_HISTOGRAM && dtoMetric.Histogram == nil || metricFamily.GetType() == dto.MetricType_UNTYPED && dtoMetric.Untyped == nil { return fmt.Errorf( - "collected metric %s %s is not a %s", + "collected metric %q { %s} is not a %s", metricFamily.GetName(), dtoMetric, metricFamily.GetType(), ) } for _, labelPair := range dtoMetric.GetLabel() { + if !checkLabelName(labelPair.GetName()) { + return fmt.Errorf( + "collected metric %q { %s} has a label with an invalid name: %s", + metricFamily.GetName(), dtoMetric, labelPair.GetName(), + ) + } + if dtoMetric.Summary != nil && labelPair.GetName() == quantileLabel { + return fmt.Errorf( + "collected metric %q { %s} must not have an explicit %q label", + metricFamily.GetName(), dtoMetric, quantileLabel, + ) + } if !utf8.ValidString(labelPair.GetValue()) { - return fmt.Errorf("collected metric's label %s is not utf8: %#v", labelPair.GetName(), labelPair.GetValue()) + return fmt.Errorf( + "collected metric %q { %s} has a label named %q whose value is not utf8: %#v", + metricFamily.GetName(), dtoMetric, labelPair.GetName(), labelPair.GetValue()) } } @@ -809,7 +823,7 @@ func checkMetricConsistency( } if _, exists := metricHashes[h]; exists { return fmt.Errorf( - "collected metric %s %s was collected before with the same name and label values", + "collected metric %q { %s} was collected before with the same name and label values", metricFamily.GetName(), dtoMetric, ) } diff --git a/prometheus/registry_test.go b/prometheus/registry_test.go index 10d4314..45b6dc9 100644 --- a/prometheus/registry_test.go +++ b/prometheus/registry_test.go @@ -249,7 +249,7 @@ metric: < expectedMetricFamilyInvalidLabelValueAsText := []byte(`An error has occurred during metrics gathering: -collected metric's label constname is not utf8: "\xff" +collected metric "name" { label: label: counter: } has a label named "constname" whose value is not utf8: "\xff" `) type output struct { From 4572e245460e83057586c8e237b4b622bb063622 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Fri, 13 Jul 2018 16:29:17 +0200 Subject: [PATCH 3/3] Add suffix collision checks during gathering So far, if a gauge was named `xxx_count`, and a summary or histogram `xxx`, this would have led to a legal protobuf exposition but would have created a name collision on `xxx_count` in the text format and within the Prometheus server. Signed-off-by: beorn7 --- prometheus/registry.go | 72 ++++++++++++++++++++- prometheus/registry_test.go | 125 +++++++++++++++++++++++++++++++++++- 2 files changed, 194 insertions(+), 3 deletions(-) diff --git a/prometheus/registry.go b/prometheus/registry.go index 4fa2567..896838f 100644 --- a/prometheus/registry.go +++ b/prometheus/registry.go @@ -19,6 +19,7 @@ import ( "os" "runtime" "sort" + "strings" "sync" "unicode/utf8" @@ -542,7 +543,7 @@ func processMetric( return fmt.Errorf("error collecting metric %v: %s", desc, err) } metricFamily, ok := metricFamiliesByName[desc.fqName] - if ok { + if ok { // Existing name. if metricFamily.GetHelp() != desc.help { return fmt.Errorf( "collected metric %s %s has help %q but should have %q", @@ -589,7 +590,7 @@ func processMetric( default: panic("encountered MetricFamily with invalid type") } - } else { + } else { // New name. metricFamily = &dto.MetricFamily{} metricFamily.Name = proto.String(desc.fqName) metricFamily.Help = proto.String(desc.help) @@ -608,6 +609,9 @@ func processMetric( default: return fmt.Errorf("empty metric collected: %s", dtoMetric) } + if err := checkSuffixCollisions(metricFamily, metricFamiliesByName); err != nil { + return err + } metricFamiliesByName[desc.fqName] = metricFamily } if err := checkMetricConsistency(metricFamily, dtoMetric, metricHashes); err != nil { @@ -688,6 +692,10 @@ func (gs Gatherers) Gather() ([]*dto.MetricFamily, error) { existingMF.Name = mf.Name existingMF.Help = mf.Help existingMF.Type = mf.Type + if err := checkSuffixCollisions(existingMF, metricFamiliesByName); err != nil { + errs = append(errs, err) + continue + } metricFamiliesByName[mf.GetName()] = existingMF } for _, m := range mf.Metric { @@ -767,6 +775,66 @@ func normalizeMetricFamilies(metricFamiliesByName map[string]*dto.MetricFamily) return result } +// checkSuffixCollisions checks for collisions with the “magic” suffixes the +// Prometheus text format and the internal metric representation of the +// Prometheus server add while flattening Summaries and Histograms. +func checkSuffixCollisions(mf *dto.MetricFamily, mfs map[string]*dto.MetricFamily) error { + var ( + newName = mf.GetName() + newType = mf.GetType() + newNameWithoutSuffix = "" + ) + switch { + case strings.HasSuffix(newName, "_count"): + newNameWithoutSuffix = newName[:len(newName)-6] + case strings.HasSuffix(newName, "_sum"): + newNameWithoutSuffix = newName[:len(newName)-4] + case strings.HasSuffix(newName, "_bucket"): + newNameWithoutSuffix = newName[:len(newName)-7] + } + if newNameWithoutSuffix != "" { + if existingMF, ok := mfs[newNameWithoutSuffix]; ok { + switch existingMF.GetType() { + case dto.MetricType_SUMMARY: + if !strings.HasSuffix(newName, "_bucket") { + return fmt.Errorf( + "collected metric named %q collides with previously collected summary named %q", + newName, newNameWithoutSuffix, + ) + } + case dto.MetricType_HISTOGRAM: + return fmt.Errorf( + "collected metric named %q collides with previously collected histogram named %q", + newName, newNameWithoutSuffix, + ) + } + } + } + if newType == dto.MetricType_SUMMARY || newType == dto.MetricType_HISTOGRAM { + if _, ok := mfs[newName+"_count"]; ok { + return fmt.Errorf( + "collected histogram or summary named %q collides with previously collected metric named %q", + newName, newName+"_count", + ) + } + if _, ok := mfs[newName+"_sum"]; ok { + return fmt.Errorf( + "collected histogram or summary named %q collides with previously collected metric named %q", + newName, newName+"_sum", + ) + } + } + if newType == dto.MetricType_HISTOGRAM { + if _, ok := mfs[newName+"_bucket"]; ok { + return fmt.Errorf( + "collected histogram named %q collides with previously collected metric named %q", + newName, newName+"_bucket", + ) + } + } + return nil +} + // checkMetricConsistency checks if the provided Metric is consistent with the // provided MetricFamily. It also hashes the Metric labels and the MetricFamily // name. If the resulting hash is already in the provided metricHashes, an error diff --git a/prometheus/registry_test.go b/prometheus/registry_test.go index 45b6dc9..5fbe6c9 100644 --- a/prometheus/registry_test.go +++ b/prometheus/registry_test.go @@ -250,6 +250,63 @@ metric: < expectedMetricFamilyInvalidLabelValueAsText := []byte(`An error has occurred during metrics gathering: collected metric "name" { label: label: counter: } has a label named "constname" whose value is not utf8: "\xff" +`) + + summary := prometheus.NewSummary(prometheus.SummaryOpts{ + Name: "complex", + Help: "A metric to check collisions with _sum and _count.", + }) + summaryAsText := []byte(`# HELP complex A metric to check collisions with _sum and _count. +# TYPE complex summary +complex{quantile="0.5"} NaN +complex{quantile="0.9"} NaN +complex{quantile="0.99"} NaN +complex_sum 0 +complex_count 0 +`) + histogram := prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "complex", + Help: "A metric to check collisions with _sun, _count, and _bucket.", + }) + externalMetricFamilyWithBucketSuffix := &dto.MetricFamily{ + Name: proto.String("complex_bucket"), + Help: proto.String("externaldocstring"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Counter: &dto.Counter{ + Value: proto.Float64(1), + }, + }, + }, + } + externalMetricFamilyWithBucketSuffixAsText := []byte(`# HELP complex_bucket externaldocstring +# TYPE complex_bucket counter +complex_bucket 1 +`) + externalMetricFamilyWithCountSuffix := &dto.MetricFamily{ + Name: proto.String("complex_count"), + Help: proto.String("externaldocstring"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Counter: &dto.Counter{ + Value: proto.Float64(1), + }, + }, + }, + } + bucketCollisionMsg := []byte(`An error has occurred during metrics gathering: + +collected metric named "complex_bucket" collides with previously collected histogram named "complex" +`) + summaryCountCollisionMsg := []byte(`An error has occurred during metrics gathering: + +collected metric named "complex_count" collides with previously collected summary named "complex" +`) + histogramCountCollisionMsg := []byte(`An error has occurred during metrics gathering: + +collected metric named "complex_count" collides with previously collected histogram named "complex" `) type output struct { @@ -513,7 +570,7 @@ collected metric "name" { label: label: label: