Add suffix collision checks during gathering

So far, if a gauge was named `xxx_count`, and a summary or histogram
`xxx`, this would have led to a legal protobuf exposition but would
have created a name collision on `xxx_count` in the text format and
within the Prometheus server.

Signed-off-by: beorn7 <beorn@soundcloud.com>
This commit is contained in:
beorn7 2018-07-13 16:29:17 +02:00
parent 767a0218df
commit 4572e24546
2 changed files with 194 additions and 3 deletions

View File

@ -19,6 +19,7 @@ import (
"os"
"runtime"
"sort"
"strings"
"sync"
"unicode/utf8"
@ -542,7 +543,7 @@ func processMetric(
return fmt.Errorf("error collecting metric %v: %s", desc, err)
}
metricFamily, ok := metricFamiliesByName[desc.fqName]
if ok {
if ok { // Existing name.
if metricFamily.GetHelp() != desc.help {
return fmt.Errorf(
"collected metric %s %s has help %q but should have %q",
@ -589,7 +590,7 @@ func processMetric(
default:
panic("encountered MetricFamily with invalid type")
}
} else {
} else { // New name.
metricFamily = &dto.MetricFamily{}
metricFamily.Name = proto.String(desc.fqName)
metricFamily.Help = proto.String(desc.help)
@ -608,6 +609,9 @@ func processMetric(
default:
return fmt.Errorf("empty metric collected: %s", dtoMetric)
}
if err := checkSuffixCollisions(metricFamily, metricFamiliesByName); err != nil {
return err
}
metricFamiliesByName[desc.fqName] = metricFamily
}
if err := checkMetricConsistency(metricFamily, dtoMetric, metricHashes); err != nil {
@ -688,6 +692,10 @@ func (gs Gatherers) Gather() ([]*dto.MetricFamily, error) {
existingMF.Name = mf.Name
existingMF.Help = mf.Help
existingMF.Type = mf.Type
if err := checkSuffixCollisions(existingMF, metricFamiliesByName); err != nil {
errs = append(errs, err)
continue
}
metricFamiliesByName[mf.GetName()] = existingMF
}
for _, m := range mf.Metric {
@ -767,6 +775,66 @@ func normalizeMetricFamilies(metricFamiliesByName map[string]*dto.MetricFamily)
return result
}
// checkSuffixCollisions checks for collisions with the “magic” suffixes the
// Prometheus text format and the internal metric representation of the
// Prometheus server add while flattening Summaries and Histograms.
func checkSuffixCollisions(mf *dto.MetricFamily, mfs map[string]*dto.MetricFamily) error {
var (
newName = mf.GetName()
newType = mf.GetType()
newNameWithoutSuffix = ""
)
switch {
case strings.HasSuffix(newName, "_count"):
newNameWithoutSuffix = newName[:len(newName)-6]
case strings.HasSuffix(newName, "_sum"):
newNameWithoutSuffix = newName[:len(newName)-4]
case strings.HasSuffix(newName, "_bucket"):
newNameWithoutSuffix = newName[:len(newName)-7]
}
if newNameWithoutSuffix != "" {
if existingMF, ok := mfs[newNameWithoutSuffix]; ok {
switch existingMF.GetType() {
case dto.MetricType_SUMMARY:
if !strings.HasSuffix(newName, "_bucket") {
return fmt.Errorf(
"collected metric named %q collides with previously collected summary named %q",
newName, newNameWithoutSuffix,
)
}
case dto.MetricType_HISTOGRAM:
return fmt.Errorf(
"collected metric named %q collides with previously collected histogram named %q",
newName, newNameWithoutSuffix,
)
}
}
}
if newType == dto.MetricType_SUMMARY || newType == dto.MetricType_HISTOGRAM {
if _, ok := mfs[newName+"_count"]; ok {
return fmt.Errorf(
"collected histogram or summary named %q collides with previously collected metric named %q",
newName, newName+"_count",
)
}
if _, ok := mfs[newName+"_sum"]; ok {
return fmt.Errorf(
"collected histogram or summary named %q collides with previously collected metric named %q",
newName, newName+"_sum",
)
}
}
if newType == dto.MetricType_HISTOGRAM {
if _, ok := mfs[newName+"_bucket"]; ok {
return fmt.Errorf(
"collected histogram named %q collides with previously collected metric named %q",
newName, newName+"_bucket",
)
}
}
return nil
}
// checkMetricConsistency checks if the provided Metric is consistent with the
// provided MetricFamily. It also hashes the Metric labels and the MetricFamily
// name. If the resulting hash is already in the provided metricHashes, an error

View File

@ -250,6 +250,63 @@ metric: <
expectedMetricFamilyInvalidLabelValueAsText := []byte(`An error has occurred during metrics gathering:
collected metric "name" { label:<name:"constname" value:"\377" > label:<name:"labelname" value:"different_val" > counter:<value:42 > } has a label named "constname" whose value is not utf8: "\xff"
`)
summary := prometheus.NewSummary(prometheus.SummaryOpts{
Name: "complex",
Help: "A metric to check collisions with _sum and _count.",
})
summaryAsText := []byte(`# HELP complex A metric to check collisions with _sum and _count.
# TYPE complex summary
complex{quantile="0.5"} NaN
complex{quantile="0.9"} NaN
complex{quantile="0.99"} NaN
complex_sum 0
complex_count 0
`)
histogram := prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "complex",
Help: "A metric to check collisions with _sun, _count, and _bucket.",
})
externalMetricFamilyWithBucketSuffix := &dto.MetricFamily{
Name: proto.String("complex_bucket"),
Help: proto.String("externaldocstring"),
Type: dto.MetricType_COUNTER.Enum(),
Metric: []*dto.Metric{
{
Counter: &dto.Counter{
Value: proto.Float64(1),
},
},
},
}
externalMetricFamilyWithBucketSuffixAsText := []byte(`# HELP complex_bucket externaldocstring
# TYPE complex_bucket counter
complex_bucket 1
`)
externalMetricFamilyWithCountSuffix := &dto.MetricFamily{
Name: proto.String("complex_count"),
Help: proto.String("externaldocstring"),
Type: dto.MetricType_COUNTER.Enum(),
Metric: []*dto.Metric{
{
Counter: &dto.Counter{
Value: proto.Float64(1),
},
},
},
}
bucketCollisionMsg := []byte(`An error has occurred during metrics gathering:
collected metric named "complex_bucket" collides with previously collected histogram named "complex"
`)
summaryCountCollisionMsg := []byte(`An error has occurred during metrics gathering:
collected metric named "complex_count" collides with previously collected summary named "complex"
`)
histogramCountCollisionMsg := []byte(`An error has occurred during metrics gathering:
collected metric named "complex_count" collides with previously collected histogram named "complex"
`)
type output struct {
@ -513,7 +570,7 @@ collected metric "name" { label:<name:"constname" value:"\377" > label:<name:"la
},
{ // 17
headers: map[string]string{
"Accept": "application/json",
"Accept": "text/plain",
},
out: output{
headers: map[string]string{
@ -523,6 +580,72 @@ collected metric "name" { label:<name:"constname" value:"\377" > label:<name:"la
},
collector: uncheckedCollector{metricVec},
},
{ // 18
headers: map[string]string{
"Accept": "text/plain",
},
out: output{
headers: map[string]string{
"Content-Type": `text/plain; charset=utf-8`,
},
body: histogramCountCollisionMsg,
},
collector: histogram,
externalMF: []*dto.MetricFamily{
externalMetricFamilyWithCountSuffix,
},
},
{ // 19
headers: map[string]string{
"Accept": "text/plain",
},
out: output{
headers: map[string]string{
"Content-Type": `text/plain; charset=utf-8`,
},
body: bucketCollisionMsg,
},
collector: histogram,
externalMF: []*dto.MetricFamily{
externalMetricFamilyWithBucketSuffix,
},
},
{ // 20
headers: map[string]string{
"Accept": "text/plain",
},
out: output{
headers: map[string]string{
"Content-Type": `text/plain; charset=utf-8`,
},
body: summaryCountCollisionMsg,
},
collector: summary,
externalMF: []*dto.MetricFamily{
externalMetricFamilyWithCountSuffix,
},
},
{ // 21
headers: map[string]string{
"Accept": "text/plain",
},
out: output{
headers: map[string]string{
"Content-Type": `text/plain; version=0.0.4; charset=utf-8`,
},
body: bytes.Join(
[][]byte{
summaryAsText,
externalMetricFamilyWithBucketSuffixAsText,
},
[]byte{},
),
},
collector: summary,
externalMF: []*dto.MetricFamily{
externalMetricFamilyWithBucketSuffix,
},
},
}
for i, scenario := range scenarios {
registry := prometheus.NewPedanticRegistry()