Merge pull request #417 from prometheus/beorn7/registry

Relax consistency checks during gathering
This commit is contained in:
Björn Rabenstein 2018-06-07 14:36:07 +02:00 committed by GitHub
commit faf4ec335f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 41 deletions

View File

@ -712,7 +712,7 @@ humidity_percent{location="inside"} 33.2
# HELP temperature_kelvin Temperature in Kelvin. # HELP temperature_kelvin Temperature in Kelvin.
# Duplicate metric: # Duplicate metric:
temperature_kelvin{location="outside"} 265.3 temperature_kelvin{location="outside"} 265.3
# Wrong labels: # Missing location label (note that this is undesirable but valid):
temperature_kelvin 4.5 temperature_kelvin 4.5
` `
@ -740,15 +740,14 @@ temperature_kelvin 4.5
// temperature_kelvin{location="outside"} 273.14 // temperature_kelvin{location="outside"} 273.14
// temperature_kelvin{location="somewhere else"} 4.5 // temperature_kelvin{location="somewhere else"} 4.5
// ---------- // ----------
// 2 error(s) occurred: // collected metric temperature_kelvin label:<name:"location" value:"outside" > gauge:<value:265.3 > was collected before with the same name and label values
// * collected metric temperature_kelvin label:<name:"location" value:"outside" > gauge:<value:265.3 > was collected before with the same name and label values
// * collected metric temperature_kelvin gauge:<value:4.5 > has label dimensions inconsistent with previously collected metrics in the same metric family
// # HELP humidity_percent Humidity in %. // # HELP humidity_percent Humidity in %.
// # TYPE humidity_percent gauge // # TYPE humidity_percent gauge
// humidity_percent{location="inside"} 33.2 // humidity_percent{location="inside"} 33.2
// humidity_percent{location="outside"} 45.4 // humidity_percent{location="outside"} 45.4
// # HELP temperature_kelvin Temperature in Kelvin. // # HELP temperature_kelvin Temperature in Kelvin.
// # TYPE temperature_kelvin gauge // # TYPE temperature_kelvin gauge
// temperature_kelvin 4.5
// temperature_kelvin{location="inside"} 298.44 // temperature_kelvin{location="inside"} 298.44
// temperature_kelvin{location="outside"} 273.14 // temperature_kelvin{location="outside"} 273.14
} }

View File

@ -126,15 +126,23 @@ type Registerer interface {
type Gatherer interface { type Gatherer interface {
// Gather calls the Collect method of the registered Collectors and then // Gather calls the Collect method of the registered Collectors and then
// gathers the collected metrics into a lexicographically sorted slice // gathers the collected metrics into a lexicographically sorted slice
// of MetricFamily protobufs. Even if an error occurs, Gather attempts // of uniquely named MetricFamily protobufs. Gather ensures that the
// to gather as many metrics as possible. Hence, if a non-nil error is // returned slice is valid and self-consistent so that it can be used
// returned, the returned MetricFamily slice could be nil (in case of a // for valid exposition. As an exception to the strict consistency
// fatal error that prevented any meaningful metric collection) or // requirements described for metric.Desc, Gather will tolerate
// contain a number of MetricFamily protobufs, some of which might be // different sets of label names for metrics of the same metric family.
// incomplete, and some might be missing altogether. The returned error //
// (which might be a MultiError) explains the details. In scenarios // Even if an error occurs, Gather attempts to gather as many metrics as
// where complete collection is critical, the returned MetricFamily // possible. Hence, if a non-nil error is returned, the returned
// protobufs should be disregarded if the returned error is non-nil. // MetricFamily slice could be nil (in case of a fatal error that
// prevented any meaningful metric collection) or contain a number of
// MetricFamily protobufs, some of which might be incomplete, and some
// might be missing altogether. The returned error (which might be a
// MultiError) explains the details. Note that this is mostly useful for
// debugging purposes. If the gathered protobufs are to be used for
// exposition in actual monitoring, it is almost always better to not
// expose an incomplete result and instead disregard the returned
// MetricFamily protobufs in case the returned error is non-nil.
Gather() ([]*dto.MetricFamily, error) Gather() ([]*dto.MetricFamily, error)
} }
@ -370,7 +378,6 @@ func (r *Registry) Gather() ([]*dto.MetricFamily, error) {
var ( var (
metricChan = make(chan Metric, capMetricChan) metricChan = make(chan Metric, capMetricChan)
metricHashes = map[uint64]struct{}{} metricHashes = map[uint64]struct{}{}
dimHashes = map[string]uint64{}
wg sync.WaitGroup wg sync.WaitGroup
errs MultiError // The collected errors to return in the end. errs MultiError // The collected errors to return in the end.
registeredDescIDs map[uint64]struct{} // Only used for pedantic checks registeredDescIDs map[uint64]struct{} // Only used for pedantic checks
@ -433,7 +440,7 @@ collectLoop:
} }
errs.Append(processMetric( errs.Append(processMetric(
metric, metricFamiliesByName, metric, metricFamiliesByName,
metricHashes, dimHashes, metricHashes,
registeredDescIDs, registeredDescIDs,
)) ))
default: default:
@ -445,7 +452,7 @@ collectLoop:
for metric := range metricChan { for metric := range metricChan {
errs.Append(processMetric( errs.Append(processMetric(
metric, metricFamiliesByName, metric, metricFamiliesByName,
metricHashes, dimHashes, metricHashes,
registeredDescIDs, registeredDescIDs,
)) ))
} }
@ -465,7 +472,6 @@ func processMetric(
metric Metric, metric Metric,
metricFamiliesByName map[string]*dto.MetricFamily, metricFamiliesByName map[string]*dto.MetricFamily,
metricHashes map[uint64]struct{}, metricHashes map[uint64]struct{},
dimHashes map[string]uint64,
registeredDescIDs map[uint64]struct{}, registeredDescIDs map[uint64]struct{},
) error { ) error {
desc := metric.Desc() desc := metric.Desc()
@ -542,7 +548,7 @@ func processMetric(
} }
metricFamiliesByName[desc.fqName] = metricFamily metricFamiliesByName[desc.fqName] = metricFamily
} }
if err := checkMetricConsistency(metricFamily, dtoMetric, metricHashes, dimHashes); err != nil { if err := checkMetricConsistency(metricFamily, dtoMetric, metricHashes); err != nil {
return err return err
} }
if registeredDescIDs != nil { if registeredDescIDs != nil {
@ -584,7 +590,6 @@ func (gs Gatherers) Gather() ([]*dto.MetricFamily, error) {
var ( var (
metricFamiliesByName = map[string]*dto.MetricFamily{} metricFamiliesByName = map[string]*dto.MetricFamily{}
metricHashes = map[uint64]struct{}{} metricHashes = map[uint64]struct{}{}
dimHashes = map[string]uint64{}
errs MultiError // The collected errors to return in the end. errs MultiError // The collected errors to return in the end.
) )
@ -624,7 +629,7 @@ func (gs Gatherers) Gather() ([]*dto.MetricFamily, error) {
metricFamiliesByName[mf.GetName()] = existingMF metricFamiliesByName[mf.GetName()] = existingMF
} }
for _, m := range mf.Metric { for _, m := range mf.Metric {
if err := checkMetricConsistency(existingMF, m, metricHashes, dimHashes); err != nil { if err := checkMetricConsistency(existingMF, m, metricHashes); err != nil {
errs = append(errs, err) errs = append(errs, err)
continue continue
} }
@ -701,18 +706,13 @@ func normalizeMetricFamilies(metricFamiliesByName map[string]*dto.MetricFamily)
} }
// checkMetricConsistency checks if the provided Metric is consistent with the // checkMetricConsistency checks if the provided Metric is consistent with the
// provided MetricFamily. It also hashed the Metric labels and the MetricFamily // provided MetricFamily. It also hashes the Metric labels and the MetricFamily
// name. If the resulting hash is already in the provided metricHashes, an error // name. If the resulting hash is already in the provided metricHashes, an error
// is returned. If not, it is added to metricHashes. The provided dimHashes maps // is returned. If not, it is added to metricHashes.
// MetricFamily names to their dimHash (hashed sorted label names). If dimHashes
// doesn't yet contain a hash for the provided MetricFamily, it is
// added. Otherwise, an error is returned if the existing dimHashes in not equal
// the calculated dimHash.
func checkMetricConsistency( func checkMetricConsistency(
metricFamily *dto.MetricFamily, metricFamily *dto.MetricFamily,
dtoMetric *dto.Metric, dtoMetric *dto.Metric,
metricHashes map[uint64]struct{}, metricHashes map[uint64]struct{},
dimHashes map[string]uint64,
) error { ) error {
// Type consistency with metric family. // Type consistency with metric family.
if metricFamily.GetType() == dto.MetricType_GAUGE && dtoMetric.Gauge == nil || if metricFamily.GetType() == dto.MetricType_GAUGE && dtoMetric.Gauge == nil ||
@ -732,19 +732,18 @@ func checkMetricConsistency(
} }
} }
// Is the metric unique (i.e. no other metric with the same name and the same label values)? // Is the metric unique (i.e. no other metric with the same name and the same labels)?
h := hashNew() h := hashNew()
h = hashAdd(h, metricFamily.GetName()) h = hashAdd(h, metricFamily.GetName())
h = hashAddByte(h, separatorByte) h = hashAddByte(h, separatorByte)
dh := hashNew()
// Make sure label pairs are sorted. We depend on it for the consistency // Make sure label pairs are sorted. We depend on it for the consistency
// check. // check.
sort.Sort(LabelPairSorter(dtoMetric.Label)) sort.Sort(LabelPairSorter(dtoMetric.Label))
for _, lp := range dtoMetric.Label { for _, lp := range dtoMetric.Label {
h = hashAdd(h, lp.GetName())
h = hashAddByte(h, separatorByte)
h = hashAdd(h, lp.GetValue()) h = hashAdd(h, lp.GetValue())
h = hashAddByte(h, separatorByte) h = hashAddByte(h, separatorByte)
dh = hashAdd(dh, lp.GetName())
dh = hashAddByte(dh, separatorByte)
} }
if _, exists := metricHashes[h]; exists { if _, exists := metricHashes[h]; exists {
return fmt.Errorf( return fmt.Errorf(
@ -752,16 +751,6 @@ func checkMetricConsistency(
metricFamily.GetName(), dtoMetric, metricFamily.GetName(), dtoMetric,
) )
} }
if dimHash, ok := dimHashes[metricFamily.GetName()]; ok {
if dimHash != dh {
return fmt.Errorf(
"collected metric %s %s has label dimensions inconsistent with previously collected metrics in the same metric family",
metricFamily.GetName(), dtoMetric,
)
}
} else {
dimHashes[metricFamily.GetName()] = dh
}
metricHashes[h] = struct{}{} metricHashes[h] = struct{}{}
return nil return nil
} }