From 42825b62f4799fd58c2a3c818533bdac38c1775a Mon Sep 17 00:00:00 2001 From: Owen Williams Date: Tue, 8 Oct 2024 13:53:04 -0400 Subject: [PATCH 1/2] Return an http error during scraping if metrics collide when escaped to underscores Signed-off-by: Owen Williams --- prometheus/desc.go | 12 ++- prometheus/promhttp/http.go | 16 +++ prometheus/promhttp/http_test.go | 48 +++++++++ prometheus/registry.go | 98 +++++++++++++++--- prometheus/registry_test.go | 170 +++++++++++++++++++++++++++++++ prometheus/wrap.go | 4 + 6 files changed, 333 insertions(+), 15 deletions(-) diff --git a/prometheus/desc.go b/prometheus/desc.go index 68ffe3c..5f70982 100644 --- a/prometheus/desc.go +++ b/prometheus/desc.go @@ -57,6 +57,9 @@ type Desc struct { // must be unique among all registered descriptors and can therefore be // used as an identifier of the descriptor. id uint64 + // escapedID is similar to id, but with the metric and label names escaped + // with underscores. + escapedID uint64 // dimHash is a hash of the label names (preset and variable) and the // Help string. Each Desc with the same fqName must have the same // dimHash. @@ -142,11 +145,18 @@ func (v2) NewDesc(fqName, help string, variableLabels ConstrainableLabels, const } xxh := xxhash.New() - for _, val := range labelValues { + escapedXXH := xxhash.New() + for i, val := range labelValues { xxh.WriteString(val) xxh.Write(separatorByteSlice) + if i == 0 { + val = model.EscapeName(val, model.UnderscoreEscaping) + } + escapedXXH.WriteString(val) + escapedXXH.Write(separatorByteSlice) } d.id = xxh.Sum64() + d.escapedID = escapedXXH.Sum64() // Sort labelNames so that order doesn't matter for the hash. sort.Strings(labelNames) // Now hash together (in this order) the help string and the sorted diff --git a/prometheus/promhttp/http.go b/prometheus/promhttp/http.go index 02a36b6..52de01f 100644 --- a/prometheus/promhttp/http.go +++ b/prometheus/promhttp/http.go @@ -43,6 +43,7 @@ import ( "github.com/klauspost/compress/zstd" "github.com/prometheus/common/expfmt" + "github.com/prometheus/common/model" "github.com/prometheus/client_golang/internal/github.com/golang/gddo/httputil" "github.com/prometheus/client_golang/prometheus" @@ -121,6 +122,7 @@ func HandlerForTransactional(reg prometheus.TransactionalGatherer, opts HandlerO if opts.MaxRequestsInFlight > 0 { inFlightSem = make(chan struct{}, opts.MaxRequestsInFlight) } + var hasEscapedCollisions bool if opts.Registry != nil { // Initialize all possibilities that can occur below. errCnt.WithLabelValues("gathering") @@ -133,6 +135,7 @@ func HandlerForTransactional(reg prometheus.TransactionalGatherer, opts HandlerO panic(err) } } + hasEscapedCollisions = opts.Registry.HasEscapedCollision() } // Select compression formats to offer based on default or user choice. @@ -190,6 +193,19 @@ func HandlerForTransactional(reg prometheus.TransactionalGatherer, opts HandlerO } else { contentType = expfmt.Negotiate(req.Header) } + + if hasEscapedCollisions { + switch contentType.ToEscapingScheme() { + case model.UnderscoreEscaping, model.DotsEscaping: + if opts.ErrorLog != nil { + opts.ErrorLog.Println("error: one or more metrics collide when escaped") + } + httpError(rsp, fmt.Errorf("one or more metrics collide when escaped")) + return + default: + } + } + rsp.Header().Set(contentTypeHeader, string(contentType)) w, encodingHeader, closeWriter, err := negotiateEncodingWriter(req, rsp, compressions) diff --git a/prometheus/promhttp/http_test.go b/prometheus/promhttp/http_test.go index 3ad2d1d..c5a2cc5 100644 --- a/prometheus/promhttp/http_test.go +++ b/prometheus/promhttp/http_test.go @@ -28,6 +28,8 @@ import ( "github.com/klauspost/compress/zstd" dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" + "github.com/prometheus/common/model" "github.com/prometheus/client_golang/prometheus" ) @@ -548,6 +550,52 @@ func TestNegotiateEncodingWriter(t *testing.T) { } } +func TestEscapedCollisions(t *testing.T) { + oldScheme := model.NameValidationScheme + defer func() { + model.NameValidationScheme = oldScheme + }() + model.NameValidationScheme = model.UTF8Validation + + reg := prometheus.NewRegistry() + reg.MustRegister(prometheus.NewCounter(prometheus.CounterOpts{ + Name: "test_metric", + Help: "A test metric with underscores", + })) + reg.MustRegister(prometheus.NewCounter(prometheus.CounterOpts{ + Name: "test.metric", + Help: "A test metric with dots", + })) + + handler := HandlerFor(reg, HandlerOpts{ + Registry: reg, + }) + + t.Run("fail case", func(t *testing.T) { + writer := httptest.NewRecorder() + request, _ := http.NewRequest("GET", "/metrics", nil) + request.Header.Add(acceptHeader, string(expfmt.NewFormat(expfmt.TypeTextPlain))) + handler.ServeHTTP(writer, request) + if writer.Code != 500 { + t.Errorf("wanted error 500, got %d", writer.Code) + } + expectErr := "An error has occurred while serving metrics:\n\none or more metrics collide when escaped\n" + if writer.Body.String() != expectErr { + t.Error("incorrect body returned, want " + expectErr + " got " + writer.Body.String()) + } + }) + + t.Run("success case", func(t *testing.T) { + writer := httptest.NewRecorder() + request, _ := http.NewRequest("GET", "/metrics", nil) + request.Header.Add(acceptHeader, string(expfmt.NewFormat(expfmt.TypeTextPlain).WithEscapingScheme(model.NoEscaping))) + handler.ServeHTTP(writer, request) + if writer.Code != 200 { + t.Errorf("wanted 200 OK, got %d", writer.Code) + } + }) +} + func BenchmarkCompression(b *testing.B) { benchmarks := []struct { name string diff --git a/prometheus/registry.go b/prometheus/registry.go index c6fd2f5..9504360 100644 --- a/prometheus/registry.go +++ b/prometheus/registry.go @@ -66,12 +66,18 @@ func init() { // pre-registered. func NewRegistry() *Registry { return &Registry{ - collectorsByID: map[uint64]Collector{}, - descIDs: map[uint64]struct{}{}, - dimHashesByName: map[string]uint64{}, + collectorsByID: map[uint64]Collector{}, + collectorsByEscapedID: map[uint64]Collector{}, + descIDs: map[uint64]struct{}{}, + escapedDescIDs: map[uint64]struct{}{}, + dimHashesByName: map[string]uint64{}, } } +func (r *Registry) HasEscapedCollision() bool { + return r.hasEscapedCollision +} + // NewPedanticRegistry returns a registry that checks during collection if each // collected Metric is consistent with its reported Desc, and if the Desc has // actually been registered with the registry. Unchecked Collectors (those whose @@ -131,6 +137,11 @@ type Registerer interface { // instance must only collect consistent metrics throughout its // lifetime. Unregister(Collector) bool + + // HasEscapedCollision returns true if any two of the registered metrics would + // be the same when escaped to underscores. This is needed to prevent + // duplicate metric issues when being scraped by a legacy system. + HasEscapedCollision() bool } // Gatherer is the interface for the part of a registry in charge of gathering @@ -258,22 +269,36 @@ func (errs MultiError) MaybeUnwrap() error { // Registry implements Collector to allow it to be used for creating groups of // metrics. See the Grouping example for how this can be done. type Registry struct { - mtx sync.RWMutex - collectorsByID map[uint64]Collector // ID is a hash of the descIDs. + mtx sync.RWMutex + collectorsByID map[uint64]Collector // ID is a hash of the descIDs. + // collectorsByEscapedID stores colletors by escapedID, only if escaped id is + // different (otherwise we can just do the lookup in the regular map). + collectorsByEscapedID map[uint64]Collector descIDs map[uint64]struct{} + // escapedDescIDs records desc ids of the escaped version of the metric, only + // if different from the regular name. + escapedDescIDs map[uint64]struct{} dimHashesByName map[string]uint64 uncheckedCollectors []Collector pedanticChecksEnabled bool + + // hasEscapedCollision is set to true if any two metrics that were not + // identical under UTF-8 would collide if scraped by a system that requires + // names to be escaped to legacy underscore replacement. + hasEscapedCollision bool } // Register implements Registerer. func (r *Registry) Register(c Collector) error { var ( - descChan = make(chan *Desc, capDescChan) - newDescIDs = map[uint64]struct{}{} - newDimHashesByName = map[string]uint64{} - collectorID uint64 // All desc IDs XOR'd together. - duplicateDescErr error + descChan = make(chan *Desc, capDescChan) + newDescIDs = map[uint64]struct{}{} + newEscapedIDs = map[uint64]struct{}{} + newDimHashesByName = map[string]uint64{} + collectorID uint64 // All desc IDs XOR'd together. + escapedID uint64 + duplicateDescErr error + duplicateEscapedDesc bool ) go func() { c.Describe(descChan) @@ -307,6 +332,22 @@ func (r *Registry) Register(c Collector) error { collectorID ^= desc.id } + // Also check to see if the descID is unique when all the names are escaped + // to underscores. First check the primary map, then check the secondary + // map. We only officially log a collision later. + if _, exists := r.descIDs[desc.escapedID]; exists { + duplicateEscapedDesc = true + } + if _, exists := r.escapedDescIDs[desc.escapedID]; exists { + duplicateEscapedDesc = true + } + if _, exists := newEscapedIDs[desc.escapedID]; !exists { + if desc.escapedID != desc.id { + newEscapedIDs[desc.escapedID] = struct{}{} + } + escapedID ^= desc.escapedID + } + // Are all the label names and the help string consistent with // previous descriptors of the same name? // First check existing descriptors... @@ -331,7 +372,17 @@ func (r *Registry) Register(c Collector) error { r.uncheckedCollectors = append(r.uncheckedCollectors, c) return nil } - if existing, exists := r.collectorsByID[collectorID]; exists { + + existing, collision := r.collectorsByID[collectorID] + // Also check whether the underscore-escaped versions of the IDs match. + if !collision { + _, escapedCollision := r.collectorsByID[escapedID] + r.hasEscapedCollision = r.hasEscapedCollision || escapedCollision + _, escapedCollision = r.collectorsByEscapedID[escapedID] + r.hasEscapedCollision = r.hasEscapedCollision || escapedCollision + } + + if collision { switch e := existing.(type) { case *wrappingCollector: return AlreadyRegisteredError{ @@ -351,23 +402,36 @@ func (r *Registry) Register(c Collector) error { return duplicateDescErr } + if duplicateEscapedDesc { + r.hasEscapedCollision = true + } + // Only after all tests have passed, actually register. r.collectorsByID[collectorID] = c + // We only need to store the escapedID if it doesn't match the unescaped one. + if escapedID != collectorID { + r.collectorsByEscapedID[escapedID] = c + } for hash := range newDescIDs { r.descIDs[hash] = struct{}{} } for name, dimHash := range newDimHashesByName { r.dimHashesByName[name] = dimHash } + for hash := range newEscapedIDs { + r.escapedDescIDs[hash] = struct{}{} + } return nil } // Unregister implements Registerer. func (r *Registry) Unregister(c Collector) bool { var ( - descChan = make(chan *Desc, capDescChan) - descIDs = map[uint64]struct{}{} - collectorID uint64 // All desc IDs XOR'd together. + descChan = make(chan *Desc, capDescChan) + descIDs = map[uint64]struct{}{} + escapedDescIDs = map[uint64]struct{}{} + collectorID uint64 // All desc IDs XOR'd together. + collectorEscapedID uint64 ) go func() { c.Describe(descChan) @@ -377,6 +441,8 @@ func (r *Registry) Unregister(c Collector) bool { if _, exists := descIDs[desc.id]; !exists { collectorID ^= desc.id descIDs[desc.id] = struct{}{} + collectorEscapedID ^= desc.escapedID + escapedDescIDs[desc.escapedID] = struct{}{} } } @@ -391,9 +457,13 @@ func (r *Registry) Unregister(c Collector) bool { defer r.mtx.Unlock() delete(r.collectorsByID, collectorID) + delete(r.collectorsByEscapedID, collectorEscapedID) for id := range descIDs { delete(r.descIDs, id) } + for id := range escapedDescIDs { + delete(r.escapedDescIDs, id) + } // dimHashesByName is left untouched as those must be consistent // throughout the lifetime of a program. return true diff --git a/prometheus/registry_test.go b/prometheus/registry_test.go index 35a74d2..38dc731 100644 --- a/prometheus/registry_test.go +++ b/prometheus/registry_test.go @@ -36,6 +36,7 @@ import ( dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" + "github.com/prometheus/common/model" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -1181,6 +1182,175 @@ func TestAlreadyRegisteredCollision(t *testing.T) { } } +func TestAlreadyRegisteredEscapingCollision(t *testing.T) { + oldValidation := model.NameValidationScheme + model.NameValidationScheme = model.UTF8Validation + defer func() { + model.NameValidationScheme = oldValidation + }() + + tests := []struct { + name string + // These are functions because hashes that determine collision are created + // at metric creation time. + counterA func() prometheus.Counter + counterB func() prometheus.Counter + expectErr bool + expectLegacyCollision bool + }{ + { + name: "no metric name collision", + counterA: func() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Name: "my_counter_a", + ConstLabels: prometheus.Labels{ + "name": "label", + "type": "test", + }, + }) + }, + counterB: func() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Name: "myAcounterAa", + ConstLabels: prometheus.Labels{ + "name": "label", + "type": "test", + }, + }) + }, + }, + { + name: "compatibility metric name collision", + counterA: func() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Name: "my_counter_a", + ConstLabels: prometheus.Labels{ + "name": "label", + "type": "test", + }, + }) + }, + counterB: func() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Name: "my.counter.a", + ConstLabels: prometheus.Labels{ + "name": "label", + "type": "test", + }, + }) + }, + expectLegacyCollision: true, + }, + { + // This is a regression test to make sure we are not accidentally + // reporting collisions when label values are different. + name: "no label value collision", + counterA: func() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Name: "my_counter_a", + ConstLabels: prometheus.Labels{ + "name": "label.value", + "type": "test", + }, + }) + }, + counterB: func() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Name: "my_counter_a", + ConstLabels: prometheus.Labels{ + "name": "label_value", + "type": "test", + }, + }) + }, + }, + { + name: "compatibility label name collision", + counterA: func() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Name: "my_counter_a", + ConstLabels: prometheus.Labels{ + "label.name": "name", + "type": "test", + }, + }) + }, + counterB: func() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Name: "my_counter_a", + ConstLabels: prometheus.Labels{ + "label_name": "name", + "type": "test", + }, + }) + }, + expectErr: true, + expectLegacyCollision: false, + }, + { + name: "no utf8 metric name collision", + counterA: func() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Name: "my_counter_a", + ConstLabels: prometheus.Labels{ + "name": "label", + "type": "test", + }, + }) + }, + counterB: func() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Name: "my.counter.a", + ConstLabels: prometheus.Labels{ + "name": "label", + "type": "test", + }, + }) + }, + expectLegacyCollision: true, + }, + { + name: "post init flag flip, should collide", + counterA: func() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Name: "my.counter.a", + ConstLabels: prometheus.Labels{ + "name": "label", + "type": "test", + }, + }) + }, + counterB: func() prometheus.Counter { + return prometheus.NewCounter(prometheus.CounterOpts{ + Name: "my.counter.a", + ConstLabels: prometheus.Labels{ + "name": "label", + "type": "test", + }, + }) + }, + expectErr: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + reg := prometheus.NewRegistry() + err := reg.Register(tc.counterA()) + if err != nil { + t.Errorf("required no error, got %v", err) + } + err = reg.Register(tc.counterB()) + if tc.expectErr != (err != nil) { + t.Errorf("required error state %v, got %v", tc.expectErr, err) + } + if tc.expectLegacyCollision != reg.HasEscapedCollision() { + t.Errorf("legacy collision mismatch, want %v got %v", tc.expectLegacyCollision, reg.HasEscapedCollision()) + } + }) + } +} + type tGatherer struct { done bool err error diff --git a/prometheus/wrap.go b/prometheus/wrap.go index 25da157..a80caa8 100644 --- a/prometheus/wrap.go +++ b/prometheus/wrap.go @@ -117,6 +117,10 @@ func (r *wrappingRegisterer) Unregister(c Collector) bool { }) } +func (r *wrappingRegisterer) HasEscapedCollision() bool { + return r.wrappedRegisterer.HasEscapedCollision() +} + type wrappingCollector struct { wrappedCollector Collector prefix string From 6514f6eb9140765180671468582cc9f481c44281 Mon Sep 17 00:00:00 2001 From: Owen Williams Date: Thu, 17 Oct 2024 13:55:20 -0400 Subject: [PATCH 2/2] move func to gatherer Signed-off-by: Owen Williams --- prometheus/promhttp/http.go | 2 +- prometheus/promhttp/http_test.go | 8 ++++--- prometheus/registry.go | 41 ++++++++++++++++++++++++++++---- prometheus/registry_test.go | 4 ++++ prometheus/wrap.go | 4 ---- 5 files changed, 46 insertions(+), 13 deletions(-) diff --git a/prometheus/promhttp/http.go b/prometheus/promhttp/http.go index 52de01f..d5070f3 100644 --- a/prometheus/promhttp/http.go +++ b/prometheus/promhttp/http.go @@ -135,8 +135,8 @@ func HandlerForTransactional(reg prometheus.TransactionalGatherer, opts HandlerO panic(err) } } - hasEscapedCollisions = opts.Registry.HasEscapedCollision() } + hasEscapedCollisions = reg.HasEscapedCollision() // Select compression formats to offer based on default or user choice. var compressions []string diff --git a/prometheus/promhttp/http_test.go b/prometheus/promhttp/http_test.go index c5a2cc5..358bea6 100644 --- a/prometheus/promhttp/http_test.go +++ b/prometheus/promhttp/http_test.go @@ -81,6 +81,10 @@ func (g *mockTransactionGatherer) Gather() (_ []*dto.MetricFamily, done func(), return mfs, func() { g.doneInvoked++ }, err } +func (g *mockTransactionGatherer) HasEscapedCollision() bool { + return g.g.HasEscapedCollision() +} + func readCompressedBody(r io.Reader, comp Compression) (string, error) { switch comp { case Gzip: @@ -567,9 +571,7 @@ func TestEscapedCollisions(t *testing.T) { Help: "A test metric with dots", })) - handler := HandlerFor(reg, HandlerOpts{ - Registry: reg, - }) + handler := HandlerFor(reg, HandlerOpts{}) t.Run("fail case", func(t *testing.T) { writer := httptest.NewRecorder() diff --git a/prometheus/registry.go b/prometheus/registry.go index 9504360..a4a4d8c 100644 --- a/prometheus/registry.go +++ b/prometheus/registry.go @@ -137,11 +137,6 @@ type Registerer interface { // instance must only collect consistent metrics throughout its // lifetime. Unregister(Collector) bool - - // HasEscapedCollision returns true if any two of the registered metrics would - // be the same when escaped to underscores. This is needed to prevent - // duplicate metric issues when being scraped by a legacy system. - HasEscapedCollision() bool } // Gatherer is the interface for the part of a registry in charge of gathering @@ -169,6 +164,11 @@ type Gatherer interface { // expose an incomplete result and instead disregard the returned // MetricFamily protobufs in case the returned error is non-nil. Gather() ([]*dto.MetricFamily, error) + + // HasEscapedCollision returns true if any two of the registered metrics would + // be the same when escaped to underscores. This is needed to prevent + // duplicate metric issues when being scraped by a legacy system. + HasEscapedCollision() bool } // Register registers the provided Collector with the DefaultRegisterer. @@ -205,6 +205,10 @@ func (gf GathererFunc) Gather() ([]*dto.MetricFamily, error) { return gf() } +func (gf GathererFunc) HasEscapedCollision() bool { + return false +} + // AlreadyRegisteredError is returned by the Register method if the Collector to // be registered has already been registered before, or a different Collector // that collects the same metrics has been registered before. Registration fails @@ -872,6 +876,15 @@ func (gs Gatherers) Gather() ([]*dto.MetricFamily, error) { return internal.NormalizeMetricFamilies(metricFamiliesByName), errs.MaybeUnwrap() } +func (gs Gatherers) HasEscapedCollision() bool { + for _, g := range gs { + if g.HasEscapedCollision() { + return true + } + } + return false +} + // checkSuffixCollisions checks for collisions with the “magic” suffixes the // Prometheus text format and the internal metric representation of the // Prometheus server add while flattening Summaries and Histograms. @@ -1103,6 +1116,15 @@ func (r *MultiTRegistry) Gather() (mfs []*dto.MetricFamily, done func(), err err }, errs.MaybeUnwrap() } +func (r *MultiTRegistry) HasEscapedCollision() bool { + for _, g := range r.tGatherers { + if g.HasEscapedCollision() { + return true + } + } + return false +} + // TransactionalGatherer represents transactional gatherer that can be triggered to notify gatherer that memory // used by metric family is no longer used by a caller. This allows implementations with cache. type TransactionalGatherer interface { @@ -1128,6 +1150,11 @@ type TransactionalGatherer interface { // Important: done is expected to be triggered (even if the error occurs!) // once caller does not need returned slice of dto.MetricFamily. Gather() (_ []*dto.MetricFamily, done func(), err error) + + // HasEscapedCollision returns true if any two of the registered metrics would + // be the same when escaped to underscores. This is needed to prevent + // duplicate metric issues when being scraped by a legacy system. + HasEscapedCollision() bool } // ToTransactionalGatherer transforms Gatherer to transactional one with noop as done function. @@ -1144,3 +1171,7 @@ func (g *noTransactionGatherer) Gather() (_ []*dto.MetricFamily, done func(), er mfs, err := g.g.Gather() return mfs, func() {}, err } + +func (g *noTransactionGatherer) HasEscapedCollision() bool { + return g.g.HasEscapedCollision() +} diff --git a/prometheus/registry_test.go b/prometheus/registry_test.go index 38dc731..e0f5bd0 100644 --- a/prometheus/registry_test.go +++ b/prometheus/registry_test.go @@ -1364,6 +1364,10 @@ func (g *tGatherer) Gather() (_ []*dto.MetricFamily, done func(), err error) { }, func() { g.done = true }, g.err } +func (g *tGatherer) HasEscapedCollision() bool { + return false +} + func TestNewMultiTRegistry(t *testing.T) { treg := &tGatherer{} diff --git a/prometheus/wrap.go b/prometheus/wrap.go index a80caa8..25da157 100644 --- a/prometheus/wrap.go +++ b/prometheus/wrap.go @@ -117,10 +117,6 @@ func (r *wrappingRegisterer) Unregister(c Collector) bool { }) } -func (r *wrappingRegisterer) HasEscapedCollision() bool { - return r.wrappedRegisterer.HasEscapedCollision() -} - type wrappingCollector struct { wrappedCollector Collector prefix string