Return an http error during scraping if metrics collide when escaped to underscores

Signed-off-by: Owen Williams <owen.williams@grafana.com>
This commit is contained in:
Owen Williams 2024-10-08 13:53:04 -04:00
parent e1675ced80
commit 42825b62f4
6 changed files with 333 additions and 15 deletions

View File

@ -57,6 +57,9 @@ type Desc struct {
// must be unique among all registered descriptors and can therefore be // must be unique among all registered descriptors and can therefore be
// used as an identifier of the descriptor. // used as an identifier of the descriptor.
id uint64 id uint64
// escapedID is similar to id, but with the metric and label names escaped
// with underscores.
escapedID uint64
// dimHash is a hash of the label names (preset and variable) and the // dimHash is a hash of the label names (preset and variable) and the
// Help string. Each Desc with the same fqName must have the same // Help string. Each Desc with the same fqName must have the same
// dimHash. // dimHash.
@ -142,11 +145,18 @@ func (v2) NewDesc(fqName, help string, variableLabels ConstrainableLabels, const
} }
xxh := xxhash.New() xxh := xxhash.New()
for _, val := range labelValues { escapedXXH := xxhash.New()
for i, val := range labelValues {
xxh.WriteString(val) xxh.WriteString(val)
xxh.Write(separatorByteSlice) xxh.Write(separatorByteSlice)
if i == 0 {
val = model.EscapeName(val, model.UnderscoreEscaping)
}
escapedXXH.WriteString(val)
escapedXXH.Write(separatorByteSlice)
} }
d.id = xxh.Sum64() d.id = xxh.Sum64()
d.escapedID = escapedXXH.Sum64()
// Sort labelNames so that order doesn't matter for the hash. // Sort labelNames so that order doesn't matter for the hash.
sort.Strings(labelNames) sort.Strings(labelNames)
// Now hash together (in this order) the help string and the sorted // Now hash together (in this order) the help string and the sorted

View File

@ -43,6 +43,7 @@ import (
"github.com/klauspost/compress/zstd" "github.com/klauspost/compress/zstd"
"github.com/prometheus/common/expfmt" "github.com/prometheus/common/expfmt"
"github.com/prometheus/common/model"
"github.com/prometheus/client_golang/internal/github.com/golang/gddo/httputil" "github.com/prometheus/client_golang/internal/github.com/golang/gddo/httputil"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
@ -121,6 +122,7 @@ func HandlerForTransactional(reg prometheus.TransactionalGatherer, opts HandlerO
if opts.MaxRequestsInFlight > 0 { if opts.MaxRequestsInFlight > 0 {
inFlightSem = make(chan struct{}, opts.MaxRequestsInFlight) inFlightSem = make(chan struct{}, opts.MaxRequestsInFlight)
} }
var hasEscapedCollisions bool
if opts.Registry != nil { if opts.Registry != nil {
// Initialize all possibilities that can occur below. // Initialize all possibilities that can occur below.
errCnt.WithLabelValues("gathering") errCnt.WithLabelValues("gathering")
@ -133,6 +135,7 @@ func HandlerForTransactional(reg prometheus.TransactionalGatherer, opts HandlerO
panic(err) panic(err)
} }
} }
hasEscapedCollisions = opts.Registry.HasEscapedCollision()
} }
// Select compression formats to offer based on default or user choice. // Select compression formats to offer based on default or user choice.
@ -190,6 +193,19 @@ func HandlerForTransactional(reg prometheus.TransactionalGatherer, opts HandlerO
} else { } else {
contentType = expfmt.Negotiate(req.Header) contentType = expfmt.Negotiate(req.Header)
} }
if hasEscapedCollisions {
switch contentType.ToEscapingScheme() {
case model.UnderscoreEscaping, model.DotsEscaping:
if opts.ErrorLog != nil {
opts.ErrorLog.Println("error: one or more metrics collide when escaped")
}
httpError(rsp, fmt.Errorf("one or more metrics collide when escaped"))
return
default:
}
}
rsp.Header().Set(contentTypeHeader, string(contentType)) rsp.Header().Set(contentTypeHeader, string(contentType))
w, encodingHeader, closeWriter, err := negotiateEncodingWriter(req, rsp, compressions) w, encodingHeader, closeWriter, err := negotiateEncodingWriter(req, rsp, compressions)

View File

@ -28,6 +28,8 @@ import (
"github.com/klauspost/compress/zstd" "github.com/klauspost/compress/zstd"
dto "github.com/prometheus/client_model/go" dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
"github.com/prometheus/common/model"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
) )
@ -548,6 +550,52 @@ func TestNegotiateEncodingWriter(t *testing.T) {
} }
} }
func TestEscapedCollisions(t *testing.T) {
oldScheme := model.NameValidationScheme
defer func() {
model.NameValidationScheme = oldScheme
}()
model.NameValidationScheme = model.UTF8Validation
reg := prometheus.NewRegistry()
reg.MustRegister(prometheus.NewCounter(prometheus.CounterOpts{
Name: "test_metric",
Help: "A test metric with underscores",
}))
reg.MustRegister(prometheus.NewCounter(prometheus.CounterOpts{
Name: "test.metric",
Help: "A test metric with dots",
}))
handler := HandlerFor(reg, HandlerOpts{
Registry: reg,
})
t.Run("fail case", func(t *testing.T) {
writer := httptest.NewRecorder()
request, _ := http.NewRequest("GET", "/metrics", nil)
request.Header.Add(acceptHeader, string(expfmt.NewFormat(expfmt.TypeTextPlain)))
handler.ServeHTTP(writer, request)
if writer.Code != 500 {
t.Errorf("wanted error 500, got %d", writer.Code)
}
expectErr := "An error has occurred while serving metrics:\n\none or more metrics collide when escaped\n"
if writer.Body.String() != expectErr {
t.Error("incorrect body returned, want " + expectErr + " got " + writer.Body.String())
}
})
t.Run("success case", func(t *testing.T) {
writer := httptest.NewRecorder()
request, _ := http.NewRequest("GET", "/metrics", nil)
request.Header.Add(acceptHeader, string(expfmt.NewFormat(expfmt.TypeTextPlain).WithEscapingScheme(model.NoEscaping)))
handler.ServeHTTP(writer, request)
if writer.Code != 200 {
t.Errorf("wanted 200 OK, got %d", writer.Code)
}
})
}
func BenchmarkCompression(b *testing.B) { func BenchmarkCompression(b *testing.B) {
benchmarks := []struct { benchmarks := []struct {
name string name string

View File

@ -67,11 +67,17 @@ func init() {
func NewRegistry() *Registry { func NewRegistry() *Registry {
return &Registry{ return &Registry{
collectorsByID: map[uint64]Collector{}, collectorsByID: map[uint64]Collector{},
collectorsByEscapedID: map[uint64]Collector{},
descIDs: map[uint64]struct{}{}, descIDs: map[uint64]struct{}{},
escapedDescIDs: map[uint64]struct{}{},
dimHashesByName: map[string]uint64{}, dimHashesByName: map[string]uint64{},
} }
} }
func (r *Registry) HasEscapedCollision() bool {
return r.hasEscapedCollision
}
// NewPedanticRegistry returns a registry that checks during collection if each // NewPedanticRegistry returns a registry that checks during collection if each
// collected Metric is consistent with its reported Desc, and if the Desc has // collected Metric is consistent with its reported Desc, and if the Desc has
// actually been registered with the registry. Unchecked Collectors (those whose // actually been registered with the registry. Unchecked Collectors (those whose
@ -131,6 +137,11 @@ type Registerer interface {
// instance must only collect consistent metrics throughout its // instance must only collect consistent metrics throughout its
// lifetime. // lifetime.
Unregister(Collector) bool Unregister(Collector) bool
// HasEscapedCollision returns true if any two of the registered metrics would
// be the same when escaped to underscores. This is needed to prevent
// duplicate metric issues when being scraped by a legacy system.
HasEscapedCollision() bool
} }
// Gatherer is the interface for the part of a registry in charge of gathering // Gatherer is the interface for the part of a registry in charge of gathering
@ -260,10 +271,21 @@ func (errs MultiError) MaybeUnwrap() error {
type Registry struct { type Registry struct {
mtx sync.RWMutex mtx sync.RWMutex
collectorsByID map[uint64]Collector // ID is a hash of the descIDs. collectorsByID map[uint64]Collector // ID is a hash of the descIDs.
// collectorsByEscapedID stores colletors by escapedID, only if escaped id is
// different (otherwise we can just do the lookup in the regular map).
collectorsByEscapedID map[uint64]Collector
descIDs map[uint64]struct{} descIDs map[uint64]struct{}
// escapedDescIDs records desc ids of the escaped version of the metric, only
// if different from the regular name.
escapedDescIDs map[uint64]struct{}
dimHashesByName map[string]uint64 dimHashesByName map[string]uint64
uncheckedCollectors []Collector uncheckedCollectors []Collector
pedanticChecksEnabled bool pedanticChecksEnabled bool
// hasEscapedCollision is set to true if any two metrics that were not
// identical under UTF-8 would collide if scraped by a system that requires
// names to be escaped to legacy underscore replacement.
hasEscapedCollision bool
} }
// Register implements Registerer. // Register implements Registerer.
@ -271,9 +293,12 @@ func (r *Registry) Register(c Collector) error {
var ( var (
descChan = make(chan *Desc, capDescChan) descChan = make(chan *Desc, capDescChan)
newDescIDs = map[uint64]struct{}{} newDescIDs = map[uint64]struct{}{}
newEscapedIDs = map[uint64]struct{}{}
newDimHashesByName = map[string]uint64{} newDimHashesByName = map[string]uint64{}
collectorID uint64 // All desc IDs XOR'd together. collectorID uint64 // All desc IDs XOR'd together.
escapedID uint64
duplicateDescErr error duplicateDescErr error
duplicateEscapedDesc bool
) )
go func() { go func() {
c.Describe(descChan) c.Describe(descChan)
@ -307,6 +332,22 @@ func (r *Registry) Register(c Collector) error {
collectorID ^= desc.id collectorID ^= desc.id
} }
// Also check to see if the descID is unique when all the names are escaped
// to underscores. First check the primary map, then check the secondary
// map. We only officially log a collision later.
if _, exists := r.descIDs[desc.escapedID]; exists {
duplicateEscapedDesc = true
}
if _, exists := r.escapedDescIDs[desc.escapedID]; exists {
duplicateEscapedDesc = true
}
if _, exists := newEscapedIDs[desc.escapedID]; !exists {
if desc.escapedID != desc.id {
newEscapedIDs[desc.escapedID] = struct{}{}
}
escapedID ^= desc.escapedID
}
// Are all the label names and the help string consistent with // Are all the label names and the help string consistent with
// previous descriptors of the same name? // previous descriptors of the same name?
// First check existing descriptors... // First check existing descriptors...
@ -331,7 +372,17 @@ func (r *Registry) Register(c Collector) error {
r.uncheckedCollectors = append(r.uncheckedCollectors, c) r.uncheckedCollectors = append(r.uncheckedCollectors, c)
return nil return nil
} }
if existing, exists := r.collectorsByID[collectorID]; exists {
existing, collision := r.collectorsByID[collectorID]
// Also check whether the underscore-escaped versions of the IDs match.
if !collision {
_, escapedCollision := r.collectorsByID[escapedID]
r.hasEscapedCollision = r.hasEscapedCollision || escapedCollision
_, escapedCollision = r.collectorsByEscapedID[escapedID]
r.hasEscapedCollision = r.hasEscapedCollision || escapedCollision
}
if collision {
switch e := existing.(type) { switch e := existing.(type) {
case *wrappingCollector: case *wrappingCollector:
return AlreadyRegisteredError{ return AlreadyRegisteredError{
@ -351,14 +402,25 @@ func (r *Registry) Register(c Collector) error {
return duplicateDescErr return duplicateDescErr
} }
if duplicateEscapedDesc {
r.hasEscapedCollision = true
}
// Only after all tests have passed, actually register. // Only after all tests have passed, actually register.
r.collectorsByID[collectorID] = c r.collectorsByID[collectorID] = c
// We only need to store the escapedID if it doesn't match the unescaped one.
if escapedID != collectorID {
r.collectorsByEscapedID[escapedID] = c
}
for hash := range newDescIDs { for hash := range newDescIDs {
r.descIDs[hash] = struct{}{} r.descIDs[hash] = struct{}{}
} }
for name, dimHash := range newDimHashesByName { for name, dimHash := range newDimHashesByName {
r.dimHashesByName[name] = dimHash r.dimHashesByName[name] = dimHash
} }
for hash := range newEscapedIDs {
r.escapedDescIDs[hash] = struct{}{}
}
return nil return nil
} }
@ -367,7 +429,9 @@ func (r *Registry) Unregister(c Collector) bool {
var ( var (
descChan = make(chan *Desc, capDescChan) descChan = make(chan *Desc, capDescChan)
descIDs = map[uint64]struct{}{} descIDs = map[uint64]struct{}{}
escapedDescIDs = map[uint64]struct{}{}
collectorID uint64 // All desc IDs XOR'd together. collectorID uint64 // All desc IDs XOR'd together.
collectorEscapedID uint64
) )
go func() { go func() {
c.Describe(descChan) c.Describe(descChan)
@ -377,6 +441,8 @@ func (r *Registry) Unregister(c Collector) bool {
if _, exists := descIDs[desc.id]; !exists { if _, exists := descIDs[desc.id]; !exists {
collectorID ^= desc.id collectorID ^= desc.id
descIDs[desc.id] = struct{}{} descIDs[desc.id] = struct{}{}
collectorEscapedID ^= desc.escapedID
escapedDescIDs[desc.escapedID] = struct{}{}
} }
} }
@ -391,9 +457,13 @@ func (r *Registry) Unregister(c Collector) bool {
defer r.mtx.Unlock() defer r.mtx.Unlock()
delete(r.collectorsByID, collectorID) delete(r.collectorsByID, collectorID)
delete(r.collectorsByEscapedID, collectorEscapedID)
for id := range descIDs { for id := range descIDs {
delete(r.descIDs, id) delete(r.descIDs, id)
} }
for id := range escapedDescIDs {
delete(r.escapedDescIDs, id)
}
// dimHashesByName is left untouched as those must be consistent // dimHashesByName is left untouched as those must be consistent
// throughout the lifetime of a program. // throughout the lifetime of a program.
return true return true

View File

@ -36,6 +36,7 @@ import (
dto "github.com/prometheus/client_model/go" dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt" "github.com/prometheus/common/expfmt"
"github.com/prometheus/common/model"
"google.golang.org/protobuf/proto" "google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/timestamppb" "google.golang.org/protobuf/types/known/timestamppb"
) )
@ -1181,6 +1182,175 @@ func TestAlreadyRegisteredCollision(t *testing.T) {
} }
} }
func TestAlreadyRegisteredEscapingCollision(t *testing.T) {
oldValidation := model.NameValidationScheme
model.NameValidationScheme = model.UTF8Validation
defer func() {
model.NameValidationScheme = oldValidation
}()
tests := []struct {
name string
// These are functions because hashes that determine collision are created
// at metric creation time.
counterA func() prometheus.Counter
counterB func() prometheus.Counter
expectErr bool
expectLegacyCollision bool
}{
{
name: "no metric name collision",
counterA: func() prometheus.Counter {
return prometheus.NewCounter(prometheus.CounterOpts{
Name: "my_counter_a",
ConstLabels: prometheus.Labels{
"name": "label",
"type": "test",
},
})
},
counterB: func() prometheus.Counter {
return prometheus.NewCounter(prometheus.CounterOpts{
Name: "myAcounterAa",
ConstLabels: prometheus.Labels{
"name": "label",
"type": "test",
},
})
},
},
{
name: "compatibility metric name collision",
counterA: func() prometheus.Counter {
return prometheus.NewCounter(prometheus.CounterOpts{
Name: "my_counter_a",
ConstLabels: prometheus.Labels{
"name": "label",
"type": "test",
},
})
},
counterB: func() prometheus.Counter {
return prometheus.NewCounter(prometheus.CounterOpts{
Name: "my.counter.a",
ConstLabels: prometheus.Labels{
"name": "label",
"type": "test",
},
})
},
expectLegacyCollision: true,
},
{
// This is a regression test to make sure we are not accidentally
// reporting collisions when label values are different.
name: "no label value collision",
counterA: func() prometheus.Counter {
return prometheus.NewCounter(prometheus.CounterOpts{
Name: "my_counter_a",
ConstLabels: prometheus.Labels{
"name": "label.value",
"type": "test",
},
})
},
counterB: func() prometheus.Counter {
return prometheus.NewCounter(prometheus.CounterOpts{
Name: "my_counter_a",
ConstLabels: prometheus.Labels{
"name": "label_value",
"type": "test",
},
})
},
},
{
name: "compatibility label name collision",
counterA: func() prometheus.Counter {
return prometheus.NewCounter(prometheus.CounterOpts{
Name: "my_counter_a",
ConstLabels: prometheus.Labels{
"label.name": "name",
"type": "test",
},
})
},
counterB: func() prometheus.Counter {
return prometheus.NewCounter(prometheus.CounterOpts{
Name: "my_counter_a",
ConstLabels: prometheus.Labels{
"label_name": "name",
"type": "test",
},
})
},
expectErr: true,
expectLegacyCollision: false,
},
{
name: "no utf8 metric name collision",
counterA: func() prometheus.Counter {
return prometheus.NewCounter(prometheus.CounterOpts{
Name: "my_counter_a",
ConstLabels: prometheus.Labels{
"name": "label",
"type": "test",
},
})
},
counterB: func() prometheus.Counter {
return prometheus.NewCounter(prometheus.CounterOpts{
Name: "my.counter.a",
ConstLabels: prometheus.Labels{
"name": "label",
"type": "test",
},
})
},
expectLegacyCollision: true,
},
{
name: "post init flag flip, should collide",
counterA: func() prometheus.Counter {
return prometheus.NewCounter(prometheus.CounterOpts{
Name: "my.counter.a",
ConstLabels: prometheus.Labels{
"name": "label",
"type": "test",
},
})
},
counterB: func() prometheus.Counter {
return prometheus.NewCounter(prometheus.CounterOpts{
Name: "my.counter.a",
ConstLabels: prometheus.Labels{
"name": "label",
"type": "test",
},
})
},
expectErr: true,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
reg := prometheus.NewRegistry()
err := reg.Register(tc.counterA())
if err != nil {
t.Errorf("required no error, got %v", err)
}
err = reg.Register(tc.counterB())
if tc.expectErr != (err != nil) {
t.Errorf("required error state %v, got %v", tc.expectErr, err)
}
if tc.expectLegacyCollision != reg.HasEscapedCollision() {
t.Errorf("legacy collision mismatch, want %v got %v", tc.expectLegacyCollision, reg.HasEscapedCollision())
}
})
}
}
type tGatherer struct { type tGatherer struct {
done bool done bool
err error err error

View File

@ -117,6 +117,10 @@ func (r *wrappingRegisterer) Unregister(c Collector) bool {
}) })
} }
func (r *wrappingRegisterer) HasEscapedCollision() bool {
return r.wrappedRegisterer.HasEscapedCollision()
}
type wrappingCollector struct { type wrappingCollector struct {
wrappedCollector Collector wrappedCollector Collector
prefix string prefix string