From 998774096c880e98265b88b5de505b5aa0b16fb1 Mon Sep 17 00:00:00 2001 From: Bjoern Rabenstein Date: Thu, 26 Jun 2014 13:58:15 +0200 Subject: [PATCH] Make the collectorID independent of the desc order. This is actually the intended behavior, and (as a nice side effect) makes things cheaper to calculate. Also, introduce a separator character to avoid hash collisions (like label values {"ab","c"} vs {"a", "bc"}). Apply the same principles to signature.go. Change-Id: I607db544f278ed89684fe5fa11abdbc3e03d3061 --- model/signature.go | 96 ++++++++++++++++++++++++++--------------- model/signature_test.go | 12 +++--- prometheus/desc.go | 3 ++ prometheus/registry.go | 20 ++++----- 4 files changed, 79 insertions(+), 52 deletions(-) diff --git a/model/signature.go b/model/signature.go index 1285aa0..9623677 100644 --- a/model/signature.go +++ b/model/signature.go @@ -14,56 +14,84 @@ package model import ( + "bytes" + "hash" "hash/fnv" - "sort" ) -// cache the signature of an empty label set. -var emptyLabelSignature = fnv.New64a().Sum64() +// SeparatorByte is a byte that cannot occur in valid UTF-8 sequences and is +// used to separate label names, label values, and other strings from each other +// when calculating their combined hash value (aka signature aka fingerprint). +const SeparatorByte byte = 255 -// LabelsToSignature provides a way of building a unique signature -// (i.e., fingerprint) for a given label set sequence. +var ( + // cache the signature of an empty label set. + emptyLabelSignature = fnv.New64a().Sum64() + + hashAndBufPool = make(chan *hashAndBuf, 1024) +) + +type hashAndBuf struct { + h hash.Hash64 + b bytes.Buffer +} + +func getHashAndBuf() *hashAndBuf { + select { + case hb := <-hashAndBufPool: + return hb + default: + return &hashAndBuf{h: fnv.New64a()} + } +} + +func putHashAndBuf(hb *hashAndBuf) { + select { + case hashAndBufPool <- hb: + default: + } +} + +// LabelsToSignature returns a unique signature (i.e., fingerprint) for a given +// label set. func LabelsToSignature(labels map[string]string) uint64 { if len(labels) == 0 { return emptyLabelSignature } - names := make([]string, 0, len(labels)) - for name := range labels { - names = append(names, name) + var result uint64 + hb := getHashAndBuf() + defer putHashAndBuf(hb) + + for k, v := range labels { + hb.b.WriteString(k) + hb.b.WriteByte(SeparatorByte) + hb.b.WriteString(v) + hb.h.Write(hb.b.Bytes()) + result ^= hb.h.Sum64() + hb.h.Reset() + hb.b.Reset() } - - sort.Strings(names) - - hasher := fnv.New64a() - - for _, name := range names { - hasher.Write([]byte(name)) - hasher.Write([]byte(labels[name])) - } - - return hasher.Sum64() + return result } -// LabelValuesToSignature provides a way of building a unique signature -// (i.e., fingerprint) for a given set of label's values. +// LabelValuesToSignature returns a unique signature (i.e., fingerprint) for the +// values of a given label set. func LabelValuesToSignature(labels map[string]string) uint64 { if len(labels) == 0 { return emptyLabelSignature } - names := make([]string, 0, len(labels)) - for name := range labels { - names = append(names, name) + var result uint64 + hb := getHashAndBuf() + defer putHashAndBuf(hb) + + for _, v := range labels { + hb.b.WriteString(v) + hb.h.Write(hb.b.Bytes()) + result ^= hb.h.Sum64() + hb.h.Reset() + hb.b.Reset() } - - sort.Strings(names) - - hasher := fnv.New64a() - - for _, name := range names { - hasher.Write([]byte(labels[name])) - } - - return hasher.Sum64() + return result } diff --git a/model/signature_test.go b/model/signature_test.go index 8f4abcf..6e7afbc 100644 --- a/model/signature_test.go +++ b/model/signature_test.go @@ -29,7 +29,7 @@ func testLabelsToSignature(t testing.TB) { }, { in: map[string]string{"name": "garland, briggs", "fear": "love is not enough"}, - out: 12256296522964301276, + out: 12952432476264840823, }, } @@ -88,11 +88,11 @@ func BenchmarkLabelValuesToSignatureSingle(b *testing.B) { } func BenchmarkLabelValuesToSignatureDouble(b *testing.B) { - benchmarkLabelValuesToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 5670080368112985613) + benchmarkLabelValuesToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 8893559499616767364) } func BenchmarkLabelValuesToSignatureTriple(b *testing.B) { - benchmarkLabelValuesToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 2503588453955211397) + benchmarkLabelValuesToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 1685970066862087833) } func benchmarkLabelToSignature(b *testing.B, l map[string]string, e uint64) { @@ -108,13 +108,13 @@ func BenchmarkLabelToSignatureScalar(b *testing.B) { } func BenchmarkLabelToSignatureSingle(b *testing.B) { - benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 2231159900647003583) + benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 5147259542624943964) } func BenchmarkLabelToSignatureDouble(b *testing.B) { - benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 14091549261072856487) + benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528) } func BenchmarkLabelToSignatureTriple(b *testing.B) { - benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 9120920685107702735) + benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676) } diff --git a/prometheus/desc.go b/prometheus/desc.go index e3397e4..b4f8652 100644 --- a/prometheus/desc.go +++ b/prometheus/desc.go @@ -134,6 +134,7 @@ func NewDesc(fqName, help string, variableLabels []string, constLabels Labels) * for _, val := range labelValues { b.Reset() b.WriteString(val) + b.WriteByte(model.SeparatorByte) h.Write(b.Bytes()) } d.id = h.Sum64() @@ -144,10 +145,12 @@ func NewDesc(fqName, help string, variableLabels []string, constLabels Labels) * h.Reset() b.Reset() b.WriteString(help) + b.WriteByte(model.SeparatorByte) h.Write(b.Bytes()) for _, labelName := range labelNames { b.Reset() b.WriteString(labelName) + b.WriteByte(model.SeparatorByte) h.Write(b.Bytes()) } d.dimHash = h.Sum64() diff --git a/prometheus/registry.go b/prometheus/registry.go index 52bbd92..32e43cb 100644 --- a/prometheus/registry.go +++ b/prometheus/registry.go @@ -21,7 +21,6 @@ package prometheus import ( "bytes" - "encoding/binary" "errors" "fmt" "hash/fnv" @@ -35,6 +34,7 @@ import ( "code.google.com/p/goprotobuf/proto" "github.com/prometheus/client_golang/_vendor/goautoneg" + "github.com/prometheus/client_golang/model" "github.com/prometheus/client_golang/text" ) @@ -207,8 +207,7 @@ func (r *registry) Register(c Collector) (Collector, error) { newDescIDs := map[uint64]struct{}{} newDimHashesByName := map[string]uint64{} - collectorIDHash := fnv.New64a() - buf := make([]byte, 8) + var collectorID uint64 // Just a sum of all desc IDs. var duplicateDescErr error r.mtx.Lock() @@ -227,12 +226,11 @@ func (r *registry) Register(c Collector) (Collector, error) { duplicateDescErr = fmt.Errorf("descriptor %s already exists with the same fully-qualified name and const label values", desc) } // If it is not a duplicate desc in this collector, add it to - // the hash. (We allow duplicate descs within the same + // the collectorID. (We allow duplicate descs within the same // collector, but their existence must be a no-op.) if _, exists := newDescIDs[desc.id]; !exists { newDescIDs[desc.id] = struct{}{} - binary.BigEndian.PutUint64(buf, desc.id) - collectorIDHash.Write(buf) + collectorID += desc.id } // Are all the label names and the help string consistent with @@ -257,7 +255,6 @@ func (r *registry) Register(c Collector) (Collector, error) { if len(newDescIDs) == 0 { return nil, errors.New("collector has no descriptors") } - collectorID := collectorIDHash.Sum64() if existing, exists := r.collectorsByID[collectorID]; exists { return existing, errAlreadyReg } @@ -294,16 +291,13 @@ func (r *registry) Unregister(c Collector) bool { }() descIDs := map[uint64]struct{}{} - collectorIDHash := fnv.New64a() - buf := make([]byte, 8) + var collectorID uint64 // Just a sum of the desc IDs. for desc := range descChan { if _, exists := descIDs[desc.id]; !exists { - binary.BigEndian.PutUint64(buf, desc.id) - collectorIDHash.Write(buf) + collectorID += desc.id descIDs[desc.id] = struct{}{} } } - collectorID := collectorIDHash.Sum64() r.mtx.RLock() if _, exists := r.collectorsByID[collectorID]; !exists { @@ -488,10 +482,12 @@ func (r *registry) checkConsistency(metricFamily *dto.MetricFamily, dtoMetric *d h := fnv.New64a() var buf bytes.Buffer buf.WriteString(desc.fqName) + buf.WriteByte(model.SeparatorByte) h.Write(buf.Bytes()) for _, lp := range dtoMetric.Label { buf.Reset() buf.WriteString(lp.GetValue()) + buf.WriteByte(model.SeparatorByte) h.Write(buf.Bytes()) } metricHash := h.Sum64()