Make the collectorID independent of the desc order.

Order independence is actually the intended behavior, and (as a nice
side effect) it makes the collectorID cheaper to calculate.

Also, introduce a separator byte (one that cannot occur in valid UTF-8)
to avoid hash collisions between adjacent strings
(like label values {"ab", "c"} vs. {"a", "bc"}).

Apply the same principles to signature.go.

Change-Id: I607db544f278ed89684fe5fa11abdbc3e03d3061
This commit is contained in:
Bjoern Rabenstein 2014-06-26 13:58:15 +02:00
parent 010dc1af88
commit 998774096c
4 changed files with 79 additions and 52 deletions

View File

@ -14,56 +14,84 @@
package model package model
import ( import (
"bytes"
"hash"
"hash/fnv" "hash/fnv"
"sort"
) )
// cache the signature of an empty label set. // SeparatorByte is a byte that cannot occur in valid UTF-8 sequences and is
var emptyLabelSignature = fnv.New64a().Sum64() // used to separate label names, label values, and other strings from each other
// when calculating their combined hash value (aka signature aka fingerprint).
const SeparatorByte byte = 255
// LabelsToSignature provides a way of building a unique signature var (
// (i.e., fingerprint) for a given label set sequence. // cache the signature of an empty label set.
emptyLabelSignature = fnv.New64a().Sum64()
hashAndBufPool = make(chan *hashAndBuf, 1024)
)
type hashAndBuf struct {
h hash.Hash64
b bytes.Buffer
}
func getHashAndBuf() *hashAndBuf {
select {
case hb := <-hashAndBufPool:
return hb
default:
return &hashAndBuf{h: fnv.New64a()}
}
}
func putHashAndBuf(hb *hashAndBuf) {
select {
case hashAndBufPool <- hb:
default:
}
}
// LabelsToSignature returns a unique signature (i.e., fingerprint) for a given
// label set.
func LabelsToSignature(labels map[string]string) uint64 { func LabelsToSignature(labels map[string]string) uint64 {
if len(labels) == 0 { if len(labels) == 0 {
return emptyLabelSignature return emptyLabelSignature
} }
names := make([]string, 0, len(labels)) var result uint64
for name := range labels { hb := getHashAndBuf()
names = append(names, name) defer putHashAndBuf(hb)
for k, v := range labels {
hb.b.WriteString(k)
hb.b.WriteByte(SeparatorByte)
hb.b.WriteString(v)
hb.h.Write(hb.b.Bytes())
result ^= hb.h.Sum64()
hb.h.Reset()
hb.b.Reset()
}
return result
} }
sort.Strings(names) // LabelValuesToSignature returns a unique signature (i.e., fingerprint) for the
// values of a given label set.
hasher := fnv.New64a()
for _, name := range names {
hasher.Write([]byte(name))
hasher.Write([]byte(labels[name]))
}
return hasher.Sum64()
}
// LabelValuesToSignature provides a way of building a unique signature
// (i.e., fingerprint) for a given set of label's values.
func LabelValuesToSignature(labels map[string]string) uint64 { func LabelValuesToSignature(labels map[string]string) uint64 {
if len(labels) == 0 { if len(labels) == 0 {
return emptyLabelSignature return emptyLabelSignature
} }
names := make([]string, 0, len(labels)) var result uint64
for name := range labels { hb := getHashAndBuf()
names = append(names, name) defer putHashAndBuf(hb)
for _, v := range labels {
hb.b.WriteString(v)
hb.h.Write(hb.b.Bytes())
result ^= hb.h.Sum64()
hb.h.Reset()
hb.b.Reset()
} }
return result
sort.Strings(names)
hasher := fnv.New64a()
for _, name := range names {
hasher.Write([]byte(labels[name]))
}
return hasher.Sum64()
} }

View File

@ -29,7 +29,7 @@ func testLabelsToSignature(t testing.TB) {
}, },
{ {
in: map[string]string{"name": "garland, briggs", "fear": "love is not enough"}, in: map[string]string{"name": "garland, briggs", "fear": "love is not enough"},
out: 12256296522964301276, out: 12952432476264840823,
}, },
} }
@ -88,11 +88,11 @@ func BenchmarkLabelValuesToSignatureSingle(b *testing.B) {
} }
func BenchmarkLabelValuesToSignatureDouble(b *testing.B) { func BenchmarkLabelValuesToSignatureDouble(b *testing.B) {
benchmarkLabelValuesToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 5670080368112985613) benchmarkLabelValuesToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 8893559499616767364)
} }
func BenchmarkLabelValuesToSignatureTriple(b *testing.B) { func BenchmarkLabelValuesToSignatureTriple(b *testing.B) {
benchmarkLabelValuesToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 2503588453955211397) benchmarkLabelValuesToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 1685970066862087833)
} }
func benchmarkLabelToSignature(b *testing.B, l map[string]string, e uint64) { func benchmarkLabelToSignature(b *testing.B, l map[string]string, e uint64) {
@ -108,13 +108,13 @@ func BenchmarkLabelToSignatureScalar(b *testing.B) {
} }
func BenchmarkLabelToSignatureSingle(b *testing.B) { func BenchmarkLabelToSignatureSingle(b *testing.B) {
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 2231159900647003583) benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 5147259542624943964)
} }
func BenchmarkLabelToSignatureDouble(b *testing.B) { func BenchmarkLabelToSignatureDouble(b *testing.B) {
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 14091549261072856487) benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528)
} }
func BenchmarkLabelToSignatureTriple(b *testing.B) { func BenchmarkLabelToSignatureTriple(b *testing.B) {
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 9120920685107702735) benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676)
} }

View File

@ -134,6 +134,7 @@ func NewDesc(fqName, help string, variableLabels []string, constLabels Labels) *
for _, val := range labelValues { for _, val := range labelValues {
b.Reset() b.Reset()
b.WriteString(val) b.WriteString(val)
b.WriteByte(model.SeparatorByte)
h.Write(b.Bytes()) h.Write(b.Bytes())
} }
d.id = h.Sum64() d.id = h.Sum64()
@ -144,10 +145,12 @@ func NewDesc(fqName, help string, variableLabels []string, constLabels Labels) *
h.Reset() h.Reset()
b.Reset() b.Reset()
b.WriteString(help) b.WriteString(help)
b.WriteByte(model.SeparatorByte)
h.Write(b.Bytes()) h.Write(b.Bytes())
for _, labelName := range labelNames { for _, labelName := range labelNames {
b.Reset() b.Reset()
b.WriteString(labelName) b.WriteString(labelName)
b.WriteByte(model.SeparatorByte)
h.Write(b.Bytes()) h.Write(b.Bytes())
} }
d.dimHash = h.Sum64() d.dimHash = h.Sum64()

View File

@ -21,7 +21,6 @@ package prometheus
import ( import (
"bytes" "bytes"
"encoding/binary"
"errors" "errors"
"fmt" "fmt"
"hash/fnv" "hash/fnv"
@ -35,6 +34,7 @@ import (
"code.google.com/p/goprotobuf/proto" "code.google.com/p/goprotobuf/proto"
"github.com/prometheus/client_golang/_vendor/goautoneg" "github.com/prometheus/client_golang/_vendor/goautoneg"
"github.com/prometheus/client_golang/model"
"github.com/prometheus/client_golang/text" "github.com/prometheus/client_golang/text"
) )
@ -207,8 +207,7 @@ func (r *registry) Register(c Collector) (Collector, error) {
newDescIDs := map[uint64]struct{}{} newDescIDs := map[uint64]struct{}{}
newDimHashesByName := map[string]uint64{} newDimHashesByName := map[string]uint64{}
collectorIDHash := fnv.New64a() var collectorID uint64 // Just a sum of all desc IDs.
buf := make([]byte, 8)
var duplicateDescErr error var duplicateDescErr error
r.mtx.Lock() r.mtx.Lock()
@ -227,12 +226,11 @@ func (r *registry) Register(c Collector) (Collector, error) {
duplicateDescErr = fmt.Errorf("descriptor %s already exists with the same fully-qualified name and const label values", desc) duplicateDescErr = fmt.Errorf("descriptor %s already exists with the same fully-qualified name and const label values", desc)
} }
// If it is not a duplicate desc in this collector, add it to // If it is not a duplicate desc in this collector, add it to
// the hash. (We allow duplicate descs within the same // the collectorID. (We allow duplicate descs within the same
// collector, but their existence must be a no-op.) // collector, but their existence must be a no-op.)
if _, exists := newDescIDs[desc.id]; !exists { if _, exists := newDescIDs[desc.id]; !exists {
newDescIDs[desc.id] = struct{}{} newDescIDs[desc.id] = struct{}{}
binary.BigEndian.PutUint64(buf, desc.id) collectorID += desc.id
collectorIDHash.Write(buf)
} }
// Are all the label names and the help string consistent with // Are all the label names and the help string consistent with
@ -257,7 +255,6 @@ func (r *registry) Register(c Collector) (Collector, error) {
if len(newDescIDs) == 0 { if len(newDescIDs) == 0 {
return nil, errors.New("collector has no descriptors") return nil, errors.New("collector has no descriptors")
} }
collectorID := collectorIDHash.Sum64()
if existing, exists := r.collectorsByID[collectorID]; exists { if existing, exists := r.collectorsByID[collectorID]; exists {
return existing, errAlreadyReg return existing, errAlreadyReg
} }
@ -294,16 +291,13 @@ func (r *registry) Unregister(c Collector) bool {
}() }()
descIDs := map[uint64]struct{}{} descIDs := map[uint64]struct{}{}
collectorIDHash := fnv.New64a() var collectorID uint64 // Just a sum of the desc IDs.
buf := make([]byte, 8)
for desc := range descChan { for desc := range descChan {
if _, exists := descIDs[desc.id]; !exists { if _, exists := descIDs[desc.id]; !exists {
binary.BigEndian.PutUint64(buf, desc.id) collectorID += desc.id
collectorIDHash.Write(buf)
descIDs[desc.id] = struct{}{} descIDs[desc.id] = struct{}{}
} }
} }
collectorID := collectorIDHash.Sum64()
r.mtx.RLock() r.mtx.RLock()
if _, exists := r.collectorsByID[collectorID]; !exists { if _, exists := r.collectorsByID[collectorID]; !exists {
@ -488,10 +482,12 @@ func (r *registry) checkConsistency(metricFamily *dto.MetricFamily, dtoMetric *d
h := fnv.New64a() h := fnv.New64a()
var buf bytes.Buffer var buf bytes.Buffer
buf.WriteString(desc.fqName) buf.WriteString(desc.fqName)
buf.WriteByte(model.SeparatorByte)
h.Write(buf.Bytes()) h.Write(buf.Bytes())
for _, lp := range dtoMetric.Label { for _, lp := range dtoMetric.Label {
buf.Reset() buf.Reset()
buf.WriteString(lp.GetValue()) buf.WriteString(lp.GetValue())
buf.WriteByte(model.SeparatorByte)
h.Write(buf.Bytes()) h.Write(buf.Bytes())
} }
metricHash := h.Sum64() metricHash := h.Sum64()