Make the collectorID independent of the desc order.

This is actually the intended behavior, and (as a nice side effect)
makes things cheaper to calculate.

Also, introduce a separator byte to avoid hash collisions between
concatenated strings (e.g., label values {"ab", "c"} vs. {"a", "bc"}).

Apply the same principles to signature.go.

Change-Id: I607db544f278ed89684fe5fa11abdbc3e03d3061
This commit is contained in:
Bjoern Rabenstein 2014-06-26 13:58:15 +02:00
parent 010dc1af88
commit 998774096c
4 changed files with 79 additions and 52 deletions

View File

@ -14,56 +14,84 @@
package model
import (
"bytes"
"hash"
"hash/fnv"
"sort"
)
// cache the signature of an empty label set.
var emptyLabelSignature = fnv.New64a().Sum64()
// SeparatorByte is a byte that cannot occur in valid UTF-8 sequences and is
// used to separate label names, label values, and other strings from each other
// when calculating their combined hash value (aka signature aka fingerprint).
//
// Writing it after each string marks where that string ends, so that, as long
// as the strings are valid UTF-8, sequences such as "ab","c" and "a","bc" feed
// different bytes to the hash.
const SeparatorByte byte = 255
// LabelsToSignature provides a way of building a unique signature
// (i.e., fingerprint) for a given label set sequence.
var (
// cache the signature of an empty label set.
emptyLabelSignature = fnv.New64a().Sum64()
// hashAndBufPool is a buffered channel (capacity 1024) used as a free list
// of idle hashAndBuf values. getHashAndBuf takes from it and putHashAndBuf
// returns to it; both use non-blocking channel operations.
hashAndBufPool = make(chan *hashAndBuf, 1024)
)
// hashAndBuf pairs a 64-bit hash with a scratch buffer so that both can be
// reused together via hashAndBufPool.
type hashAndBuf struct {
// h is the hash state; getHashAndBuf creates it with fnv.New64a().
h hash.Hash64
// b is scratch space in which the bytes to be hashed are assembled before
// being written to h.
b bytes.Buffer
}
// getHashAndBuf returns a hashAndBuf ready for use. It takes an idle one from
// hashAndBufPool if one is immediately available; otherwise it allocates a new
// one backed by a fresh FNV-1a 64-bit hash. It never blocks.
func getHashAndBuf() *hashAndBuf {
	select {
	case pooled := <-hashAndBufPool:
		return pooled
	default:
		// Pool is empty; fall through and allocate.
	}
	return &hashAndBuf{h: fnv.New64a()}
}
// putHashAndBuf offers hb back to hashAndBufPool for reuse. The send is
// non-blocking: if the pool is already full, hb is simply dropped. hb is not
// reset here, so callers are expected to hand it back in a clean state.
func putHashAndBuf(hb *hashAndBuf) {
select {
case hashAndBufPool <- hb:
// Stored for a later getHashAndBuf call.
default:
// Pool is at capacity; discard hb instead of blocking.
}
}
// LabelsToSignature returns a unique signature (i.e., fingerprint) for a given
// label set.
//
// An empty (or nil) label set yields the cached emptyLabelSignature. Otherwise
// every name/value pair is hashed on its own as name, SeparatorByte, value, and
// the per-pair hashes are XORed together, so the result does not depend on map
// iteration order and no sorting of label names is required.
func LabelsToSignature(labels map[string]string) uint64 {
if len(labels) == 0 {
return emptyLabelSignature
}
names := make([]string, 0, len(labels))
for name := range labels {
names = append(names, name)
var result uint64
hb := getHashAndBuf()
// Hand the hash/buffer pair back to the pool when done.
defer putHashAndBuf(hb)
for k, v := range labels {
hb.b.WriteString(k)
// The separator marks where the name ends and the value begins, so that
// {"ab": "c"} and {"a": "bc"} do not hash the same bytes.
hb.b.WriteByte(SeparatorByte)
hb.b.WriteString(v)
hb.h.Write(hb.b.Bytes())
// XOR is commutative and associative, so the order in which the pairs
// are visited does not affect the result.
result ^= hb.h.Sum64()
// Clear hash state and buffer so the next pair is hashed independently.
hb.h.Reset()
hb.b.Reset()
}
return result
}
sort.Strings(names)
hasher := fnv.New64a()
for _, name := range names {
hasher.Write([]byte(name))
hasher.Write([]byte(labels[name]))
}
return hasher.Sum64()
}
// LabelValuesToSignature provides a way of building a unique signature
// (i.e., fingerprint) for a given set of label's values.
// LabelValuesToSignature returns a unique signature (i.e., fingerprint) for the
// values of a given label set.
//
// An empty (or nil) label set yields the cached emptyLabelSignature. Otherwise
// each value is hashed on its own and the per-value hashes are XORed together;
// label names do not enter the hash at all.
//
// NOTE(review): because the combination is a plain XOR, any value that occurs
// an even number of times cancels itself out (two labels sharing one value
// contribute 0 in total). Confirm that callers can tolerate such collisions.
func LabelValuesToSignature(labels map[string]string) uint64 {
if len(labels) == 0 {
return emptyLabelSignature
}
names := make([]string, 0, len(labels))
for name := range labels {
names = append(names, name)
var result uint64
hb := getHashAndBuf()
// Hand the hash/buffer pair back to the pool when done.
defer putHashAndBuf(hb)
for _, v := range labels {
hb.b.WriteString(v)
hb.h.Write(hb.b.Bytes())
// XOR keeps the result independent of map iteration order.
result ^= hb.h.Sum64()
// Clear hash state and buffer so the next value is hashed independently.
hb.h.Reset()
hb.b.Reset()
}
sort.Strings(names)
hasher := fnv.New64a()
for _, name := range names {
hasher.Write([]byte(labels[name]))
}
return hasher.Sum64()
return result
}

View File

@ -29,7 +29,7 @@ func testLabelsToSignature(t testing.TB) {
},
{
in: map[string]string{"name": "garland, briggs", "fear": "love is not enough"},
out: 12256296522964301276,
out: 12952432476264840823,
},
}
@ -88,11 +88,11 @@ func BenchmarkLabelValuesToSignatureSingle(b *testing.B) {
}
// BenchmarkLabelValuesToSignatureDouble runs benchmarkLabelValuesToSignature
// with a two-label set; the final argument is presumably the signature the
// helper expects for that set (the helper is not shown here).
func BenchmarkLabelValuesToSignatureDouble(b *testing.B) {
benchmarkLabelValuesToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 5670080368112985613)
benchmarkLabelValuesToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 8893559499616767364)
}
// BenchmarkLabelValuesToSignatureTriple runs benchmarkLabelValuesToSignature
// with a three-label set; the final argument is presumably the signature the
// helper expects for that set (the helper is not shown here).
func BenchmarkLabelValuesToSignatureTriple(b *testing.B) {
benchmarkLabelValuesToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 2503588453955211397)
benchmarkLabelValuesToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 1685970066862087833)
}
func benchmarkLabelToSignature(b *testing.B, l map[string]string, e uint64) {
@ -108,13 +108,13 @@ func BenchmarkLabelToSignatureScalar(b *testing.B) {
}
// BenchmarkLabelToSignatureSingle runs benchmarkLabelToSignature with a
// one-label set; the final argument (parameter e of the helper) is presumably
// the expected signature for that set.
func BenchmarkLabelToSignatureSingle(b *testing.B) {
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 2231159900647003583)
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 5147259542624943964)
}
// BenchmarkLabelToSignatureDouble runs benchmarkLabelToSignature with a
// two-label set; the final argument (parameter e of the helper) is presumably
// the expected signature for that set.
func BenchmarkLabelToSignatureDouble(b *testing.B) {
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 14091549261072856487)
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528)
}
// BenchmarkLabelToSignatureTriple runs benchmarkLabelToSignature with a
// three-label set; the final argument (parameter e of the helper) is presumably
// the expected signature for that set.
func BenchmarkLabelToSignatureTriple(b *testing.B) {
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 9120920685107702735)
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676)
}

View File

@ -134,6 +134,7 @@ func NewDesc(fqName, help string, variableLabels []string, constLabels Labels) *
for _, val := range labelValues {
b.Reset()
b.WriteString(val)
b.WriteByte(model.SeparatorByte)
h.Write(b.Bytes())
}
d.id = h.Sum64()
@ -144,10 +145,12 @@ func NewDesc(fqName, help string, variableLabels []string, constLabels Labels) *
h.Reset()
b.Reset()
b.WriteString(help)
b.WriteByte(model.SeparatorByte)
h.Write(b.Bytes())
for _, labelName := range labelNames {
b.Reset()
b.WriteString(labelName)
b.WriteByte(model.SeparatorByte)
h.Write(b.Bytes())
}
d.dimHash = h.Sum64()

View File

@ -21,7 +21,6 @@ package prometheus
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"hash/fnv"
@ -35,6 +34,7 @@ import (
"code.google.com/p/goprotobuf/proto"
"github.com/prometheus/client_golang/_vendor/goautoneg"
"github.com/prometheus/client_golang/model"
"github.com/prometheus/client_golang/text"
)
@ -207,8 +207,7 @@ func (r *registry) Register(c Collector) (Collector, error) {
newDescIDs := map[uint64]struct{}{}
newDimHashesByName := map[string]uint64{}
collectorIDHash := fnv.New64a()
buf := make([]byte, 8)
var collectorID uint64 // Just a sum of all desc IDs.
var duplicateDescErr error
r.mtx.Lock()
@ -227,12 +226,11 @@ func (r *registry) Register(c Collector) (Collector, error) {
duplicateDescErr = fmt.Errorf("descriptor %s already exists with the same fully-qualified name and const label values", desc)
}
// If it is not a duplicate desc in this collector, add it to
// the hash. (We allow duplicate descs within the same
// the collectorID. (We allow duplicate descs within the same
// collector, but their existence must be a no-op.)
if _, exists := newDescIDs[desc.id]; !exists {
newDescIDs[desc.id] = struct{}{}
binary.BigEndian.PutUint64(buf, desc.id)
collectorIDHash.Write(buf)
collectorID += desc.id
}
// Are all the label names and the help string consistent with
@ -257,7 +255,6 @@ func (r *registry) Register(c Collector) (Collector, error) {
if len(newDescIDs) == 0 {
return nil, errors.New("collector has no descriptors")
}
collectorID := collectorIDHash.Sum64()
if existing, exists := r.collectorsByID[collectorID]; exists {
return existing, errAlreadyReg
}
@ -294,16 +291,13 @@ func (r *registry) Unregister(c Collector) bool {
}()
descIDs := map[uint64]struct{}{}
collectorIDHash := fnv.New64a()
buf := make([]byte, 8)
var collectorID uint64 // Just a sum of the desc IDs.
for desc := range descChan {
if _, exists := descIDs[desc.id]; !exists {
binary.BigEndian.PutUint64(buf, desc.id)
collectorIDHash.Write(buf)
collectorID += desc.id
descIDs[desc.id] = struct{}{}
}
}
collectorID := collectorIDHash.Sum64()
r.mtx.RLock()
if _, exists := r.collectorsByID[collectorID]; !exists {
@ -488,10 +482,12 @@ func (r *registry) checkConsistency(metricFamily *dto.MetricFamily, dtoMetric *d
h := fnv.New64a()
var buf bytes.Buffer
buf.WriteString(desc.fqName)
buf.WriteByte(model.SeparatorByte)
h.Write(buf.Bytes())
for _, lp := range dtoMetric.Label {
buf.Reset()
buf.WriteString(lp.GetValue())
buf.WriteByte(model.SeparatorByte)
h.Write(buf.Bytes())
}
metricHash := h.Sum64()