Move registry hashing to xxhash

xxhash is a much stronger hash function than fnv64a and comparably fast
(with a super-fast assembly implementation for amd64).

Performance is not critical here anyway.

The old fnv64a is kept for vectors, where collision detection is in
place and the weakness of the hashing doesn't matter that much. I
implemented a vector version with xxhash and found that xxhash is
slower in all cases except at very high cardinality (where it is
only slightly faster). Also, `xxhash.New` comes with an allocation
of 80 bytes. Thus, to keep vectors alloc-free, we would need to add a
`sync.Pool`, which would incur additional performance overhead.

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
beorn7 2019-10-14 20:41:45 +02:00
parent c2e3855f3b
commit ee1078a03c
5 changed files with 27 additions and 19 deletions

1
go.mod
View File

@ -2,6 +2,7 @@ module github.com/prometheus/client_golang
require ( require (
github.com/beorn7/perks v1.0.1 github.com/beorn7/perks v1.0.1
github.com/cespare/xxhash/v2 v2.1.0
github.com/golang/protobuf v1.3.2 github.com/golang/protobuf v1.3.2
github.com/json-iterator/go v1.1.7 github.com/json-iterator/go v1.1.7
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4

2
go.sum
View File

@ -8,6 +8,8 @@ github.com/beorn7/perks v1.0.0 h1:HWo1m869IqiPhD389kmkxeTalrjNbbJTC8LXupb+sl0=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.1.0 h1:yTUvW7Vhb89inJ+8irsUqiWjh8iT6sQPZiQzI6ReGkA=
github.com/cespare/xxhash/v2 v2.1.0/go.mod h1:dgIUBU3pDso/gPgZ1osOZ0iQf77oPR28Tjxl5dIMyVM=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=

View File

@ -19,6 +19,7 @@ import (
"sort" "sort"
"strings" "strings"
"github.com/cespare/xxhash/v2"
"github.com/golang/protobuf/proto" "github.com/golang/protobuf/proto"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
@ -126,24 +127,24 @@ func NewDesc(fqName, help string, variableLabels []string, constLabels Labels) *
return d return d
} }
vh := hashNew() xxh := xxhash.New()
for _, val := range labelValues { for _, val := range labelValues {
vh = hashAdd(vh, val) xxh.WriteString(val)
vh = hashAddByte(vh, separatorByte) xxh.Write(separatorByteSlice)
} }
d.id = vh d.id = xxh.Sum64()
// Sort labelNames so that order doesn't matter for the hash. // Sort labelNames so that order doesn't matter for the hash.
sort.Strings(labelNames) sort.Strings(labelNames)
// Now hash together (in this order) the help string and the sorted // Now hash together (in this order) the help string and the sorted
// label names. // label names.
lh := hashNew() xxh.Reset()
lh = hashAdd(lh, help) xxh.WriteString(help)
lh = hashAddByte(lh, separatorByte) xxh.Write(separatorByteSlice)
for _, labelName := range labelNames { for _, labelName := range labelNames {
lh = hashAdd(lh, labelName) xxh.WriteString(labelName)
lh = hashAddByte(lh, separatorByte) xxh.Write(separatorByteSlice)
} }
d.dimHash = lh d.dimHash = xxh.Sum64()
d.constLabelPairs = make([]*dto.LabelPair, 0, len(constLabels)) d.constLabelPairs = make([]*dto.LabelPair, 0, len(constLabels))
for n, v := range constLabels { for n, v := range constLabels {

View File

@ -24,6 +24,8 @@ import (
const separatorByte byte = 255 const separatorByte byte = 255
var separatorByteSlice = []byte{255} // For convenient use with xxhash.
// A Metric models a single sample value with its meta data being exported to // A Metric models a single sample value with its meta data being exported to
// Prometheus. Implementations of Metric in this package are Gauge, Counter, // Prometheus. Implementations of Metric in this package are Gauge, Counter,
// Histogram, Summary, and Untyped. // Histogram, Summary, and Untyped.

View File

@ -25,6 +25,7 @@ import (
"sync" "sync"
"unicode/utf8" "unicode/utf8"
"github.com/cespare/xxhash/v2"
"github.com/golang/protobuf/proto" "github.com/golang/protobuf/proto"
"github.com/prometheus/common/expfmt" "github.com/prometheus/common/expfmt"
@ -875,9 +876,9 @@ func checkMetricConsistency(
} }
// Is the metric unique (i.e. no other metric with the same name and the same labels)? // Is the metric unique (i.e. no other metric with the same name and the same labels)?
h := hashNew() h := xxhash.New()
h = hashAdd(h, name) h.WriteString(name)
h = hashAddByte(h, separatorByte) h.Write(separatorByteSlice)
// Make sure label pairs are sorted. We depend on it for the consistency // Make sure label pairs are sorted. We depend on it for the consistency
// check. // check.
if !sort.IsSorted(labelPairSorter(dtoMetric.Label)) { if !sort.IsSorted(labelPairSorter(dtoMetric.Label)) {
@ -888,18 +889,19 @@ func checkMetricConsistency(
dtoMetric.Label = copiedLabels dtoMetric.Label = copiedLabels
} }
for _, lp := range dtoMetric.Label { for _, lp := range dtoMetric.Label {
h = hashAdd(h, lp.GetName()) h.WriteString(lp.GetName())
h = hashAddByte(h, separatorByte) h.Write(separatorByteSlice)
h = hashAdd(h, lp.GetValue()) h.WriteString(lp.GetValue())
h = hashAddByte(h, separatorByte) h.Write(separatorByteSlice)
} }
if _, exists := metricHashes[h]; exists { hSum := h.Sum64()
if _, exists := metricHashes[hSum]; exists {
return fmt.Errorf( return fmt.Errorf(
"collected metric %q { %s} was collected before with the same name and label values", "collected metric %q { %s} was collected before with the same name and label values",
name, dtoMetric, name, dtoMetric,
) )
} }
metricHashes[h] = struct{}{} metricHashes[hSum] = struct{}{}
return nil return nil
} }