Rework the signature aka fingerprint functions.

Move everything over to a more expensive but less collision-prone hashing scheme.
However, keep the old fingerprinting under the name "FastFingerprint"
to be used in the storage layer (where we need collision detection
anyway).
This commit is contained in:
beorn7 2015-05-05 18:02:21 +02:00
parent 6efaf95d98
commit 0e0e6bff80
4 changed files with 161 additions and 56 deletions

View File

@ -28,11 +28,13 @@ type Metric map[LabelName]LabelValue
// Equal reports whether m and o have equal fingerprints. The label maps
// themselves are not compared, so two different metrics whose fingerprints
// collide are reported as equal.
func (m Metric) Equal(o Metric) bool {
// TODO do an actual map comparison
return m.Fingerprint().Equal(o.Fingerprint())
}
// Before reports whether the fingerprint of m sorts before the fingerprint of
// o according to Fingerprint.Less. The ordering is derived from the
// fingerprints only, not from the label maps themselves.
func (m Metric) Before(o Metric) bool {
// TODO do an actual map comparison
return m.Fingerprint().Less(o.Fingerprint())
}
@ -67,6 +69,12 @@ func (m Metric) Fingerprint() Fingerprint {
return metricToFingerprint(m)
}
// FastFingerprint returns a Metric's Fingerprint calculated by a faster hashing
// algorithm, which is, however, more susceptible to hash collisions.
func (m Metric) FastFingerprint() Fingerprint {
return metricToFastFingerprint(m)
}
// Clone returns a copy of the Metric.
func (m Metric) Clone() Metric {
clone := Metric{}

View File

@ -17,12 +17,14 @@ import "testing"
func testMetric(t testing.TB) {
var scenarios = []struct {
input Metric
fingerprint Fingerprint
input Metric
fingerprint Fingerprint
fastFingerprint Fingerprint
}{
{
input: Metric{},
fingerprint: 14695981039346656037,
input: Metric{},
fingerprint: 14695981039346656037,
fastFingerprint: 14695981039346656037,
},
{
input: Metric{
@ -30,27 +32,31 @@ func testMetric(t testing.TB) {
"occupation": "robot",
"manufacturer": "westinghouse",
},
fingerprint: 11310079640881077873,
fingerprint: 5911716720268894962,
fastFingerprint: 11310079640881077873,
},
{
input: Metric{
"x": "y",
},
fingerprint: 13948396922932177635,
fingerprint: 8241431561484471700,
fastFingerprint: 13948396922932177635,
},
{
input: Metric{
"a": "bb",
"b": "c",
},
fingerprint: 3198632812309449502,
fingerprint: 3016285359649981711,
fastFingerprint: 3198632812309449502,
},
{
input: Metric{
"a": "b",
"bb": "c",
},
fingerprint: 5774953389407657638,
fingerprint: 7122421792099404749,
fastFingerprint: 5774953389407657638,
},
}
@ -58,6 +64,9 @@ func testMetric(t testing.TB) {
if scenario.fingerprint != scenario.input.Fingerprint() {
t.Errorf("%d. expected %d, got %d", i, scenario.fingerprint, scenario.input.Fingerprint())
}
if scenario.fastFingerprint != scenario.input.FastFingerprint() {
t.Errorf("%d. expected %d, got %d", i, scenario.fastFingerprint, scenario.input.FastFingerprint())
}
}
}

View File

@ -17,6 +17,7 @@ import (
"bytes"
"hash"
"hash/fnv"
"sort"
"sync"
)
@ -46,30 +47,37 @@ func getHashAndBuf() *hashAndBuf {
}
func putHashAndBuf(hb *hashAndBuf) {
hb.h.Reset()
hb.b.Reset()
hashAndBufPool.Put(hb)
}
// LabelsToSignature returns a unique signature (i.e., fingerprint) for a given
// label set.
// LabelsToSignature returns a quasi-unique signature (i.e., fingerprint) for a
// given label set. (Collisions are possible but unlikely if the number of label
// sets the function is applied to is small.)
func LabelsToSignature(labels map[string]string) uint64 {
if len(labels) == 0 {
return emptyLabelSignature
}
var result uint64
labelNames := make([]string, 0, len(labels))
for labelName := range labels {
labelNames = append(labelNames, labelName)
}
sort.Strings(labelNames)
hb := getHashAndBuf()
defer putHashAndBuf(hb)
for labelName, labelValue := range labels {
for _, labelName := range labelNames {
hb.b.WriteString(labelName)
hb.b.WriteByte(SeparatorByte)
hb.b.WriteString(labelValue)
hb.b.WriteString(labels[labelName])
hb.b.WriteByte(SeparatorByte)
hb.h.Write(hb.b.Bytes())
result ^= hb.h.Sum64()
hb.h.Reset()
hb.b.Reset()
}
return result
return hb.h.Sum64()
}
// metricToFingerprint works exactly as LabelsToSignature but takes a Metric as
@ -79,6 +87,34 @@ func metricToFingerprint(m Metric) Fingerprint {
return Fingerprint(emptyLabelSignature)
}
labelNames := make(LabelNames, 0, len(m))
for labelName := range m {
labelNames = append(labelNames, labelName)
}
sort.Sort(labelNames)
hb := getHashAndBuf()
defer putHashAndBuf(hb)
for _, labelName := range labelNames {
hb.b.WriteString(string(labelName))
hb.b.WriteByte(SeparatorByte)
hb.b.WriteString(string(m[labelName]))
hb.b.WriteByte(SeparatorByte)
hb.h.Write(hb.b.Bytes())
hb.b.Reset()
}
return Fingerprint(hb.h.Sum64())
}
// metricToFastFingerprint works similarly to metricToFingerprint but uses a
// faster and less allocation-heavy hash function, which is more susceptible to
// hash collisions. Therefore, collision detection should be applied.
func metricToFastFingerprint(m Metric) Fingerprint {
if len(m) == 0 {
return Fingerprint(emptyLabelSignature)
}
var result uint64
hb := getHashAndBuf()
defer putHashAndBuf(hb)
@ -97,13 +133,15 @@ func metricToFingerprint(m Metric) Fingerprint {
// SignatureForLabels works like LabelsToSignature but takes a Metric as
// parameter (rather than a label map) and only includes the labels with the
// specified LabelNames into the signature calculation.
// specified LabelNames into the signature calculation. The labels passed in
// will be sorted by this function.
func SignatureForLabels(m Metric, labels LabelNames) uint64 {
if len(m) == 0 || len(labels) == 0 {
return emptyLabelSignature
}
var result uint64
sort.Sort(labels)
hb := getHashAndBuf()
defer putHashAndBuf(hb)
@ -111,12 +149,11 @@ func SignatureForLabels(m Metric, labels LabelNames) uint64 {
hb.b.WriteString(string(label))
hb.b.WriteByte(SeparatorByte)
hb.b.WriteString(string(m[label]))
hb.b.WriteByte(SeparatorByte)
hb.h.Write(hb.b.Bytes())
result ^= hb.h.Sum64()
hb.h.Reset()
hb.b.Reset()
}
return result
return hb.h.Sum64()
}
// SignatureWithoutLabels works like LabelsToSignature but takes a Metric as
@ -127,24 +164,27 @@ func SignatureWithoutLabels(m Metric, labels map[LabelName]struct{}) uint64 {
return emptyLabelSignature
}
var result uint64
labelNames := make(LabelNames, 0, len(m))
for labelName := range m {
if _, exclude := labels[labelName]; !exclude {
labelNames = append(labelNames, labelName)
}
}
if len(labelNames) == 0 {
return emptyLabelSignature
}
sort.Sort(labelNames)
hb := getHashAndBuf()
defer putHashAndBuf(hb)
for labelName, labelValue := range m {
if _, exclude := labels[labelName]; exclude {
continue
}
for _, labelName := range labelNames {
hb.b.WriteString(string(labelName))
hb.b.WriteByte(SeparatorByte)
hb.b.WriteString(string(labelValue))
hb.b.WriteString(string(m[labelName]))
hb.b.WriteByte(SeparatorByte)
hb.h.Write(hb.b.Bytes())
result ^= hb.h.Sum64()
hb.h.Reset()
hb.b.Reset()
}
if result == 0 {
return emptyLabelSignature
}
return result
return hb.h.Sum64()
}

View File

@ -30,7 +30,7 @@ func TestLabelsToSignature(t *testing.T) {
},
{
in: map[string]string{"name": "garland, briggs", "fear": "love is not enough"},
out: 12952432476264840823,
out: 5799056148416392346,
},
}
@ -54,7 +54,7 @@ func TestMetricToFingerprint(t *testing.T) {
},
{
in: Metric{"name": "garland, briggs", "fear": "love is not enough"},
out: 12952432476264840823,
out: 5799056148416392346,
},
}
@ -67,6 +67,30 @@ func TestMetricToFingerprint(t *testing.T) {
}
}
// TestMetricToFastFingerprint checks metricToFastFingerprint against known
// fingerprints for an empty metric and for a metric with two labels.
func TestMetricToFastFingerprint(t *testing.T) {
	cases := []struct {
		in  Metric
		out Fingerprint
	}{
		{in: Metric{}, out: 14695981039346656037},
		{
			in:  Metric{"name": "garland, briggs", "fear": "love is not enough"},
			out: 12952432476264840823,
		},
	}

	for i := range cases {
		want := cases[i].out
		if got := metricToFastFingerprint(cases[i].in); got != want {
			t.Errorf("%d. expected %d, got %d", i, want, got)
		}
	}
}
func TestSignatureForLabels(t *testing.T) {
var scenarios = []struct {
in Metric
@ -81,12 +105,12 @@ func TestSignatureForLabels(t *testing.T) {
{
in: Metric{"name": "garland, briggs", "fear": "love is not enough"},
labels: LabelNames{"fear", "name"},
out: 12952432476264840823,
out: 5799056148416392346,
},
{
in: Metric{"name": "garland, briggs", "fear": "love is not enough", "foo": "bar"},
labels: LabelNames{"fear", "name"},
out: 12952432476264840823,
out: 5799056148416392346,
},
{
in: Metric{"name": "garland, briggs", "fear": "love is not enough"},
@ -128,17 +152,17 @@ func TestSignatureWithoutLabels(t *testing.T) {
{
in: Metric{"name": "garland, briggs", "fear": "love is not enough", "foo": "bar"},
labels: map[LabelName]struct{}{"foo": struct{}{}},
out: 12952432476264840823,
out: 5799056148416392346,
},
{
in: Metric{"name": "garland, briggs", "fear": "love is not enough"},
labels: map[LabelName]struct{}{},
out: 12952432476264840823,
out: 5799056148416392346,
},
{
in: Metric{"name": "garland, briggs", "fear": "love is not enough"},
labels: nil,
out: 12952432476264840823,
out: 5799056148416392346,
},
}
@ -164,15 +188,15 @@ func BenchmarkLabelToSignatureScalar(b *testing.B) {
}
func BenchmarkLabelToSignatureSingle(b *testing.B) {
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 5147259542624943964)
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 5146282821936882169)
}
func BenchmarkLabelToSignatureDouble(b *testing.B) {
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528)
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 3195800080984914717)
}
func BenchmarkLabelToSignatureTriple(b *testing.B) {
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676)
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 13843036195897128121)
}
func benchmarkMetricToFingerprint(b *testing.B, m Metric, e Fingerprint) {
@ -188,15 +212,39 @@ func BenchmarkMetricToFingerprintScalar(b *testing.B) {
}
func BenchmarkMetricToFingerprintSingle(b *testing.B) {
benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value"}, 5147259542624943964)
benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value"}, 5146282821936882169)
}
func BenchmarkMetricToFingerprintDouble(b *testing.B) {
benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528)
benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value"}, 3195800080984914717)
}
func BenchmarkMetricToFingerprintTriple(b *testing.B) {
benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676)
benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 13843036195897128121)
}
// benchmarkMetricToFastFingerprint calls metricToFastFingerprint on m b.N
// times and aborts the benchmark as soon as a result differs from the expected
// fingerprint e.
func benchmarkMetricToFastFingerprint(b *testing.B, m Metric, e Fingerprint) {
for i := 0; i < b.N; i++ {
if a := metricToFastFingerprint(m); a != e {
b.Fatalf("expected signature of %d for %s, got %d", e, m, a)
}
}
}
// BenchmarkMetricToFastFingerprintScalar benchmarks metricToFastFingerprint
// with a nil Metric.
func BenchmarkMetricToFastFingerprintScalar(b *testing.B) {
benchmarkMetricToFastFingerprint(b, nil, 14695981039346656037)
}
// BenchmarkMetricToFastFingerprintSingle benchmarks metricToFastFingerprint
// with a Metric holding a single label.
func BenchmarkMetricToFastFingerprintSingle(b *testing.B) {
benchmarkMetricToFastFingerprint(b, Metric{"first-label": "first-label-value"}, 5147259542624943964)
}
// BenchmarkMetricToFastFingerprintDouble benchmarks metricToFastFingerprint
// with a Metric holding two labels.
func BenchmarkMetricToFastFingerprintDouble(b *testing.B) {
benchmarkMetricToFastFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528)
}
// BenchmarkMetricToFastFingerprintTriple benchmarks metricToFastFingerprint
// with a Metric holding three labels.
func BenchmarkMetricToFastFingerprintTriple(b *testing.B) {
benchmarkMetricToFastFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676)
}
func TestEmptyLabelSignature(t *testing.T) {
@ -218,7 +266,7 @@ func TestEmptyLabelSignature(t *testing.T) {
}
}
func benchmarkMetricToFingerprintConc(b *testing.B, m Metric, e Fingerprint, concLevel int) {
func benchmarkMetricToFastFingerprintConc(b *testing.B, m Metric, e Fingerprint, concLevel int) {
var start, end sync.WaitGroup
start.Add(1)
end.Add(concLevel)
@ -227,7 +275,7 @@ func benchmarkMetricToFingerprintConc(b *testing.B, m Metric, e Fingerprint, con
go func() {
start.Wait()
for j := b.N / concLevel; j >= 0; j-- {
if a := metricToFingerprint(m); a != e {
if a := metricToFastFingerprint(m); a != e {
b.Fatalf("expected signature of %d for %s, got %d", e, m, a)
}
}
@ -239,18 +287,18 @@ func benchmarkMetricToFingerprintConc(b *testing.B, m Metric, e Fingerprint, con
end.Wait()
}
func BenchmarkMetricToFingerprintTripleConc1(b *testing.B) {
benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 1)
func BenchmarkMetricToFastFingerprintTripleConc1(b *testing.B) {
benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 1)
}
func BenchmarkMetricToFingerprintTripleConc2(b *testing.B) {
benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 2)
func BenchmarkMetricToFastFingerprintTripleConc2(b *testing.B) {
benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 2)
}
func BenchmarkMetricToFingerprintTripleConc4(b *testing.B) {
benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 4)
func BenchmarkMetricToFastFingerprintTripleConc4(b *testing.B) {
benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 4)
}
func BenchmarkMetricToFingerprintTripleConc8(b *testing.B) {
benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 8)
func BenchmarkMetricToFastFingerprintTripleConc8(b *testing.B) {
benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 8)
}