Rework the signature (a.k.a. fingerprint) functions.

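Move everything over to a more expensive but less collision-prone
scheme: label names are sorted and fed into a single running hash
instead of XOR-ing independently computed per-label hashes.
However, keep the old fingerprinting under the name "FastFingerprint"
to be used in the storage layer (where we need collision detection
anyway).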
This commit is contained in:
beorn7 2015-05-05 18:02:21 +02:00
parent 6efaf95d98
commit 0e0e6bff80
4 changed files with 161 additions and 56 deletions

View File

@ -28,11 +28,13 @@ type Metric map[LabelName]LabelValue
// Equal compares the fingerprints of both metrics. // Equal compares the fingerprints of both metrics.
func (m Metric) Equal(o Metric) bool { func (m Metric) Equal(o Metric) bool {
// TODO do an actual map comparison
return m.Fingerprint().Equal(o.Fingerprint()) return m.Fingerprint().Equal(o.Fingerprint())
} }
// Before compares the fingerprints of both metrics. // Before compares the fingerprints of both metrics.
func (m Metric) Before(o Metric) bool { func (m Metric) Before(o Metric) bool {
// TODO do an actual map comparison
return m.Fingerprint().Less(o.Fingerprint()) return m.Fingerprint().Less(o.Fingerprint())
} }
@ -67,6 +69,12 @@ func (m Metric) Fingerprint() Fingerprint {
return metricToFingerprint(m) return metricToFingerprint(m)
} }
// FastFingerprint returns a Metric's Fingerprint calculated by a faster hashing
// algorithm, which is, however, more susceptible to hash collisions.
func (m Metric) FastFingerprint() Fingerprint {
return metricToFastFingerprint(m)
}
// Clone returns a copy of the Metric. // Clone returns a copy of the Metric.
func (m Metric) Clone() Metric { func (m Metric) Clone() Metric {
clone := Metric{} clone := Metric{}

View File

@ -19,10 +19,12 @@ func testMetric(t testing.TB) {
var scenarios = []struct { var scenarios = []struct {
input Metric input Metric
fingerprint Fingerprint fingerprint Fingerprint
fastFingerprint Fingerprint
}{ }{
{ {
input: Metric{}, input: Metric{},
fingerprint: 14695981039346656037, fingerprint: 14695981039346656037,
fastFingerprint: 14695981039346656037,
}, },
{ {
input: Metric{ input: Metric{
@ -30,27 +32,31 @@ func testMetric(t testing.TB) {
"occupation": "robot", "occupation": "robot",
"manufacturer": "westinghouse", "manufacturer": "westinghouse",
}, },
fingerprint: 11310079640881077873, fingerprint: 5911716720268894962,
fastFingerprint: 11310079640881077873,
}, },
{ {
input: Metric{ input: Metric{
"x": "y", "x": "y",
}, },
fingerprint: 13948396922932177635, fingerprint: 8241431561484471700,
fastFingerprint: 13948396922932177635,
}, },
{ {
input: Metric{ input: Metric{
"a": "bb", "a": "bb",
"b": "c", "b": "c",
}, },
fingerprint: 3198632812309449502, fingerprint: 3016285359649981711,
fastFingerprint: 3198632812309449502,
}, },
{ {
input: Metric{ input: Metric{
"a": "b", "a": "b",
"bb": "c", "bb": "c",
}, },
fingerprint: 5774953389407657638, fingerprint: 7122421792099404749,
fastFingerprint: 5774953389407657638,
}, },
} }
@ -58,6 +64,9 @@ func testMetric(t testing.TB) {
if scenario.fingerprint != scenario.input.Fingerprint() { if scenario.fingerprint != scenario.input.Fingerprint() {
t.Errorf("%d. expected %d, got %d", i, scenario.fingerprint, scenario.input.Fingerprint()) t.Errorf("%d. expected %d, got %d", i, scenario.fingerprint, scenario.input.Fingerprint())
} }
if scenario.fastFingerprint != scenario.input.FastFingerprint() {
t.Errorf("%d. expected %d, got %d", i, scenario.fastFingerprint, scenario.input.FastFingerprint())
}
} }
} }

View File

@ -17,6 +17,7 @@ import (
"bytes" "bytes"
"hash" "hash"
"hash/fnv" "hash/fnv"
"sort"
"sync" "sync"
) )
@ -46,30 +47,37 @@ func getHashAndBuf() *hashAndBuf {
} }
func putHashAndBuf(hb *hashAndBuf) { func putHashAndBuf(hb *hashAndBuf) {
hb.h.Reset()
hb.b.Reset()
hashAndBufPool.Put(hb) hashAndBufPool.Put(hb)
} }
// LabelsToSignature returns a unique signature (i.e., fingerprint) for a given // LabelsToSignature returns a quasi-unique signature (i.e., fingerprint) for a
// label set. // given label set. (Collisions are possible but unlikely if the number of label
// sets the function is applied to is small.)
func LabelsToSignature(labels map[string]string) uint64 { func LabelsToSignature(labels map[string]string) uint64 {
if len(labels) == 0 { if len(labels) == 0 {
return emptyLabelSignature return emptyLabelSignature
} }
var result uint64 labelNames := make([]string, 0, len(labels))
for labelName := range labels {
labelNames = append(labelNames, labelName)
}
sort.Strings(labelNames)
hb := getHashAndBuf() hb := getHashAndBuf()
defer putHashAndBuf(hb) defer putHashAndBuf(hb)
for labelName, labelValue := range labels { for _, labelName := range labelNames {
hb.b.WriteString(labelName) hb.b.WriteString(labelName)
hb.b.WriteByte(SeparatorByte) hb.b.WriteByte(SeparatorByte)
hb.b.WriteString(labelValue) hb.b.WriteString(labels[labelName])
hb.b.WriteByte(SeparatorByte)
hb.h.Write(hb.b.Bytes()) hb.h.Write(hb.b.Bytes())
result ^= hb.h.Sum64()
hb.h.Reset()
hb.b.Reset() hb.b.Reset()
} }
return result return hb.h.Sum64()
} }
// metricToFingerprint works exactly as LabelsToSignature but takes a Metric as // metricToFingerprint works exactly as LabelsToSignature but takes a Metric as
@ -79,6 +87,34 @@ func metricToFingerprint(m Metric) Fingerprint {
return Fingerprint(emptyLabelSignature) return Fingerprint(emptyLabelSignature)
} }
labelNames := make(LabelNames, 0, len(m))
for labelName := range m {
labelNames = append(labelNames, labelName)
}
sort.Sort(labelNames)
hb := getHashAndBuf()
defer putHashAndBuf(hb)
for _, labelName := range labelNames {
hb.b.WriteString(string(labelName))
hb.b.WriteByte(SeparatorByte)
hb.b.WriteString(string(m[labelName]))
hb.b.WriteByte(SeparatorByte)
hb.h.Write(hb.b.Bytes())
hb.b.Reset()
}
return Fingerprint(hb.h.Sum64())
}
// metricToFastFingerprint works similarly to metricToFingerprint but uses a
// faster and less allocation-heavy hash function, which is more susceptible to
// hash collisions. Therefore, collision detection should be applied.
func metricToFastFingerprint(m Metric) Fingerprint {
if len(m) == 0 {
return Fingerprint(emptyLabelSignature)
}
var result uint64 var result uint64
hb := getHashAndBuf() hb := getHashAndBuf()
defer putHashAndBuf(hb) defer putHashAndBuf(hb)
@ -97,13 +133,15 @@ func metricToFingerprint(m Metric) Fingerprint {
// SignatureForLabels works like LabelsToSignature but takes a Metric as // SignatureForLabels works like LabelsToSignature but takes a Metric as
// parameter (rather than a label map) and only includes the labels with the // parameter (rather than a label map) and only includes the labels with the
// specified LabelNames into the signature calculation. // specified LabelNames into the signature calculation. The labels passed in
// will be sorted in place by this function.
func SignatureForLabels(m Metric, labels LabelNames) uint64 { func SignatureForLabels(m Metric, labels LabelNames) uint64 {
if len(m) == 0 || len(labels) == 0 { if len(m) == 0 || len(labels) == 0 {
return emptyLabelSignature return emptyLabelSignature
} }
var result uint64 sort.Sort(labels)
hb := getHashAndBuf() hb := getHashAndBuf()
defer putHashAndBuf(hb) defer putHashAndBuf(hb)
@ -111,12 +149,11 @@ func SignatureForLabels(m Metric, labels LabelNames) uint64 {
hb.b.WriteString(string(label)) hb.b.WriteString(string(label))
hb.b.WriteByte(SeparatorByte) hb.b.WriteByte(SeparatorByte)
hb.b.WriteString(string(m[label])) hb.b.WriteString(string(m[label]))
hb.b.WriteByte(SeparatorByte)
hb.h.Write(hb.b.Bytes()) hb.h.Write(hb.b.Bytes())
result ^= hb.h.Sum64()
hb.h.Reset()
hb.b.Reset() hb.b.Reset()
} }
return result return hb.h.Sum64()
} }
// SignatureWithoutLabels works like LabelsToSignature but takes a Metric as // SignatureWithoutLabels works like LabelsToSignature but takes a Metric as
@ -127,24 +164,27 @@ func SignatureWithoutLabels(m Metric, labels map[LabelName]struct{}) uint64 {
return emptyLabelSignature return emptyLabelSignature
} }
var result uint64 labelNames := make(LabelNames, 0, len(m))
for labelName := range m {
if _, exclude := labels[labelName]; !exclude {
labelNames = append(labelNames, labelName)
}
}
if len(labelNames) == 0 {
return emptyLabelSignature
}
sort.Sort(labelNames)
hb := getHashAndBuf() hb := getHashAndBuf()
defer putHashAndBuf(hb) defer putHashAndBuf(hb)
for labelName, labelValue := range m { for _, labelName := range labelNames {
if _, exclude := labels[labelName]; exclude {
continue
}
hb.b.WriteString(string(labelName)) hb.b.WriteString(string(labelName))
hb.b.WriteByte(SeparatorByte) hb.b.WriteByte(SeparatorByte)
hb.b.WriteString(string(labelValue)) hb.b.WriteString(string(m[labelName]))
hb.b.WriteByte(SeparatorByte)
hb.h.Write(hb.b.Bytes()) hb.h.Write(hb.b.Bytes())
result ^= hb.h.Sum64()
hb.h.Reset()
hb.b.Reset() hb.b.Reset()
} }
if result == 0 { return hb.h.Sum64()
return emptyLabelSignature
}
return result
} }

View File

@ -30,7 +30,7 @@ func TestLabelsToSignature(t *testing.T) {
}, },
{ {
in: map[string]string{"name": "garland, briggs", "fear": "love is not enough"}, in: map[string]string{"name": "garland, briggs", "fear": "love is not enough"},
out: 12952432476264840823, out: 5799056148416392346,
}, },
} }
@ -54,7 +54,7 @@ func TestMetricToFingerprint(t *testing.T) {
}, },
{ {
in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, in: Metric{"name": "garland, briggs", "fear": "love is not enough"},
out: 12952432476264840823, out: 5799056148416392346,
}, },
} }
@ -67,6 +67,30 @@ func TestMetricToFingerprint(t *testing.T) {
} }
} }
func TestMetricToFastFingerprint(t *testing.T) {
var scenarios = []struct {
in Metric
out Fingerprint
}{
{
in: Metric{},
out: 14695981039346656037,
},
{
in: Metric{"name": "garland, briggs", "fear": "love is not enough"},
out: 12952432476264840823,
},
}
for i, scenario := range scenarios {
actual := metricToFastFingerprint(scenario.in)
if actual != scenario.out {
t.Errorf("%d. expected %d, got %d", i, scenario.out, actual)
}
}
}
func TestSignatureForLabels(t *testing.T) { func TestSignatureForLabels(t *testing.T) {
var scenarios = []struct { var scenarios = []struct {
in Metric in Metric
@ -81,12 +105,12 @@ func TestSignatureForLabels(t *testing.T) {
{ {
in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, in: Metric{"name": "garland, briggs", "fear": "love is not enough"},
labels: LabelNames{"fear", "name"}, labels: LabelNames{"fear", "name"},
out: 12952432476264840823, out: 5799056148416392346,
}, },
{ {
in: Metric{"name": "garland, briggs", "fear": "love is not enough", "foo": "bar"}, in: Metric{"name": "garland, briggs", "fear": "love is not enough", "foo": "bar"},
labels: LabelNames{"fear", "name"}, labels: LabelNames{"fear", "name"},
out: 12952432476264840823, out: 5799056148416392346,
}, },
{ {
in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, in: Metric{"name": "garland, briggs", "fear": "love is not enough"},
@ -128,17 +152,17 @@ func TestSignatureWithoutLabels(t *testing.T) {
{ {
in: Metric{"name": "garland, briggs", "fear": "love is not enough", "foo": "bar"}, in: Metric{"name": "garland, briggs", "fear": "love is not enough", "foo": "bar"},
labels: map[LabelName]struct{}{"foo": struct{}{}}, labels: map[LabelName]struct{}{"foo": struct{}{}},
out: 12952432476264840823, out: 5799056148416392346,
}, },
{ {
in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, in: Metric{"name": "garland, briggs", "fear": "love is not enough"},
labels: map[LabelName]struct{}{}, labels: map[LabelName]struct{}{},
out: 12952432476264840823, out: 5799056148416392346,
}, },
{ {
in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, in: Metric{"name": "garland, briggs", "fear": "love is not enough"},
labels: nil, labels: nil,
out: 12952432476264840823, out: 5799056148416392346,
}, },
} }
@ -164,15 +188,15 @@ func BenchmarkLabelToSignatureScalar(b *testing.B) {
} }
func BenchmarkLabelToSignatureSingle(b *testing.B) { func BenchmarkLabelToSignatureSingle(b *testing.B) {
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 5147259542624943964) benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 5146282821936882169)
} }
func BenchmarkLabelToSignatureDouble(b *testing.B) { func BenchmarkLabelToSignatureDouble(b *testing.B) {
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528) benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 3195800080984914717)
} }
func BenchmarkLabelToSignatureTriple(b *testing.B) { func BenchmarkLabelToSignatureTriple(b *testing.B) {
benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676) benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 13843036195897128121)
} }
func benchmarkMetricToFingerprint(b *testing.B, m Metric, e Fingerprint) { func benchmarkMetricToFingerprint(b *testing.B, m Metric, e Fingerprint) {
@ -188,15 +212,39 @@ func BenchmarkMetricToFingerprintScalar(b *testing.B) {
} }
func BenchmarkMetricToFingerprintSingle(b *testing.B) { func BenchmarkMetricToFingerprintSingle(b *testing.B) {
benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value"}, 5147259542624943964) benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value"}, 5146282821936882169)
} }
func BenchmarkMetricToFingerprintDouble(b *testing.B) { func BenchmarkMetricToFingerprintDouble(b *testing.B) {
benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528) benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value"}, 3195800080984914717)
} }
func BenchmarkMetricToFingerprintTriple(b *testing.B) { func BenchmarkMetricToFingerprintTriple(b *testing.B) {
benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676) benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 13843036195897128121)
}
func benchmarkMetricToFastFingerprint(b *testing.B, m Metric, e Fingerprint) {
for i := 0; i < b.N; i++ {
if a := metricToFastFingerprint(m); a != e {
b.Fatalf("expected signature of %d for %s, got %d", e, m, a)
}
}
}
func BenchmarkMetricToFastFingerprintScalar(b *testing.B) {
benchmarkMetricToFastFingerprint(b, nil, 14695981039346656037)
}
func BenchmarkMetricToFastFingerprintSingle(b *testing.B) {
benchmarkMetricToFastFingerprint(b, Metric{"first-label": "first-label-value"}, 5147259542624943964)
}
func BenchmarkMetricToFastFingerprintDouble(b *testing.B) {
benchmarkMetricToFastFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528)
}
func BenchmarkMetricToFastFingerprintTriple(b *testing.B) {
benchmarkMetricToFastFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676)
} }
func TestEmptyLabelSignature(t *testing.T) { func TestEmptyLabelSignature(t *testing.T) {
@ -218,7 +266,7 @@ func TestEmptyLabelSignature(t *testing.T) {
} }
} }
func benchmarkMetricToFingerprintConc(b *testing.B, m Metric, e Fingerprint, concLevel int) { func benchmarkMetricToFastFingerprintConc(b *testing.B, m Metric, e Fingerprint, concLevel int) {
var start, end sync.WaitGroup var start, end sync.WaitGroup
start.Add(1) start.Add(1)
end.Add(concLevel) end.Add(concLevel)
@ -227,7 +275,7 @@ func benchmarkMetricToFingerprintConc(b *testing.B, m Metric, e Fingerprint, con
go func() { go func() {
start.Wait() start.Wait()
for j := b.N / concLevel; j >= 0; j-- { for j := b.N / concLevel; j >= 0; j-- {
if a := metricToFingerprint(m); a != e { if a := metricToFastFingerprint(m); a != e {
b.Fatalf("expected signature of %d for %s, got %d", e, m, a) b.Fatalf("expected signature of %d for %s, got %d", e, m, a)
} }
} }
@ -239,18 +287,18 @@ func benchmarkMetricToFingerprintConc(b *testing.B, m Metric, e Fingerprint, con
end.Wait() end.Wait()
} }
func BenchmarkMetricToFingerprintTripleConc1(b *testing.B) { func BenchmarkMetricToFastFingerprintTripleConc1(b *testing.B) {
benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 1) benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 1)
} }
func BenchmarkMetricToFingerprintTripleConc2(b *testing.B) { func BenchmarkMetricToFastFingerprintTripleConc2(b *testing.B) {
benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 2) benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 2)
} }
func BenchmarkMetricToFingerprintTripleConc4(b *testing.B) { func BenchmarkMetricToFastFingerprintTripleConc4(b *testing.B) {
benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 4) benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 4)
} }
func BenchmarkMetricToFingerprintTripleConc8(b *testing.B) { func BenchmarkMetricToFastFingerprintTripleConc8(b *testing.B) {
benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 8) benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 8)
} }