From 0e0e6bff801ec19c27c52bcc1b77c9c700fb67e8 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 5 May 2015 18:02:21 +0200 Subject: [PATCH] Rework the signature aka fingerprint functions. Move everything over to a more expensive but less collision-prone way. However, keep the old fingerprinting under the name "FastFingerprint" to be used in the storage layer (where we need collision detection anyway). --- model/metric.go | 8 ++++ model/metric_test.go | 25 +++++++---- model/signature.go | 90 ++++++++++++++++++++++++++++----------- model/signature_test.go | 94 +++++++++++++++++++++++++++++++---------- 4 files changed, 161 insertions(+), 56 deletions(-) diff --git a/model/metric.go b/model/metric.go index 32f9d7f..b43ddaf 100644 --- a/model/metric.go +++ b/model/metric.go @@ -28,11 +28,13 @@ type Metric map[LabelName]LabelValue // Equal compares the fingerprints of both metrics. func (m Metric) Equal(o Metric) bool { + // TODO do an actual map comparison return m.Fingerprint().Equal(o.Fingerprint()) } // Before compares the fingerprints of both metrics. func (m Metric) Before(o Metric) bool { + // TODO do an actual map comparison return m.Fingerprint().Less(o.Fingerprint()) } @@ -67,6 +69,12 @@ func (m Metric) Fingerprint() Fingerprint { return metricToFingerprint(m) } +// FastFingerprint returns a Metric's Fingerprint calculated by a faster hashing +// algorithm, which is, however, more susceptible to hash collisions. +func (m Metric) FastFingerprint() Fingerprint { + return metricToFastFingerprint(m) +} + // Clone returns a copy of the Metric. 
func (m Metric) Clone() Metric { clone := Metric{} diff --git a/model/metric_test.go b/model/metric_test.go index d51b184..5dbc023 100644 --- a/model/metric_test.go +++ b/model/metric_test.go @@ -17,12 +17,14 @@ import "testing" func testMetric(t testing.TB) { var scenarios = []struct { - input Metric - fingerprint Fingerprint + input Metric + fingerprint Fingerprint + fastFingerprint Fingerprint }{ { - input: Metric{}, - fingerprint: 14695981039346656037, + input: Metric{}, + fingerprint: 14695981039346656037, + fastFingerprint: 14695981039346656037, }, { input: Metric{ @@ -30,27 +32,31 @@ func testMetric(t testing.TB) { "occupation": "robot", "manufacturer": "westinghouse", }, - fingerprint: 11310079640881077873, + fingerprint: 5911716720268894962, + fastFingerprint: 11310079640881077873, }, { input: Metric{ "x": "y", }, - fingerprint: 13948396922932177635, + fingerprint: 8241431561484471700, + fastFingerprint: 13948396922932177635, }, { input: Metric{ "a": "bb", "b": "c", }, - fingerprint: 3198632812309449502, + fingerprint: 3016285359649981711, + fastFingerprint: 3198632812309449502, }, { input: Metric{ "a": "b", "bb": "c", }, - fingerprint: 5774953389407657638, + fingerprint: 7122421792099404749, + fastFingerprint: 5774953389407657638, }, } @@ -58,6 +64,9 @@ func testMetric(t testing.TB) { if scenario.fingerprint != scenario.input.Fingerprint() { t.Errorf("%d. expected %d, got %d", i, scenario.fingerprint, scenario.input.Fingerprint()) } + if scenario.fastFingerprint != scenario.input.FastFingerprint() { + t.Errorf("%d. 
expected %d, got %d", i, scenario.fastFingerprint, scenario.input.FastFingerprint()) + } } } diff --git a/model/signature.go b/model/signature.go index cc77b19..3f7abec 100644 --- a/model/signature.go +++ b/model/signature.go @@ -17,6 +17,7 @@ import ( "bytes" "hash" "hash/fnv" + "sort" "sync" ) @@ -46,30 +47,37 @@ func getHashAndBuf() *hashAndBuf { } func putHashAndBuf(hb *hashAndBuf) { + hb.h.Reset() + hb.b.Reset() hashAndBufPool.Put(hb) } -// LabelsToSignature returns a unique signature (i.e., fingerprint) for a given -// label set. +// LabelsToSignature returns a quasi-unique signature (i.e., fingerprint) for a +// given label set. (Collisions are possible but unlikely if the number of label +// sets the function is applied to is small.) func LabelsToSignature(labels map[string]string) uint64 { if len(labels) == 0 { return emptyLabelSignature } - var result uint64 + labelNames := make([]string, 0, len(labels)) + for labelName := range labels { + labelNames = append(labelNames, labelName) + } + sort.Strings(labelNames) + hb := getHashAndBuf() defer putHashAndBuf(hb) - for labelName, labelValue := range labels { + for _, labelName := range labelNames { hb.b.WriteString(labelName) hb.b.WriteByte(SeparatorByte) - hb.b.WriteString(labelValue) + hb.b.WriteString(labels[labelName]) + hb.b.WriteByte(SeparatorByte) hb.h.Write(hb.b.Bytes()) - result ^= hb.h.Sum64() - hb.h.Reset() hb.b.Reset() } - return result + return hb.h.Sum64() } // metricToFingerprint works exactly as LabelsToSignature but takes a Metric as @@ -79,6 +87,34 @@ func metricToFingerprint(m Metric) Fingerprint { return Fingerprint(emptyLabelSignature) } + labelNames := make(LabelNames, 0, len(m)) + for labelName := range m { + labelNames = append(labelNames, labelName) + } + sort.Sort(labelNames) + + hb := getHashAndBuf() + defer putHashAndBuf(hb) + + for _, labelName := range labelNames { + hb.b.WriteString(string(labelName)) + hb.b.WriteByte(SeparatorByte) + hb.b.WriteString(string(m[labelName])) + 
hb.b.WriteByte(SeparatorByte) + hb.h.Write(hb.b.Bytes()) + hb.b.Reset() + } + return Fingerprint(hb.h.Sum64()) +} + +// metricToFastFingerprint works similarly to metricToFingerprint but uses a +// faster and less allocation-heavy hash function, which is more susceptible to +// hash collisions. Therefore, collision detection should be applied. +func metricToFastFingerprint(m Metric) Fingerprint { + if len(m) == 0 { + return Fingerprint(emptyLabelSignature) + } + var result uint64 hb := getHashAndBuf() defer putHashAndBuf(hb) @@ -97,13 +133,15 @@ func metricToFingerprint(m Metric) Fingerprint { // SignatureForLabels works like LabelsToSignature but takes a Metric as // parameter (rather than a label map) and only includes the labels with the -// specified LabelNames into the signature calculation. +// specified LabelNames into the signature calculation. The labels passed in +// will be sorted by this function. func SignatureForLabels(m Metric, labels LabelNames) uint64 { if len(m) == 0 || len(labels) == 0 { return emptyLabelSignature } - var result uint64 + sort.Sort(labels) + hb := getHashAndBuf() defer putHashAndBuf(hb) @@ -111,12 +149,11 @@ func SignatureForLabels(m Metric, labels LabelNames) uint64 { hb.b.WriteString(string(label)) hb.b.WriteByte(SeparatorByte) hb.b.WriteString(string(m[label])) + hb.b.WriteByte(SeparatorByte) hb.h.Write(hb.b.Bytes()) - result ^= hb.h.Sum64() - hb.h.Reset() hb.b.Reset() } - return result + return hb.h.Sum64() } // SignatureWithoutLabels works like LabelsToSignature but takes a Metric as @@ -127,24 +164,27 @@ func SignatureWithoutLabels(m Metric, labels map[LabelName]struct{}) uint64 { return emptyLabelSignature } - var result uint64 + labelNames := make(LabelNames, 0, len(m)) + for labelName := range m { + if _, exclude := labels[labelName]; !exclude { + labelNames = append(labelNames, labelName) + } + } + if len(labelNames) == 0 { + return emptyLabelSignature + } + sort.Sort(labelNames) + hb := getHashAndBuf() defer 
putHashAndBuf(hb) - for labelName, labelValue := range m { - if _, exclude := labels[labelName]; exclude { - continue - } + for _, labelName := range labelNames { hb.b.WriteString(string(labelName)) hb.b.WriteByte(SeparatorByte) - hb.b.WriteString(string(labelValue)) + hb.b.WriteString(string(m[labelName])) + hb.b.WriteByte(SeparatorByte) hb.h.Write(hb.b.Bytes()) - result ^= hb.h.Sum64() - hb.h.Reset() hb.b.Reset() } - if result == 0 { - return emptyLabelSignature - } - return result + return hb.h.Sum64() } diff --git a/model/signature_test.go b/model/signature_test.go index 7b3327d..01db531 100644 --- a/model/signature_test.go +++ b/model/signature_test.go @@ -30,7 +30,7 @@ func TestLabelsToSignature(t *testing.T) { }, { in: map[string]string{"name": "garland, briggs", "fear": "love is not enough"}, - out: 12952432476264840823, + out: 5799056148416392346, }, } @@ -54,7 +54,7 @@ func TestMetricToFingerprint(t *testing.T) { }, { in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, - out: 12952432476264840823, + out: 5799056148416392346, }, } @@ -67,6 +67,30 @@ func TestMetricToFingerprint(t *testing.T) { } } +func TestMetricToFastFingerprint(t *testing.T) { + var scenarios = []struct { + in Metric + out Fingerprint + }{ + { + in: Metric{}, + out: 14695981039346656037, + }, + { + in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, + out: 12952432476264840823, + }, + } + + for i, scenario := range scenarios { + actual := metricToFastFingerprint(scenario.in) + + if actual != scenario.out { + t.Errorf("%d. 
expected %d, got %d", i, scenario.out, actual) + } + } +} + func TestSignatureForLabels(t *testing.T) { var scenarios = []struct { in Metric @@ -81,12 +105,12 @@ func TestSignatureForLabels(t *testing.T) { { in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, labels: LabelNames{"fear", "name"}, - out: 12952432476264840823, + out: 5799056148416392346, }, { in: Metric{"name": "garland, briggs", "fear": "love is not enough", "foo": "bar"}, labels: LabelNames{"fear", "name"}, - out: 12952432476264840823, + out: 5799056148416392346, }, { in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, @@ -128,17 +152,17 @@ func TestSignatureWithoutLabels(t *testing.T) { { in: Metric{"name": "garland, briggs", "fear": "love is not enough", "foo": "bar"}, labels: map[LabelName]struct{}{"foo": struct{}{}}, - out: 12952432476264840823, + out: 5799056148416392346, }, { in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, labels: map[LabelName]struct{}{}, - out: 12952432476264840823, + out: 5799056148416392346, }, { in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, labels: nil, - out: 12952432476264840823, + out: 5799056148416392346, }, } @@ -164,15 +188,15 @@ func BenchmarkLabelToSignatureScalar(b *testing.B) { } func BenchmarkLabelToSignatureSingle(b *testing.B) { - benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 5147259542624943964) + benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 5146282821936882169) } func BenchmarkLabelToSignatureDouble(b *testing.B) { - benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528) + benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 3195800080984914717) } func BenchmarkLabelToSignatureTriple(b *testing.B) { - benchmarkLabelToSignature(b, 
map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676) + benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 13843036195897128121) } func benchmarkMetricToFingerprint(b *testing.B, m Metric, e Fingerprint) { @@ -188,15 +212,39 @@ func BenchmarkMetricToFingerprintScalar(b *testing.B) { } func BenchmarkMetricToFingerprintSingle(b *testing.B) { - benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value"}, 5147259542624943964) + benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value"}, 5146282821936882169) } func BenchmarkMetricToFingerprintDouble(b *testing.B) { - benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528) + benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value"}, 3195800080984914717) } func BenchmarkMetricToFingerprintTriple(b *testing.B) { - benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676) + benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 13843036195897128121) +} + +func benchmarkMetricToFastFingerprint(b *testing.B, m Metric, e Fingerprint) { + for i := 0; i < b.N; i++ { + if a := metricToFastFingerprint(m); a != e { + b.Fatalf("expected signature of %d for %s, got %d", e, m, a) + } + } +} + +func BenchmarkMetricToFastFingerprintScalar(b *testing.B) { + benchmarkMetricToFastFingerprint(b, nil, 14695981039346656037) +} + +func BenchmarkMetricToFastFingerprintSingle(b *testing.B) { + benchmarkMetricToFastFingerprint(b, Metric{"first-label": "first-label-value"}, 
5147259542624943964) +} + +func BenchmarkMetricToFastFingerprintDouble(b *testing.B) { + benchmarkMetricToFastFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528) +} + +func BenchmarkMetricToFastFingerprintTriple(b *testing.B) { + benchmarkMetricToFastFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676) } func TestEmptyLabelSignature(t *testing.T) { @@ -218,7 +266,7 @@ func TestEmptyLabelSignature(t *testing.T) { } } -func benchmarkMetricToFingerprintConc(b *testing.B, m Metric, e Fingerprint, concLevel int) { +func benchmarkMetricToFastFingerprintConc(b *testing.B, m Metric, e Fingerprint, concLevel int) { var start, end sync.WaitGroup start.Add(1) end.Add(concLevel) @@ -227,7 +275,7 @@ func benchmarkMetricToFingerprintConc(b *testing.B, m Metric, e Fingerprint, con go func() { start.Wait() for j := b.N / concLevel; j >= 0; j-- { - if a := metricToFingerprint(m); a != e { + if a := metricToFastFingerprint(m); a != e { b.Fatalf("expected signature of %d for %s, got %d", e, m, a) } } @@ -239,18 +287,18 @@ func benchmarkMetricToFingerprintConc(b *testing.B, m Metric, e Fingerprint, con end.Wait() } -func BenchmarkMetricToFingerprintTripleConc1(b *testing.B) { - benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 1) +func BenchmarkMetricToFastFingerprintTripleConc1(b *testing.B) { + benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 1) } -func BenchmarkMetricToFingerprintTripleConc2(b *testing.B) { - benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 
15738406913934009676, 2) +func BenchmarkMetricToFastFingerprintTripleConc2(b *testing.B) { + benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 2) } -func BenchmarkMetricToFingerprintTripleConc4(b *testing.B) { - benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 4) +func BenchmarkMetricToFastFingerprintTripleConc4(b *testing.B) { + benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 4) } -func BenchmarkMetricToFingerprintTripleConc8(b *testing.B) { - benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 8) +func BenchmarkMetricToFastFingerprintTripleConc8(b *testing.B) { + benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 8) }