From 0e0e6bff801ec19c27c52bcc1b77c9c700fb67e8 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 5 May 2015 18:02:21 +0200 Subject: [PATCH 1/2] Rework the signature aka fingerprint functions. Move everything over to a more expensive but less collision-prone way. However, keep the old fingerprinting under the name "FastFingerprint" to be used in the storage layer (where we need collision detection anyway). --- model/metric.go | 8 ++++ model/metric_test.go | 25 +++++++---- model/signature.go | 90 ++++++++++++++++++++++++++++----------- model/signature_test.go | 94 +++++++++++++++++++++++++++++++---------- 4 files changed, 161 insertions(+), 56 deletions(-) diff --git a/model/metric.go b/model/metric.go index 32f9d7f..b43ddaf 100644 --- a/model/metric.go +++ b/model/metric.go @@ -28,11 +28,13 @@ type Metric map[LabelName]LabelValue // Equal compares the fingerprints of both metrics. func (m Metric) Equal(o Metric) bool { + // TODO do an actual map comparison return m.Fingerprint().Equal(o.Fingerprint()) } // Before compares the fingerprints of both metrics. func (m Metric) Before(o Metric) bool { + // TODO do an actual map comparison return m.Fingerprint().Less(o.Fingerprint()) } @@ -67,6 +69,12 @@ func (m Metric) Fingerprint() Fingerprint { return metricToFingerprint(m) } +// FastFingerprint returns a Metric's Fingerprint calculated by a faster hashing +// algorithm, which is, however, more susceptible to hash collisions. +func (m Metric) FastFingerprint() Fingerprint { + return metricToFastFingerprint(m) +} + // Clone returns a copy of the Metric. 
func (m Metric) Clone() Metric { clone := Metric{} diff --git a/model/metric_test.go b/model/metric_test.go index d51b184..5dbc023 100644 --- a/model/metric_test.go +++ b/model/metric_test.go @@ -17,12 +17,14 @@ import "testing" func testMetric(t testing.TB) { var scenarios = []struct { - input Metric - fingerprint Fingerprint + input Metric + fingerprint Fingerprint + fastFingerprint Fingerprint }{ { - input: Metric{}, - fingerprint: 14695981039346656037, + input: Metric{}, + fingerprint: 14695981039346656037, + fastFingerprint: 14695981039346656037, }, { input: Metric{ @@ -30,27 +32,31 @@ func testMetric(t testing.TB) { "occupation": "robot", "manufacturer": "westinghouse", }, - fingerprint: 11310079640881077873, + fingerprint: 5911716720268894962, + fastFingerprint: 11310079640881077873, }, { input: Metric{ "x": "y", }, - fingerprint: 13948396922932177635, + fingerprint: 8241431561484471700, + fastFingerprint: 13948396922932177635, }, { input: Metric{ "a": "bb", "b": "c", }, - fingerprint: 3198632812309449502, + fingerprint: 3016285359649981711, + fastFingerprint: 3198632812309449502, }, { input: Metric{ "a": "b", "bb": "c", }, - fingerprint: 5774953389407657638, + fingerprint: 7122421792099404749, + fastFingerprint: 5774953389407657638, }, } @@ -58,6 +64,9 @@ func testMetric(t testing.TB) { if scenario.fingerprint != scenario.input.Fingerprint() { t.Errorf("%d. expected %d, got %d", i, scenario.fingerprint, scenario.input.Fingerprint()) } + if scenario.fastFingerprint != scenario.input.FastFingerprint() { + t.Errorf("%d. 
expected %d, got %d", i, scenario.fastFingerprint, scenario.input.FastFingerprint()) + } } } diff --git a/model/signature.go b/model/signature.go index cc77b19..3f7abec 100644 --- a/model/signature.go +++ b/model/signature.go @@ -17,6 +17,7 @@ import ( "bytes" "hash" "hash/fnv" + "sort" "sync" ) @@ -46,30 +47,37 @@ func getHashAndBuf() *hashAndBuf { } func putHashAndBuf(hb *hashAndBuf) { + hb.h.Reset() + hb.b.Reset() hashAndBufPool.Put(hb) } -// LabelsToSignature returns a unique signature (i.e., fingerprint) for a given -// label set. +// LabelsToSignature returns an quasi-unique signature (i.e., fingerprint) for a +// given label set. (Collisions are possible but unlikely if the number of label +// sets the function is applied to is small.) func LabelsToSignature(labels map[string]string) uint64 { if len(labels) == 0 { return emptyLabelSignature } - var result uint64 + labelNames := make([]string, 0, len(labels)) + for labelName := range labels { + labelNames = append(labelNames, labelName) + } + sort.Strings(labelNames) + hb := getHashAndBuf() defer putHashAndBuf(hb) - for labelName, labelValue := range labels { + for _, labelName := range labelNames { hb.b.WriteString(labelName) hb.b.WriteByte(SeparatorByte) - hb.b.WriteString(labelValue) + hb.b.WriteString(labels[labelName]) + hb.b.WriteByte(SeparatorByte) hb.h.Write(hb.b.Bytes()) - result ^= hb.h.Sum64() - hb.h.Reset() hb.b.Reset() } - return result + return hb.h.Sum64() } // metricToFingerprint works exactly as LabelsToSignature but takes a Metric as @@ -79,6 +87,34 @@ func metricToFingerprint(m Metric) Fingerprint { return Fingerprint(emptyLabelSignature) } + labelNames := make(LabelNames, 0, len(m)) + for labelName := range m { + labelNames = append(labelNames, labelName) + } + sort.Sort(labelNames) + + hb := getHashAndBuf() + defer putHashAndBuf(hb) + + for _, labelName := range labelNames { + hb.b.WriteString(string(labelName)) + hb.b.WriteByte(SeparatorByte) + hb.b.WriteString(string(m[labelName])) + 
hb.b.WriteByte(SeparatorByte) + hb.h.Write(hb.b.Bytes()) + hb.b.Reset() + } + return Fingerprint(hb.h.Sum64()) +} + +// metricToFastFingerprint works similarly to metricToFingerprint but uses a +// faster and less allocation-heavy hash function, which is more susceptible to +// hash collisions. Therefore, collision detection should be applied. +func metricToFastFingerprint(m Metric) Fingerprint { + if len(m) == 0 { + return Fingerprint(emptyLabelSignature) + } + var result uint64 hb := getHashAndBuf() defer putHashAndBuf(hb) @@ -97,13 +133,15 @@ func metricToFingerprint(m Metric) Fingerprint { // SignatureForLabels works like LabelsToSignature but takes a Metric as // parameter (rather than a label map) and only includes the labels with the -// specified LabelNames into the signature calculation. +// specified LabelNames into the signature calculation. The labels passed in +// will be sorted by this function. func SignatureForLabels(m Metric, labels LabelNames) uint64 { if len(m) == 0 || len(labels) == 0 { return emptyLabelSignature } - var result uint64 + sort.Sort(labels) + hb := getHashAndBuf() defer putHashAndBuf(hb) @@ -111,12 +149,11 @@ func SignatureForLabels(m Metric, labels LabelNames) uint64 { hb.b.WriteString(string(label)) hb.b.WriteByte(SeparatorByte) hb.b.WriteString(string(m[label])) + hb.b.WriteByte(SeparatorByte) hb.h.Write(hb.b.Bytes()) - result ^= hb.h.Sum64() - hb.h.Reset() hb.b.Reset() } - return result + return hb.h.Sum64() } // SignatureWithoutLabels works like LabelsToSignature but takes a Metric as @@ -127,24 +164,27 @@ func SignatureWithoutLabels(m Metric, labels map[LabelName]struct{}) uint64 { return emptyLabelSignature } - var result uint64 + labelNames := make(LabelNames, 0, len(m)) + for labelName := range m { + if _, exclude := labels[labelName]; !exclude { + labelNames = append(labelNames, labelName) + } + } + if len(labelNames) == 0 { + return emptyLabelSignature + } + sort.Sort(labelNames) + hb := getHashAndBuf() defer 
putHashAndBuf(hb) - for labelName, labelValue := range m { - if _, exclude := labels[labelName]; exclude { - continue - } + for _, labelName := range labelNames { hb.b.WriteString(string(labelName)) hb.b.WriteByte(SeparatorByte) - hb.b.WriteString(string(labelValue)) + hb.b.WriteString(string(m[labelName])) + hb.b.WriteByte(SeparatorByte) hb.h.Write(hb.b.Bytes()) - result ^= hb.h.Sum64() - hb.h.Reset() hb.b.Reset() } - if result == 0 { - return emptyLabelSignature - } - return result + return hb.h.Sum64() } diff --git a/model/signature_test.go b/model/signature_test.go index 7b3327d..01db531 100644 --- a/model/signature_test.go +++ b/model/signature_test.go @@ -30,7 +30,7 @@ func TestLabelsToSignature(t *testing.T) { }, { in: map[string]string{"name": "garland, briggs", "fear": "love is not enough"}, - out: 12952432476264840823, + out: 5799056148416392346, }, } @@ -54,7 +54,7 @@ func TestMetricToFingerprint(t *testing.T) { }, { in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, - out: 12952432476264840823, + out: 5799056148416392346, }, } @@ -67,6 +67,30 @@ func TestMetricToFingerprint(t *testing.T) { } } +func TestMetricToFastFingerprint(t *testing.T) { + var scenarios = []struct { + in Metric + out Fingerprint + }{ + { + in: Metric{}, + out: 14695981039346656037, + }, + { + in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, + out: 12952432476264840823, + }, + } + + for i, scenario := range scenarios { + actual := metricToFastFingerprint(scenario.in) + + if actual != scenario.out { + t.Errorf("%d. 
expected %d, got %d", i, scenario.out, actual) + } + } +} + func TestSignatureForLabels(t *testing.T) { var scenarios = []struct { in Metric @@ -81,12 +105,12 @@ func TestSignatureForLabels(t *testing.T) { { in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, labels: LabelNames{"fear", "name"}, - out: 12952432476264840823, + out: 5799056148416392346, }, { in: Metric{"name": "garland, briggs", "fear": "love is not enough", "foo": "bar"}, labels: LabelNames{"fear", "name"}, - out: 12952432476264840823, + out: 5799056148416392346, }, { in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, @@ -128,17 +152,17 @@ func TestSignatureWithoutLabels(t *testing.T) { { in: Metric{"name": "garland, briggs", "fear": "love is not enough", "foo": "bar"}, labels: map[LabelName]struct{}{"foo": struct{}{}}, - out: 12952432476264840823, + out: 5799056148416392346, }, { in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, labels: map[LabelName]struct{}{}, - out: 12952432476264840823, + out: 5799056148416392346, }, { in: Metric{"name": "garland, briggs", "fear": "love is not enough"}, labels: nil, - out: 12952432476264840823, + out: 5799056148416392346, }, } @@ -164,15 +188,15 @@ func BenchmarkLabelToSignatureScalar(b *testing.B) { } func BenchmarkLabelToSignatureSingle(b *testing.B) { - benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 5147259542624943964) + benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value"}, 5146282821936882169) } func BenchmarkLabelToSignatureDouble(b *testing.B) { - benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528) + benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value"}, 3195800080984914717) } func BenchmarkLabelToSignatureTriple(b *testing.B) { - benchmarkLabelToSignature(b, 
map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676) + benchmarkLabelToSignature(b, map[string]string{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 13843036195897128121) } func benchmarkMetricToFingerprint(b *testing.B, m Metric, e Fingerprint) { @@ -188,15 +212,39 @@ func BenchmarkMetricToFingerprintScalar(b *testing.B) { } func BenchmarkMetricToFingerprintSingle(b *testing.B) { - benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value"}, 5147259542624943964) + benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value"}, 5146282821936882169) } func BenchmarkMetricToFingerprintDouble(b *testing.B) { - benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528) + benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value"}, 3195800080984914717) } func BenchmarkMetricToFingerprintTriple(b *testing.B) { - benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676) + benchmarkMetricToFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 13843036195897128121) +} + +func benchmarkMetricToFastFingerprint(b *testing.B, m Metric, e Fingerprint) { + for i := 0; i < b.N; i++ { + if a := metricToFastFingerprint(m); a != e { + b.Fatalf("expected signature of %d for %s, got %d", e, m, a) + } + } +} + +func BenchmarkMetricToFastFingerprintScalar(b *testing.B) { + benchmarkMetricToFastFingerprint(b, nil, 14695981039346656037) +} + +func BenchmarkMetricToFastFingerprintSingle(b *testing.B) { + benchmarkMetricToFastFingerprint(b, Metric{"first-label": "first-label-value"}, 
5147259542624943964) +} + +func BenchmarkMetricToFastFingerprintDouble(b *testing.B) { + benchmarkMetricToFastFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value"}, 18269973311206963528) +} + +func BenchmarkMetricToFastFingerprintTriple(b *testing.B) { + benchmarkMetricToFastFingerprint(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676) } func TestEmptyLabelSignature(t *testing.T) { @@ -218,7 +266,7 @@ func TestEmptyLabelSignature(t *testing.T) { } } -func benchmarkMetricToFingerprintConc(b *testing.B, m Metric, e Fingerprint, concLevel int) { +func benchmarkMetricToFastFingerprintConc(b *testing.B, m Metric, e Fingerprint, concLevel int) { var start, end sync.WaitGroup start.Add(1) end.Add(concLevel) @@ -227,7 +275,7 @@ func benchmarkMetricToFingerprintConc(b *testing.B, m Metric, e Fingerprint, con go func() { start.Wait() for j := b.N / concLevel; j >= 0; j-- { - if a := metricToFingerprint(m); a != e { + if a := metricToFastFingerprint(m); a != e { b.Fatalf("expected signature of %d for %s, got %d", e, m, a) } } @@ -239,18 +287,18 @@ func benchmarkMetricToFingerprintConc(b *testing.B, m Metric, e Fingerprint, con end.Wait() } -func BenchmarkMetricToFingerprintTripleConc1(b *testing.B) { - benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 1) +func BenchmarkMetricToFastFingerprintTripleConc1(b *testing.B) { + benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 1) } -func BenchmarkMetricToFingerprintTripleConc2(b *testing.B) { - benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 
15738406913934009676, 2) +func BenchmarkMetricToFastFingerprintTripleConc2(b *testing.B) { + benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 2) } -func BenchmarkMetricToFingerprintTripleConc4(b *testing.B) { - benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 4) +func BenchmarkMetricToFastFingerprintTripleConc4(b *testing.B) { + benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 4) } -func BenchmarkMetricToFingerprintTripleConc8(b *testing.B) { - benchmarkMetricToFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 8) +func BenchmarkMetricToFastFingerprintTripleConc8(b *testing.B) { + benchmarkMetricToFastFingerprintConc(b, Metric{"first-label": "first-label-value", "second-label": "second-label-value", "third-label": "third-label-value"}, 15738406913934009676, 8) } From 41ecb6c6b2df1e3e2e1dbbe57524445ec015350d Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 5 May 2015 22:16:17 +0200 Subject: [PATCH 2/2] Improve the Metric.Equal and Metric.Before methods. --- model/metric.go | 64 +++++++++++++++++++++++++++++++++++++++----- model/sample_test.go | 60 +++++++++++++++++------------------------ model/signature.go | 2 +- 3 files changed, 83 insertions(+), 43 deletions(-) diff --git a/model/metric.go b/model/metric.go index b43ddaf..0870f23 100644 --- a/model/metric.go +++ b/model/metric.go @@ -26,16 +26,68 @@ var separator = []byte{0} // a singleton and refers to one and only one stream of samples. 
type Metric map[LabelName]LabelValue -// Equal compares the fingerprints of both metrics. +// Equal compares the metrics. func (m Metric) Equal(o Metric) bool { - // TODO do an actual map comparison - return m.Fingerprint().Equal(o.Fingerprint()) + if len(m) != len(o) { + return false + } + for ln, lv := range m { + olv, ok := o[ln] + if !ok { + return false + } + if olv != lv { + return false + } + } + return true } -// Before compares the fingerprints of both metrics. +// Before compares the metrics, using the following criteria: +// +// If m has fewer labels than o, it is before o. If it has more, it is not. +// +// If the number of labels is the same, the superset of all label names is +// sorted alphanumerically. The first differing label pair found in that order +// determines the outcome: If the label does not exist at all in m, then m is +// before o, and vice versa. Otherwise the label value is compared +// alphanumerically. +// +// If m and o are equal, the method returns false. func (m Metric) Before(o Metric) bool { - // TODO do an actual map comparison - return m.Fingerprint().Less(o.Fingerprint()) + if len(m) < len(o) { + return true + } + if len(m) > len(o) { + return false + } + + lns := make(LabelNames, 0, len(m)+len(o)) + for ln := range m { + lns = append(lns, ln) + } + for ln := range o { + lns = append(lns, ln) + } + // It's probably not worth it to de-dup lns. + sort.Sort(lns) + for _, ln := range lns { + mlv, ok := m[ln] + if !ok { + return true + } + olv, ok := o[ln] + if !ok { + return false + } + if mlv < olv { + return true + } + if mlv > olv { + return false + } + } + return false } // String implements Stringer. diff --git a/model/sample_test.go b/model/sample_test.go index 3dc4ad2..d5e065d 100644 --- a/model/sample_test.go +++ b/model/sample_test.go @@ -21,42 +21,36 @@ import ( func TestSamplesSort(t *testing.T) { input := Samples{ &Sample{ - // Fingerprint: 81f9c9ed24563f8f. 
Metric: Metric{ MetricNameLabel: "A", }, Timestamp: 1, }, &Sample{ - // Fingerprint: 81f9c9ed24563f8f. Metric: Metric{ MetricNameLabel: "A", }, Timestamp: 2, }, &Sample{ - // Fingerprint: 1bf6c9ed24543f8f. Metric: Metric{ MetricNameLabel: "C", }, Timestamp: 1, }, &Sample{ - // Fingerprint: 1bf6c9ed24543f8f. Metric: Metric{ MetricNameLabel: "C", }, Timestamp: 2, }, &Sample{ - // Fingerprint: 68f4c9ed24533f8f. Metric: Metric{ MetricNameLabel: "B", }, Timestamp: 1, }, &Sample{ - // Fingerprint: 68f4c9ed24533f8f. Metric: Metric{ MetricNameLabel: "B", }, @@ -66,47 +60,41 @@ func TestSamplesSort(t *testing.T) { expected := Samples{ &Sample{ - // Fingerprint: 1bf6c9ed24543f8f. - Metric: Metric{ - MetricNameLabel: "C", - }, - Timestamp: 1, - }, - &Sample{ - // Fingerprint: 1bf6c9ed24543f8f. - Metric: Metric{ - MetricNameLabel: "C", - }, - Timestamp: 2, - }, - &Sample{ - // Fingerprint: 68f4c9ed24533f8f. - Metric: Metric{ - MetricNameLabel: "B", - }, - Timestamp: 1, - }, - &Sample{ - // Fingerprint: 68f4c9ed24533f8f. - Metric: Metric{ - MetricNameLabel: "B", - }, - Timestamp: 2, - }, - &Sample{ - // Fingerprint: 81f9c9ed24563f8f. Metric: Metric{ MetricNameLabel: "A", }, Timestamp: 1, }, &Sample{ - // Fingerprint: 81f9c9ed24563f8f. 
Metric: Metric{ MetricNameLabel: "A", }, Timestamp: 2, }, + &Sample{ + Metric: Metric{ + MetricNameLabel: "B", + }, + Timestamp: 1, + }, + &Sample{ + Metric: Metric{ + MetricNameLabel: "B", + }, + Timestamp: 2, + }, + &Sample{ + Metric: Metric{ + MetricNameLabel: "C", + }, + Timestamp: 1, + }, + &Sample{ + Metric: Metric{ + MetricNameLabel: "C", + }, + Timestamp: 2, + }, } sort.Sort(input) diff --git a/model/signature.go b/model/signature.go index 3f7abec..7bd58f4 100644 --- a/model/signature.go +++ b/model/signature.go @@ -52,7 +52,7 @@ func putHashAndBuf(hb *hashAndBuf) { hashAndBufPool.Put(hb) } -// LabelsToSignature returns an quasi-unique signature (i.e., fingerprint) for a +// LabelsToSignature returns a quasi-unique signature (i.e., fingerprint) for a // given label set. (Collisions are possible but unlikely if the number of label // sets the function is applied to is small.) func LabelsToSignature(labels map[string]string) uint64 {