From c4004ef5f67df09f9c230303a70037a6a716f9aa Mon Sep 17 00:00:00 2001
From: Stephen J Day <stephen.day@docker.com>
Date: Wed, 10 Aug 2016 20:03:15 -0700
Subject: [PATCH 1/3] benchmark: measure label resolution in MetricVec

Signed-off-by: Stephen J Day <stephen.day@docker.com>
---
 prometheus/counter.go   | 22 ++++------
 prometheus/gauge.go     | 10 ++---
 prometheus/histogram.go | 10 ++---
 prometheus/summary.go   | 10 ++---
 prometheus/untyped.go   | 10 ++---
 prometheus/vec.go       | 10 +++++
 prometheus/vec_test.go  | 93 ++++++++++++++++++++++++++++++++++-------
 7 files changed, 108 insertions(+), 57 deletions(-)

diff --git a/prometheus/counter.go b/prometheus/counter.go
index 0537f88..7463858 100644
--- a/prometheus/counter.go
+++ b/prometheus/counter.go
@@ -96,19 +96,15 @@ func NewCounterVec(opts CounterOpts, labelNames []string) *CounterVec {
 		opts.ConstLabels,
 	)
 	return &CounterVec{
-		MetricVec: MetricVec{
-			children: map[uint64]Metric{},
-			desc:     desc,
-			newMetric: func(lvs ...string) Metric {
-				result := &counter{value: value{
-					desc:       desc,
-					valType:    CounterValue,
-					labelPairs: makeLabelPairs(desc, lvs),
-				}}
-				result.init(result) // Init self-collection.
-				return result
-			},
-		},
+		MetricVec: newMetricVec(desc, func(lvs ...string) Metric {
+			result := &counter{value: value{
+				desc:       desc,
+				valType:    CounterValue,
+				labelPairs: makeLabelPairs(desc, lvs),
+			}}
+			result.init(result) // Init self-collection.
+			return result
+		}),
 	}
 }
 
diff --git a/prometheus/gauge.go b/prometheus/gauge.go
index 390c074..af3037d 100644
--- a/prometheus/gauge.go
+++ b/prometheus/gauge.go
@@ -72,13 +72,9 @@ func NewGaugeVec(opts GaugeOpts, labelNames []string) *GaugeVec {
 		opts.ConstLabels,
 	)
 	return &GaugeVec{
-		MetricVec: MetricVec{
-			children: map[uint64]Metric{},
-			desc:     desc,
-			newMetric: func(lvs ...string) Metric {
-				return newValue(desc, GaugeValue, 0, lvs...)
-			},
-		},
+		MetricVec: newMetricVec(desc, func(lvs ...string) Metric {
+			return newValue(desc, GaugeValue, 0, lvs...)
+		}),
 	}
 }
 
diff --git a/prometheus/histogram.go b/prometheus/histogram.go
index 11a9083..3d550ba 100644
--- a/prometheus/histogram.go
+++ b/prometheus/histogram.go
@@ -301,13 +301,9 @@ func NewHistogramVec(opts HistogramOpts, labelNames []string) *HistogramVec {
 		opts.ConstLabels,
 	)
 	return &HistogramVec{
-		MetricVec: MetricVec{
-			children: map[uint64]Metric{},
-			desc:     desc,
-			newMetric: func(lvs ...string) Metric {
-				return newHistogram(desc, opts, lvs...)
-			},
-		},
+		MetricVec: newMetricVec(desc, func(lvs ...string) Metric {
+			return newHistogram(desc, opts, lvs...)
+		}),
 	}
 }
 
diff --git a/prometheus/summary.go b/prometheus/summary.go
index fda213c..2d6739e 100644
--- a/prometheus/summary.go
+++ b/prometheus/summary.go
@@ -404,13 +404,9 @@ func NewSummaryVec(opts SummaryOpts, labelNames []string) *SummaryVec {
 		opts.ConstLabels,
 	)
 	return &SummaryVec{
-		MetricVec: MetricVec{
-			children: map[uint64]Metric{},
-			desc:     desc,
-			newMetric: func(lvs ...string) Metric {
-				return newSummary(desc, opts, lvs...)
-			},
-		},
+		MetricVec: newMetricVec(desc, func(lvs ...string) Metric {
+			return newSummary(desc, opts, lvs...)
+		}),
 	}
 }
 
diff --git a/prometheus/untyped.go b/prometheus/untyped.go
index 89b86ea..b1baf07 100644
--- a/prometheus/untyped.go
+++ b/prometheus/untyped.go
@@ -70,13 +70,9 @@ func NewUntypedVec(opts UntypedOpts, labelNames []string) *UntypedVec {
 		opts.ConstLabels,
 	)
 	return &UntypedVec{
-		MetricVec: MetricVec{
-			children: map[uint64]Metric{},
-			desc:     desc,
-			newMetric: func(lvs ...string) Metric {
-				return newValue(desc, UntypedValue, 0, lvs...)
-			},
-		},
+		MetricVec: newMetricVec(desc, func(lvs ...string) Metric {
+			return newValue(desc, UntypedValue, 0, lvs...)
+		}),
 	}
 }
 
diff --git a/prometheus/vec.go b/prometheus/vec.go
index 68f9461..24b1606 100644
--- a/prometheus/vec.go
+++ b/prometheus/vec.go
@@ -31,6 +31,16 @@ type MetricVec struct {
 	newMetric func(labelValues ...string) Metric
 }
 
+// newMetricVec returns an initialized MetricVec. The concrete value is
+// returned for embedding into another struct.
+func newMetricVec(desc *Desc, newMetric func(lvs ...string) Metric) MetricVec {
+	return MetricVec{
+		children:  map[uint64]Metric{},
+		desc:      desc,
+		newMetric: newMetric,
+	}
+}
+
 // Describe implements Collector. The length of the returned slice
 // is always one.
 func (m *MetricVec) Describe(ch chan<- *Desc) {
diff --git a/prometheus/vec_test.go b/prometheus/vec_test.go
index 6760ed8..2986de3 100644
--- a/prometheus/vec_test.go
+++ b/prometheus/vec_test.go
@@ -14,18 +14,12 @@
 package prometheus
 
 import (
+	"fmt"
 	"testing"
 )
 
 func TestDelete(t *testing.T) {
-	desc := NewDesc("test", "helpless", []string{"l1", "l2"}, nil)
-	vec := MetricVec{
-		children: map[uint64]Metric{},
-		desc:     desc,
-		newMetric: func(lvs ...string) Metric {
-			return newValue(desc, UntypedValue, 0, lvs...)
-		},
-	}
+	vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
 
 	if got, want := vec.Delete(Labels{"l1": "v1", "l2": "v2"}), false; got != want {
 		t.Errorf("got %v, want %v", got, want)
@@ -57,14 +51,7 @@ func TestDelete(t *testing.T) {
 }
 
 func TestDeleteLabelValues(t *testing.T) {
-	desc := NewDesc("test", "helpless", []string{"l1", "l2"}, nil)
-	vec := MetricVec{
-		children: map[uint64]Metric{},
-		desc:     desc,
-		newMetric: func(lvs ...string) Metric {
-			return newValue(desc, UntypedValue, 0, lvs...)
-		},
-	}
+	vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
 
 	if got, want := vec.DeleteLabelValues("v1", "v2"), false; got != want {
 		t.Errorf("got %v, want %v", got, want)
@@ -86,3 +73,77 @@ func TestDeleteLabelValues(t *testing.T) {
 		t.Errorf("got %v, want %v", got, want)
 	}
 }
+
+func newUntypedMetricVec(name, help string, labels []string) *MetricVec {
+	desc := NewDesc("test", "helpless", labels, nil)
+	vec := newMetricVec(desc, func(lvs ...string) Metric {
+		return newValue(desc, UntypedValue, 0, lvs...)
+	})
+	return &vec
+}
+
+func BenchmarkMetricVecWithLabelValuesBasic(B *testing.B) {
+	benchmarkMetricVecWithLabelValues(B, map[string][]string{
+		"l1": []string{"onevalue"},
+		"l2": []string{"twovalue"},
+	})
+}
+
+func BenchmarkMetricVecWithLabelValues2Keys10ValueCardinality(B *testing.B) {
+	benchmarkMetricVecWithLabelValuesCardinality(B, 2, 10)
+}
+
+func BenchmarkMetricVecWithLabelValues4Keys10ValueCardinality(B *testing.B) {
+	benchmarkMetricVecWithLabelValuesCardinality(B, 4, 10)
+}
+
+func BenchmarkMetricVecWithLabelValues2Keys100ValueCardinality(B *testing.B) {
+	benchmarkMetricVecWithLabelValuesCardinality(B, 2, 100)
+}
+
+func BenchmarkMetricVecWithLabelValues10Keys100ValueCardinality(B *testing.B) {
+	benchmarkMetricVecWithLabelValuesCardinality(B, 10, 100)
+}
+
+func BenchmarkMetricVecWithLabelValues10Keys1000ValueCardinality(B *testing.B) {
+	benchmarkMetricVecWithLabelValuesCardinality(B, 10, 1000)
+}
+
+func benchmarkMetricVecWithLabelValuesCardinality(B *testing.B, nkeys, nvalues int) {
+	labels := map[string][]string{}
+
+	for i := 0; i < nkeys; i++ {
+		var (
+			k  = fmt.Sprintf("key-%v", i)
+			vs = make([]string, 0, nvalues)
+		)
+		for j := 0; j < nvalues; j++ {
+			vs = append(vs, fmt.Sprintf("value-%v", j))
+		}
+		labels[k] = vs
+	}
+
+	benchmarkMetricVecWithLabelValues(B, labels)
+}
+
+func benchmarkMetricVecWithLabelValues(B *testing.B, labels map[string][]string) {
+	var keys []string
+	for k := range labels { // map order dependent, who cares though
+		keys = append(keys, k)
+	}
+
+	values := make([]string, len(labels)) // value cache for permutations
+	vec := newUntypedMetricVec("test", "helpless", keys)
+
+	B.ReportAllocs()
+	B.ResetTimer()
+	for i := 0; i < B.N; i++ {
+		// varies input across provide map entries based on key size.
+		for j, k := range keys {
+			candidates := labels[k]
+			values[j] = candidates[i%len(candidates)]
+		}
+
+		vec.WithLabelValues(values...)
+	}
+}

From 3cf50db5fd088a619266deff7f62410948e45f9c Mon Sep 17 00:00:00 2001
From: Stephen J Day <stephen.day@docker.com>
Date: Thu, 11 Aug 2016 17:06:03 -0700
Subject: [PATCH 2/3] metricvec: handle hash collision for labeled metrics

While hash collisions are quite rare, the current state of the client
library carries a risk of merging two separate label values into a
single metric bucket. The effects are near impossible to detect and the
result will be missing or incorrect counters.

This changeset handles hash collisions by falling back to collision
resolution if multiple label values hash to the same value. This works
similar to separate chaining using a slice. Extra storage is minimized
to only the value key slice to that metrics can be differentiated
within a bucket.

In general, the cost of handling collisions is completely minimized
under normal operation. Performance does show slight increases in
certain areas, but these are more likely statistically anomalies. More
importantly, zero allocation behavior for metrics is preserved on the
fast path. Minimal allocations may be made during collision handling but
this has minimal effect.

Benchmark comparisons with and without collision resolution follow.

```
benchmark                                                         old ns/op     new ns/op     delta
BenchmarkCounterWithLabelValues-4                                 99.0          107           +8.08%
BenchmarkCounterWithLabelValuesConcurrent-4                       79.6          91.0          +14.32%
BenchmarkCounterWithMappedLabels-4                                518           542           +4.63%
BenchmarkCounterWithPreparedMappedLabels-4                        127           137           +7.87%
BenchmarkCounterNoLabels-4                                        19.5          19.1          -2.05%
BenchmarkGaugeWithLabelValues-4                                   97.4          110           +12.94%
BenchmarkGaugeNoLabels-4                                          12.4          10.3          -16.94%
BenchmarkSummaryWithLabelValues-4                                 1204          915           -24.00%
BenchmarkSummaryNoLabels-4                                        936           847           -9.51%
BenchmarkHistogramWithLabelValues-4                               147           147           +0.00%
BenchmarkHistogramNoLabels-4                                      50.6          49.3          -2.57%
BenchmarkHistogramObserve1-4                                      37.9          37.5          -1.06%
BenchmarkHistogramObserve2-4                                      122           137           +12.30%
BenchmarkHistogramObserve4-4                                      310           352           +13.55%
BenchmarkHistogramObserve8-4                                      691           729           +5.50%
BenchmarkHistogramWrite1-4                                        3374          3097          -8.21%
BenchmarkHistogramWrite2-4                                        5310          5051          -4.88%
BenchmarkHistogramWrite4-4                                        12094         10690         -11.61%
BenchmarkHistogramWrite8-4                                        19416         17755         -8.55%
BenchmarkHandler-4                                                11934304      13765894      +15.35%
BenchmarkSummaryObserve1-4                                        1119          1105          -1.25%
BenchmarkSummaryObserve2-4                                        3679          3430          -6.77%
BenchmarkSummaryObserve4-4                                        10678         7982          -25.25%
BenchmarkSummaryObserve8-4                                        22974         16689         -27.36%
BenchmarkSummaryWrite1-4                                          25962         14680         -43.46%
BenchmarkSummaryWrite2-4                                          38019         35073         -7.75%
BenchmarkSummaryWrite4-4                                          78027         56816         -27.18%
BenchmarkSummaryWrite8-4                                          117220        132248        +12.82%
BenchmarkMetricVecWithLabelValuesBasic-4                          138           133           -3.62%
BenchmarkMetricVecWithLabelValues2Keys10ValueCardinality-4        150           144           -4.00%
BenchmarkMetricVecWithLabelValues4Keys10ValueCardinality-4        263           256           -2.66%
BenchmarkMetricVecWithLabelValues2Keys100ValueCardinality-4       145           155           +6.90%
BenchmarkMetricVecWithLabelValues10Keys100ValueCardinality-4      606           634           +4.62%
BenchmarkMetricVecWithLabelValues10Keys1000ValueCardinality-4     746           641           -14.08%

benchmark                                                         old allocs     new allocs     delta
BenchmarkCounterWithLabelValues-4                                 0              0              +0.00%
BenchmarkCounterWithLabelValuesConcurrent-4                       0              0              +0.00%
BenchmarkCounterWithMappedLabels-4                                2              2              +0.00%
BenchmarkCounterWithPreparedMappedLabels-4                        0              0              +0.00%
BenchmarkCounterNoLabels-4                                        0              0              +0.00%
BenchmarkGaugeWithLabelValues-4                                   0              0              +0.00%
BenchmarkGaugeNoLabels-4                                          0              0              +0.00%
BenchmarkSummaryWithLabelValues-4                                 0              0              +0.00%
BenchmarkSummaryNoLabels-4                                        0              0              +0.00%
BenchmarkHistogramWithLabelValues-4                               0              0              +0.00%
BenchmarkHistogramNoLabels-4                                      0              0              +0.00%
BenchmarkMetricVecWithLabelValuesBasic-4                          0              0              +0.00%
BenchmarkMetricVecWithLabelValues2Keys10ValueCardinality-4        0              0              +0.00%
BenchmarkMetricVecWithLabelValues4Keys10ValueCardinality-4        0              0              +0.00%
BenchmarkMetricVecWithLabelValues2Keys100ValueCardinality-4       0              0              +0.00%
BenchmarkMetricVecWithLabelValues10Keys100ValueCardinality-4      0              0              +0.00%
BenchmarkMetricVecWithLabelValues10Keys1000ValueCardinality-4     0              0              +0.00%

benchmark                                                         old bytes     new bytes     delta
BenchmarkCounterWithLabelValues-4                                 0             0             +0.00%
BenchmarkCounterWithLabelValuesConcurrent-4                       0             0             +0.00%
BenchmarkCounterWithMappedLabels-4                                336           336           +0.00%
BenchmarkCounterWithPreparedMappedLabels-4                        0             0             +0.00%
BenchmarkCounterNoLabels-4                                        0             0             +0.00%
BenchmarkGaugeWithLabelValues-4                                   0             0             +0.00%
BenchmarkGaugeNoLabels-4                                          0             0             +0.00%
BenchmarkSummaryWithLabelValues-4                                 0             0             +0.00%
BenchmarkSummaryNoLabels-4                                        0             0             +0.00%
BenchmarkHistogramWithLabelValues-4                               0             0             +0.00%
BenchmarkHistogramNoLabels-4                                      0             0             +0.00%
BenchmarkMetricVecWithLabelValuesBasic-4                          0             0             +0.00%
BenchmarkMetricVecWithLabelValues2Keys10ValueCardinality-4        0             0             +0.00%
BenchmarkMetricVecWithLabelValues4Keys10ValueCardinality-4        0             0             +0.00%
BenchmarkMetricVecWithLabelValues2Keys100ValueCardinality-4       0             0             +0.00%
BenchmarkMetricVecWithLabelValues10Keys100ValueCardinality-4      0             0             +0.00%
BenchmarkMetricVecWithLabelValues10Keys1000ValueCardinality-4     0             0             +0.00%
```

Signed-off-by: Stephen J Day <stephen.day@docker.com>
---
 prometheus/vec.go      | 125 ++++++++++++++++++++++++++++++++++-------
 prometheus/vec_test.go |   9 +++
 2 files changed, 114 insertions(+), 20 deletions(-)

diff --git a/prometheus/vec.go b/prometheus/vec.go
index 24b1606..02a7455 100644
--- a/prometheus/vec.go
+++ b/prometheus/vec.go
@@ -25,22 +25,29 @@ import (
 // provided in this package.
 type MetricVec struct {
 	mtx      sync.RWMutex // Protects the children.
-	children map[uint64]Metric
+	children map[uint64][]metricLabelValue
 	desc     *Desc
 
 	newMetric func(labelValues ...string) Metric
+	hashAdd   func(h uint64, s string) uint64 // replace hash function for testing collision handling
 }
 
 // newMetricVec returns an initialized MetricVec. The concrete value is
 // returned for embedding into another struct.
 func newMetricVec(desc *Desc, newMetric func(lvs ...string) Metric) MetricVec {
 	return MetricVec{
-		children:  map[uint64]Metric{},
+		children:  map[uint64][]metricLabelValue{},
 		desc:      desc,
 		newMetric: newMetric,
+		hashAdd:   hashAdd,
 	}
 }
 
+type metricLabelValue struct {
+	values []string
+	metric Metric
+}
+
 // Describe implements Collector. The length of the returned slice
 // is always one.
 func (m *MetricVec) Describe(ch chan<- *Desc) {
@@ -52,8 +59,10 @@ func (m *MetricVec) Collect(ch chan<- Metric) {
 	m.mtx.RLock()
 	defer m.mtx.RUnlock()
 
-	for _, metric := range m.children {
-		ch <- metric
+	for _, metrics := range m.children {
+		for _, metric := range metrics {
+			ch <- metric.metric
+		}
 	}
 }
 
@@ -88,7 +97,7 @@ func (m *MetricVec) GetMetricWithLabelValues(lvs ...string) (Metric, error) {
 	}
 
 	m.mtx.RLock()
-	metric, ok := m.children[h]
+	metric, ok := m.getMetric(h, lvs...)
 	m.mtx.RUnlock()
 	if ok {
 		return metric, nil
@@ -118,11 +127,12 @@ func (m *MetricVec) GetMetricWith(labels Labels) (Metric, error) {
 	}
 
 	m.mtx.RLock()
-	metric, ok := m.children[h]
-	m.mtx.RUnlock()
-	if ok {
-		return metric, nil
+	metrics, ok := m.children[h]
+	if ok && len(metrics) == 1 {
+		m.mtx.RUnlock()
+		return metrics[0].metric, nil
 	}
+	m.mtx.RUnlock()
 
 	lvs := make([]string, len(labels))
 	for i, label := range m.desc.variableLabels {
@@ -178,11 +188,7 @@ func (m *MetricVec) DeleteLabelValues(lvs ...string) bool {
 	if err != nil {
 		return false
 	}
-	if _, ok := m.children[h]; !ok {
-		return false
-	}
-	delete(m.children, h)
-	return true
+	return m.deleteByHash(h, lvs...)
 }
 
 // Delete deletes the metric where the variable labels are the same as those
@@ -203,10 +209,38 @@ func (m *MetricVec) Delete(labels Labels) bool {
 	if err != nil {
 		return false
 	}
-	if _, ok := m.children[h]; !ok {
+
+	var lvs []string
+	for _, k := range m.desc.variableLabels {
+		lvs = append(lvs, labels[k])
+	}
+
+	return m.deleteByHash(h, lvs...)
+}
+
+// deleteByHash removes the metric from the hash bucket h. If there are
+// multiple matches in the bucket, use lvs to select a metric and remove only
+// that metric.
+func (m *MetricVec) deleteByHash(h uint64, lvs ...string) bool {
+	metrics, ok := m.children[h]
+	if !ok {
 		return false
 	}
-	delete(m.children, h)
+
+	if len(metrics) < 2 {
+		delete(m.children, h)
+	}
+
+	i := findMetric(lvs, metrics)
+	if i >= len(metrics) {
+		return false
+	}
+
+	if len(metrics) > 1 {
+		m.children[h] = append(metrics[:i], metrics[i+1:]...)
+	} else {
+		delete(m.children, h)
+	}
 	return true
 }
 
@@ -226,7 +260,7 @@ func (m *MetricVec) hashLabelValues(vals []string) (uint64, error) {
 	}
 	h := hashNew()
 	for _, val := range vals {
-		h = hashAdd(h, val)
+		h = m.hashAdd(h, val)
 	}
 	return h, nil
 }
@@ -241,19 +275,70 @@ func (m *MetricVec) hashLabels(labels Labels) (uint64, error) {
 		if !ok {
 			return 0, fmt.Errorf("label name %q missing in label map", label)
 		}
-		h = hashAdd(h, val)
+		h = m.hashAdd(h, val)
 	}
 	return h, nil
 }
 
 func (m *MetricVec) getOrCreateMetric(hash uint64, labelValues ...string) Metric {
-	metric, ok := m.children[hash]
+	metric, ok := m.getMetric(hash, labelValues...)
 	if !ok {
 		// Copy labelValues. Otherwise, they would be allocated even if we don't go
 		// down this code path.
 		copiedLabelValues := append(make([]string, 0, len(labelValues)), labelValues...)
 		metric = m.newMetric(copiedLabelValues...)
-		m.children[hash] = metric
+		m.children[hash] = append(m.children[hash], metricLabelValue{values: copiedLabelValues, metric: metric})
 	}
 	return metric
 }
+
+// getMetric while handling possible collisions in the hash space. Must be
+// called while holding read mutex.
+func (m *MetricVec) getMetric(h uint64, lvs ...string) (Metric, bool) {
+	metrics, ok := m.children[h]
+	if ok {
+		return m.selectMetric(lvs, metrics)
+	}
+
+	return nil, false
+}
+
+func (m *MetricVec) selectMetric(lvs []string, metrics []metricLabelValue) (Metric, bool) {
+	switch len(metrics) {
+	case 0:
+		return nil, false
+	case 1:
+		// collisions are rare, this should be the fast path.
+		return metrics[0].metric, true
+	}
+
+	i := findMetric(lvs, metrics)
+
+	if i < len(metrics) {
+		return metrics[i].metric, true
+	}
+
+	return nil, false
+}
+
+// findMetric returns the index of the matching metric or len(metrics) if not
+// found.
+func findMetric(lvs []string, metrics []metricLabelValue) int {
+next:
+	for i, metric := range metrics {
+		if len(metric.values) != len(lvs) {
+			continue
+		}
+
+		for j, v := range metric.values {
+			if v != lvs[j] {
+				continue next
+			}
+		}
+		// falling out of the loop here means we have a match!
+
+		return i
+	}
+
+	return len(metrics)
+}
diff --git a/prometheus/vec_test.go b/prometheus/vec_test.go
index 2986de3..674abb9 100644
--- a/prometheus/vec_test.go
+++ b/prometheus/vec_test.go
@@ -52,7 +52,16 @@ func TestDelete(t *testing.T) {
 
 func TestDeleteLabelValues(t *testing.T) {
 	vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
+	testDeleteLabelValues(t, vec)
+}
 
+func TestDeleteLabelValuesWithCollisions(t *testing.T) {
+	vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
+	vec.hashAdd = func(h uint64, s string) uint64 { return 1 }
+	testDeleteLabelValues(t, vec)
+}
+
+func testDeleteLabelValues(t *testing.T, vec *MetricVec) {
 	if got, want := vec.DeleteLabelValues("v1", "v2"), false; got != want {
 		t.Errorf("got %v, want %v", got, want)
 	}

From 4db77b04a8bc8b6675556a53bed22fad14d05db5 Mon Sep 17 00:00:00 2001
From: Stephen J Day <stephen.day@docker.com>
Date: Fri, 12 Aug 2016 16:20:05 -0700
Subject: [PATCH 3/3] metricvec: refactor collision handling to handle equality

After increasing unit test coverage, it was found that the split
function call nature of metric matching wasn't working well in many
cases. By increasing test coverage, we've ensured that both the fast
path and fallback collision path are working appropriately.

With these changes, there is a further performance hit, but now the
results are ensured to be correct.

Signed-off-by: Stephen J Day <stephen.day@docker.com>
---
 prometheus/vec.go      | 188 ++++++++++++++++++++++++-----------------
 prometheus/vec_test.go |  86 +++++++++++++++++++
 2 files changed, 196 insertions(+), 78 deletions(-)

diff --git a/prometheus/vec.go b/prometheus/vec.go
index 02a7455..ec847bd 100644
--- a/prometheus/vec.go
+++ b/prometheus/vec.go
@@ -16,6 +16,8 @@ package prometheus
 import (
 	"fmt"
 	"sync"
+
+	"github.com/prometheus/common/model"
 )
 
 // MetricVec is a Collector to bundle metrics of the same name that
@@ -25,25 +27,29 @@ import (
 // provided in this package.
 type MetricVec struct {
 	mtx      sync.RWMutex // Protects the children.
-	children map[uint64][]metricLabelValue
+	children map[uint64][]metricWithLabelValues
 	desc     *Desc
 
-	newMetric func(labelValues ...string) Metric
-	hashAdd   func(h uint64, s string) uint64 // replace hash function for testing collision handling
+	newMetric   func(labelValues ...string) Metric
+	hashAdd     func(h uint64, s string) uint64 // replace hash function for testing collision handling
+	hashAddByte func(h uint64, b byte) uint64
 }
 
 // newMetricVec returns an initialized MetricVec. The concrete value is
 // returned for embedding into another struct.
 func newMetricVec(desc *Desc, newMetric func(lvs ...string) Metric) MetricVec {
 	return MetricVec{
-		children:  map[uint64][]metricLabelValue{},
-		desc:      desc,
-		newMetric: newMetric,
-		hashAdd:   hashAdd,
+		children:    map[uint64][]metricWithLabelValues{},
+		desc:        desc,
+		newMetric:   newMetric,
+		hashAdd:     hashAdd,
+		hashAddByte: hashAddByte,
 	}
 }
 
-type metricLabelValue struct {
+// metricWithLabelValues provides the metric and its label values for
+// disambiguation on hash collision.
+type metricWithLabelValues struct {
 	values []string
 	metric Metric
 }
@@ -96,16 +102,7 @@ func (m *MetricVec) GetMetricWithLabelValues(lvs ...string) (Metric, error) {
 		return nil, err
 	}
 
-	m.mtx.RLock()
-	metric, ok := m.getMetric(h, lvs...)
-	m.mtx.RUnlock()
-	if ok {
-		return metric, nil
-	}
-
-	m.mtx.Lock()
-	defer m.mtx.Unlock()
-	return m.getOrCreateMetric(h, lvs...), nil
+	return m.getOrCreateMetric(h, lvs), nil
 }
 
 // GetMetricWith returns the Metric for the given Labels map (the label names
@@ -126,21 +123,7 @@ func (m *MetricVec) GetMetricWith(labels Labels) (Metric, error) {
 		return nil, err
 	}
 
-	m.mtx.RLock()
-	metrics, ok := m.children[h]
-	if ok && len(metrics) == 1 {
-		m.mtx.RUnlock()
-		return metrics[0].metric, nil
-	}
-	m.mtx.RUnlock()
-
-	lvs := make([]string, len(labels))
-	for i, label := range m.desc.variableLabels {
-		lvs[i] = labels[label]
-	}
-	m.mtx.Lock()
-	defer m.mtx.Unlock()
-	return m.getOrCreateMetric(h, lvs...), nil
+	return m.getOrCreateMetric(h, labels), nil
 }
 
 // WithLabelValues works as GetMetricWithLabelValues, but panics if an error
@@ -188,7 +171,7 @@ func (m *MetricVec) DeleteLabelValues(lvs ...string) bool {
 	if err != nil {
 		return false
 	}
-	return m.deleteByHash(h, lvs...)
+	return m.deleteByHash(h, lvs)
 }
 
 // Delete deletes the metric where the variable labels are the same as those
@@ -210,28 +193,21 @@ func (m *MetricVec) Delete(labels Labels) bool {
 		return false
 	}
 
-	var lvs []string
-	for _, k := range m.desc.variableLabels {
-		lvs = append(lvs, labels[k])
-	}
-
-	return m.deleteByHash(h, lvs...)
+	return m.deleteByHash(h, labels)
 }
 
 // deleteByHash removes the metric from the hash bucket h. If there are
 // multiple matches in the bucket, use lvs to select a metric and remove only
 // that metric.
-func (m *MetricVec) deleteByHash(h uint64, lvs ...string) bool {
+//
+// lvs MUST be of type Labels or []string or this method will panic.
+func (m *MetricVec) deleteByHash(h uint64, values interface{}) bool {
 	metrics, ok := m.children[h]
 	if !ok {
 		return false
 	}
 
-	if len(metrics) < 2 {
-		delete(m.children, h)
-	}
-
-	i := findMetric(lvs, metrics)
+	i := m.findMetric(metrics, values)
 	if i >= len(metrics) {
 		return false
 	}
@@ -261,6 +237,7 @@ func (m *MetricVec) hashLabelValues(vals []string) (uint64, error) {
 	h := hashNew()
 	for _, val := range vals {
 		h = m.hashAdd(h, val)
+		h = m.hashAddByte(h, model.SeparatorByte)
 	}
 	return h, nil
 }
@@ -276,43 +253,51 @@ func (m *MetricVec) hashLabels(labels Labels) (uint64, error) {
 			return 0, fmt.Errorf("label name %q missing in label map", label)
 		}
 		h = m.hashAdd(h, val)
+		h = m.hashAddByte(h, model.SeparatorByte)
 	}
 	return h, nil
 }
 
-func (m *MetricVec) getOrCreateMetric(hash uint64, labelValues ...string) Metric {
-	metric, ok := m.getMetric(hash, labelValues...)
+// getOrCreateMetric retrieves the metric by hash and label value or creates it
+// and returns the new one.
+//
+// lvs MUST be of type Labels or []string or this method will panic.
+//
+// This function holds the mutex.
+func (m *MetricVec) getOrCreateMetric(hash uint64, lvs interface{}) Metric {
+	m.mtx.RLock()
+	metric, ok := m.getMetric(hash, lvs)
+	m.mtx.RUnlock()
+	if ok {
+		return metric
+	}
+
+	m.mtx.Lock()
+	defer m.mtx.Unlock()
+	metric, ok = m.getMetric(hash, lvs)
 	if !ok {
-		// Copy labelValues. Otherwise, they would be allocated even if we don't go
-		// down this code path.
-		copiedLabelValues := append(make([]string, 0, len(labelValues)), labelValues...)
-		metric = m.newMetric(copiedLabelValues...)
-		m.children[hash] = append(m.children[hash], metricLabelValue{values: copiedLabelValues, metric: metric})
+		lvs := m.copyLabelValues(lvs)
+		metric = m.newMetric(lvs...)
+		m.children[hash] = append(m.children[hash], metricWithLabelValues{values: lvs, metric: metric})
 	}
 	return metric
 }
 
 // getMetric while handling possible collisions in the hash space. Must be
 // called while holding read mutex.
-func (m *MetricVec) getMetric(h uint64, lvs ...string) (Metric, bool) {
+//
+// lvs must be of type Labels or []string.
+func (m *MetricVec) getMetric(h uint64, lvs interface{}) (Metric, bool) {
 	metrics, ok := m.children[h]
 	if ok {
-		return m.selectMetric(lvs, metrics)
+		return m.selectMetric(metrics, lvs)
 	}
 
 	return nil, false
 }
 
-func (m *MetricVec) selectMetric(lvs []string, metrics []metricLabelValue) (Metric, bool) {
-	switch len(metrics) {
-	case 0:
-		return nil, false
-	case 1:
-		// collisions are rare, this should be the fast path.
-		return metrics[0].metric, true
-	}
-
-	i := findMetric(lvs, metrics)
+func (m *MetricVec) selectMetric(metrics []metricWithLabelValues, lvs interface{}) (Metric, bool) {
+	i := m.findMetric(metrics, lvs)
 
 	if i < len(metrics) {
 		return metrics[i].metric, true
@@ -323,22 +308,69 @@ func (m *MetricVec) selectMetric(lvs []string, metrics []metricLabelValue) (Metr
 
 // findMetric returns the index of the matching metric or len(metrics) if not
 // found.
-func findMetric(lvs []string, metrics []metricLabelValue) int {
-next:
+func (m *MetricVec) findMetric(metrics []metricWithLabelValues, lvs interface{}) int {
 	for i, metric := range metrics {
-		if len(metric.values) != len(lvs) {
-			continue
+		if m.matchLabels(metric.values, lvs) {
+			return i
 		}
-
-		for j, v := range metric.values {
-			if v != lvs[j] {
-				continue next
-			}
-		}
-		// falling out of the loop here means we have a match!
-
-		return i
 	}
 
 	return len(metrics)
 }
+
+func (m *MetricVec) matchLabels(values []string, lvs interface{}) bool {
+	switch lvs := lvs.(type) {
+	case []string:
+		if len(values) != len(lvs) {
+			return false
+		}
+
+		for i, v := range values {
+			if v != lvs[i] {
+				return false
+			}
+		}
+
+		return true
+	case Labels:
+		if len(lvs) != len(values) {
+			return false
+		}
+
+		for i, k := range m.desc.variableLabels {
+			if values[i] != lvs[k] {
+				return false
+			}
+		}
+
+		return true
+	default:
+		// If we reach this condition, there is an unexpected type being used
+		// as a labels value. Either add branch here for the new type or fix
+		// the bug causing the type to be passed in.
+		panic("unsupported type")
+	}
+}
+
+// copyLabelValues copies the labels values into common string slice format to
+// use when allocating the metric and to keep track of hash collision
+// ambiguity.
+//
+// lvs must be of type Labels or []string or this method will panic.
+func (m *MetricVec) copyLabelValues(lvs interface{}) []string {
+	var labelValues []string
+	switch lvs := lvs.(type) {
+	case []string:
+		labelValues = make([]string, len(lvs))
+		copy(labelValues, lvs)
+	case Labels:
+		labelValues = make([]string, len(lvs))
+		for i, k := range m.desc.variableLabels {
+			labelValues[i] = lvs[k]
+		}
+	default:
+		panic(fmt.Sprintf("unsupported type for lvs: %#v", lvs))
+	}
+
+	return labelValues
+}
diff --git a/prometheus/vec_test.go b/prometheus/vec_test.go
index 674abb9..fb26b67 100644
--- a/prometheus/vec_test.go
+++ b/prometheus/vec_test.go
@@ -16,11 +16,23 @@ package prometheus
 import (
 	"fmt"
 	"testing"
+
+	dto "github.com/prometheus/client_model/go"
 )
 
 func TestDelete(t *testing.T) {
 	vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
+	testDelete(t, vec)
+}
 
+func TestDeleteWithCollisions(t *testing.T) {
+	vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
+	vec.hashAdd = func(h uint64, s string) uint64 { return 1 }
+	vec.hashAddByte = func(h uint64, b byte) uint64 { return 1 }
+	testDelete(t, vec)
+}
+
+func testDelete(t *testing.T, vec *MetricVec) {
 	if got, want := vec.Delete(Labels{"l1": "v1", "l2": "v2"}), false; got != want {
 		t.Errorf("got %v, want %v", got, want)
 	}
@@ -58,6 +70,7 @@ func TestDeleteLabelValues(t *testing.T) {
 func TestDeleteLabelValuesWithCollisions(t *testing.T) {
 	vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
 	vec.hashAdd = func(h uint64, s string) uint64 { return 1 }
+	vec.hashAddByte = func(h uint64, b byte) uint64 { return 1 }
 	testDeleteLabelValues(t, vec)
 }
 
@@ -67,14 +80,19 @@ func testDeleteLabelValues(t *testing.T, vec *MetricVec) {
 	}
 
 	vec.With(Labels{"l1": "v1", "l2": "v2"}).(Untyped).Set(42)
+	vec.With(Labels{"l1": "v1", "l2": "v3"}).(Untyped).Set(42) // add junk data for collision
 	if got, want := vec.DeleteLabelValues("v1", "v2"), true; got != want {
 		t.Errorf("got %v, want %v", got, want)
 	}
 	if got, want := vec.DeleteLabelValues("v1", "v2"), false; got != want {
 		t.Errorf("got %v, want %v", got, want)
 	}
+	if got, want := vec.DeleteLabelValues("v1", "v3"), true; got != want {
+		t.Errorf("got %v, want %v", got, want)
+	}
 
 	vec.With(Labels{"l1": "v1", "l2": "v2"}).(Untyped).Set(42)
+	// delete out of order
 	if got, want := vec.DeleteLabelValues("v2", "v1"), false; got != want {
 		t.Errorf("got %v, want %v", got, want)
 	}
@@ -83,6 +101,74 @@ func testDeleteLabelValues(t *testing.T, vec *MetricVec) {
 	}
 }
 
+func TestMetricVec(t *testing.T) {
+	vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
+	testMetricVec(t, vec)
+}
+
+func TestMetricVecWithCollisions(t *testing.T) {
+	vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
+	vec.hashAdd = func(h uint64, s string) uint64 { return 1 }
+	vec.hashAddByte = func(h uint64, b byte) uint64 { return 1 }
+	testMetricVec(t, vec)
+}
+
+func testMetricVec(t *testing.T, vec *MetricVec) {
+	vec.Reset() // actually test Reset now!
+
+	var pair [2]string
+	// keep track of metrics
+	expected := map[[2]string]int{}
+
+	for i := 0; i < 1000; i++ {
+		pair[0], pair[1] = fmt.Sprint(i%4), fmt.Sprint(i%5) // varying combinations multiples
+		expected[pair]++
+		vec.WithLabelValues(pair[0], pair[1]).(Untyped).Inc()
+
+		expected[[2]string{"v1", "v2"}]++
+		vec.WithLabelValues("v1", "v2").(Untyped).Inc()
+	}
+
+	var total int
+	for _, metrics := range vec.children {
+		for _, metric := range metrics {
+			total++
+			copy(pair[:], metric.values)
+
+			// is there a better way to access the value of a metric?
+			var metricOut dto.Metric
+			metric.metric.Write(&metricOut)
+			actual := *metricOut.Untyped.Value
+
+			var actualPair [2]string
+			for i, label := range metricOut.Label {
+				actualPair[i] = *label.Value
+			}
+
+			// test output pair against metric.values to ensure we've selected
+			// the right one. We check this to ensure the below check means
+			// anything at all.
+			if actualPair != pair {
+				t.Fatalf("unexpected pair association in metric map: %v != %v", actualPair, pair)
+			}
+
+			if actual != float64(expected[pair]) {
+				t.Fatalf("incorrect counter value for %v: %v != %v", pair, actual, expected[pair])
+			}
+		}
+	}
+
+	if total != len(expected) {
+		t.Fatalf("unexpected number of metrics: %v != %v", total, len(expected))
+	}
+
+	vec.Reset()
+
+	if len(vec.children) > 0 {
+		t.Fatalf("reset failed")
+	}
+}
+
 func newUntypedMetricVec(name, help string, labels []string) *MetricVec {
 	desc := NewDesc("test", "helpless", labels, nil)
 	vec := newMetricVec(desc, func(lvs ...string) Metric {