metricvec: handle hash collision for labeled metrics

While hash collisions are quite rare, the current state of the client library carries a risk of merging two separate label values into a single metric bucket. The effects are near impossible to detect and the result will be missing or incorrect counters. This changeset handles hash collisions by falling back to collision resolution if multiple label values hash to the same value. This works similar to separate chaining using a slice. Extra storage is minimized to only the value key slice to that metrics can be differentiated within a bucket. In general, the cost of handling collisions is completely minimized under normal operation. Performance does show slight increases in certain areas, but these are more likely statistically anomalies. More importantly, zero allocation behavior for metrics is preserved on the fast path. Minimal allocations may be made during collision handling but this has minimal effect. Benchmark comparisons with and without collision resolution follow. ``` benchmark old ns/op new ns/op delta BenchmarkCounterWithLabelValues-4 99.0 107 +8.08% BenchmarkCounterWithLabelValuesConcurrent-4 79.6 91.0 +14.32% BenchmarkCounterWithMappedLabels-4 518 542 +4.63% BenchmarkCounterWithPreparedMappedLabels-4 127 137 +7.87% BenchmarkCounterNoLabels-4 19.5 19.1 -2.05% BenchmarkGaugeWithLabelValues-4 97.4 110 +12.94% BenchmarkGaugeNoLabels-4 12.4 10.3 -16.94% BenchmarkSummaryWithLabelValues-4 1204 915 -24.00% BenchmarkSummaryNoLabels-4 936 847 -9.51% BenchmarkHistogramWithLabelValues-4 147 147 +0.00% BenchmarkHistogramNoLabels-4 50.6 49.3 -2.57% BenchmarkHistogramObserve1-4 37.9 37.5 -1.06% BenchmarkHistogramObserve2-4 122 137 +12.30% BenchmarkHistogramObserve4-4 310 352 +13.55% BenchmarkHistogramObserve8-4 691 729 +5.50% BenchmarkHistogramWrite1-4 3374 3097 -8.21% BenchmarkHistogramWrite2-4 5310 5051 -4.88% BenchmarkHistogramWrite4-4 12094 10690 -11.61% BenchmarkHistogramWrite8-4 19416 17755 -8.55% BenchmarkHandler-4 11934304 13765894 +15.35% BenchmarkSummaryObserve1-4 1119 1105 -1.25% BenchmarkSummaryObserve2-4 3679 3430 -6.77% BenchmarkSummaryObserve4-4 10678 7982 -25.25% BenchmarkSummaryObserve8-4 22974 16689 -27.36% BenchmarkSummaryWrite1-4 25962 14680 -43.46% BenchmarkSummaryWrite2-4 38019 35073 -7.75% BenchmarkSummaryWrite4-4 78027 56816 -27.18% BenchmarkSummaryWrite8-4 117220 132248 +12.82% BenchmarkMetricVecWithLabelValuesBasic-4 138 133 -3.62% BenchmarkMetricVecWithLabelValues2Keys10ValueCardinality-4 150 144 -4.00% BenchmarkMetricVecWithLabelValues4Keys10ValueCardinality-4 263 256 -2.66% BenchmarkMetricVecWithLabelValues2Keys100ValueCardinality-4 145 155 +6.90% BenchmarkMetricVecWithLabelValues10Keys100ValueCardinality-4 606 634 +4.62% BenchmarkMetricVecWithLabelValues10Keys1000ValueCardinality-4 746 641 -14.08% benchmark old allocs new allocs delta BenchmarkCounterWithLabelValues-4 0 0 +0.00% BenchmarkCounterWithLabelValuesConcurrent-4 0 0 +0.00% BenchmarkCounterWithMappedLabels-4 2 2 +0.00% BenchmarkCounterWithPreparedMappedLabels-4 0 0 +0.00% BenchmarkCounterNoLabels-4 0 0 +0.00% BenchmarkGaugeWithLabelValues-4 0 0 +0.00% BenchmarkGaugeNoLabels-4 0 0 +0.00% BenchmarkSummaryWithLabelValues-4 0 0 +0.00% BenchmarkSummaryNoLabels-4 0 0 +0.00% BenchmarkHistogramWithLabelValues-4 0 0 +0.00% BenchmarkHistogramNoLabels-4 0 0 +0.00% BenchmarkMetricVecWithLabelValuesBasic-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues2Keys10ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues4Keys10ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues2Keys100ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues10Keys100ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues10Keys1000ValueCardinality-4 0 0 +0.00% benchmark old bytes new bytes delta BenchmarkCounterWithLabelValues-4 0 0 +0.00% BenchmarkCounterWithLabelValuesConcurrent-4 0 0 +0.00% BenchmarkCounterWithMappedLabels-4 336 336 +0.00% BenchmarkCounterWithPreparedMappedLabels-4 0 0 +0.00% BenchmarkCounterNoLabels-4 0 0 +0.00% BenchmarkGaugeWithLabelValues-4 0 0 +0.00% BenchmarkGaugeNoLabels-4 0 0 +0.00% BenchmarkSummaryWithLabelValues-4 0 0 +0.00% BenchmarkSummaryNoLabels-4 0 0 +0.00% BenchmarkHistogramWithLabelValues-4 0 0 +0.00% BenchmarkHistogramNoLabels-4 0 0 +0.00% BenchmarkMetricVecWithLabelValuesBasic-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues2Keys10ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues4Keys10ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues2Keys100ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues10Keys100ValueCardinality-4 0 0 +0.00% BenchmarkMetricVecWithLabelValues10Keys1000ValueCardinality-4 0 0 +0.00% ``` Signed-off-by: Stephen J Day <stephen.day@docker.com>
2016-08-11 17:06:03 -07:00 · 2016-08-11 17:06:03 -07:00 · 3cf50db5fd
parent c4004ef5f6
commit 3cf50db5fd
2 changed files with 114 additions and 20 deletions
--- a/prometheus/vec.go
+++ b/prometheus/vec.go
@ -25,22 +25,29 @@ import (
 // provided in this package.
 type MetricVec struct {
 	mtx      sync.RWMutex // Protects the children.
-	children map[uint64]Metric
+	children map[uint64][]metricLabelValue
 	desc     *Desc

 	newMetric func(labelValues ...string) Metric
+	hashAdd   func(h uint64, s string) uint64 // replace hash function for testing collision handling
 }

 // newMetricVec returns an initialized MetricVec. The concrete value is
 // returned for embedding into another struct.
 func newMetricVec(desc *Desc, newMetric func(lvs ...string) Metric) MetricVec {
 	return MetricVec{
-		children:  map[uint64]Metric{},
+		children:  map[uint64][]metricLabelValue{},
 		desc:      desc,
 		newMetric: newMetric,
+		hashAdd:   hashAdd,
 	}
 }

+type metricLabelValue struct {
+	values []string
+	metric Metric
+}
+
 // Describe implements Collector. The length of the returned slice
 // is always one.
 func (m *MetricVec) Describe(ch chan<- *Desc) {
@ -52,8 +59,10 @@ func (m *MetricVec) Collect(ch chan<- Metric) {
 	m.mtx.RLock()
 	defer m.mtx.RUnlock()

-	for _, metric := range m.children {
-		ch <- metric
+	for _, metrics := range m.children {
+		for _, metric := range metrics {
+			ch <- metric.metric
+		}
 	}
 }

@ -88,7 +97,7 @@ func (m *MetricVec) GetMetricWithLabelValues(lvs ...string) (Metric, error) {
 	}

 	m.mtx.RLock()
-	metric, ok := m.children[h]
+	metric, ok := m.getMetric(h, lvs...)
 	m.mtx.RUnlock()
 	if ok {
 		return metric, nil
@ -118,11 +127,12 @@ func (m *MetricVec) GetMetricWith(labels Labels) (Metric, error) {
 	}

 	m.mtx.RLock()
-	metric, ok := m.children[h]
+	metrics, ok := m.children[h]
+	if ok && len(metrics) == 1 {
 		m.mtx.RUnlock()
-	if ok {
-		return metric, nil
+		return metrics[0].metric, nil
 	}
+	m.mtx.RUnlock()

 	lvs := make([]string, len(labels))
 	for i, label := range m.desc.variableLabels {
@ -178,11 +188,7 @@ func (m *MetricVec) DeleteLabelValues(lvs ...string) bool {
 	if err != nil {
 		return false
 	}
-	if _, ok := m.children[h]; !ok {
-		return false
-	}
-	delete(m.children, h)
-	return true
+	return m.deleteByHash(h, lvs...)
 }

 // Delete deletes the metric where the variable labels are the same as those
@ -203,10 +209,38 @@ func (m *MetricVec) Delete(labels Labels) bool {
 	if err != nil {
 		return false
 	}
-	if _, ok := m.children[h]; !ok {
+
+	var lvs []string
+	for _, k := range m.desc.variableLabels {
+		lvs = append(lvs, labels[k])
+	}
+
+	return m.deleteByHash(h, lvs...)
+}
+
+// deleteByHash removes the metric from the hash bucket h. If there are
+// multiple matches in the bucket, use lvs to select a metric and remove only
+// that metric.
+func (m *MetricVec) deleteByHash(h uint64, lvs ...string) bool {
+	metrics, ok := m.children[h]
+	if !ok {
 		return false
 	}
+
+	if len(metrics) < 2 {
 		delete(m.children, h)
+	}
+
+	i := findMetric(lvs, metrics)
+	if i >= len(metrics) {
+		return false
+	}
+
+	if len(metrics) > 1 {
+		m.children[h] = append(metrics[:i], metrics[i+1:]...)
+	} else {
+		delete(m.children, h)
+	}
 	return true
 }

@ -226,7 +260,7 @@ func (m *MetricVec) hashLabelValues(vals []string) (uint64, error) {
 	}
 	h := hashNew()
 	for _, val := range vals {
-		h = hashAdd(h, val)
+		h = m.hashAdd(h, val)
 	}
 	return h, nil
 }
@ -241,19 +275,70 @@ func (m *MetricVec) hashLabels(labels Labels) (uint64, error) {
 		if !ok {
 			return 0, fmt.Errorf("label name %q missing in label map", label)
 		}
-		h = hashAdd(h, val)
+		h = m.hashAdd(h, val)
 	}
 	return h, nil
 }

 func (m *MetricVec) getOrCreateMetric(hash uint64, labelValues ...string) Metric {
-	metric, ok := m.children[hash]
+	metric, ok := m.getMetric(hash, labelValues...)
 	if !ok {
 		// Copy labelValues. Otherwise, they would be allocated even if we don't go
 		// down this code path.
 		copiedLabelValues := append(make([]string, 0, len(labelValues)), labelValues...)
 		metric = m.newMetric(copiedLabelValues...)
-		m.children[hash] = metric
+		m.children[hash] = append(m.children[hash], metricLabelValue{values: copiedLabelValues, metric: metric})
 	}
 	return metric
 }
+
+// getMetric while handling possible collisions in the hash space. Must be
+// called while holding read mutex.
+func (m *MetricVec) getMetric(h uint64, lvs ...string) (Metric, bool) {
+	metrics, ok := m.children[h]
+	if ok {
+		return m.selectMetric(lvs, metrics)
+	}
+
+	return nil, false
+}
+
+func (m *MetricVec) selectMetric(lvs []string, metrics []metricLabelValue) (Metric, bool) {
+	switch len(metrics) {
+	case 0:
+		return nil, false
+	case 1:
+		// collisions are rare, this should be the fast path.
+		return metrics[0].metric, true
+	}
+
+	i := findMetric(lvs, metrics)
+
+	if i < len(metrics) {
+		return metrics[i].metric, true
+	}
+
+	return nil, false
+}
+
+// findMetric returns the index of the matching metric or len(metrics) if not
+// found.
+func findMetric(lvs []string, metrics []metricLabelValue) int {
+next:
+	for i, metric := range metrics {
+		if len(metric.values) != len(lvs) {
+			continue
+		}
+
+		for j, v := range metric.values {
+			if v != lvs[j] {
+				continue next
+			}
+		}
+		// falling out of the loop here means we have a match!
+
+		return i
+	}
+
+	return len(metrics)
+}
--- a/prometheus/vec_test.go
+++ b/prometheus/vec_test.go
@ -52,7 +52,16 @@ func TestDelete(t *testing.T) {

 func TestDeleteLabelValues(t *testing.T) {
 	vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
+	testDeleteLabelValues(t, vec)
+}

+func TestDeleteLabelValuesWithCollisions(t *testing.T) {
+	vec := newUntypedMetricVec("test", "helpless", []string{"l1", "l2"})
+	vec.hashAdd = func(h uint64, s string) uint64 { return 1 }
+	testDeleteLabelValues(t, vec)
+}
+
+func testDeleteLabelValues(t *testing.T, vec *MetricVec) {
 	if got, want := vec.DeleteLabelValues("v1", "v2"), false; got != want {
 		t.Errorf("got %v, want %v", got, want)
 	}