Optimization attempt.

Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
This commit is contained in:
Bartlomiej Plotka 2022-01-26 00:27:38 +01:00
parent a1c9be45cf
commit 2fcaf51be9
3 changed files with 81 additions and 67 deletions

View File

@ -43,25 +43,61 @@ var separatorByteSlice = []byte{model.SeparatorByte} // For convenient use with
// Use CachedTGatherer with classic Registry using NewMultiTRegistry and ToTransactionalGatherer helpers. // Use CachedTGatherer with classic Registry using NewMultiTRegistry and ToTransactionalGatherer helpers.
// NOTE(bwplotka): Experimental, API and behaviour can change. // NOTE(bwplotka): Experimental, API and behaviour can change.
type CachedTGatherer struct { type CachedTGatherer struct {
metrics map[uint64]*dto.Metric metricFamilyByName map[string]*family
metricFamilyByName map[string]*dto.MetricFamily
mMu sync.RWMutex mMu sync.RWMutex
} }
func NewCachedTGatherer() *CachedTGatherer { func NewCachedTGatherer() *CachedTGatherer {
return &CachedTGatherer{ return &CachedTGatherer{
metrics: make(map[uint64]*dto.Metric), metricFamilyByName: map[string]*family{},
metricFamilyByName: map[string]*dto.MetricFamily{},
} }
} }
// family pairs a dto.MetricFamily with a lookup table of its metrics keyed
// by the hash of the metric's identity (as produced by Key.hash / Insert.hash),
// so a single series can be updated or deleted without scanning the
// MetricFamily.Metric slice.
type family struct {
	*dto.MetricFamily
	// metricsByHash is the source of truth for the family's metrics; the
	// embedded MetricFamily.Metric slice is rebuilt from it on Gather.
	metricsByHash map[uint64]*dto.Metric
}
// normalizeMetricFamilies returns a MetricFamily slice with empty
// MetricFamilies pruned and the remaining MetricFamilies sorted by name within
// the slice, with the contained Metrics sorted within each MetricFamily.
//
// Note: it mutates the families in place, rebuilding each mf.Metric slice
// from mf.metricsByHash (the source of truth).
func normalizeMetricFamilies(metricFamiliesByName map[string]*family) []*dto.MetricFamily {
	for _, mf := range metricFamiliesByName {
		// Rebuild the Metric slice from the hash map, reusing the existing
		// backing array when it is already large enough.
		if cap(mf.Metric) < len(mf.metricsByHash) {
			mf.Metric = make([]*dto.Metric, 0, len(mf.metricsByHash))
		}
		mf.Metric = mf.Metric[:0]
		for _, m := range mf.metricsByHash {
			mf.Metric = append(mf.Metric, m)
		}
		// Sort once per family here; the previous version redundantly
		// re-sorted every family in a second pass right after this loop.
		sort.Sort(internal.MetricSorter(mf.Metric))
	}

	// Collect the names of non-empty families and emit them sorted by name.
	names := make([]string, 0, len(metricFamiliesByName))
	for name, mf := range metricFamiliesByName {
		if len(mf.Metric) > 0 {
			names = append(names, name)
		}
	}
	sort.Strings(names)

	result := make([]*dto.MetricFamily, 0, len(names))
	for _, name := range names {
		result = append(result, metricFamiliesByName[name].MetricFamily)
	}
	return result
}
// Gather implements TransactionalGatherer interface. // Gather implements TransactionalGatherer interface.
func (c *CachedTGatherer) Gather() (_ []*dto.MetricFamily, done func(), err error) { func (c *CachedTGatherer) Gather() (_ []*dto.MetricFamily, done func(), err error) {
c.mMu.RLock() c.mMu.RLock()
// BenchmarkCachedTGatherer_Update shows, even for 1 million metrics with 1000 families // BenchmarkCachedTGatherer_Update shows, even for 1 million metrics among 1000 families
// this is efficient enough (~300µs and ~50 kB per op), no need to cache it for now. // this is efficient enough (~300µs and ~50 kB per op), no need to cache it for now.
return internal.NormalizeMetricFamilies(c.metricFamilyByName), c.mMu.RUnlock, nil return normalizeMetricFamilies(c.metricFamilyByName), c.mMu.RUnlock, nil
} }
type Key struct { type Key struct {
@ -123,11 +159,9 @@ func (c *CachedTGatherer) Update(reset bool, inserts []Insert, deletions []Key)
c.mMu.Lock() c.mMu.Lock()
defer c.mMu.Unlock() defer c.mMu.Unlock()
currMetrics := c.metrics currMetricFamilyByName := c.metricFamilyByName
currMetricFamilies := c.metricFamilyByName
if reset { if reset {
currMetrics = make(map[uint64]*dto.Metric, len(c.metrics)) currMetricFamilyByName = make(map[string]*family, len(c.metricFamilyByName))
currMetricFamilies = make(map[string]*dto.MetricFamily, len(c.metricFamilyByName))
} }
errs := prometheus.MultiError{} errs := prometheus.MultiError{}
@ -139,22 +173,35 @@ func (c *CachedTGatherer) Update(reset bool, inserts []Insert, deletions []Key)
} }
// Update metric family. // Update metric family.
mf, ok := c.metricFamilyByName[inserts[i].FQName] mf, ok := currMetricFamilyByName[inserts[i].FQName]
oldMf, oldOk := c.metricFamilyByName[inserts[i].FQName]
if !ok { if !ok {
mf = &dto.MetricFamily{} if !oldOk {
mf.Name = &inserts[i].FQName mf = &family{
} else if reset { MetricFamily: &dto.MetricFamily{},
// Reset metric slice, since we want to start from scratch. metricsByHash: map[uint64]*dto.Metric{},
mf.Metric = mf.Metric[:0] }
mf.Name = &inserts[i].FQName
} else if reset {
mf = &family{
MetricFamily: oldMf.MetricFamily,
metricsByHash: make(map[uint64]*dto.Metric, len(oldMf.metricsByHash)),
}
}
} }
mf.Type = inserts[i].ValueType.ToDTO() mf.Type = inserts[i].ValueType.ToDTO()
mf.Help = &inserts[i].Help mf.Help = &inserts[i].Help
currMetricFamilies[inserts[i].FQName] = mf currMetricFamilyByName[inserts[i].FQName] = mf
// Update metric pointer. // Update metric pointer.
hSum := inserts[i].hash() hSum := inserts[i].hash()
m, ok := c.metrics[hSum] m, ok := mf.metricsByHash[hSum]
if !ok && reset && oldOk {
m, ok = oldMf.metricsByHash[hSum]
}
if !ok { if !ok {
m = &dto.Metric{Label: make([]*dto.LabelPair, 0, len(inserts[i].LabelNames))} m = &dto.Metric{Label: make([]*dto.LabelPair, 0, len(inserts[i].LabelNames))}
for j := range inserts[i].LabelNames { for j := range inserts[i].LabelNames {
@ -202,16 +249,7 @@ func (c *CachedTGatherer) Update(reset bool, inserts []Insert, deletions []Key)
if inserts[i].Timestamp != nil { if inserts[i].Timestamp != nil {
m.TimestampMs = proto.Int64(inserts[i].Timestamp.Unix()*1000 + int64(inserts[i].Timestamp.Nanosecond()/1000000)) m.TimestampMs = proto.Int64(inserts[i].Timestamp.Unix()*1000 + int64(inserts[i].Timestamp.Nanosecond()/1000000))
} }
currMetrics[hSum] = m mf.metricsByHash[hSum] = m
if !reset && ok {
// If we did update without reset and we found metric in previous
// map, we know metric pointer exists in metric family map, so just continue.
continue
}
// Will be sorted later anyway, so just append.
mf.Metric = append(mf.Metric, m)
} }
for _, del := range deletions { for _, del := range deletions {
@ -220,42 +258,18 @@ func (c *CachedTGatherer) Update(reset bool, inserts []Insert, deletions []Key)
continue continue
} }
mf, ok := currMetricFamilyByName[del.FQName]
if !ok {
continue
}
hSum := del.hash() hSum := del.hash()
m, ok := currMetrics[hSum] if _, ok := mf.metricsByHash[hSum]; !ok {
if !ok {
continue continue
} }
delete(currMetrics, hSum) delete(mf.metricsByHash, hSum)
mf, ok := currMetricFamilies[del.FQName]
if !ok {
// Impossible, but well...
errs.Append(fmt.Errorf("could not remove metric %s(%s) from metric family, metric family does not exists", del.FQName, del.LabelValues))
continue
}
toDel := -1
for i := range mf.Metric {
if mf.Metric[i] == m {
toDel = i
break
}
}
if toDel == -1 {
errs.Append(fmt.Errorf("could not remove metric %s(%s) from metric family, metric family does not have such metric", del.FQName, del.LabelValues))
continue
}
if len(mf.Metric) == 1 {
delete(currMetricFamilies, del.FQName)
continue
}
mf.Metric = append(mf.Metric[:toDel], mf.Metric[toDel+1:]...)
} }
c.metrics = currMetrics c.metricFamilyByName = currMetricFamilyByName
c.metricFamilyByName = currMetricFamilies
return errs.MaybeUnwrap() return errs.MaybeUnwrap()
} }

View File

@ -220,7 +220,7 @@ func BenchmarkCachedTGatherer_Update(b *testing.B) {
b.Error("update:", err) b.Error("update:", err)
} }
if len(c.metricFamilyByName) != 1e3 || len(c.metrics) != 1e6 { if len(c.metricFamilyByName) != 1e3 || len(c.metricFamilyByName["realistic_longer_name_123"].metricsByHash) != 1e3 {
// Ensure we did not generate duplicates. // Ensure we did not generate duplicates.
panic("generated data set gave wrong numbers") panic("generated data set gave wrong numbers")
} }

View File

@ -35,18 +35,18 @@ func (s LabelPairSorter) Less(i, j int) bool {
return s[i].GetName() < s[j].GetName() return s[i].GetName() < s[j].GetName()
} }
// metricSorter is a sortable slice of *dto.Metric. // MetricSorter is a sortable slice of *dto.Metric.
type metricSorter []*dto.Metric type MetricSorter []*dto.Metric
func (s metricSorter) Len() int { func (s MetricSorter) Len() int {
return len(s) return len(s)
} }
func (s metricSorter) Swap(i, j int) { func (s MetricSorter) Swap(i, j int) {
s[i], s[j] = s[j], s[i] s[i], s[j] = s[j], s[i]
} }
func (s metricSorter) Less(i, j int) bool { func (s MetricSorter) Less(i, j int) bool {
if len(s[i].Label) != len(s[j].Label) { if len(s[i].Label) != len(s[j].Label) {
// This should not happen. The metrics are // This should not happen. The metrics are
// inconsistent. However, we have to deal with the fact, as // inconsistent. However, we have to deal with the fact, as
@ -84,7 +84,7 @@ func (s metricSorter) Less(i, j int) bool {
// the slice, with the contained Metrics sorted within each MetricFamily. // the slice, with the contained Metrics sorted within each MetricFamily.
func NormalizeMetricFamilies(metricFamiliesByName map[string]*dto.MetricFamily) []*dto.MetricFamily { func NormalizeMetricFamilies(metricFamiliesByName map[string]*dto.MetricFamily) []*dto.MetricFamily {
for _, mf := range metricFamiliesByName { for _, mf := range metricFamiliesByName {
sort.Sort(metricSorter(mf.Metric)) sort.Sort(MetricSorter(mf.Metric))
} }
names := make([]string, 0, len(metricFamiliesByName)) names := make([]string, 0, len(metricFamiliesByName))
for name, mf := range metricFamiliesByName { for name, mf := range metricFamiliesByName {