Merge branch 'mpoindexter-optimize-field-value-access'

This commit is contained in:
tidwall 2021-07-10 19:26:29 -07:00
commit f2bbf10c36
3 changed files with 207 additions and 88 deletions

View File

@ -23,8 +23,9 @@ type Cursor interface {
} }
type itemT struct { type itemT struct {
id string id string
obj geojson.Object obj geojson.Object
fieldValuesSlot fieldValuesSlot
} }
func byID(a, b interface{}) bool { func byID(a, b interface{}) bool {
@ -51,7 +52,7 @@ type Collection struct {
values *btree.BTree // items sorted by value+key values *btree.BTree // items sorted by value+key
fieldMap map[string]int fieldMap map[string]int
fieldArr []string fieldArr []string
fieldValues map[string][]float64 fieldValues *fieldValues
weight int weight int
points int points int
objects int // geometry count objects int // geometry count
@ -61,30 +62,15 @@ type Collection struct {
// New creates an empty collection // New creates an empty collection
func New() *Collection { func New() *Collection {
col := &Collection{ col := &Collection{
items: btree.New(byID), index: geoindex.Wrap(&rbang.RTree{}),
index: geoindex.Wrap(&rtree.RTree{}), values: btree.New(32, nil),
values: btree.New(byValue), fieldMap: make(map[string]int),
fieldMap: make(map[string]int), fieldArr: make([]string, 0),
fieldArr: make([]string, 0), fieldValues: &fieldValues{},
} }
return col return col
} }
func (c *Collection) setFieldValues(id string, values []float64) {
if c.fieldValues == nil {
c.fieldValues = make(map[string][]float64)
}
c.fieldValues[id] = values
}
func (c *Collection) getFieldValues(id string) (values []float64) {
return c.fieldValues[id]
}
func (c *Collection) deleteFieldValues(id string) {
if c.fieldValues != nil {
delete(c.fieldValues, id)
}
}
// Count returns the number of objects in collection. // Count returns the number of objects in collection.
func (c *Collection) Count() int { func (c *Collection) Count() int {
return c.objects + c.nobjects return c.objects + c.nobjects
@ -126,7 +112,7 @@ func (c *Collection) objWeight(item *itemT) int {
} else { } else {
weight = len(item.obj.String()) weight = len(item.obj.String())
} }
return weight + len(c.getFieldValues(item.id))*8 + len(item.id) return weight + len(c.fieldValues.get(item.fieldValuesSlot))*8 + len(item.id)
} }
func (c *Collection) indexDelete(item *itemT) { func (c *Collection) indexDelete(item *itemT) {
@ -157,9 +143,9 @@ func (c *Collection) indexInsert(item *itemT) {
func (c *Collection) Set( func (c *Collection) Set(
id string, obj geojson.Object, fields []string, values []float64, id string, obj geojson.Object, fields []string, values []float64,
) ( ) (
oldObject geojson.Object, oldFields []float64, newFields []float64, oldObject geojson.Object, oldFieldValues []float64, newFieldValues []float64,
) { ) {
newItem := &itemT{id: id, obj: obj} newItem := &itemT{id: id, obj: obj, fieldValuesSlot: nilValuesSlot}
// add the new item to main btree and remove the old one if needed // add the new item to main btree and remove the old one if needed
oldItem := c.items.Set(newItem) oldItem := c.items.Set(newItem)
@ -182,9 +168,21 @@ func (c *Collection) Set(
// references // references
oldObject = oldItem.obj oldObject = oldItem.obj
oldFields = c.getFieldValues(id) oldFieldValues = c.fieldValues.get(oldItem.fieldValuesSlot)
newFields = oldFields newFieldValues = oldFieldValues
newItem.fieldValuesSlot = oldItem.fieldValuesSlot
} }
if fields == nil {
if len(values) > 0 {
newFieldValues = values
newFieldValuesSlot := c.fieldValues.set(newItem.fieldValuesSlot, newFieldValues)
newItem.fieldValuesSlot = newFieldValuesSlot
}
} else {
newFieldValues, _, _ = c.setFieldValues(newItem, fields, values)
}
// insert the new item into the rtree or strings tree. // insert the new item into the rtree or strings tree.
if objIsSpatial(newItem.obj) { if objIsSpatial(newItem.obj) {
c.indexInsert(newItem) c.indexInsert(newItem)
@ -200,22 +198,7 @@ func (c *Collection) Set(
// add the new weights // add the new weights
c.weight += c.objWeight(newItem) c.weight += c.objWeight(newItem)
if fields == nil { return oldObject, oldFieldValues, newFieldValues
if len(values) > 0 {
// directly set the field values, update weight
c.weight -= len(newFields) * 8
newFields = values
c.setFieldValues(id, newFields)
c.weight += len(newFields) * 8
}
} else {
// map field name to value
for i, field := range fields {
c.setField(newItem, field, values[i])
}
newFields = c.getFieldValues(id)
}
return oldObject, oldFields, newFields
} }
// Delete removes an object and returns it. // Delete removes an object and returns it.
@ -240,8 +223,8 @@ func (c *Collection) Delete(id string) (
c.weight -= c.objWeight(oldItem) c.weight -= c.objWeight(oldItem)
c.points -= oldItem.obj.NumPoints() c.points -= oldItem.obj.NumPoints()
fields = c.getFieldValues(id) fields = c.fieldValues.get(oldItem.fieldValuesSlot)
c.deleteFieldValues(id) c.fieldValues.remove(oldItem.fieldValuesSlot)
return oldItem.obj, fields, true return oldItem.obj, fields, true
} }
@ -255,7 +238,7 @@ func (c *Collection) Get(id string) (
return nil, nil, false return nil, nil, false
} }
item := itemV.(*itemT) item := itemV.(*itemT)
return item.obj, c.getFieldValues(id), true return item.obj, c.fieldValues.get(item.fieldValuesSlot), true
} }
// SetField set a field value for an object and returns that object. // SetField set a field value for an object and returns that object.
@ -268,8 +251,9 @@ func (c *Collection) SetField(id, field string, value float64) (
return nil, nil, false, false return nil, nil, false, false
} }
item := itemV.(*itemT) item := itemV.(*itemT)
updated = c.setField(item, field, value) _, updateCount, weightDelta := c.setFieldValues(item, []string{field}, []float64{value})
return item.obj, c.getFieldValues(id), updated, true c.weight += weightDelta
return item.obj, c.fieldValues.get(item.fieldValuesSlot), updateCount > 0, true
} }
// SetFields is similar to SetField, just setting multiple fields at once // SetFields is similar to SetField, just setting multiple fields at once
@ -281,33 +265,38 @@ func (c *Collection) SetFields(
return nil, nil, 0, false return nil, nil, 0, false
} }
item := itemV.(*itemT) item := itemV.(*itemT)
for idx, field := range inFields { newFieldValues, updateCount, weightDelta := c.setFieldValues(item, inFields, inValues)
if c.setField(item, field, inValues[idx]) { c.weight += weightDelta
updatedCount++ return item.obj, newFieldValues, updateCount, true
}
}
return item.obj, c.getFieldValues(id), updatedCount, true
} }
func (c *Collection) setField(item *itemT, field string, value float64) ( func (c *Collection) setFieldValues(item *itemT, fields []string, updateValues []float64) (
updated bool, newValues []float64,
updated int,
weightDelta int,
) { ) {
idx, ok := c.fieldMap[field] newValues = c.fieldValues.get(item.fieldValuesSlot)
if !ok { for i, field := range fields {
idx = len(c.fieldMap) fieldIdx, ok := c.fieldMap[field]
c.fieldMap[field] = idx if !ok {
c.addToFieldArr(field) fieldIdx = len(c.fieldMap)
c.fieldMap[field] = fieldIdx
c.addToFieldArr(field)
}
for fieldIdx >= len(newValues) {
newValues = append(newValues, 0)
weightDelta += 8
}
ovalue := newValues[fieldIdx]
nvalue := updateValues[i]
newValues[fieldIdx] = nvalue
if ovalue != nvalue {
updated++
}
} }
fields := c.getFieldValues(item.id) newSlot := c.fieldValues.set(item.fieldValuesSlot, newValues)
c.weight -= len(fields) * 8 item.fieldValuesSlot = newSlot
for idx >= len(fields) { return newValues, updated, weightDelta
fields = append(fields, 0)
}
c.weight += len(fields) * 8
ovalue := fields[idx]
fields[idx] = value
c.setFieldValues(item.id, fields)
return ovalue != value
} }
// FieldMap return a maps of the field names. // FieldMap return a maps of the field names.
@ -365,8 +354,8 @@ func (c *Collection) Scan(
return true return true
} }
nextStep(count, cursor, deadline) nextStep(count, cursor, deadline)
iitm := item.(*itemT) iitm := value.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id)) keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon return keepon
} }
if desc { if desc {
@ -409,7 +398,7 @@ func (c *Collection) ScanRange(
} }
} }
iitm := value.(*itemT) iitm := value.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id)) keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon return keepon
} }
@ -442,7 +431,7 @@ func (c *Collection) SearchValues(
} }
nextStep(count, cursor, deadline) nextStep(count, cursor, deadline)
iitm := item.(*itemT) iitm := item.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id)) keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon return keepon
} }
if desc { if desc {
@ -473,7 +462,7 @@ func (c *Collection) SearchValuesRange(start, end string, desc bool,
} }
nextStep(count, cursor, deadline) nextStep(count, cursor, deadline)
iitm := item.(*itemT) iitm := item.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id)) keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon return keepon
} }
pstart := &itemT{obj: String(start)} pstart := &itemT{obj: String(start)}
@ -514,7 +503,7 @@ func (c *Collection) ScanGreaterOrEqual(id string, desc bool,
} }
nextStep(count, cursor, deadline) nextStep(count, cursor, deadline)
iitm := value.(*itemT) iitm := value.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id)) keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon return keepon
} }
if desc { if desc {
@ -535,7 +524,7 @@ func (c *Collection) geoSearch(
[2]float64{rect.Max.X, rect.Max.Y}, [2]float64{rect.Max.X, rect.Max.Y},
func(_, _ [2]float64, itemv interface{}) bool { func(_, _ [2]float64, itemv interface{}) bool {
item := itemv.(*itemT) item := itemv.(*itemT)
alive = iter(item.id, item.obj, c.getFieldValues(item.id)) alive = iter(item.id, item.obj, c.fieldValues.get(item.fieldValuesSlot))
return alive return alive
}, },
) )
@ -755,7 +744,7 @@ func (c *Collection) Nearby(
} }
nextStep(count, cursor, deadline) nextStep(count, cursor, deadline)
item := itemv.(*itemT) item := itemv.(*itemT)
alive = iter(item.id, item.obj, c.getFieldValues(item.id)) alive = iter(item.id, item.obj, c.fieldValues.get(item.fieldValuesSlot))
return alive return alive
}, },
) )

View File

@ -635,36 +635,63 @@ func TestManyCollections(t *testing.T) {
type testPointItem struct { type testPointItem struct {
id string id string
object geojson.Object object geojson.Object
fields []float64
} }
func BenchmarkInsert(t *testing.B) { func makeBenchFields(nFields int) []float64 {
if nFields == 0 {
return nil
}
return make([]float64, nFields)
}
func BenchmarkInsert_Fields(t *testing.B) {
benchmarkInsert(t, 1)
}
func BenchmarkInsert_NoFields(t *testing.B) {
benchmarkInsert(t, 0)
}
func benchmarkInsert(t *testing.B, nFields int) {
rand.Seed(time.Now().UnixNano()) rand.Seed(time.Now().UnixNano())
items := make([]testPointItem, t.N) items := make([]testPointItem, t.N)
for i := 0; i < t.N; i++ { for i := 0; i < t.N; i++ {
items[i] = testPointItem{ items[i] = testPointItem{
fmt.Sprintf("%d", i), fmt.Sprintf("%d", i),
PO(rand.Float64()*360-180, rand.Float64()*180-90), PO(rand.Float64()*360-180, rand.Float64()*180-90),
makeBenchFields(nFields),
} }
} }
col := New() col := New()
t.ResetTimer() t.ResetTimer()
for i := 0; i < t.N; i++ { for i := 0; i < t.N; i++ {
col.Set(items[i].id, items[i].object, nil, nil) col.Set(items[i].id, items[i].object, nil, items[i].fields)
} }
} }
func BenchmarkReplace(t *testing.B) { func BenchmarkReplace_Fields(t *testing.B) {
benchmarkReplace(t, 1)
}
func BenchmarkReplace_NoFields(t *testing.B) {
benchmarkReplace(t, 0)
}
func benchmarkReplace(t *testing.B, nFields int) {
rand.Seed(time.Now().UnixNano()) rand.Seed(time.Now().UnixNano())
items := make([]testPointItem, t.N) items := make([]testPointItem, t.N)
for i := 0; i < t.N; i++ { for i := 0; i < t.N; i++ {
items[i] = testPointItem{ items[i] = testPointItem{
fmt.Sprintf("%d", i), fmt.Sprintf("%d", i),
PO(rand.Float64()*360-180, rand.Float64()*180-90), PO(rand.Float64()*360-180, rand.Float64()*180-90),
makeBenchFields(nFields),
} }
} }
col := New() col := New()
for i := 0; i < t.N; i++ { for i := 0; i < t.N; i++ {
col.Set(items[i].id, items[i].object, nil, nil) col.Set(items[i].id, items[i].object, nil, items[i].fields)
} }
t.ResetTimer() t.ResetTimer()
for _, i := range rand.Perm(t.N) { for _, i := range rand.Perm(t.N) {
@ -675,18 +702,27 @@ func BenchmarkReplace(t *testing.B) {
} }
} }
func BenchmarkGet(t *testing.B) { func BenchmarkGet_Fields(t *testing.B) {
benchmarkGet(t, 1)
}
func BenchmarkGet_NoFields(t *testing.B) {
benchmarkGet(t, 0)
}
func benchmarkGet(t *testing.B, nFields int) {
rand.Seed(time.Now().UnixNano()) rand.Seed(time.Now().UnixNano())
items := make([]testPointItem, t.N) items := make([]testPointItem, t.N)
for i := 0; i < t.N; i++ { for i := 0; i < t.N; i++ {
items[i] = testPointItem{ items[i] = testPointItem{
fmt.Sprintf("%d", i), fmt.Sprintf("%d", i),
PO(rand.Float64()*360-180, rand.Float64()*180-90), PO(rand.Float64()*360-180, rand.Float64()*180-90),
makeBenchFields(nFields),
} }
} }
col := New() col := New()
for i := 0; i < t.N; i++ { for i := 0; i < t.N; i++ {
col.Set(items[i].id, items[i].object, nil, nil) col.Set(items[i].id, items[i].object, nil, items[i].fields)
} }
t.ResetTimer() t.ResetTimer()
for _, i := range rand.Perm(t.N) { for _, i := range rand.Perm(t.N) {
@ -697,18 +733,27 @@ func BenchmarkGet(t *testing.B) {
} }
} }
func BenchmarkRemove(t *testing.B) { func BenchmarkRemove_Fields(t *testing.B) {
benchmarkRemove(t, 1)
}
func BenchmarkRemove_NoFields(t *testing.B) {
benchmarkRemove(t, 0)
}
func benchmarkRemove(t *testing.B, nFields int) {
rand.Seed(time.Now().UnixNano()) rand.Seed(time.Now().UnixNano())
items := make([]testPointItem, t.N) items := make([]testPointItem, t.N)
for i := 0; i < t.N; i++ { for i := 0; i < t.N; i++ {
items[i] = testPointItem{ items[i] = testPointItem{
fmt.Sprintf("%d", i), fmt.Sprintf("%d", i),
PO(rand.Float64()*360-180, rand.Float64()*180-90), PO(rand.Float64()*360-180, rand.Float64()*180-90),
makeBenchFields(nFields),
} }
} }
col := New() col := New()
for i := 0; i < t.N; i++ { for i := 0; i < t.N; i++ {
col.Set(items[i].id, items[i].object, nil, nil) col.Set(items[i].id, items[i].object, nil, items[i].fields)
} }
t.ResetTimer() t.ResetTimer()
for _, i := range rand.Perm(t.N) { for _, i := range rand.Perm(t.N) {
@ -718,3 +763,35 @@ func BenchmarkRemove(t *testing.B) {
} }
} }
} }
func BenchmarkScan_Fields(t *testing.B) {
benchmarkScan(t, 1)
}
func BenchmarkScan_NoFields(t *testing.B) {
benchmarkScan(t, 0)
}
func benchmarkScan(t *testing.B, nFields int) {
rand.Seed(time.Now().UnixNano())
items := make([]testPointItem, t.N)
for i := 0; i < t.N; i++ {
items[i] = testPointItem{
fmt.Sprintf("%d", i),
PO(rand.Float64()*360-180, rand.Float64()*180-90),
makeBenchFields(nFields),
}
}
col := New()
for i := 0; i < t.N; i++ {
col.Set(items[i].id, items[i].object, nil, items[i].fields)
}
t.ResetTimer()
for i := 0; i < t.N; i++ {
var scanIteration int
col.Scan(true, nil, nil, func(id string, obj geojson.Object, fields []float64) bool {
scanIteration++
return scanIteration <= 500
})
}
}

View File

@ -0,0 +1,53 @@
package collection
// fieldValuesSlot is an index into fieldValues.data identifying where one
// item's field values are stored.
type fieldValuesSlot int

// nilValuesSlot is the sentinel slot meaning "no field values stored".
const nilValuesSlot fieldValuesSlot = -1

// fieldValues stores per-item field value slices in a flat slice addressed
// by slot, so that lookups are a plain index operation.
type fieldValues struct {
	// freelist holds slots released by remove; set reuses them, most
	// recently released first, before growing data.
	freelist []fieldValuesSlot
	// data holds the field values for each slot; a removed slot holds nil.
	data [][]float64
}

// get returns the field values stored at slot k, or nil when k is
// nilValuesSlot.
func (f *fieldValues) get(k fieldValuesSlot) []float64 {
	if k != nilValuesSlot {
		return f.data[k]
	}
	return nil
}

// set stores itemData for slot k and returns the slot that now holds it.
//
// When k is a real slot the data is overwritten in place and k is returned.
// When k is nilValuesSlot (a brand new item, or an item that previously had
// no field values) a slot is only allocated if itemData is non-nil;
// otherwise nilValuesSlot is returned unchanged.
func (f *fieldValues) set(k fieldValuesSlot, itemData []float64) fieldValuesSlot {
	switch {
	case k != nilValuesSlot:
		// existing slot, overwrite in place
		f.data[k] = itemData
		return k
	case itemData == nil:
		// nothing to store, so no slot is needed
		return nilValuesSlot
	}
	// prefer recycling the most recently released slot
	if last := len(f.freelist) - 1; last >= 0 {
		reused := f.freelist[last]
		f.freelist = f.freelist[:last]
		f.data[reused] = itemData
		return reused
	}
	// nothing to recycle, grow the backing slice by one slot
	fresh := fieldValuesSlot(len(f.data))
	f.data = append(f.data, itemData)
	return fresh
}

// remove clears the values held at slot k and puts the slot on the freelist
// for later reuse. Removing nilValuesSlot is a no-op.
func (f *fieldValues) remove(k fieldValuesSlot) {
	if k != nilValuesSlot {
		f.data[k] = nil
		f.freelist = append(f.freelist, k)
	}
}