Merge branch 'mpoindexter-optimize-field-value-access'

This commit is contained in:
tidwall 2021-07-10 19:26:29 -07:00
commit f2bbf10c36
3 changed files with 207 additions and 88 deletions

View File

@ -23,8 +23,9 @@ type Cursor interface {
}
type itemT struct {
id string
obj geojson.Object
id string
obj geojson.Object
fieldValuesSlot fieldValuesSlot
}
func byID(a, b interface{}) bool {
@ -51,7 +52,7 @@ type Collection struct {
values *btree.BTree // items sorted by value+key
fieldMap map[string]int
fieldArr []string
fieldValues map[string][]float64
fieldValues *fieldValues
weight int
points int
objects int // geometry count
@ -61,30 +62,15 @@ type Collection struct {
// New creates an empty collection
func New() *Collection {
col := &Collection{
items: btree.New(byID),
index: geoindex.Wrap(&rtree.RTree{}),
values: btree.New(byValue),
fieldMap: make(map[string]int),
fieldArr: make([]string, 0),
index: geoindex.Wrap(&rbang.RTree{}),
values: btree.New(32, nil),
fieldMap: make(map[string]int),
fieldArr: make([]string, 0),
fieldValues: &fieldValues{},
}
return col
}
func (c *Collection) setFieldValues(id string, values []float64) {
if c.fieldValues == nil {
c.fieldValues = make(map[string][]float64)
}
c.fieldValues[id] = values
}
func (c *Collection) getFieldValues(id string) (values []float64) {
return c.fieldValues[id]
}
func (c *Collection) deleteFieldValues(id string) {
if c.fieldValues != nil {
delete(c.fieldValues, id)
}
}
// Count returns the number of objects in collection.
func (c *Collection) Count() int {
return c.objects + c.nobjects
@ -126,7 +112,7 @@ func (c *Collection) objWeight(item *itemT) int {
} else {
weight = len(item.obj.String())
}
return weight + len(c.getFieldValues(item.id))*8 + len(item.id)
return weight + len(c.fieldValues.get(item.fieldValuesSlot))*8 + len(item.id)
}
func (c *Collection) indexDelete(item *itemT) {
@ -157,9 +143,9 @@ func (c *Collection) indexInsert(item *itemT) {
func (c *Collection) Set(
id string, obj geojson.Object, fields []string, values []float64,
) (
oldObject geojson.Object, oldFields []float64, newFields []float64,
oldObject geojson.Object, oldFieldValues []float64, newFieldValues []float64,
) {
newItem := &itemT{id: id, obj: obj}
newItem := &itemT{id: id, obj: obj, fieldValuesSlot: nilValuesSlot}
// add the new item to main btree and remove the old one if needed
oldItem := c.items.Set(newItem)
@ -182,9 +168,21 @@ func (c *Collection) Set(
// references
oldObject = oldItem.obj
oldFields = c.getFieldValues(id)
newFields = oldFields
oldFieldValues = c.fieldValues.get(oldItem.fieldValuesSlot)
newFieldValues = oldFieldValues
newItem.fieldValuesSlot = oldItem.fieldValuesSlot
}
if fields == nil {
if len(values) > 0 {
newFieldValues = values
newFieldValuesSlot := c.fieldValues.set(newItem.fieldValuesSlot, newFieldValues)
newItem.fieldValuesSlot = newFieldValuesSlot
}
} else {
newFieldValues, _, _ = c.setFieldValues(newItem, fields, values)
}
// insert the new item into the rtree or strings tree.
if objIsSpatial(newItem.obj) {
c.indexInsert(newItem)
@ -200,22 +198,7 @@ func (c *Collection) Set(
// add the new weights
c.weight += c.objWeight(newItem)
if fields == nil {
if len(values) > 0 {
// directly set the field values, update weight
c.weight -= len(newFields) * 8
newFields = values
c.setFieldValues(id, newFields)
c.weight += len(newFields) * 8
}
} else {
// map field name to value
for i, field := range fields {
c.setField(newItem, field, values[i])
}
newFields = c.getFieldValues(id)
}
return oldObject, oldFields, newFields
return oldObject, oldFieldValues, newFieldValues
}
// Delete removes an object and returns it.
@ -240,8 +223,8 @@ func (c *Collection) Delete(id string) (
c.weight -= c.objWeight(oldItem)
c.points -= oldItem.obj.NumPoints()
fields = c.getFieldValues(id)
c.deleteFieldValues(id)
fields = c.fieldValues.get(oldItem.fieldValuesSlot)
c.fieldValues.remove(oldItem.fieldValuesSlot)
return oldItem.obj, fields, true
}
@ -255,7 +238,7 @@ func (c *Collection) Get(id string) (
return nil, nil, false
}
item := itemV.(*itemT)
return item.obj, c.getFieldValues(id), true
return item.obj, c.fieldValues.get(item.fieldValuesSlot), true
}
// SetField set a field value for an object and returns that object.
@ -268,8 +251,9 @@ func (c *Collection) SetField(id, field string, value float64) (
return nil, nil, false, false
}
item := itemV.(*itemT)
updated = c.setField(item, field, value)
return item.obj, c.getFieldValues(id), updated, true
_, updateCount, weightDelta := c.setFieldValues(item, []string{field}, []float64{value})
c.weight += weightDelta
return item.obj, c.fieldValues.get(item.fieldValuesSlot), updateCount > 0, true
}
// SetFields is similar to SetField, just setting multiple fields at once
@ -281,33 +265,38 @@ func (c *Collection) SetFields(
return nil, nil, 0, false
}
item := itemV.(*itemT)
for idx, field := range inFields {
if c.setField(item, field, inValues[idx]) {
updatedCount++
}
}
return item.obj, c.getFieldValues(id), updatedCount, true
newFieldValues, updateCount, weightDelta := c.setFieldValues(item, inFields, inValues)
c.weight += weightDelta
return item.obj, newFieldValues, updateCount, true
}
func (c *Collection) setField(item *itemT, field string, value float64) (
updated bool,
func (c *Collection) setFieldValues(item *itemT, fields []string, updateValues []float64) (
newValues []float64,
updated int,
weightDelta int,
) {
idx, ok := c.fieldMap[field]
if !ok {
idx = len(c.fieldMap)
c.fieldMap[field] = idx
c.addToFieldArr(field)
newValues = c.fieldValues.get(item.fieldValuesSlot)
for i, field := range fields {
fieldIdx, ok := c.fieldMap[field]
if !ok {
fieldIdx = len(c.fieldMap)
c.fieldMap[field] = fieldIdx
c.addToFieldArr(field)
}
for fieldIdx >= len(newValues) {
newValues = append(newValues, 0)
weightDelta += 8
}
ovalue := newValues[fieldIdx]
nvalue := updateValues[i]
newValues[fieldIdx] = nvalue
if ovalue != nvalue {
updated++
}
}
fields := c.getFieldValues(item.id)
c.weight -= len(fields) * 8
for idx >= len(fields) {
fields = append(fields, 0)
}
c.weight += len(fields) * 8
ovalue := fields[idx]
fields[idx] = value
c.setFieldValues(item.id, fields)
return ovalue != value
newSlot := c.fieldValues.set(item.fieldValuesSlot, newValues)
item.fieldValuesSlot = newSlot
return newValues, updated, weightDelta
}
// FieldMap return a maps of the field names.
@ -365,8 +354,8 @@ func (c *Collection) Scan(
return true
}
nextStep(count, cursor, deadline)
iitm := item.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id))
iitm := value.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon
}
if desc {
@ -409,7 +398,7 @@ func (c *Collection) ScanRange(
}
}
iitm := value.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id))
keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon
}
@ -442,7 +431,7 @@ func (c *Collection) SearchValues(
}
nextStep(count, cursor, deadline)
iitm := item.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id))
keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon
}
if desc {
@ -473,7 +462,7 @@ func (c *Collection) SearchValuesRange(start, end string, desc bool,
}
nextStep(count, cursor, deadline)
iitm := item.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id))
keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon
}
pstart := &itemT{obj: String(start)}
@ -514,7 +503,7 @@ func (c *Collection) ScanGreaterOrEqual(id string, desc bool,
}
nextStep(count, cursor, deadline)
iitm := value.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id))
keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon
}
if desc {
@ -535,7 +524,7 @@ func (c *Collection) geoSearch(
[2]float64{rect.Max.X, rect.Max.Y},
func(_, _ [2]float64, itemv interface{}) bool {
item := itemv.(*itemT)
alive = iter(item.id, item.obj, c.getFieldValues(item.id))
alive = iter(item.id, item.obj, c.fieldValues.get(item.fieldValuesSlot))
return alive
},
)
@ -755,7 +744,7 @@ func (c *Collection) Nearby(
}
nextStep(count, cursor, deadline)
item := itemv.(*itemT)
alive = iter(item.id, item.obj, c.getFieldValues(item.id))
alive = iter(item.id, item.obj, c.fieldValues.get(item.fieldValuesSlot))
return alive
},
)

View File

@ -635,36 +635,63 @@ func TestManyCollections(t *testing.T) {
type testPointItem struct {
id string
object geojson.Object
fields []float64
}
func BenchmarkInsert(t *testing.B) {
// makeBenchFields builds the field-value slice attached to each benchmark
// item. It returns nil when nFields is zero and a zero-filled slice of length
// nFields otherwise.
func makeBenchFields(nFields int) []float64 {
	var fields []float64
	if nFields != 0 {
		fields = make([]float64, nFields)
	}
	return fields
}
// BenchmarkInsert_Fields runs benchmarkInsert with one field value per item.
func BenchmarkInsert_Fields(t *testing.B) {
	const fieldsPerItem = 1
	benchmarkInsert(t, fieldsPerItem)
}
// BenchmarkInsert_NoFields runs benchmarkInsert with no field values per item.
func BenchmarkInsert_NoFields(t *testing.B) {
	const fieldsPerItem = 0
	benchmarkInsert(t, fieldsPerItem)
}
func benchmarkInsert(t *testing.B, nFields int) {
rand.Seed(time.Now().UnixNano())
items := make([]testPointItem, t.N)
for i := 0; i < t.N; i++ {
items[i] = testPointItem{
fmt.Sprintf("%d", i),
PO(rand.Float64()*360-180, rand.Float64()*180-90),
makeBenchFields(nFields),
}
}
col := New()
t.ResetTimer()
for i := 0; i < t.N; i++ {
col.Set(items[i].id, items[i].object, nil, nil)
col.Set(items[i].id, items[i].object, nil, items[i].fields)
}
}
func BenchmarkReplace(t *testing.B) {
// BenchmarkReplace_Fields runs benchmarkReplace with one field value per item.
func BenchmarkReplace_Fields(t *testing.B) {
	const fieldsPerItem = 1
	benchmarkReplace(t, fieldsPerItem)
}
// BenchmarkReplace_NoFields runs benchmarkReplace with no field values per
// item.
func BenchmarkReplace_NoFields(t *testing.B) {
	const fieldsPerItem = 0
	benchmarkReplace(t, fieldsPerItem)
}
func benchmarkReplace(t *testing.B, nFields int) {
rand.Seed(time.Now().UnixNano())
items := make([]testPointItem, t.N)
for i := 0; i < t.N; i++ {
items[i] = testPointItem{
fmt.Sprintf("%d", i),
PO(rand.Float64()*360-180, rand.Float64()*180-90),
makeBenchFields(nFields),
}
}
col := New()
for i := 0; i < t.N; i++ {
col.Set(items[i].id, items[i].object, nil, nil)
col.Set(items[i].id, items[i].object, nil, items[i].fields)
}
t.ResetTimer()
for _, i := range rand.Perm(t.N) {
@ -675,18 +702,27 @@ func BenchmarkReplace(t *testing.B) {
}
}
func BenchmarkGet(t *testing.B) {
// BenchmarkGet_Fields runs benchmarkGet with one field value per item.
func BenchmarkGet_Fields(t *testing.B) {
	const fieldsPerItem = 1
	benchmarkGet(t, fieldsPerItem)
}
// BenchmarkGet_NoFields runs benchmarkGet with no field values per item.
func BenchmarkGet_NoFields(t *testing.B) {
	const fieldsPerItem = 0
	benchmarkGet(t, fieldsPerItem)
}
func benchmarkGet(t *testing.B, nFields int) {
rand.Seed(time.Now().UnixNano())
items := make([]testPointItem, t.N)
for i := 0; i < t.N; i++ {
items[i] = testPointItem{
fmt.Sprintf("%d", i),
PO(rand.Float64()*360-180, rand.Float64()*180-90),
makeBenchFields(nFields),
}
}
col := New()
for i := 0; i < t.N; i++ {
col.Set(items[i].id, items[i].object, nil, nil)
col.Set(items[i].id, items[i].object, nil, items[i].fields)
}
t.ResetTimer()
for _, i := range rand.Perm(t.N) {
@ -697,18 +733,27 @@ func BenchmarkGet(t *testing.B) {
}
}
func BenchmarkRemove(t *testing.B) {
// BenchmarkRemove_Fields runs benchmarkRemove with one field value per item.
func BenchmarkRemove_Fields(t *testing.B) {
	const fieldsPerItem = 1
	benchmarkRemove(t, fieldsPerItem)
}
// BenchmarkRemove_NoFields runs benchmarkRemove with no field values per item.
func BenchmarkRemove_NoFields(t *testing.B) {
	const fieldsPerItem = 0
	benchmarkRemove(t, fieldsPerItem)
}
func benchmarkRemove(t *testing.B, nFields int) {
rand.Seed(time.Now().UnixNano())
items := make([]testPointItem, t.N)
for i := 0; i < t.N; i++ {
items[i] = testPointItem{
fmt.Sprintf("%d", i),
PO(rand.Float64()*360-180, rand.Float64()*180-90),
makeBenchFields(nFields),
}
}
col := New()
for i := 0; i < t.N; i++ {
col.Set(items[i].id, items[i].object, nil, nil)
col.Set(items[i].id, items[i].object, nil, items[i].fields)
}
t.ResetTimer()
for _, i := range rand.Perm(t.N) {
@ -718,3 +763,35 @@ func BenchmarkRemove(t *testing.B) {
}
}
}
// BenchmarkScan_Fields runs benchmarkScan with one field value per item.
func BenchmarkScan_Fields(t *testing.B) {
	const fieldsPerItem = 1
	benchmarkScan(t, fieldsPerItem)
}
// BenchmarkScan_NoFields runs benchmarkScan with no field values per item.
func BenchmarkScan_NoFields(t *testing.B) {
	const fieldsPerItem = 0
	benchmarkScan(t, fieldsPerItem)
}
// benchmarkScan times repeated calls to Collection.Scan over a collection of
// t.N randomly placed point items, each carrying nFields field values (none
// when nFields is zero).
//
// Fixture creation and the initial inserts happen before the timer is reset,
// so only the Scan calls are measured. Each timed iteration starts a new scan
// whose callback stops it once more than 500 items have been visited.
func benchmarkScan(t *testing.B, nFields int) {
	rand.Seed(time.Now().UnixNano())

	// Prepare all items up front so their construction is not timed.
	items := make([]testPointItem, t.N)
	for i := range items {
		x := rand.Float64()*360 - 180
		y := rand.Float64()*180 - 90
		items[i] = testPointItem{
			id:     fmt.Sprintf("%d", i),
			object: PO(x, y),
			fields: makeBenchFields(nFields),
		}
	}

	col := New()
	for i := range items {
		it := &items[i]
		col.Set(it.id, it.object, nil, it.fields)
	}

	t.ResetTimer()
	for n := 0; n < t.N; n++ {
		visited := 0
		// NOTE(review): the leading `true` is presumably Scan's desc flag —
		// confirm against Scan's signature.
		col.Scan(true, nil, nil, func(_ string, _ geojson.Object, _ []float64) bool {
			visited++
			return visited <= 500
		})
	}
}

View File

@ -0,0 +1,53 @@
package collection
type (
	// fieldValuesSlot is the index of one item's field values inside
	// fieldValues.data.
	fieldValuesSlot int

	// fieldValues keeps the per-item field value slices in a single slice,
	// addressed by slot rather than by item id.
	fieldValues struct {
		// freelist holds slots that were released by remove and can be
		// handed out again by set.
		freelist []fieldValuesSlot
		// data holds one []float64 per slot; a released slot holds nil.
		data [][]float64
	}
)

// nilValuesSlot is the sentinel slot of an item that has no field values
// stored.
const nilValuesSlot fieldValuesSlot = -1
// get returns the field values stored in slot k, or nil when k is
// nilValuesSlot. Any other k is used directly as an index into f.data.
func (f *fieldValues) get(k fieldValuesSlot) []float64 {
	var values []float64
	if k != nilValuesSlot {
		values = f.data[int(k)]
	}
	return values
}
// set stores itemData for an item and returns the slot that now holds it.
//
// When k is an existing slot the data is overwritten in place and k is
// returned. When k is nilValuesSlot the item has no slot yet (a new item, or a
// replace of an item that had nil fields): nil data keeps it slotless, any
// other data is given a slot taken from the freelist if one is available, or
// appended to the end of the data slice otherwise.
func (f *fieldValues) set(k fieldValuesSlot, itemData []float64) fieldValuesSlot {
	// the item already owns a slot, overwrite it
	if k != nilValuesSlot {
		f.data[int(k)] = itemData
		return k
	}

	// nothing to store, so no slot is needed
	if itemData == nil {
		return nilValuesSlot
	}

	// no reusable slot, append
	last := len(f.freelist) - 1
	if last < 0 {
		f.data = append(f.data, itemData)
		return fieldValuesSlot(len(f.data) - 1)
	}

	// pop the most recently freed slot and reuse it
	reused := f.freelist[last]
	f.freelist = f.freelist[:last]
	f.data[reused] = itemData
	return reused
}
// remove releases slot k: its data entry is set to nil and the slot is pushed
// onto the freelist so a later set can reuse it. Removing nilValuesSlot does
// nothing.
func (f *fieldValues) remove(k fieldValuesSlot) {
	if k != nilValuesSlot {
		f.data[int(k)] = nil
		f.freelist = append(f.freelist, k)
	}
}