Switch field storage to an array vs map

This commit is contained in:
Mike Poindexter 2020-03-24 17:25:54 -07:00
parent cd70a3d2af
commit 9a5d608c21
2 changed files with 121 additions and 78 deletions

View File

@ -25,6 +25,7 @@ type Cursor interface {
type itemT struct {
id string
obj geojson.Object
fieldValuesSlot fieldValuesSlot
}
func (item *itemT) Less(other btree.Item, ctx interface{}) bool {
@ -47,7 +48,7 @@ type Collection struct {
values *btree.BTree // items sorted by value+key
fieldMap map[string]int
fieldArr []string
fieldValues map[string][]float64
fieldValues *fieldValues
weight int
points int
objects int // geometry count
@ -63,25 +64,11 @@ func New() *Collection {
values: btree.New(32, nil),
fieldMap: make(map[string]int),
fieldArr: make([]string, 0),
fieldValues: &fieldValues{},
}
return col
}
func (c *Collection) setFieldValues(id string, values []float64) {
if c.fieldValues == nil {
c.fieldValues = make(map[string][]float64)
}
c.fieldValues[id] = values
}
func (c *Collection) getFieldValues(id string) (values []float64) {
return c.fieldValues[id]
}
func (c *Collection) deleteFieldValues(id string) {
if c.fieldValues != nil {
delete(c.fieldValues, id)
}
}
// Count returns the number of objects in collection.
func (c *Collection) Count() int {
return c.objects + c.nobjects
@ -123,7 +110,7 @@ func (c *Collection) objWeight(item *itemT) int {
} else {
weight = len(item.obj.String())
}
return weight + len(c.getFieldValues(item.id))*8 + len(item.id)
return weight + len(c.fieldValues.get(item.fieldValuesSlot))*8 + len(item.id)
}
func (c *Collection) indexDelete(item *itemT) {
@ -154,9 +141,9 @@ func (c *Collection) indexInsert(item *itemT) {
func (c *Collection) Set(
id string, obj geojson.Object, fields []string, values []float64,
) (
oldObject geojson.Object, oldFields []float64, newFields []float64,
oldObject geojson.Object, oldFieldValues []float64, newFieldValues []float64,
) {
newItem := &itemT{id: id, obj: obj}
newItem := &itemT{id: id, obj: obj, fieldValuesSlot: nilValuesSlot}
// add the new item to main btree and remove the old one if needed
oldItem, ok := c.items.Set(id, newItem)
@ -179,9 +166,21 @@ func (c *Collection) Set(
// references
oldObject = oldItem.obj
oldFields = c.getFieldValues(id)
newFields = oldFields
oldFieldValues = c.fieldValues.get(oldItem.fieldValuesSlot)
newFieldValues = oldFieldValues
newItem.fieldValuesSlot = oldItem.fieldValuesSlot
}
if fields == nil {
if len(values) > 0 {
newFieldValues = values
newFieldValuesSlot := c.fieldValues.set(newItem.fieldValuesSlot, newFieldValues)
newItem.fieldValuesSlot = newFieldValuesSlot
}
} else {
newFieldValues, _, _ = c.setFieldValues(newItem, fields, values)
}
// insert the new item into the rtree or strings tree.
if objIsSpatial(newItem.obj) {
c.indexInsert(newItem)
@ -197,22 +196,7 @@ func (c *Collection) Set(
// add the new weights
c.weight += c.objWeight(newItem)
if fields == nil {
if len(values) > 0 {
// directly set the field values, update weight
c.weight -= len(newFields) * 8
newFields = values
c.setFieldValues(id, newFields)
c.weight += len(newFields) * 8
}
} else {
// map field name to value
for i, field := range fields {
c.setField(newItem, field, values[i])
}
newFields = c.getFieldValues(id)
}
return oldObject, oldFields, newFields
return oldObject, oldFieldValues, newFieldValues
}
// Delete removes an object and returns it.
@ -237,8 +221,8 @@ func (c *Collection) Delete(id string) (
c.weight -= c.objWeight(oldItem)
c.points -= oldItem.obj.NumPoints()
fields = c.getFieldValues(id)
c.deleteFieldValues(id)
fields = c.fieldValues.get(oldItem.fieldValuesSlot)
c.fieldValues.remove(oldItem.fieldValuesSlot)
return oldItem.obj, fields, true
}
@ -252,7 +236,7 @@ func (c *Collection) Get(id string) (
return nil, nil, false
}
item := itemV.(*itemT)
return item.obj, c.getFieldValues(id), true
return item.obj, c.fieldValues.get(item.fieldValuesSlot), true
}
// SetField set a field value for an object and returns that object.
@ -265,8 +249,9 @@ func (c *Collection) SetField(id, field string, value float64) (
return nil, nil, false, false
}
item := itemV.(*itemT)
updated = c.setField(item, field, value)
return item.obj, c.getFieldValues(id), updated, true
_, updateCount, weightDelta := c.setFieldValues(item, []string{field}, []float64{value})
c.weight += weightDelta
return item.obj, c.fieldValues.get(item.fieldValuesSlot), updateCount > 0, true
}
// SetFields is similar to SetField, just setting multiple fields at once
@ -278,33 +263,38 @@ func (c *Collection) SetFields(
return nil, nil, 0, false
}
item := itemV.(*itemT)
for idx, field := range inFields {
if c.setField(item, field, inValues[idx]) {
updatedCount++
}
}
return item.obj, c.getFieldValues(id), updatedCount, true
newFieldValues, updateCount, weightDelta := c.setFieldValues(item, inFields, inValues)
c.weight += weightDelta
return item.obj, newFieldValues, updateCount, true
}
func (c *Collection) setField(item *itemT, field string, value float64) (
updated bool,
func (c *Collection) setFieldValues(item *itemT, fields []string, updateValues []float64) (
newValues []float64,
updated int,
weightDelta int,
) {
idx, ok := c.fieldMap[field]
newValues = c.fieldValues.get(item.fieldValuesSlot)
for i, field := range fields {
fieldIdx, ok := c.fieldMap[field]
if !ok {
idx = len(c.fieldMap)
c.fieldMap[field] = idx
fieldIdx = len(c.fieldMap)
c.fieldMap[field] = fieldIdx
c.addToFieldArr(field)
}
fields := c.getFieldValues(item.id)
c.weight -= len(fields) * 8
for idx >= len(fields) {
fields = append(fields, 0)
for fieldIdx >= len(newValues) {
newValues = append(newValues, 0)
weightDelta += 8
}
c.weight += len(fields) * 8
ovalue := fields[idx]
fields[idx] = value
c.setFieldValues(item.id, fields)
return ovalue != value
ovalue := newValues[fieldIdx]
nvalue := updateValues[i]
newValues[fieldIdx] = nvalue
if ovalue != nvalue {
updated++
}
}
newSlot := c.fieldValues.set(item.fieldValuesSlot, newValues)
item.fieldValuesSlot = newSlot
return newValues, updated, weightDelta
}
// FieldMap return a maps of the field names.
@ -363,7 +353,7 @@ func (c *Collection) Scan(
}
nextStep(count, cursor, deadline)
iitm := value.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id))
keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon
}
if desc {
@ -405,7 +395,7 @@ func (c *Collection) ScanRange(
}
}
iitm := value.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id))
keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon
}
@ -438,7 +428,7 @@ func (c *Collection) SearchValues(
}
nextStep(count, cursor, deadline)
iitm := item.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id))
keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon
}
if desc {
@ -469,7 +459,7 @@ func (c *Collection) SearchValuesRange(start, end string, desc bool,
}
nextStep(count, cursor, deadline)
iitm := item.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id))
keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon
}
if desc {
@ -502,7 +492,7 @@ func (c *Collection) ScanGreaterOrEqual(id string, desc bool,
}
nextStep(count, cursor, deadline)
iitm := value.(*itemT)
keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id))
keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
return keepon
}
if desc {
@ -523,7 +513,7 @@ func (c *Collection) geoSearch(
[2]float64{rect.Max.X, rect.Max.Y},
func(_, _ [2]float64, itemv interface{}) bool {
item := itemv.(*itemT)
alive = iter(item.id, item.obj, c.getFieldValues(item.id))
alive = iter(item.id, item.obj, c.fieldValues.get(item.fieldValuesSlot))
return alive
},
)
@ -743,7 +733,7 @@ func (c *Collection) Nearby(
}
nextStep(count, cursor, deadline)
item := itemv.(*itemT)
alive = iter(item.id, item.obj, c.getFieldValues(item.id))
alive = iter(item.id, item.obj, c.fieldValues.get(item.fieldValuesSlot))
return alive
},
)

View File

@ -0,0 +1,53 @@
package collection
type fieldValues struct {
freelist []fieldValuesSlot
data [][]float64
}
type fieldValuesSlot int
const nilValuesSlot fieldValuesSlot = -1
func (f *fieldValues) get(k fieldValuesSlot) []float64 {
if k == nilValuesSlot {
return nil
}
return f.data[int(k)]
}
func (f *fieldValues) set(k fieldValuesSlot, itemData []float64) fieldValuesSlot {
// if we're asked to store into the nil values slot, it means one of two things:
// - we are doing a replace on an item that previously had nil fields
// - we are inserting a new item
// in either case, check if the new values are not nil, and if so allocate a
// new slot
if k == nilValuesSlot {
if itemData == nil {
return nilValuesSlot
}
// first check if there is a slot on the freelist to reuse
if len(f.freelist) > 0 {
var slot fieldValuesSlot
slot, f.freelist = f.freelist[len(f.freelist)-1], f.freelist[:len(f.freelist)-1]
f.data[slot] = itemData
return slot
}
// no reusable slot, append
f.data = append(f.data, itemData)
return fieldValuesSlot(len(f.data) - 1)
}
f.data[int(k)] = itemData
return k
}
func (f *fieldValues) remove(k fieldValuesSlot) {
if k == nilValuesSlot {
return
}
f.data[int(k)] = nil
f.freelist = append(f.freelist, k)
}