diff --git a/internal/collection/collection.go b/internal/collection/collection.go index 7293be5e..87c39059 100644 --- a/internal/collection/collection.go +++ b/internal/collection/collection.go @@ -23,8 +23,9 @@ type Cursor interface { } type itemT struct { - id string - obj geojson.Object + id string + obj geojson.Object + fieldValuesSlot fieldValuesSlot } func byID(a, b interface{}) bool { @@ -51,7 +52,7 @@ type Collection struct { values *btree.BTree // items sorted by value+key fieldMap map[string]int fieldArr []string - fieldValues map[string][]float64 + fieldValues *fieldValues weight int points int objects int // geometry count @@ -61,30 +62,15 @@ type Collection struct { // New creates an empty collection func New() *Collection { col := &Collection{ - items: btree.New(byID), - index: geoindex.Wrap(&rtree.RTree{}), - values: btree.New(byValue), - fieldMap: make(map[string]int), - fieldArr: make([]string, 0), + index: geoindex.Wrap(&rbang.RTree{}), + values: btree.New(32, nil), + fieldMap: make(map[string]int), + fieldArr: make([]string, 0), + fieldValues: &fieldValues{}, } return col } -func (c *Collection) setFieldValues(id string, values []float64) { - if c.fieldValues == nil { - c.fieldValues = make(map[string][]float64) - } - c.fieldValues[id] = values -} -func (c *Collection) getFieldValues(id string) (values []float64) { - return c.fieldValues[id] -} -func (c *Collection) deleteFieldValues(id string) { - if c.fieldValues != nil { - delete(c.fieldValues, id) - } -} - // Count returns the number of objects in collection. func (c *Collection) Count() int { return c.objects + c.nobjects @@ -126,7 +112,7 @@ func (c *Collection) objWeight(item *itemT) int { } else { weight = len(item.obj.String()) } - return weight + len(c.getFieldValues(item.id))*8 + len(item.id) + return weight + len(c.fieldValues.get(item.fieldValuesSlot))*8 + len(item.id) } func (c *Collection) indexDelete(item *itemT) { @@ -157,9 +143,9 @@ func (c *Collection) indexInsert(item *itemT) { func (c *Collection) Set( id string, obj geojson.Object, fields []string, values []float64, ) ( - oldObject geojson.Object, oldFields []float64, newFields []float64, + oldObject geojson.Object, oldFieldValues []float64, newFieldValues []float64, ) { - newItem := &itemT{id: id, obj: obj} + newItem := &itemT{id: id, obj: obj, fieldValuesSlot: nilValuesSlot} // add the new item to main btree and remove the old one if needed oldItem := c.items.Set(newItem) @@ -182,9 +168,21 @@ func (c *Collection) Set( // references oldObject = oldItem.obj - oldFields = c.getFieldValues(id) - newFields = oldFields + oldFieldValues = c.fieldValues.get(oldItem.fieldValuesSlot) + newFieldValues = oldFieldValues + newItem.fieldValuesSlot = oldItem.fieldValuesSlot } + + if fields == nil { + if len(values) > 0 { + newFieldValues = values + newFieldValuesSlot := c.fieldValues.set(newItem.fieldValuesSlot, newFieldValues) + newItem.fieldValuesSlot = newFieldValuesSlot + } + } else { + newFieldValues, _, _ = c.setFieldValues(newItem, fields, values) + } + // insert the new item into the rtree or strings tree. if objIsSpatial(newItem.obj) { c.indexInsert(newItem) @@ -200,22 +198,7 @@ func (c *Collection) Set( // add the new weights c.weight += c.objWeight(newItem) - if fields == nil { - if len(values) > 0 { - // directly set the field values, update weight - c.weight -= len(newFields) * 8 - newFields = values - c.setFieldValues(id, newFields) - c.weight += len(newFields) * 8 - } - } else { - // map field name to value - for i, field := range fields { - c.setField(newItem, field, values[i]) - } - newFields = c.getFieldValues(id) - } - return oldObject, oldFields, newFields + return oldObject, oldFieldValues, newFieldValues } // Delete removes an object and returns it. @@ -240,8 +223,8 @@ func (c *Collection) Delete(id string) ( c.weight -= c.objWeight(oldItem) c.points -= oldItem.obj.NumPoints() - fields = c.getFieldValues(id) - c.deleteFieldValues(id) + fields = c.fieldValues.get(oldItem.fieldValuesSlot) + c.fieldValues.remove(oldItem.fieldValuesSlot) return oldItem.obj, fields, true } @@ -255,7 +238,7 @@ func (c *Collection) Get(id string) ( return nil, nil, false } item := itemV.(*itemT) - return item.obj, c.getFieldValues(id), true + return item.obj, c.fieldValues.get(item.fieldValuesSlot), true } // SetField set a field value for an object and returns that object. @@ -268,8 +251,9 @@ func (c *Collection) SetField(id, field string, value float64) ( return nil, nil, false, false } item := itemV.(*itemT) - updated = c.setField(item, field, value) - return item.obj, c.getFieldValues(id), updated, true + _, updateCount, weightDelta := c.setFieldValues(item, []string{field}, []float64{value}) + c.weight += weightDelta + return item.obj, c.fieldValues.get(item.fieldValuesSlot), updateCount > 0, true } // SetFields is similar to SetField, just setting multiple fields at once @@ -281,33 +265,38 @@ func (c *Collection) SetFields( return nil, nil, 0, false } item := itemV.(*itemT) - for idx, field := range inFields { - if c.setField(item, field, inValues[idx]) { - updatedCount++ - } - } - return item.obj, c.getFieldValues(id), updatedCount, true + newFieldValues, updateCount, weightDelta := c.setFieldValues(item, inFields, inValues) + c.weight += weightDelta + return item.obj, newFieldValues, updateCount, true } -func (c *Collection) setField(item *itemT, field string, value float64) ( - updated bool, +func (c *Collection) setFieldValues(item *itemT, fields []string, updateValues []float64) ( + newValues []float64, + updated int, + weightDelta int, ) { - idx, ok := c.fieldMap[field] - if !ok { - idx = len(c.fieldMap) - c.fieldMap[field] = idx - c.addToFieldArr(field) + newValues = c.fieldValues.get(item.fieldValuesSlot) + for i, field := range fields { + fieldIdx, ok := c.fieldMap[field] + if !ok { + fieldIdx = len(c.fieldMap) + c.fieldMap[field] = fieldIdx + c.addToFieldArr(field) + } + for fieldIdx >= len(newValues) { + newValues = append(newValues, 0) + weightDelta += 8 + } + ovalue := newValues[fieldIdx] + nvalue := updateValues[i] + newValues[fieldIdx] = nvalue + if ovalue != nvalue { + updated++ + } } - fields := c.getFieldValues(item.id) - c.weight -= len(fields) * 8 - for idx >= len(fields) { - fields = append(fields, 0) - } - c.weight += len(fields) * 8 - ovalue := fields[idx] - fields[idx] = value - c.setFieldValues(item.id, fields) - return ovalue != value + newSlot := c.fieldValues.set(item.fieldValuesSlot, newValues) + item.fieldValuesSlot = newSlot + return newValues, updated, weightDelta } // FieldMap return a maps of the field names. @@ -365,8 +354,8 @@ func (c *Collection) Scan( return true } nextStep(count, cursor, deadline) - iitm := item.(*itemT) - keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id)) + iitm := value.(*itemT) + keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot)) return keepon } if desc { @@ -409,7 +398,7 @@ func (c *Collection) ScanRange( } } iitm := value.(*itemT) - keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id)) + keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot)) return keepon } @@ -442,7 +431,7 @@ func (c *Collection) SearchValues( } nextStep(count, cursor, deadline) iitm := item.(*itemT) - keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id)) + keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot)) return keepon } if desc { @@ -473,7 +462,7 @@ func (c *Collection) SearchValuesRange(start, end string, desc bool, } nextStep(count, cursor, deadline) iitm := item.(*itemT) - keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id)) + keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot)) return keepon } pstart := &itemT{obj: String(start)} @@ -514,7 +503,7 @@ func (c *Collection) ScanGreaterOrEqual(id string, desc bool, } nextStep(count, cursor, deadline) iitm := value.(*itemT) - keepon = iterator(iitm.id, iitm.obj, c.getFieldValues(iitm.id)) + keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot)) return keepon } if desc { @@ -535,7 +524,7 @@ func (c *Collection) geoSearch( [2]float64{rect.Max.X, rect.Max.Y}, func(_, _ [2]float64, itemv interface{}) bool { item := itemv.(*itemT) - alive = iter(item.id, item.obj, c.getFieldValues(item.id)) + alive = iter(item.id, item.obj, c.fieldValues.get(item.fieldValuesSlot)) return alive }, ) @@ -755,7 +744,7 @@ func (c *Collection) Nearby( } nextStep(count, cursor, deadline) item := itemv.(*itemT) - alive = iter(item.id, item.obj, c.getFieldValues(item.id)) + alive = iter(item.id, item.obj, c.fieldValues.get(item.fieldValuesSlot)) return alive }, ) diff --git a/internal/collection/collection_test.go b/internal/collection/collection_test.go index 7471f071..2f5b0ceb 100644 --- a/internal/collection/collection_test.go +++ b/internal/collection/collection_test.go @@ -635,36 +635,63 @@ func TestManyCollections(t *testing.T) { type testPointItem struct { id string object geojson.Object + fields []float64 } -func BenchmarkInsert(t *testing.B) { +func makeBenchFields(nFields int) []float64 { + if nFields == 0 { + return nil + } + + return make([]float64, nFields) +} + +func BenchmarkInsert_Fields(t *testing.B) { + benchmarkInsert(t, 1) +} + +func BenchmarkInsert_NoFields(t *testing.B) { + benchmarkInsert(t, 0) +} + +func benchmarkInsert(t *testing.B, nFields int) { rand.Seed(time.Now().UnixNano()) items := make([]testPointItem, t.N) for i := 0; i < t.N; i++ { items[i] = testPointItem{ fmt.Sprintf("%d", i), PO(rand.Float64()*360-180, rand.Float64()*180-90), + makeBenchFields(nFields), } } col := New() t.ResetTimer() for i := 0; i < t.N; i++ { - col.Set(items[i].id, items[i].object, nil, nil) + col.Set(items[i].id, items[i].object, nil, items[i].fields) } } -func BenchmarkReplace(t *testing.B) { +func BenchmarkReplace_Fields(t *testing.B) { + benchmarkReplace(t, 1) +} + +func BenchmarkReplace_NoFields(t *testing.B) { + benchmarkReplace(t, 0) +} + +func benchmarkReplace(t *testing.B, nFields int) { rand.Seed(time.Now().UnixNano()) items := make([]testPointItem, t.N) for i := 0; i < t.N; i++ { items[i] = testPointItem{ fmt.Sprintf("%d", i), PO(rand.Float64()*360-180, rand.Float64()*180-90), + makeBenchFields(nFields), } } col := New() for i := 0; i < t.N; i++ { - col.Set(items[i].id, items[i].object, nil, nil) + col.Set(items[i].id, items[i].object, nil, items[i].fields) } t.ResetTimer() for _, i := range rand.Perm(t.N) { @@ -675,18 +702,27 @@ func BenchmarkReplace(t *testing.B) { } } -func BenchmarkGet(t *testing.B) { +func BenchmarkGet_Fields(t *testing.B) { + benchmarkGet(t, 1) +} + +func BenchmarkGet_NoFields(t *testing.B) { + benchmarkGet(t, 0) +} + +func benchmarkGet(t *testing.B, nFields int) { rand.Seed(time.Now().UnixNano()) items := make([]testPointItem, t.N) for i := 0; i < t.N; i++ { items[i] = testPointItem{ fmt.Sprintf("%d", i), PO(rand.Float64()*360-180, rand.Float64()*180-90), + makeBenchFields(nFields), } } col := New() for i := 0; i < t.N; i++ { - col.Set(items[i].id, items[i].object, nil, nil) + col.Set(items[i].id, items[i].object, nil, items[i].fields) } t.ResetTimer() for _, i := range rand.Perm(t.N) { @@ -697,18 +733,27 @@ func BenchmarkGet(t *testing.B) { } } -func BenchmarkRemove(t *testing.B) { +func BenchmarkRemove_Fields(t *testing.B) { + benchmarkRemove(t, 1) +} + +func BenchmarkRemove_NoFields(t *testing.B) { + benchmarkRemove(t, 0) +} + +func benchmarkRemove(t *testing.B, nFields int) { rand.Seed(time.Now().UnixNano()) items := make([]testPointItem, t.N) for i := 0; i < t.N; i++ { items[i] = testPointItem{ fmt.Sprintf("%d", i), PO(rand.Float64()*360-180, rand.Float64()*180-90), + makeBenchFields(nFields), } } col := New() for i := 0; i < t.N; i++ { - col.Set(items[i].id, items[i].object, nil, nil) + col.Set(items[i].id, items[i].object, nil, items[i].fields) } t.ResetTimer() for _, i := range rand.Perm(t.N) { @@ -718,3 +763,35 @@ func BenchmarkRemove(t *testing.B) { } } } + +func BenchmarkScan_Fields(t *testing.B) { + benchmarkScan(t, 1) +} + +func BenchmarkScan_NoFields(t *testing.B) { + benchmarkScan(t, 0) +} + +func benchmarkScan(t *testing.B, nFields int) { + rand.Seed(time.Now().UnixNano()) + items := make([]testPointItem, t.N) + for i := 0; i < t.N; i++ { + items[i] = testPointItem{ + fmt.Sprintf("%d", i), + PO(rand.Float64()*360-180, rand.Float64()*180-90), + makeBenchFields(nFields), + } + } + col := New() + for i := 0; i < t.N; i++ { + col.Set(items[i].id, items[i].object, nil, items[i].fields) + } + t.ResetTimer() + for i := 0; i < t.N; i++ { + var scanIteration int + col.Scan(true, nil, nil, func(id string, obj geojson.Object, fields []float64) bool { + scanIteration++ + return scanIteration <= 500 + }) + } +} diff --git a/internal/collection/fieldvalues.go b/internal/collection/fieldvalues.go new file mode 100644 index 00000000..cfb1b4bb --- /dev/null +++ b/internal/collection/fieldvalues.go @@ -0,0 +1,53 @@ +package collection + +type fieldValues struct { + freelist []fieldValuesSlot + data [][]float64 +} + +type fieldValuesSlot int + +const nilValuesSlot fieldValuesSlot = -1 + +func (f *fieldValues) get(k fieldValuesSlot) []float64 { + if k == nilValuesSlot { + return nil + } + return f.data[int(k)] +} + +func (f *fieldValues) set(k fieldValuesSlot, itemData []float64) fieldValuesSlot { + // if we're asked to store into the nil values slot, it means one of two things: + // - we are doing a replace on an item that previously had nil fields + // - we are inserting a new item + // in either case, check if the new values are not nil, and if so allocate a + // new slot + if k == nilValuesSlot { + if itemData == nil { + return nilValuesSlot + } + + // first check if there is a slot on the freelist to reuse + if len(f.freelist) > 0 { + var slot fieldValuesSlot + slot, f.freelist = f.freelist[len(f.freelist)-1], f.freelist[:len(f.freelist)-1] + f.data[slot] = itemData + return slot + } + + // no reusable slot, append + f.data = append(f.data, itemData) + return fieldValuesSlot(len(f.data) - 1) + + } + f.data[int(k)] = itemData + return k +} + +func (f *fieldValues) remove(k fieldValuesSlot) { + if k == nilValuesSlot { + return + } + f.data[int(k)] = nil + f.freelist = append(f.freelist, k) +}