package collection

import (
	"runtime"

	"github.com/tidwall/btree"
	"github.com/tidwall/geoindex"
	"github.com/tidwall/geojson"
	"github.com/tidwall/geojson/geo"
	"github.com/tidwall/geojson/geometry"
	"github.com/tidwall/rtree"
	"github.com/tidwall/tile38/internal/deadline"
)

// yieldStep forces the iterator to yield the goroutine every 256 steps.
const yieldStep = 256

// Cursor allows for quickly paging through Scan, Within, Intersects, and
// Nearby results.
type Cursor interface {
	Offset() uint64
	Step(count uint64)
}

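// A minimal Cursor implementation sketch (illustrative only, not part of
// this package; the type and field names are hypothetical):
//
//	type simpleCursor struct{ offset uint64 }
//
//	func (c *simpleCursor) Offset() uint64 { return c.offset }
//	func (c *simpleCursor) Step(n uint64)  { c.offset += n }

// itemT is the internal representation of a single object in the collection.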
type itemT struct {
	id              string
	obj             geojson.Object
	expires         int64 // unix nano expiration
	fieldValuesSlot fieldValuesSlot
}

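// byID orders items by id.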
func byID(a, b interface{}) bool {
	return a.(*itemT).id < b.(*itemT).id
}

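// byValue orders items by their value string, falling back to id for ties.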
func byValue(a, b interface{}) bool {
	value1 := a.(*itemT).obj.String()
	value2 := b.(*itemT).obj.String()
	if value1 < value2 {
		return true
	}
	if value1 > value2 {
		return false
	}
	// the values match so we'll compare IDs, which are always unique.
	return byID(a, b)
}

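// byExpires orders items by expiration timestamp, falling back to id for ties.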
func byExpires(a, b interface{}) bool {
	item1 := a.(*itemT)
	item2 := b.(*itemT)
	if item1.expires < item2.expires {
		return true
	}
	if item1.expires > item2.expires {
		return false
	}
	// the expirations match so we'll compare IDs, which are always unique.
	return byID(a, b)
}

// Collection represents a collection of geojson objects.
type Collection struct {
	items       *btree.BTree    // items sorted by id
	index       *geoindex.Index // items geospatially indexed
	values      *btree.BTree    // items sorted by value+id
	expires     *btree.BTree    // items sorted by ex+id
	fieldMap    map[string]int
	fieldArr    []string
	fieldValues *fieldValues
	weight      int
	points      int
	objects     int // geometry count
	nobjects    int // non-geometry count
}

// New creates an empty collection.
func New() *Collection {
	col := &Collection{
		items:       btree.NewNonConcurrent(byID),
		index:       geoindex.Wrap(&rtree.RTree{}),
		values:      btree.NewNonConcurrent(byValue),
		expires:     btree.NewNonConcurrent(byExpires),
		fieldMap:    make(map[string]int),
		fieldArr:    make([]string, 0),
		fieldValues: &fieldValues{},
	}
	return col
}

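// A typical usage sketch (illustrative only; the object value and id are
// hypothetical):
//
//	col := New()
//	col.Set("truck1", obj, nil, nil, 0)
//	o, fields, ex, ok := col.Get("truck1")
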
// Count returns the number of objects in collection.
func (c *Collection) Count() int {
	return c.objects + c.nobjects
}

// StringCount returns the number of string values.
func (c *Collection) StringCount() int {
	return c.nobjects
}

// PointCount returns the number of points (lat/lon coordinates) in collection.
func (c *Collection) PointCount() int {
	return c.points
}

// TotalWeight calculates the in-memory cost of the collection in bytes.
func (c *Collection) TotalWeight() int {
	return c.weight
}

// Bounds returns the bounds of all the items in the collection.
func (c *Collection) Bounds() (minX, minY, maxX, maxY float64) {
	min, max := c.index.Bounds()
	if len(min) >= 2 && len(max) >= 2 {
		return min[0], min[1], max[0], max[1]
	}
	return
}

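// objIsSpatial reports whether the object is a spatial (geometry-backed)
// type, as opposed to a plain string value.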
func objIsSpatial(obj geojson.Object) bool {
	_, ok := obj.(geojson.Spatial)
	return ok
}

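// objWeight estimates the in-memory cost of an item in bytes.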
func (c *Collection) objWeight(item *itemT) int {
	var weight int
	if objIsSpatial(item.obj) {
		weight = item.obj.NumPoints() * 16
	} else {
		weight = len(item.obj.String())
	}
	return weight + len(c.fieldValues.get(item.fieldValuesSlot))*8 + len(item.id)
}

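// indexDelete removes a non-empty object from the spatial index.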
func (c *Collection) indexDelete(item *itemT) {
	if !item.obj.Empty() {
		rect := item.obj.Rect()
		c.index.Delete(
			[2]float64{rect.Min.X, rect.Min.Y},
			[2]float64{rect.Max.X, rect.Max.Y},
			item)
	}
}

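// indexInsert adds a non-empty object to the spatial index.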
func (c *Collection) indexInsert(item *itemT) {
	if !item.obj.Empty() {
		rect := item.obj.Rect()
		c.index.Insert(
			[2]float64{rect.Min.X, rect.Min.Y},
			[2]float64{rect.Max.X, rect.Max.Y},
			item)
	}
}

// Set adds or replaces an object in the collection and returns the fields
// array. If an item with the same id is already in the collection then the
// new item will adopt the old item's fields.
// The fields argument is optional.
// The return values are the old object, the old fields, and the new fields.
func (c *Collection) Set(
	id string, obj geojson.Object, fields []string, values []float64, ex int64,
) (
	oldObject geojson.Object, oldFieldValues []float64, newFieldValues []float64,
) {
	newItem := &itemT{id: id, obj: obj, fieldValuesSlot: nilValuesSlot, expires: ex}

	// add the new item to main btree and remove the old one if needed
	oldItem := c.items.Set(newItem)
	if oldItem != nil {
		oldItem := oldItem.(*itemT)
		// the old item was removed, now let's remove it from the rtree/btree.
		if objIsSpatial(oldItem.obj) {
			c.indexDelete(oldItem)
			c.objects--
		} else {
			c.values.Delete(oldItem)
			c.nobjects--
		}

		// delete old item from the expires queue
		if oldItem.expires != 0 {
			c.expires.Delete(oldItem)
		}

		// decrement the point count
		c.points -= oldItem.obj.NumPoints()

		// decrement the weights
		c.weight -= c.objWeight(oldItem)

		// references
		oldObject = oldItem.obj
		oldFieldValues = c.fieldValues.get(oldItem.fieldValuesSlot)
		newFieldValues = oldFieldValues
		newItem.fieldValuesSlot = oldItem.fieldValuesSlot
		if len(oldFieldValues) > 0 {
			oldFieldValues = append([]float64{}, oldFieldValues...)
		}
	}
	if fields == nil {
		if len(values) > 0 {
			newFieldValues = values
			newFieldValuesSlot := c.fieldValues.set(newItem.fieldValuesSlot, newFieldValues)
			newItem.fieldValuesSlot = newFieldValuesSlot
		}
	} else {
		newFieldValues, _, _ = c.setFieldValues(newItem, fields, values)
	}

	// insert the new item into the rtree or strings tree.
	if objIsSpatial(newItem.obj) {
		c.indexInsert(newItem)
		c.objects++
	} else {
		c.values.Set(newItem)
		c.nobjects++
	}

	// insert item into expires queue.
	if newItem.expires != 0 {
		c.expires.Set(newItem)
	}

	// increment the point count
	c.points += newItem.obj.NumPoints()

	// add the new weights
	c.weight += c.objWeight(newItem)

	return oldObject, oldFieldValues, newFieldValues
}

// Delete removes an object and returns it.
// If the object does not exist then the 'ok' return value will be false.
func (c *Collection) Delete(id string) (
	obj geojson.Object, fields []float64, ok bool,
) {
	v := c.items.Delete(&itemT{id: id})
	if v == nil {
		return nil, nil, false
	}
	oldItem := v.(*itemT)
	if objIsSpatial(oldItem.obj) {
		if !oldItem.obj.Empty() {
			c.indexDelete(oldItem)
		}
		c.objects--
	} else {
		c.values.Delete(oldItem)
		c.nobjects--
	}

	// delete old item from expires queue
	if oldItem.expires != 0 {
		c.expires.Delete(oldItem)
	}

	c.weight -= c.objWeight(oldItem)
	c.points -= oldItem.obj.NumPoints()

	fields = c.fieldValues.get(oldItem.fieldValuesSlot)
	c.fieldValues.remove(oldItem.fieldValuesSlot)
	return oldItem.obj, fields, true
}

// Get returns an object.
// If the object does not exist then the 'ok' return value will be false.
func (c *Collection) Get(id string) (
	obj geojson.Object, fields []float64, ex int64, ok bool,
) {
	itemV := c.items.Get(&itemT{id: id})
	if itemV == nil {
		return nil, nil, 0, false
	}
	item := itemV.(*itemT)
	return item.obj, c.fieldValues.get(item.fieldValuesSlot), item.expires, true
}

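// SetExpires sets or clears the expiration timestamp for an object. A zero
// value for ex means no expiration. Returns false if the object does not
// exist.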
func (c *Collection) SetExpires(id string, ex int64) bool {
	v := c.items.Get(&itemT{id: id})
	if v == nil {
		return false
	}
	item := v.(*itemT)
	if item.expires != 0 {
		c.expires.Delete(item)
	}
	item.expires = ex
	if item.expires != 0 {
		c.expires.Set(item)
	}
	return true
}

// SetField sets a field value for an object and returns that object.
// If the object does not exist then the 'ok' return value will be false.
func (c *Collection) SetField(id, field string, value float64) (
	obj geojson.Object, fields []float64, updated bool, ok bool,
) {
	itemV := c.items.Get(&itemT{id: id})
	if itemV == nil {
		return nil, nil, false, false
	}
	item := itemV.(*itemT)
	_, updateCount, weightDelta := c.setFieldValues(item, []string{field}, []float64{value})
	c.weight += weightDelta
	return item.obj, c.fieldValues.get(item.fieldValuesSlot), updateCount > 0, true
}

// SetFields is similar to SetField, just setting multiple fields at once.
func (c *Collection) SetFields(
	id string, inFields []string, inValues []float64,
) (obj geojson.Object, fields []float64, updatedCount int, ok bool) {
	itemV := c.items.Get(&itemT{id: id})
	if itemV == nil {
		return nil, nil, 0, false
	}
	item := itemV.(*itemT)
	newFieldValues, updateCount, weightDelta := c.setFieldValues(item, inFields, inValues)
	c.weight += weightDelta
	return item.obj, newFieldValues, updateCount, true
}

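// setFieldValues assigns the given values to the named fields on an item,
// registering any new field names with the collection. It returns the full
// values slice, the number of values that changed, and the weight delta in
// bytes.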
func (c *Collection) setFieldValues(item *itemT, fields []string, updateValues []float64) (
	newValues []float64,
	updated int,
	weightDelta int,
) {
	newValues = c.fieldValues.get(item.fieldValuesSlot)
	for i, field := range fields {
		fieldIdx, ok := c.fieldMap[field]
		if !ok {
			fieldIdx = len(c.fieldMap)
			c.fieldMap[field] = fieldIdx
			c.addToFieldArr(field)
		}
		for fieldIdx >= len(newValues) {
			newValues = append(newValues, 0)
			weightDelta += 8
		}
		ovalue := newValues[fieldIdx]
		nvalue := updateValues[i]
		newValues[fieldIdx] = nvalue
		if ovalue != nvalue {
			updated++
		}
	}
	newSlot := c.fieldValues.set(item.fieldValuesSlot, newValues)
	item.fieldValuesSlot = newSlot
	return newValues, updated, weightDelta
}

// FieldMap returns a map of the field names.
func (c *Collection) FieldMap() map[string]int {
	return c.fieldMap
}

// FieldArr returns an array representation of the field names.
func (c *Collection) FieldArr() []string {
	return c.fieldArr
}

// bsearch performs a binary search over a sorted array of strings, returning
// the index of the value, or the index where the value would be inserted
// when not found.
func bsearch(arr []string, val string) (index int, found bool) {
	i, j := 0, len(arr)
	for i < j {
		h := i + (j-i)/2
		if val >= arr[h] {
			i = h + 1
		} else {
			j = h
		}
	}
	if i > 0 && arr[i-1] >= val {
		return i - 1, true
	}
	return i, false
}

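// addToFieldArr inserts a field name into the sorted fieldArr slice,
// keeping the slice in order.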
func (c *Collection) addToFieldArr(field string) {
	if index, found := bsearch(c.fieldArr, field); !found {
		c.fieldArr = append(c.fieldArr, "")
		copy(c.fieldArr[index+1:], c.fieldArr[index:len(c.fieldArr)-1])
		c.fieldArr[index] = field
	}
}

// Scan iterates through the collection ids.
func (c *Collection) Scan(
	desc bool,
	cursor Cursor,
	deadline *deadline.Deadline,
	iterator func(id string, obj geojson.Object, fields []float64) bool,
) bool {
	var keepon = true
	var count uint64
	var offset uint64
	if cursor != nil {
		offset = cursor.Offset()
		cursor.Step(offset)
	}
	iter := func(item interface{}) bool {
		count++
		if count <= offset {
			return true
		}
		nextStep(count, cursor, deadline)
		iitm := item.(*itemT)
		keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
		return keepon
	}
	if desc {
		c.items.Descend(nil, iter)
	} else {
		c.items.Ascend(nil, iter)
	}
	return keepon
}

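// A Scan usage sketch (illustrative only; assumes a nil cursor and nil
// deadline are acceptable in the caller's context):
//
//	col.Scan(false, nil, nil, func(id string, obj geojson.Object, fields []float64) bool {
//		// process id/obj here...
//		return true // continue iterating
//	})
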
// ScanRange iterates through the collection starting with specified id.
func (c *Collection) ScanRange(
	start, end string,
	desc bool,
	cursor Cursor,
	deadline *deadline.Deadline,
	iterator func(id string, obj geojson.Object, fields []float64) bool,
) bool {
	var keepon = true
	var count uint64
	var offset uint64
	if cursor != nil {
		offset = cursor.Offset()
		cursor.Step(offset)
	}
	iter := func(value interface{}) bool {
		item := value.(*itemT)
		count++
		if count <= offset {
			return true
		}
		nextStep(count, cursor, deadline)
		if !desc {
			if item.id >= end {
				return false
			}
		} else {
			if item.id <= end {
				return false
			}
		}
		keepon = iterator(item.id, item.obj, c.fieldValues.get(item.fieldValuesSlot))
		return keepon
	}

	if desc {
		c.items.Descend(&itemT{id: start}, iter)
	} else {
		c.items.Ascend(&itemT{id: start}, iter)
	}
	return keepon
}

// SearchValues iterates through the collection values.
func (c *Collection) SearchValues(
	desc bool,
	cursor Cursor,
	deadline *deadline.Deadline,
	iterator func(id string, obj geojson.Object, fields []float64) bool,
) bool {
	var keepon = true
	var count uint64
	var offset uint64
	if cursor != nil {
		offset = cursor.Offset()
		cursor.Step(offset)
	}
	iter := func(item interface{}) bool {
		count++
		if count <= offset {
			return true
		}
		nextStep(count, cursor, deadline)
		iitm := item.(*itemT)
		keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
		return keepon
	}
	if desc {
		c.values.Descend(nil, iter)
	} else {
		c.values.Ascend(nil, iter)
	}
	return keepon
}

// SearchValuesRange iterates through the collection values within a range.
func (c *Collection) SearchValuesRange(start, end string, desc bool,
	cursor Cursor,
	deadline *deadline.Deadline,
	iterator func(id string, obj geojson.Object, fields []float64) bool,
) bool {
	var keepon = true
	var count uint64
	var offset uint64
	if cursor != nil {
		offset = cursor.Offset()
		cursor.Step(offset)
	}
	iter := func(item interface{}) bool {
		count++
		if count <= offset {
			return true
		}
		nextStep(count, cursor, deadline)
		iitm := item.(*itemT)
		keepon = iterator(iitm.id, iitm.obj, c.fieldValues.get(iitm.fieldValuesSlot))
		return keepon
	}
	pstart := &itemT{obj: String(start)}
	pend := &itemT{obj: String(end)}
	if desc {
		// descend range
		c.values.Descend(pstart, func(item interface{}) bool {
			return bGT(c.values, item, pend) && iter(item)
		})
	} else {
		c.values.Ascend(pstart, func(item interface{}) bool {
			return bLT(c.values, item, pend) && iter(item)
		})
	}
	return keepon
}

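// bLT and bGT report whether item a sorts before, or after, item b
// according to the tree's less function.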
func bLT(tr *btree.BTree, a, b interface{}) bool { return tr.Less(a, b) }
func bGT(tr *btree.BTree, a, b interface{}) bool { return tr.Less(b, a) }

// ScanGreaterOrEqual iterates through the collection starting with specified id.
func (c *Collection) ScanGreaterOrEqual(id string, desc bool,
	cursor Cursor,
	deadline *deadline.Deadline,
	iterator func(id string, obj geojson.Object, fields []float64, ex int64) bool,
) bool {
	var keepon = true
	var count uint64
	var offset uint64
	if cursor != nil {
		offset = cursor.Offset()
		cursor.Step(offset)
	}
	iter := func(v interface{}) bool {
		count++
		if count <= offset {
			return true
		}
		nextStep(count, cursor, deadline)
		item := v.(*itemT)
		keepon = iterator(item.id, item.obj, c.fieldValues.get(item.fieldValuesSlot), item.expires)
		return keepon
	}
	if desc {
		c.items.Descend(&itemT{id: id}, iter)
	} else {
		c.items.Ascend(&itemT{id: id}, iter)
	}
	return keepon
}

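// geoSearch searches the spatial index for all items whose rectangles
// intersect rect, calling iter for each match until iter returns false.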
func (c *Collection) geoSearch(
	rect geometry.Rect,
	iter func(id string, obj geojson.Object, fields []float64) bool,
) bool {
	alive := true
	c.index.Search(
		[2]float64{rect.Min.X, rect.Min.Y},
		[2]float64{rect.Max.X, rect.Max.Y},
		func(_, _ [2]float64, itemv interface{}) bool {
			item := itemv.(*itemT)
			alive = iter(item.id, item.obj, c.fieldValues.get(item.fieldValuesSlot))
			return alive
		},
	)
	return alive
}

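// geoSparse performs a sparse search: the object's bounding rectangle is
// recursively split into quadrants, sparse levels deep, and at most one
// matching object is reported per leaf cell.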
func (c *Collection) geoSparse(
	obj geojson.Object, sparse uint8,
	iter func(id string, obj geojson.Object, fields []float64) (match, ok bool),
) bool {
	matches := make(map[string]bool)
	alive := true
	c.geoSparseInner(obj.Rect(), sparse,
		func(id string, o geojson.Object, fields []float64) (
			match, ok bool,
		) {
			ok = true
			if !matches[id] {
				match, ok = iter(id, o, fields)
				if match {
					matches[id] = true
				}
			}
			return match, ok
		},
	)
	return alive
}

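// geoSparseInner recursively subdivides rect into four quadrants until
// sparse reaches zero, then searches each leaf rectangle, stopping that
// leaf's search as soon as iter reports a match.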
func (c *Collection) geoSparseInner(
	rect geometry.Rect, sparse uint8,
	iter func(id string, obj geojson.Object, fields []float64) (match, ok bool),
) bool {
	if sparse > 0 {
		w := rect.Max.X - rect.Min.X
		h := rect.Max.Y - rect.Min.Y
		quads := [4]geometry.Rect{
			{
				Min: geometry.Point{X: rect.Min.X, Y: rect.Min.Y + h/2},
				Max: geometry.Point{X: rect.Min.X + w/2, Y: rect.Max.Y},
			},
			{
				Min: geometry.Point{X: rect.Min.X + w/2, Y: rect.Min.Y + h/2},
				Max: geometry.Point{X: rect.Max.X, Y: rect.Max.Y},
			},
			{
				Min: geometry.Point{X: rect.Min.X, Y: rect.Min.Y},
				Max: geometry.Point{X: rect.Min.X + w/2, Y: rect.Min.Y + h/2},
			},
			{
				Min: geometry.Point{X: rect.Min.X + w/2, Y: rect.Min.Y},
				Max: geometry.Point{X: rect.Max.X, Y: rect.Min.Y + h/2},
			},
		}
		for _, quad := range quads {
			if !c.geoSparseInner(quad, sparse-1, iter) {
				return false
			}
		}
		return true
	}
	alive := true
	c.geoSearch(rect,
		func(id string, obj geojson.Object, fields []float64) bool {
			match, ok := iter(id, obj, fields)
			if !ok {
				alive = false
				return false
			}
			return !match
		},
	)
	return alive
}

// Within returns all objects that are fully contained within an object or
// bounding box. Set obj to nil in order to use the bounding box.
func (c *Collection) Within(
	obj geojson.Object,
	sparse uint8,
	cursor Cursor,
	deadline *deadline.Deadline,
	iter func(id string, obj geojson.Object, fields []float64) bool,
) bool {
	var count uint64
	var offset uint64
	if cursor != nil {
		offset = cursor.Offset()
		cursor.Step(offset)
	}
	if sparse > 0 {
		return c.geoSparse(obj, sparse,
			func(id string, o geojson.Object, fields []float64) (
				match, ok bool,
			) {
				count++
				if count <= offset {
					return false, true
				}
				nextStep(count, cursor, deadline)
				if match = o.Within(obj); match {
					ok = iter(id, o, fields)
				}
				return match, ok
			},
		)
	}
	return c.geoSearch(obj.Rect(),
		func(id string, o geojson.Object, fields []float64) bool {
			count++
			if count <= offset {
				return true
			}
			nextStep(count, cursor, deadline)
			if o.Within(obj) {
				return iter(id, o, fields)
			}
			return true
		},
	)
}

// Intersects returns all objects that intersect an object or bounding box.
// Set obj to nil in order to use the bounding box.
func (c *Collection) Intersects(
	obj geojson.Object,
	sparse uint8,
	cursor Cursor,
	deadline *deadline.Deadline,
	iter func(id string, obj geojson.Object, fields []float64) bool,
) bool {
	var count uint64
	var offset uint64
	if cursor != nil {
		offset = cursor.Offset()
		cursor.Step(offset)
	}
	if sparse > 0 {
		return c.geoSparse(obj, sparse,
			func(id string, o geojson.Object, fields []float64) (
				match, ok bool,
			) {
				count++
				if count <= offset {
					return false, true
				}
				nextStep(count, cursor, deadline)
				if match = o.Intersects(obj); match {
					ok = iter(id, o, fields)
				}
				return match, ok
			},
		)
	}
	return c.geoSearch(obj.Rect(),
		func(id string, o geojson.Object, fields []float64) bool {
			count++
			if count <= offset {
				return true
			}
			nextStep(count, cursor, deadline)
			if o.Intersects(obj) {
				return iter(id, o, fields)
			}
			return true
		},
	)
}

// Nearby returns the nearest neighbors.
func (c *Collection) Nearby(
	target geojson.Object,
	cursor Cursor,
	deadline *deadline.Deadline,
	iter func(id string, obj geojson.Object, fields []float64, dist float64) bool,
) bool {
	// First look to see if there's at least one candidate in the circle's
	// outer rectangle. This is a fast-fail operation.
	if circle, ok := target.(*geojson.Circle); ok {
		meters := circle.Meters()
		if meters > 0 {
			center := circle.Center()
			minLat, minLon, maxLat, maxLon :=
				geo.RectFromCenter(center.Y, center.X, meters)
			var exists bool
			c.index.Search(
				[2]float64{minLon, minLat},
				[2]float64{maxLon, maxLat},
				func(_, _ [2]float64, itemv interface{}) bool {
					exists = true
					return false
				},
			)
			if !exists {
				// no candidates
				return true
			}
		}
	}
	// do the kNN operation
	alive := true
	center := target.Center()
	var count uint64
	var offset uint64
	if cursor != nil {
		offset = cursor.Offset()
		cursor.Step(offset)
	}
	c.index.Nearby(
		geodeticDistAlgo([2]float64{center.X, center.Y}),
		func(_, _ [2]float64, itemv interface{}, dist float64) bool {
			count++
			if count <= offset {
				return true
			}
			nextStep(count, cursor, deadline)
			item := itemv.(*itemT)
			alive = iter(item.id, item.obj, c.fieldValues.get(item.fieldValuesSlot), dist)
			return alive
		},
	)
	return alive
}

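// nextStep yields the goroutine to the scheduler and checks the deadline
// once every yieldStep iterations, and advances the cursor by one step.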
func nextStep(step uint64, cursor Cursor, deadline *deadline.Deadline) {
	if step&(yieldStep-1) == (yieldStep - 1) {
		runtime.Gosched()
		deadline.Check()
	}
	if cursor != nil {
		cursor.Step(1)
	}
}

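// Expired describes a single object that has passed its expiration time.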
type Expired struct {
	ID     string
	Obj    geojson.Object
	Fields []float64
}

// Expired returns the ids of all objects that have expired as of the given
// timestamp, reusing buffer to avoid allocations.
func (c *Collection) Expired(now int64, buffer []string) (ids []string) {
	ids = buffer[:0]
	c.expires.Ascend(nil, func(v interface{}) bool {
		item := v.(*itemT)
		if now < item.expires {
			return false
		}
		ids = append(ids, item.id)
		return true
	})
	return ids
}

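// A background sweeper sketch (illustrative only; locking and any
// server-side notification details are assumed to be handled by the caller):
//
//	ids := col.Expired(time.Now().UnixNano(), nil)
//	for _, id := range ids {
//		col.Delete(id)
//	}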