2018-10-11 00:25:40 +03:00
|
|
|
package collection
|
|
|
|
|
|
|
|
import (
|
2019-03-05 21:33:37 +03:00
|
|
|
"runtime"
|
|
|
|
|
2018-10-11 00:25:40 +03:00
|
|
|
"github.com/tidwall/btree"
|
|
|
|
"github.com/tidwall/geojson"
|
|
|
|
"github.com/tidwall/geojson/geometry"
|
2021-02-08 03:54:56 +03:00
|
|
|
"github.com/tidwall/rtree"
|
2019-04-24 15:09:41 +03:00
|
|
|
"github.com/tidwall/tile38/internal/deadline"
|
2022-09-20 03:47:38 +03:00
|
|
|
"github.com/tidwall/tile38/internal/field"
|
2022-09-21 00:20:53 +03:00
|
|
|
"github.com/tidwall/tile38/internal/object"
|
2018-10-11 00:25:40 +03:00
|
|
|
)
|
|
|
|
|
2021-04-28 15:10:18 +03:00
|
|
|
// yieldStep is the interval, counted in iteration steps, at which nextStep
// yields the goroutine and checks the deadline. nextStep tests it with a bit
// mask (yieldStep-1), so the value must remain a power of two.
const yieldStep = 256
|
2019-03-05 21:33:37 +03:00
|
|
|
|
2018-11-02 16:09:56 +03:00
|
|
|
// Cursor lets callers page through the results of the collection's scan and
// search methods (Scan, Within, Intersects, Nearby, ...).
type Cursor interface {
	// Offset reports how many leading results should be skipped.
	Offset() uint64
	// Step advances the cursor by count results.
	Step(count uint64)
}
|
|
|
|
|
2022-09-21 00:20:53 +03:00
|
|
|
func byID(a, b *object.Object) bool {
|
|
|
|
return a.ID() < b.ID()
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
|
2022-09-21 00:20:53 +03:00
|
|
|
func byValue(a, b *object.Object) bool {
|
|
|
|
value1 := a.String()
|
|
|
|
value2 := b.String()
|
2018-10-11 00:25:40 +03:00
|
|
|
if value1 < value2 {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
if value1 > value2 {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
// the values match so we'll compare IDs, which are always unique.
|
2020-10-28 01:29:50 +03:00
|
|
|
return byID(a, b)
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
|
2022-09-21 00:20:53 +03:00
|
|
|
func byExpires(a, b *object.Object) bool {
|
|
|
|
if a.Expires() < b.Expires() {
|
Update expiration logic
This commit changes the logic for managing the expiration of
objects in the database.
Before: There was a server-wide hashmap that stored the
collection key, id, and expiration timestamp for all objects
that had a TTL. The hashmap was occasionally probed at 20
random positions, looking for objects that have expired. Those
expired objects were immediately deleted, and if there was 5
or more objects deleted, then the probe happened again, with
no delay. If the number of objects was less than 5 then the
there was a 1/10th of a second delay before the next probe.
Now: Rather than a server-wide hashmap, each collection has
its own ordered priority queue that stores objects with TTLs.
Rather than probing, there is a background routine that
executes every 1/10th of a second, which pops the expired
objects from the collection queues, and deletes them.
The collection/queue method is a more stable approach than
the hashmap/probing method. With probing, we can run into
major cache misses for some cases where there is wide
TTL duration, such as in the hours or days. This may cause
the system to occasionally fall behind, leaving should-be
expired objects in memory. Using a queue, there is no
cache misses, all objects that should be expired will be
right away, regardless of the TTL durations.
Fixes #616
2021-07-12 23:37:50 +03:00
|
|
|
return true
|
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
if a.Expires() > b.Expires() {
|
Update expiration logic
This commit changes the logic for managing the expiration of
objects in the database.
Before: There was a server-wide hashmap that stored the
collection key, id, and expiration timestamp for all objects
that had a TTL. The hashmap was occasionally probed at 20
random positions, looking for objects that have expired. Those
expired objects were immediately deleted, and if there was 5
or more objects deleted, then the probe happened again, with
no delay. If the number of objects was less than 5 then the
there was a 1/10th of a second delay before the next probe.
Now: Rather than a server-wide hashmap, each collection has
its own ordered priority queue that stores objects with TTLs.
Rather than probing, there is a background routine that
executes every 1/10th of a second, which pops the expired
objects from the collection queues, and deletes them.
The collection/queue method is a more stable approach than
the hashmap/probing method. With probing, we can run into
major cache misses for some cases where there is wide
TTL duration, such as in the hours or days. This may cause
the system to occasionally fall behind, leaving should-be
expired objects in memory. Using a queue, there is no
cache misses, all objects that should be expired will be
right away, regardless of the TTL durations.
Fixes #616
2021-07-12 23:37:50 +03:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
// the values match so we'll compare IDs, which are always unique.
|
|
|
|
return byID(a, b)
|
|
|
|
}
|
|
|
|
|
2018-10-11 00:25:40 +03:00
|
|
|
// Collection represents a collection of geojson objects.
|
|
|
|
type Collection struct {
|
2022-09-23 00:22:45 +03:00
|
|
|
objs btree.Map[string, *object.Object] // sorted by id
|
|
|
|
spatial rtree.RTreeGN[float32, *object.Object] // geospatially indexed
|
|
|
|
values *btree.BTreeG[*object.Object] // sorted by value+id
|
|
|
|
expires *btree.BTreeG[*object.Object] // sorted by ex+id
|
2022-09-20 03:47:38 +03:00
|
|
|
weight int
|
|
|
|
points int
|
|
|
|
objects int // geometry count
|
|
|
|
nobjects int // non-geometry count
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
|
2022-09-12 19:12:51 +03:00
|
|
|
// optsNoLock configures the btrees created by New with NoLocks set.
var optsNoLock = btree.Options{
	NoLocks: true,
}
|
|
|
|
|
2018-10-11 00:25:40 +03:00
|
|
|
// New creates an empty collection
|
|
|
|
func New() *Collection {
|
|
|
|
col := &Collection{
|
2022-09-20 03:47:38 +03:00
|
|
|
values: btree.NewBTreeGOptions(byValue, optsNoLock),
|
|
|
|
expires: btree.NewBTreeGOptions(byExpires, optsNoLock),
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
return col
|
|
|
|
}
|
|
|
|
|
|
|
|
// Count returns the number of objects in collection.
|
|
|
|
func (c *Collection) Count() int {
|
|
|
|
return c.objects + c.nobjects
|
|
|
|
}
|
|
|
|
|
|
|
|
// StringCount returns the number of string values.
|
|
|
|
func (c *Collection) StringCount() int {
|
|
|
|
return c.nobjects
|
|
|
|
}
|
|
|
|
|
|
|
|
// PointCount returns the number of points (lat/lon coordinates) in collection.
|
|
|
|
func (c *Collection) PointCount() int {
|
|
|
|
return c.points
|
|
|
|
}
|
|
|
|
|
|
|
|
// TotalWeight calculates the in-memory cost of the collection in bytes.
|
|
|
|
func (c *Collection) TotalWeight() int {
|
|
|
|
return c.weight
|
|
|
|
}
|
|
|
|
|
|
|
|
// Bounds returns the bounds of all the items in the collection.
|
|
|
|
func (c *Collection) Bounds() (minX, minY, maxX, maxY float64) {
|
2022-09-20 03:51:14 +03:00
|
|
|
_, _, left := c.spatial.LeftMost()
|
|
|
|
_, _, bottom := c.spatial.BottomMost()
|
|
|
|
_, _, right := c.spatial.RightMost()
|
|
|
|
_, _, top := c.spatial.TopMost()
|
|
|
|
if left == nil {
|
|
|
|
return
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
2022-09-20 03:51:14 +03:00
|
|
|
return left.Rect().Min.X, bottom.Rect().Min.Y,
|
|
|
|
right.Rect().Max.X, top.Rect().Max.Y
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
|
2022-09-21 00:20:53 +03:00
|
|
|
func (c *Collection) indexDelete(item *object.Object) {
|
|
|
|
if !item.Geo().Empty() {
|
2022-09-20 03:51:14 +03:00
|
|
|
c.spatial.Delete(rtreeItem(item))
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-09-21 00:20:53 +03:00
|
|
|
func (c *Collection) indexInsert(item *object.Object) {
|
|
|
|
if !item.Geo().Empty() {
|
2022-09-20 03:51:14 +03:00
|
|
|
c.spatial.Insert(rtreeItem(item))
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-09-20 03:51:14 +03:00
|
|
|
// Scale factors that nudge a float64 by roughly one float32 unit of
// precision (2^-23) before it is narrowed to float32.
const (
	dRNDTOWARDS = (1.0 - 1.0/8388608.0) // round towards zero
	dRNDAWAY    = (1.0 + 1.0/8388608.0) // round away from zero
)

// rtreeValueDown narrows d to a float32 that is not greater than d. When the
// plain conversion rounds upwards, d is first scaled so that the result lands
// on the lower side.
func rtreeValueDown(d float64) float32 {
	f := float32(d)
	if float64(f) > d {
		// Moving down means away from zero for negatives and towards zero
		// for positives.
		factor := dRNDTOWARDS
		if d < 0 {
			factor = dRNDAWAY
		}
		f = float32(d * factor)
	}
	return f
}

// rtreeValueUp narrows d to a float32 that is not less than d. When the plain
// conversion rounds downwards, d is first scaled so that the result lands on
// the upper side.
func rtreeValueUp(d float64) float32 {
	f := float32(d)
	if float64(f) < d {
		// Moving up means towards zero for negatives and away from zero for
		// positives.
		factor := dRNDAWAY
		if d < 0 {
			factor = dRNDTOWARDS
		}
		f = float32(d * factor)
	}
	return f
}
|
|
|
|
|
2022-09-21 00:20:53 +03:00
|
|
|
func rtreeItem(item *object.Object) (min, max [2]float32, data *object.Object) {
|
2022-09-20 03:51:14 +03:00
|
|
|
min, max = rtreeRect(item.Rect())
|
|
|
|
return min, max, item
|
|
|
|
}
|
|
|
|
|
|
|
|
func rtreeRect(rect geometry.Rect) (min, max [2]float32) {
|
|
|
|
return [2]float32{
|
|
|
|
rtreeValueDown(rect.Min.X),
|
|
|
|
rtreeValueDown(rect.Min.Y),
|
|
|
|
}, [2]float32{
|
|
|
|
rtreeValueUp(rect.Max.X),
|
|
|
|
rtreeValueUp(rect.Max.Y),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-11 00:25:40 +03:00
|
|
|
// Set adds or replaces an object in the collection and returns the fields
|
2022-09-20 03:47:38 +03:00
|
|
|
// array.
|
2022-09-21 00:20:53 +03:00
|
|
|
func (c *Collection) Set(obj *object.Object) (prev *object.Object) {
|
2022-09-23 00:22:45 +03:00
|
|
|
prev, _ = c.objs.Set(obj.ID(), obj)
|
|
|
|
c.setFill(prev, obj)
|
|
|
|
return prev
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetMerged works just like Set but it will merge the new object fields and
|
|
|
|
// the previous object fields and create a newer object that is then set into
|
|
|
|
// the collection. The newer object is returned.
|
|
|
|
func (c *Collection) SetMerged(obj *object.Object,
|
|
|
|
) (prev, newObj *object.Object) {
|
|
|
|
prev, _ = c.objs.Set(obj.ID(), obj)
|
|
|
|
if prev != nil {
|
|
|
|
// Check if at least one field exists from the previous object and
|
|
|
|
// merge the two field lists, then re-set the new object. Otherwise,
|
|
|
|
// we stick with the current object.
|
|
|
|
// TODO: check if the old object has fields that new object does not
|
|
|
|
// and only reset those.
|
|
|
|
ofields := prev.Fields()
|
|
|
|
var reset bool
|
|
|
|
ofields.Scan(func(f field.Field) bool {
|
|
|
|
reset = true
|
|
|
|
return false
|
|
|
|
})
|
|
|
|
if reset {
|
|
|
|
obj.Fields().Scan(func(f field.Field) bool {
|
|
|
|
ofields = ofields.Set(f)
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
obj = object.New(obj.ID(), obj.Geo(), obj.Expires(), ofields)
|
|
|
|
c.objs.Set(obj.ID(), obj)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
c.setFill(prev, obj)
|
|
|
|
return prev, obj
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *Collection) setFill(prev, obj *object.Object) {
|
2022-09-21 00:20:53 +03:00
|
|
|
if prev != nil {
|
|
|
|
if prev.IsSpatial() {
|
|
|
|
c.indexDelete(prev)
|
2018-10-11 00:25:40 +03:00
|
|
|
c.objects--
|
|
|
|
} else {
|
2022-09-21 00:20:53 +03:00
|
|
|
c.values.Delete(prev)
|
2018-10-11 00:25:40 +03:00
|
|
|
c.nobjects--
|
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
if prev.Expires() != 0 {
|
|
|
|
c.expires.Delete(prev)
|
Update expiration logic
This commit changes the logic for managing the expiration of
objects in the database.
Before: There was a server-wide hashmap that stored the
collection key, id, and expiration timestamp for all objects
that had a TTL. The hashmap was occasionally probed at 20
random positions, looking for objects that have expired. Those
expired objects were immediately deleted, and if there was 5
or more objects deleted, then the probe happened again, with
no delay. If the number of objects was less than 5 then the
there was a 1/10th of a second delay before the next probe.
Now: Rather than a server-wide hashmap, each collection has
its own ordered priority queue that stores objects with TTLs.
Rather than probing, there is a background routine that
executes every 1/10th of a second, which pops the expired
objects from the collection queues, and deletes them.
The collection/queue method is a more stable approach than
the hashmap/probing method. With probing, we can run into
major cache misses for some cases where there is wide
TTL duration, such as in the hours or days. This may cause
the system to occasionally fall behind, leaving should-be
expired objects in memory. Using a queue, there is no
cache misses, all objects that should be expired will be
right away, regardless of the TTL durations.
Fixes #616
2021-07-12 23:37:50 +03:00
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
c.points -= prev.Geo().NumPoints()
|
|
|
|
c.weight -= prev.Weight()
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
if obj.IsSpatial() {
|
|
|
|
c.indexInsert(obj)
|
2018-10-11 00:25:40 +03:00
|
|
|
c.objects++
|
|
|
|
} else {
|
2022-09-21 00:20:53 +03:00
|
|
|
c.values.Set(obj)
|
2018-10-11 00:25:40 +03:00
|
|
|
c.nobjects++
|
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
if obj.Expires() != 0 {
|
|
|
|
c.expires.Set(obj)
|
2022-09-20 03:47:38 +03:00
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
c.points += obj.Geo().NumPoints()
|
|
|
|
c.weight += obj.Weight()
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// Delete removes an object and returns it.
|
|
|
|
// If the object does not exist then the 'ok' return value will be false.
|
2022-09-21 00:20:53 +03:00
|
|
|
func (c *Collection) Delete(id string) (prev *object.Object) {
|
2022-09-23 00:22:45 +03:00
|
|
|
prev, _ = c.objs.Delete(id)
|
2022-09-21 00:20:53 +03:00
|
|
|
if prev == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if prev.IsSpatial() {
|
|
|
|
if !prev.Geo().Empty() {
|
|
|
|
c.indexDelete(prev)
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
c.objects--
|
|
|
|
} else {
|
2022-09-21 00:20:53 +03:00
|
|
|
c.values.Delete(prev)
|
2018-10-11 00:25:40 +03:00
|
|
|
c.nobjects--
|
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
if prev.Expires() != 0 {
|
|
|
|
c.expires.Delete(prev)
|
Update expiration logic
This commit changes the logic for managing the expiration of
objects in the database.
Before: There was a server-wide hashmap that stored the
collection key, id, and expiration timestamp for all objects
that had a TTL. The hashmap was occasionally probed at 20
random positions, looking for objects that have expired. Those
expired objects were immediately deleted, and if there was 5
or more objects deleted, then the probe happened again, with
no delay. If the number of objects was less than 5 then the
there was a 1/10th of a second delay before the next probe.
Now: Rather than a server-wide hashmap, each collection has
its own ordered priority queue that stores objects with TTLs.
Rather than probing, there is a background routine that
executes every 1/10th of a second, which pops the expired
objects from the collection queues, and deletes them.
The collection/queue method is a more stable approach than
the hashmap/probing method. With probing, we can run into
major cache misses for some cases where there is wide
TTL duration, such as in the hours or days. This may cause
the system to occasionally fall behind, leaving should-be
expired objects in memory. Using a queue, there is no
cache misses, all objects that should be expired will be
right away, regardless of the TTL durations.
Fixes #616
2021-07-12 23:37:50 +03:00
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
c.points -= prev.Geo().NumPoints()
|
|
|
|
c.weight -= prev.Weight()
|
|
|
|
return prev
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// Get returns an object.
|
|
|
|
// If the object does not exist then the 'ok' return value will be false.
|
2022-09-21 00:20:53 +03:00
|
|
|
func (c *Collection) Get(id string) *object.Object {
|
2022-09-23 00:22:45 +03:00
|
|
|
obj, _ := c.objs.Get(id)
|
2022-09-21 00:20:53 +03:00
|
|
|
return obj
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// Scan iterates though the collection ids.
|
2019-04-24 15:09:41 +03:00
|
|
|
func (c *Collection) Scan(
|
|
|
|
desc bool,
|
|
|
|
cursor Cursor,
|
|
|
|
deadline *deadline.Deadline,
|
2022-09-21 00:20:53 +03:00
|
|
|
iterator func(obj *object.Object) bool,
|
2018-10-11 00:25:40 +03:00
|
|
|
) bool {
|
|
|
|
var keepon = true
|
2018-11-01 08:00:09 +03:00
|
|
|
var count uint64
|
2018-11-02 16:09:56 +03:00
|
|
|
var offset uint64
|
|
|
|
if cursor != nil {
|
|
|
|
offset = cursor.Offset()
|
|
|
|
cursor.Step(offset)
|
|
|
|
}
|
2022-09-23 00:22:45 +03:00
|
|
|
iter := func(_ string, obj *object.Object) bool {
|
2018-11-01 08:00:09 +03:00
|
|
|
count++
|
|
|
|
if count <= offset {
|
|
|
|
return true
|
|
|
|
}
|
2019-04-24 15:09:41 +03:00
|
|
|
nextStep(count, cursor, deadline)
|
2022-09-21 00:20:53 +03:00
|
|
|
keepon = iterator(obj)
|
2018-10-11 00:25:40 +03:00
|
|
|
return keepon
|
|
|
|
}
|
|
|
|
if desc {
|
2022-09-23 00:22:45 +03:00
|
|
|
c.objs.Reverse(iter)
|
2018-10-11 00:25:40 +03:00
|
|
|
} else {
|
2022-09-23 00:22:45 +03:00
|
|
|
c.objs.Scan(iter)
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
return keepon
|
|
|
|
}
|
|
|
|
|
|
|
|
// ScanRange iterates though the collection starting with specified id.
|
2019-04-24 15:09:41 +03:00
|
|
|
func (c *Collection) ScanRange(
|
|
|
|
start, end string,
|
|
|
|
desc bool,
|
|
|
|
cursor Cursor,
|
|
|
|
deadline *deadline.Deadline,
|
2022-09-21 00:20:53 +03:00
|
|
|
iterator func(o *object.Object) bool,
|
2018-10-11 00:25:40 +03:00
|
|
|
) bool {
|
|
|
|
var keepon = true
|
2018-11-01 08:00:09 +03:00
|
|
|
var count uint64
|
2018-11-02 16:09:56 +03:00
|
|
|
var offset uint64
|
|
|
|
if cursor != nil {
|
|
|
|
offset = cursor.Offset()
|
|
|
|
cursor.Step(offset)
|
|
|
|
}
|
2022-09-23 00:22:45 +03:00
|
|
|
iter := func(_ string, o *object.Object) bool {
|
2018-11-01 08:00:09 +03:00
|
|
|
count++
|
|
|
|
if count <= offset {
|
|
|
|
return true
|
|
|
|
}
|
2019-04-24 15:09:41 +03:00
|
|
|
nextStep(count, cursor, deadline)
|
2018-10-11 00:25:40 +03:00
|
|
|
if !desc {
|
2022-09-21 00:20:53 +03:00
|
|
|
if o.ID() >= end {
|
2018-10-11 00:25:40 +03:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
} else {
|
2022-09-21 00:20:53 +03:00
|
|
|
if o.ID() <= end {
|
2018-10-11 00:25:40 +03:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
keepon = iterator(o)
|
2018-10-11 00:25:40 +03:00
|
|
|
return keepon
|
|
|
|
}
|
|
|
|
|
|
|
|
if desc {
|
2022-09-23 00:22:45 +03:00
|
|
|
c.objs.Descend(start, iter)
|
2018-10-11 00:25:40 +03:00
|
|
|
} else {
|
2022-09-23 00:22:45 +03:00
|
|
|
c.objs.Ascend(start, iter)
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
return keepon
|
|
|
|
}
|
|
|
|
|
|
|
|
// SearchValues iterates though the collection values.
|
2019-04-24 15:09:41 +03:00
|
|
|
func (c *Collection) SearchValues(
|
|
|
|
desc bool,
|
|
|
|
cursor Cursor,
|
|
|
|
deadline *deadline.Deadline,
|
2022-09-21 00:20:53 +03:00
|
|
|
iterator func(o *object.Object) bool,
|
2018-10-11 00:25:40 +03:00
|
|
|
) bool {
|
|
|
|
var keepon = true
|
2018-11-01 08:00:09 +03:00
|
|
|
var count uint64
|
2018-11-02 16:09:56 +03:00
|
|
|
var offset uint64
|
|
|
|
if cursor != nil {
|
|
|
|
offset = cursor.Offset()
|
|
|
|
cursor.Step(offset)
|
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
iter := func(o *object.Object) bool {
|
2018-11-01 08:00:09 +03:00
|
|
|
count++
|
|
|
|
if count <= offset {
|
|
|
|
return true
|
|
|
|
}
|
2019-04-24 15:09:41 +03:00
|
|
|
nextStep(count, cursor, deadline)
|
2022-09-21 00:20:53 +03:00
|
|
|
keepon = iterator(o)
|
2018-10-11 00:25:40 +03:00
|
|
|
return keepon
|
|
|
|
}
|
|
|
|
if desc {
|
2022-09-12 19:12:51 +03:00
|
|
|
c.values.Reverse(iter)
|
2018-10-11 00:25:40 +03:00
|
|
|
} else {
|
2022-09-12 19:12:51 +03:00
|
|
|
c.values.Scan(iter)
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
return keepon
|
|
|
|
}
|
|
|
|
|
|
|
|
// SearchValuesRange iterates though the collection values.
|
|
|
|
func (c *Collection) SearchValuesRange(start, end string, desc bool,
|
2018-11-02 16:09:56 +03:00
|
|
|
cursor Cursor,
|
2019-04-24 15:09:41 +03:00
|
|
|
deadline *deadline.Deadline,
|
2022-09-21 00:20:53 +03:00
|
|
|
iterator func(o *object.Object) bool,
|
2018-10-11 00:25:40 +03:00
|
|
|
) bool {
|
|
|
|
var keepon = true
|
2018-11-01 08:00:09 +03:00
|
|
|
var count uint64
|
2018-11-02 16:09:56 +03:00
|
|
|
var offset uint64
|
|
|
|
if cursor != nil {
|
|
|
|
offset = cursor.Offset()
|
|
|
|
cursor.Step(offset)
|
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
iter := func(o *object.Object) bool {
|
2018-11-01 08:00:09 +03:00
|
|
|
count++
|
|
|
|
if count <= offset {
|
|
|
|
return true
|
|
|
|
}
|
2019-04-24 15:09:41 +03:00
|
|
|
nextStep(count, cursor, deadline)
|
2022-09-21 00:20:53 +03:00
|
|
|
keepon = iterator(o)
|
2018-10-11 00:25:40 +03:00
|
|
|
return keepon
|
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
|
2022-09-21 20:03:53 +03:00
|
|
|
pstart := object.New("", String(start), 0, field.List{})
|
|
|
|
pend := object.New("", String(end), 0, field.List{})
|
2018-10-11 00:25:40 +03:00
|
|
|
if desc {
|
2020-10-28 01:29:50 +03:00
|
|
|
// descend range
|
2022-09-21 00:20:53 +03:00
|
|
|
c.values.Descend(pstart, func(item *object.Object) bool {
|
2020-10-28 01:29:50 +03:00
|
|
|
return bGT(c.values, item, pend) && iter(item)
|
|
|
|
})
|
2018-10-11 00:25:40 +03:00
|
|
|
} else {
|
2022-09-21 00:20:53 +03:00
|
|
|
c.values.Ascend(pstart, func(item *object.Object) bool {
|
2020-10-28 01:29:50 +03:00
|
|
|
return bLT(c.values, item, pend) && iter(item)
|
|
|
|
})
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
return keepon
|
|
|
|
}
|
|
|
|
|
2022-09-21 00:20:53 +03:00
|
|
|
// bLT reports whether a sorts before b under tr's ordering.
func bLT(tr *btree.BTreeG[*object.Object], a, b *object.Object) bool {
	return tr.Less(a, b)
}
|
|
|
|
// bGT reports whether a sorts after b under tr's ordering.
func bGT(tr *btree.BTreeG[*object.Object], a, b *object.Object) bool {
	return tr.Less(b, a)
}
|
2020-10-28 01:29:50 +03:00
|
|
|
|
2018-10-11 00:25:40 +03:00
|
|
|
// ScanGreaterOrEqual iterates though the collection starting with specified id.
|
|
|
|
func (c *Collection) ScanGreaterOrEqual(id string, desc bool,
|
2018-11-02 16:09:56 +03:00
|
|
|
cursor Cursor,
|
2019-04-24 15:09:41 +03:00
|
|
|
deadline *deadline.Deadline,
|
2022-09-21 00:20:53 +03:00
|
|
|
iterator func(o *object.Object) bool,
|
2018-10-11 00:25:40 +03:00
|
|
|
) bool {
|
|
|
|
var keepon = true
|
2018-11-02 16:09:56 +03:00
|
|
|
var count uint64
|
|
|
|
var offset uint64
|
|
|
|
if cursor != nil {
|
|
|
|
offset = cursor.Offset()
|
|
|
|
cursor.Step(offset)
|
|
|
|
}
|
2022-09-23 00:22:45 +03:00
|
|
|
iter := func(_ string, o *object.Object) bool {
|
2018-11-02 16:09:56 +03:00
|
|
|
count++
|
|
|
|
if count <= offset {
|
|
|
|
return true
|
|
|
|
}
|
2019-04-24 15:09:41 +03:00
|
|
|
nextStep(count, cursor, deadline)
|
2022-09-21 00:20:53 +03:00
|
|
|
keepon = iterator(o)
|
2018-10-11 00:25:40 +03:00
|
|
|
return keepon
|
|
|
|
}
|
|
|
|
if desc {
|
2022-09-23 00:22:45 +03:00
|
|
|
c.objs.Descend(id, iter)
|
2018-10-11 00:25:40 +03:00
|
|
|
} else {
|
2022-09-23 00:22:45 +03:00
|
|
|
c.objs.Ascend(id, iter)
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
return keepon
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *Collection) geoSearch(
|
|
|
|
rect geometry.Rect,
|
2022-09-21 00:20:53 +03:00
|
|
|
iter func(o *object.Object) bool,
|
2018-10-11 00:25:40 +03:00
|
|
|
) bool {
|
|
|
|
alive := true
|
2022-09-20 03:51:14 +03:00
|
|
|
min, max := rtreeRect(rect)
|
2022-09-12 19:12:51 +03:00
|
|
|
c.spatial.Search(
|
2022-09-20 03:51:14 +03:00
|
|
|
min, max,
|
2022-09-21 00:20:53 +03:00
|
|
|
func(_, _ [2]float32, o *object.Object) bool {
|
|
|
|
alive = iter(o)
|
2018-10-11 00:25:40 +03:00
|
|
|
return alive
|
|
|
|
},
|
|
|
|
)
|
|
|
|
return alive
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *Collection) geoSparse(
|
|
|
|
obj geojson.Object, sparse uint8,
|
2022-09-21 00:20:53 +03:00
|
|
|
iter func(o *object.Object) (match, ok bool),
|
2018-10-11 00:25:40 +03:00
|
|
|
) bool {
|
|
|
|
matches := make(map[string]bool)
|
|
|
|
alive := true
|
2022-09-21 00:20:53 +03:00
|
|
|
c.geoSparseInner(obj.Rect(), sparse, func(o *object.Object) (match, ok bool) {
|
|
|
|
ok = true
|
|
|
|
if !matches[o.ID()] {
|
|
|
|
match, ok = iter(o)
|
|
|
|
if match {
|
|
|
|
matches[o.ID()] = true
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
}
|
|
|
|
return match, ok
|
|
|
|
})
|
2018-10-11 00:25:40 +03:00
|
|
|
return alive
|
|
|
|
}
|
|
|
|
func (c *Collection) geoSparseInner(
|
|
|
|
rect geometry.Rect, sparse uint8,
|
2022-09-21 00:20:53 +03:00
|
|
|
iter func(o *object.Object) (match, ok bool),
|
2018-10-11 00:25:40 +03:00
|
|
|
) bool {
|
|
|
|
if sparse > 0 {
|
|
|
|
w := rect.Max.X - rect.Min.X
|
|
|
|
h := rect.Max.Y - rect.Min.Y
|
|
|
|
quads := [4]geometry.Rect{
|
2021-02-04 00:30:55 +03:00
|
|
|
{
|
2018-10-11 00:25:40 +03:00
|
|
|
Min: geometry.Point{X: rect.Min.X, Y: rect.Min.Y + h/2},
|
|
|
|
Max: geometry.Point{X: rect.Min.X + w/2, Y: rect.Max.Y},
|
|
|
|
},
|
2021-02-04 00:30:55 +03:00
|
|
|
{
|
2018-10-11 00:25:40 +03:00
|
|
|
Min: geometry.Point{X: rect.Min.X + w/2, Y: rect.Min.Y + h/2},
|
|
|
|
Max: geometry.Point{X: rect.Max.X, Y: rect.Max.Y},
|
|
|
|
},
|
2021-02-04 00:30:55 +03:00
|
|
|
{
|
2018-10-11 00:25:40 +03:00
|
|
|
Min: geometry.Point{X: rect.Min.X, Y: rect.Min.Y},
|
|
|
|
Max: geometry.Point{X: rect.Min.X + w/2, Y: rect.Min.Y + h/2},
|
|
|
|
},
|
2021-02-04 00:30:55 +03:00
|
|
|
{
|
2018-10-11 00:25:40 +03:00
|
|
|
Min: geometry.Point{X: rect.Min.X + w/2, Y: rect.Min.Y},
|
|
|
|
Max: geometry.Point{X: rect.Max.X, Y: rect.Min.Y + h/2},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
for _, quad := range quads {
|
|
|
|
if !c.geoSparseInner(quad, sparse-1, iter) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
alive := true
|
2022-09-21 00:20:53 +03:00
|
|
|
c.geoSearch(rect, func(o *object.Object) bool {
|
|
|
|
match, ok := iter(o)
|
|
|
|
if !ok {
|
|
|
|
alive = false
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
return !match
|
|
|
|
})
|
2018-10-11 00:25:40 +03:00
|
|
|
return alive
|
|
|
|
}
|
|
|
|
|
|
|
|
// Within returns all object that are fully contained within an object or
|
|
|
|
// bounding box. Set obj to nil in order to use the bounding box.
|
|
|
|
func (c *Collection) Within(
|
2018-11-01 01:01:55 +03:00
|
|
|
obj geojson.Object,
|
|
|
|
sparse uint8,
|
2018-11-02 16:09:56 +03:00
|
|
|
cursor Cursor,
|
2019-04-24 15:09:41 +03:00
|
|
|
deadline *deadline.Deadline,
|
2022-09-21 00:20:53 +03:00
|
|
|
iter func(o *object.Object) bool,
|
2018-10-11 00:25:40 +03:00
|
|
|
) bool {
|
2018-11-01 01:01:55 +03:00
|
|
|
var count uint64
|
2018-11-02 16:09:56 +03:00
|
|
|
var offset uint64
|
|
|
|
if cursor != nil {
|
|
|
|
offset = cursor.Offset()
|
|
|
|
cursor.Step(offset)
|
|
|
|
}
|
2018-10-11 00:25:40 +03:00
|
|
|
if sparse > 0 {
|
2022-09-21 00:20:53 +03:00
|
|
|
return c.geoSparse(obj, sparse, func(o *object.Object) (match, ok bool) {
|
2018-11-01 01:01:55 +03:00
|
|
|
count++
|
|
|
|
if count <= offset {
|
2022-09-21 00:20:53 +03:00
|
|
|
return false, true
|
2018-11-01 01:01:55 +03:00
|
|
|
}
|
2019-04-24 15:09:41 +03:00
|
|
|
nextStep(count, cursor, deadline)
|
2022-09-21 00:20:53 +03:00
|
|
|
if match = o.Geo().Within(obj); match {
|
|
|
|
ok = iter(o)
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
return match, ok
|
|
|
|
})
|
|
|
|
}
|
|
|
|
return c.geoSearch(obj.Rect(), func(o *object.Object) bool {
|
|
|
|
count++
|
|
|
|
if count <= offset {
|
2018-10-11 00:25:40 +03:00
|
|
|
return true
|
2022-09-21 00:20:53 +03:00
|
|
|
}
|
|
|
|
nextStep(count, cursor, deadline)
|
|
|
|
if o.Geo().Within(obj) {
|
|
|
|
return iter(o)
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
})
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// Intersects returns all object that are intersect an object or bounding box.
|
|
|
|
// Set obj to nil in order to use the bounding box.
|
|
|
|
func (c *Collection) Intersects(
|
2022-09-21 00:20:53 +03:00
|
|
|
gobj geojson.Object,
|
2018-11-01 01:01:55 +03:00
|
|
|
sparse uint8,
|
2018-11-02 16:09:56 +03:00
|
|
|
cursor Cursor,
|
2019-04-24 15:09:41 +03:00
|
|
|
deadline *deadline.Deadline,
|
2022-09-21 00:20:53 +03:00
|
|
|
iter func(o *object.Object) bool,
|
2018-10-11 00:25:40 +03:00
|
|
|
) bool {
|
2018-11-01 01:01:55 +03:00
|
|
|
var count uint64
|
2018-11-02 16:09:56 +03:00
|
|
|
var offset uint64
|
|
|
|
if cursor != nil {
|
|
|
|
offset = cursor.Offset()
|
|
|
|
cursor.Step(offset)
|
|
|
|
}
|
2018-10-11 00:25:40 +03:00
|
|
|
if sparse > 0 {
|
2022-09-21 00:20:53 +03:00
|
|
|
return c.geoSparse(gobj, sparse, func(o *object.Object) (match, ok bool) {
|
2018-11-01 01:01:55 +03:00
|
|
|
count++
|
|
|
|
if count <= offset {
|
2022-09-21 00:20:53 +03:00
|
|
|
return false, true
|
2018-11-01 01:01:55 +03:00
|
|
|
}
|
2019-04-24 15:09:41 +03:00
|
|
|
nextStep(count, cursor, deadline)
|
2022-09-21 00:20:53 +03:00
|
|
|
if match = o.Geo().Intersects(gobj); match {
|
|
|
|
ok = iter(o)
|
2018-10-11 00:25:40 +03:00
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
return match, ok
|
|
|
|
})
|
|
|
|
}
|
|
|
|
return c.geoSearch(gobj.Rect(), func(o *object.Object) bool {
|
|
|
|
count++
|
|
|
|
if count <= offset {
|
2018-10-11 00:25:40 +03:00
|
|
|
return true
|
2022-09-21 00:20:53 +03:00
|
|
|
}
|
|
|
|
nextStep(count, cursor, deadline)
|
|
|
|
if o.Geo().Intersects(gobj) {
|
|
|
|
return iter(o)
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
},
|
2018-10-11 00:25:40 +03:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Nearby returns the nearest neighbors
|
|
|
|
func (c *Collection) Nearby(
|
|
|
|
target geojson.Object,
|
2018-11-02 16:09:56 +03:00
|
|
|
cursor Cursor,
|
2019-04-24 15:09:41 +03:00
|
|
|
deadline *deadline.Deadline,
|
2022-09-21 00:20:53 +03:00
|
|
|
iter func(o *object.Object, dist float64) bool,
|
2018-10-11 00:25:40 +03:00
|
|
|
) bool {
|
|
|
|
alive := true
|
|
|
|
center := target.Center()
|
2018-11-01 01:01:55 +03:00
|
|
|
var count uint64
|
2018-11-02 16:09:56 +03:00
|
|
|
var offset uint64
|
|
|
|
if cursor != nil {
|
|
|
|
offset = cursor.Offset()
|
|
|
|
cursor.Step(offset)
|
|
|
|
}
|
2022-09-21 00:20:53 +03:00
|
|
|
distFn := geodeticDistAlgo([2]float64{center.X, center.Y})
|
2022-09-12 19:12:51 +03:00
|
|
|
c.spatial.Nearby(
|
2022-09-21 00:20:53 +03:00
|
|
|
func(min, max [2]float32, data *object.Object, item bool) float64 {
|
|
|
|
return distFn(
|
2022-09-20 03:51:14 +03:00
|
|
|
[2]float64{float64(min[0]), float64(min[1])},
|
|
|
|
[2]float64{float64(max[0]), float64(max[1])},
|
|
|
|
data, item,
|
2022-09-21 00:20:53 +03:00
|
|
|
)
|
2022-09-20 03:51:14 +03:00
|
|
|
},
|
2022-09-21 00:20:53 +03:00
|
|
|
func(_, _ [2]float32, o *object.Object, dist float64) bool {
|
2018-11-01 01:01:55 +03:00
|
|
|
count++
|
|
|
|
if count <= offset {
|
|
|
|
return true
|
|
|
|
}
|
2019-04-24 15:09:41 +03:00
|
|
|
nextStep(count, cursor, deadline)
|
2022-09-21 00:20:53 +03:00
|
|
|
alive = iter(o, dist)
|
2018-10-11 00:25:40 +03:00
|
|
|
return alive
|
|
|
|
},
|
|
|
|
)
|
|
|
|
return alive
|
|
|
|
}
|
2019-04-24 15:09:41 +03:00
|
|
|
|
|
|
|
func nextStep(step uint64, cursor Cursor, deadline *deadline.Deadline) {
|
2021-04-28 15:10:18 +03:00
|
|
|
if step&(yieldStep-1) == (yieldStep - 1) {
|
2019-04-24 15:09:41 +03:00
|
|
|
runtime.Gosched()
|
|
|
|
deadline.Check()
|
|
|
|
}
|
|
|
|
if cursor != nil {
|
|
|
|
cursor.Step(1)
|
|
|
|
}
|
|
|
|
}
|
Update expiration logic
This commit changes the logic for managing the expiration of
objects in the database.
Before: There was a server-wide hashmap that stored the
collection key, id, and expiration timestamp for all objects
that had a TTL. The hashmap was occasionally probed at 20
random positions, looking for objects that have expired. Those
expired objects were immediately deleted, and if there was 5
or more objects deleted, then the probe happened again, with
no delay. If the number of objects was less than 5 then the
there was a 1/10th of a second delay before the next probe.
Now: Rather than a server-wide hashmap, each collection has
its own ordered priority queue that stores objects with TTLs.
Rather than probing, there is a background routine that
executes every 1/10th of a second, which pops the expired
objects from the collection queues, and deletes them.
The collection/queue method is a more stable approach than
the hashmap/probing method. With probing, we can run into
major cache misses for some cases where there is wide
TTL duration, such as in the hours or days. This may cause
the system to occasionally fall behind, leaving should-be
expired objects in memory. Using a queue, there is no
cache misses, all objects that should be expired will be
right away, regardless of the TTL durations.
Fixes #616
2021-07-12 23:37:50 +03:00
|
|
|
|
2022-09-13 18:16:41 +03:00
|
|
|
// ScanExpires returns a list of all objects that have expired.
|
2022-09-21 00:20:53 +03:00
|
|
|
func (c *Collection) ScanExpires(iter func(o *object.Object) bool) {
|
|
|
|
c.expires.Scan(iter)
|
Update expiration logic
This commit changes the logic for managing the expiration of
objects in the database.
Before: There was a server-wide hashmap that stored the
collection key, id, and expiration timestamp for all objects
that had a TTL. The hashmap was occasionally probed at 20
random positions, looking for objects that have expired. Those
expired objects were immediately deleted, and if there was 5
or more objects deleted, then the probe happened again, with
no delay. If the number of objects was less than 5 then the
there was a 1/10th of a second delay before the next probe.
Now: Rather than a server-wide hashmap, each collection has
its own ordered priority queue that stores objects with TTLs.
Rather than probing, there is a background routine that
executes every 1/10th of a second, which pops the expired
objects from the collection queues, and deletes them.
The collection/queue method is a more stable approach than
the hashmap/probing method. With probing, we can run into
major cache misses for some cases where there is wide
TTL duration, such as in the hours or days. This may cause
the system to occasionally fall behind, leaving should-be
expired objects in memory. Using a queue, there is no
cache misses, all objects that should be expired will be
right away, regardless of the TTL durations.
Fixes #616
2021-07-12 23:37:50 +03:00
|
|
|
}
|