2018-10-29 01:49:45 +03:00
|
|
|
package server
|
2016-03-30 19:32:02 +03:00
|
|
|
|
|
|
|
import (
|
2016-07-15 23:01:15 +03:00
|
|
|
"math"
|
2016-03-30 19:32:02 +03:00
|
|
|
"os"
|
2016-12-06 02:24:26 +03:00
|
|
|
"strconv"
|
2016-03-30 19:32:02 +03:00
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
2021-09-13 20:02:36 +03:00
|
|
|
"github.com/tidwall/btree"
|
2018-10-29 01:49:45 +03:00
|
|
|
"github.com/tidwall/geojson"
|
2018-10-11 00:25:40 +03:00
|
|
|
"github.com/tidwall/tile38/core"
|
|
|
|
"github.com/tidwall/tile38/internal/collection"
|
|
|
|
"github.com/tidwall/tile38/internal/log"
|
2016-03-30 19:32:02 +03:00
|
|
|
)
|
|
|
|
|
2016-12-06 02:24:26 +03:00
|
|
|
// Batch limits used by aofshrink.
const (
	// maxkeys is the number of collection keys gathered per pass while
	// the server lock is held.
	maxkeys = 8
	// maxids is the number of objects of one collection that are
	// serialized per pass while the server lock is held.
	maxids = 32
	// maxchunk is the pending-buffer size, in bytes, above which the
	// buffer is written out to the shrink file.
	maxchunk = 4 * 1024 * 1024
)
|
2016-03-30 19:32:02 +03:00
|
|
|
|
2021-12-09 19:24:26 +03:00
|
|
|
func (s *Server) aofshrink() {
|
|
|
|
if s.aof == nil {
|
2018-04-11 20:53:36 +03:00
|
|
|
return
|
|
|
|
}
|
2016-03-30 19:32:02 +03:00
|
|
|
start := time.Now()
|
2021-12-09 19:24:26 +03:00
|
|
|
s.mu.Lock()
|
|
|
|
if s.shrinking {
|
|
|
|
s.mu.Unlock()
|
2016-03-30 19:32:02 +03:00
|
|
|
return
|
|
|
|
}
|
2021-12-09 19:24:26 +03:00
|
|
|
s.shrinking = true
|
|
|
|
s.shrinklog = nil
|
|
|
|
s.mu.Unlock()
|
2016-12-06 02:24:26 +03:00
|
|
|
|
2016-03-30 19:32:02 +03:00
|
|
|
defer func() {
|
2021-12-09 19:24:26 +03:00
|
|
|
s.mu.Lock()
|
|
|
|
s.shrinking = false
|
|
|
|
s.shrinklog = nil
|
|
|
|
s.mu.Unlock()
|
2021-03-31 18:13:44 +03:00
|
|
|
log.Infof("aof shrink ended %v", time.Since(start))
|
2016-12-06 02:24:26 +03:00
|
|
|
}()
|
2016-03-30 19:32:02 +03:00
|
|
|
|
2016-12-06 02:24:26 +03:00
|
|
|
err := func() error {
|
2018-04-11 20:53:36 +03:00
|
|
|
f, err := os.Create(core.AppendFileName + "-shrink")
|
2016-03-30 19:32:02 +03:00
|
|
|
if err != nil {
|
2016-12-06 02:24:26 +03:00
|
|
|
return err
|
2016-03-30 19:32:02 +03:00
|
|
|
}
|
2016-12-06 02:24:26 +03:00
|
|
|
defer f.Close()
|
|
|
|
var aofbuf []byte
|
|
|
|
var values []string
|
|
|
|
var keys []string
|
|
|
|
var nextkey string
|
|
|
|
var keysdone bool
|
2016-03-30 19:32:02 +03:00
|
|
|
for {
|
2016-12-06 02:24:26 +03:00
|
|
|
if len(keys) == 0 {
|
|
|
|
// load more keys
|
|
|
|
if keysdone {
|
2016-03-30 19:32:02 +03:00
|
|
|
break
|
|
|
|
}
|
2016-12-06 02:24:26 +03:00
|
|
|
keysdone = true
|
|
|
|
func() {
|
2021-12-09 19:24:26 +03:00
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
2022-09-13 18:16:41 +03:00
|
|
|
s.cols.Ascend(nextkey,
|
|
|
|
func(key string, col *collection.Collection) bool {
|
|
|
|
if len(keys) == maxkeys {
|
|
|
|
keysdone = false
|
|
|
|
nextkey = key
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
keys = append(keys, key)
|
|
|
|
return true
|
|
|
|
},
|
|
|
|
)
|
2016-12-06 02:24:26 +03:00
|
|
|
}()
|
|
|
|
continue
|
2016-03-30 19:32:02 +03:00
|
|
|
}
|
|
|
|
|
2016-12-06 02:24:26 +03:00
|
|
|
var idsdone bool
|
|
|
|
var nextid string
|
|
|
|
for {
|
|
|
|
if idsdone {
|
|
|
|
keys = keys[1:]
|
|
|
|
break
|
|
|
|
}
|
2016-03-30 19:32:02 +03:00
|
|
|
|
2016-12-06 02:24:26 +03:00
|
|
|
// load more objects
|
|
|
|
func() {
|
|
|
|
idsdone = true
|
2021-12-09 19:24:26 +03:00
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
2022-09-13 18:16:41 +03:00
|
|
|
col, ok := s.cols.Get(keys[0])
|
|
|
|
if !ok {
|
2016-12-06 02:24:26 +03:00
|
|
|
return
|
|
|
|
}
|
Update expiration logic
This commit changes the logic for managing the expiration of
objects in the database.
Before: There was a server-wide hashmap that stored the
collection key, id, and expiration timestamp for all objects
that had a TTL. The hashmap was occasionally probed at 20
random positions, looking for objects that have expired. Those
expired objects were immediately deleted, and if there was 5
or more objects deleted, then the probe happened again, with
no delay. If the number of objects was less than 5 then the
there was a 1/10th of a second delay before the next probe.
Now: Rather than a server-wide hashmap, each collection has
its own ordered priority queue that stores objects with TTLs.
Rather than probing, there is a background routine that
executes every 1/10th of a second, which pops the expired
objects from the collection queues, and deletes them.
The collection/queue method is a more stable approach than
the hashmap/probing method. With probing, we can run into
major cache misses for some cases where there is wide
TTL duration, such as in the hours or days. This may cause
the system to occasionally fall behind, leaving should-be
expired objects in memory. Using a queue, there is no
cache misses, all objects that should be expired will be
right away, regardless of the TTL durations.
Fixes #616
2021-07-12 23:37:50 +03:00
|
|
|
var fnames = col.FieldArr() // reload an array of field names to match each object
|
|
|
|
var fmap = col.FieldMap() //
|
2019-11-17 17:25:25 +03:00
|
|
|
var now = time.Now().UnixNano() // used for expiration
|
|
|
|
var count = 0 // the object count
|
2019-04-24 15:09:41 +03:00
|
|
|
col.ScanGreaterOrEqual(nextid, false, nil, nil,
|
Update expiration logic
This commit changes the logic for managing the expiration of
objects in the database.
Before: There was a server-wide hashmap that stored the
collection key, id, and expiration timestamp for all objects
that had a TTL. The hashmap was occasionally probed at 20
random positions, looking for objects that have expired. Those
expired objects were immediately deleted, and if there was 5
or more objects deleted, then the probe happened again, with
no delay. If the number of objects was less than 5 then the
there was a 1/10th of a second delay before the next probe.
Now: Rather than a server-wide hashmap, each collection has
its own ordered priority queue that stores objects with TTLs.
Rather than probing, there is a background routine that
executes every 1/10th of a second, which pops the expired
objects from the collection queues, and deletes them.
The collection/queue method is a more stable approach than
the hashmap/probing method. With probing, we can run into
major cache misses for some cases where there is wide
TTL duration, such as in the hours or days. This may cause
the system to occasionally fall behind, leaving should-be
expired objects in memory. Using a queue, there is no
cache misses, all objects that should be expired will be
right away, regardless of the TTL durations.
Fixes #616
2021-07-12 23:37:50 +03:00
|
|
|
func(id string, obj geojson.Object, fields []float64, ex int64) bool {
|
2016-12-06 02:24:26 +03:00
|
|
|
if count == maxids {
|
|
|
|
// we reached the max number of ids for one batch
|
|
|
|
nextid = id
|
|
|
|
idsdone = false
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
// here we fill the values array with a new command
|
|
|
|
values = values[:0]
|
|
|
|
values = append(values, "set")
|
|
|
|
values = append(values, keys[0])
|
|
|
|
values = append(values, id)
|
2020-11-10 00:45:40 +03:00
|
|
|
if len(fields) > 0 {
|
|
|
|
fvs := orderFields(fmap, fnames, fields)
|
|
|
|
for _, fv := range fvs {
|
|
|
|
if fv.value != 0 {
|
|
|
|
values = append(values, "field")
|
|
|
|
values = append(values, fv.field)
|
|
|
|
values = append(values, strconv.FormatFloat(fv.value, 'f', -1, 64))
|
|
|
|
}
|
2016-12-06 02:24:26 +03:00
|
|
|
}
|
|
|
|
}
|
Update expiration logic
This commit changes the logic for managing the expiration of
objects in the database.
Before: There was a server-wide hashmap that stored the
collection key, id, and expiration timestamp for all objects
that had a TTL. The hashmap was occasionally probed at 20
random positions, looking for objects that have expired. Those
expired objects were immediately deleted, and if there was 5
or more objects deleted, then the probe happened again, with
no delay. If the number of objects was less than 5 then the
there was a 1/10th of a second delay before the next probe.
Now: Rather than a server-wide hashmap, each collection has
its own ordered priority queue that stores objects with TTLs.
Rather than probing, there is a background routine that
executes every 1/10th of a second, which pops the expired
objects from the collection queues, and deletes them.
The collection/queue method is a more stable approach than
the hashmap/probing method. With probing, we can run into
major cache misses for some cases where there is wide
TTL duration, such as in the hours or days. This may cause
the system to occasionally fall behind, leaving should-be
expired objects in memory. Using a queue, there is no
cache misses, all objects that should be expired will be
right away, regardless of the TTL durations.
Fixes #616
2021-07-12 23:37:50 +03:00
|
|
|
if ex != 0 {
|
|
|
|
ttl := math.Floor(float64(ex-now)/float64(time.Second)*10) / 10
|
|
|
|
if ttl < 0.1 {
|
|
|
|
// always leave a little bit of ttl.
|
|
|
|
ttl = 0.1
|
2016-12-06 02:24:26 +03:00
|
|
|
}
|
Update expiration logic
This commit changes the logic for managing the expiration of
objects in the database.
Before: There was a server-wide hashmap that stored the
collection key, id, and expiration timestamp for all objects
that had a TTL. The hashmap was occasionally probed at 20
random positions, looking for objects that have expired. Those
expired objects were immediately deleted, and if there was 5
or more objects deleted, then the probe happened again, with
no delay. If the number of objects was less than 5 then the
there was a 1/10th of a second delay before the next probe.
Now: Rather than a server-wide hashmap, each collection has
its own ordered priority queue that stores objects with TTLs.
Rather than probing, there is a background routine that
executes every 1/10th of a second, which pops the expired
objects from the collection queues, and deletes them.
The collection/queue method is a more stable approach than
the hashmap/probing method. With probing, we can run into
major cache misses for some cases where there is wide
TTL duration, such as in the hours or days. This may cause
the system to occasionally fall behind, leaving should-be
expired objects in memory. Using a queue, there is no
cache misses, all objects that should be expired will be
right away, regardless of the TTL durations.
Fixes #616
2021-07-12 23:37:50 +03:00
|
|
|
values = append(values, "ex")
|
|
|
|
values = append(values, strconv.FormatFloat(ttl, 'f', -1, 64))
|
2016-12-06 02:24:26 +03:00
|
|
|
}
|
2018-10-11 00:25:40 +03:00
|
|
|
if objIsSpatial(obj) {
|
|
|
|
values = append(values, "object")
|
|
|
|
values = append(values, string(obj.AppendJSON(nil)))
|
|
|
|
} else {
|
|
|
|
values = append(values, "string")
|
|
|
|
values = append(values, obj.String())
|
2016-07-15 23:01:15 +03:00
|
|
|
}
|
2016-03-30 19:32:02 +03:00
|
|
|
|
2016-12-06 02:24:26 +03:00
|
|
|
// append the values to the aof buffer
|
|
|
|
aofbuf = append(aofbuf, '*')
|
|
|
|
aofbuf = append(aofbuf, strconv.FormatInt(int64(len(values)), 10)...)
|
|
|
|
aofbuf = append(aofbuf, '\r', '\n')
|
|
|
|
for _, value := range values {
|
|
|
|
aofbuf = append(aofbuf, '$')
|
|
|
|
aofbuf = append(aofbuf, strconv.FormatInt(int64(len(value)), 10)...)
|
|
|
|
aofbuf = append(aofbuf, '\r', '\n')
|
|
|
|
aofbuf = append(aofbuf, value...)
|
|
|
|
aofbuf = append(aofbuf, '\r', '\n')
|
|
|
|
}
|
2016-03-30 19:32:02 +03:00
|
|
|
|
2016-12-06 02:24:26 +03:00
|
|
|
// increment the object count
|
|
|
|
count++
|
|
|
|
return true
|
|
|
|
},
|
|
|
|
)
|
|
|
|
|
|
|
|
}()
|
2017-12-12 03:12:15 +03:00
|
|
|
if len(aofbuf) > maxchunk {
|
|
|
|
if _, err := f.Write(aofbuf); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
aofbuf = aofbuf[:0]
|
2016-03-30 19:32:02 +03:00
|
|
|
}
|
2016-12-06 02:24:26 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// load hooks
|
|
|
|
// first load the names of the hooks
|
|
|
|
var hnames []string
|
|
|
|
func() {
|
2021-12-09 19:24:26 +03:00
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
|
|
|
hnames = make([]string, 0, s.hooks.Len())
|
|
|
|
s.hooks.Walk(func(v []interface{}) {
|
2021-09-13 20:02:36 +03:00
|
|
|
for _, v := range v {
|
|
|
|
hnames = append(hnames, v.(*Hook).Name)
|
|
|
|
}
|
|
|
|
})
|
2016-12-06 02:24:26 +03:00
|
|
|
}()
|
2021-09-13 20:02:36 +03:00
|
|
|
var hookHint btree.PathHint
|
2016-12-06 02:24:26 +03:00
|
|
|
for _, name := range hnames {
|
|
|
|
func() {
|
2021-12-09 19:24:26 +03:00
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
|
|
|
hook, _ := s.hooks.GetHint(&Hook{Name: name}, &hookHint).(*Hook)
|
2016-12-06 02:24:26 +03:00
|
|
|
if hook == nil {
|
2016-03-30 19:32:02 +03:00
|
|
|
return
|
|
|
|
}
|
2018-08-14 03:05:30 +03:00
|
|
|
hook.cond.L.Lock()
|
|
|
|
defer hook.cond.L.Unlock()
|
2016-12-06 02:24:26 +03:00
|
|
|
|
|
|
|
var values []string
|
2018-08-14 03:05:30 +03:00
|
|
|
if hook.channel {
|
|
|
|
values = append(values, "setchan", name)
|
|
|
|
} else {
|
|
|
|
values = append(values, "sethook", name,
|
|
|
|
strings.Join(hook.Endpoints, ","))
|
|
|
|
}
|
|
|
|
for _, meta := range hook.Metas {
|
|
|
|
values = append(values, "meta", meta.Name, meta.Value)
|
|
|
|
}
|
2018-08-14 06:27:22 +03:00
|
|
|
if !hook.expires.IsZero() {
|
2021-03-31 18:13:44 +03:00
|
|
|
ex := float64(time.Until(hook.expires)) / float64(time.Second)
|
2018-08-14 06:27:22 +03:00
|
|
|
values = append(values, "ex",
|
|
|
|
strconv.FormatFloat(ex, 'f', 1, 64))
|
|
|
|
}
|
2021-03-31 18:13:44 +03:00
|
|
|
values = append(values, hook.Message.Args...)
|
2016-12-06 02:24:26 +03:00
|
|
|
// append the values to the aof buffer
|
|
|
|
aofbuf = append(aofbuf, '*')
|
|
|
|
aofbuf = append(aofbuf, strconv.FormatInt(int64(len(values)), 10)...)
|
|
|
|
aofbuf = append(aofbuf, '\r', '\n')
|
|
|
|
for _, value := range values {
|
|
|
|
aofbuf = append(aofbuf, '$')
|
|
|
|
aofbuf = append(aofbuf, strconv.FormatInt(int64(len(value)), 10)...)
|
|
|
|
aofbuf = append(aofbuf, '\r', '\n')
|
|
|
|
aofbuf = append(aofbuf, value...)
|
|
|
|
aofbuf = append(aofbuf, '\r', '\n')
|
|
|
|
}
|
|
|
|
}()
|
2016-03-30 19:32:02 +03:00
|
|
|
}
|
2016-12-06 02:24:26 +03:00
|
|
|
if len(aofbuf) > 0 {
|
|
|
|
if _, err := f.Write(aofbuf); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
aofbuf = aofbuf[:0]
|
2016-03-30 19:32:02 +03:00
|
|
|
}
|
2016-12-06 02:24:26 +03:00
|
|
|
if err := f.Sync(); err != nil {
|
|
|
|
return err
|
2016-03-30 19:32:02 +03:00
|
|
|
}
|
|
|
|
|
2016-12-06 02:24:26 +03:00
|
|
|
// finally grab any new data that may have been written since
|
|
|
|
// the aofshrink has started and swap out the files.
|
|
|
|
return func() error {
|
2021-12-09 19:24:26 +03:00
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
2018-10-29 01:49:45 +03:00
|
|
|
|
2021-06-13 17:53:27 +03:00
|
|
|
// kill all followers connections and close their files. This
|
|
|
|
// ensures that there is only one opened AOF at a time which is
|
|
|
|
// what Windows requires in order to perform the Rename function
|
|
|
|
// below.
|
2021-12-09 19:24:26 +03:00
|
|
|
for conn, f := range s.aofconnM {
|
2021-06-13 17:53:27 +03:00
|
|
|
conn.Close()
|
|
|
|
f.Close()
|
|
|
|
}
|
|
|
|
|
|
|
|
// send a broadcast to all sleeping followers
|
2021-12-09 19:24:26 +03:00
|
|
|
s.fcond.Broadcast()
|
2021-06-13 17:53:27 +03:00
|
|
|
|
2018-10-29 01:49:45 +03:00
|
|
|
// flush the aof buffer
|
2021-12-09 19:24:26 +03:00
|
|
|
s.flushAOF(false)
|
2018-10-29 01:49:45 +03:00
|
|
|
|
2016-12-06 02:24:26 +03:00
|
|
|
aofbuf = aofbuf[:0]
|
2021-12-09 19:24:26 +03:00
|
|
|
for _, values := range s.shrinklog {
|
2016-12-06 02:24:26 +03:00
|
|
|
// append the values to the aof buffer
|
|
|
|
aofbuf = append(aofbuf, '*')
|
|
|
|
aofbuf = append(aofbuf, strconv.FormatInt(int64(len(values)), 10)...)
|
|
|
|
aofbuf = append(aofbuf, '\r', '\n')
|
|
|
|
for _, value := range values {
|
|
|
|
aofbuf = append(aofbuf, '$')
|
|
|
|
aofbuf = append(aofbuf, strconv.FormatInt(int64(len(value)), 10)...)
|
|
|
|
aofbuf = append(aofbuf, '\r', '\n')
|
|
|
|
aofbuf = append(aofbuf, value...)
|
|
|
|
aofbuf = append(aofbuf, '\r', '\n')
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if _, err := f.Write(aofbuf); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := f.Sync(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
// we now have a shrunken aof file that is fully in-sync with
|
|
|
|
// the current dataset. let's swap out the on disk files and
|
|
|
|
// point to the new file.
|
|
|
|
|
|
|
|
// anything below this point is unrecoverable. just log and exit process
|
|
|
|
// back up the live aof, just in case of fatal error
|
2021-12-09 19:24:26 +03:00
|
|
|
if err := s.aof.Close(); err != nil {
|
2017-12-13 00:05:22 +03:00
|
|
|
log.Fatalf("shrink live aof close fatal operation: %v", err)
|
2017-08-11 04:34:22 +03:00
|
|
|
}
|
2018-05-25 21:50:31 +03:00
|
|
|
if err := f.Close(); err != nil {
|
|
|
|
log.Fatalf("shrink new aof close fatal operation: %v", err)
|
|
|
|
}
|
2018-04-11 20:53:36 +03:00
|
|
|
if err := os.Rename(core.AppendFileName, core.AppendFileName+"-bak"); err != nil {
|
2017-12-13 00:05:22 +03:00
|
|
|
log.Fatalf("shrink backup fatal operation: %v", err)
|
2016-12-06 02:24:26 +03:00
|
|
|
}
|
2018-04-11 20:53:36 +03:00
|
|
|
if err := os.Rename(core.AppendFileName+"-shrink", core.AppendFileName); err != nil {
|
2017-12-13 00:05:22 +03:00
|
|
|
log.Fatalf("shrink rename fatal operation: %v", err)
|
2016-12-06 02:24:26 +03:00
|
|
|
}
|
2021-12-09 19:24:26 +03:00
|
|
|
s.aof, err = os.OpenFile(core.AppendFileName, os.O_CREATE|os.O_RDWR, 0600)
|
2016-12-06 02:24:26 +03:00
|
|
|
if err != nil {
|
2017-12-13 00:05:22 +03:00
|
|
|
log.Fatalf("shrink openfile fatal operation: %v", err)
|
2016-12-06 02:24:26 +03:00
|
|
|
}
|
|
|
|
var n int64
|
2021-12-09 19:24:26 +03:00
|
|
|
n, err = s.aof.Seek(0, 2)
|
2016-12-06 02:24:26 +03:00
|
|
|
if err != nil {
|
2017-12-13 00:05:22 +03:00
|
|
|
log.Fatalf("shrink seek end fatal operation: %v", err)
|
2016-12-06 02:24:26 +03:00
|
|
|
}
|
2021-12-09 19:24:26 +03:00
|
|
|
s.aofsz = int(n)
|
2016-12-06 02:24:26 +03:00
|
|
|
|
2018-04-11 20:53:36 +03:00
|
|
|
os.Remove(core.AppendFileName + "-bak") // ignore error
|
2016-12-06 02:24:26 +03:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}()
|
|
|
|
}()
|
|
|
|
if err != nil {
|
|
|
|
log.Errorf("aof shrink failed: %v", err)
|
|
|
|
return
|
|
|
|
}
|
2016-03-30 19:32:02 +03:00
|
|
|
}
|