diff --git a/.gitignore b/.gitignore index 8959171..8c0a2ef 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ build nohup.out build_config.mk var* -_workspace \ No newline at end of file +_workspace +*.log \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..52e07e4 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,9 @@ +language: go +go: 1.3.3 +before_install: + - go get github.com/tools/godep + - go get code.google.com/p/go.tools/cmd/cover + - go install -race std +script: + - godep go test -cover ./... + - godep go test -race ./... diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index de55e1d..4c1bf08 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -11,44 +11,56 @@ }, { "ImportPath": "github.com/boltdb/bolt", - "Comment": "data/v1-228-g8fb50d5", - "Rev": "8fb50d5ee57110936b904a7539d4c5f2bf2359db" + "Comment": "data/v1-254-gd285804", + "Rev": "d285804df1760edf4c602ecd901be5d5e67bf982" + }, + { + "ImportPath": "github.com/edsrzf/mmap-go", + "Rev": "6c75090c55983bef2e129e173681b20d24871ef8" + }, + { + "ImportPath": "github.com/siddontang/go/arena", + "Rev": "8f64946c30746240c2f3bdb606eed9a4aca34478" }, { "ImportPath": "github.com/siddontang/go/bson", - "Rev": "c7a17e4e4a1b72e4bc38b8b52cac8558aff4a4b1" + "Rev": "8f64946c30746240c2f3bdb606eed9a4aca34478" }, { "ImportPath": "github.com/siddontang/go/filelock", - "Rev": "c7a17e4e4a1b72e4bc38b8b52cac8558aff4a4b1" + "Rev": "8f64946c30746240c2f3bdb606eed9a4aca34478" }, { "ImportPath": "github.com/siddontang/go/hack", - "Rev": "c7a17e4e4a1b72e4bc38b8b52cac8558aff4a4b1" + "Rev": "8f64946c30746240c2f3bdb606eed9a4aca34478" }, { "ImportPath": "github.com/siddontang/go/ioutil2", - "Rev": "c7a17e4e4a1b72e4bc38b8b52cac8558aff4a4b1" + "Rev": "8f64946c30746240c2f3bdb606eed9a4aca34478" }, { "ImportPath": "github.com/siddontang/go/log", - "Rev": "c7a17e4e4a1b72e4bc38b8b52cac8558aff4a4b1" + "Rev": "8f64946c30746240c2f3bdb606eed9a4aca34478" }, { "ImportPath": "github.com/siddontang/go/num", - "Rev": "c7a17e4e4a1b72e4bc38b8b52cac8558aff4a4b1" + "Rev": "8f64946c30746240c2f3bdb606eed9a4aca34478" }, { "ImportPath": "github.com/siddontang/go/snappy", - "Rev": "c7a17e4e4a1b72e4bc38b8b52cac8558aff4a4b1" + "Rev": "8f64946c30746240c2f3bdb606eed9a4aca34478" }, { "ImportPath": "github.com/siddontang/go/sync2", - "Rev": "c7a17e4e4a1b72e4bc38b8b52cac8558aff4a4b1" + "Rev": "8f64946c30746240c2f3bdb606eed9a4aca34478" }, { - "ImportPath": "github.com/siddontang/goleveldb/leveldb", - "Rev": "71404b29ccd98b94ec2278afa806d59a11cd0d28" + "ImportPath": "github.com/syndtr/goleveldb/leveldb", + "Rev": "c9e0ae706141dc099005d6d247e4880c7feda2e1" + }, + { + "ImportPath": "github.com/syndtr/gosnappy/snappy", + "Rev": "ce8acff4829e0c2458a67ead32390ac0a381c862" }, { "ImportPath": "github.com/szferi/gomdb", diff --git a/Makefile b/Makefile index cfdddda..d52de40 100644 --- a/Makefile +++ b/Makefile @@ -22,5 +22,5 @@ clean: test: $(GO) test -tags '$(GO_BUILD_TAGS)' ./... -pytest: - sh client/ledis-py/tests/all.sh +test_race: + $(GO) test -race -tags '$(GO_BUILD_TAGS)' ./... diff --git a/README.md b/README.md index 5a8436b..a8f9e20 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,14 @@ -# LedisDB +# LedisDB + +[![Build Status](https://travis-ci.org/siddontang/ledisdb.svg?branch=develop)](https://travis-ci.org/siddontang/ledisdb) Ledisdb is a high performance NoSQL like Redis written by go. It supports some data structure like kv, list, hash, zset, bitmap,set. 
LedisDB now supports multiple databases as backend to store data, you can test and choose the proper one for you. +### **You must run `ledis-upgrade-ttl` before using LedisDB version 0.4, I fixed a very serious bug for key expiration and ttl.** + + ## Features + Rich data structure: KV, List, Hash, ZSet, Bitmap, Set. @@ -167,18 +172,9 @@ See [Clients](https://github.com/siddontang/ledisdb/wiki/Clients) to find or con + `pcall` and `xpcall` are not supported in lua, you can see the readme in [golua](https://github.com/aarzilli/golua). -## Thanks - -Gmail: cenqichao@gmail.com - -Gmail: chendahui007@gmail.com - -Gmail: cppgohan@gmail.com - -Gmail: tiaotiaoyly@gmail.com - -Gmail: wyk4true@gmail.com +## Requirement ++ go version >= 1.3 ## Feedback diff --git a/bootstrap.sh b/bootstrap.sh index ae18f27..185b9a8 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -6,23 +6,26 @@ godep path > /dev/null 2>&1 if [ "$?" = 0 ]; then GOPATH=`godep path` + # https://github.com/tools/godep/issues/60 + # have to rm Godeps/_workspace first, then restore + rm -rf $GOPATH godep restore exit 0 fi -go get github.com/siddontang/goleveldb/leveldb +go get -u github.com/szferi/gomdb -go get github.com/szferi/gomdb +go get -u github.com/boltdb/bolt -go get github.com/boltdb/bolt +go get -u github.com/ugorji/go/codec +go get -u github.com/BurntSushi/toml +go get -u github.com/edsrzf/mmap-go +go get -u github.com/syndtr/goleveldb/leveldb -go get github.com/ugorji/go/codec -go get github.com/BurntSushi/toml - - -go get github.com/siddontang/go/bson -go get github.com/siddontang/go/log -go get github.com/siddontang/go/snappy -go get github.com/siddontang/go/num -go get github.com/siddontang/go/filelock -go get github.com/siddontang/go/sync2 \ No newline at end of file +go get -u github.com/siddontang/go/bson +go get -u github.com/siddontang/go/log +go get -u github.com/siddontang/go/snappy +go get -u github.com/siddontang/go/num +go get -u github.com/siddontang/go/filelock +go get -u github.com/siddontang/go/sync2 +go get -u github.com/siddontang/go/arena diff --git a/client/go/ledis/client.go b/client/go/ledis/client.go index f6b0fc4..bdc532f 100644 --- a/client/go/ledis/client.go +++ b/client/go/ledis/client.go @@ -4,11 +4,6 @@ import ( "container/list" "strings" "sync" - "time" -) - -const ( - pingPeriod time.Duration = 3 * time.Second ) type Config struct { @@ -98,7 +93,6 @@ func (c *Client) put(conn *Conn) { c.Unlock() conn.finalize() } else { - conn.lastActive = time.Now() c.conns.PushFront(conn) c.Unlock() } diff --git a/client/go/ledis/conn.go b/client/go/ledis/conn.go index 7568772..1c988b6 100644 --- a/client/go/ledis/conn.go +++ b/client/go/ledis/conn.go @@ -8,7 +8,7 @@ import ( "io" "net" "strconv" - "time" + "sync" ) // Error represents an error returned in a command reply. @@ -17,6 +17,12 @@ type Error string func (err Error) Error() string { return string(err) } type Conn struct { + cm sync.Mutex + wm sync.Mutex + rm sync.Mutex + + closed bool + client *Client addr string @@ -28,8 +34,6 @@ type Conn struct { rSize int wSize int - lastActive time.Time - // Scratch space for formatting argument length. 
// '*' or '$', length, "\r\n" lenScratch [32]byte @@ -45,6 +49,8 @@ func NewConn(addr string) *Conn { co.rSize = 4096 co.wSize = 4096 + co.closed = false + return co } @@ -76,6 +82,9 @@ func (c *Conn) Send(cmd string, args ...interface{}) error { return err } + c.wm.Lock() + defer c.wm.Unlock() + if err := c.writeCommand(cmd, args); err != nil { c.finalize() return err @@ -89,6 +98,9 @@ func (c *Conn) Send(cmd string, args ...interface{}) error { } func (c *Conn) Receive() (interface{}, error) { + c.rm.Lock() + defer c.rm.Unlock() + if reply, err := c.readReply(); err != nil { c.finalize() return nil, err @@ -102,6 +114,9 @@ func (c *Conn) Receive() (interface{}, error) { } func (c *Conn) ReceiveBulkTo(w io.Writer) error { + c.rm.Lock() + defer c.rm.Unlock() + err := c.readBulkReplyTo(w) if err != nil { if _, ok := err.(Error); !ok { @@ -112,20 +127,26 @@ func (c *Conn) ReceiveBulkTo(w io.Writer) error { } func (c *Conn) finalize() { - if c.c != nil { + c.cm.Lock() + if !c.closed { c.c.Close() - c.c = nil + c.closed = true } + c.cm.Unlock() } func (c *Conn) connect() error { - if c.c != nil { + c.cm.Lock() + defer c.cm.Unlock() + + if !c.closed && c.c != nil { return nil } var err error c.c, err = net.Dial(getProto(c.addr), c.addr) if err != nil { + c.c = nil return err } diff --git a/cmd/ledis-benchmark/main.go b/cmd/ledis-benchmark/main.go index 510d436..640607d 100644 --- a/cmd/ledis-benchmark/main.go +++ b/cmd/ledis-benchmark/main.go @@ -6,6 +6,7 @@ import ( "github.com/siddontang/ledisdb/client/go/ledis" "math/rand" "runtime" + "strings" "sync" "sync/atomic" "time" @@ -15,36 +16,33 @@ var ip = flag.String("ip", "127.0.0.1", "redis/ledis/ssdb server ip") var port = flag.Int("port", 6380, "redis/ledis/ssdb server port") var number = flag.Int("n", 1000, "request number") var clients = flag.Int("c", 50, "number of clients") -var reverse = flag.Bool("rev", false, "enable zset rev benchmark") var round = flag.Int("r", 1, "benchmark round number") -var del = flag.Bool("del", true, "enable del benchmark") var valueSize = flag.Int("vsize", 100, "kv value size") +var tests = flag.String("t", "set,get,randget,del,lpush,lrange,lpop,hset,hget,hdel,zadd,zincr,zrange,zrevrange,zdel", "only run the comma separated list of tests") var wg sync.WaitGroup var client *ledis.Client - var loop int = 0 -func waitBench(cmd string, args ...interface{}) { - c := client.Get() - defer c.Close() - - _, err := c.Do(cmd, args...) +func waitBench(c *ledis.Conn, cmd string, args ...interface{}) { + _, err := c.Do(strings.ToUpper(cmd), args...) 
if err != nil { - fmt.Printf("do %s error %s", cmd, err.Error()) - return + fmt.Printf("do %s error %s\n", cmd, err.Error()) } + } -func bench(cmd string, f func()) { +func bench(cmd string, f func(c *ledis.Conn)) { wg.Add(*clients) t1 := time.Now() for i := 0; i < *clients; i++ { go func() { + c := client.Get() for j := 0; j < loop; j++ { - f() + f(c) } + c.Close() wg.Done() }() } @@ -53,7 +51,13 @@ func bench(cmd string, f func()) { t2 := time.Now() - fmt.Printf("%s: %0.2f op/s\n", cmd, (float64(*number) / t2.Sub(t1).Seconds())) + d := t2.Sub(t1) + + fmt.Printf("%s: %s %0.3f micros/op, %0.2fop/s\n", + cmd, + d.String(), + float64(d.Nanoseconds()/1e3)/float64(*number), + float64(*number)/d.Seconds()) } var kvSetBase int64 = 0 @@ -62,78 +66,78 @@ var kvIncrBase int64 = 0 var kvDelBase int64 = 0 func benchSet() { - f := func() { + f := func(c *ledis.Conn) { value := make([]byte, *valueSize) n := atomic.AddInt64(&kvSetBase, 1) - waitBench("set", n, value) + waitBench(c, "SET", n, value) } bench("set", f) } func benchGet() { - f := func() { + f := func(c *ledis.Conn) { n := atomic.AddInt64(&kvGetBase, 1) - waitBench("get", n) + waitBench(c, "GET", n) } bench("get", f) } func benchRandGet() { - f := func() { - n := rand.Int() - waitBench("get", n) + f := func(c *ledis.Conn) { + n := rand.Int() % *number + waitBench(c, "GET", n) } bench("randget", f) } func benchDel() { - f := func() { + f := func(c *ledis.Conn) { n := atomic.AddInt64(&kvDelBase, 1) - waitBench("del", n) + waitBench(c, "DEL", n) } bench("del", f) } func benchPushList() { - f := func() { + f := func(c *ledis.Conn) { value := make([]byte, 100) - waitBench("rpush", "mytestlist", value) + waitBench(c, "RPUSH", "mytestlist", value) } bench("rpush", f) } func benchRangeList10() { - f := func() { - waitBench("lrange", "mytestlist", 0, 10) + f := func(c *ledis.Conn) { + waitBench(c, "LRANGE", "mytestlist", 0, 10) } bench("lrange10", f) } func benchRangeList50() { - f := func() { - waitBench("lrange", "mytestlist", 0, 50) + f := func(c *ledis.Conn) { + waitBench(c, "LRANGE", "mytestlist", 0, 50) } bench("lrange50", f) } func benchRangeList100() { - f := func() { - waitBench("lrange", "mytestlist", 0, 100) + f := func(c *ledis.Conn) { + waitBench(c, "LRANGE", "mytestlist", 0, 100) } bench("lrange100", f) } func benchPopList() { - f := func() { - waitBench("lpop", "mytestlist") + f := func(c *ledis.Conn) { + waitBench(c, "LPOP", "mytestlist") } bench("lpop", f) @@ -145,38 +149,38 @@ var hashGetBase int64 = 0 var hashDelBase int64 = 0 func benchHset() { - f := func() { + f := func(c *ledis.Conn) { value := make([]byte, 100) n := atomic.AddInt64(&hashSetBase, 1) - waitBench("hset", "myhashkey", n, value) + waitBench(c, "HSET", "myhashkey", n, value) } bench("hset", f) } func benchHGet() { - f := func() { + f := func(c *ledis.Conn) { n := atomic.AddInt64(&hashGetBase, 1) - waitBench("hget", "myhashkey", n) + waitBench(c, "HGET", "myhashkey", n) } bench("hget", f) } func benchHRandGet() { - f := func() { - n := rand.Int() - waitBench("hget", "myhashkey", n) + f := func(c *ledis.Conn) { + n := rand.Int() % *number + waitBench(c, "HGET", "myhashkey", n) } bench("hrandget", f) } func benchHDel() { - f := func() { + f := func(c *ledis.Conn) { n := atomic.AddInt64(&hashDelBase, 1) - waitBench("hdel", "myhashkey", n) + waitBench(c, "HDEL", "myhashkey", n) } bench("hdel", f) @@ -187,60 +191,60 @@ var zsetDelBase int64 = 0 var zsetIncrBase int64 = 0 func benchZAdd() { - f := func() { + f := func(c *ledis.Conn) { member := make([]byte, 16) n := 
atomic.AddInt64(&zsetAddBase, 1) - waitBench("zadd", "myzsetkey", n, member) + waitBench(c, "ZADD", "myzsetkey", n, member) } bench("zadd", f) } func benchZDel() { - f := func() { + f := func(c *ledis.Conn) { n := atomic.AddInt64(&zsetDelBase, 1) - waitBench("zrem", "myzsetkey", n) + waitBench(c, "ZREM", "myzsetkey", n) } bench("zrem", f) } func benchZIncr() { - f := func() { + f := func(c *ledis.Conn) { n := atomic.AddInt64(&zsetIncrBase, 1) - waitBench("zincrby", "myzsetkey", 1, n) + waitBench(c, "ZINCRBY", "myzsetkey", 1, n) } bench("zincrby", f) } func benchZRangeByScore() { - f := func() { - waitBench("zrangebyscore", "myzsetkey", 0, rand.Int(), "withscores", "limit", rand.Int()%100, 100) + f := func(c *ledis.Conn) { + waitBench(c, "ZRANGEBYSCORE", "myzsetkey", 0, rand.Int(), "withscores", "limit", rand.Int()%100, 100) } bench("zrangebyscore", f) } func benchZRangeByRank() { - f := func() { - waitBench("zrange", "myzsetkey", 0, rand.Int()%100) + f := func(c *ledis.Conn) { + waitBench(c, "ZRANGE", "myzsetkey", 0, rand.Int()%100) } bench("zrange", f) } func benchZRevRangeByScore() { - f := func() { - waitBench("zrevrangebyscore", "myzsetkey", 0, rand.Int(), "withscores", "limit", rand.Int()%100, 100) + f := func(c *ledis.Conn) { + waitBench(c, "ZREVRANGEBYSCORE", "myzsetkey", 0, rand.Int(), "withscores", "limit", rand.Int()%100, 100) } bench("zrevrangebyscore", f) } func benchZRevRangeByRank() { - f := func() { - waitBench("zrevrange", "myzsetkey", 0, rand.Int()%100) + f := func(c *ledis.Conn) { + waitBench(c, "ZREVRANGE", "myzsetkey", 0, rand.Int()%100) } bench("zrevrange", f) @@ -272,50 +276,58 @@ func main() { cfg.WriteBufferSize = 10240 client = ledis.NewClient(cfg) + for i := 0; i < *clients; i++ { + c := client.Get() + c.Close() + } + if *round <= 0 { *round = 1 } + ts := strings.Split(*tests, ",") + for i := 0; i < *round; i++ { - benchSet() - benchGet() - benchRandGet() - - if *del == true { - benchDel() - } - - benchPushList() - benchRangeList10() - benchRangeList50() - benchRangeList100() - - if *del == true { - benchPopList() - } - - benchHset() - benchHGet() - benchHRandGet() - - if *del == true { - benchHDel() - } - - benchZAdd() - benchZIncr() - benchZRangeByRank() - benchZRangeByScore() - - //rev is too slow in leveldb, rocksdb or other - //maybe disable for huge data benchmark - if *reverse == true { - benchZRevRangeByRank() - benchZRevRangeByScore() - } - - if *del == true { - benchZDel() + for _, s := range ts { + switch strings.ToLower(s) { + case "set": + benchSet() + case "get": + benchGet() + case "randget": + benchRandGet() + case "del": + benchDel() + case "lpush": + benchPushList() + case "lrange": + benchRangeList10() + benchRangeList50() + benchRangeList100() + case "lpop": + benchPopList() + case "hset": + benchHset() + case "hget": + benchHGet() + benchHRandGet() + case "hdel": + benchHDel() + case "zadd": + benchZAdd() + case "zincr": + benchZIncr() + case "zrange": + benchZRangeByRank() + benchZRangeByScore() + case "zrevrange": + //rev is too slow in leveldb, rocksdb or other + //maybe disable for huge data benchmark + benchZRevRangeByRank() + benchZRevRangeByScore() + case "zdel": + benchZDel() + } } println("") diff --git a/cmd/ledis-cli/const.go b/cmd/ledis-cli/const.go index 790a77b..5586f1a 100644 --- a/cmd/ledis-cli/const.go +++ b/cmd/ledis-cli/const.go @@ -1,4 +1,4 @@ -//This file was generated by .tools/generate_commands.py on Mon Oct 20 2014 22:35:33 +0800 +//This file was generated by .tools/generate_commands.py on Sun Oct 26 2014 15:14:39 +0800 
package main var helpCommands = [][]string{ @@ -86,6 +86,7 @@ var helpCommands = [][]string{ {"SDIFFSTORE", "destination key [key ...]", "Set"}, {"SELECT", "index", "Server"}, {"SET", "key value", "KV"}, + {"SETEX", "key seconds value", "KV"}, {"SETNX", "key value", "KV"}, {"SEXPIRE", "key seconds", "Set"}, {"SEXPIREAT", "key timestamp", "Set"}, diff --git a/cmd/ledis-storebench/main.go b/cmd/ledis-dbbench/main.go similarity index 73% rename from cmd/ledis-storebench/main.go rename to cmd/ledis-dbbench/main.go index 6d91a89..0ab8277 100644 --- a/cmd/ledis-storebench/main.go +++ b/cmd/ledis-dbbench/main.go @@ -5,7 +5,7 @@ import ( "fmt" "github.com/siddontang/go/num" "github.com/siddontang/ledisdb/config" - "github.com/siddontang/ledisdb/store" + "github.com/siddontang/ledisdb/ledis" "os" "runtime" "sync" @@ -24,7 +24,8 @@ var round = flag.Int("r", 1, "benchmark round number") var valueSize = flag.Int("vsize", 100, "kv value size") var wg sync.WaitGroup -var db *store.DB +var ldb *ledis.Ledis +var db *ledis.DB var loop int = 0 @@ -46,25 +47,31 @@ func bench(cmd string, f func()) { t2 := time.Now() d := t2.Sub(t1) - fmt.Printf("%s: %0.3f micros/op, %0.2fmb/s %0.2fop/s\n", cmd, float64(d.Nanoseconds()/1e3)/float64(*number), - float64((*valueSize+16)*(*number))/(1024.0*1024.0*(d.Seconds())), float64(*number)/d.Seconds()) + fmt.Printf("%s %s: %0.3f micros/op, %0.2fmb/s %0.2fop/s\n", + cmd, + d.String(), + float64(d.Nanoseconds()/1e3)/float64(*number), + float64((*valueSize+16)*(*number))/(1024.0*1024.0*(d.Seconds())), + float64(*number)/d.Seconds()) } var kvSetBase int64 = 0 var kvGetBase int64 = 0 +var value []byte + func benchSet() { f := func() { - value := make([]byte, *valueSize) n := atomic.AddInt64(&kvSetBase, 1) - db.Put(num.Int64ToBytes(n), value) + db.Set(num.Int64ToBytes(n), value) } bench("set", f) } func benchGet() { + kvGetBase = 0 f := func() { n := atomic.AddInt64(&kvGetBase, 1) v, err := db.Get(num.Int64ToBytes(n)) @@ -78,6 +85,23 @@ func benchGet() { bench("get", f) } +var kvGetSliceBase int64 = 0 + +func benchGetSlice() { + kvGetSliceBase = 0 + f := func() { + n := atomic.AddInt64(&kvGetSliceBase, 1) + v, err := db.GetSlice(num.Int64ToBytes(n)) + if err != nil { + println(err.Error()) + } else if v != nil { + v.Free() + } + } + + bench("getslice", f) +} + func setRocksDB(cfg *config.RocksDBConfig) { cfg.BlockSize = 64 * KB cfg.WriteBufferSize = 64 * MB @@ -99,10 +123,15 @@ func main() { runtime.GOMAXPROCS(runtime.NumCPU()) flag.Parse() + value = make([]byte, *valueSize) + cfg := config.NewConfigDefault() - cfg.DBPath = "./var/store_test" + cfg.DataDir = "./var/ledis_dbbench" cfg.DBName = *name os.RemoveAll(cfg.DBPath) + defer os.RemoveAll(cfg.DBPath) + + os.MkdirAll(cfg.DBPath, 0755) cfg.LevelDB.BlockSize = 32 * KB cfg.LevelDB.CacheSize = 512 * MB @@ -112,12 +141,14 @@ func main() { setRocksDB(&cfg.RocksDB) var err error - db, err = store.Open(cfg) + ldb, err = ledis.Open(cfg) if err != nil { - panic(err) + println(err.Error()) return } + db, _ = ldb.Select(0) + if *number <= 0 { panic("invalid number") return @@ -137,6 +168,9 @@ func main() { for i := 0; i < *round; i++ { benchSet() benchGet() + benchGetSlice() + benchGet() + benchGetSlice() println("") } diff --git a/cmd/ledis-respbench/main.go b/cmd/ledis-respbench/main.go new file mode 100644 index 0000000..fcdf239 --- /dev/null +++ b/cmd/ledis-respbench/main.go @@ -0,0 +1,158 @@ +package main + +import ( + "bufio" + "bytes" + "flag" + "fmt" + "github.com/siddontang/go/arena" + "github.com/siddontang/ledisdb/config" + 
"github.com/siddontang/ledisdb/ledis" + "github.com/siddontang/ledisdb/server" + "net" + "os" + "runtime" + "time" +) + +var KB = config.KB +var MB = config.MB +var GB = config.GB + +var addr = flag.String("addr", ":6380", "listen addr") +var name = flag.String("db_name", "", "db name") + +var ldb *ledis.Ledis +var db *ledis.DB + +func setRocksDB(cfg *config.RocksDBConfig) { + cfg.BlockSize = 64 * KB + cfg.WriteBufferSize = 64 * MB + cfg.MaxWriteBufferNum = 2 + cfg.MaxBytesForLevelBase = 512 * MB + cfg.TargetFileSizeBase = 64 * MB + cfg.BackgroundThreads = 4 + cfg.HighPriorityBackgroundThreads = 1 + cfg.MaxBackgroundCompactions = 3 + cfg.MaxBackgroundFlushes = 1 + cfg.CacheSize = 512 * MB + cfg.EnableStatistics = true + cfg.StatsDumpPeriodSec = 5 + cfg.Level0FileNumCompactionTrigger = 8 + cfg.MaxBytesForLevelMultiplier = 8 +} + +func main() { + runtime.GOMAXPROCS(runtime.NumCPU()) + + flag.Parse() + l, err := net.Listen("tcp", *addr) + + println("listen", *addr) + + if err != nil { + fmt.Println(err.Error()) + return + } + + if len(*name) > 0 { + cfg := config.NewConfigDefault() + cfg.DataDir = "./var/ledis_respbench" + cfg.DBName = *name + os.RemoveAll(cfg.DBPath) + defer os.RemoveAll(cfg.DBPath) + + os.MkdirAll(cfg.DBPath, 0755) + + cfg.LevelDB.BlockSize = 32 * KB + cfg.LevelDB.CacheSize = 512 * MB + cfg.LevelDB.WriteBufferSize = 64 * MB + cfg.LevelDB.MaxOpenFiles = 1000 + + setRocksDB(&cfg.RocksDB) + + ldb, err = ledis.Open(cfg) + if err != nil { + println(err.Error()) + return + } + + db, _ = ldb.Select(0) + } + + for { + c, err := l.Accept() + if err != nil { + println(err.Error()) + continue + } + go run(c) + } +} + +func run(c net.Conn) { + //buf := make([]byte, 10240) + ok := []byte("+OK\r\n") + data := []byte("$4096\r\n") + data = append(data, make([]byte, 4096)...) + data = append(data, "\r\n"...) 
+ + var rt time.Duration + var wt time.Duration + var st time.Duration + var gt time.Duration + + rb := bufio.NewReaderSize(c, 10240) + wb := bufio.NewWriterSize(c, 10240) + + a := arena.NewArena(10240) + + for { + t1 := time.Now() + + a.Reset() + + req, err := server.ReadRequest(rb, a) + + if err != nil { + break + } + t2 := time.Now() + + rt += t2.Sub(t1) + + cmd := string(bytes.ToUpper(req[0])) + switch cmd { + case "SET": + if db != nil { + db.Set(req[1], req[2]) + st += time.Now().Sub(t2) + } + wb.Write(ok) + case "GET": + if db != nil { + d, _ := db.GetSlice(req[1]) + gt += time.Now().Sub(t2) + if d == nil { + wb.Write(data) + } else { + wb.WriteString(fmt.Sprintf("$%d\r\n", d.Size())) + wb.Write(d.Data()) + wb.WriteString("\r\n") + d.Free() + } + } else { + wb.Write(data) + } + default: + wb.WriteString(fmt.Sprintf("-Err %s Not Supported Now\r\n", req[0])) + } + + wb.Flush() + + t3 := time.Now() + wt += t3.Sub(t2) + } + + fmt.Printf("rt:%s wt %s, gt:%s, st:%s\n", rt.String(), wt.String(), gt.String(), st.String()) +} diff --git a/cmd/ledis-server/main.go b/cmd/ledis-server/main.go index 32be6c2..f1c695b 100644 --- a/cmd/ledis-server/main.go +++ b/cmd/ledis-server/main.go @@ -24,6 +24,7 @@ var slaveof = flag.String("slaveof", "", "make the server a slave of another ins var readonly = flag.Bool("readonly", false, "set readonly mode, salve server is always readonly") var rpl = flag.Bool("rpl", false, "enable replication or not, slave server is always enabled") var rplSync = flag.Bool("rpl_sync", false, "enable sync replication or not") +var ttlCheck = flag.Int("ttl_check", 0, "TTL check interval") func main() { runtime.GOMAXPROCS(runtime.NumCPU()) @@ -67,6 +68,10 @@ func main() { cfg.Replication.Sync = *rplSync } + if *ttlCheck > 0 { + cfg.TTLCheckInterval = *ttlCheck + } + var app *server.App app, err = server.NewApp(cfg) if err != nil { @@ -76,22 +81,22 @@ func main() { sc := make(chan os.Signal, 1) signal.Notify(sc, + os.Kill, + os.Interrupt, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) - go func() { - <-sc - - app.Close() - }() - if *usePprof { go func() { log.Println(http.ListenAndServe(fmt.Sprintf(":%d", *pprofPort), nil)) }() } - app.Run() + go app.Run() + + <-sc + + app.Close() } diff --git a/config/config.go b/config/config.go index 5353ffc..92ea190 100644 --- a/config/config.go +++ b/config/config.go @@ -7,6 +7,7 @@ import ( "github.com/siddontang/go/ioutil2" "io" "io/ioutil" + "sync" ) var ( @@ -73,8 +74,12 @@ type ReplicationConfig struct { WaitSyncTime int `toml:"wait_sync_time"` WaitMaxSlaveAcks int `toml:"wait_max_slave_acks"` ExpiredLogDays int `toml:"expired_log_days"` + StoreName string `toml:"store_name"` + MaxLogFileSize int64 `toml:"max_log_file_size"` + MaxLogFileNum int `toml:"max_log_file_num"` SyncLog int `toml:"sync_log"` Compression bool `toml:"compression"` + UseMmap bool `toml:"use_mmap"` } type SnapshotConfig struct { @@ -83,6 +88,8 @@ type SnapshotConfig struct { } type Config struct { + m sync.RWMutex `toml:"-"` + FileName string `toml:"-"` Addr string `toml:"addr"` @@ -111,8 +118,11 @@ type Config struct { Snapshot SnapshotConfig `toml:"snapshot"` - ConnReadBufferSize int `toml:"conn_read_buffer_size"` - ConnWriteBufferSize int `toml:"conn_write_buffer_size"` + ConnReadBufferSize int `toml:"conn_read_buffer_size"` + ConnWriteBufferSize int `toml:"conn_write_buffer_size"` + ConnKeepaliveInterval int `toml:"conn_keepalive_interval"` + + TTLCheckInterval int `toml:"ttl_check_interval"` } func NewConfigWithFile(fileName string) 
(*Config, error) { @@ -166,6 +176,7 @@ func NewConfigDefault() *Config { cfg.Replication.Compression = true cfg.Replication.WaitMaxSlaveAcks = 2 cfg.Replication.SyncLog = 0 + cfg.Replication.UseMmap = true cfg.Snapshot.MaxNum = 1 cfg.RocksDB.AllowOsBuffer = true @@ -194,8 +205,11 @@ func (cfg *Config) adjust() { cfg.RocksDB.adjust() cfg.Replication.ExpiredLogDays = getDefault(7, cfg.Replication.ExpiredLogDays) + cfg.Replication.MaxLogFileNum = getDefault(50, cfg.Replication.MaxLogFileNum) cfg.ConnReadBufferSize = getDefault(4*KB, cfg.ConnReadBufferSize) cfg.ConnWriteBufferSize = getDefault(4*KB, cfg.ConnWriteBufferSize) + cfg.TTLCheckInterval = getDefault(1, cfg.TTLCheckInterval) + } func (cfg *LevelDBConfig) adjust() { @@ -250,3 +264,16 @@ func (cfg *Config) Rewrite() error { return cfg.DumpFile(cfg.FileName) } + +func (cfg *Config) GetReadonly() bool { + cfg.m.RLock() + b := cfg.Readonly + cfg.m.RUnlock() + return b +} + +func (cfg *Config) SetReadonly(b bool) { + cfg.m.Lock() + cfg.Readonly = b + cfg.m.Unlock() +} diff --git a/config/config.toml b/config/config.toml index aef46d6..44b38d6 100644 --- a/config/config.toml +++ b/config/config.toml @@ -41,12 +41,21 @@ db_path = "" db_sync_commit = 0 # enable replication or not -use_replication = true +use_replication = false # set connection buffer, you can increase them appropriately +# more size, more memory used conn_read_buffer_size = 10240 conn_write_buffer_size = 10240 +# if connection receives no data after n seconds, it may be dead, close +# 0 to disable and not check +conn_keepalive_interval = 0 + +# checking TTL (time to live) data every n seconds +# if you set big, the expired data may not be deleted immediately +ttl_check_interval = 1 + [leveldb] # for leveldb and goleveldb compression = false @@ -63,11 +72,11 @@ max_open_files = 1024 # 0:no, 1:snappy, 2:zlib, 3:bz2, 4:lz4, 5:lz4hc compression = 0 block_size = 65536 -write_buffer_size = 67108864 -cache_size = 524288000 +write_buffer_size = 134217728 +cache_size = 1073741824 max_open_files = 1024 -max_write_buffer_num = 2 -min_write_buffer_number_to_merge = 1 +max_write_buffer_num = 6 +min_write_buffer_number_to_merge = 2 num_levels = 7 level0_file_num_compaction_trigger = 8 level0_slowdown_writes_trigger = 16 @@ -79,9 +88,9 @@ max_bytes_for_level_multiplier = 8 disable_auto_compactions = false disable_data_sync = false use_fsync = false -background_theads = 4 +background_theads = 16 high_priority_background_threads = 1 -max_background_compactions = 3 +max_background_compactions = 15 max_background_flushes = 1 allow_os_buffer = true enable_statistics = false @@ -102,7 +111,7 @@ path = "" # If sync is true, the new log must be sent to some slaves, and then commit. # It will reduce performance but have better high availability. -sync = true +sync = false # If sync is true, wait at last wait_sync_time milliseconds for slave syncing this log wait_sync_time = 500 @@ -112,9 +121,22 @@ wait_sync_time = 500 # If 0, wait (n + 1) / 2 acks. 
wait_max_slave_acks = 2 +# store name: file, goleveldb +# change in runtime is very dangerous +store_name = "file" + # Expire write ahead logs after the given days expired_log_days = 7 +# for file store, if 0, use default 256MB, max is 1G +max_log_file_size = 0 + +# for file store, if 0, use default 50 +max_log_file_num = 0 + +# for file store, use mmap for file read and write +use_mmap = true + # Sync log to disk if possible # 0: no sync # 1: sync every second @@ -122,7 +144,7 @@ expired_log_days = 7 sync_log = 0 # Compress the log or not -compression = true +compression = false [snapshot] # Path to store snapshot dump file diff --git a/doc/commands.json b/doc/commands.json index b03e7f9..d435bc3 100644 --- a/doc/commands.json +++ b/doc/commands.json @@ -310,6 +310,11 @@ "group": "KV", "readonly": false }, + "SETEX": { + "arguments": "key seconds value", + "group": "KV", + "readonly": false + }, "SLAVEOF": { "arguments": "host port [RESTART] [READONLY]", "group": "Replication", diff --git a/doc/commands.md b/doc/commands.md index 9dcdca6..a5c0527 100644 --- a/doc/commands.md +++ b/doc/commands.md @@ -22,6 +22,7 @@ Table of Contents - [MSET key value [key value ...]](#mset-key-value-key-value-) - [SET key value](#set-key-value) - [SETNX key value](#setnx-key-value) + - [SETEX key seconds value](#setex-key-seconds-value) - [EXPIRE key seconds](#expire-key-seconds) - [EXPIREAT key timestamp](#expireat-key-timestamp) - [TTL key](#ttl-key) @@ -389,6 +390,30 @@ ledis> GET mykey "hello" ``` +### SETEX key seconds value +Set key to hold the string value and set key to timeout after a given number of seconds. This command is equivalent to executing the following commands: + +``` +SET mykey value +EXPIRE mykey seconds +``` + +**Return value** + +Simple string reply + +**Examples** + +``` +ledis> SETEX mykey 10 "Hello" +OK +ledis> TTL mykey +(integer) 10 +ledis> GET mykey +"Hello" +ledis> +``` + ### EXPIRE key seconds Set a timeout on key. After the timeout has expired, the key will be deleted. 
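As a quick illustration of the new `SETEX` command, the sketch below drives it through the bundled Go client in `client/go/ledis`. It assumes a ledis-server listening on the default `127.0.0.1:6380`; the key name is a placeholder and error handling is kept minimal.

```
package main

import (
	"fmt"

	"github.com/siddontang/ledisdb/client/go/ledis"
)

func main() {
	// Connect to a running ledis-server (placeholder address).
	c := ledis.NewConn("127.0.0.1:6380")
	defer c.Close()

	// SETEX key seconds value behaves like SET followed by EXPIRE.
	if _, err := c.Do("SETEX", "mykey", 10, "Hello"); err != nil {
		fmt.Println("setex error:", err)
		return
	}

	ttl, _ := c.Do("TTL", "mykey") // remaining lifetime in seconds, e.g. 10
	val, _ := c.Do("GET", "mykey") // []byte("Hello") until the key expires
	fmt.Println(ttl, val)
}
```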
diff --git a/etc/ledis.conf b/etc/ledis.conf index a3b0833..44b38d6 100644 --- a/etc/ledis.conf +++ b/etc/ledis.conf @@ -41,7 +41,20 @@ db_path = "" db_sync_commit = 0 # enable replication or not -use_replication = true +use_replication = false + +# set connection buffer, you can increase them appropriately +# more size, more memory used +conn_read_buffer_size = 10240 +conn_write_buffer_size = 10240 + +# if connection receives no data after n seconds, it may be dead, close +# 0 to disable and not check +conn_keepalive_interval = 0 + +# checking TTL (time to live) data every n seconds +# if you set big, the expired data may not be deleted immediately +ttl_check_interval = 1 [leveldb] # for leveldb and goleveldb @@ -59,11 +72,11 @@ max_open_files = 1024 # 0:no, 1:snappy, 2:zlib, 3:bz2, 4:lz4, 5:lz4hc compression = 0 block_size = 65536 -write_buffer_size = 67108864 -cache_size = 524288000 +write_buffer_size = 134217728 +cache_size = 1073741824 max_open_files = 1024 -max_write_buffer_num = 2 -min_write_buffer_number_to_merge = 1 +max_write_buffer_num = 6 +min_write_buffer_number_to_merge = 2 num_levels = 7 level0_file_num_compaction_trigger = 8 level0_slowdown_writes_trigger = 16 @@ -75,9 +88,9 @@ max_bytes_for_level_multiplier = 8 disable_auto_compactions = false disable_data_sync = false use_fsync = false -background_theads = 4 +background_theads = 16 high_priority_background_threads = 1 -max_background_compactions = 3 +max_background_compactions = 15 max_background_flushes = 1 allow_os_buffer = true enable_statistics = false @@ -98,7 +111,7 @@ path = "" # If sync is true, the new log must be sent to some slaves, and then commit. # It will reduce performance but have better high availability. -sync = true +sync = false # If sync is true, wait at last wait_sync_time milliseconds for slave syncing this log wait_sync_time = 500 @@ -108,9 +121,22 @@ wait_sync_time = 500 # If 0, wait (n + 1) / 2 acks. 
wait_max_slave_acks = 2 +# store name: file, goleveldb +# change in runtime is very dangerous +store_name = "file" + # Expire write ahead logs after the given days expired_log_days = 7 +# for file store, if 0, use default 256MB, max is 1G +max_log_file_size = 0 + +# for file store, if 0, use default 50 +max_log_file_num = 0 + +# for file store, use mmap for file read and write +use_mmap = true + # Sync log to disk if possible # 0: no sync # 1: sync every second @@ -118,7 +144,7 @@ expired_log_days = 7 sync_log = 0 # Compress the log or not -compression = true +compression = false [snapshot] # Path to store snapshot dump file diff --git a/ledis/batch.go b/ledis/batch.go index f5fe061..c9064df 100644 --- a/ledis/batch.go +++ b/ledis/batch.go @@ -15,20 +15,20 @@ type batch struct { sync.Locker tx *Tx - - eb *eventBatch } func (b *batch) Commit() error { - if b.l.IsReadOnly() { + if b.l.cfg.GetReadonly() { return ErrWriteInROnly } if b.tx == nil { - return b.l.handleCommit(b.eb, b.WriteBatch) + return b.l.handleCommit(b.WriteBatch, b.WriteBatch) } else { if b.l.r != nil { - b.tx.eb.Write(b.eb.Bytes()) + if err := b.tx.data.Append(b.WriteBatch.BatchData()); err != nil { + return err + } } return b.WriteBatch.Commit() } @@ -39,25 +39,15 @@ func (b *batch) Lock() { } func (b *batch) Unlock() { - b.eb.Reset() - b.WriteBatch.Rollback() b.Locker.Unlock() } func (b *batch) Put(key []byte, value []byte) { - if b.l.r != nil { - b.eb.Put(key, value) - } - b.WriteBatch.Put(key, value) } func (b *batch) Delete(key []byte) { - if b.l.r != nil { - b.eb.Delete(key) - } - b.WriteBatch.Delete(key) } @@ -96,7 +86,6 @@ func (l *Ledis) newBatch(wb *store.WriteBatch, locker sync.Locker, tx *Tx) *batc b.Locker = locker b.tx = tx - b.eb = new(eventBatch) return b } @@ -105,14 +94,19 @@ type commiter interface { Commit() error } -func (l *Ledis) handleCommit(eb *eventBatch, c commiter) error { +type commitDataGetter interface { + Data() []byte +} + +func (l *Ledis) handleCommit(g commitDataGetter, c commiter) error { l.commitLock.Lock() - defer l.commitLock.Unlock() var err error if l.r != nil { var rl *rpl.Log - if rl, err = l.r.Log(eb.Bytes()); err != nil { + if rl, err = l.r.Log(g.Data()); err != nil { + l.commitLock.Unlock() + log.Fatal("write wal error %s", err.Error()) return err } @@ -120,19 +114,25 @@ func (l *Ledis) handleCommit(eb *eventBatch, c commiter) error { l.propagate(rl) if err = c.Commit(); err != nil { + l.commitLock.Unlock() + log.Fatal("commit error %s", err.Error()) l.noticeReplication() return err } if err = l.r.UpdateCommitID(rl.ID); err != nil { + l.commitLock.Unlock() + log.Fatal("update commit id error %s", err.Error()) l.noticeReplication() return err } - - return nil } else { - return c.Commit() + err = c.Commit() } + + l.commitLock.Unlock() + + return err } diff --git a/ledis/const.go b/ledis/const.go index 3e17a95..a61cb90 100644 --- a/ledis/const.go +++ b/ledis/const.go @@ -4,6 +4,8 @@ import ( "errors" ) +const Version = "0.4" + const ( NoneType byte = 0 KVType byte = 1 @@ -21,8 +23,13 @@ const ( maxDataType byte = 100 - ExpTimeType byte = 101 - ExpMetaType byte = 102 + /* + I make a big mistake about TTL time key format and have to use a new one (change 101 to 103). + You must run the ledis-upgrade-ttl to upgrade db. 
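+ (For reference: the old time key layout was index | 101 | dataType | when | key, while
+ the new one is index | 103 | when | dataType | key, so entries sort by expiration time
+ and the TTL checker can stop at the first key that is not yet due; see
+ expEncodeTimeKey/expDecodeTimeKey in t_ttl.go.)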
+ */ + ObsoleteExpTimeType byte = 101 + ExpMetaType byte = 102 + ExpTimeType byte = 103 MetaType byte = 201 ) @@ -76,7 +83,7 @@ const ( MaxSetMemberSize int = 1024 //max value size - MaxValueSize int = 10 * 1024 * 1024 + MaxValueSize int = 1024 * 1024 * 1024 ) var ( diff --git a/ledis/dump.go b/ledis/dump.go index 1f6d5e2..863255c 100644 --- a/ledis/dump.go +++ b/ledis/dump.go @@ -155,6 +155,11 @@ func (l *Ledis) LoadDump(r io.Reader) (*DumpHead, error) { var key, value []byte + wb := l.ldb.NewWriteBatch() + defer wb.Close() + + n := 0 + for { if err = binary.Read(rb, binary.BigEndian, &keyLen); err != nil && err != io.EOF { return nil, err @@ -182,14 +187,26 @@ func (l *Ledis) LoadDump(r io.Reader) (*DumpHead, error) { return nil, err } - if err = l.ldb.Put(key, value); err != nil { - return nil, err + wb.Put(key, value) + n++ + if n%1024 == 0 { + if err = wb.Commit(); err != nil { + return nil, err + } } + // if err = l.ldb.Put(key, value); err != nil { + // return nil, err + // } + keyBuf.Reset() valueBuf.Reset() } + if err = wb.Commit(); err != nil { + return nil, err + } + deKeyBuf = nil deValueBuf = nil diff --git a/ledis/event.go b/ledis/event.go index 72ac373..2a3b54a 100644 --- a/ledis/event.go +++ b/ledis/event.go @@ -1,101 +1,13 @@ package ledis import ( - "bytes" - "encoding/binary" "errors" "fmt" "github.com/siddontang/go/hack" - "io" "strconv" ) -const ( - kTypeDeleteEvent uint8 = 0 - kTypePutEvent uint8 = 1 -) - -var ( - errInvalidPutEvent = errors.New("invalid put event") - errInvalidDeleteEvent = errors.New("invalid delete event") - errInvalidEvent = errors.New("invalid event") -) - -type eventBatch struct { - bytes.Buffer -} - -func (b *eventBatch) Put(key []byte, value []byte) { - l := uint32(len(key) + len(value) + 1 + 2) - binary.Write(b, binary.BigEndian, l) - b.WriteByte(kTypePutEvent) - keyLen := uint16(len(key)) - binary.Write(b, binary.BigEndian, keyLen) - b.Write(key) - b.Write(value) -} - -func (b *eventBatch) Delete(key []byte) { - l := uint32(len(key) + 1) - binary.Write(b, binary.BigEndian, l) - b.WriteByte(kTypeDeleteEvent) - b.Write(key) -} - -type eventWriter interface { - Put(key []byte, value []byte) - Delete(key []byte) -} - -func decodeEventBatch(w eventWriter, data []byte) error { - for { - if len(data) == 0 { - return nil - } - - if len(data) < 4 { - return io.ErrUnexpectedEOF - } - - l := binary.BigEndian.Uint32(data) - data = data[4:] - if uint32(len(data)) < l { - return io.ErrUnexpectedEOF - } - - if err := decodeEvent(w, data[0:l]); err != nil { - return err - } - data = data[l:] - } -} - -func decodeEvent(w eventWriter, b []byte) error { - if len(b) == 0 { - return errInvalidEvent - } - - switch b[0] { - case kTypePutEvent: - if len(b[1:]) < 2 { - return errInvalidPutEvent - } - - keyLen := binary.BigEndian.Uint16(b[1:3]) - b = b[3:] - if len(b) < int(keyLen) { - return errInvalidPutEvent - } - - w.Put(b[0:keyLen], b[keyLen:]) - case kTypeDeleteEvent: - w.Delete(b[1:]) - default: - return errInvalidEvent - } - - return nil -} +var errInvalidEvent = errors.New("invalid event") func formatEventKey(buf []byte, k []byte) ([]byte, error) { if len(k) < 2 { diff --git a/ledis/event_test.go b/ledis/event_test.go deleted file mode 100644 index d2271e2..0000000 --- a/ledis/event_test.go +++ /dev/null @@ -1,56 +0,0 @@ -package ledis - -import ( - "reflect" - "testing" -) - -type testEvent struct { - Key []byte - Value []byte -} - -type testEventWriter struct { - evs []testEvent -} - -func (w *testEventWriter) Put(key []byte, value []byte) { - e := 
testEvent{key, value} - w.evs = append(w.evs, e) -} - -func (w *testEventWriter) Delete(key []byte) { - e := testEvent{key, nil} - w.evs = append(w.evs, e) -} - -func TestEvent(t *testing.T) { - k1 := []byte("k1") - v1 := []byte("v1") - k2 := []byte("k2") - k3 := []byte("k3") - v3 := []byte("v3") - - b := new(eventBatch) - - b.Put(k1, v1) - b.Delete(k2) - b.Put(k3, v3) - - buf := b.Bytes() - - w := &testEventWriter{} - - ev2 := &testEventWriter{ - evs: []testEvent{ - testEvent{k1, v1}, - testEvent{k2, nil}, - testEvent{k3, v3}}, - } - - if err := decodeEventBatch(w, buf); err != nil { - t.Fatal(err) - } else if !reflect.DeepEqual(w, ev2) { - t.Fatal("not equal") - } -} diff --git a/ledis/ledis.go b/ledis/ledis.go index 10fa49c..6e390c5 100644 --- a/ledis/ledis.go +++ b/ledis/ledis.go @@ -34,6 +34,8 @@ type Ledis struct { commitLock sync.Mutex //allow one write commit at same time lock io.Closer + + tcs [MaxDBNumber]*ttlChecker } func Open(cfg *config.Config) (*Ledis, error) { @@ -80,8 +82,7 @@ func Open(cfg *config.Config) (*Ledis, error) { l.dbs[i] = l.newDB(i) } - l.wg.Add(1) - go l.onDataExpired() + l.checkTTL() return l, nil } @@ -94,12 +95,12 @@ func (l *Ledis) Close() { if l.r != nil { l.r.Close() - l.r = nil + //l.r = nil } if l.lock != nil { l.lock.Close() - l.lock = nil + //l.lock = nil } } @@ -155,7 +156,7 @@ func (l *Ledis) flushAll() error { } func (l *Ledis) IsReadOnly() bool { - if l.cfg.Readonly { + if l.cfg.GetReadonly() { return true } else if l.r != nil { if b, _ := l.r.CommitIDBehind(); b { @@ -165,37 +166,47 @@ func (l *Ledis) IsReadOnly() bool { return false } -func (l *Ledis) onDataExpired() { - defer l.wg.Done() - - var executors []*elimination = make([]*elimination, len(l.dbs)) +func (l *Ledis) checkTTL() { for i, db := range l.dbs { - executors[i] = db.newEliminator() + c := newTTLChecker(db) + + c.register(KVType, db.kvBatch, db.delete) + c.register(ListType, db.listBatch, db.lDelete) + c.register(HashType, db.hashBatch, db.hDelete) + c.register(ZSetType, db.zsetBatch, db.zDelete) + c.register(BitType, db.binBatch, db.bDelete) + c.register(SetType, db.setBatch, db.sDelete) + + l.tcs[i] = c } - tick := time.NewTicker(1 * time.Second) - defer tick.Stop() + if l.cfg.TTLCheckInterval == 0 { + l.cfg.TTLCheckInterval = 1 + } - done := make(chan struct{}) + l.wg.Add(1) + go func() { + defer l.wg.Done() - for { - select { - case <-tick.C: - if l.IsReadOnly() { - break - } + tick := time.NewTicker(time.Duration(l.cfg.TTLCheckInterval) * time.Second) + defer tick.Stop() - go func() { - for _, eli := range executors { - eli.active() + for { + select { + case <-tick.C: + if l.IsReadOnly() { + break } - done <- struct{}{} - }() - <-done - case <-l.quit: - return + + for _, c := range l.tcs { + c.check() + } + case <-l.quit: + return + } } - } + + }() } diff --git a/ledis/ledis_db.go b/ledis/ledis_db.go index c9609fb..ebde98e 100644 --- a/ledis/ledis_db.go +++ b/ledis/ledis_db.go @@ -8,6 +8,7 @@ import ( type ibucket interface { Get(key []byte) ([]byte, error) + GetSlice(key []byte) (store.Slice, error) Put(key []byte, value []byte) error Delete(key []byte) error @@ -100,19 +101,6 @@ func (db *DB) FlushAll() (drop int64, err error) { return } -func (db *DB) newEliminator() *elimination { - eliminator := newEliminator(db) - - eliminator.regRetireContext(KVType, db.kvBatch, db.delete) - eliminator.regRetireContext(ListType, db.listBatch, db.lDelete) - eliminator.regRetireContext(HashType, db.hashBatch, db.hDelete) - eliminator.regRetireContext(ZSetType, db.zsetBatch, db.zDelete) 
- eliminator.regRetireContext(BitType, db.binBatch, db.bDelete) - eliminator.regRetireContext(SetType, db.setBatch, db.sDelete) - - return eliminator -} - func (db *DB) flushType(t *batch, dataType byte) (drop int64, err error) { var deleteFunc func(t *batch, key []byte) int64 var metaDataType byte diff --git a/ledis/replication.go b/ledis/replication.go index 33a5e12..4f02259 100644 --- a/ledis/replication.go +++ b/ledis/replication.go @@ -6,6 +6,7 @@ import ( "github.com/siddontang/go/log" "github.com/siddontang/go/snappy" "github.com/siddontang/ledisdb/rpl" + "github.com/siddontang/ledisdb/store" "io" "time" ) @@ -49,7 +50,12 @@ func (l *Ledis) handleReplication() error { } } - decodeEventBatch(l.rbatch, rl.Data) + if bd, err := store.NewBatchData(rl.Data); err != nil { + log.Error("decode batch log error %s", err.Error()) + return err + } else if err = bd.Replay(l.rbatch); err != nil { + log.Error("replay batch log error %s", err.Error()) + } l.commitLock.Lock() if err = l.rbatch.Commit(); err != nil { @@ -196,7 +202,7 @@ func (l *Ledis) ReadLogsTo(startLogID uint64, w io.Writer) (n int, nextLogID uin } // try to read events, if no events read, try to wait the new event singal until timeout seconds -func (l *Ledis) ReadLogsToTimeout(startLogID uint64, w io.Writer, timeout int) (n int, nextLogID uint64, err error) { +func (l *Ledis) ReadLogsToTimeout(startLogID uint64, w io.Writer, timeout int, quitCh chan struct{}) (n int, nextLogID uint64, err error) { n, nextLogID, err = l.ReadLogsTo(startLogID, w) if err != nil { return @@ -207,6 +213,8 @@ func (l *Ledis) ReadLogsToTimeout(startLogID uint64, w io.Writer, timeout int) ( select { case <-l.r.WaitLog(): case <-time.After(time.Duration(timeout) * time.Second): + case <-quitCh: + return } return l.ReadLogsTo(startLogID, w) } diff --git a/ledis/replication_test.go b/ledis/replication_test.go index fc5e210..287480b 100644 --- a/ledis/replication_test.go +++ b/ledis/replication_test.go @@ -42,6 +42,7 @@ func TestReplication(t *testing.T) { if err != nil { t.Fatal(err) } + defer master.Close() cfgS := config.NewConfigDefault() cfgS.DataDir = "/tmp/test_repl/slave" @@ -54,6 +55,7 @@ func TestReplication(t *testing.T) { if err != nil { t.Fatal(err) } + defer slave.Close() db, _ := master.Select(0) db.Set([]byte("a"), []byte("value")) diff --git a/ledis/t_kv.go b/ledis/t_kv.go index 497dcf2..37315c1 100644 --- a/ledis/t_kv.go +++ b/ledis/t_kv.go @@ -3,6 +3,7 @@ package ledis import ( "errors" "github.com/siddontang/go/num" + "github.com/siddontang/ledisdb/store" "time" ) @@ -164,6 +165,16 @@ func (db *DB) Get(key []byte) ([]byte, error) { return db.bucket.Get(key) } +func (db *DB) GetSlice(key []byte) (store.Slice, error) { + if err := checkKeySize(key); err != nil { + return nil, err + } + + key = db.encodeKVKey(key) + + return db.bucket.GetSlice(key) +} + func (db *DB) GetSet(key []byte, value []byte) ([]byte, error) { if err := checkKeySize(key); err != nil { return nil, err @@ -300,6 +311,32 @@ func (db *DB) SetNX(key []byte, value []byte) (int64, error) { return n, err } +func (db *DB) SetEX(key []byte, duration int64, value []byte) error { + if err := checkKeySize(key); err != nil { + return err + } else if err := checkValueSize(value); err != nil { + return err + } else if duration <= 0 { + return errExpireValue + } + + ek := db.encodeKVKey(key) + + t := db.kvBatch + + t.Lock() + defer t.Unlock() + + t.Put(ek, value) + db.expireAt(t, KVType, key, time.Now().Unix()+duration) + + if err := t.Commit(); err != nil { + return err + } + + 
return nil +} + func (db *DB) flush() (drop int64, err error) { t := db.kvBatch t.Lock() diff --git a/ledis/t_kv_test.go b/ledis/t_kv_test.go index b0f3437..77ce706 100644 --- a/ledis/t_kv_test.go +++ b/ledis/t_kv_test.go @@ -116,3 +116,32 @@ func TestKVFlush(t *testing.T) { } } } + +func TestKVSetEX(t *testing.T) { + db := getTestDB() + db.FlushAll() + + key := []byte("testdb_kv_c") + + if err := db.SetEX(key, 10, []byte("hello world")); err != nil { + t.Fatal(err) + } + + v, err := db.Get(key) + if err != nil { + t.Fatal(err) + } else if string(v) == "" { + t.Fatal("v is nil") + } + + if n, err := db.TTL(key); err != nil { + t.Fatal(err) + } else if n != 10 { + t.Fatal(n) + } + + if v, _ := db.Get(key); string(v) != "hello world" { + t.Fatal(string(v)) + } + +} diff --git a/ledis/t_ttl.go b/ledis/t_ttl.go index 2d5e2d5..f16a735 100644 --- a/ledis/t_ttl.go +++ b/ledis/t_ttl.go @@ -4,6 +4,7 @@ import ( "encoding/binary" "errors" "github.com/siddontang/ledisdb/store" + "sync" "time" ) @@ -12,12 +13,16 @@ var ( errExpTimeKey = errors.New("invalid expire time key") ) -type retireCallback func(*batch, []byte) int64 +type onExpired func(*batch, []byte) int64 -type elimination struct { - db *DB - exp2Tx []*batch - exp2Retire []retireCallback +type ttlChecker struct { + sync.Mutex + db *DB + txs []*batch + cbs []onExpired + + //next check time + nc int64 } var errExpType = errors.New("invalid expire type") @@ -27,12 +32,14 @@ func (db *DB) expEncodeTimeKey(dataType byte, key []byte, when int64) []byte { buf[0] = db.index buf[1] = ExpTimeType - buf[2] = dataType - pos := 3 + pos := 2 binary.BigEndian.PutUint64(buf[pos:], uint64(when)) pos += 8 + buf[pos] = dataType + pos++ + copy(buf[pos:], key) return buf @@ -64,7 +71,7 @@ func (db *DB) expDecodeTimeKey(tk []byte) (byte, []byte, int64, error) { return 0, nil, 0, errExpTimeKey } - return tk[2], tk[11:], int64(binary.BigEndian.Uint64(tk[3:])), nil + return tk[10], tk[11:], int64(binary.BigEndian.Uint64(tk[2:])), nil } func (db *DB) expire(t *batch, dataType byte, key []byte, duration int64) { @@ -77,6 +84,9 @@ func (db *DB) expireAt(t *batch, dataType byte, key []byte, when int64) { t.Put(tk, mk) t.Put(mk, PutInt64(when)) + + tc := db.l.tcs[db.index] + tc.setNextCheckTime(when, false) } func (db *DB) ttl(dataType byte, key []byte) (t int64, err error) { @@ -111,48 +121,68 @@ func (db *DB) rmExpire(t *batch, dataType byte, key []byte) (int64, error) { } } -////////////////////////////////////////////////////////// -// -////////////////////////////////////////////////////////// - -func newEliminator(db *DB) *elimination { - eli := new(elimination) - eli.db = db - eli.exp2Tx = make([]*batch, maxDataType) - eli.exp2Retire = make([]retireCallback, maxDataType) - return eli +func newTTLChecker(db *DB) *ttlChecker { + c := new(ttlChecker) + c.db = db + c.txs = make([]*batch, maxDataType) + c.cbs = make([]onExpired, maxDataType) + c.nc = 0 + return c } -func (eli *elimination) regRetireContext(dataType byte, t *batch, onRetire retireCallback) { - - // todo .. need to ensure exist - mapExpMetaType[expType] - - eli.exp2Tx[dataType] = t - eli.exp2Retire[dataType] = onRetire +func (c *ttlChecker) register(dataType byte, t *batch, f onExpired) { + c.txs[dataType] = t + c.cbs[dataType] = f } -// call by outside ... 
(from *db to another *db) -func (eli *elimination) active() { +func (c *ttlChecker) setNextCheckTime(when int64, force bool) { + c.Lock() + if force { + c.nc = when + } else if !force && c.nc > when { + c.nc = when + } + c.Unlock() +} + +func (c *ttlChecker) check() { now := time.Now().Unix() - db := eli.db + + c.Lock() + nc := c.nc + c.Unlock() + + if now < nc { + return + } + + nc = now + 3600 + + db := c.db dbGet := db.bucket.Get minKey := db.expEncodeTimeKey(NoneType, nil, 0) - maxKey := db.expEncodeTimeKey(maxDataType, nil, now) + maxKey := db.expEncodeTimeKey(maxDataType, nil, nc) it := db.bucket.RangeLimitIterator(minKey, maxKey, store.RangeROpen, 0, -1) for ; it.Valid(); it.Next() { tk := it.RawKey() mk := it.RawValue() - dt, k, _, err := db.expDecodeTimeKey(tk) + dt, k, nt, err := db.expDecodeTimeKey(tk) if err != nil { continue } - t := eli.exp2Tx[dt] - onRetire := eli.exp2Retire[dt] - if tk == nil || onRetire == nil { + if nt > now { + //the next ttl check time is nt! + nc = nt + break + } + + t := c.txs[dt] + cb := c.cbs[dt] + if tk == nil || cb == nil { continue } @@ -161,7 +191,7 @@ func (eli *elimination) active() { if exp, err := Int64(dbGet(mk)); err == nil { // check expire again if exp <= now { - onRetire(t, k) + cb(t, k) t.Delete(tk) t.Delete(mk) @@ -174,5 +204,7 @@ func (eli *elimination) active() { } it.Close() + c.setNextCheckTime(nc, true) + return } diff --git a/ledis/tx.go b/ledis/tx.go index 5c1c52a..03d6c5b 100644 --- a/ledis/tx.go +++ b/ledis/tx.go @@ -16,7 +16,7 @@ type Tx struct { tx *store.Tx - eb *eventBatch + data *store.BatchData } func (db *DB) IsTransaction() bool { @@ -32,7 +32,7 @@ func (db *DB) Begin() (*Tx, error) { tx := new(Tx) - tx.eb = new(eventBatch) + tx.data = &store.BatchData{} tx.DB = new(DB) tx.DB.l = db.l @@ -71,7 +71,8 @@ func (tx *Tx) Commit() error { return ErrTxDone } - err := tx.l.handleCommit(tx.eb, tx.tx) + err := tx.l.handleCommit(tx.data, tx.tx) + tx.data.Reset() tx.tx = nil @@ -88,7 +89,7 @@ func (tx *Tx) Rollback() error { } err := tx.tx.Rollback() - tx.eb.Reset() + tx.data.Reset() tx.tx = nil tx.l.wLock.Unlock() diff --git a/ledis/tx_test.go b/ledis/tx_test.go index e21c0a8..26888b5 100644 --- a/ledis/tx_test.go +++ b/ledis/tx_test.go @@ -195,7 +195,7 @@ func testTx(t *testing.T, name string) { cfg.DBName = name cfg.LMDB.MapSize = 10 * 1024 * 1024 - cfg.UseReplication = true + //cfg.UseReplication = true os.RemoveAll(cfg.DataDir) diff --git a/rpl/file_io.go b/rpl/file_io.go new file mode 100644 index 0000000..08e9d2e --- /dev/null +++ b/rpl/file_io.go @@ -0,0 +1,362 @@ +package rpl + +import ( + "fmt" + "github.com/edsrzf/mmap-go" + "github.com/siddontang/go/log" + "io" + "os" +) + +//like leveldb or rocksdb file interface, haha! 
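+ // Both the plain *os.File-backed and the mmap-backed implementations below
+ // satisfy these two interfaces; newWriteFile/newReadFile at the bottom of this
+ // file pick one or the other according to the replication use_mmap setting.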
+ +type writeFile interface { + Sync() error + Write(b []byte) (n int, err error) + Close() error + ReadAt(buf []byte, offset int64) (int, error) + Truncate(size int64) error + SetOffset(o int64) + Name() string + Size() int + Offset() int64 +} + +type readFile interface { + ReadAt(buf []byte, offset int64) (int, error) + Close() error + Size() int + Name() string +} + +type rawWriteFile struct { + writeFile + f *os.File + offset int64 + name string +} + +func newRawWriteFile(name string, size int64) (writeFile, error) { + m := new(rawWriteFile) + var err error + + m.name = name + + m.f, err = os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0644) + if err != nil { + return nil, err + } + + return m, nil +} + +func (m *rawWriteFile) Close() error { + if err := m.f.Truncate(m.offset); err != nil { + return fmt.Errorf("close truncate %s error %s", m.name, err.Error()) + } + + if err := m.f.Close(); err != nil { + return fmt.Errorf("close %s error %s", m.name, err.Error()) + } + + return nil +} + +func (m *rawWriteFile) Sync() error { + return m.f.Sync() +} + +func (m *rawWriteFile) Write(b []byte) (n int, err error) { + n, err = m.f.WriteAt(b, m.offset) + if err != nil { + return + } else if n != len(b) { + err = io.ErrShortWrite + return + } + + m.offset += int64(n) + return +} + +func (m *rawWriteFile) ReadAt(buf []byte, offset int64) (int, error) { + return m.f.ReadAt(buf, offset) +} + +func (m *rawWriteFile) Truncate(size int64) error { + var err error + if err = m.f.Truncate(size); err != nil { + return err + } + + if m.offset > size { + m.offset = size + } + return nil +} + +func (m *rawWriteFile) SetOffset(o int64) { + m.offset = o +} + +func (m *rawWriteFile) Offset() int64 { + return m.offset +} + +func (m *rawWriteFile) Name() string { + return m.name +} + +func (m *rawWriteFile) Size() int { + st, _ := m.f.Stat() + return int(st.Size()) +} + +type rawReadFile struct { + readFile + + f *os.File + name string +} + +func newRawReadFile(name string) (readFile, error) { + m := new(rawReadFile) + + var err error + m.f, err = os.Open(name) + m.name = name + + if err != nil { + return nil, err + } + + return m, err +} + +func (m *rawReadFile) Close() error { + return m.f.Close() +} + +func (m *rawReadFile) Size() int { + st, _ := m.f.Stat() + return int(st.Size()) +} + +func (m *rawReadFile) ReadAt(b []byte, offset int64) (int, error) { + return m.f.ReadAt(b, offset) +} + +func (m *rawReadFile) Name() string { + return m.name +} + +///////////////////////////////////////////////// + +type mmapWriteFile struct { + writeFile + + f *os.File + m mmap.MMap + name string + size int64 + offset int64 +} + +func newMmapWriteFile(name string, size int64) (writeFile, error) { + m := new(mmapWriteFile) + + m.name = name + + var err error + + m.f, err = os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0644) + if err != nil { + return nil, err + } + + if size == 0 { + st, _ := m.f.Stat() + size = st.Size() + } + + if err = m.f.Truncate(size); err != nil { + return nil, err + } + + if m.m, err = mmap.Map(m.f, mmap.RDWR, 0); err != nil { + return nil, err + } + + m.size = size + m.offset = 0 + return m, nil +} + +func (m *mmapWriteFile) Size() int { + return int(m.size) +} + +func (m *mmapWriteFile) Sync() error { + return m.m.Flush() +} + +func (m *mmapWriteFile) Close() error { + if err := m.m.Unmap(); err != nil { + return fmt.Errorf("unmap %s error %s", m.name, err.Error()) + } + + if err := m.f.Truncate(m.offset); err != nil { + return fmt.Errorf("close truncate %s error %s", m.name, err.Error()) + } + + if err := 
m.f.Close(); err != nil { + return fmt.Errorf("close %s error %s", m.name, err.Error()) + } + + return nil +} + +func (m *mmapWriteFile) Write(b []byte) (n int, err error) { + extra := int64(len(b)) - (m.size - m.offset) + if extra > 0 { + newSize := m.size + extra + m.size/10 + if err = m.Truncate(newSize); err != nil { + return + } + m.size = newSize + } + + n = copy(m.m[m.offset:], b) + if n != len(b) { + return 0, io.ErrShortWrite + } + + m.offset += int64(len(b)) + return len(b), nil +} + +func (m *mmapWriteFile) ReadAt(buf []byte, offset int64) (int, error) { + if offset > m.offset { + return 0, fmt.Errorf("invalid offset %d", offset) + } + + n := copy(buf, m.m[offset:m.offset]) + if n != len(buf) { + return n, io.ErrUnexpectedEOF + } + + return n, nil +} + +func (m *mmapWriteFile) Truncate(size int64) error { + var err error + if err = m.m.Unmap(); err != nil { + return err + } + + if err = m.f.Truncate(size); err != nil { + return err + } + + if m.m, err = mmap.Map(m.f, mmap.RDWR, 0); err != nil { + return err + } + + m.size = size + if m.offset > m.size { + m.offset = m.size + } + return nil +} + +func (m *mmapWriteFile) SetOffset(o int64) { + m.offset = o +} + +func (m *mmapWriteFile) Offset() int64 { + return m.offset +} + +func (m *mmapWriteFile) Name() string { + return m.name +} + +type mmapReadFile struct { + readFile + + f *os.File + m mmap.MMap + name string +} + +func newMmapReadFile(name string) (readFile, error) { + m := new(mmapReadFile) + + m.name = name + + var err error + m.f, err = os.Open(name) + if err != nil { + return nil, err + } + + m.m, err = mmap.Map(m.f, mmap.RDONLY, 0) + return m, err +} + +func (m *mmapReadFile) ReadAt(buf []byte, offset int64) (int, error) { + if int64(offset) > int64(len(m.m)) { + return 0, fmt.Errorf("invalid offset %d", offset) + } + + n := copy(buf, m.m[offset:]) + if n != len(buf) { + return n, io.ErrUnexpectedEOF + } + + return n, nil +} + +func (m *mmapReadFile) Close() error { + if m.m != nil { + if err := m.m.Unmap(); err != nil { + log.Error("unmap %s error %s", m.name, err.Error()) + } + m.m = nil + } + + if m.f != nil { + if err := m.f.Close(); err != nil { + log.Error("close %s error %s", m.name, err.Error()) + } + m.f = nil + } + + return nil +} + +func (m *mmapReadFile) Size() int { + return len(m.m) +} + +func (m *mmapReadFile) Name() string { + return m.name +} + +///////////////////////////////////// + +func newWriteFile(useMmap bool, name string, size int64) (writeFile, error) { + if useMmap { + return newMmapWriteFile(name, size) + } else { + return newRawWriteFile(name, size) + } +} + +func newReadFile(useMmap bool, name string) (readFile, error) { + if useMmap { + return newMmapReadFile(name) + } else { + return newRawReadFile(name) + } +} diff --git a/rpl/file_store.go b/rpl/file_store.go index 91ea418..161ab8d 100644 --- a/rpl/file_store.go +++ b/rpl/file_store.go @@ -2,228 +2,413 @@ package rpl import ( "fmt" - "github.com/siddontang/go/hack" - "github.com/siddontang/go/ioutil2" "github.com/siddontang/go/log" + "github.com/siddontang/go/num" + "github.com/siddontang/ledisdb/config" "io/ioutil" "os" - "path" - "strconv" - "strings" + "sort" "sync" + "time" ) const ( - defaultMaxLogFileSize = 1024 * 1024 * 1024 + defaultMaxLogFileSize = int64(256 * 1024 * 1024) + + maxLogFileSize = int64(1024 * 1024 * 1024) + + defaultLogNumInFile = int64(1024 * 1024) ) /* -index file format: -ledis-bin.00001 -ledis-bin.00002 -ledis-bin.00003 + File Store: + 00000001.data + 00000001.meta + 00000002.data + 00000002.meta + + data: 
log1 data | log2 data | magic data + + if data has no magic data, it means that we don't close replication gracefully. + so we must repair the log data + log data: id (bigendian uint64), create time (bigendian uint32), compression (byte), data len(bigendian uint32), data + split data = log0 data + [padding 0] -> file % pagesize() == 0 + + meta: log1 offset | log2 offset + log offset: bigendian uint32 | bigendian uint32 + + //sha1 of github.com/siddontang/ledisdb 20 bytes + magic data = "\x1c\x1d\xb8\x88\xff\x9e\x45\x55\x40\xf0\x4c\xda\xe0\xce\x47\xde\x65\x48\x71\x17" + + we must guarantee that the log id is monotonic increment strictly. + if log1's id is 1, log2 must be 2 */ type FileStore struct { LogStore - m sync.Mutex + cfg *config.Config - maxFileSize int + base string - first uint64 - last uint64 + rm sync.RWMutex + wm sync.Mutex - logFile *os.File - logNames []string - nextLogIndex int64 + rs tableReaders + w *tableWriter - indexName string - - path string + quit chan struct{} } -func NewFileStore(path string) (*FileStore, error) { +func NewFileStore(base string, cfg *config.Config) (*FileStore, error) { s := new(FileStore) - if err := os.MkdirAll(path, 0755); err != nil { + s.quit = make(chan struct{}) + + var err error + + if err = os.MkdirAll(base, 0755); err != nil { return nil, err } - s.path = path + s.base = base - s.maxFileSize = defaultMaxLogFileSize + if cfg.Replication.MaxLogFileSize == 0 { + cfg.Replication.MaxLogFileSize = defaultMaxLogFileSize + } - s.first = 0 - s.last = 0 + cfg.Replication.MaxLogFileSize = num.MinInt64(cfg.Replication.MaxLogFileSize, maxLogFileSize) - s.logNames = make([]string, 0, 16) + s.cfg = cfg - if err := s.loadIndex(); err != nil { + if err = s.load(); err != nil { return nil, err } + index := int64(1) + if len(s.rs) != 0 { + index = s.rs[len(s.rs)-1].index + 1 + } + + s.w = newTableWriter(s.base, index, cfg.Replication.MaxLogFileSize, cfg.Replication.UseMmap) + s.w.SetSyncType(cfg.Replication.SyncLog) + + go s.checkTableReaders() + return s, nil } -func (s *FileStore) SetMaxFileSize(size int) { - s.maxFileSize = size -} +func (s *FileStore) GetLog(id uint64, l *Log) error { + //first search in table writer + if err := s.w.GetLog(id, l); err == nil { + return nil + } else if err != ErrLogNotFound { + return err + } -func (s *FileStore) GetLog(id uint64, log *Log) error { - panic("not implementation") - return nil -} + s.rm.RLock() + t := s.rs.Search(id) -func (s *FileStore) SeekLog(id uint64, log *Log) error { - panic("not implementation") - return nil + if t == nil { + s.rm.RUnlock() + + return ErrLogNotFound + } + + err := t.GetLog(id, l) + s.rm.RUnlock() + + return err } func (s *FileStore) FirstID() (uint64, error) { - panic("not implementation") - return 0, nil + id := uint64(0) + + s.rm.RLock() + if len(s.rs) > 0 { + id = s.rs[0].first + } else { + id = 0 + } + s.rm.RUnlock() + + if id > 0 { + return id, nil + } + + //if id = 0, + + return s.w.First(), nil } func (s *FileStore) LastID() (uint64, error) { - panic("not implementation") - return 0, nil + id := s.w.Last() + if id > 0 { + return id, nil + } + + //if table writer has no last id, we may find in the last table reader + + s.rm.RLock() + if len(s.rs) > 0 { + id = s.rs[len(s.rs)-1].last + } + s.rm.RUnlock() + + return id, nil } -func (s *FileStore) StoreLog(log *Log) error { - panic("not implementation") +func (s *FileStore) StoreLog(l *Log) error { + s.wm.Lock() + err := s.storeLog(l) + s.wm.Unlock() + return err +} + +func (s *FileStore) storeLog(l *Log) error { + err := 
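// Illustrative sketch of the record head layout described in the comment
// above: 8-byte big-endian id, 4-byte big-endian create time, one compression
// byte and a 4-byte big-endian data length, 17 bytes in total, followed by the
// raw data. encodeHead is a hypothetical helper (the real encoder is
// Log.Encode in rpl/log.go) and assumes encoding/binary is imported.
func encodeHead(id uint64, createTime uint32, compression byte, dataLen uint32) [17]byte {
	var h [17]byte
	binary.BigEndian.PutUint64(h[0:8], id)
	binary.BigEndian.PutUint32(h[8:12], createTime)
	h[12] = compression
	binary.BigEndian.PutUint32(h[13:17], dataLen)
	return h
}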
s.w.StoreLog(l) + if err == nil { + return nil + } else if err != errTableNeedFlush { + return err + } + + var r *tableReader + r, err = s.w.Flush() + + if err != nil { + log.Fatal("write table flush error %s, can not store!!!", err.Error()) + + s.w.Close() + + return err + } + + s.rm.Lock() + s.rs = append(s.rs, r) + s.rm.Unlock() + + err = s.w.StoreLog(l) + + return err +} + +func (s *FileStore) PurgeExpired(n int64) error { + s.rm.Lock() + + purges := []*tableReader{} + + t := uint32(time.Now().Unix() - int64(n)) + + for i, r := range s.rs { + if r.lastTime > t { + purges = s.rs[0:i] + s.rs = s.rs[i:] + break + } + } + + s.rm.Unlock() + + s.purgeTableReaders(purges) + return nil } -func (s *FileStore) StoreLogs(logs []*Log) error { - panic("not implementation") - return nil -} - -func (s *FileStore) Purge(n uint64) error { - panic("not implementation") - return nil -} - -func (s *FileStore) PuregeExpired(n int64) error { - panic("not implementation") - return nil +func (s *FileStore) Sync() error { + return s.w.Sync() } func (s *FileStore) Clear() error { - panic("not implementation") + s.wm.Lock() + s.rm.Lock() + + defer func() { + s.rm.Unlock() + s.wm.Unlock() + }() + + s.w.Close() + + for i := range s.rs { + s.rs[i].Close() + } + + s.rs = tableReaders{} + + if err := os.RemoveAll(s.base); err != nil { + return err + } + + if err := os.MkdirAll(s.base, 0755); err != nil { + return err + } + + s.w = newTableWriter(s.base, 1, s.cfg.Replication.MaxLogFileSize, s.cfg.Replication.UseMmap) + return nil } func (s *FileStore) Close() error { - panic("not implementation") + close(s.quit) + + s.wm.Lock() + s.rm.Lock() + + if r, err := s.w.Flush(); err != nil { + if err != errNilHandler { + log.Error("close err: %s", err.Error()) + } + } else { + r.Close() + s.w.Close() + } + + for i := range s.rs { + s.rs[i].Close() + } + + s.rs = tableReaders{} + + s.rm.Unlock() + s.wm.Unlock() + return nil } -func (s *FileStore) flushIndex() error { - data := strings.Join(s.logNames, "\n") +func (s *FileStore) checkTableReaders() { + t := time.NewTicker(60 * time.Second) + defer t.Stop() + for { + select { + case <-t.C: + s.rm.Lock() - if err := ioutil2.WriteFileAtomic(s.indexName, hack.Slice(data), 0644); err != nil { - log.Error("flush index error %s", err.Error()) + for _, r := range s.rs { + if !r.Keepalived() { + r.Close() + } + } + + purges := []*tableReader{} + maxNum := s.cfg.Replication.MaxLogFileNum + num := len(s.rs) + if num > maxNum { + purges = s.rs[:num-maxNum] + s.rs = s.rs[num-maxNum:] + } + + s.rm.Unlock() + + s.purgeTableReaders(purges) + + case <-s.quit: + return + } + } +} + +func (s *FileStore) purgeTableReaders(purges []*tableReader) { + for _, r := range purges { + dataName := fmtTableDataName(r.base, r.index) + metaName := fmtTableMetaName(r.base, r.index) + r.Close() + if err := os.Remove(dataName); err != nil { + log.Error("purge table data %s err: %s", dataName, err.Error()) + } + if err := os.Remove(metaName); err != nil { + log.Error("purge table meta %s err: %s", metaName, err.Error()) + } + + } +} + +func (s *FileStore) load() error { + fs, err := ioutil.ReadDir(s.base) + if err != nil { return err } - return nil -} + s.rs = make(tableReaders, 0, len(fs)) -func (s *FileStore) fileExists(name string) bool { - p := path.Join(s.path, name) - _, err := os.Stat(p) - return !os.IsNotExist(err) -} - -func (s *FileStore) loadIndex() error { - s.indexName = path.Join(s.path, fmt.Sprintf("ledis-bin.index")) - if _, err := os.Stat(s.indexName); os.IsNotExist(err) { - //no index file, 
nothing to do - } else { - indexData, err := ioutil.ReadFile(s.indexName) - if err != nil { - return err - } - - lines := strings.Split(string(indexData), "\n") - for _, line := range lines { - line = strings.Trim(line, "\r\n ") - if len(line) == 0 { - continue - } - - if s.fileExists(line) { - s.logNames = append(s.logNames, line) + var r *tableReader + var index int64 + for _, f := range fs { + if _, err := fmt.Sscanf(f.Name(), "%08d.data", &index); err == nil { + if r, err = newTableReader(s.base, index, s.cfg.Replication.UseMmap); err != nil { + log.Error("load table %s err: %s", f.Name(), err.Error()) } else { - log.Info("log %s has not exists", line) + s.rs = append(s.rs, r) } } } - var err error - if len(s.logNames) == 0 { - s.nextLogIndex = 1 - } else { - lastName := s.logNames[len(s.logNames)-1] + if err := s.rs.check(); err != nil { + return err + } - if s.nextLogIndex, err = strconv.ParseInt(path.Ext(lastName)[1:], 10, 64); err != nil { - log.Error("invalid logfile name %s", err.Error()) - return err + return nil +} + +type tableReaders []*tableReader + +func (ts tableReaders) Len() int { + return len(ts) +} + +func (ts tableReaders) Swap(i, j int) { + ts[i], ts[j] = ts[j], ts[i] +} + +func (ts tableReaders) Less(i, j int) bool { + return ts[i].first < ts[j].first +} + +func (ts tableReaders) Search(id uint64) *tableReader { + i, j := 0, len(ts)-1 + + for i <= j { + h := i + (j-i)/2 + + if ts[h].first <= id && id <= ts[h].last { + return ts[h] + } else if ts[h].last < id { + i = h + 1 + } else { + j = h - 1 + } + } + + return nil +} + +func (ts tableReaders) check() error { + if len(ts) == 0 { + return nil + } + + sort.Sort(ts) + + first := ts[0].first + last := ts[0].last + index := ts[0].index + + if first == 0 || first > last { + return fmt.Errorf("invalid log in table %s", ts[0]) + } + + for i := 1; i < len(ts); i++ { + if ts[i].first <= last { + return fmt.Errorf("invalid first log id %d in table %s", ts[i].first, ts[i]) } - //like mysql, if server restart, a new log will create - s.nextLogIndex++ - } + if ts[i].index <= index { + return fmt.Errorf("invalid index %d in table %s", ts[i].index, ts[i]) + } + first = ts[i].first + last = ts[i].last + index = ts[i].index + } return nil } - -func (s *FileStore) openNewLogFile() error { - var err error - lastName := s.formatLogFileName(s.nextLogIndex) - - logPath := path.Join(s.path, lastName) - if s.logFile, err = os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY, 0644); err != nil { - log.Error("open new logfile error %s", err.Error()) - return err - } - - s.logNames = append(s.logNames, lastName) - - if err = s.flushIndex(); err != nil { - return err - } - - return nil -} - -func (s *FileStore) checkLogFileSize() bool { - if s.logFile == nil { - return false - } - - st, _ := s.logFile.Stat() - if st.Size() >= int64(s.maxFileSize) { - s.closeLog() - return true - } - - return false -} - -func (s *FileStore) closeLog() { - if s.logFile == nil { - return - } - - s.nextLogIndex++ - - s.logFile.Close() - s.logFile = nil -} - -func (s *FileStore) formatLogFileName(index int64) string { - return fmt.Sprintf("ledis-bin.%07d", index) -} diff --git a/rpl/file_table.go b/rpl/file_table.go new file mode 100644 index 0000000..b4dcbfd --- /dev/null +++ b/rpl/file_table.go @@ -0,0 +1,570 @@ +package rpl + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "github.com/siddontang/go/log" + "github.com/siddontang/go/sync2" + "io" + "path" + "sync" + "time" +) + +var ( + magic = 
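// Illustrative usage of tableReaders.Search above: check() sorts the readers
// by first id and rejects overlapping [first, last] ranges, so the binary
// search can return as soon as an id falls inside one range. exampleSearch is
// hypothetical; the new table_readers_test.go in this patch exercises the
// same cases.
func exampleSearch() {
	ts := tableReaders{
		&tableReader{index: 1, first: 1, last: 10},
		&tableReader{index: 2, first: 11, last: 20},
	}
	_ = ts.Search(15) // returns the second reader: 11 <= 15 <= 20
	_ = ts.Search(25) // returns nil: no table stores id 25
}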
[]byte("\x1c\x1d\xb8\x88\xff\x9e\x45\x55\x40\xf0\x4c\xda\xe0\xce\x47\xde\x65\x48\x71\x17") + errTableNeedFlush = errors.New("write table need flush") + errNilHandler = errors.New("nil write handler") +) + +const tableReaderKeepaliveInterval int64 = 30 + +func fmtTableDataName(base string, index int64) string { + return path.Join(base, fmt.Sprintf("%08d.data", index)) +} + +func fmtTableMetaName(base string, index int64) string { + return path.Join(base, fmt.Sprintf("%08d.meta", index)) +} + +type tableReader struct { + sync.Mutex + + base string + index int64 + + data readFile + meta readFile + + first uint64 + last uint64 + + lastTime uint32 + + lastReadTime sync2.AtomicInt64 + + useMmap bool +} + +func newTableReader(base string, index int64, useMmap bool) (*tableReader, error) { + if index <= 0 { + return nil, fmt.Errorf("invalid index %d", index) + } + t := new(tableReader) + t.base = base + t.index = index + + t.useMmap = useMmap + + var err error + + if err = t.check(); err != nil { + log.Error("check %d error: %s, try to repair", t.index, err.Error()) + + if err = t.repair(); err != nil { + log.Error("repair %d error: %s", t.index, err.Error()) + return nil, err + } + } + + t.close() + + return t, nil +} + +func (t *tableReader) String() string { + return fmt.Sprintf("%d", t.index) +} + +func (t *tableReader) Close() { + t.Lock() + + t.close() + + t.Unlock() +} + +func (t *tableReader) close() { + if t.data != nil { + t.data.Close() + t.data = nil + } + + if t.meta != nil { + t.meta.Close() + t.meta = nil + } +} + +func (t *tableReader) Keepalived() bool { + l := t.lastReadTime.Get() + if l > 0 && time.Now().Unix()-l > tableReaderKeepaliveInterval { + return false + } + + return true +} + +func (t *tableReader) getLogPos(index int) (uint32, error) { + var buf [4]byte + if _, err := t.meta.ReadAt(buf[0:4], int64(index)*4); err != nil { + return 0, err + } + + return binary.BigEndian.Uint32(buf[0:4]), nil +} + +func (t *tableReader) checkData() error { + var err error + //check will use raw file mode + if t.data, err = newReadFile(false, fmtTableDataName(t.base, t.index)); err != nil { + return err + } + + if t.data.Size() < len(magic) { + return fmt.Errorf("data file %s size %d too short", t.data.Name(), t.data.Size()) + } + + buf := make([]byte, len(magic)) + if _, err := t.data.ReadAt(buf, int64(t.data.Size()-len(magic))); err != nil { + return err + } + + if !bytes.Equal(magic, buf) { + return fmt.Errorf("data file %s invalid magic data %q", t.data.Name(), buf) + } + + return nil +} + +func (t *tableReader) checkMeta() error { + var err error + //check will use raw file mode + if t.meta, err = newReadFile(false, fmtTableMetaName(t.base, t.index)); err != nil { + return err + } + + if t.meta.Size()%4 != 0 || t.meta.Size() == 0 { + return fmt.Errorf("meta file %s invalid offset len %d, must 4 multiple and not 0", t.meta.Name(), t.meta.Size()) + } + + return nil +} + +func (t *tableReader) check() error { + var err error + + if err := t.checkMeta(); err != nil { + return err + } + + if err := t.checkData(); err != nil { + return err + } + + firstLogPos, _ := t.getLogPos(0) + lastLogPos, _ := t.getLogPos(t.meta.Size()/4 - 1) + + if firstLogPos != 0 { + return fmt.Errorf("invalid first log pos %d, must 0", firstLogPos) + } + + var l Log + if _, err = t.decodeLogHead(&l, t.data, int64(firstLogPos)); err != nil { + return fmt.Errorf("decode first log err %s", err.Error()) + } + + t.first = l.ID + var n int64 + if n, err = t.decodeLogHead(&l, t.data, int64(lastLogPos)); err != nil { + 
return fmt.Errorf("decode last log err %s", err.Error()) + } else if n+int64(len(magic)) != int64(t.data.Size()) { + return fmt.Errorf("extra log data at offset %d", n) + } + + t.last = l.ID + t.lastTime = l.CreateTime + + if t.first > t.last { + return fmt.Errorf("invalid log table first %d > last %d", t.first, t.last) + } else if (t.last - t.first + 1) != uint64(t.meta.Size()/4) { + return fmt.Errorf("invalid log table, first %d, last %d, and log num %d", t.first, t.last, t.meta.Size()/4) + } + + return nil +} + +func (t *tableReader) repair() error { + t.close() + + var err error + var data writeFile + var meta writeFile + + //repair will use raw file mode + data, err = newWriteFile(false, fmtTableDataName(t.base, t.index), 0) + data.SetOffset(int64(data.Size())) + + meta, err = newWriteFile(false, fmtTableMetaName(t.base, t.index), int64(defaultLogNumInFile*4)) + + var l Log + var pos int64 = 0 + var nextPos int64 = 0 + b := make([]byte, 4) + + t.first = 0 + t.last = 0 + + for { + nextPos, err = t.decodeLogHead(&l, data, pos) + if err != nil { + //if error, we may lost all logs from pos + log.Error("%s may lost logs from %d", data.Name(), pos) + break + } + + if l.ID == 0 { + log.Error("%s may lost logs from %d, invalid log 0", data.Name(), pos) + break + } + + if t.first == 0 { + t.first = l.ID + } + + if t.last == 0 { + t.last = l.ID + } else if l.ID <= t.last { + log.Error("%s may lost logs from %d, invalid logid %d", t.data.Name(), pos, l.ID) + break + } + + t.last = l.ID + t.lastTime = l.CreateTime + + binary.BigEndian.PutUint32(b, uint32(pos)) + meta.Write(b) + + pos = nextPos + + t.lastTime = l.CreateTime + } + + var e error + if err := meta.Close(); err != nil { + e = err + } + + data.SetOffset(pos) + + if _, err = data.Write(magic); err != nil { + log.Error("write magic error %s", err.Error()) + } + + if err = data.Close(); err != nil { + return err + } + + return e +} + +func (t *tableReader) decodeLogHead(l *Log, r io.ReaderAt, pos int64) (int64, error) { + dataLen, err := l.DecodeHeadAt(r, pos) + if err != nil { + return 0, err + } + + return pos + int64(l.HeadSize()) + int64(dataLen), nil +} + +func (t *tableReader) GetLog(id uint64, l *Log) error { + if id < t.first || id > t.last { + return ErrLogNotFound + } + + t.lastReadTime.Set(time.Now().Unix()) + + t.Lock() + + if err := t.openTable(); err != nil { + t.close() + t.Unlock() + return err + } + t.Unlock() + + pos, err := t.getLogPos(int(id - t.first)) + if err != nil { + return err + } + + if err := l.DecodeAt(t.data, int64(pos)); err != nil { + return err + } else if l.ID != id { + return fmt.Errorf("invalid log id %d != %d", l.ID, id) + } + + return nil +} + +func (t *tableReader) openTable() error { + var err error + if t.data == nil { + if t.data, err = newReadFile(t.useMmap, fmtTableDataName(t.base, t.index)); err != nil { + return err + } + } + + if t.meta == nil { + if t.meta, err = newReadFile(t.useMmap, fmtTableMetaName(t.base, t.index)); err != nil { + return err + } + + } + + return nil +} + +type tableWriter struct { + sync.RWMutex + + data writeFile + meta writeFile + + base string + index int64 + + first uint64 + last uint64 + lastTime uint32 + + maxLogSize int64 + + closed bool + + syncType int + + posBuf []byte + + useMmap bool +} + +func newTableWriter(base string, index int64, maxLogSize int64, useMmap bool) *tableWriter { + if index <= 0 { + panic(fmt.Errorf("invalid index %d", index)) + } + + t := new(tableWriter) + + t.base = base + t.index = index + + t.maxLogSize = maxLogSize + + t.closed = 
false + + t.posBuf = make([]byte, 4) + + t.useMmap = useMmap + + return t +} + +func (t *tableWriter) String() string { + return fmt.Sprintf("%d", t.index) +} + +func (t *tableWriter) SetMaxLogSize(s int64) { + t.maxLogSize = s +} + +func (t *tableWriter) SetSyncType(tp int) { + t.syncType = tp +} + +func (t *tableWriter) close() { + if t.meta != nil { + if err := t.meta.Close(); err != nil { + log.Fatal("close log meta error %s", err.Error()) + } + t.meta = nil + } + + if t.data != nil { + if _, err := t.data.Write(magic); err != nil { + log.Fatal("write magic error %s", err.Error()) + } + + if err := t.data.Close(); err != nil { + log.Fatal("close log data error %s", err.Error()) + } + t.data = nil + } +} + +func (t *tableWriter) Close() { + t.Lock() + t.closed = true + + t.close() + t.Unlock() +} + +func (t *tableWriter) First() uint64 { + t.Lock() + id := t.first + t.Unlock() + return id +} + +func (t *tableWriter) Last() uint64 { + t.Lock() + id := t.last + t.Unlock() + return id +} + +func (t *tableWriter) Flush() (*tableReader, error) { + t.Lock() + + if t.data == nil || t.meta == nil { + t.Unlock() + return nil, errNilHandler + } + + tr := new(tableReader) + tr.base = t.base + tr.index = t.index + + tr.first = t.first + tr.last = t.last + tr.lastTime = t.lastTime + tr.useMmap = t.useMmap + + t.close() + + t.first = 0 + t.last = 0 + t.index = t.index + 1 + + t.Unlock() + + return tr, nil +} + +func (t *tableWriter) StoreLog(l *Log) error { + t.Lock() + err := t.storeLog(l) + t.Unlock() + + return err +} + +func (t *tableWriter) openFile() error { + var err error + if t.data == nil { + if t.data, err = newWriteFile(t.useMmap, fmtTableDataName(t.base, t.index), t.maxLogSize+t.maxLogSize/10+int64(len(magic))); err != nil { + return err + } + } + + if t.meta == nil { + if t.meta, err = newWriteFile(t.useMmap, fmtTableMetaName(t.base, t.index), int64(defaultLogNumInFile*4)); err != nil { + return err + } + } + return err +} + +func (t *tableWriter) storeLog(l *Log) error { + if l.ID == 0 { + return ErrStoreLogID + } + + if t.closed { + return fmt.Errorf("table writer is closed") + } + + if t.last > 0 && l.ID != t.last+1 { + return ErrStoreLogID + } + + if t.data != nil && t.data.Offset() > t.maxLogSize { + return errTableNeedFlush + } + + var err error + if err = t.openFile(); err != nil { + return err + } + + offsetPos := t.data.Offset() + if err = l.Encode(t.data); err != nil { + return err + } + + binary.BigEndian.PutUint32(t.posBuf, uint32(offsetPos)) + if _, err = t.meta.Write(t.posBuf); err != nil { + return err + } + + if t.first == 0 { + t.first = l.ID + } + + t.last = l.ID + t.lastTime = l.CreateTime + + if t.syncType == 2 { + if err := t.data.Sync(); err != nil { + log.Error("sync table error %s", err.Error()) + } + } + + return nil +} + +func (t *tableWriter) GetLog(id uint64, l *Log) error { + t.RLock() + defer t.RUnlock() + + if id < t.first || id > t.last { + return ErrLogNotFound + } + + var buf [4]byte + if _, err := t.meta.ReadAt(buf[0:4], int64((id-t.first)*4)); err != nil { + return err + } + + offset := binary.BigEndian.Uint32(buf[0:4]) + + if err := l.DecodeAt(t.data, int64(offset)); err != nil { + return err + } else if l.ID != id { + return fmt.Errorf("invalid log id %d != %d", id, l.ID) + } + + return nil +} + +func (t *tableWriter) Sync() error { + t.Lock() + + var err error + if t.data != nil { + err = t.data.Sync() + t.Unlock() + return err + } + + if t.meta != nil { + err = t.meta.Sync() + } + + t.Unlock() + + return err +} diff --git a/rpl/file_table_test.go 
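// Illustrative sketch of the rotation contract above: storeLog reports
// errTableNeedFlush once the data file grows past maxLogSize, and the caller
// is expected to Flush() the writer into a read-only tableReader (the writer
// then moves on to index+1) and retry the same log. storeWithRotate is a
// hypothetical caller; the real one is FileStore.storeLog.
func storeWithRotate(w *tableWriter, readers *tableReaders, l *Log) error {
	err := w.StoreLog(l)
	if err != errTableNeedFlush {
		return err // nil on success, or a genuine error
	}
	r, err := w.Flush()
	if err != nil {
		return err
	}
	*readers = append(*readers, r)
	return w.StoreLog(l) // the retried log opens the next table
}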
b/rpl/file_table_test.go new file mode 100644 index 0000000..e020c8a --- /dev/null +++ b/rpl/file_table_test.go @@ -0,0 +1,193 @@ +package rpl + +import ( + "github.com/siddontang/go/log" + "io/ioutil" + "os" + "path" + "testing" + "time" +) + +func TestFileTable(t *testing.T) { + testFileTable(t, true) + testFileTable(t, false) +} + +func testFileTable(t *testing.T, useMmap bool) { + log.SetLevel(log.LevelInfo) + + base, err := ioutil.TempDir("", "test_table") + if err != nil { + t.Fatal(err) + } + + os.MkdirAll(base, 0755) + + defer os.RemoveAll(base) + + l := new(Log) + l.Compression = 0 + l.Data = make([]byte, 4096) + + w := newTableWriter(base, 1, 1024*1024, useMmap) + defer w.Close() + + for i := 0; i < 10; i++ { + l.ID = uint64(i + 1) + l.CreateTime = uint32(time.Now().Unix()) + + l.Data[0] = byte(i + 1) + + if err := w.StoreLog(l); err != nil { + t.Fatal(err) + } + } + + if w.first != 1 { + t.Fatal(w.first) + } else if w.last != 10 { + t.Fatal(w.last) + } + + l.ID = 10 + if err := w.StoreLog(l); err == nil { + t.Fatal("must err") + } + + var ll Log + + for i := 0; i < 10; i++ { + if err := w.GetLog(uint64(i+1), &ll); err != nil { + t.Fatal(err) + } else if len(ll.Data) != 4096 { + t.Fatal(len(ll.Data)) + } else if ll.Data[0] != byte(i+1) { + t.Fatal(ll.Data[0]) + } + } + + if err := w.GetLog(12, &ll); err == nil { + t.Fatal("must nil") + } + + var r *tableReader + + name := fmtTableDataName(w.base, w.index) + + if r, err = w.Flush(); err != nil { + t.Fatal(err) + } + + for i := 10; i < 20; i++ { + l.ID = uint64(i + 1) + l.CreateTime = uint32(time.Now().Unix()) + + l.Data[0] = byte(i + 1) + + if err := w.StoreLog(l); err != nil { + t.Fatal(err) + } + } + + if w.first != 11 { + t.Fatal(w.first) + } else if w.last != 20 { + t.Fatal(w.last) + } + + defer r.Close() + + for i := 0; i < 10; i++ { + if err := r.GetLog(uint64(i+1), &ll); err != nil { + t.Fatal(err) + } else if len(ll.Data) != 4096 { + t.Fatal(len(ll.Data)) + } else if ll.Data[0] != byte(i+1) { + t.Fatal(ll.Data[0]) + } + } + + if err := r.GetLog(12, &ll); err == nil { + t.Fatal("must nil") + } + + r.Close() + + if r, err = newTableReader(base, 1, useMmap); err != nil { + t.Fatal(err) + } + defer r.Close() + + for i := 0; i < 10; i++ { + if err := r.GetLog(uint64(i+1), &ll); err != nil { + t.Fatal(err) + } else if len(ll.Data) != 4096 { + t.Fatal(len(ll.Data)) + } else if ll.Data[0] != byte(i+1) { + t.Fatal(ll.Data[0]) + } + } + + if err := r.GetLog(12, &ll); err == nil { + t.Fatal("must nil") + } + + s := int64(r.data.Size()) + + r.Close() + + log.SetLevel(log.LevelFatal) + + testRepair(t, name, 1, s, 11, useMmap) + testRepair(t, name, 1, s, 20, useMmap) + + if err := os.Truncate(name, s-21); err != nil { + t.Fatal(err) + } + + if r, err := w.Flush(); err != nil { + t.Fatal(err) + } else { + r.Close() + } + + if r, err = newTableReader(base, 2, useMmap); err != nil { + t.Fatal(err) + } + r.Close() +} + +func testRepair(t *testing.T, name string, index int64, s int64, cutSize int64, useMmap bool) { + var r *tableReader + var err error + + if err := os.Truncate(name, s-cutSize); err != nil { + t.Fatal(err) + } + + if r, err = newTableReader(path.Dir(name), index, useMmap); err != nil { + t.Fatal(err) + } + defer r.Close() + + var ll Log + for i := 0; i < 10; i++ { + if err := r.GetLog(uint64(i+1), &ll); err != nil { + t.Fatal(err, i) + } else if len(ll.Data) != 4096 { + t.Fatal(len(ll.Data)) + } else if ll.Data[0] != byte(i+1) { + t.Fatal(ll.Data[0]) + } + } + + if err := r.GetLog(12, &ll); err == nil { + t.Fatal("must nil") 
+ } + + if s != int64(r.data.Size()) { + t.Fatalf("repair error size %d != %d", s, r.data.Size()) + } + +} diff --git a/rpl/goleveldb_store.go b/rpl/goleveldb_store.go index 39bf63a..5ece8d5 100644 --- a/rpl/goleveldb_store.go +++ b/rpl/goleveldb_store.go @@ -21,6 +21,8 @@ type GoLevelDBStore struct { first uint64 last uint64 + + buf bytes.Buffer } func (s *GoLevelDBStore) FirstID() (uint64, error) { @@ -84,30 +86,10 @@ func (s *GoLevelDBStore) GetLog(id uint64, log *Log) error { } } -func (s *GoLevelDBStore) SeekLog(id uint64, log *Log) error { - it := s.db.NewIterator() - defer it.Close() - - it.Seek(num.Uint64ToBytes(id)) - - if !it.Valid() { - return ErrLogNotFound - } else { - return log.Decode(bytes.NewBuffer(it.RawValue())) - } -} - func (s *GoLevelDBStore) StoreLog(log *Log) error { - return s.StoreLogs([]*Log{log}) -} - -func (s *GoLevelDBStore) StoreLogs(logs []*Log) error { s.m.Lock() defer s.m.Unlock() - w := s.db.NewWriteBatch() - defer w.Rollback() - last, err := s.lastID() if err != nil { return err @@ -115,24 +97,20 @@ func (s *GoLevelDBStore) StoreLogs(logs []*Log) error { s.last = InvalidLogID - var buf bytes.Buffer - for _, log := range logs { - buf.Reset() + s.buf.Reset() - if log.ID <= last { - return ErrLessLogID - } - - last = log.ID - key := num.Uint64ToBytes(log.ID) - - if err := log.Encode(&buf); err != nil { - return err - } - w.Put(key, buf.Bytes()) + if log.ID != last+1 { + return ErrStoreLogID } - if err = w.Commit(); err != nil { + last = log.ID + key := num.Uint64ToBytes(log.ID) + + if err := log.Encode(&s.buf); err != nil { + return err + } + + if err = s.db.Put(key, s.buf.Bytes()); err != nil { return err } @@ -140,42 +118,6 @@ func (s *GoLevelDBStore) StoreLogs(logs []*Log) error { return nil } -func (s *GoLevelDBStore) Purge(n uint64) error { - s.m.Lock() - defer s.m.Unlock() - - var first, last uint64 - var err error - - first, err = s.firstID() - if err != nil { - return err - } - - last, err = s.lastID() - if err != nil { - return err - } - - start := first - stop := num.MinUint64(last, first+n) - - w := s.db.NewWriteBatch() - defer w.Rollback() - - s.reset() - - for i := start; i < stop; i++ { - w.Delete(num.Uint64ToBytes(i)) - } - - if err = w.Commit(); err != nil { - return err - } - - return nil -} - func (s *GoLevelDBStore) PurgeExpired(n int64) error { if n <= 0 { return fmt.Errorf("invalid expired time %d", n) @@ -214,6 +156,16 @@ func (s *GoLevelDBStore) PurgeExpired(n int64) error { return nil } +func (s *GoLevelDBStore) Sync() error { + //no other way for sync, so ignore here + return nil +} + +func (s *GoLevelDBStore) reset() { + s.first = InvalidLogID + s.last = InvalidLogID +} + func (s *GoLevelDBStore) Clear() error { s.m.Lock() defer s.m.Unlock() @@ -228,11 +180,6 @@ func (s *GoLevelDBStore) Clear() error { return s.open() } -func (s *GoLevelDBStore) reset() { - s.first = InvalidLogID - s.last = InvalidLogID -} - func (s *GoLevelDBStore) Close() error { s.m.Lock() defer s.m.Unlock() diff --git a/rpl/log.go b/rpl/log.go index 261e852..ad0b48c 100644 --- a/rpl/log.go +++ b/rpl/log.go @@ -4,8 +4,11 @@ import ( "bytes" "encoding/binary" "io" + "sync" ) +const LogHeadSize = 17 + type Log struct { ID uint64 CreateTime uint32 @@ -15,7 +18,7 @@ type Log struct { } func (l *Log) HeadSize() int { - return 17 + return LogHeadSize } func (l *Log) Size() int { @@ -23,7 +26,7 @@ func (l *Log) Size() int { } func (l *Log) Marshal() ([]byte, error) { - buf := bytes.NewBuffer(make([]byte, l.HeadSize()+len(l.Data))) + buf := bytes.NewBuffer(make([]byte, 
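// Worked example for the size helpers above, assuming only what this file
// defines: LogHeadSize is 8 (id) + 4 (create time) + 1 (compression) +
// 4 (data length) = 17 bytes, so a log carrying 4096 bytes of data marshals
// to 17 + 4096 = 4113 bytes.
func exampleLogSize() int {
	l := &Log{Data: make([]byte, 4096)}
	return l.Size() // 4113
}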
l.Size())) buf.Reset() if err := l.Encode(buf); err != nil { @@ -39,28 +42,32 @@ func (l *Log) Unmarshal(b []byte) error { return l.Decode(buf) } +var headPool = sync.Pool{ + New: func() interface{} { return make([]byte, LogHeadSize) }, +} + func (l *Log) Encode(w io.Writer) error { - buf := make([]byte, l.HeadSize()) - + b := headPool.Get().([]byte) pos := 0 - binary.BigEndian.PutUint64(buf[pos:], l.ID) + + binary.BigEndian.PutUint64(b[pos:], l.ID) pos += 8 - - binary.BigEndian.PutUint32(buf[pos:], l.CreateTime) + binary.BigEndian.PutUint32(b[pos:], uint32(l.CreateTime)) pos += 4 - - buf[pos] = l.Compression + b[pos] = l.Compression pos++ + binary.BigEndian.PutUint32(b[pos:], uint32(len(l.Data))) - binary.BigEndian.PutUint32(buf[pos:], uint32(len(l.Data))) + n, err := w.Write(b) + headPool.Put(b) - if n, err := w.Write(buf); err != nil { + if err != nil { return err - } else if n != len(buf) { + } else if n != LogHeadSize { return io.ErrShortWrite } - if n, err := w.Write(l.Data); err != nil { + if n, err = w.Write(l.Data); err != nil { return err } else if n != len(l.Data) { return io.ErrShortWrite @@ -69,12 +76,82 @@ func (l *Log) Encode(w io.Writer) error { } func (l *Log) Decode(r io.Reader) error { - buf := make([]byte, l.HeadSize()) - - if _, err := io.ReadFull(r, buf); err != nil { + length, err := l.DecodeHead(r) + if err != nil { return err } + l.growData(int(length)) + + if _, err := io.ReadFull(r, l.Data); err != nil { + return err + } + + return nil +} + +func (l *Log) DecodeHead(r io.Reader) (uint32, error) { + buf := headPool.Get().([]byte) + + if _, err := io.ReadFull(r, buf); err != nil { + headPool.Put(buf) + return 0, err + } + + length := l.decodeHeadBuf(buf) + + headPool.Put(buf) + + return length, nil +} + +func (l *Log) DecodeAt(r io.ReaderAt, pos int64) error { + length, err := l.DecodeHeadAt(r, pos) + if err != nil { + return err + } + + l.growData(int(length)) + var n int + n, err = r.ReadAt(l.Data, pos+int64(LogHeadSize)) + if err == io.EOF && n == len(l.Data) { + err = nil + } + + return err +} + +func (l *Log) growData(length int) { + l.Data = l.Data[0:0] + + if cap(l.Data) >= length { + l.Data = l.Data[0:length] + } else { + l.Data = make([]byte, length) + } +} + +func (l *Log) DecodeHeadAt(r io.ReaderAt, pos int64) (uint32, error) { + buf := headPool.Get().([]byte) + + n, err := r.ReadAt(buf, pos) + if err != nil && err != io.EOF { + headPool.Put(buf) + + return 0, err + } + + length := l.decodeHeadBuf(buf) + headPool.Put(buf) + + if err == io.EOF && (length != 0 || n != len(buf)) { + return 0, err + } + + return length, nil +} + +func (l *Log) decodeHeadBuf(buf []byte) uint32 { pos := 0 l.ID = binary.BigEndian.Uint64(buf[pos:]) pos += 8 @@ -86,17 +163,5 @@ func (l *Log) Decode(r io.Reader) error { pos++ length := binary.BigEndian.Uint32(buf[pos:]) - - l.Data = l.Data[0:0] - - if cap(l.Data) >= int(length) { - l.Data = l.Data[0:length] - } else { - l.Data = make([]byte, length) - } - if _, err := io.ReadFull(r, l.Data); err != nil { - return err - } - - return nil + return length } diff --git a/rpl/rpl.go b/rpl/rpl.go index d862132..d232992 100644 --- a/rpl/rpl.go +++ b/rpl/rpl.go @@ -32,6 +32,8 @@ type Replication struct { wg sync.WaitGroup nc chan struct{} + + ncm sync.Mutex } func NewReplication(cfg *config.Config) (*Replication, error) { @@ -49,8 +51,16 @@ func NewReplication(cfg *config.Config) (*Replication, error) { r.cfg = cfg var err error - if r.s, err = NewGoLevelDBStore(path.Join(base, "wal"), cfg.Replication.SyncLog); err != nil { - return nil, 
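// Illustrative round trip through the encoder and decoder above; both
// directions borrow their 17-byte scratch header from headPool instead of
// allocating per log. exampleRoundTrip is hypothetical.
func exampleRoundTrip() error {
	in := &Log{ID: 1, CreateTime: 1400000000, Compression: 0, Data: []byte("hello")}
	b, err := in.Marshal()
	if err != nil {
		return err
	}
	var out Log
	return out.Unmarshal(b) // out now carries the same id, head fields and data
}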
err + + switch cfg.Replication.StoreName { + case "goleveldb": + if r.s, err = NewGoLevelDBStore(path.Join(base, "wal"), cfg.Replication.SyncLog); err != nil { + return nil, err + } + default: + if r.s, err = NewFileStore(path.Join(base, "ldb"), cfg); err != nil { + return nil, err + } } if r.commitLog, err = os.OpenFile(path.Join(base, "commit.log"), os.O_RDWR|os.O_CREATE, 0644); err != nil { @@ -63,7 +73,8 @@ func NewReplication(cfg *config.Config) (*Replication, error) { return nil, err } - go r.onPurgeExpired() + r.wg.Add(1) + go r.run() return r, nil } @@ -73,11 +84,18 @@ func (r *Replication) Close() error { r.wg.Wait() + r.m.Lock() + defer r.m.Unlock() + if r.s != nil { r.s.Close() r.s = nil } + if err := r.updateCommitID(r.commitID, true); err != nil { + log.Error("update commit id err %s", err.Error()) + } + if r.commitLog != nil { r.commitLog.Close() r.commitLog = nil @@ -96,16 +114,19 @@ func (r *Replication) Log(data []byte) (*Log, error) { } r.m.Lock() - defer r.m.Unlock() lastID, err := r.s.LastID() if err != nil { + r.m.Unlock() return nil, err } commitId := r.commitID if lastID < commitId { lastID = commitId + } else if lastID > commitId { + r.m.Unlock() + return nil, ErrCommitIDBehind } l := new(Log) @@ -121,41 +142,47 @@ func (r *Replication) Log(data []byte) (*Log, error) { l.Data = data if err = r.s.StoreLog(l); err != nil { + r.m.Unlock() return nil, err } + r.m.Unlock() + + r.ncm.Lock() close(r.nc) r.nc = make(chan struct{}) + r.ncm.Unlock() return l, nil } func (r *Replication) WaitLog() <-chan struct{} { - return r.nc + r.ncm.Lock() + ch := r.nc + r.ncm.Unlock() + return ch } func (r *Replication) StoreLog(log *Log) error { - return r.StoreLogs([]*Log{log}) -} - -func (r *Replication) StoreLogs(logs []*Log) error { r.m.Lock() - defer r.m.Unlock() + err := r.s.StoreLog(log) + r.m.Unlock() - return r.s.StoreLogs(logs) + return err } func (r *Replication) FirstLogID() (uint64, error) { r.m.Lock() - defer r.m.Unlock() id, err := r.s.FirstID() + r.m.Unlock() + return id, err } func (r *Replication) LastLogID() (uint64, error) { r.m.Lock() - defer r.m.Unlock() id, err := r.s.LastID() + r.m.Unlock() return id, err } @@ -168,9 +195,10 @@ func (r *Replication) LastCommitID() (uint64, error) { func (r *Replication) UpdateCommitID(id uint64) error { r.m.Lock() - defer r.m.Unlock() + err := r.updateCommitID(id, r.cfg.Replication.SyncLog == 2) + r.m.Unlock() - return r.updateCommitID(id) + return err } func (r *Replication) Stat() (*Stat, error) { @@ -192,13 +220,15 @@ func (r *Replication) Stat() (*Stat, error) { return s, nil } -func (r *Replication) updateCommitID(id uint64) error { - if _, err := r.commitLog.Seek(0, os.SEEK_SET); err != nil { - return err - } +func (r *Replication) updateCommitID(id uint64, force bool) error { + if force { + if _, err := r.commitLog.Seek(0, os.SEEK_SET); err != nil { + return err + } - if err := binary.Write(r.commitLog, binary.BigEndian, id); err != nil { - return err + if err := binary.Write(r.commitLog, binary.BigEndian, id); err != nil { + return err + } } r.commitID = id @@ -208,14 +238,17 @@ func (r *Replication) updateCommitID(id uint64) error { func (r *Replication) CommitIDBehind() (bool, error) { r.m.Lock() - defer r.m.Unlock() id, err := r.s.LastID() if err != nil { + r.m.Unlock() return false, err } - return id > r.commitID, nil + behind := id > r.commitID + r.m.Unlock() + + return behind, nil } func (r *Replication) GetLog(id uint64, log *Log) error { @@ -251,23 +284,47 @@ func (r *Replication) ClearWithCommitID(id uint64) error 
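// Illustrative sketch of the WaitLog notification pattern above: Log() closes
// the current nc channel under ncm to wake every waiter at once, then installs
// a fresh channel for the next round. waitForNewLog is a hypothetical consumer
// and assumes the time package is imported.
func waitForNewLog(r *Replication, timeout time.Duration) bool {
	select {
	case <-r.WaitLog():
		return true // at least one new log was stored since WaitLog was called
	case <-time.After(timeout):
		return false
	}
}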
{ return err } - return r.updateCommitID(id) + return r.updateCommitID(id, true) } -func (r *Replication) onPurgeExpired() { - r.wg.Add(1) +func (r *Replication) run() { defer r.wg.Done() + syncTc := time.NewTicker(1 * time.Second) + purgeTc := time.NewTicker(1 * time.Hour) + for { select { - case <-time.After(1 * time.Hour): + case <-purgeTc.C: n := (r.cfg.Replication.ExpiredLogDays * 24 * 3600) r.m.Lock() - if err := r.s.PurgeExpired(int64(n)); err != nil { + err := r.s.PurgeExpired(int64(n)) + r.m.Unlock() + if err != nil { log.Error("purge expired log error %s", err.Error()) } - r.m.Unlock() + case <-syncTc.C: + if r.cfg.Replication.SyncLog == 1 { + r.m.Lock() + err := r.s.Sync() + r.m.Unlock() + if err != nil { + log.Error("sync store error %s", err.Error()) + } + } + if r.cfg.Replication.SyncLog != 2 { + //we will sync commit id every 1 second + r.m.Lock() + err := r.updateCommitID(r.commitID, true) + r.m.Unlock() + + if err != nil { + log.Error("sync commitid error %s", err.Error()) + } + } case <-r.quit: + syncTc.Stop() + purgeTc.Stop() return } } diff --git a/rpl/store.go b/rpl/store.go index 8d5e8ec..9f985ec 100644 --- a/rpl/store.go +++ b/rpl/store.go @@ -9,30 +9,26 @@ const ( ) var ( - ErrLogNotFound = errors.New("log not found") - ErrLessLogID = errors.New("log id is less") - ErrNoBehindLog = errors.New("no behind commit log") + ErrLogNotFound = errors.New("log not found") + ErrStoreLogID = errors.New("log id is less") + ErrNoBehindLog = errors.New("no behind commit log") + ErrCommitIDBehind = errors.New("commit id is behind last log id") ) type LogStore interface { GetLog(id uint64, log *Log) error - // Get the first log which ID is equal or larger than id - SeekLog(id uint64, log *Log) error - FirstID() (uint64, error) LastID() (uint64, error) // if log id is less than current last id, return error StoreLog(log *Log) error - StoreLogs(logs []*Log) error - - // Delete first n logs - Purge(n uint64) error // Delete logs before n seconds PurgeExpired(n int64) error + Sync() error + // Clear all logs Clear() error diff --git a/rpl/store_test.go b/rpl/store_test.go index 0dda1ce..9b8febe 100644 --- a/rpl/store_test.go +++ b/rpl/store_test.go @@ -1,10 +1,10 @@ package rpl import ( + "github.com/siddontang/ledisdb/config" "io/ioutil" "os" "testing" - "time" ) func TestGoLevelDBStore(t *testing.T) { @@ -25,6 +25,27 @@ func TestGoLevelDBStore(t *testing.T) { testLogs(t, l) } +func TestFileStore(t *testing.T) { + // Create a test dir + dir, err := ioutil.TempDir("", "ldb") + if err != nil { + t.Fatalf("err: %v ", err) + } + defer os.RemoveAll(dir) + + // New level + cfg := config.NewConfigDefault() + cfg.Replication.MaxLogFileSize = 4096 + + l, err := NewFileStore(dir, cfg) + if err != nil { + t.Fatalf("err: %v ", err) + } + defer l.Close() + + testLogs(t, l) +} + func testLogs(t *testing.T, l LogStore) { // Should be no first index idx, err := l.FirstID() @@ -34,7 +55,6 @@ func testLogs(t *testing.T, l LogStore) { if idx != 0 { t.Fatalf("bad idx: %d", idx) } - // Should be no last index idx, err = l.LastID() if err != nil { @@ -46,14 +66,16 @@ func testLogs(t *testing.T, l LogStore) { // Try a filed fetch var out Log - if err := l.GetLog(10, &out); err.Error() != "log not found" { + if err := l.GetLog(10, &out); err != ErrLogNotFound { t.Fatalf("err: %v ", err) } + data := make([]byte, 1024) + // Write out a log log := Log{ ID: 1, - Data: []byte("first"), + Data: data, } for i := 1; i <= 10; i++ { log.ID = uint64(i) @@ -63,16 +85,20 @@ func testLogs(t *testing.T, l LogStore) { } // 
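// Illustrative summary of the SyncLog handling in Replication.run above, as
// inferred from this patch: SyncLog == 2 syncs the log data and rewrites the
// commit id on every update, SyncLog == 1 relies on the one-second ticker to
// call s.Sync(), and for both 0 and 1 the commit id is flushed by the ticker
// rather than on each UpdateCommitID call.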
Attempt to write multiple logs - var logs []*Log for i := 11; i <= 20; i++ { nl := &Log{ ID: uint64(i), - Data: []byte("first"), + Data: data, + } + + if err := l.StoreLog(nl); err != nil { + t.Fatalf("err: %v", err) } - logs = append(logs, nl) } - if err := l.StoreLogs(logs); err != nil { - t.Fatalf("err: %v", err) + + // Try to fetch + if err := l.GetLog(1, &out); err != nil { + t.Fatalf("err: %v ", err) } // Try to fetch @@ -103,87 +129,38 @@ func testLogs(t *testing.T, l LogStore) { t.Fatalf("bad idx: %d", idx) } - // Delete a suffix - if err := l.Purge(5); err != nil { - t.Fatalf("err: %v ", err) + if err = l.Clear(); err != nil { + t.Fatalf("err :%v", err) } - // Verify they are all deleted - for i := 1; i <= 5; i++ { - if err := l.GetLog(uint64(i), &out); err != ErrLogNotFound { - t.Fatalf("err: %v ", err) + // Check the lowest index + idx, err = l.FirstID() + if err != nil { + t.Fatalf("err: %v ", err) + } + if idx != 0 { + t.Fatalf("bad idx: %d", idx) + } + + // Check the highest index + idx, err = l.LastID() + if err != nil { + t.Fatalf("err: %v ", err) + } + if idx != 0 { + t.Fatalf("bad idx: %d", idx) + } + + // Write out a log + log = Log{ + ID: 1, + Data: data, + } + for i := 1; i <= 10; i++ { + log.ID = uint64(i) + if err := l.StoreLog(&log); err != nil { + t.Fatalf("err: %v", err) } } - // Index should be one - idx, err = l.FirstID() - if err != nil { - t.Fatalf("err: %v ", err) - } - if idx != 6 { - t.Fatalf("bad idx: %d", idx) - } - idx, err = l.LastID() - if err != nil { - t.Fatalf("err: %v ", err) - } - if idx != 20 { - t.Fatalf("bad idx: %d", idx) - } - - // Should not be able to fetch - if err := l.GetLog(5, &out); err != ErrLogNotFound { - t.Fatalf("err: %v ", err) - } - - if err := l.Clear(); err != nil { - t.Fatal(err) - } - - idx, err = l.FirstID() - if err != nil { - t.Fatalf("err: %v ", err) - } - if idx != 0 { - t.Fatalf("bad idx: %d", idx) - } - - idx, err = l.LastID() - if err != nil { - t.Fatalf("err: %v ", err) - } - if idx != 0 { - t.Fatalf("bad idx: %d", idx) - } - - now := uint32(time.Now().Unix()) - logs = []*Log{} - for i := 1; i <= 20; i++ { - nl := &Log{ - ID: uint64(i), - CreateTime: now - 20, - Data: []byte("first"), - } - logs = append(logs, nl) - } - - if err := l.PurgeExpired(1); err != nil { - t.Fatal(err) - } - - idx, err = l.FirstID() - if err != nil { - t.Fatalf("err: %v ", err) - } - if idx != 0 { - t.Fatalf("bad idx: %d", idx) - } - - idx, err = l.LastID() - if err != nil { - t.Fatalf("err: %v ", err) - } - if idx != 0 { - t.Fatalf("bad idx: %d", idx) - } } diff --git a/rpl/table_readers_test.go b/rpl/table_readers_test.go new file mode 100644 index 0000000..d0045e6 --- /dev/null +++ b/rpl/table_readers_test.go @@ -0,0 +1,38 @@ +package rpl + +import ( + "testing" +) + +func TestTableReaders(t *testing.T) { + ts := make(tableReaders, 0, 10) + + for i := uint64(0); i < 10; i++ { + t := new(tableReader) + t.index = int64(i) + 1 + t.first = i*10 + 1 + t.last = i*10 + 10 + + ts = append(ts, t) + } + + if err := ts.check(); err != nil { + t.Fatal(err) + } + + for i := 1; i <= 100; i++ { + if r := ts.Search(uint64(i)); r == nil { + t.Fatal("must hit", i) + } else if r.index != int64((i-1)/10)+1 { + t.Fatal("invalid index", r.index, i) + } + } + + if r := ts.Search(1000); r != nil { + t.Fatal("must not hit") + } + if r := ts.Search(0); r != nil { + t.Fatal("must not hit") + } + +} diff --git a/server/app.go b/server/app.go index 0e03b3f..393da09 100644 --- a/server/app.go +++ b/server/app.go @@ -37,6 +37,11 @@ type App struct { slaveSyncAck chan 
uint64 snap *snapshotStore + + connWait sync.WaitGroup + + rcm sync.Mutex + rcs map[*respClient]struct{} } func netType(s string) string { @@ -64,6 +69,8 @@ func NewApp(cfg *config.Config) (*App, error) { app.slaves = make(map[string]*client) app.slaveSyncAck = make(chan uint64) + app.rcs = make(map[*respClient]struct{}) + var err error if app.info, err = newInfo(app); err != nil { @@ -129,6 +136,11 @@ func (app *App) Close() { app.httpListener.Close() } + app.closeAllRespClients() + + //wait all connection closed + app.connWait.Wait() + app.closeScript() app.m.Close() @@ -149,13 +161,18 @@ func (app *App) Run() { go app.httpServe() - for !app.closed { - conn, err := app.listener.Accept() - if err != nil { - continue - } + for { + select { + case <-app.quit: + return + default: + conn, err := app.listener.Accept() + if err != nil { + continue + } - newClientRESP(conn, app) + newClientRESP(conn, app) + } } } diff --git a/server/client.go b/server/client.go index ba24821..57abc8c 100644 --- a/server/client.go +++ b/server/client.go @@ -3,8 +3,10 @@ package server import ( "bytes" "fmt" + "github.com/siddontang/go/sync2" "github.com/siddontang/ledisdb/ledis" "io" + "sync" "time" ) @@ -62,9 +64,9 @@ type client struct { syncBuf bytes.Buffer - lastLogID uint64 + lastLogID sync2.AtomicUint64 - reqErr chan error + // reqErr chan error buf bytes.Buffer @@ -73,6 +75,13 @@ type client struct { script *ledis.Multi slaveListeningAddr string + + quit chan struct{} + done chan error + + wg sync.WaitGroup + + fc chan CommandFunc } func newClient(app *App) *client { @@ -82,11 +91,37 @@ func newClient(app *App) *client { c.ldb = app.ldb c.db, _ = app.ldb.Select(0) //use default db - c.reqErr = make(chan error) + // c.reqErr = make(chan error) + + c.quit = make(chan struct{}) + c.done = make(chan error, 1) + c.fc = make(chan CommandFunc, 1) + + c.wg.Add(1) + go c.run() return c } +func (c *client) close() { + close(c.quit) + + c.wg.Wait() +} + +func (c *client) run() { + defer c.wg.Done() + + for { + select { + case <-c.quit: + return + case f := <-c.fc: + c.done <- f(c) + } + } +} + func (c *client) perform() { var err error @@ -108,17 +143,20 @@ func (c *client) perform() { } if err == nil { - go func() { - c.reqErr <- exeCmd(c) - }() + // go func() { + // c.reqErr <- exeCmd(c) + // }() - err = <-c.reqErr + // err = <-c.reqErr + c.fc <- exeCmd + + err = <-c.done } } - duration := time.Since(start) - if c.app.access != nil { + duration := time.Since(start) + fullCmd := c.catGenericCommand() cost := duration.Nanoseconds() / 1000000 diff --git a/server/client_http.go b/server/client_http.go index 057ba6b..4383673 100644 --- a/server/client_http.go +++ b/server/client_http.go @@ -40,16 +40,20 @@ type httpWriter struct { } func newClientHTTP(app *App, w http.ResponseWriter, r *http.Request) { + app.connWait.Add(1) + defer app.connWait.Done() + var err error c := new(httpClient) - c.client = newClient(app) err = c.makeRequest(app, r, w) if err != nil { w.Write([]byte(err.Error())) return } + c.client = newClient(app) c.perform() + c.client.close() } func (c *httpClient) addr(r *http.Request) string { diff --git a/server/client_resp.go b/server/client_resp.go index 8d8378f..65a83b4 100644 --- a/server/client_resp.go +++ b/server/client_resp.go @@ -3,6 +3,7 @@ package server import ( "bufio" "errors" + "github.com/siddontang/go/arena" "github.com/siddontang/go/hack" "github.com/siddontang/go/log" "github.com/siddontang/go/num" @@ -11,37 +12,81 @@ import ( "net" "runtime" "strconv" - "strings" + "time" ) var 
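// Illustrative sketch of the dispatch loop added above: perform() now hands
// the command function to the client's single worker goroutine over fc and
// waits for the result on done, instead of spawning a goroutine per request.
// runOnClient is a hypothetical helper with the same shape.
func runOnClient(c *client, f CommandFunc) error {
	c.fc <- f       // picked up by c.run()
	return <-c.done // the error returned by f(c)
}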
errReadRequest = errors.New("invalid request protocol") +var errClientQuit = errors.New("remote client quit") type respClient struct { *client conn net.Conn rb *bufio.Reader + + ar *arena.Arena + + activeQuit bool } type respWriter struct { buff *bufio.Writer } +func (app *App) addRespClient(c *respClient) { + app.rcm.Lock() + app.rcs[c] = struct{}{} + app.rcm.Unlock() +} + +func (app *App) delRespClient(c *respClient) { + app.rcm.Lock() + delete(app.rcs, c) + app.rcm.Unlock() +} + +func (app *App) closeAllRespClients() { + app.rcm.Lock() + + for c := range app.rcs { + c.conn.Close() + } + + app.rcm.Unlock() +} + +func (app *App) respClientNum() int { + app.rcm.Lock() + n := len(app.rcs) + app.rcm.Unlock() + return n +} + func newClientRESP(conn net.Conn, app *App) { c := new(respClient) c.client = newClient(app) c.conn = conn + c.activeQuit = false + if tcpConn, ok := conn.(*net.TCPConn); ok { tcpConn.SetReadBuffer(app.cfg.ConnReadBufferSize) tcpConn.SetWriteBuffer(app.cfg.ConnWriteBufferSize) } + c.rb = bufio.NewReaderSize(conn, app.cfg.ConnReadBufferSize) c.resp = newWriterRESP(conn, app.cfg.ConnWriteBufferSize) c.remoteAddr = conn.RemoteAddr().String() + //maybe another config? + c.ar = arena.NewArena(app.cfg.ConnReadBufferSize) + + app.connWait.Add(1) + + app.addRespClient(c) + go c.run() } @@ -55,109 +100,75 @@ func (c *respClient) run() { log.Fatal("client run panic %s:%v", buf, e) } - handleQuit := true - if c.conn != nil { - //if handle quit command before, conn is nil - handleQuit = false - c.conn.Close() - } + c.client.close() + + c.conn.Close() if c.tx != nil { c.tx.Rollback() c.tx = nil } - c.app.removeSlave(c.client, handleQuit) + c.app.removeSlave(c.client, c.activeQuit) + + c.app.delRespClient(c) + + c.app.connWait.Done() }() + select { + case <-c.app.quit: + //check app closed + return + default: + break + } + + kc := time.Duration(c.app.cfg.ConnKeepaliveInterval) * time.Second for { + if kc > 0 { + c.conn.SetReadDeadline(time.Now().Add(kc)) + } + reqData, err := c.readRequest() + if err == nil { + err = c.handleRequest(reqData) + } + if err != nil { return } - - c.handleRequest(reqData) } } -func (c *respClient) readLine() ([]byte, error) { - return ReadLine(c.rb) -} - -//A client sends to the Redis server a RESP Array consisting of just Bulk Strings. 
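// Illustrative sketch: a RESP request is an array of bulk strings, so
// "SET a 1" arrives on the wire as "*3\r\n$3\r\nSET\r\n$1\r\na\r\n$1\r\n1\r\n".
// The ReadRequest helper added in server/util.go parses exactly this shape and
// allocates the argument slices from the per-connection arena. parseExample is
// hypothetical and assumes bufio and strings are imported.
func parseExample(a *arena.Arena) ([][]byte, error) {
	raw := "*3\r\n$3\r\nSET\r\n$1\r\na\r\n$1\r\n1\r\n"
	// returns [][]byte{[]byte("SET"), []byte("a"), []byte("1")}
	return ReadRequest(bufio.NewReader(strings.NewReader(raw)), a)
}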
func (c *respClient) readRequest() ([][]byte, error) { - l, err := c.readLine() - if err != nil { - return nil, err - } else if len(l) == 0 || l[0] != '*' { - return nil, errReadRequest - } - - var nparams int - if nparams, err = strconv.Atoi(hack.String(l[1:])); err != nil { - return nil, err - } else if nparams <= 0 { - return nil, errReadRequest - } - - req := make([][]byte, 0, nparams) - var n int - for i := 0; i < nparams; i++ { - if l, err = c.readLine(); err != nil { - return nil, err - } - - if len(l) == 0 { - return nil, errReadRequest - } else if l[0] == '$' { - //handle resp string - if n, err = strconv.Atoi(hack.String(l[1:])); err != nil { - return nil, err - } else if n == -1 { - req = append(req, nil) - } else { - buf := make([]byte, n) - if _, err = io.ReadFull(c.rb, buf); err != nil { - return nil, err - } - - if l, err = c.readLine(); err != nil { - return nil, err - } else if len(l) != 0 { - return nil, errors.New("bad bulk string format") - } - - req = append(req, buf) - - } - - } else { - return nil, errReadRequest - } - } - - return req, nil + return ReadRequest(c.rb, c.ar) } -func (c *respClient) handleRequest(reqData [][]byte) { +func (c *respClient) handleRequest(reqData [][]byte) error { if len(reqData) == 0 { c.cmd = "" c.args = reqData[0:0] } else { - c.cmd = strings.ToLower(hack.String(reqData[0])) + c.cmd = hack.String(lowerSlice(reqData[0])) c.args = reqData[1:] } if c.cmd == "quit" { + c.activeQuit = true c.resp.writeStatus(OK) c.resp.flush() c.conn.Close() - c.conn = nil - return + return errClientQuit } c.perform() - return + c.cmd = "" + c.args = nil + + c.ar.Reset() + + return nil } // response writer diff --git a/server/cmd_kv.go b/server/cmd_kv.go index c62cc18..4e7d601 100644 --- a/server/cmd_kv.go +++ b/server/cmd_kv.go @@ -7,16 +7,35 @@ import ( "strings" ) +// func getCommand(c *client) error { +// args := c.args +// if len(args) != 1 { +// return ErrCmdParams +// } + +// if v, err := c.db.Get(args[0]); err != nil { +// return err +// } else { +// c.resp.writeBulk(v) +// } +// return nil +// } + func getCommand(c *client) error { args := c.args if len(args) != 1 { return ErrCmdParams } - if v, err := c.db.Get(args[0]); err != nil { + if v, err := c.db.GetSlice(args[0]); err != nil { return err } else { - c.resp.writeBulk(v) + if v == nil { + c.resp.writeBulk(nil) + } else { + c.resp.writeBulk(v.Data()) + v.Free() + } } return nil } @@ -66,6 +85,26 @@ func setnxCommand(c *client) error { return nil } +func setexCommand(c *client) error { + args := c.args + if len(args) != 3 { + return ErrCmdParams + } + + sec, err := ledis.StrInt64(args[1], nil) + if err != nil { + return ErrValue + } + + if err := c.db.SetEX(args[0], sec, args[2]); err != nil { + return err + } else { + c.resp.writeStatus(OK) + } + + return nil +} + func existsCommand(c *client) error { args := c.args if len(args) != 1 { @@ -365,6 +404,7 @@ func init() { register("mset", msetCommand) register("set", setCommand) register("setnx", setnxCommand) + register("setex", setexCommand) register("expire", expireCommand) register("expireat", expireAtCommand) register("ttl", ttlCommand) diff --git a/server/cmd_kv_test.go b/server/cmd_kv_test.go index d24fd25..d3b0fe4 100644 --- a/server/cmd_kv_test.go +++ b/server/cmd_kv_test.go @@ -27,6 +27,12 @@ func TestKV(t *testing.T) { t.Fatal(n) } + if ok, err := ledis.String(c.Do("setex", "xx", 10, "hello world")); err != nil { + t.Fatal(err) + } else if ok != OK { + t.Fatal(ok) + } + if v, err := ledis.String(c.Do("get", "a")); err != nil { 
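// Illustrative sketch of the GetSlice contract used in getCommand above: the
// returned slice may reference memory owned by the storage driver, so the
// caller finishes with v.Data() (or copies it) before calling v.Free().
// readValue is a hypothetical helper that follows that rule.
func readValue(db *ledis.DB, key []byte) ([]byte, error) {
	v, err := db.GetSlice(key)
	if err != nil || v == nil {
		return nil, err
	}
	out := append([]byte{}, v.Data()...) // copy before releasing the slice
	v.Free()
	return out, nil
}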
t.Fatal(err) } else if v != "1234" { @@ -214,4 +220,8 @@ func TestKVErrorParams(t *testing.T) { t.Fatal("invalid err of %v", err) } + if _, err := c.Do("setex", "a", "blah", "hello world"); err == nil { + t.Fatalf("invalid err %v", err) + } + } diff --git a/server/cmd_replication.go b/server/cmd_replication.go index b401f35..b910e51 100644 --- a/server/cmd_replication.go +++ b/server/cmd_replication.go @@ -110,16 +110,20 @@ func syncCommand(c *client) error { return ErrCmdParams } - c.lastLogID = logId - 1 + lastLogID := logId - 1 stat, err := c.app.ldb.ReplicationStat() if err != nil { return err } - if c.lastLogID > stat.LastID { + if lastLogID > stat.LastID { return fmt.Errorf("invalid sync logid %d > %d + 1", logId, stat.LastID) - } else if c.lastLogID == stat.LastID { + } + + c.lastLogID.Set(lastLogID) + + if lastLogID == stat.LastID { c.app.slaveAck(c) } @@ -127,7 +131,7 @@ func syncCommand(c *client) error { c.syncBuf.Write(dummyBuf) - if _, _, err := c.app.ldb.ReadLogsToTimeout(logId, &c.syncBuf, 30); err != nil { + if _, _, err := c.app.ldb.ReadLogsToTimeout(logId, &c.syncBuf, 30, c.app.quit); err != nil { return err } else { buf := c.syncBuf.Bytes() diff --git a/server/cmd_ttl_test.go b/server/cmd_ttl_test.go index c9d388c..d851b83 100644 --- a/server/cmd_ttl_test.go +++ b/server/cmd_ttl_test.go @@ -72,8 +72,8 @@ func TestExpire(t *testing.T) { if ttl, err := ledis.Int64(c.Do(ttl, key)); err != nil { t.Fatal(err) - } else if ttl != exp { - t.Fatal(ttl) + } else if ttl == -1 { + t.Fatal("no ttl") } // expireat + ttl @@ -86,8 +86,8 @@ func TestExpire(t *testing.T) { if ttl, err := ledis.Int64(c.Do(ttl, key)); err != nil { t.Fatal(err) - } else if ttl != 3 { - t.Fatal(ttl) + } else if ttl == -1 { + t.Fatal("no ttl") } kErr := "not_exist_ttl" diff --git a/server/info.go b/server/info.go index ccdba98..b06b084 100644 --- a/server/info.go +++ b/server/info.go @@ -6,9 +6,10 @@ import ( "github.com/siddontang/go/sync2" "os" "runtime" + "runtime/debug" "strings" "sync" - "sync/atomic" + "time" ) type info struct { @@ -21,10 +22,6 @@ type info struct { ProceessId int } - Clients struct { - ConnectedClients int64 - } - Replication struct { PubLogNum sync2.AtomicInt64 PubLogAckNum sync2.AtomicInt64 @@ -45,21 +42,17 @@ func newInfo(app *App) (i *info, err error) { return i, nil } -func (i *info) addClients(delta int64) { - atomic.AddInt64(&i.Clients.ConnectedClients, delta) -} - func (i *info) Close() { } func getMemoryHuman(m uint64) string { if m > GB { - return fmt.Sprintf("%dG", m/GB) + return fmt.Sprintf("%0.3fG", float64(m)/float64(GB)) } else if m > MB { - return fmt.Sprintf("%dM", m/MB) + return fmt.Sprintf("%0.3fM", float64(m)/float64(MB)) } else if m > KB { - return fmt.Sprintf("%dK", m/KB) + return fmt.Sprintf("%0.3fK", float64(m)/float64(KB)) } else { return fmt.Sprintf("%d", m) } @@ -72,10 +65,10 @@ func (i *info) Dump(section string) []byte { i.dumpAll(buf) case "server": i.dumpServer(buf) - case "client": - i.dumpClients(buf) case "mem": i.dumpMem(buf) + case "gc": + i.dumpGC(buf) case "store": i.dumpStore(buf) case "replication": @@ -97,10 +90,10 @@ func (i *info) dumpAll(buf *bytes.Buffer) { buf.Write(Delims) i.dumpStore(buf) buf.Write(Delims) - i.dumpClients(buf) - buf.Write(Delims) i.dumpMem(buf) buf.Write(Delims) + i.dumpGC(buf) + buf.Write(Delims) i.dumpReplication(buf) } @@ -113,23 +106,57 @@ func (i *info) dumpServer(buf *bytes.Buffer) { infoPair{"http_addr", i.app.cfg.HttpAddr}, infoPair{"readonly", i.app.cfg.Readonly}, infoPair{"goroutine_num", 
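// Worked example for getMemoryHuman above: 1536*1024 bytes is above MB, so it
// now renders as "1.500M" instead of the previously truncated "1M", and
// 3*GB/2 bytes renders as "1.500G".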
runtime.NumGoroutine()}, + infoPair{"cgo_call_num", runtime.NumCgoCall()}, + infoPair{"resp_client_num", i.app.respClientNum()}, ) } -func (i *info) dumpClients(buf *bytes.Buffer) { - buf.WriteString("# Client\r\n") - - i.dumpPairs(buf, infoPair{"client_num", i.Clients.ConnectedClients}) -} - func (i *info) dumpMem(buf *bytes.Buffer) { buf.WriteString("# Mem\r\n") var mem runtime.MemStats runtime.ReadMemStats(&mem) - i.dumpPairs(buf, infoPair{"mem_alloc", mem.Alloc}, - infoPair{"mem_alloc_human", getMemoryHuman(mem.Alloc)}) + i.dumpPairs(buf, infoPair{"mem_alloc", getMemoryHuman(mem.Alloc)}, + infoPair{"mem_sys", getMemoryHuman(mem.Sys)}, + infoPair{"mem_looksups", getMemoryHuman(mem.Lookups)}, + infoPair{"mem_mallocs", getMemoryHuman(mem.Mallocs)}, + infoPair{"mem_frees", getMemoryHuman(mem.Frees)}, + infoPair{"mem_total", getMemoryHuman(mem.TotalAlloc)}, + infoPair{"mem_heap_alloc", getMemoryHuman(mem.HeapAlloc)}, + infoPair{"mem_heap_sys", getMemoryHuman(mem.HeapSys)}, + infoPair{"mem_head_idle", getMemoryHuman(mem.HeapIdle)}, + infoPair{"mem_head_inuse", getMemoryHuman(mem.HeapInuse)}, + infoPair{"mem_head_released", getMemoryHuman(mem.HeapReleased)}, + infoPair{"mem_head_objects", mem.HeapObjects}, + ) +} + +const ( + gcTimeFormat = "2006/01/02 15:04:05.000" +) + +func (i *info) dumpGC(buf *bytes.Buffer) { + buf.WriteString("# GC\r\n") + + count := 5 + + var st debug.GCStats + st.Pause = make([]time.Duration, count) + // st.PauseQuantiles = make([]time.Duration, count) + debug.ReadGCStats(&st) + + h := make([]string, 0, count) + + for i := 0; i < count && i < len(st.Pause); i++ { + h = append(h, st.Pause[i].String()) + } + + i.dumpPairs(buf, infoPair{"gc_last_time", st.LastGC.Format(gcTimeFormat)}, + infoPair{"gc_num", st.NumGC}, + infoPair{"gc_pause_total", st.PauseTotal.String()}, + infoPair{"gc_pause_history", strings.Join(h, ",")}, + ) } func (i *info) dumpStore(buf *bytes.Buffer) { @@ -137,15 +164,33 @@ func (i *info) dumpStore(buf *bytes.Buffer) { s := i.app.ldb.StoreStat() + // getNum := s.GetNum.Get() + // getTotalTime := s.GetTotalTime.Get() + + // gt := int64(0) + // if getNum > 0 { + // gt = getTotalTime.Nanoseconds() / (getNum * 1e3) + // } + + // commitNum := s.BatchCommitNum.Get() + // commitTotalTime := s.BatchCommitTotalTime.Get() + + // ct := int64(0) + // if commitNum > 0 { + // ct = commitTotalTime.Nanoseconds() / (commitNum * 1e3) + // } + i.dumpPairs(buf, infoPair{"name", i.app.cfg.DBName}, infoPair{"get", s.GetNum}, infoPair{"get_missing", s.GetMissingNum}, infoPair{"put", s.PutNum}, infoPair{"delete", s.DeleteNum}, + infoPair{"get_total_time", s.GetTotalTime.Get().String()}, infoPair{"iter", s.IterNum}, infoPair{"iter_seek", s.IterSeekNum}, infoPair{"iter_close", s.IterCloseNum}, infoPair{"batch_commit", s.BatchCommitNum}, + infoPair{"batch_commit_total_time", s.BatchCommitTotalTime.Get().String()}, ) } diff --git a/server/replication.go b/server/replication.go index 24a5c1a..3f95388 100644 --- a/server/replication.go +++ b/server/replication.go @@ -19,6 +19,7 @@ import ( var ( errConnectMaster = errors.New("connect master error") + errReplClosed = errors.New("replication is closed") ) type master struct { @@ -47,17 +48,16 @@ func newMaster(app *App) *master { } func (m *master) Close() { - ledis.AsyncNotify(m.quit) + m.quit <- struct{}{} - if m.conn != nil { - //for replication, we send quit command to close gracefully - m.conn.Send("quit") - - m.conn.Close() - m.conn = nil - } + m.closeConn() m.wg.Wait() + + select { + case <-m.quit: + default: + } } func (m 
*master) resetConn() error { @@ -67,7 +67,6 @@ func (m *master) resetConn() error { if m.conn != nil { m.conn.Close() - m.conn = nil } m.conn = goledis.NewConn(m.addr) @@ -75,6 +74,15 @@ func (m *master) resetConn() error { return nil } +func (m *master) closeConn() { + if m.conn != nil { + //for replication, we send quit command to close gracefully + m.conn.Send("quit") + + m.conn.Close() + } +} + func (m *master) stopReplication() error { m.Close() @@ -87,9 +95,7 @@ func (m *master) startReplication(masterAddr string, restart bool) error { m.addr = masterAddr - m.quit = make(chan struct{}, 1) - - m.app.cfg.Readonly = true + m.app.cfg.SetReadonly(true) m.wg.Add(1) go m.runReplication(restart) @@ -123,28 +129,20 @@ func (m *master) runReplication(restart bool) { if restart { if err := m.fullSync(); err != nil { - if m.conn != nil { - //if conn == nil, other close the replication, not error - log.Error("restart fullsync error %s", err.Error()) - } + log.Error("restart fullsync error %s", err.Error()) return } } for { - if err := m.sync(); err != nil { - if m.conn != nil { - //if conn == nil, other close the replication, not error - log.Error("sync error %s", err.Error()) - } - return - } - select { case <-m.quit: return default: - break + if err := m.sync(); err != nil { + log.Error("sync error %s", err.Error()) + return + } } } } @@ -266,7 +264,7 @@ func (app *App) slaveof(masterAddr string, restart bool, readonly bool) error { //in master mode and no slaveof, only set readonly if len(app.cfg.SlaveOf) == 0 && len(masterAddr) == 0 { - app.cfg.Readonly = readonly + app.cfg.SetReadonly(readonly) return nil } @@ -281,7 +279,7 @@ func (app *App) slaveof(masterAddr string, restart bool, readonly bool) error { return err } - app.cfg.Readonly = readonly + app.cfg.SetReadonly(readonly) } else { return app.m.startReplication(masterAddr, restart) } @@ -323,7 +321,7 @@ func (app *App) removeSlave(c *client, activeQuit bool) { delete(app.slaves, addr) log.Info("remove slave %s", addr) if activeQuit { - asyncNotifyUint64(app.slaveSyncAck, c.lastLogID) + asyncNotifyUint64(app.slaveSyncAck, c.lastLogID.Get()) } } } @@ -339,7 +337,7 @@ func (app *App) slaveAck(c *client) { return } - asyncNotifyUint64(app.slaveSyncAck, c.lastLogID) + asyncNotifyUint64(app.slaveSyncAck, c.lastLogID.Get()) } func asyncNotifyUint64(ch chan uint64, v uint64) { @@ -369,11 +367,12 @@ func (app *App) publishNewLog(l *rpl.Log) { n := 0 logId := l.ID for _, s := range app.slaves { - if s.lastLogID == logId { + lastLogID := s.lastLogID.Get() + if lastLogID == logId { //slave has already owned this log n++ - } else if s.lastLogID > logId { - log.Error("invalid slave %s, lastlogid %d > %d", s.slaveListeningAddr, s.lastLogID, logId) + } else if lastLogID > logId { + log.Error("invalid slave %s, lastlogid %d > %d", s.slaveListeningAddr, lastLogID, logId) } } diff --git a/server/util.go b/server/util.go index 44b289c..7c5b73d 100644 --- a/server/util.go +++ b/server/util.go @@ -3,6 +3,9 @@ package server import ( "bufio" "errors" + "fmt" + "github.com/siddontang/go/arena" + "io" ) var ( @@ -19,5 +22,115 @@ func ReadLine(rb *bufio.Reader) ([]byte, error) { if i < 0 || p[i] != '\r' { return nil, errLineFormat } + return p[:i], nil } + +func readBytes(br *bufio.Reader, a *arena.Arena) (bytes []byte, err error) { + size, err := readLong(br) + if err != nil { + return nil, err + } + if size == -1 { + return nil, nil + } + if size < 0 { + return nil, errors.New("Invalid size: " + fmt.Sprint("%d", size)) + } + + buf := a.Make(int(size) + 2) + 
if _, err = io.ReadFull(br, buf); err != nil { + return nil, err + } + + if buf[len(buf)-2] != '\r' && buf[len(buf)-1] != '\n' { + return nil, errors.New("bad bulk string format") + } + + bytes = buf[0 : len(buf)-2] + + return +} + +func readLong(in *bufio.Reader) (result int64, err error) { + read, err := in.ReadByte() + if err != nil { + return -1, err + } + var sign int + if read == '-' { + read, err = in.ReadByte() + if err != nil { + return -1, err + } + sign = -1 + } else { + sign = 1 + } + var number int64 + for number = 0; err == nil; read, err = in.ReadByte() { + if read == '\r' { + read, err = in.ReadByte() + if err != nil { + return -1, err + } + if read == '\n' { + return number * int64(sign), nil + } else { + return -1, errors.New("Bad line ending") + } + } + value := read - '0' + if value >= 0 && value < 10 { + number *= 10 + number += int64(value) + } else { + return -1, errors.New("Invalid digit") + } + } + return -1, err +} + +func ReadRequest(in *bufio.Reader, a *arena.Arena) ([][]byte, error) { + code, err := in.ReadByte() + if err != nil { + return nil, err + } + + if code != '*' { + return nil, errReadRequest + } + + var nparams int64 + if nparams, err = readLong(in); err != nil { + return nil, err + } else if nparams <= 0 { + return nil, errReadRequest + } + + req := make([][]byte, nparams) + for i := range req { + if code, err = in.ReadByte(); err != nil { + return nil, err + } else if code != '$' { + return nil, errReadRequest + } + + if req[i], err = readBytes(in, a); err != nil { + return nil, err + } + } + + return req, nil +} + +func lowerSlice(buf []byte) []byte { + for i, r := range buf { + if 'A' <= r && r <= 'Z' { + r += 'a' - 'A' + } + + buf[i] = r + } + return buf +} diff --git a/store/db.go b/store/db.go index b2d116b..9964c5e 100644 --- a/store/db.go +++ b/store/db.go @@ -39,8 +39,10 @@ func (db *DB) NewIterator() *Iterator { } func (db *DB) Get(key []byte) ([]byte, error) { + t := time.Now() v, err := db.db.Get(key) db.st.statGet(v, err) + db.st.GetTotalTime.Add(time.Now().Sub(t)) return v, err } @@ -156,3 +158,22 @@ func (db *DB) needSyncCommit() bool { } } + +func (db *DB) GetSlice(key []byte) (Slice, error) { + if d, ok := db.db.(driver.ISliceGeter); ok { + t := time.Now() + v, err := d.GetSlice(key) + db.st.statGet(v, err) + db.st.GetTotalTime.Add(time.Now().Sub(t)) + return v, err + } else { + v, err := db.Get(key) + if err != nil { + return nil, err + } else if v == nil { + return nil, nil + } else { + return driver.GoSlice(v), nil + } + } +} diff --git a/store/driver/batch.go b/store/driver/batch.go index 5fc461f..1c1e899 100644 --- a/store/driver/batch.go +++ b/store/driver/batch.go @@ -1,5 +1,9 @@ package driver +import ( + "github.com/syndtr/goleveldb/leveldb" +) + type BatchPuter interface { BatchPut([]Write) error SyncBatchPut([]Write) error @@ -11,34 +15,56 @@ type Write struct { } type WriteBatch struct { - batch BatchPuter - wb []Write + d *leveldb.Batch + + wb []Write + w BatchPuter } -func (w *WriteBatch) Put(key, value []byte) { +func (wb *WriteBatch) Close() { + wb.d.Reset() + wb.wb = wb.wb[0:0] +} + +func (wb *WriteBatch) Put(key, value []byte) { if value == nil { value = []byte{} } - w.wb = append(w.wb, Write{key, value}) + wb.wb = append(wb.wb, Write{key, value}) } -func (w *WriteBatch) Delete(key []byte) { - w.wb = append(w.wb, Write{key, nil}) +func (wb *WriteBatch) Delete(key []byte) { + wb.wb = append(wb.wb, Write{key, nil}) } -func (w *WriteBatch) Commit() error { - return w.batch.BatchPut(w.wb) +func (wb *WriteBatch) Commit() 
error { + return wb.w.BatchPut(wb.wb) } -func (w *WriteBatch) SyncCommit() error { - return w.batch.SyncBatchPut(w.wb) +func (wb *WriteBatch) SyncCommit() error { + return wb.w.SyncBatchPut(wb.wb) } -func (w *WriteBatch) Rollback() error { - w.wb = w.wb[0:0] +func (wb *WriteBatch) Rollback() error { + wb.wb = wb.wb[0:0] return nil } -func NewWriteBatch(puter BatchPuter) IWriteBatch { - return &WriteBatch{puter, []Write{}} +func (wb *WriteBatch) Data() []byte { + wb.d.Reset() + for _, w := range wb.wb { + if w.Value == nil { + wb.d.Delete(w.Key) + } else { + wb.d.Put(w.Key, w.Value) + } + } + return wb.d.Dump() +} + +func NewWriteBatch(puter BatchPuter) *WriteBatch { + return &WriteBatch{ + &leveldb.Batch{}, + []Write{}, + puter} } diff --git a/store/driver/driver.go b/store/driver/driver.go index e4312ce..b571738 100644 --- a/store/driver/driver.go +++ b/store/driver/driver.go @@ -58,6 +58,8 @@ type IWriteBatch interface { Commit() error SyncCommit() error Rollback() error + Data() []byte + Close() } type Tx interface { @@ -71,3 +73,7 @@ type Tx interface { Commit() error Rollback() error } + +type ISliceGeter interface { + GetSlice(key []byte) (ISlice, error) +} diff --git a/store/driver/slice.go b/store/driver/slice.go new file mode 100644 index 0000000..d0c80e0 --- /dev/null +++ b/store/driver/slice.go @@ -0,0 +1,21 @@ +package driver + +type ISlice interface { + Data() []byte + Size() int + Free() +} + +type GoSlice []byte + +func (s GoSlice) Data() []byte { + return []byte(s) +} + +func (s GoSlice) Size() int { + return len(s) +} + +func (s GoSlice) Free() { + +} diff --git a/store/goleveldb/batch.go b/store/goleveldb/batch.go index 85b78c6..2032279 100644 --- a/store/goleveldb/batch.go +++ b/store/goleveldb/batch.go @@ -1,7 +1,7 @@ package goleveldb import ( - "github.com/siddontang/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb" ) type WriteBatch struct { @@ -29,3 +29,11 @@ func (w *WriteBatch) Rollback() error { w.wbatch.Reset() return nil } + +func (w *WriteBatch) Close() { + w.wbatch.Reset() +} + +func (w *WriteBatch) Data() []byte { + return w.wbatch.Dump() +} diff --git a/store/goleveldb/db.go b/store/goleveldb/db.go index 9924067..af8633b 100644 --- a/store/goleveldb/db.go +++ b/store/goleveldb/db.go @@ -1,12 +1,12 @@ package goleveldb import ( - "github.com/siddontang/goleveldb/leveldb" - "github.com/siddontang/goleveldb/leveldb/cache" - "github.com/siddontang/goleveldb/leveldb/filter" - "github.com/siddontang/goleveldb/leveldb/opt" - "github.com/siddontang/goleveldb/leveldb/storage" - "github.com/siddontang/goleveldb/leveldb/util" + "github.com/syndtr/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" "github.com/siddontang/ledisdb/config" "github.com/siddontang/ledisdb/store/driver" @@ -126,6 +126,12 @@ func newOptions(cfg *config.LevelDBConfig) *opt.Options { opts.BlockSize = cfg.BlockSize opts.WriteBuffer = cfg.WriteBufferSize + opts.CachedOpenFiles = cfg.MaxOpenFiles + + //here we use default value, later add config support + opts.CompactionTableSize = 32 * 1024 * 1024 + opts.WriteL0SlowdownTrigger = 16 + opts.WriteL0PauseTrigger = 64 return opts } diff --git a/store/goleveldb/iterator.go b/store/goleveldb/iterator.go index bd06376..c1fd8b5 100644 --- a/store/goleveldb/iterator.go +++ b/store/goleveldb/iterator.go @@ -1,7 +1,7 @@ package goleveldb import ( - 
"github.com/siddontang/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/iterator" ) type Iterator struct { diff --git a/store/goleveldb/snapshot.go b/store/goleveldb/snapshot.go index 4dd56a9..c615579 100644 --- a/store/goleveldb/snapshot.go +++ b/store/goleveldb/snapshot.go @@ -1,8 +1,8 @@ package goleveldb import ( - "github.com/siddontang/goleveldb/leveldb" "github.com/siddontang/ledisdb/store/driver" + "github.com/syndtr/goleveldb/leveldb" ) type Snapshot struct { diff --git a/store/leveldb/batch.go b/store/leveldb/batch.go index caadc03..027aa39 100644 --- a/store/leveldb/batch.go +++ b/store/leveldb/batch.go @@ -4,22 +4,37 @@ package leveldb // #cgo LDFLAGS: -lleveldb // #include "leveldb/c.h" +// #include "leveldb_ext.h" import "C" import ( + "github.com/syndtr/goleveldb/leveldb" "unsafe" ) type WriteBatch struct { db *DB wbatch *C.leveldb_writebatch_t + + gbatch *leveldb.Batch } -func (w *WriteBatch) Close() error { - C.leveldb_writebatch_destroy(w.wbatch) - w.wbatch = nil +func newWriteBatch(db *DB) *WriteBatch { + w := new(WriteBatch) + w.db = db + w.wbatch = C.leveldb_writebatch_create() + w.gbatch = new(leveldb.Batch) - return nil + return w +} + +func (w *WriteBatch) Close() { + if w.wbatch != nil { + C.leveldb_writebatch_destroy(w.wbatch) + w.wbatch = nil + } + + w.gbatch = nil } func (w *WriteBatch) Put(key, value []byte) { @@ -52,6 +67,7 @@ func (w *WriteBatch) SyncCommit() error { func (w *WriteBatch) Rollback() error { C.leveldb_writebatch_clear(w.wbatch) + return nil } @@ -63,3 +79,26 @@ func (w *WriteBatch) commit(wb *WriteOptions) error { } return nil } + +//export leveldb_writebatch_iterate_put +func leveldb_writebatch_iterate_put(p unsafe.Pointer, k *C.char, klen C.size_t, v *C.char, vlen C.size_t) { + b := (*leveldb.Batch)(p) + key := slice(unsafe.Pointer(k), int(klen)) + value := slice(unsafe.Pointer(v), int(vlen)) + b.Put(key, value) +} + +//export leveldb_writebatch_iterate_delete +func leveldb_writebatch_iterate_delete(p unsafe.Pointer, k *C.char, klen C.size_t) { + b := (*leveldb.Batch)(p) + key := slice(unsafe.Pointer(k), int(klen)) + b.Delete(key) +} + +func (w *WriteBatch) Data() []byte { + w.gbatch.Reset() + C.leveldb_writebatch_iterate_ext(w.wbatch, + unsafe.Pointer(w.gbatch)) + b := w.gbatch.Dump() + return b +} diff --git a/store/leveldb/db.go b/store/leveldb/db.go index 449c32b..64bbc2b 100644 --- a/store/leveldb/db.go +++ b/store/leveldb/db.go @@ -14,6 +14,7 @@ import ( "github.com/siddontang/ledisdb/config" "github.com/siddontang/ledisdb/store/driver" "os" + "runtime" "unsafe" ) @@ -182,10 +183,11 @@ func (db *DB) SyncDelete(key []byte) error { } func (db *DB) NewWriteBatch() driver.IWriteBatch { - wb := &WriteBatch{ - db: db, - wbatch: C.leveldb_writebatch_create(), - } + wb := newWriteBatch(db) + + runtime.SetFinalizer(wb, func(w *WriteBatch) { + w.Close() + }) return wb } @@ -257,6 +259,28 @@ func (db *DB) get(ro *ReadOptions, key []byte) ([]byte, error) { return C.GoBytes(unsafe.Pointer(value), C.int(vallen)), nil } +func (db *DB) getSlice(ro *ReadOptions, key []byte) (driver.ISlice, error) { + var errStr *C.char + var vallen C.size_t + var k *C.char + if len(key) != 0 { + k = (*C.char)(unsafe.Pointer(&key[0])) + } + + value := C.leveldb_get( + db.db, ro.Opt, k, C.size_t(len(key)), &vallen, &errStr) + + if errStr != nil { + return nil, saveError(errStr) + } + + if value == nil { + return nil, nil + } + + return NewCSlice(unsafe.Pointer(value), int(vallen)), nil +} + func (db *DB) delete(wo *WriteOptions, key []byte) error { var 
errStr *C.char var k *C.char @@ -282,6 +306,10 @@ func (db *DB) Compact() error { return nil } +func (db *DB) GetSlice(key []byte) (driver.ISlice, error) { + return db.getSlice(db.readOpts, key) +} + func init() { driver.Register(Store{}) } diff --git a/store/leveldb/leveldb_ext.cc b/store/leveldb/leveldb_ext.cc index a362ab5..540b739 100644 --- a/store/leveldb/leveldb_ext.cc +++ b/store/leveldb/leveldb_ext.cc @@ -84,5 +84,12 @@ unsigned char leveldb_iter_prev_ext(leveldb_iterator_t* iter) { return leveldb_iter_valid(iter); } +extern void leveldb_writebatch_iterate_put(void*, const char* k, size_t klen, const char* v, size_t vlen); +extern void leveldb_writebatch_iterate_delete(void*, const char* k, size_t klen); + +void leveldb_writebatch_iterate_ext(leveldb_writebatch_t* w, void *p) { + leveldb_writebatch_iterate(w, p, + leveldb_writebatch_iterate_put, leveldb_writebatch_iterate_delete); +} } \ No newline at end of file diff --git a/store/leveldb/leveldb_ext.h b/store/leveldb/leveldb_ext.h index 1c5f986..3eed41b 100644 --- a/store/leveldb/leveldb_ext.h +++ b/store/leveldb/leveldb_ext.h @@ -32,6 +32,7 @@ extern unsigned char leveldb_iter_seek_ext(leveldb_iterator_t*, const char* k, s extern unsigned char leveldb_iter_next_ext(leveldb_iterator_t*); extern unsigned char leveldb_iter_prev_ext(leveldb_iterator_t*); +extern void leveldb_writebatch_iterate_ext(leveldb_writebatch_t*, void* p); #ifdef __cplusplus } diff --git a/store/leveldb/slice.go b/store/leveldb/slice.go new file mode 100644 index 0000000..83ebf55 --- /dev/null +++ b/store/leveldb/slice.go @@ -0,0 +1,40 @@ +// +build leveldb + +package leveldb + +// #cgo LDFLAGS: -lleveldb +// #include "leveldb/c.h" +import "C" + +import ( + "reflect" + "unsafe" +) + +type CSlice struct { + data unsafe.Pointer + size int +} + +func NewCSlice(p unsafe.Pointer, n int) *CSlice { + return &CSlice{p, n} +} + +func (s *CSlice) Data() []byte { + var value []byte + + sH := (*reflect.SliceHeader)(unsafe.Pointer(&value)) + sH.Cap = int(s.size) + sH.Len = int(s.size) + sH.Data = uintptr(s.data) + + return value +} + +func (s *CSlice) Size() int { + return int(s.size) +} + +func (s *CSlice) Free() { + C.leveldb_free(s.data) +} diff --git a/store/leveldb/snapshot.go b/store/leveldb/snapshot.go index e8e6ca7..bdc8d51 100644 --- a/store/leveldb/snapshot.go +++ b/store/leveldb/snapshot.go @@ -21,6 +21,10 @@ func (s *Snapshot) Get(key []byte) ([]byte, error) { return s.db.get(s.readOpts, key) } +func (s *Snapshot) GetSlice(key []byte) (driver.ISlice, error) { + return s.db.getSlice(s.readOpts, key) +} + func (s *Snapshot) NewIterator() driver.IIterator { it := new(Iterator) it.it = C.leveldb_create_iterator(s.db.db, s.db.iteratorOpts.Opt) diff --git a/store/rocksdb/batch.go b/store/rocksdb/batch.go index 017fc88..bb727e7 100644 --- a/store/rocksdb/batch.go +++ b/store/rocksdb/batch.go @@ -4,6 +4,7 @@ package rocksdb // #cgo LDFLAGS: -lrocksdb // #include "rocksdb/c.h" +// #include "rocksdb_ext.h" import "C" import ( @@ -11,17 +12,21 @@ import ( ) type WriteBatch struct { - db *DB - wbatch *C.rocksdb_writebatch_t + db *DB + wbatch *C.rocksdb_writebatch_t + commitOk bool } -func (w *WriteBatch) Close() error { - C.rocksdb_writebatch_destroy(w.wbatch) - w.wbatch = nil - return nil +func (w *WriteBatch) Close() { + if w.wbatch != nil { + C.rocksdb_writebatch_destroy(w.wbatch) + w.wbatch = nil + } } func (w *WriteBatch) Put(key, value []byte) { + w.commitOk = false + var k, v *C.char if len(key) != 0 { k = (*C.char)(unsafe.Pointer(&key[0])) @@ -37,6 +42,8 @@ func 
(w *WriteBatch) Put(key, value []byte) { } func (w *WriteBatch) Delete(key []byte) { + w.commitOk = false + C.rocksdb_writebatch_delete(w.wbatch, (*C.char)(unsafe.Pointer(&key[0])), C.size_t(len(key))) } @@ -50,15 +57,27 @@ func (w *WriteBatch) SyncCommit() error { } func (w *WriteBatch) Rollback() error { - C.rocksdb_writebatch_clear(w.wbatch) + if !w.commitOk { + C.rocksdb_writebatch_clear(w.wbatch) + } return nil } func (w *WriteBatch) commit(wb *WriteOptions) error { + w.commitOk = true + var errStr *C.char - C.rocksdb_write(w.db.db, wb.Opt, w.wbatch, &errStr) + C.rocksdb_write_ext(w.db.db, wb.Opt, w.wbatch, &errStr) if errStr != nil { + w.commitOk = false return saveError(errStr) } return nil } + +func (w *WriteBatch) Data() []byte { + var vallen C.size_t + value := C.rocksdb_writebatch_data(w.wbatch, &vallen) + + return slice(unsafe.Pointer(value), int(vallen)) +} diff --git a/store/rocksdb/db.go b/store/rocksdb/db.go index e1f10a4..952121b 100644 --- a/store/rocksdb/db.go +++ b/store/rocksdb/db.go @@ -15,6 +15,7 @@ import ( "github.com/siddontang/ledisdb/config" "github.com/siddontang/ledisdb/store/driver" "os" + "runtime" "unsafe" ) @@ -131,6 +132,7 @@ func (db *DB) initOptions(cfg *config.RocksDBConfig) { opts.SetMaxOpenFiles(cfg.MaxOpenFiles) opts.SetMaxBackgroundCompactions(cfg.MaxBackgroundCompactions) opts.SetMaxBackgroundFlushes(cfg.MaxBackgroundFlushes) + opts.SetLevel0FileNumCompactionTrigger(cfg.Level0FileNumCompactionTrigger) opts.SetLevel0SlowdownWritesTrigger(cfg.Level0SlowdownWritesTrigger) opts.SetLevel0StopWritesTrigger(cfg.Level0StopWritesTrigger) opts.SetTargetFileSizeBase(cfg.TargetFileSizeBase) @@ -215,6 +217,10 @@ func (db *DB) NewWriteBatch() driver.IWriteBatch { wbatch: C.rocksdb_writebatch_create(), } + runtime.SetFinalizer(wb, func(w *WriteBatch) { + w.Close() + }) + return wb } @@ -284,6 +290,28 @@ func (db *DB) get(ro *ReadOptions, key []byte) ([]byte, error) { return C.GoBytes(unsafe.Pointer(value), C.int(vallen)), nil } +func (db *DB) getSlice(ro *ReadOptions, key []byte) (driver.ISlice, error) { + var errStr *C.char + var vallen C.size_t + var k *C.char + if len(key) != 0 { + k = (*C.char)(unsafe.Pointer(&key[0])) + } + + value := C.rocksdb_get( + db.db, ro.Opt, k, C.size_t(len(key)), &vallen, &errStr) + + if errStr != nil { + return nil, saveError(errStr) + } + + if value == nil { + return nil, nil + } + + return NewCSlice(unsafe.Pointer(value), int(vallen)), nil +} + func (db *DB) delete(wo *WriteOptions, key []byte) error { var errStr *C.char var k *C.char @@ -309,6 +337,10 @@ func (db *DB) Compact() error { return nil } +func (db *DB) GetSlice(key []byte) (driver.ISlice, error) { + return db.getSlice(db.readOpts, key) +} + func init() { driver.Register(Store{}) } diff --git a/store/rocksdb/options.go b/store/rocksdb/options.go index 0404cdb..2783679 100644 --- a/store/rocksdb/options.go +++ b/store/rocksdb/options.go @@ -57,6 +57,14 @@ func (o *Options) Close() { C.rocksdb_options_destroy(o.Opt) } +func (o *Options) IncreaseParallelism(n int) { + C.rocksdb_options_increase_parallelism(o.Opt, C.int(n)) +} + +func (o *Options) OptimizeLevelStyleCompaction(n int) { + C.rocksdb_options_optimize_level_style_compaction(o.Opt, C.uint64_t(n)) +} + func (o *Options) SetComparator(cmp *C.rocksdb_comparator_t) { C.rocksdb_options_set_comparator(o.Opt, cmp) } diff --git a/store/rocksdb/rocksdb_ext.cc b/store/rocksdb/rocksdb_ext.cc index 4a7720f..39036ab 100644 --- a/store/rocksdb/rocksdb_ext.cc +++ b/store/rocksdb/rocksdb_ext.cc @@ -32,5 +32,13 @@ unsigned char 
rocksdb_iter_prev_ext(rocksdb_iterator_t* iter) { return rocksdb_iter_valid(iter); } +void rocksdb_write_ext(rocksdb_t* db, + const rocksdb_writeoptions_t* options, + rocksdb_writebatch_t* batch, char** errptr) { + rocksdb_write(db, options, batch, errptr); + if(*errptr == NULL) { + rocksdb_writebatch_clear(batch); + } +} } \ No newline at end of file diff --git a/store/rocksdb/rocksdb_ext.h b/store/rocksdb/rocksdb_ext.h index 4938294..11cb653 100644 --- a/store/rocksdb/rocksdb_ext.h +++ b/store/rocksdb/rocksdb_ext.h @@ -15,7 +15,7 @@ extern unsigned char rocksdb_iter_seek_to_last_ext(rocksdb_iterator_t*); extern unsigned char rocksdb_iter_seek_ext(rocksdb_iterator_t*, const char* k, size_t klen); extern unsigned char rocksdb_iter_next_ext(rocksdb_iterator_t*); extern unsigned char rocksdb_iter_prev_ext(rocksdb_iterator_t*); - +extern void rocksdb_write_ext(rocksdb_t* db, const rocksdb_writeoptions_t* options, rocksdb_writebatch_t* batch, char** errptr); #ifdef __cplusplus } diff --git a/store/rocksdb/slice.go b/store/rocksdb/slice.go new file mode 100644 index 0000000..bbaa65b --- /dev/null +++ b/store/rocksdb/slice.go @@ -0,0 +1,41 @@ +//+build rocksdb + +package rocksdb + +// #cgo LDFLAGS: -lrocksdb +// #include +// #include +import "C" + +import ( + "reflect" + "unsafe" +) + +type CSlice struct { + data unsafe.Pointer + size int +} + +func NewCSlice(p unsafe.Pointer, n int) *CSlice { + return &CSlice{p, n} +} + +func (s *CSlice) Data() []byte { + var value []byte + + sH := (*reflect.SliceHeader)(unsafe.Pointer(&value)) + sH.Cap = int(s.size) + sH.Len = int(s.size) + sH.Data = uintptr(s.data) + + return value +} + +func (s *CSlice) Size() int { + return int(s.size) +} + +func (s *CSlice) Free() { + C.free(s.data) +} diff --git a/store/rocksdb/snapshot.go b/store/rocksdb/snapshot.go index e560e8e..1ced600 100644 --- a/store/rocksdb/snapshot.go +++ b/store/rocksdb/snapshot.go @@ -21,6 +21,10 @@ func (s *Snapshot) Get(key []byte) ([]byte, error) { return s.db.get(s.readOpts, key) } +func (s *Snapshot) GetSlice(key []byte) (driver.ISlice, error) { + return s.db.getSlice(s.readOpts, key) +} + func (s *Snapshot) NewIterator() driver.IIterator { it := new(Iterator) it.it = C.rocksdb_create_iterator(s.db.db, s.db.iteratorOpts.Opt) diff --git a/store/slice.go b/store/slice.go new file mode 100644 index 0000000..b027f4f --- /dev/null +++ b/store/slice.go @@ -0,0 +1,9 @@ +package store + +import ( + "github.com/siddontang/ledisdb/store/driver" +) + +type Slice interface { + driver.ISlice +} diff --git a/store/snapshot.go b/store/snapshot.go index 80524ce..a1c9de9 100644 --- a/store/snapshot.go +++ b/store/snapshot.go @@ -25,6 +25,23 @@ func (s *Snapshot) Get(key []byte) ([]byte, error) { return v, err } +func (s *Snapshot) GetSlice(key []byte) (Slice, error) { + if d, ok := s.ISnapshot.(driver.ISliceGeter); ok { + v, err := d.GetSlice(key) + s.st.statGet(v, err) + return v, err + } else { + v, err := s.Get(key) + if err != nil { + return nil, err + } else if v == nil { + return nil, nil + } else { + return driver.GoSlice(v), nil + } + } +} + func (s *Snapshot) Close() { s.st.SnapshotCloseNum.Add(1) s.ISnapshot.Close() diff --git a/store/stat.go b/store/stat.go index 0b535d0..e0a035a 100644 --- a/store/stat.go +++ b/store/stat.go @@ -5,25 +5,27 @@ import ( ) type Stat struct { - GetNum sync2.AtomicInt64 - GetMissingNum sync2.AtomicInt64 - PutNum sync2.AtomicInt64 - DeleteNum sync2.AtomicInt64 - IterNum sync2.AtomicInt64 - IterSeekNum sync2.AtomicInt64 - IterCloseNum sync2.AtomicInt64 - SnapshotNum 
sync2.AtomicInt64 - SnapshotCloseNum sync2.AtomicInt64 - BatchNum sync2.AtomicInt64 - BatchCommitNum sync2.AtomicInt64 - TxNum sync2.AtomicInt64 - TxCommitNum sync2.AtomicInt64 - TxCloseNum sync2.AtomicInt64 - CompactNum sync2.AtomicInt64 - CompactTotalTime sync2.AtomicDuration + GetNum sync2.AtomicInt64 + GetMissingNum sync2.AtomicInt64 + GetTotalTime sync2.AtomicDuration + PutNum sync2.AtomicInt64 + DeleteNum sync2.AtomicInt64 + IterNum sync2.AtomicInt64 + IterSeekNum sync2.AtomicInt64 + IterCloseNum sync2.AtomicInt64 + SnapshotNum sync2.AtomicInt64 + SnapshotCloseNum sync2.AtomicInt64 + BatchNum sync2.AtomicInt64 + BatchCommitNum sync2.AtomicInt64 + BatchCommitTotalTime sync2.AtomicDuration + TxNum sync2.AtomicInt64 + TxCommitNum sync2.AtomicInt64 + TxCloseNum sync2.AtomicInt64 + CompactNum sync2.AtomicInt64 + CompactTotalTime sync2.AtomicDuration } -func (st *Stat) statGet(v []byte, err error) { +func (st *Stat) statGet(v interface{}, err error) { st.GetNum.Add(1) if v == nil && err == nil { st.GetMissingNum.Add(1) diff --git a/store/store_test.go b/store/store_test.go index b488158..9045ce2 100644 --- a/store/store_test.go +++ b/store/store_test.go @@ -6,6 +6,7 @@ import ( "github.com/siddontang/ledisdb/config" "github.com/siddontang/ledisdb/store/driver" "os" + "reflect" "testing" ) @@ -38,6 +39,7 @@ func testStore(db *DB, t *testing.T) { testBatch(db, t) testIterator(db, t) testSnapshot(db, t) + testBatchData(db, t) } func testClear(db *DB, t *testing.T) { @@ -61,6 +63,16 @@ func testSimple(db *DB, t *testing.T) { t.Fatal("not equal") } + if v, err := db.GetSlice(key); err != nil { + t.Fatal(err) + } else if v == nil { + t.Fatal("must not nil") + } else if !bytes.Equal(v.Data(), value) { + t.Fatal("not equal") + } else { + v.Free() + } + if err := db.Delete(key); err != nil { t.Fatal(err) } @@ -70,6 +82,12 @@ func testSimple(db *DB, t *testing.T) { t.Fatal("must nil") } + if v, err := db.GetSlice(key); err != nil { + t.Fatal(err) + } else if v != nil { + t.Fatal("must nil") + } + if err := db.Put(key, nil); err != nil { t.Fatal(err) } @@ -326,3 +344,49 @@ func testSnapshot(db *DB, t *testing.T) { } } + +func testBatchData(db *DB, t *testing.T) { + w := db.NewWriteBatch() + + w.Put([]byte("a"), []byte("1")) + w.Put([]byte("b"), nil) + w.Delete([]byte("c")) + + d := w.BatchData() + + if kvs, err := d.Items(); err != nil { + t.Fatal(err) + } else if len(kvs) != 3 { + t.Fatal(len(kvs)) + } else if !reflect.DeepEqual(kvs[0], BatchItem{[]byte("a"), []byte("1")}) { + t.Fatal("must equal") + } else if !reflect.DeepEqual(kvs[1], BatchItem{[]byte("b"), []byte{}}) { + t.Fatal("must equal") + } else if !reflect.DeepEqual(kvs[2], BatchItem{[]byte("c"), nil}) { + t.Fatal("must equal") + } + + if err := d.Append(d); err != nil { + t.Fatal(err) + } else if d.Len() != 6 { + t.Fatal(d.Len()) + } + + if kvs, err := d.Items(); err != nil { + t.Fatal(err) + } else if len(kvs) != 6 { + t.Fatal(len(kvs)) + } else if !reflect.DeepEqual(kvs[0], BatchItem{[]byte("a"), []byte("1")}) { + t.Fatal("must equal") + } else if !reflect.DeepEqual(kvs[1], BatchItem{[]byte("b"), []byte{}}) { + t.Fatal("must equal") + } else if !reflect.DeepEqual(kvs[2], BatchItem{[]byte("c"), nil}) { + t.Fatal("must equal") + } else if !reflect.DeepEqual(kvs[3], BatchItem{[]byte("a"), []byte("1")}) { + t.Fatal("must equal") + } else if !reflect.DeepEqual(kvs[4], BatchItem{[]byte("b"), []byte{}}) { + t.Fatal("must equal") + } else if !reflect.DeepEqual(kvs[5], BatchItem{[]byte("c"), nil}) { + t.Fatal("must equal") + } +} diff --git 
a/store/tx.go b/store/tx.go index 6845ee5..4dbf311 100644 --- a/store/tx.go +++ b/store/tx.go @@ -56,6 +56,16 @@ func (tx *Tx) Get(key []byte) ([]byte, error) { return v, err } +func (tx *Tx) GetSlice(key []byte) (Slice, error) { + if v, err := tx.Get(key); err != nil { + return nil, err + } else if v == nil { + return nil, nil + } else { + return driver.GoSlice(v), nil + } +} + func (tx *Tx) Put(key []byte, value []byte) error { tx.st.PutNum.Add(1) return tx.tx.Put(key, value) diff --git a/store/writebatch.go b/store/writebatch.go index bf4658c..c193ae0 100644 --- a/store/writebatch.go +++ b/store/writebatch.go @@ -1,16 +1,25 @@ package store import ( + "encoding/binary" "github.com/siddontang/ledisdb/store/driver" + "github.com/syndtr/goleveldb/leveldb" + "time" ) type WriteBatch struct { - wb driver.IWriteBatch - st *Stat + wb driver.IWriteBatch + st *Stat + putNum int64 deleteNum int64 + db *DB - db *DB + data *BatchData +} + +func (wb *WriteBatch) Close() { + wb.wb.Close() } func (wb *WriteBatch) Put(key []byte, value []byte) { @@ -29,13 +38,113 @@ func (wb *WriteBatch) Commit() error { wb.st.DeleteNum.Add(wb.deleteNum) wb.putNum = 0 wb.deleteNum = 0 + + var err error + t := time.Now() if wb.db == nil || !wb.db.needSyncCommit() { - return wb.wb.Commit() + err = wb.wb.Commit() } else { - return wb.wb.SyncCommit() + err = wb.wb.SyncCommit() } + + wb.st.BatchCommitTotalTime.Add(time.Now().Sub(t)) + + return err } func (wb *WriteBatch) Rollback() error { + wb.putNum = 0 + wb.deleteNum = 0 + return wb.wb.Rollback() } + +// the data will be undefined after commit or rollback +func (wb *WriteBatch) BatchData() *BatchData { + data := wb.wb.Data() + if wb.data == nil { + wb.data = new(BatchData) + } + + wb.data.Load(data) + return wb.data +} + +func (wb *WriteBatch) Data() []byte { + b := wb.BatchData() + return b.Data() +} + +const BatchDataHeadLen = 12 + +/* + see leveldb batch data format for more information +*/ + +type BatchData struct { + leveldb.Batch +} + +func NewBatchData(data []byte) (*BatchData, error) { + b := new(BatchData) + + if err := b.Load(data); err != nil { + return nil, err + } + + return b, nil +} + +func (d *BatchData) Append(do *BatchData) error { + d1 := d.Dump() + d2 := do.Dump() + + n := d.Len() + do.Len() + + d1 = append(d1, d2[BatchDataHeadLen:]...) 
+ binary.LittleEndian.PutUint32(d1[8:], uint32(n)) + + return d.Load(d1) +} + +func (d *BatchData) Data() []byte { + return d.Dump() +} + +func (d *BatchData) Reset() { + d.Batch.Reset() +} + +type BatchDataReplay interface { + Put(key, value []byte) + Delete(key []byte) +} + +type BatchItem struct { + Key []byte + Value []byte +} + +type batchItems []BatchItem + +func (bs *batchItems) Put(key, value []byte) { + *bs = append(*bs, BatchItem{key, value}) +} + +func (bs *batchItems) Delete(key []byte) { + *bs = append(*bs, BatchItem{key, nil}) +} + +func (d *BatchData) Replay(r BatchDataReplay) error { + return d.Batch.Replay(r) +} + +func (d *BatchData) Items() ([]BatchItem, error) { + is := make(batchItems, 0, d.Len()) + + if err := d.Replay(&is); err != nil { + return nil, err + } + + return []BatchItem(is), nil +} diff --git a/upgrade/ledis-upgrade-ttl/main.go b/upgrade/ledis-upgrade-ttl/main.go new file mode 100644 index 0000000..96d1a28 --- /dev/null +++ b/upgrade/ledis-upgrade-ttl/main.go @@ -0,0 +1,116 @@ +package main + +import ( + "encoding/binary" + "flag" + "fmt" + "github.com/siddontang/ledisdb/config" + "github.com/siddontang/ledisdb/ledis" + "github.com/siddontang/ledisdb/store" +) + +var configPath = flag.String("config", "", "ledisdb config file") +var dataDir = flag.String("data_dir", "", "ledisdb base data dir") +var dbName = flag.String("db_name", "", "select a db to use, it will overwrite the config's db name") + +func main() { + flag.Parse() + + if len(*configPath) == 0 { + println("need ledis config file") + return + } + + cfg, err := config.NewConfigWithFile(*configPath) + if err != nil { + println(err.Error()) + return + } + + if len(*dataDir) > 0 { + cfg.DataDir = *dataDir + } + + if len(*dbName) > 0 { + cfg.DBName = *dbName + } + + db, err := store.Open(cfg) + if err != nil { + println(err.Error()) + return + } + + // upgrade: ttl time key 101 to ttl time key 103 + + wb := db.NewWriteBatch() + + for i := uint8(0); i < ledis.MaxDBNumber; i++ { + minK, maxK := oldKeyPair(i) + + it := db.RangeIterator(minK, maxK, store.RangeROpen) + num := 0 + for ; it.Valid(); it.Next() { + dt, k, t, err := decodeOldKey(i, it.RawKey()) + if err != nil { + continue + } + + newKey := encodeNewKey(i, dt, k, t) + + wb.Put(newKey, it.RawValue()) + wb.Delete(it.RawKey()) + num++ + if num%1024 == 0 { + if err := wb.Commit(); err != nil { + fmt.Printf("commit error :%s\n", err.Error()) + } + } + } + it.Close() + + if err := wb.Commit(); err != nil { + fmt.Printf("commit error :%s\n", err.Error()) + } + } +} + +func oldKeyPair(index uint8) ([]byte, []byte) { + minB := make([]byte, 11) + minB[0] = index + minB[1] = ledis.ObsoleteExpTimeType + minB[2] = 0 + + maxB := make([]byte, 11) + maxB[0] = index + maxB[1] = ledis.ObsoleteExpTimeType + maxB[2] = 255 + + return minB, maxB +} + +func decodeOldKey(index uint8, tk []byte) (byte, []byte, int64, error) { + if len(tk) < 11 || tk[0] != index || tk[1] != ledis.ObsoleteExpTimeType { + return 0, nil, 0, fmt.Errorf("invalid exp time key") + } + + return tk[2], tk[11:], int64(binary.BigEndian.Uint64(tk[3:])), nil +} + +func encodeNewKey(index uint8, dataType byte, key []byte, when int64) []byte { + buf := make([]byte, len(key)+11) + + buf[0] = index + buf[1] = ledis.ExpTimeType + pos := 2 + + binary.BigEndian.PutUint64(buf[pos:], uint64(when)) + pos += 8 + + buf[pos] = dataType + pos++ + + copy(buf[pos:], key) + + return buf +}
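
The ledis-upgrade-ttl tool above rewrites expiration keys into the new layout produced by encodeNewKey: one index byte, the ledis.ExpTimeType byte, an 8-byte big-endian expiration timestamp, the data-type byte, and finally the original key. The following is a minimal sketch, not part of this patch, of how such a key could be decoded again; decodeNewKey is a hypothetical helper written here only to illustrate the layout, and it assumes the exported ledis.ExpTimeType constant used by the tool.

package main

import (
	"encoding/binary"
	"fmt"

	"github.com/siddontang/ledisdb/ledis"
)

// decodeNewKey reverses the layout written by encodeNewKey in ledis-upgrade-ttl:
// [index][ExpTimeType][when: 8 bytes big-endian][dataType][key...]
func decodeNewKey(index uint8, tk []byte) (dataType byte, key []byte, when int64, err error) {
	if len(tk) < 11 || tk[0] != index || tk[1] != ledis.ExpTimeType {
		return 0, nil, 0, fmt.Errorf("invalid exp time key")
	}

	when = int64(binary.BigEndian.Uint64(tk[2:10]))
	dataType = tk[10]
	key = tk[11:]

	return dataType, key, when, nil
}

func main() {
	// Build a key the same way encodeNewKey does, then decode it back.
	key := []byte("mykey")
	buf := make([]byte, len(key)+11)
	buf[0] = 0
	buf[1] = ledis.ExpTimeType
	binary.BigEndian.PutUint64(buf[2:], uint64(1234567890))
	buf[10] = 'k' // hypothetical data-type byte for this example
	copy(buf[11:], key)

	dt, k, when, err := decodeNewKey(0, buf)
	fmt.Println(dt, string(k), when, err)
}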