diff --git a/.gitignore b/.gitignore index 1955ca3..42e539f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ build nohup.out build_config.mk var +_workspace \ No newline at end of file diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json new file mode 100644 index 0000000..343f3a7 --- /dev/null +++ b/Godeps/Godeps.json @@ -0,0 +1,54 @@ +{ + "ImportPath": "github.com/siddontang/ledisdb", + "GoVersion": "go1.3.2", + "Packages": [ + "./..." + ], + "Deps": [ + { + "ImportPath": "github.com/BurntSushi/toml", + "Rev": "2ceedfee35ad3848e49308ab0c9a4f640cfb5fb2" + }, + { + "ImportPath": "github.com/boltdb/bolt", + "Comment": "data/v1-228-g8fb50d5", + "Rev": "8fb50d5ee57110936b904a7539d4c5f2bf2359db" + }, + { + "ImportPath": "github.com/siddontang/go/bson", + "Rev": "466d5bc779ad45f5923d0f59efbc5d696bf2099c" + }, + { + "ImportPath": "github.com/siddontang/go/filelock", + "Rev": "466d5bc779ad45f5923d0f59efbc5d696bf2099c" + }, + { + "ImportPath": "github.com/siddontang/go/hack", + "Rev": "466d5bc779ad45f5923d0f59efbc5d696bf2099c" + }, + { + "ImportPath": "github.com/siddontang/go/log", + "Rev": "466d5bc779ad45f5923d0f59efbc5d696bf2099c" + }, + { + "ImportPath": "github.com/siddontang/go/num", + "Rev": "466d5bc779ad45f5923d0f59efbc5d696bf2099c" + }, + { + "ImportPath": "github.com/siddontang/go/snappy", + "Rev": "466d5bc779ad45f5923d0f59efbc5d696bf2099c" + }, + { + "ImportPath": "github.com/siddontang/goleveldb/leveldb", + "Rev": "c1f6d721561c48f467b26a277741e55fd224df1e" + }, + { + "ImportPath": "github.com/szferi/gomdb", + "Rev": "d8a6d8371e2409b0787a782bf9b0c5daca364a3d" + }, + { + "ImportPath": "github.com/ugorji/go/codec", + "Rev": "71c2886f5a673a35f909803f38ece5810165097b" + } + ] +} diff --git a/Godeps/Readme b/Godeps/Readme new file mode 100644 index 0000000..4cdaa53 --- /dev/null +++ b/Godeps/Readme @@ -0,0 +1,5 @@ +This directory tree is generated automatically by godep. + +Please do not edit. + +See https://github.com/tools/godep for more information. 
diff --git a/Makefile b/Makefile index f5b6dcd..6764157 100644 --- a/Makefile +++ b/Makefile @@ -16,13 +16,13 @@ export GO_BUILD_TAGS all: build build: - go install -tags '$(GO_BUILD_TAGS)' ./... + $(GO) install -tags '$(GO_BUILD_TAGS)' ./... clean: - go clean -i ./... + $(GO) clean -i ./... test: - go test -tags '$(GO_BUILD_TAGS)' ./... + $(GO) test -tags '$(GO_BUILD_TAGS)' ./... pytest: sh client/ledis-py/tests/all.sh diff --git a/README.md b/README.md index 6779b94..2f12a55 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ LedisDB now supports multiple databases as backend to store data, you can test a + Supports lua scripting. + Supports expiration and ttl. + Supports using redis-cli directly. -+ Multiple client API supports, including Go, Python, Lua(Openresty), C/C++, Node.js. ++ Multiple client API support, including Go, Python, Lua(Openresty), C/C++, Node.js. + Easy to embed in your own Go application. + Restful API support, json/bson/msgpack output. + Replication to guarantee data safe. @@ -35,6 +35,9 @@ Create a workspace and checkout ledisdb source make make test +## Godep support + +LedisDB supports building with [godep](https://github.com/tools/godep) which can manage LedisDB go dependence automatically. ## LevelDB support @@ -60,7 +63,8 @@ Create a workspace and checkout ledisdb source + Set `ROCKSDB_DIR` and `SNAPPY_DIR` to the actual install path in `dev.sh`. + `make clean && make` -**Because RocksDB API may change sometimes, LedisDB may not build successfully. Now LedisDB supports RocksDB version 3.5 or newest master branch. ** + +Because RocksDB API may change sometimes, LedisDB may not build successfully. Now LedisDB supports RocksDB version 3.5 or newest master branch. ## HyperLevelDB support @@ -75,7 +79,7 @@ Create a workspace and checkout ledisdb source ## Choose store database -LedisDB now supports goleveldb, lmdb, leveldb, rocksdb, boltdb, hyperleveldb. it will choose goleveldb as default to store data if you not set. 
+LedisDB now supports goleveldb, lmdb, leveldb, rocksdb, boltdb, hyperleveldb, memory. it will use goleveldb as default to store data if you don't set. Choosing a store database to use is very simple, you have two ways: diff --git a/bootstrap.sh b/bootstrap.sh index ee260b7..ca844b2 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -2,8 +2,13 @@ . ./dev.sh -go get github.com/siddontang/go-log/log -go get github.com/siddontang/go-snappy/snappy +# Test godep install +godep path > /dev/null 2>&1 +if [ "$?" = 0 ]; then + GOPATH=`godep path` + godep restore + exit 0 +fi go get github.com/siddontang/goleveldb/leveldb @@ -14,4 +19,9 @@ go get github.com/boltdb/bolt go get github.com/ugorji/go/codec go get github.com/BurntSushi/toml -go get github.com/siddontang/go-bson/bson + +go get github.com/siddontang/go/bson +go get github.com/siddontang/go/log +go get github.com/siddontang/go/snappy +go get github.com/siddontang/go/num +go get github.com/siddontang/go/filelock diff --git a/client/ledis-py/ledis/client.py b/client/ledis-py/ledis/client.py index 0247d86..2504e5d 100644 --- a/client/ledis-py/ledis/client.py +++ b/client/ledis-py/ledis/client.py @@ -199,7 +199,7 @@ class Ledis(object): def set_response_callback(self, command, callback): "Set a custom Response Callback" self.response_callbacks[command] = callback - + def tx(self): return Transaction( self.connection_pool, @@ -733,6 +733,70 @@ class Ledis(object): "Return the score of element ``value`` in sorted set ``name``" return self.execute_command('ZSCORE', name, value) + def zinterstore(self, dest, keys, aggregate=None): + """ + Intersect multiple sorted sets specified by ``keys`` into + a new sorted set, ``dest``. Scores in the destination will be + aggregated based on the ``aggregate``, or SUM if none is provided. 
+ """ + return self._zaggregate('ZINTERSTORE', dest, keys, aggregate) + + def zunionstore(self, dest, keys, aggregate=None): + """ + Union multiple sorted sets specified by ``keys`` into + a new sorted set, ``dest``. Scores in the destination will be + aggregated based on the ``aggregate``, or SUM if none is provided. + """ + return self._zaggregate('ZUNIONSTORE', dest, keys, aggregate) + + def _zaggregate(self, command, dest, keys, aggregate=None): + pieces = [command, dest, len(keys)] + if isinstance(keys, dict): + keys, weights = iterkeys(keys), itervalues(keys) + else: + weights = None + pieces.extend(keys) + if weights: + pieces.append(Token('WEIGHTS')) + pieces.extend(weights) + if aggregate: + pieces.append(Token('AGGREGATE')) + pieces.append(aggregate) + return self.execute_command(*pieces) + + def zrangebylex(self, name, min, max, start=None, num=None): + """ + Return the lexicographical range of values from sorted set ``name`` + between ``min`` and ``max``. + + If ``start`` and ``num`` are specified, then return a slice of the + range. + """ + if (start is not None and num is None) or \ + (num is not None and start is None): + raise RedisError("``start`` and ``num`` must both be specified") + pieces = ['ZRANGEBYLEX', name, min, max] + if start is not None and num is not None: + pieces.extend([Token('LIMIT'), start, num]) + return self.execute_command(*pieces) + + def zremrangebylex(self, name, min, max): + """ + Remove all elements in the sorted set ``name`` between the + lexicographical range specified by ``min`` and ``max``. + + Returns the number of elements removed. + """ + return self.execute_command('ZREMRANGEBYLEX', name, min, max) + + def zlexcount(self, name, min, max): + """ + Return the number of items in the sorted set ``name`` between the + lexicographical range ``min`` and ``max``. 
+ """ + return self.execute_command('ZLEXCOUNT', name, min, max) + + # SPECIAL COMMANDS SUPPORTED BY LEDISDB def zclear(self, name): "Delete key of ``name`` from sorted set" diff --git a/client/nodejs/ledis/lib/commands.js b/client/nodejs/ledis/lib/commands.js index 1b77c00..8a24f6c 100644 --- a/client/nodejs/ledis/lib/commands.js +++ b/client/nodejs/ledis/lib/commands.js @@ -93,6 +93,9 @@ module.exports = [ "zscore", "zunionstore", "zinterstore", + "zrangebylex", + "zremrangebylex", + "zlexcount", "zclear", @@ -128,7 +131,7 @@ module.exports = [ "begin", "rollback", "commit", - + "eval", "evalsha", "script", diff --git a/client/openresty/ledis.lua b/client/openresty/ledis.lua index 07c3f2b..7834c2b 100644 --- a/client/openresty/ledis.lua +++ b/client/openresty/ledis.lua @@ -95,6 +95,12 @@ local commands = { "zrevrank", "zrevrangebyscore", "zscore", + "zunionstore", + "zinterstore", + "zrangebylex", + "zremrangebylex", + "zlexcount", + --[[ledisdb special commands]] "zclear", "zmclear", diff --git a/client/openresty/tx_test.lua b/client/openresty/tx_test.lua deleted file mode 100644 index 069ddf9..0000000 --- a/client/openresty/tx_test.lua +++ /dev/null @@ -1,96 +0,0 @@ -local ledis = require "ledis" -local lds = ledis:new() - -lds:set_timeout(1000) - - - - --- connect -local ok, err = lds:connect("127.0.0.1", "6380") -if not ok then - ngx.say("failed to connect:", err) - return -end - -lds:del("tx") - --- transaction - -ok, err = lds:set("tx", "a") -if not ok then - ngx.say("failed to execute set in tx: ", err) - return -end - -ngx.say("SET should be OK <=>", ok) - -res, err = lds:get("tx") -if not res then - ngx.say("failed to execute get in tx: ", err) - return -end - -ngx.say("GET should be a <=>", res) - - - -ok, err = lds:begin() -if not ok then - ngx.say("failed to run begin: ", err) - return -end - -ngx.say("BEGIN should be OK <=>", ok) - -ok, err = lds:set("tx", "b") -if not ok then - ngx.say("failed to execute set in tx: ", err) - return -end - 
-ngx.say("SET should be OK <=>", ok) - - -res, err = lds:get("tx") -if not res then - ngx.say("failed to execute get in tx: ", err) - return -end - -ngx.say("GET should be b <=>", res) - -ok, err = lds:rollback() -if not ok then - ngx.say("failed to rollback", err) - return -end -ngx.say("ROLLBACK should be OK <=>", ok) - -res, err = lds:get("tx") -if not res then - ngx.say("failed to execute get in tx: ", err) - return -end - -ngx.say("GET should be a <=>", res) - - -lds:begin() -lds:set("tx", "c") -lds:commit() -res, err = lds:get("tx") -if not res then - ngx.say("failed to execute get in tx: ", err) - return -end - -ngx.say("GET should be c <=>", res) - - -local ok, err = lds:close() -if not ok then - ngx.say("failed to close: ", err) - return -end -ngx.say("close success") diff --git a/cmd/ledis-binlog/main.go b/cmd/ledis-binlog/main.go deleted file mode 100644 index 3725920..0000000 --- a/cmd/ledis-binlog/main.go +++ /dev/null @@ -1,85 +0,0 @@ -package main - -import ( - "bufio" - "flag" - "fmt" - "github.com/siddontang/ledisdb/ledis" - "os" - "time" -) - -var TimeFormat = "2006-01-02 15:04:05" - -var startDateTime = flag.String("start-datetime", "", - "Start reading the binary log at the first event having a timestamp equal to or later than the datetime argument.") -var stopDateTime = flag.String("stop-datetime", "", - "Stop reading the binary log at the first event having a timestamp equal to or earlier than the datetime argument.") - -var startTime uint32 = 0 -var stopTime uint32 = 0xFFFFFFFF - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage of %s [options] log_file\n", os.Args[0]) - flag.PrintDefaults() - } - - flag.Parse() - - logFile := flag.Arg(0) - f, err := os.Open(logFile) - if err != nil { - println(err.Error()) - return - } - defer f.Close() - - var t time.Time - - if len(*startDateTime) > 0 { - if t, err = time.Parse(TimeFormat, *startDateTime); err != nil { - println("parse start-datetime error: ", err.Error()) - return - } 
- - startTime = uint32(t.Unix()) - } - - if len(*stopDateTime) > 0 { - if t, err = time.Parse(TimeFormat, *stopDateTime); err != nil { - println("parse stop-datetime error: ", err.Error()) - return - } - - stopTime = uint32(t.Unix()) - } - - rb := bufio.NewReaderSize(f, 4096) - err = ledis.ReadEventFromReader(rb, printEvent) - if err != nil { - println("read event error: ", err.Error()) - return - } -} - -func printEvent(head *ledis.BinLogHead, event []byte) error { - if head.CreateTime < startTime || head.CreateTime > stopTime { - return nil - } - - t := time.Unix(int64(head.CreateTime), 0) - - fmt.Printf("%s ", t.Format(TimeFormat)) - - s, err := ledis.FormatBinLogEvent(event) - if err != nil { - fmt.Printf("%s", err.Error()) - } else { - fmt.Printf(s) - } - - fmt.Printf("\n") - - return nil -} diff --git a/cmd/ledis-cli/const.go b/cmd/ledis-cli/const.go index f816c5e..3bca898 100644 --- a/cmd/ledis-cli/const.go +++ b/cmd/ledis-cli/const.go @@ -1,4 +1,4 @@ -//This file was generated by .tools/generate_commands.py on Tue Sep 09 2014 09:48:57 +0800 +//This file was generated by .tools/generate_commands.py on Thu Oct 02 2014 15:24:07 +0800 package main var helpCommands = [][]string{ @@ -86,7 +86,7 @@ var helpCommands = [][]string{ {"SINTER", "key [key ...]", "Set"}, {"SINTERSTORE", "destination key [key ...]", "Set"}, {"SISMEMBER", "key member", "Set"}, - {"SLAVEOF", "host port", "Replication"}, + {"SLAVEOF", "host port [restart]", "Replication"}, {"SMCLEAR", "key [key ...]", "Set"}, {"SMEMBERS", "key", "Set"}, {"SPERSIST", "key", "Set"}, @@ -95,7 +95,7 @@ var helpCommands = [][]string{ {"SUNION", "key [key ...]", "Set"}, {"SUNIONSTORE", "destination key [key ...]", "Set"}, {"SXSCAN", "key [MATCH match] [COUNT count]", "Set"}, - {"SYNC", "index offset", "Replication"}, + {"SYNC", "logid", "Replication"}, {"TTL", "key", "KV"}, {"XSCAN", "key [MATCH match] [COUNT count]", "KV"}, {"ZADD", "key score member [score member ...]", "ZSet"}, @@ -106,12 +106,15 @@ var 
helpCommands = [][]string{ {"ZEXPIREAT", "key timestamp", "ZSet"}, {"ZINCRBY", "key increment member", "ZSet"}, {"ZINTERSTORE", "destkey numkeys key [key ...] [WEIGHTS weight [weight ...]] [AGGREGATE SUM|MIN|MAX]", "ZSet"}, + {"ZLEXCOUNT", "key min max", "ZSet"}, {"ZMCLEAR", "key [key ...]", "ZSet"}, {"ZPERSIST", "key", "ZSet"}, {"ZRANGE", "key start stop [WITHSCORES]", "ZSet"}, + {"ZRANGEBYLEX", "key min max [LIMIT offset count]", "ZSet"}, {"ZRANGEBYSCORE", "key min max [WITHSCORES] [LIMIT offset count]", "ZSet"}, {"ZRANK", "key member", "ZSet"}, {"ZREM", "key member [member ...]", "ZSet"}, + {"ZREMRANGBYLEX", "key min max", "ZSet"}, {"ZREMRANGEBYRANK", "key start stop", "ZSet"}, {"ZREMRANGEBYSCORE", "key min max", "ZSet"}, {"ZREVRANGE", "key start stop [WITHSCORES]", "ZSet"}, diff --git a/cmd/ledis-load/main.go b/cmd/ledis-load/main.go index 34165b8..b9b6a9c 100644 --- a/cmd/ledis-load/main.go +++ b/cmd/ledis-load/main.go @@ -2,7 +2,6 @@ package main import ( "flag" - "fmt" "github.com/siddontang/ledisdb/config" "github.com/siddontang/ledisdb/ledis" ) @@ -57,18 +56,6 @@ func loadDump(cfg *config.Config, ldb *ledis.Ledis) error { return err } - var head *ledis.BinLogAnchor - head, err = ldb.LoadDumpFile(*dumpPath) - - if err != nil { - return err - } - - //master enable binlog, here output this like mysql - if head.LogFileIndex != 0 && head.LogPos != 0 { - format := "MASTER_LOG_FILE='binlog.%07d', MASTER_LOG_POS=%d;\n" - fmt.Printf(format, head.LogFileIndex, head.LogPos) - } - - return nil + _, err = ldb.LoadDumpFile(*dumpPath) + return err } diff --git a/config/config.go b/config/config.go index ca93d29..668b545 100644 --- a/config/config.go +++ b/config/config.go @@ -16,14 +16,6 @@ const ( DefaultDataDir string = "./var" ) -const ( - MaxBinLogFileSize int = 1024 * 1024 * 1024 - MaxBinLogFileNum int = 10000 - - DefaultBinLogFileSize int = MaxBinLogFileSize - DefaultBinLogFileNum int = 10 -) - type LevelDBConfig struct { Compression bool `toml:"compression"` 
BlockSize int `toml:"block_size"` @@ -37,9 +29,13 @@ type LMDBConfig struct { NoSync bool `toml:"nosync"` } -type BinLogConfig struct { - MaxFileSize int `toml:"max_file_size"` - MaxFileNum int `toml:"max_file_num"` +type ReplicationConfig struct { + Path string `toml:"path"` + ExpiredLogDays int `toml:"expired_log_days"` + Sync bool `toml:"sync"` + WaitSyncTime int `toml:"wait_sync_time"` + WaitMaxSlaveAcks int `toml:"wait_max_slave_acks"` + Compression bool `toml:"compression"` } type Config struct { @@ -47,19 +43,22 @@ type Config struct { HttpAddr string `toml:"http_addr"` + SlaveOf string `toml:"slaveof"` + DataDir string `toml:"data_dir"` DBName string `toml:"db_name"` + DBPath string `toml:"db_path"` + LevelDB LevelDBConfig `toml:"leveldb"` LMDB LMDBConfig `toml:"lmdb"` - BinLog BinLogConfig `toml:"binlog"` - - SlaveOf string `toml:"slaveof"` - AccessLog string `toml:"access_log"` + + UseReplication bool `toml:"use_replication"` + Replication ReplicationConfig `toml:"replication"` } func NewConfigWithFile(fileName string) (*Config, error) { @@ -92,11 +91,6 @@ func NewConfigDefault() *Config { cfg.DBName = DefaultDBName - // disable binlog - cfg.BinLog.MaxFileNum = 0 - cfg.BinLog.MaxFileSize = 0 - - // disable replication cfg.SlaveOf = "" // disable access log @@ -105,6 +99,10 @@ func NewConfigDefault() *Config { cfg.LMDB.MapSize = 20 * 1024 * 1024 cfg.LMDB.NoSync = true + cfg.Replication.WaitSyncTime = 1 + cfg.Replication.Compression = true + cfg.Replication.WaitMaxSlaveAcks = 2 + return cfg } @@ -125,17 +123,3 @@ func (cfg *LevelDBConfig) Adjust() { cfg.MaxOpenFiles = 1024 } } - -func (cfg *BinLogConfig) Adjust() { - if cfg.MaxFileSize <= 0 { - cfg.MaxFileSize = DefaultBinLogFileSize - } else if cfg.MaxFileSize > MaxBinLogFileSize { - cfg.MaxFileSize = MaxBinLogFileSize - } - - if cfg.MaxFileNum <= 0 { - cfg.MaxFileNum = DefaultBinLogFileNum - } else if cfg.MaxFileNum > MaxBinLogFileNum { - cfg.MaxFileNum = MaxBinLogFileNum - } -} diff --git 
a/config/config.toml b/config/config.toml index 2a3a246..b8d80ec 100644 --- a/config/config.toml +++ b/config/config.toml @@ -13,6 +13,7 @@ data_dir = "/tmp/ledis_server" access_log = "" # Set slaveof to enable replication from master, empty, no replication +# Any write operations except flushall and replication will be disabled in slave mode. slaveof = "" # Choose which backend storage to use, now support: @@ -27,6 +28,12 @@ slaveof = "" # db_name = "leveldb" +# If not set, use data_dir/"db_name"_data +db_path = "" + +# enable replication or not +use_replication = true + [leveldb] compression = false block_size = 32768 @@ -38,8 +45,25 @@ max_open_files = 1024 map_size = 524288000 nosync = true -[binlog] -max_file_size = 0 -max_file_num = 0 +[replication] +# Path to store replication information(write ahead log, commit log, etc.) +# if not set, use data_dir/rpl +path = "" +# Expire write ahead logs after the given days +expired_log_days = 7 +# If sync is true, the new log must be sent to some slaves, and then commit. +# It will reduce performance but have better high availability. +sync = true + +# If sync is true, wait at most wait_sync_time seconds for slaves to sync this log +wait_sync_time = 1 + +# If sync is true, wait for at most min(wait_max_slave_acks, (n + 1) / 2) slave acks to promise syncing ok. +# n is the number of slaves +# If 0, wait (n + 1) / 2 acks.
+wait_max_slave_acks = 2 + +# Compress the log or not +compression = true diff --git a/config/config_test.go b/config/config_test.go index 218ba0f..47779aa 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -1,31 +1,13 @@ package config import ( - "reflect" "testing" ) func TestConfig(t *testing.T) { - dstCfg := new(Config) - dstCfg.Addr = "127.0.0.1:6380" - dstCfg.HttpAddr = "127.0.0.1:11181" - dstCfg.DataDir = "/tmp/ledis_server" - dstCfg.DBName = "leveldb" - - dstCfg.LevelDB.Compression = false - dstCfg.LevelDB.BlockSize = 32768 - dstCfg.LevelDB.WriteBufferSize = 67108864 - dstCfg.LevelDB.CacheSize = 524288000 - dstCfg.LevelDB.MaxOpenFiles = 1024 - dstCfg.LMDB.MapSize = 524288000 - dstCfg.LMDB.NoSync = true - - cfg, err := NewConfigWithFile("./config.toml") + _, err := NewConfigWithFile("./ledis.toml") if err != nil { t.Fatal(err) } - if !reflect.DeepEqual(dstCfg, cfg) { - t.Fatal("parse toml error") - } } diff --git a/doc/Replication.md b/doc/Replication.md new file mode 100644 index 0000000..9e5b6f0 --- /dev/null +++ b/doc/Replication.md @@ -0,0 +1,76 @@ +At first, LedisDB uses BinLog (like MySQL BinLog) to support replication. Slave syncs logs from Master with specified BinLog filename and position. It is simple but not suitable for some cases. + +Let's assume below scenario: A -> B and A -> C, here A is master, B and C are slaves. -> means "replicates to". If master A failed, we must select B or C as the new master. Usually, we must choose the one which has most up to date from A, but it is not easy to check which one is it. + +MySQL has the same problem for this, so from MySQL 5.6, it introduces GTID (Global Transaction ID) to solve it. GTID is very powerful but a little complex, I just want to a simple and easy solution. 
+ +Before GTID, Google has supplied a solution called [Global Transaction IDs](https://code.google.com/p/google-mysql-tools/wiki/GlobalTransactionIds) which uses a monotonically increasing group id to represent a unique transaction event in BinLog. Although it has some limitations for MySQL hierarchical replication, I still think using an integer id like group id for log event is simple and suitable for LedisDB. + +Another implementation influencing me is [Raft](http://raftconsensus.github.io/), a consensus algorithm based on the replicated log. Leader must ensure that some followers receive the replicated log before executing the commands in log. The log has a unique log id (like group id above), if the leader failed, the candidate which has the up to date log (checked by log id) will be elected a new leader. + +Based on the above, I propose a simple solution for LedisDB's replication. + +## Keyword + ++ LogID: a monotonically increasing integer for a log ++ FirstLogID: the oldest log id for a server, all the logs before this id have been purged. ++ LastLogID: the newest log id for a server. ++ CommitID: the last log committed to execute. If LastLogID is 10 and CommitID is 5, server needs to commit logs from 6 - 10 to catch the up to date status. + +## Sync Flow + +For a master, every write change will be handled as below: + +1. Logs the changes to disk, it will calculate a new LogID based on LastLogID. +2. Sends this log to slaves and waits for the ACK from slaves or timeout. +3. Commits to execute the changes. +4. Updates the CommitID to the LogID. + +For a slave: + +1. Connects to master and tells it which log to sync by LogID, it may have below cases: + + + The LogID is less than master's FirstLogID, master will tell slave log has been purged, the slave must do a full sync from master first. + + The master has this log and will send it to slave. + + The master does not have this log (The slave is up to date with master).
Slave will wait for some time or until timeout, then start a new sync. + +2. After the slave receives a log (e.g. LogID 10), it will save this log to disk and notify the replication thread to handle it. +3. Slave will start a new sync with LogID 11. + + +## Full Sync Flow + +If slave syncs a log but master has purged it, slave has to start a full sync. + ++ Master generates a snapshot with current LastLogID and dumps to a file. ++ Slave discards all old data and replicated logs, then loads the dump file and updates CommitID with LastLogID in dump file. ++ Slave starts to sync with LogID = CommitID + 1. + +## ReadOnly + +Slave is always read only, which means that any write operations will be denied except `FlushAll` and replication. + +For a master, if it first writes the log OK but then fails to commit or to update the CommitID, it will also turn into read only mode until the replication thread executes this log correctly. + +## Strong Consensus Replication + +For the sync flow, we see that master will wait for some slaves to return an ACK telling it that they have received the log, this mechanism implements strong consensus replication. If master failed, we can choose a slave which has up to date data with the master. + +Note that this feature has a big influence on the performance. Use at your own risk! + +## Use + +Using replication is very simple for LedisDB, only using `slaveof` command. + ++ Use `slaveof host port` to enable replication from master at "host:port". ++ Use `slaveof no one` to stop replication and change the slave to master. + +If a slave first syncs from a master A, then uses `slaveof` to sync from master B, it will sync with the LogID = LastLogID + 1. If you want to start over from B, you must use `slaveof host port restart` which will start a full sync first. + +## Limitation + ++ Multi-Master is not supported. ++ Replication can not store a log whose ID is less than the current LastLogID. ++ Circular replication is not supported.
++ Master and slave must set `use_replication` to true to support replication. + diff --git a/doc/commands.json b/doc/commands.json index 2ef0bbb..b813d3b 100644 --- a/doc/commands.json +++ b/doc/commands.json @@ -301,12 +301,12 @@ "readonly": false }, "SLAVEOF": { - "arguments": "host port", + "arguments": "host port [restart]", "group": "Replication", "readonly": false }, "SYNC": { - "arguments": "index offset", + "arguments": "logid", "group": "Replication", "readonly": false }, @@ -511,7 +511,25 @@ "group": "ZSet", "readonly": false }, - + + "ZRANGEBYLEX":{ + "arguments": "key min max [LIMIT offset count]", + "group": "ZSet", + "readonly": true + }, + + "ZREMRANGEBYLEX":{ + "arguments": "key min max", + "group": "ZSet", + "readonly": false + }, + + "ZLEXCOUNT":{ + "arguments": "key min max", + "group": "ZSet", + "readonly": true + }, + "BEGIN": { "arguments": "-", "group": "Transaction", diff --git a/doc/commands.md b/doc/commands.md index 183ab81..2cae8fd 100644 --- a/doc/commands.md +++ b/doc/commands.md @@ -106,6 +106,9 @@ Table of Contents - [ZINTERSTORE destination numkeys key [key ...] 
[WEIGHTS weight [weight ...]] [AGGREGATE SUM|MIN|MAX] ](#zinterstore-destination-numkeys-key-key--weights-weight-weight--aggregate-summinmax) - [ZXSCAN key [MATCH match] [COUNT count]](#zxscan-key-match-match-count-count) + - [ZRANGEBYLEX key min max [LIMIT offset count]](#zrangebylex-key-min-max-limit-offset-count) + - [ZREMRANGEBYLEX key min max](#zremrangebylex-key-min-max) + - [ZLEXCOUNT key min max](#zlexcount-key-min-max) - [Bitmap](#bitmap) - [BGET key](#bget-key) - [BGETBIT key offset](#bgetbit-key-offset) @@ -119,9 +122,9 @@ Table of Contents - [BPERSIST key](#bpersist-key) - [BXSCAN key [MATCH match] [COUNT count]](#bxscan-key-match-match-count-count) - [Replication](#replication) - - [SLAVEOF host port](#slaveof-host-port) + - [SLAVEOF host port [restart]](#slaveof-host-port-restart) - [FULLSYNC](#fullsync) - - [SYNC index offset](#sync-index-offset) + - [SYNC logid](#sync-logid) - [Server](#server) - [PING](#ping) - [ECHO message](#echo-message) @@ -2227,10 +2230,75 @@ Iterate ZSet keys incrementally. See [XSCAN](#xscan-key-match-match-count-count) for more information. +### ZRANGEBYLEX key min max [LIMIT offset count] + +When all the elements in a sorted set are inserted with the same score, in order to force lexicographical ordering, this command returns all the elements in the sorted set at key with a value between min and max. + +If the elements in the sorted set have different scores, the returned elements are unspecified. + +Valid start and stop must start with ( or [, in order to specify if the range item is respectively exclusive or inclusive. The special values of + or - for start and stop have the special meaning or positively infinite and negatively infinite strings, so for instance the command ZRANGEBYLEX myzset - + is guaranteed to return all the elements in the sorted set, if all the elements have the same score. 
+ +**Return value** + +array: list of elements in the specified lexicographical range + +**Example** + +``` +ledis> ZADD myzset 0 a 0 b 0 c 0 d 0 e 0 f 0 g +(integer) 7 +ledis> ZRANGEBYLEX myzset - [c +1) "a" +2) "b" +3) "c" +ledis> ZRANGEBYLEX myzset - (c +1) "a" +2) "b" +ledis> ZRANGEBYLEX myzset [aaa (g +1) "b" +2) "c" +3) "d" +4) "e" +5) "f" +``` + +### ZREMRANGEBYLEX key min max + +Removes all elements in the sorted set stored at key between the lexicographical range specified by min and max. + +**Return value** + +int64: the number of elements removed. + +**Example** + +``` +ledis> ZADD myzset 0 a 0 b 0 c 0 d 0 e 0 f 0 g +(integer) 7 +ledis> ZREMRANGEBYLEX myzset - [c +(integer) 3 +``` + +### ZLEXCOUNT key min max + +Returns the number of elements in the sorted set at key with a value between min and max. + +**Return value** + +int64: the number of elements in the specified lexicographical range. + +**Example** + +``` +ledis> ZADD myzset 0 a 0 b 0 c 0 d 0 e 0 f 0 g +(integer) 7 +ledis> ZLEXCOUNT myzset - [c +(integer) 3 +``` + ## Bitmap - ### BGET key Returns the whole binary data stored at `key`. @@ -2396,13 +2464,13 @@ See [XSCAN](#xscan-key-match-match-count-count) for more information. ## Replication -### SLAVEOF host port +### SLAVEOF host port [restart] Changes the replication settings of a slave on the fly. If the server is already acting as slave, SLAVEOF NO ONE will turn off the replication. SLAVEOF host port will make the server a slave of another server listening at the specified host and port. -If a server is already a slave of a master, SLAVEOF host port will stop the replication against the old and start the synchronization against the new one, discarding the old dataset. +If a server is already a slave of a master, SLAVEOF host port will stop the replication against the old and start the synchronization against the new one. If restart is set, it will discard the old dataset, otherwise it will sync with LastLogID + 1. 
### FULLSYNC @@ -2416,9 +2484,9 @@ FULLSYNC will first try to sync all data from the master, save in local disk, th **Examples** -### SYNC index offset +### SYNC logid -Inner command, syncs the new changed from master set by SLAVEOF at offset in binlog.index file. +Inner command, syncs the new changed from master set by SLAVEOF with logid. **Return value** @@ -2478,7 +2546,7 @@ ERR invalid db index 16 ### FLUSHALL -Delete all the keys of all the existing databases, not just the currently selected one. This command never fails. +Delete all the keys of all the existing databases and replication logs, not just the currently selected one. This command never fails. Very dangerous to use!!! diff --git a/etc/ledis.conf b/etc/ledis.conf index d3adbd8..b8d80ec 100644 --- a/etc/ledis.conf +++ b/etc/ledis.conf @@ -1,7 +1,5 @@ # LedisDB configuration -# Config format is toml, https://github.com/toml-lang/toml - # Server listen address addr = "127.0.0.1:6380" @@ -15,6 +13,7 @@ data_dir = "/tmp/ledis_server" access_log = "" # Set slaveof to enable replication from master, empty, no replication +# Any write operations except flushall and replication will be disabled in slave mode. slaveof = "" # Choose which backend storage to use, now support: @@ -29,6 +28,12 @@ slaveof = "" # db_name = "leveldb" +# If not set, use data_dir/"db_name"_data +db_path = "" + +# enable replication or not +use_replication = true + [leveldb] compression = false block_size = 32768 @@ -40,9 +45,25 @@ max_open_files = 1024 map_size = 524288000 nosync = true -[binlog] -# Set either size or num to 0 to disable binlog -max_file_size = 0 -max_file_num = 0 +[replication] +# Path to store replication information(write ahead log, commit log, etc.) +# if not set, use data_dir/rpl +path = "" +# Expire write ahead logs after the given days +expired_log_days = 7 +# If sync is true, the new log must be sent to some slaves, and then commit. +# It will reduce performance but have better high availability. 
+sync = true + +# If sync is true, wait at most wait_sync_time seconds for slaves to sync this log +wait_sync_time = 1 + +# If sync is true, wait for at most min(wait_max_slave_acks, (n + 1) / 2) slave acks to promise syncing ok. +# n is the number of slaves +# If 0, wait (n + 1) / 2 acks. +wait_max_slave_acks = 2 + +# Compress the log or not +compression = true diff --git a/ledis/batch.go b/ledis/batch.go index b23cc47..61d5cd2 100644 --- a/ledis/batch.go +++ b/ledis/batch.go @@ -1,6 +1,8 @@ package ledis import ( + "github.com/siddontang/go/log" + "github.com/siddontang/ledisdb/rpl" "github.com/siddontang/ledisdb/store" "sync" ) @@ -12,29 +14,24 @@ type batch struct { sync.Locker - logs [][]byte - tx *Tx + + eb *eventBatch } func (b *batch) Commit() error { - b.l.commitLock.Lock() - defer b.l.commitLock.Unlock() - - err := b.WriteBatch.Commit() - - if b.l.binlog != nil { - if err == nil { - if b.tx == nil { - b.l.binlog.Log(b.logs...) - } else { - b.tx.logs = append(b.tx.logs, b.logs...) - } - } - b.logs = [][]byte{} + if b.l.IsReadOnly() { + return ErrWriteInROnly } - return err + if b.tx == nil { + return b.l.handleCommit(b.eb, b.WriteBatch) + } else { + if b.l.r != nil { + b.tx.eb.Write(b.eb.Bytes()) + } + return b.WriteBatch.Commit() + } } func (b *batch) Lock() { @@ -42,26 +39,25 @@ func (b *batch) Lock() { } func (b *batch) Unlock() { - if b.l.binlog != nil { - b.logs = [][]byte{} - } + b.eb.Reset() + b.WriteBatch.Rollback() b.Locker.Unlock() } func (b *batch) Put(key []byte, value []byte) { - if b.l.binlog != nil { - buf := encodeBinLogPut(key, value) - b.logs = append(b.logs, buf) + if b.l.r != nil { + b.eb.Put(key, value) } + b.WriteBatch.Put(key, value) } func (b *batch) Delete(key []byte) { - if b.l.binlog != nil { - buf := encodeBinLogDelete(key) - b.logs = append(b.logs, buf) + if b.l.r != nil { + b.eb.Delete(key) } + b.WriteBatch.Delete(key) } @@ -97,9 +93,46 @@ func (l *Ledis) newBatch(wb store.WriteBatch, locker sync.Locker, tx *Tx) *batch b.l = l b.WriteBatch = wb - 
b.tx = tx b.Locker = locker - b.logs = [][]byte{} + b.tx = tx + b.eb = new(eventBatch) + return b } + +type commiter interface { + Commit() error +} + +func (l *Ledis) handleCommit(eb *eventBatch, c commiter) error { + l.commitLock.Lock() + defer l.commitLock.Unlock() + + var err error + if l.r != nil { + var rl *rpl.Log + if rl, err = l.r.Log(eb.Bytes()); err != nil { + log.Fatal("write wal error %s", err.Error()) + return err + } + + l.propagate(rl) + + if err = c.Commit(); err != nil { + log.Fatal("commit error %s", err.Error()) + l.noticeReplication() + return err + } + + if err = l.r.UpdateCommitID(rl.ID); err != nil { + log.Fatal("update commit id error %s", err.Error()) + l.noticeReplication() + return err + } + + return nil + } else { + return c.Commit() + } +} diff --git a/ledis/binlog.go b/ledis/binlog.go deleted file mode 100644 index 0d9c251..0000000 --- a/ledis/binlog.go +++ /dev/null @@ -1,400 +0,0 @@ -package ledis - -import ( - "bufio" - "encoding/binary" - "fmt" - "github.com/siddontang/go-log/log" - "github.com/siddontang/ledisdb/config" - "io" - "io/ioutil" - "os" - "path" - "strconv" - "strings" - "sync" - "time" -) - -type BinLogHead struct { - CreateTime uint32 - BatchId uint32 - PayloadLen uint32 -} - -func (h *BinLogHead) Len() int { - return 12 -} - -func (h *BinLogHead) Write(w io.Writer) error { - if err := binary.Write(w, binary.BigEndian, h.CreateTime); err != nil { - return err - } - - if err := binary.Write(w, binary.BigEndian, h.BatchId); err != nil { - return err - } - - if err := binary.Write(w, binary.BigEndian, h.PayloadLen); err != nil { - return err - } - - return nil -} - -func (h *BinLogHead) handleReadError(err error) error { - if err == io.EOF { - return io.ErrUnexpectedEOF - } else { - return err - } -} - -func (h *BinLogHead) Read(r io.Reader) error { - var err error - if err = binary.Read(r, binary.BigEndian, &h.CreateTime); err != nil { - return err - } - - if err = binary.Read(r, binary.BigEndian, &h.BatchId); err != 
nil { - return h.handleReadError(err) - } - - if err = binary.Read(r, binary.BigEndian, &h.PayloadLen); err != nil { - return h.handleReadError(err) - } - - return nil -} - -func (h *BinLogHead) InSameBatch(ho *BinLogHead) bool { - if h.CreateTime == ho.CreateTime && h.BatchId == ho.BatchId { - return true - } else { - return false - } -} - -/* -index file format: -ledis-bin.00001 -ledis-bin.00002 -ledis-bin.00003 - -log file format - -Log: Head|PayloadData - -Head: createTime|batchId|payloadData - -*/ - -type BinLog struct { - sync.Mutex - - path string - - cfg *config.BinLogConfig - - logFile *os.File - - logWb *bufio.Writer - - indexName string - logNames []string - lastLogIndex int64 - - batchId uint32 - - ch chan struct{} -} - -func NewBinLog(cfg *config.Config) (*BinLog, error) { - l := new(BinLog) - - l.cfg = &cfg.BinLog - l.cfg.Adjust() - - l.path = path.Join(cfg.DataDir, "binlog") - - if err := os.MkdirAll(l.path, 0755); err != nil { - return nil, err - } - - l.logNames = make([]string, 0, 16) - - l.ch = make(chan struct{}) - - if err := l.loadIndex(); err != nil { - return nil, err - } - - return l, nil -} - -func (l *BinLog) flushIndex() error { - data := strings.Join(l.logNames, "\n") - - bakName := fmt.Sprintf("%s.bak", l.indexName) - f, err := os.OpenFile(bakName, os.O_WRONLY|os.O_CREATE, 0644) - if err != nil { - log.Error("create binlog bak index error %s", err.Error()) - return err - } - - if _, err := f.WriteString(data); err != nil { - log.Error("write binlog index error %s", err.Error()) - f.Close() - return err - } - - f.Close() - - if err := os.Rename(bakName, l.indexName); err != nil { - log.Error("rename binlog bak index error %s", err.Error()) - return err - } - - return nil -} - -func (l *BinLog) loadIndex() error { - l.indexName = path.Join(l.path, fmt.Sprintf("ledis-bin.index")) - if _, err := os.Stat(l.indexName); os.IsNotExist(err) { - //no index file, nothing to do - } else { - indexData, err := ioutil.ReadFile(l.indexName) - if err 
!= nil { - return err - } - - lines := strings.Split(string(indexData), "\n") - for _, line := range lines { - line = strings.Trim(line, "\r\n ") - if len(line) == 0 { - continue - } - - if _, err := os.Stat(path.Join(l.path, line)); err != nil { - log.Error("load index line %s error %s", line, err.Error()) - return err - } else { - l.logNames = append(l.logNames, line) - } - } - } - if l.cfg.MaxFileNum > 0 && len(l.logNames) > l.cfg.MaxFileNum { - //remove oldest logfile - if err := l.Purge(len(l.logNames) - l.cfg.MaxFileNum); err != nil { - return err - } - } - - var err error - if len(l.logNames) == 0 { - l.lastLogIndex = 1 - } else { - lastName := l.logNames[len(l.logNames)-1] - - if l.lastLogIndex, err = strconv.ParseInt(path.Ext(lastName)[1:], 10, 64); err != nil { - log.Error("invalid logfile name %s", err.Error()) - return err - } - - //like mysql, if server restart, a new binlog will create - l.lastLogIndex++ - } - - return nil -} - -func (l *BinLog) getLogFile() string { - return l.FormatLogFileName(l.lastLogIndex) -} - -func (l *BinLog) openNewLogFile() error { - var err error - lastName := l.getLogFile() - - logPath := path.Join(l.path, lastName) - if l.logFile, err = os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY, 0644); err != nil { - log.Error("open new logfile error %s", err.Error()) - return err - } - - if l.cfg.MaxFileNum > 0 && len(l.logNames) == l.cfg.MaxFileNum { - l.purge(1) - } - - l.logNames = append(l.logNames, lastName) - - if l.logWb == nil { - l.logWb = bufio.NewWriterSize(l.logFile, 1024) - } else { - l.logWb.Reset(l.logFile) - } - - if err = l.flushIndex(); err != nil { - return err - } - - return nil -} - -func (l *BinLog) checkLogFileSize() bool { - if l.logFile == nil { - return false - } - - st, _ := l.logFile.Stat() - if st.Size() >= int64(l.cfg.MaxFileSize) { - l.closeLog() - return true - } - - return false -} - -func (l *BinLog) closeLog() { - if l.logFile == nil { - return - } - - l.lastLogIndex++ - - l.logFile.Close() - 
l.logFile = nil -} - -func (l *BinLog) purge(n int) { - if len(l.logNames) < n { - n = len(l.logNames) - } - for i := 0; i < n; i++ { - logPath := path.Join(l.path, l.logNames[i]) - os.Remove(logPath) - } - - copy(l.logNames[0:], l.logNames[n:]) - l.logNames = l.logNames[0 : len(l.logNames)-n] -} - -func (l *BinLog) Close() { - if l.logFile != nil { - l.logFile.Close() - l.logFile = nil - } -} - -func (l *BinLog) LogNames() []string { - return l.logNames -} - -func (l *BinLog) LogFileName() string { - return l.getLogFile() -} - -func (l *BinLog) LogFilePos() int64 { - if l.logFile == nil { - return 0 - } else { - st, _ := l.logFile.Stat() - return st.Size() - } -} - -func (l *BinLog) LogFileIndex() int64 { - return l.lastLogIndex -} - -func (l *BinLog) FormatLogFileName(index int64) string { - return fmt.Sprintf("ledis-bin.%07d", index) -} - -func (l *BinLog) FormatLogFilePath(index int64) string { - return path.Join(l.path, l.FormatLogFileName(index)) -} - -func (l *BinLog) LogPath() string { - return l.path -} - -func (l *BinLog) Purge(n int) error { - l.Lock() - defer l.Unlock() - - if len(l.logNames) == 0 { - return nil - } - - if n >= len(l.logNames) { - n = len(l.logNames) - //can not purge current log file - if l.logNames[n-1] == l.getLogFile() { - n = n - 1 - } - } - - l.purge(n) - - return l.flushIndex() -} - -func (l *BinLog) PurgeAll() error { - l.Lock() - defer l.Unlock() - - l.closeLog() - - l.purge(len(l.logNames)) - - return l.openNewLogFile() -} - -func (l *BinLog) Log(args ...[]byte) error { - l.Lock() - defer l.Unlock() - - var err error - - if l.logFile == nil { - if err = l.openNewLogFile(); err != nil { - return err - } - } - - head := &BinLogHead{} - - head.CreateTime = uint32(time.Now().Unix()) - head.BatchId = l.batchId - - l.batchId++ - - for _, data := range args { - head.PayloadLen = uint32(len(data)) - - if err := head.Write(l.logWb); err != nil { - return err - } - - if _, err := l.logWb.Write(data); err != nil { - return err - } - } - 
- if err = l.logWb.Flush(); err != nil { - log.Error("write log error %s", err.Error()) - return err - } - - l.checkLogFileSize() - - close(l.ch) - l.ch = make(chan struct{}) - - return nil -} - -func (l *BinLog) Wait() <-chan struct{} { - return l.ch -} diff --git a/ledis/binlog_test.go b/ledis/binlog_test.go deleted file mode 100644 index ea62bd9..0000000 --- a/ledis/binlog_test.go +++ /dev/null @@ -1,49 +0,0 @@ -package ledis - -import ( - "github.com/siddontang/ledisdb/config" - "io/ioutil" - "os" - "testing" -) - -func TestBinLog(t *testing.T) { - cfg := new(config.Config) - - cfg.BinLog.MaxFileNum = 1 - cfg.BinLog.MaxFileSize = 1024 - cfg.DataDir = "/tmp/ledis_binlog" - - os.RemoveAll(cfg.DataDir) - - b, err := NewBinLog(cfg) - if err != nil { - t.Fatal(err) - } - - if err := b.Log(make([]byte, 1024)); err != nil { - t.Fatal(err) - } - - if err := b.Log(make([]byte, 1024)); err != nil { - t.Fatal(err) - } - - if fs, err := ioutil.ReadDir(b.LogPath()); err != nil { - t.Fatal(err) - } else if len(fs) != 2 { - t.Fatal(len(fs)) - } - - if err := b.PurgeAll(); err != nil { - t.Fatal(err) - } - - if fs, err := ioutil.ReadDir(b.LogPath()); err != nil { - t.Fatal(err) - } else if len(fs) != 2 { - t.Fatal(len(fs)) - } else if b.LogFilePos() != 0 { - t.Fatal(b.LogFilePos()) - } -} diff --git a/ledis/binlog_util.go b/ledis/binlog_util.go deleted file mode 100644 index da058bd..0000000 --- a/ledis/binlog_util.go +++ /dev/null @@ -1,215 +0,0 @@ -package ledis - -import ( - "encoding/binary" - "errors" - "fmt" - "strconv" -) - -var ( - errBinLogDeleteType = errors.New("invalid bin log delete type") - errBinLogPutType = errors.New("invalid bin log put type") - errBinLogCommandType = errors.New("invalid bin log command type") -) - -func encodeBinLogDelete(key []byte) []byte { - buf := make([]byte, 1+len(key)) - buf[0] = BinLogTypeDeletion - copy(buf[1:], key) - return buf -} - -func decodeBinLogDelete(sz []byte) ([]byte, error) { - if len(sz) < 1 || sz[0] != 
BinLogTypeDeletion { - return nil, errBinLogDeleteType - } - - return sz[1:], nil -} - -func encodeBinLogPut(key []byte, value []byte) []byte { - buf := make([]byte, 3+len(key)+len(value)) - buf[0] = BinLogTypePut - pos := 1 - binary.BigEndian.PutUint16(buf[pos:], uint16(len(key))) - pos += 2 - copy(buf[pos:], key) - pos += len(key) - copy(buf[pos:], value) - - return buf -} - -func decodeBinLogPut(sz []byte) ([]byte, []byte, error) { - if len(sz) < 3 || sz[0] != BinLogTypePut { - return nil, nil, errBinLogPutType - } - - keyLen := int(binary.BigEndian.Uint16(sz[1:])) - if 3+keyLen > len(sz) { - return nil, nil, errBinLogPutType - } - - return sz[3 : 3+keyLen], sz[3+keyLen:], nil -} - -func FormatBinLogEvent(event []byte) (string, error) { - logType := uint8(event[0]) - - var err error - var k []byte - var v []byte - - var buf []byte = make([]byte, 0, 1024) - - switch logType { - case BinLogTypePut: - k, v, err = decodeBinLogPut(event) - buf = append(buf, "PUT "...) - case BinLogTypeDeletion: - k, err = decodeBinLogDelete(event) - buf = append(buf, "DELETE "...) - default: - err = errInvalidBinLogEvent - } - - if err != nil { - return "", err - } - - if buf, err = formatDataKey(buf, k); err != nil { - return "", err - } - - if v != nil && len(v) != 0 { - buf = append(buf, fmt.Sprintf(" %q", v)...) - } - - return String(buf), nil -} - -func formatDataKey(buf []byte, k []byte) ([]byte, error) { - if len(k) < 2 { - return nil, errInvalidBinLogEvent - } - - buf = append(buf, fmt.Sprintf("DB:%2d ", k[0])...) - buf = append(buf, fmt.Sprintf("%s ", TypeName[k[1]])...) 
- - db := new(DB) - db.index = k[0] - - //to do format at respective place - - switch k[1] { - case KVType: - if key, err := db.decodeKVKey(k); err != nil { - return nil, err - } else { - buf = strconv.AppendQuote(buf, String(key)) - } - case HashType: - if key, field, err := db.hDecodeHashKey(k); err != nil { - return nil, err - } else { - buf = strconv.AppendQuote(buf, String(key)) - buf = append(buf, ' ') - buf = strconv.AppendQuote(buf, String(field)) - } - case HSizeType: - if key, err := db.hDecodeSizeKey(k); err != nil { - return nil, err - } else { - buf = strconv.AppendQuote(buf, String(key)) - } - case ListType: - if key, seq, err := db.lDecodeListKey(k); err != nil { - return nil, err - } else { - buf = strconv.AppendQuote(buf, String(key)) - buf = append(buf, ' ') - buf = strconv.AppendInt(buf, int64(seq), 10) - } - case LMetaType: - if key, err := db.lDecodeMetaKey(k); err != nil { - return nil, err - } else { - buf = strconv.AppendQuote(buf, String(key)) - } - case ZSetType: - if key, m, err := db.zDecodeSetKey(k); err != nil { - return nil, err - } else { - buf = strconv.AppendQuote(buf, String(key)) - buf = append(buf, ' ') - buf = strconv.AppendQuote(buf, String(m)) - } - case ZSizeType: - if key, err := db.zDecodeSizeKey(k); err != nil { - return nil, err - } else { - buf = strconv.AppendQuote(buf, String(key)) - } - case ZScoreType: - if key, m, score, err := db.zDecodeScoreKey(k); err != nil { - return nil, err - } else { - buf = strconv.AppendQuote(buf, String(key)) - buf = append(buf, ' ') - buf = strconv.AppendQuote(buf, String(m)) - buf = append(buf, ' ') - buf = strconv.AppendInt(buf, score, 10) - } - case BitType: - if key, seq, err := db.bDecodeBinKey(k); err != nil { - return nil, err - } else { - buf = strconv.AppendQuote(buf, String(key)) - buf = append(buf, ' ') - buf = strconv.AppendUint(buf, uint64(seq), 10) - } - case BitMetaType: - if key, err := db.bDecodeMetaKey(k); err != nil { - return nil, err - } else { - buf = 
strconv.AppendQuote(buf, String(key)) - } - case SetType: - if key, member, err := db.sDecodeSetKey(k); err != nil { - return nil, err - } else { - buf = strconv.AppendQuote(buf, String(key)) - buf = append(buf, ' ') - buf = strconv.AppendQuote(buf, String(member)) - } - case SSizeType: - if key, err := db.sDecodeSizeKey(k); err != nil { - return nil, err - } else { - buf = strconv.AppendQuote(buf, String(key)) - } - case ExpTimeType: - if tp, key, t, err := db.expDecodeTimeKey(k); err != nil { - return nil, err - } else { - buf = append(buf, TypeName[tp]...) - buf = append(buf, ' ') - buf = strconv.AppendQuote(buf, String(key)) - buf = append(buf, ' ') - buf = strconv.AppendInt(buf, t, 10) - } - case ExpMetaType: - if tp, key, err := db.expDecodeMetaKey(k); err != nil { - return nil, err - } else { - buf = append(buf, TypeName[tp]...) - buf = append(buf, ' ') - buf = strconv.AppendQuote(buf, String(key)) - } - default: - return nil, errInvalidBinLogEvent - } - - return buf, nil -} diff --git a/ledis/const.go b/ledis/const.go index e889f4e..3b30123 100644 --- a/ledis/const.go +++ b/ledis/const.go @@ -23,6 +23,8 @@ const ( ExpTimeType byte = 101 ExpMetaType byte = 102 + + MetaType byte = 201 ) var ( @@ -44,6 +46,11 @@ var ( } ) +const ( + RDWRMode = 0 + ROnlyMode = 1 +) + const ( defaultScanCount int = 10 ) @@ -78,13 +85,10 @@ const ( ) var ( - ErrScoreMiss = errors.New("zset score miss") -) - -const ( - BinLogTypeDeletion uint8 = 0x0 - BinLogTypePut uint8 = 0x1 - BinLogTypeCommand uint8 = 0x2 + ErrScoreMiss = errors.New("zset score miss") + ErrWriteInROnly = errors.New("write not support in readonly mode") + ErrRplInRDWR = errors.New("replication not support in read write mode") + ErrRplNotSupport = errors.New("replication not support") ) const ( diff --git a/ledis/doc.go b/ledis/doc.go index 6b04c52..c6bfe78 100644 --- a/ledis/doc.go +++ b/ledis/doc.go @@ -2,7 +2,7 @@ // // Ledis supports various data structure like kv, list, hash and zset like redis. 
// -// Other features include binlog replication, data with a limited time-to-live. +// Other features include replication, data with a limited time-to-live. // // Usage // @@ -54,8 +54,5 @@ // n, err := db.ZAdd(key, ScorePair{score1, member1}, ScorePair{score2, member2}) // ay, err := db.ZRangeByScore(key, minScore, maxScore, 0, -1) // -// Binlog -// -// ledis supports binlog, so you can sync binlog to another server for replication. If you want to open binlog support, set UseBinLog to true in config. // package ledis diff --git a/ledis/dump.go b/ledis/dump.go index f162481..436e707 100644 --- a/ledis/dump.go +++ b/ledis/dump.go @@ -4,41 +4,26 @@ import ( "bufio" "bytes" "encoding/binary" - "github.com/siddontang/go-snappy/snappy" + "github.com/siddontang/go/snappy" + "github.com/siddontang/ledisdb/store" "io" "os" ) -//dump format -// fileIndex(bigendian int64)|filePos(bigendian int64) -// |keylen(bigendian int32)|key|valuelen(bigendian int32)|value...... -// -//key and value are both compressed for fast transfer dump on network using snappy - -type BinLogAnchor struct { - LogFileIndex int64 - LogPos int64 +type DumpHead struct { + CommitID uint64 } -func (m *BinLogAnchor) WriteTo(w io.Writer) error { - if err := binary.Write(w, binary.BigEndian, m.LogFileIndex); err != nil { +func (h *DumpHead) Read(r io.Reader) error { + if err := binary.Read(r, binary.BigEndian, &h.CommitID); err != nil { return err } - if err := binary.Write(w, binary.BigEndian, m.LogPos); err != nil { - return err - } return nil } -func (m *BinLogAnchor) ReadFrom(r io.Reader) error { - err := binary.Read(r, binary.BigEndian, &m.LogFileIndex) - if err != nil { - return err - } - - err = binary.Read(r, binary.BigEndian, &m.LogPos) - if err != nil { +func (h *DumpHead) Write(w io.Writer) error { + if err := binary.Write(w, binary.BigEndian, h.CommitID); err != nil { return err } @@ -56,24 +41,35 @@ func (l *Ledis) DumpFile(path string) error { } func (l *Ledis) Dump(w io.Writer) error { - m := 
new(BinLogAnchor) - var err error - l.wLock.Lock() - defer l.wLock.Unlock() + var commitID uint64 + var snap *store.Snapshot - if l.binlog != nil { - m.LogFileIndex = l.binlog.LogFileIndex() - m.LogPos = l.binlog.LogFilePos() + { + l.wLock.Lock() + defer l.wLock.Unlock() + + if l.r != nil { + if commitID, err = l.r.LastCommitID(); err != nil { + return err + } + } + + if snap, err = l.ldb.NewSnapshot(); err != nil { + return err + } } wb := bufio.NewWriterSize(w, 4096) - if err = m.WriteTo(wb); err != nil { + + h := &DumpHead{commitID} + + if err = h.Write(wb); err != nil { return err } - it := l.ldb.NewIterator() + it := snap.NewIterator() it.SeekToFirst() compressBuf := make([]byte, 4096) @@ -118,7 +114,8 @@ func (l *Ledis) Dump(w io.Writer) error { return nil } -func (l *Ledis) LoadDumpFile(path string) (*BinLogAnchor, error) { +// clear all data and load dump file to db +func (l *Ledis) LoadDumpFile(path string) (*DumpHead, error) { f, err := os.Open(path) if err != nil { return nil, err @@ -128,16 +125,21 @@ func (l *Ledis) LoadDumpFile(path string) (*BinLogAnchor, error) { return l.LoadDump(f) } -func (l *Ledis) LoadDump(r io.Reader) (*BinLogAnchor, error) { +// clear all data and load dump file to db +func (l *Ledis) LoadDump(r io.Reader) (*DumpHead, error) { l.wLock.Lock() defer l.wLock.Unlock() - info := new(BinLogAnchor) + var err error + if err = l.flushAll(); err != nil { + return nil, err + } rb := bufio.NewReaderSize(r, 4096) - err := info.ReadFrom(rb) - if err != nil { + h := new(DumpHead) + + if err = h.Read(rb); err != nil { return nil, err } @@ -190,10 +192,11 @@ func (l *Ledis) LoadDump(r io.Reader) (*BinLogAnchor, error) { deKeyBuf = nil deValueBuf = nil - //if binlog enable, we will delete all binlogs and open a new one for handling simply - if l.binlog != nil { - l.binlog.PurgeAll() + if l.r != nil { + if err := l.r.UpdateCommitID(h.CommitID); err != nil { + return nil, err + } } - return info, nil + return h, nil } diff --git a/ledis/event.go 
b/ledis/event.go new file mode 100644 index 0000000..72ac373 --- /dev/null +++ b/ledis/event.go @@ -0,0 +1,223 @@ +package ledis + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "github.com/siddontang/go/hack" + "io" + "strconv" +) + +const ( + kTypeDeleteEvent uint8 = 0 + kTypePutEvent uint8 = 1 +) + +var ( + errInvalidPutEvent = errors.New("invalid put event") + errInvalidDeleteEvent = errors.New("invalid delete event") + errInvalidEvent = errors.New("invalid event") +) + +type eventBatch struct { + bytes.Buffer +} + +func (b *eventBatch) Put(key []byte, value []byte) { + l := uint32(len(key) + len(value) + 1 + 2) + binary.Write(b, binary.BigEndian, l) + b.WriteByte(kTypePutEvent) + keyLen := uint16(len(key)) + binary.Write(b, binary.BigEndian, keyLen) + b.Write(key) + b.Write(value) +} + +func (b *eventBatch) Delete(key []byte) { + l := uint32(len(key) + 1) + binary.Write(b, binary.BigEndian, l) + b.WriteByte(kTypeDeleteEvent) + b.Write(key) +} + +type eventWriter interface { + Put(key []byte, value []byte) + Delete(key []byte) +} + +func decodeEventBatch(w eventWriter, data []byte) error { + for { + if len(data) == 0 { + return nil + } + + if len(data) < 4 { + return io.ErrUnexpectedEOF + } + + l := binary.BigEndian.Uint32(data) + data = data[4:] + if uint32(len(data)) < l { + return io.ErrUnexpectedEOF + } + + if err := decodeEvent(w, data[0:l]); err != nil { + return err + } + data = data[l:] + } +} + +func decodeEvent(w eventWriter, b []byte) error { + if len(b) == 0 { + return errInvalidEvent + } + + switch b[0] { + case kTypePutEvent: + if len(b[1:]) < 2 { + return errInvalidPutEvent + } + + keyLen := binary.BigEndian.Uint16(b[1:3]) + b = b[3:] + if len(b) < int(keyLen) { + return errInvalidPutEvent + } + + w.Put(b[0:keyLen], b[keyLen:]) + case kTypeDeleteEvent: + w.Delete(b[1:]) + default: + return errInvalidEvent + } + + return nil +} + +func formatEventKey(buf []byte, k []byte) ([]byte, error) { + if len(k) < 2 { + return nil, 
errInvalidEvent + } + + buf = append(buf, fmt.Sprintf("DB:%2d ", k[0])...) + buf = append(buf, fmt.Sprintf("%s ", TypeName[k[1]])...) + + db := new(DB) + db.index = k[0] + + //to do format at respective place + + switch k[1] { + case KVType: + if key, err := db.decodeKVKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, hack.String(key)) + } + case HashType: + if key, field, err := db.hDecodeHashKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, hack.String(key)) + buf = append(buf, ' ') + buf = strconv.AppendQuote(buf, hack.String(field)) + } + case HSizeType: + if key, err := db.hDecodeSizeKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, hack.String(key)) + } + case ListType: + if key, seq, err := db.lDecodeListKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, hack.String(key)) + buf = append(buf, ' ') + buf = strconv.AppendInt(buf, int64(seq), 10) + } + case LMetaType: + if key, err := db.lDecodeMetaKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, hack.String(key)) + } + case ZSetType: + if key, m, err := db.zDecodeSetKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, hack.String(key)) + buf = append(buf, ' ') + buf = strconv.AppendQuote(buf, hack.String(m)) + } + case ZSizeType: + if key, err := db.zDecodeSizeKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, hack.String(key)) + } + case ZScoreType: + if key, m, score, err := db.zDecodeScoreKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, hack.String(key)) + buf = append(buf, ' ') + buf = strconv.AppendQuote(buf, hack.String(m)) + buf = append(buf, ' ') + buf = strconv.AppendInt(buf, score, 10) + } + case BitType: + if key, seq, err := db.bDecodeBinKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, hack.String(key)) + buf 
= append(buf, ' ') + buf = strconv.AppendUint(buf, uint64(seq), 10) + } + case BitMetaType: + if key, err := db.bDecodeMetaKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, hack.String(key)) + } + case SetType: + if key, member, err := db.sDecodeSetKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, hack.String(key)) + buf = append(buf, ' ') + buf = strconv.AppendQuote(buf, hack.String(member)) + } + case SSizeType: + if key, err := db.sDecodeSizeKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, hack.String(key)) + } + case ExpTimeType: + if tp, key, t, err := db.expDecodeTimeKey(k); err != nil { + return nil, err + } else { + buf = append(buf, TypeName[tp]...) + buf = append(buf, ' ') + buf = strconv.AppendQuote(buf, hack.String(key)) + buf = append(buf, ' ') + buf = strconv.AppendInt(buf, t, 10) + } + case ExpMetaType: + if tp, key, err := db.expDecodeMetaKey(k); err != nil { + return nil, err + } else { + buf = append(buf, TypeName[tp]...) 
+ buf = append(buf, ' ') + buf = strconv.AppendQuote(buf, hack.String(key)) + } + default: + return nil, errInvalidEvent + } + + return buf, nil +} diff --git a/ledis/event_test.go b/ledis/event_test.go new file mode 100644 index 0000000..d2271e2 --- /dev/null +++ b/ledis/event_test.go @@ -0,0 +1,56 @@ +package ledis + +import ( + "reflect" + "testing" +) + +type testEvent struct { + Key []byte + Value []byte +} + +type testEventWriter struct { + evs []testEvent +} + +func (w *testEventWriter) Put(key []byte, value []byte) { + e := testEvent{key, value} + w.evs = append(w.evs, e) +} + +func (w *testEventWriter) Delete(key []byte) { + e := testEvent{key, nil} + w.evs = append(w.evs, e) +} + +func TestEvent(t *testing.T) { + k1 := []byte("k1") + v1 := []byte("v1") + k2 := []byte("k2") + k3 := []byte("k3") + v3 := []byte("v3") + + b := new(eventBatch) + + b.Put(k1, v1) + b.Delete(k2) + b.Put(k3, v3) + + buf := b.Bytes() + + w := &testEventWriter{} + + ev2 := &testEventWriter{ + evs: []testEvent{ + testEvent{k1, v1}, + testEvent{k2, nil}, + testEvent{k3, v3}}, + } + + if err := decodeEventBatch(w, buf); err != nil { + t.Fatal(err) + } else if !reflect.DeepEqual(w, ev2) { + t.Fatal("not equal") + } +} diff --git a/ledis/ledis.go b/ledis/ledis.go index 30e6ac7..8893eee 100644 --- a/ledis/ledis.go +++ b/ledis/ledis.go @@ -2,9 +2,14 @@ package ledis import ( "fmt" - "github.com/siddontang/go-log/log" + "github.com/siddontang/go/filelock" + "github.com/siddontang/go/log" "github.com/siddontang/ledisdb/config" + "github.com/siddontang/ledisdb/rpl" "github.com/siddontang/ledisdb/store" + "io" + "os" + "path" "sync" "time" ) @@ -16,59 +21,93 @@ type Ledis struct { dbs [MaxDBNumber]*DB quit chan struct{} - jobs *sync.WaitGroup + wg sync.WaitGroup - binlog *BinLog + //for replication + r *rpl.Replication + rc chan struct{} + rbatch store.WriteBatch + rwg sync.WaitGroup + rhs []NewLogEventHandler wLock sync.RWMutex //allow one write at same time commitLock sync.Mutex //allow one 
write commit at same time + + // for readonly mode, only replication and flushall can write + readOnly bool + + lock io.Closer } func Open(cfg *config.Config) (*Ledis, error) { + return Open2(cfg, RDWRMode) +} + +func Open2(cfg *config.Config, flags int) (*Ledis, error) { if len(cfg.DataDir) == 0 { cfg.DataDir = config.DefaultDataDir } - ldb, err := store.Open(cfg) - if err != nil { - return nil, err - } + os.MkdirAll(cfg.DataDir, 0755) + + var err error l := new(Ledis) + if l.lock, err = filelock.Lock(path.Join(cfg.DataDir, "LOCK")); err != nil { + return nil, err + } + + l.readOnly = (flags&ROnlyMode > 0) + l.quit = make(chan struct{}) - l.jobs = new(sync.WaitGroup) - l.ldb = ldb + if l.ldb, err = store.Open(cfg); err != nil { + return nil, err + } - if cfg.BinLog.MaxFileNum > 0 && cfg.BinLog.MaxFileSize > 0 { - println("binlog will be refactored later, use your own risk!!!") - l.binlog, err = NewBinLog(cfg) - if err != nil { + if cfg.UseReplication { + if l.r, err = rpl.NewReplication(cfg); err != nil { return nil, err } + + l.rc = make(chan struct{}, 1) + l.rbatch = l.ldb.NewWriteBatch() + + l.wg.Add(1) + go l.onReplication() + + //first we must try wait all replication ok + //maybe some logs are not committed + l.WaitReplication() } else { - l.binlog = nil + l.r = nil } for i := uint8(0); i < MaxDBNumber; i++ { l.dbs[i] = l.newDB(i) } - l.activeExpireCycle() + l.wg.Add(1) + go l.onDataExpired() return l, nil } func (l *Ledis) Close() { close(l.quit) - l.jobs.Wait() + l.wg.Wait() l.ldb.Close() - if l.binlog != nil { - l.binlog.Close() - l.binlog = nil + if l.r != nil { + l.r.Close() + l.r = nil + } + + if l.lock != nil { + l.lock.Close() + l.lock = nil } } @@ -80,49 +119,94 @@ func (l *Ledis) Select(index int) (*DB, error) { return l.dbs[index], nil } +// Flush All will clear all data and replication logs func (l *Ledis) FlushAll() error { - for index, db := range l.dbs { - if _, err := db.FlushAll(); err != nil { - log.Error("flush db %d error %s", index, 
err.Error()) + l.wLock.Lock() + defer l.wLock.Unlock() + + return l.flushAll() +} + +func (l *Ledis) flushAll() error { + it := l.ldb.NewIterator() + defer it.Close() + + w := l.ldb.NewWriteBatch() + defer w.Rollback() + + n := 0 + for ; it.Valid(); it.Next() { + n++ + if n == 10000 { + if err := w.Commit(); err != nil { + log.Fatal("flush all commit error: %s", err.Error()) + return err + } + n = 0 + } + w.Delete(it.RawKey()) + } + + if err := w.Commit(); err != nil { + log.Fatal("flush all commit error: %s", err.Error()) + return err + } + + if l.r != nil { + if err := l.r.Clear(); err != nil { + log.Fatal("flush all replication clear error: %s", err.Error()) + return err } } return nil } -// very dangerous to use -func (l *Ledis) DataDB() *store.DB { - return l.ldb +func (l *Ledis) IsReadOnly() bool { + if l.readOnly { + return true + } else if l.r != nil { + if b, _ := l.r.CommitIDBehind(); b { + return true + } + } + return false } -func (l *Ledis) activeExpireCycle() { +func (l *Ledis) SetReadOnly(b bool) { + l.readOnly = b +} + +func (l *Ledis) onDataExpired() { + defer l.wg.Done() + var executors []*elimination = make([]*elimination, len(l.dbs)) for i, db := range l.dbs { executors[i] = db.newEliminator() } - l.jobs.Add(1) - go func() { - tick := time.NewTicker(1 * time.Second) - end := false - done := make(chan struct{}) - for !end { - select { - case <-tick.C: - go func() { - for _, eli := range executors { - eli.active() - } - done <- struct{}{} - }() - <-done - case <-l.quit: - end = true + tick := time.NewTicker(1 * time.Second) + defer tick.Stop() + + done := make(chan struct{}) + + for { + select { + case <-tick.C: + if l.IsReadOnly() { break } - } - tick.Stop() - l.jobs.Done() - }() + go func() { + for _, eli := range executors { + eli.active() + } + done <- struct{}{} + }() + <-done + case <-l.quit: + return + } + } + } diff --git a/ledis/ledis_db.go b/ledis/ledis_db.go index dd8ff74..6a8eb9c 100644 --- a/ledis/ledis_db.go +++ b/ledis/ledis_db.go 
@@ -109,21 +109,6 @@ func (db *DB) newEliminator() *elimination { return eliminator } -func (db *DB) flushRegion(t *batch, minKey []byte, maxKey []byte) (drop int64, err error) { - it := db.bucket.RangeIterator(minKey, maxKey, store.RangeROpen) - for ; it.Valid(); it.Next() { - t.Delete(it.RawKey()) - drop++ - if drop&1023 == 0 { - if err = t.Commit(); err != nil { - return - } - } - } - it.Close() - return -} - func (db *DB) flushType(t *batch, dataType byte) (drop int64, err error) { var deleteFunc func(t *batch, key []byte) int64 var metaDataType byte diff --git a/ledis/ledis_test.go b/ledis/ledis_test.go index d5a5476..45f1c7f 100644 --- a/ledis/ledis_test.go +++ b/ledis/ledis_test.go @@ -14,8 +14,6 @@ func getTestDB() *DB { f := func() { cfg := new(config.Config) cfg.DataDir = "/tmp/test_ledis" - // cfg.BinLog.MaxFileSize = 1073741824 - // cfg.BinLog.MaxFileNum = 3 os.RemoveAll(cfg.DataDir) diff --git a/ledis/replication.go b/ledis/replication.go index 804573d..b68a990 100644 --- a/ledis/replication.go +++ b/ledis/replication.go @@ -1,114 +1,123 @@ package ledis import ( - "bufio" "bytes" "errors" - "github.com/siddontang/go-log/log" - "github.com/siddontang/ledisdb/store/driver" + "github.com/siddontang/go/log" + "github.com/siddontang/go/snappy" + "github.com/siddontang/ledisdb/rpl" "io" - "os" "time" ) const ( - maxReplBatchNum = 100 - maxReplLogSize = 1 * 1024 * 1024 + maxReplLogSize = 1 * 1024 * 1024 ) var ( - ErrSkipEvent = errors.New("skip to next event") + ErrLogMissed = errors.New("log is pured in server") ) -var ( - errInvalidBinLogEvent = errors.New("invalid binglog event") - errInvalidBinLogFile = errors.New("invalid binlog file") -) - -type replBatch struct { - wb driver.IWriteBatch - events [][]byte - l *Ledis - - lastHead *BinLogHead +func (l *Ledis) ReplicationUsed() bool { + return l.r != nil } -func (b *replBatch) Commit() error { - b.l.commitLock.Lock() - defer b.l.commitLock.Unlock() +func (l *Ledis) handleReplication() error { + 
l.wLock.Lock() + defer l.wLock.Unlock() + + l.rwg.Add(1) + rl := &rpl.Log{} + var err error + for { + if err = l.r.NextNeedCommitLog(rl); err != nil { + if err != rpl.ErrNoBehindLog { + log.Error("get next commit log err, %s", err.Error) + return err + } else { + l.rwg.Done() + return nil + } + } else { + l.rbatch.Rollback() + + if rl.Compression == 1 { + //todo optimize + if rl.Data, err = snappy.Decode(nil, rl.Data); err != nil { + log.Error("decode log error %s", err.Error()) + return err + } + } + + decodeEventBatch(l.rbatch, rl.Data) + + l.commitLock.Lock() + if err = l.rbatch.Commit(); err != nil { + log.Error("commit log error %s", err.Error()) + } else if err = l.r.UpdateCommitID(rl.ID); err != nil { + log.Error("update commit id error %s", err.Error()) + } + + l.commitLock.Unlock() + if err != nil { + return err + } + } + + } +} + +func (l *Ledis) onReplication() { + defer l.wg.Done() + + l.noticeReplication() + + for { + select { + case <-l.rc: + l.handleReplication() + case <-l.quit: + return + } + } +} + +func (l *Ledis) WaitReplication() error { + if !l.ReplicationUsed() { + return ErrRplNotSupport - err := b.wb.Commit() - if err != nil { - b.Rollback() - return err } - if b.l.binlog != nil { - if err = b.l.binlog.Log(b.events...); err != nil { - b.Rollback() + l.noticeReplication() + l.rwg.Wait() + + for i := 0; i < 100; i++ { + b, err := l.r.CommitIDBehind() + if err != nil { return err + } else if b { + l.noticeReplication() + l.rwg.Wait() + time.Sleep(100 * time.Millisecond) + } else { + return nil } } - b.events = [][]byte{} - b.lastHead = nil - - return nil + return errors.New("wait replication too many times") } -func (b *replBatch) Rollback() error { - b.wb.Rollback() - b.events = [][]byte{} - b.lastHead = nil - return nil -} - -func (l *Ledis) replicateEvent(b *replBatch, event []byte) error { - if len(event) == 0 { - return errInvalidBinLogEvent +func (l *Ledis) StoreLogsFromReader(rb io.Reader) error { + if !l.ReplicationUsed() { + return 
ErrRplNotSupport + } else if !l.readOnly { + return ErrRplInRDWR } - b.events = append(b.events, event) - - logType := uint8(event[0]) - switch logType { - case BinLogTypePut: - return l.replicatePutEvent(b, event) - case BinLogTypeDeletion: - return l.replicateDeleteEvent(b, event) - default: - return errInvalidBinLogEvent - } -} - -func (l *Ledis) replicatePutEvent(b *replBatch, event []byte) error { - key, value, err := decodeBinLogPut(event) - if err != nil { - return err - } - - b.wb.Put(key, value) - - return nil -} - -func (l *Ledis) replicateDeleteEvent(b *replBatch, event []byte) error { - key, err := decodeBinLogDelete(event) - if err != nil { - return err - } - - b.wb.Delete(key) - - return nil -} - -func ReadEventFromReader(rb io.Reader, f func(head *BinLogHead, event []byte) error) error { - head := &BinLogHead{} - var err error + log := &rpl.Log{} for { - if err = head.Read(rb); err != nil { + if err := log.Decode(rb); err != nil { if err == io.EOF { break } else { @@ -116,196 +125,114 @@ func ReadEventFromReader(rb io.Reader, f func(head *BinLogHead, event []byte) er } } - var dataBuf bytes.Buffer - - if _, err = io.CopyN(&dataBuf, rb, int64(head.PayloadLen)); err != nil { + if err := l.r.StoreLog(log); err != nil { return err } - err = f(head, dataBuf.Bytes()) - if err != nil && err != ErrSkipEvent { - return err - } } + l.noticeReplication() + return nil } -func (l *Ledis) ReplicateFromReader(rb io.Reader) error { - b := new(replBatch) - - b.wb = l.ldb.NewWriteBatch() - b.l = l - - f := func(head *BinLogHead, event []byte) error { - if b.lastHead == nil { - b.lastHead = head - } else if !b.lastHead.InSameBatch(head) { - if err := b.Commit(); err != nil { - log.Fatal("replication error %s, skip to next", err.Error()) - return ErrSkipEvent - } - b.lastHead = head - } - - err := l.replicateEvent(b, event) - if err != nil { - log.Fatal("replication error %s, skip to next", err.Error()) - return ErrSkipEvent - } - return nil - } - - err := 
ReadEventFromReader(rb, f) - if err != nil { - b.Rollback() - return err - } - return b.Commit() +func (l *Ledis) noticeReplication() { + AsyncNotify(l.rc) } -func (l *Ledis) ReplicateFromData(data []byte) error { +func (l *Ledis) StoreLogsFromData(data []byte) error { rb := bytes.NewReader(data) - err := l.ReplicateFromReader(rb) - - return err + return l.StoreLogsFromReader(rb) } -func (l *Ledis) ReplicateFromBinLog(filePath string) error { - f, err := os.Open(filePath) - if err != nil { - return err +func (l *Ledis) ReadLogsTo(startLogID uint64, w io.Writer) (n int, nextLogID uint64, err error) { + if !l.ReplicationUsed() { + // no replication log + nextLogID = 0 + err = ErrRplNotSupport + return } - rb := bufio.NewReaderSize(f, 4096) + var firtID, lastID uint64 - err = l.ReplicateFromReader(rb) + firtID, err = l.r.FirstLogID() + if err != nil { + return + } - f.Close() + if startLogID < firtID { + err = ErrLogMissed + return + } - return err + lastID, err = l.r.LastLogID() + if err != nil { + return + } + + nextLogID = startLogID + + log := &rpl.Log{} + for i := startLogID; i <= lastID; i++ { + if err = l.r.GetLog(i, log); err != nil { + return + } + + if err = log.Encode(w); err != nil { + return + } + + nextLogID = i + 1 + + n += log.Size() + + if n > maxReplLogSize { + break + } + } + + return } // try to read events, if no events read, try to wait the new event singal until timeout seconds -func (l *Ledis) ReadEventsToTimeout(info *BinLogAnchor, w io.Writer, timeout int) (n int, err error) { - lastIndex := info.LogFileIndex - lastPos := info.LogPos - - n = 0 - if l.binlog == nil { - //binlog not supported - info.LogFileIndex = 0 - info.LogPos = 0 - return - } - - n, err = l.ReadEventsTo(info, w) - if err == nil && info.LogFileIndex == lastIndex && info.LogPos == lastPos { - //no events read - select { - case <-l.binlog.Wait(): - case <-time.After(time.Duration(timeout) * time.Second): - } - return l.ReadEventsTo(info, w) - } - return -} - -func (l *Ledis) 
ReadEventsTo(info *BinLogAnchor, w io.Writer) (n int, err error) { - n = 0 - if l.binlog == nil { - //binlog not supported - info.LogFileIndex = 0 - info.LogPos = 0 - return - } - - index := info.LogFileIndex - offset := info.LogPos - - filePath := l.binlog.FormatLogFilePath(index) - - var f *os.File - f, err = os.Open(filePath) - if os.IsNotExist(err) { - lastIndex := l.binlog.LogFileIndex() - - if index == lastIndex { - //no binlog at all - info.LogPos = 0 - } else { - //slave binlog info had lost - info.LogFileIndex = -1 - } - } - +func (l *Ledis) ReadLogsToTimeout(startLogID uint64, w io.Writer, timeout int) (n int, nextLogID uint64, err error) { + n, nextLogID, err = l.ReadLogsTo(startLogID, w) if err != nil { - if os.IsNotExist(err) { - err = nil - } + return + } else if n != 0 { return } - - defer f.Close() - - var fileSize int64 - st, _ := f.Stat() - fileSize = st.Size() - - if fileSize == info.LogPos { - return + //no events read + select { + case <-l.r.WaitLog(): + case <-time.After(time.Duration(timeout) * time.Second): } - - if _, err = f.Seek(offset, os.SEEK_SET); err != nil { - //may be invliad seek offset - return - } - - var lastHead *BinLogHead = nil - - head := &BinLogHead{} - - batchNum := 0 - - for { - if err = head.Read(f); err != nil { - if err == io.EOF { - //we will try to use next binlog - if index < l.binlog.LogFileIndex() { - info.LogFileIndex += 1 - info.LogPos = 0 - } - err = nil - return - } else { - return - } - - } - - if lastHead == nil { - lastHead = head - batchNum++ - } else if !lastHead.InSameBatch(head) { - lastHead = head - batchNum++ - if batchNum > maxReplBatchNum || n > maxReplLogSize { - return - } - } - - if err = head.Write(w); err != nil { - return - } - - if _, err = io.CopyN(w, f, int64(head.PayloadLen)); err != nil { - return - } - - n += (head.Len() + int(head.PayloadLen)) - info.LogPos = info.LogPos + int64(head.Len()) + int64(head.PayloadLen) - } - - return + return l.ReadLogsTo(startLogID, w) +} + +func (l 
*Ledis) propagate(rl *rpl.Log) { + for _, h := range l.rhs { + h(rl) + } +} + +type NewLogEventHandler func(rl *rpl.Log) + +func (l *Ledis) AddNewLogEventHandler(h NewLogEventHandler) error { + if !l.ReplicationUsed() { + return ErrRplNotSupport + } + + l.rhs = append(l.rhs, h) + + return nil +} + +func (l *Ledis) ReplicationStat() (*rpl.Stat, error) { + if !l.ReplicationUsed() { + return nil, ErrRplNotSupport + } + + return l.r.Stat() } diff --git a/ledis/replication_test.go b/ledis/replication_test.go index 07643c6..c300ef8 100644 --- a/ledis/replication_test.go +++ b/ledis/replication_test.go @@ -6,7 +6,6 @@ import ( "github.com/siddontang/ledisdb/config" "github.com/siddontang/ledisdb/store" "os" - "path" "testing" ) @@ -34,8 +33,8 @@ func TestReplication(t *testing.T) { cfgM := new(config.Config) cfgM.DataDir = "/tmp/test_repl/master" - cfgM.BinLog.MaxFileNum = 10 - cfgM.BinLog.MaxFileSize = 50 + cfgM.UseReplication = true + cfgM.Replication.Compression = true os.RemoveAll(cfgM.DataDir) @@ -46,10 +45,11 @@ func TestReplication(t *testing.T) { cfgS := new(config.Config) cfgS.DataDir = "/tmp/test_repl/slave" + cfgS.UseReplication = true os.RemoveAll(cfgS.DataDir) - slave, err = Open(cfgS) + slave, err = Open2(cfgS, ROnlyMode) if err != nil { t.Fatal(err) } @@ -59,16 +59,9 @@ func TestReplication(t *testing.T) { db.Set([]byte("b"), []byte("value")) db.Set([]byte("c"), []byte("value")) - if tx, err := db.Begin(); err == nil { - tx.HSet([]byte("a"), []byte("1"), []byte("value")) - tx.HSet([]byte("b"), []byte("2"), []byte("value")) - tx.HSet([]byte("c"), []byte("3"), []byte("value")) - tx.Commit() - } else { - db.HSet([]byte("a"), []byte("1"), []byte("value")) - db.HSet([]byte("b"), []byte("2"), []byte("value")) - db.HSet([]byte("c"), []byte("3"), []byte("value")) - } + db.HSet([]byte("a"), []byte("1"), []byte("value")) + db.HSet([]byte("b"), []byte("2"), []byte("value")) + db.HSet([]byte("c"), []byte("3"), []byte("value")) m, _ := db.Multi() m.Set([]byte("a1"), 
[]byte("value")) @@ -76,19 +69,6 @@ func TestReplication(t *testing.T) { m.Set([]byte("c1"), []byte("value")) m.Close() - for _, name := range master.binlog.LogNames() { - p := path.Join(master.binlog.LogPath(), name) - - err = slave.ReplicateFromBinLog(p) - if err != nil { - t.Fatal(err) - } - } - - if err = checkLedisEqual(master, slave); err != nil { - t.Fatal(err) - } - slave.FlushAll() db.Set([]byte("a1"), []byte("value")) @@ -99,38 +79,25 @@ func TestReplication(t *testing.T) { db.HSet([]byte("b1"), []byte("2"), []byte("value")) db.HSet([]byte("c1"), []byte("3"), []byte("value")) - if tx, err := db.Begin(); err == nil { - tx.HSet([]byte("a1"), []byte("1"), []byte("value1")) - tx.HSet([]byte("b1"), []byte("2"), []byte("value1")) - tx.HSet([]byte("c1"), []byte("3"), []byte("value1")) - tx.Rollback() - } - - info := new(BinLogAnchor) - info.LogFileIndex = 1 - info.LogPos = 0 var buf bytes.Buffer var n int - + var id uint64 = 1 for { buf.Reset() - n, err = master.ReadEventsTo(info, &buf) + n, id, err = master.ReadLogsTo(id, &buf) if err != nil { t.Fatal(err) - } else if info.LogFileIndex == -1 { - t.Fatal("invalid log file index -1") - } else if info.LogFileIndex == 0 { - t.Fatal("invalid log file index 0") - } else { - if err = slave.ReplicateFromReader(&buf); err != nil { + } else if n != 0 { + if err = slave.StoreLogsFromReader(&buf); err != nil { t.Fatal(err) } - if n == 0 { - break - } + } else if n == 0 { + break } } + slave.WaitReplication() + if err = checkLedisEqual(master, slave); err != nil { t.Fatal(err) } diff --git a/ledis/scan.go b/ledis/scan.go index 09e2b5c..f7fca13 100644 --- a/ledis/scan.go +++ b/ledis/scan.go @@ -24,17 +24,17 @@ func (db *DB) scan(dataType byte, key []byte, count int, inclusive bool, match s if err = checkKeySize(key); err != nil { return nil, err } - if minKey, err = db.encodeMetaKey(dataType, key); err != nil { + if minKey, err = db.encodeScanKey(dataType, key); err != nil { return nil, err } } else { - if minKey, err = 
db.encodeMinKey(dataType); err != nil { + if minKey, err = db.encodeScanMinKey(dataType); err != nil { return nil, err } } - if maxKey, err = db.encodeMaxKey(dataType); err != nil { + if maxKey, err = db.encodeScanMaxKey(dataType); err != nil { return nil, err } @@ -54,7 +54,7 @@ func (db *DB) scan(dataType byte, key []byte, count int, inclusive bool, match s } for i := 0; it.Valid() && i < count && bytes.Compare(it.RawKey(), maxKey) < 0; it.Next() { - if k, err := db.decodeMetaKey(dataType, it.Key()); err != nil { + if k, err := db.decodeScanKey(dataType, it.Key()); err != nil { continue } else if r != nil && !r.Match(k) { continue @@ -67,12 +67,12 @@ func (db *DB) scan(dataType byte, key []byte, count int, inclusive bool, match s return v, nil } -func (db *DB) encodeMinKey(dataType byte) ([]byte, error) { - return db.encodeMetaKey(dataType, nil) +func (db *DB) encodeScanMinKey(dataType byte) ([]byte, error) { + return db.encodeScanKey(dataType, nil) } -func (db *DB) encodeMaxKey(dataType byte) ([]byte, error) { - k, err := db.encodeMetaKey(dataType, nil) +func (db *DB) encodeScanMaxKey(dataType byte) ([]byte, error) { + k, err := db.encodeScanKey(dataType, nil) if err != nil { return nil, err } @@ -80,7 +80,7 @@ func (db *DB) encodeMaxKey(dataType byte) ([]byte, error) { return k, nil } -func (db *DB) encodeMetaKey(dataType byte, key []byte) ([]byte, error) { +func (db *DB) encodeScanKey(dataType byte, key []byte) ([]byte, error) { switch dataType { case KVType: return db.encodeKVKey(key), nil @@ -98,7 +98,7 @@ func (db *DB) encodeMetaKey(dataType byte, key []byte) ([]byte, error) { return nil, errDataType } } -func (db *DB) decodeMetaKey(dataType byte, ek []byte) ([]byte, error) { +func (db *DB) decodeScanKey(dataType byte, ek []byte) ([]byte, error) { if len(ek) < 2 || ek[0] != db.index || ek[1] != dataType { return nil, errMetaKey } diff --git a/ledis/t_bit.go b/ledis/t_bit.go index 496c37a..ab104db 100644 --- a/ledis/t_bit.go +++ b/ledis/t_bit.go @@ -3,6 +3,7 
@@ package ledis import ( "encoding/binary" "errors" + "github.com/siddontang/go/num" "github.com/siddontang/ledisdb/store" "sort" "time" @@ -239,8 +240,8 @@ func (db *DB) bUpdateMeta(t *batch, key []byte, seq uint32, off uint32) (tailSeq } else if tseq < 0 { update = true } else { - tailSeq = uint32(MaxInt32(tseq, 0)) - tailOff = uint32(MaxInt32(toff, 0)) + tailSeq = uint32(num.MaxInt32(tseq, 0)) + tailOff = uint32(num.MaxInt32(toff, 0)) update = (seq > tailSeq || (seq == tailSeq && off > tailOff)) } @@ -461,7 +462,7 @@ func (db *DB) BGet(key []byte) (data []byte, err error) { } s = seq << segByteWidth - e = MinUInt32(s+segByteSize, capByteSize) + e = num.MinUint32(s+segByteSize, capByteSize) copy(data[s:e], it.RawValue()) } it.Close() diff --git a/ledis/t_hash.go b/ledis/t_hash.go index 8ee199e..a2e0bd3 100644 --- a/ledis/t_hash.go +++ b/ledis/t_hash.go @@ -3,6 +3,7 @@ package ledis import ( "encoding/binary" "errors" + "github.com/siddontang/go/num" "github.com/siddontang/ledisdb/store" "time" ) @@ -183,8 +184,6 @@ func (db *DB) HSet(key []byte, field []byte, value []byte) (int64, error) { return 0, err } - //todo add binlog - err = t.Commit() return n, err } @@ -334,7 +333,7 @@ func (db *DB) HIncrBy(key []byte, field []byte, delta int64) (int64, error) { n += delta - _, err = db.hSetItem(key, field, StrPutInt64(n)) + _, err = db.hSetItem(key, field, num.FormatInt64ToSlice(n)) if err != nil { return 0, err } diff --git a/ledis/t_kv.go b/ledis/t_kv.go index 1dd540a..14d477b 100644 --- a/ledis/t_kv.go +++ b/ledis/t_kv.go @@ -2,6 +2,7 @@ package ledis import ( "errors" + "github.com/siddontang/go/num" "time" ) @@ -75,9 +76,7 @@ func (db *DB) incr(key []byte, delta int64) (int64, error) { n += delta - t.Put(key, StrPutInt64(n)) - - //todo binlog + t.Put(key, num.FormatInt64ToSlice(n)) err = t.Commit() return n, err @@ -185,7 +184,6 @@ func (db *DB) GetSet(key []byte, value []byte) ([]byte, error) { } t.Put(key, value) - //todo, binlog err = t.Commit() @@ -244,7 
+242,6 @@ func (db *DB) MSet(args ...KVPair) error { t.Put(key, value) - //todo binlog } err = t.Commit() @@ -297,8 +294,6 @@ func (db *DB) SetNX(key []byte, value []byte) (int64, error) { } else { t.Put(key, value) - //todo binlog - err = t.Commit() } diff --git a/ledis/t_set.go b/ledis/t_set.go index 330462a..2eb6c4c 100644 --- a/ledis/t_set.go +++ b/ledis/t_set.go @@ -3,6 +3,7 @@ package ledis import ( "encoding/binary" "errors" + "github.com/siddontang/go/hack" "github.com/siddontang/ledisdb/store" "time" ) @@ -240,7 +241,7 @@ func (db *DB) sDiffGeneric(keys ...[]byte) ([][]byte, error) { } for _, m := range members { - destMap[String(m)] = true + destMap[hack.String(m)] = true } for _, k := range keys[1:] { @@ -250,10 +251,10 @@ func (db *DB) sDiffGeneric(keys ...[]byte) ([][]byte, error) { } for _, m := range members { - if _, ok := destMap[String(m)]; !ok { + if _, ok := destMap[hack.String(m)]; !ok { continue } else if ok { - delete(destMap, String(m)) + delete(destMap, hack.String(m)) } } // O - A = O, O is zero set. 
@@ -294,7 +295,7 @@ func (db *DB) sInterGeneric(keys ...[]byte) ([][]byte, error) { } for _, m := range members { - destMap[String(m)] = true + destMap[hack.String(m)] = true } for _, key := range keys[1:] { @@ -314,8 +315,8 @@ func (db *DB) sInterGeneric(keys ...[]byte) ([][]byte, error) { if err := checkKeySize(member); err != nil { return nil, err } - if _, ok := destMap[String(member)]; ok { - tempMap[String(member)] = true //mark this item as selected + if _, ok := destMap[hack.String(member)]; ok { + tempMap[hack.String(member)] = true //mark this item as selected } } destMap = tempMap //reduce the size of the result set @@ -439,7 +440,7 @@ func (db *DB) sUnionGeneric(keys ...[]byte) ([][]byte, error) { } for _, member := range members { - dstMap[String(member)] = true + dstMap[hack.String(member)] = true } } @@ -508,14 +509,14 @@ func (db *DB) sStoreGeneric(dstKey []byte, optType byte, keys ...[]byte) (int64, t.Put(ek, nil) } - var num = int64(len(v)) + var n = int64(len(v)) sk := db.sEncodeSizeKey(dstKey) - t.Put(sk, PutInt64(num)) + t.Put(sk, PutInt64(n)) if err = t.Commit(); err != nil { return 0, err } - return num, nil + return n, nil } func (db *DB) SClear(key []byte) (int64, error) { diff --git a/ledis/t_ttl.go b/ledis/t_ttl.go index 3d12606..2d5e2d5 100644 --- a/ledis/t_ttl.go +++ b/ledis/t_ttl.go @@ -111,22 +111,6 @@ func (db *DB) rmExpire(t *batch, dataType byte, key []byte) (int64, error) { } } -func (db *DB) expFlush(t *batch, dataType byte) (err error) { - minKey := make([]byte, 3) - minKey[0] = db.index - minKey[1] = ExpTimeType - minKey[2] = dataType - - maxKey := make([]byte, 3) - maxKey[0] = db.index - maxKey[1] = ExpMetaType - maxKey[2] = dataType + 1 - - _, err = db.flushRegion(t, minKey, maxKey) - err = t.Commit() - return -} - ////////////////////////////////////////////////////////// // ////////////////////////////////////////////////////////// diff --git a/ledis/t_ttl_test.go b/ledis/t_ttl_test.go index abfe2bf..b041e41 100644 --- 
a/ledis/t_ttl_test.go +++ b/ledis/t_ttl_test.go @@ -2,6 +2,7 @@ package ledis import ( "fmt" + "github.com/siddontang/go/hack" "sync" "testing" "time" @@ -49,7 +50,7 @@ func listAdaptor(db *DB) *adaptor { adp.set = func(k []byte, v []byte) (int64, error) { eles := make([][]byte, 0) for i := 0; i < 3; i++ { - e := []byte(String(v) + fmt.Sprintf("_%d", i)) + e := []byte(hack.String(v) + fmt.Sprintf("_%d", i)) eles = append(eles, e) } @@ -87,8 +88,8 @@ func hashAdaptor(db *DB) *adaptor { for i := 0; i < 3; i++ { suffix := fmt.Sprintf("_%d", i) pair := FVPair{ - Field: []byte(String(k) + suffix), - Value: []byte(String(v) + suffix)} + Field: []byte(hack.String(k) + suffix), + Value: []byte(hack.String(v) + suffix)} datas = append(datas, pair) } @@ -125,7 +126,7 @@ func zsetAdaptor(db *DB) *adaptor { adp.set = func(k []byte, v []byte) (int64, error) { datas := make([]ScorePair, 0) for i := 0; i < 3; i++ { - memb := []byte(String(k) + fmt.Sprintf("_%d", i)) + memb := []byte(hack.String(k) + fmt.Sprintf("_%d", i)) pair := ScorePair{ Score: int64(i), Member: memb} @@ -165,7 +166,7 @@ func setAdaptor(db *DB) *adaptor { adp.set = func(k []byte, v []byte) (int64, error) { eles := make([][]byte, 0) for i := 0; i < 3; i++ { - e := []byte(String(v) + fmt.Sprintf("_%d", i)) + e := []byte(hack.String(v) + fmt.Sprintf("_%d", i)) eles = append(eles, e) } diff --git a/ledis/t_zset.go b/ledis/t_zset.go index 47af6ec..dc028c0 100644 --- a/ledis/t_zset.go +++ b/ledis/t_zset.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "errors" + "github.com/siddontang/go/hack" "github.com/siddontang/ledisdb/store" "time" ) @@ -305,7 +306,6 @@ func (db *DB) ZAdd(key []byte, args ...ScorePair) (int64, error) { return 0, err } - //todo add binlog err := t.Commit() return num, err } @@ -834,10 +834,10 @@ func (db *DB) ZUnionStore(destKey []byte, srcKeys [][]byte, weights []int64, agg return 0, err } for _, pair := range scorePairs { - if score, ok := destMap[String(pair.Member)]; !ok { - 
destMap[String(pair.Member)] = pair.Score * weights[i] + if score, ok := destMap[hack.String(pair.Member)]; !ok { + destMap[hack.String(pair.Member)] = pair.Score * weights[i] } else { - destMap[String(pair.Member)] = aggregateFunc(score, pair.Score*weights[i]) + destMap[hack.String(pair.Member)] = aggregateFunc(score, pair.Score*weights[i]) } } } @@ -858,15 +858,14 @@ func (db *DB) ZUnionStore(destKey []byte, srcKeys [][]byte, weights []int64, agg } } - var num = int64(len(destMap)) + var n = int64(len(destMap)) sk := db.zEncodeSizeKey(destKey) - t.Put(sk, PutInt64(num)) + t.Put(sk, PutInt64(n)) - //todo add binlog if err := t.Commit(); err != nil { return 0, err } - return num, nil + return n, nil } func (db *DB) ZInterStore(destKey []byte, srcKeys [][]byte, weights []int64, aggregate byte) (int64, error) { @@ -895,7 +894,7 @@ func (db *DB) ZInterStore(destKey []byte, srcKeys [][]byte, weights []int64, agg return 0, err } for _, pair := range scorePairs { - destMap[String(pair.Member)] = pair.Score * weights[0] + destMap[hack.String(pair.Member)] = pair.Score * weights[0] } for i, key := range srcKeys[1:] { @@ -905,8 +904,8 @@ func (db *DB) ZInterStore(destKey []byte, srcKeys [][]byte, weights []int64, agg } tmpMap := map[string]int64{} for _, pair := range scorePairs { - if score, ok := destMap[String(pair.Member)]; ok { - tmpMap[String(pair.Member)] = aggregateFunc(score, pair.Score*weights[i+1]) + if score, ok := destMap[hack.String(pair.Member)]; ok { + tmpMap[hack.String(pair.Member)] = aggregateFunc(score, pair.Score*weights[i+1]) } } destMap = tmpMap @@ -927,16 +926,96 @@ func (db *DB) ZInterStore(destKey []byte, srcKeys [][]byte, weights []int64, agg } } - var num int64 = int64(len(destMap)) + var n int64 = int64(len(destMap)) sk := db.zEncodeSizeKey(destKey) - t.Put(sk, PutInt64(num)) - //todo add binlog + t.Put(sk, PutInt64(n)) + if err := t.Commit(); err != nil { return 0, err } - return num, nil + return n, nil } func (db *DB) ZScan(key []byte, count 
int, inclusive bool, match string) ([][]byte, error) { return db.scan(ZSizeType, key, count, inclusive, match) } + +func (db *DB) ZRangeByLex(key []byte, min []byte, max []byte, rangeType uint8, offset int, count int) ([][]byte, error) { + if min == nil { + min = db.zEncodeStartSetKey(key) + } else { + min = db.zEncodeSetKey(key, min) + } + if max == nil { + max = db.zEncodeStopSetKey(key) + } else { + max = db.zEncodeSetKey(key, max) + } + + it := db.bucket.RangeLimitIterator(min, max, rangeType, offset, count) + defer it.Close() + + ay := make([][]byte, 0, 16) + for ; it.Valid(); it.Next() { + if _, m, err := db.zDecodeSetKey(it.Key()); err == nil { + ay = append(ay, m) + } + } + + return ay, nil +} + +func (db *DB) ZRemRangeByLex(key []byte, min []byte, max []byte, rangeType uint8) (int64, error) { + if min == nil { + min = db.zEncodeStartSetKey(key) + } else { + min = db.zEncodeSetKey(key, min) + } + if max == nil { + max = db.zEncodeStopSetKey(key) + } else { + max = db.zEncodeSetKey(key, max) + } + + t := db.zsetBatch + t.Lock() + defer t.Unlock() + + it := db.bucket.RangeIterator(min, max, rangeType) + defer it.Close() + + var n int64 = 0 + for ; it.Valid(); it.Next() { + t.Delete(it.RawKey()) + n++ + } + + if err := t.Commit(); err != nil { + return 0, err + } + + return n, nil +} + +func (db *DB) ZLexCount(key []byte, min []byte, max []byte, rangeType uint8) (int64, error) { + if min == nil { + min = db.zEncodeStartSetKey(key) + } else { + min = db.zEncodeSetKey(key, min) + } + if max == nil { + max = db.zEncodeStopSetKey(key) + } else { + max = db.zEncodeSetKey(key, max) + } + + it := db.bucket.RangeIterator(min, max, rangeType) + defer it.Close() + + var n int64 = 0 + for ; it.Valid(); it.Next() { + n++ + } + + return n, nil +} diff --git a/ledis/t_zset_test.go b/ledis/t_zset_test.go index a1754ed..98f0a81 100644 --- a/ledis/t_zset_test.go +++ b/ledis/t_zset_test.go @@ -2,6 +2,8 @@ package ledis import ( "fmt" + "github.com/siddontang/ledisdb/store" + 
"reflect" "testing" ) @@ -407,3 +409,59 @@ func TestZScan(t *testing.T) { t.Fatal("invalid value length ", len(v)) } } + +func TestZLex(t *testing.T) { + db := getTestDB() + if _, err := db.zFlush(); err != nil { + t.Fatal(err) + } + + key := []byte("myzset") + if _, err := db.ZAdd(key, ScorePair{0, []byte("a")}, + ScorePair{0, []byte("b")}, + ScorePair{0, []byte("c")}, + ScorePair{0, []byte("d")}, + ScorePair{0, []byte("e")}, + ScorePair{0, []byte("f")}, + ScorePair{0, []byte("g")}); err != nil { + t.Fatal(err) + } + + if ay, err := db.ZRangeByLex(key, nil, []byte("c"), store.RangeClose, 0, -1); err != nil { + t.Fatal(err) + } else if !reflect.DeepEqual(ay, [][]byte{[]byte("a"), []byte("b"), []byte("c")}) { + t.Fatal("must equal a, b, c") + } + + if ay, err := db.ZRangeByLex(key, nil, []byte("c"), store.RangeROpen, 0, -1); err != nil { + t.Fatal(err) + } else if !reflect.DeepEqual(ay, [][]byte{[]byte("a"), []byte("b")}) { + t.Fatal("must equal a, b") + } + + if ay, err := db.ZRangeByLex(key, []byte("aaa"), []byte("g"), store.RangeROpen, 0, -1); err != nil { + t.Fatal(err) + } else if !reflect.DeepEqual(ay, [][]byte{[]byte("b"), + []byte("c"), []byte("d"), []byte("e"), []byte("f")}) { + t.Fatal("must equal b, c, d, e, f", fmt.Sprintf("%q", ay)) + } + + if n, err := db.ZLexCount(key, nil, nil, store.RangeClose); err != nil { + t.Fatal(err) + } else if n != 7 { + t.Fatal(n) + } + + if n, err := db.ZRemRangeByLex(key, []byte("aaa"), []byte("g"), store.RangeROpen); err != nil { + t.Fatal(err) + } else if n != 5 { + t.Fatal(n) + } + + if n, err := db.ZLexCount(key, nil, nil, store.RangeClose); err != nil { + t.Fatal(err) + } else if n != 2 { + t.Fatal(n) + } + +} diff --git a/ledis/tx.go b/ledis/tx.go index 6339bae..a5ff883 100644 --- a/ledis/tx.go +++ b/ledis/tx.go @@ -16,7 +16,7 @@ type Tx struct { tx *store.Tx - logs [][]byte + eb *eventBatch } func (db *DB) IsTransaction() bool { @@ -32,6 +32,8 @@ func (db *DB) Begin() (*Tx, error) { tx := new(Tx) + tx.eb = 
new(eventBatch) + tx.DB = new(DB) tx.DB.l = db.l @@ -67,16 +69,10 @@ func (tx *Tx) Commit() error { return ErrTxDone } - tx.l.commitLock.Lock() - err := tx.tx.Commit() + err := tx.l.handleCommit(tx.eb, tx.tx) + tx.tx = nil - if len(tx.logs) > 0 { - tx.l.binlog.Log(tx.logs...) - } - - tx.l.commitLock.Unlock() - tx.l.wLock.Unlock() tx.DB.bucket = nil @@ -90,6 +86,7 @@ func (tx *Tx) Rollback() error { } err := tx.tx.Rollback() + tx.eb.Reset() tx.tx = nil tx.l.wLock.Unlock() diff --git a/ledis/tx_test.go b/ledis/tx_test.go index 026b70d..cb3a7f0 100644 --- a/ledis/tx_test.go +++ b/ledis/tx_test.go @@ -195,6 +195,7 @@ func testTx(t *testing.T, name string) { cfg.DBName = name cfg.LMDB.MapSize = 10 * 1024 * 1024 + cfg.UseReplication = true os.RemoveAll(cfg.DataDir) diff --git a/ledis/util.go b/ledis/util.go index 770bca1..a0abdd0 100644 --- a/ledis/util.go +++ b/ledis/util.go @@ -3,33 +3,16 @@ package ledis import ( "encoding/binary" "errors" - "reflect" + "github.com/siddontang/go/hack" "strconv" - "unsafe" ) var errIntNumber = errors.New("invalid integer") -// no copy to change slice to string -// use your own risk -func String(b []byte) (s string) { - pbytes := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - pstring := (*reflect.StringHeader)(unsafe.Pointer(&s)) - pstring.Data = pbytes.Data - pstring.Len = pbytes.Len - return -} - -// no copy to change string to slice -// use your own risk -func Slice(s string) (b []byte) { - pbytes := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - pstring := (*reflect.StringHeader)(unsafe.Pointer(&s)) - pbytes.Data = pstring.Data - pbytes.Len = pstring.Len - pbytes.Cap = pstring.Len - return -} +/* + Below I forget why I use little endian to store int. + Maybe I was foolish at that time. 
+*/ func Int64(v []byte, err error) (int64, error) { if err != nil { @@ -43,12 +26,21 @@ func Int64(v []byte, err error) (int64, error) { return int64(binary.LittleEndian.Uint64(v)), nil } +func Uint64(v []byte, err error) (uint64, error) { + if err != nil { + return 0, err + } else if v == nil || len(v) == 0 { + return 0, nil + } else if len(v) != 8 { + return 0, errIntNumber + } + + return binary.LittleEndian.Uint64(v), nil +} + func PutInt64(v int64) []byte { - var b []byte - pbytes := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - pbytes.Data = uintptr(unsafe.Pointer(&v)) - pbytes.Len = 8 - pbytes.Cap = 8 + b := make([]byte, 8) + binary.LittleEndian.PutUint64(b, uint64(v)) return b } @@ -58,7 +50,17 @@ func StrInt64(v []byte, err error) (int64, error) { } else if v == nil { return 0, nil } else { - return strconv.ParseInt(String(v), 10, 64) + return strconv.ParseInt(hack.String(v), 10, 64) + } +} + +func StrUint64(v []byte, err error) (uint64, error) { + if err != nil { + return 0, err + } else if v == nil { + return 0, nil + } else { + return strconv.ParseUint(hack.String(v), 10, 64) } } @@ -68,7 +70,7 @@ func StrInt32(v []byte, err error) (int32, error) { } else if v == nil { return 0, nil } else { - res, err := strconv.ParseInt(String(v), 10, 32) + res, err := strconv.ParseInt(hack.String(v), 10, 32) return int32(res), err } } @@ -79,35 +81,14 @@ func StrInt8(v []byte, err error) (int8, error) { } else if v == nil { return 0, nil } else { - res, err := strconv.ParseInt(String(v), 10, 8) + res, err := strconv.ParseInt(hack.String(v), 10, 8) return int8(res), err } } -func StrPutInt64(v int64) []byte { - return strconv.AppendInt(nil, v, 10) -} - -func MinUInt32(a uint32, b uint32) uint32 { - if a > b { - return b - } else { - return a - } -} - -func MaxUInt32(a uint32, b uint32) uint32 { - if a > b { - return a - } else { - return b - } -} - -func MaxInt32(a int32, b int32) int32 { - if a > b { - return a - } else { - return b +func AsyncNotify(ch chan struct{}) 
{
+	select {
+	case ch <- struct{}{}:
+	default:
 	}
 }
diff --git a/rpl/file_store.go b/rpl/file_store.go
new file mode 100644
index 0000000..df51b03
--- /dev/null
+++ b/rpl/file_store.go
@@ -0,0 +1,242 @@
+package rpl
+
+import (
+	"fmt"
+	"github.com/siddontang/go/log"
+	"io/ioutil"
+	"os"
+	"path"
+	"strconv"
+	"strings"
+	"sync"
+)
+
+const (
+	defaultMaxLogFileSize = 1024 * 1024 * 1024
+)
+
+/*
+index file format (one log file name per line; the numeric suffix is
+zero-padded to 7 digits by formatLogFileName):
+ledis-bin.0000001
+ledis-bin.0000002
+ledis-bin.0000003
+*/
+
+// FileStore is an unfinished file-backed LogStore. Only the index-file
+// bookkeeping is implemented; every LogStore method below panics.
+type FileStore struct {
+	LogStore
+
+	m sync.Mutex
+
+	maxFileSize int
+
+	first uint64
+	last  uint64
+
+	logFile      *os.File
+	logNames     []string
+	nextLogIndex int64
+
+	indexName string
+
+	path string
+}
+
+// NewFileStore creates the directory path if needed and loads the log index
+// found there.
+func NewFileStore(path string) (*FileStore, error) {
+	s := new(FileStore)
+
+	if err := os.MkdirAll(path, 0755); err != nil {
+		return nil, err
+	}
+
+	s.path = path
+
+	s.maxFileSize = defaultMaxLogFileSize
+
+	s.first = 0
+	s.last = 0
+
+	s.logNames = make([]string, 0, 16)
+
+	if err := s.loadIndex(); err != nil {
+		return nil, err
+	}
+
+	return s, nil
+}
+
+func (s *FileStore) SetMaxFileSize(size int) {
+	s.maxFileSize = size
+}
+
+func (s *FileStore) GetLog(id uint64, log *Log) error {
+	panic("not implementation")
+	return nil
+}
+
+func (s *FileStore) SeekLog(id uint64, log *Log) error {
+	panic("not implementation")
+	return nil
+}
+
+func (s *FileStore) FirstID() (uint64, error) {
+	panic("not implementation")
+	return 0, nil
+}
+
+func (s *FileStore) LastID() (uint64, error) {
+	panic("not implementation")
+	return 0, nil
+}
+
+func (s *FileStore) StoreLog(log *Log) error {
+	panic("not implementation")
+	return nil
+}
+
+func (s *FileStore) StoreLogs(logs []*Log) error {
+	panic("not implementation")
+	return nil
+}
+
+func (s *FileStore) Purge(n uint64) error {
+	panic("not implementation")
+	return nil
+}
+
+// PurgeExpired uses the name declared by the LogStore interface. It was
+// misspelled "PuregeExpired", so calls to PurgeExpired fell through to the
+// embedded LogStore, which NewFileStore never sets.
+func (s *FileStore) PurgeExpired(n int64) error {
+	panic("not implementation")
+	return nil
+}
+
+func (s *FileStore) Clear() error {
+	panic("not implementation")
+	
return nil
+}
+
+func (s *FileStore) Close() error {
+	panic("not implementation")
+	return nil
+}
+
+// flushIndex writes the names in s.logNames, one per line, to
+// "<indexName>.bak" and then renames that file over the index file.
+func (s *FileStore) flushIndex() error {
+	data := strings.Join(s.logNames, "\n")
+
+	bakName := fmt.Sprintf("%s.bak", s.indexName)
+	// O_TRUNC: a .bak left over from an earlier failed flush may be longer
+	// than the new data; without truncation its tail would end up in the index.
+	f, err := os.OpenFile(bakName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+	if err != nil {
+		log.Error("create bak index error %s", err.Error())
+		return err
+	}
+
+	if _, err := f.WriteString(data); err != nil {
+		log.Error("write index error %s", err.Error())
+		f.Close()
+		return err
+	}
+
+	// Do not rename a file whose Close reported an error.
+	if err := f.Close(); err != nil {
+		log.Error("write index error %s", err.Error())
+		return err
+	}
+
+	if err := os.Rename(bakName, s.indexName); err != nil {
+		log.Error("rename bak index error %s", err.Error())
+		return err
+	}
+
+	return nil
+}
+
+// fileExists reports whether name exists inside the store directory. Any
+// Stat result other than "not exist" counts as existing.
+func (s *FileStore) fileExists(name string) bool {
+	p := path.Join(s.path, name)
+	_, err := os.Stat(p)
+	return !os.IsNotExist(err)
+}
+
+// loadIndex reads the index file (if present), keeps the listed log files
+// that still exist, and sets s.nextLogIndex to one past the numeric suffix
+// of the last kept name (or 1 when there is none).
+func (s *FileStore) loadIndex() error {
+	s.indexName = path.Join(s.path, "ledis-bin.index")
+	if _, err := os.Stat(s.indexName); os.IsNotExist(err) {
+		//no index file, nothing to do
+	} else {
+		indexData, err := ioutil.ReadFile(s.indexName)
+		if err != nil {
+			return err
+		}
+
+		lines := strings.Split(string(indexData), "\n")
+		for _, line := range lines {
+			line = strings.Trim(line, "\r\n ")
+			if len(line) == 0 {
+				continue
+			}
+
+			if s.fileExists(line) {
+				s.logNames = append(s.logNames, line)
+			} else {
+				log.Info("log %s has not exists", line)
+			}
+		}
+	}
+
+	var err error
+	if len(s.logNames) == 0 {
+		s.nextLogIndex = 1
+	} else {
+		lastName := s.logNames[len(s.logNames)-1]
+
+		// path.Ext returns "" for a name without a dot; slicing that with
+		// [1:] would panic, so reject such names with an error instead.
+		ext := path.Ext(lastName)
+		if ext == "" {
+			err = fmt.Errorf("logfile name %s has no index suffix", lastName)
+			log.Error("invalid logfile name %s", err.Error())
+			return err
+		}
+
+		if s.nextLogIndex, err = strconv.ParseInt(ext[1:], 10, 64); err != nil {
+			log.Error("invalid logfile name %s", err.Error())
+			return err
+		}
+
+		//like mysql, if server restart, a new log will create
+		s.nextLogIndex++
+	}
+
+	return nil
+}
+
+func (s *FileStore) openNewLogFile() error {
+	var err error
+	lastName := s.formatLogFileName(s.nextLogIndex)
+
+	logPath := path.Join(s.path, lastName)
+	if s.logFile, err = os.OpenFile(logPath, 
os.O_CREATE|os.O_WRONLY, 0644); err != nil { + log.Error("open new logfile error %s", err.Error()) + return err + } + + s.logNames = append(s.logNames, lastName) + + if err = s.flushIndex(); err != nil { + return err + } + + return nil +} + +func (s *FileStore) checkLogFileSize() bool { + if s.logFile == nil { + return false + } + + st, _ := s.logFile.Stat() + if st.Size() >= int64(s.maxFileSize) { + s.closeLog() + return true + } + + return false +} + +func (s *FileStore) closeLog() { + if s.logFile == nil { + return + } + + s.nextLogIndex++ + + s.logFile.Close() + s.logFile = nil +} + +func (s *FileStore) formatLogFileName(index int64) string { + return fmt.Sprintf("ledis-bin.%07d", index) +} diff --git a/rpl/goleveldb_store.go b/rpl/goleveldb_store.go new file mode 100644 index 0000000..f9d2a7e --- /dev/null +++ b/rpl/goleveldb_store.go @@ -0,0 +1,276 @@ +package rpl + +import ( + "bytes" + "fmt" + "github.com/siddontang/go/num" + "github.com/siddontang/ledisdb/config" + "github.com/siddontang/ledisdb/store" + "os" + "sync" + "time" +) + +type GoLevelDBStore struct { + LogStore + + m sync.Mutex + db *store.DB + + cfg *config.Config + + first uint64 + last uint64 +} + +func (s *GoLevelDBStore) FirstID() (uint64, error) { + s.m.Lock() + id, err := s.firstID() + s.m.Unlock() + + return id, err +} + +func (s *GoLevelDBStore) LastID() (uint64, error) { + s.m.Lock() + id, err := s.lastID() + s.m.Unlock() + + return id, err +} + +func (s *GoLevelDBStore) firstID() (uint64, error) { + if s.first != InvalidLogID { + return s.first, nil + } + + it := s.db.NewIterator() + defer it.Close() + + it.SeekToFirst() + + if it.Valid() { + s.first = num.BytesToUint64(it.RawKey()) + } + + return s.first, nil +} + +func (s *GoLevelDBStore) lastID() (uint64, error) { + if s.last != InvalidLogID { + return s.last, nil + } + + it := s.db.NewIterator() + defer it.Close() + + it.SeekToLast() + + if it.Valid() { + s.last = num.BytesToUint64(it.RawKey()) + } + + return s.last, nil +} + 
+// GetLog decodes the value stored under id into log. It returns
+// ErrLogNotFound when the store holds no value for that id.
+func (s *GoLevelDBStore) GetLog(id uint64, log *Log) error {
+	v, err := s.db.Get(num.Uint64ToBytes(id))
+	if err != nil {
+		return err
+	} else if v == nil {
+		return ErrLogNotFound
+	} else {
+		return log.Decode(bytes.NewBuffer(v))
+	}
+}
+
+// SeekLog positions an iterator at id and decodes the entry found there into
+// log, or returns ErrLogNotFound when the iterator is not valid after the seek.
+func (s *GoLevelDBStore) SeekLog(id uint64, log *Log) error {
+	it := s.db.NewIterator()
+	defer it.Close()
+
+	it.Seek(num.Uint64ToBytes(id))
+
+	if !it.Valid() {
+		return ErrLogNotFound
+	} else {
+		return log.Decode(bytes.NewBuffer(it.RawValue()))
+	}
+}
+
+func (s *GoLevelDBStore) StoreLog(log *Log) error {
+	return s.StoreLogs([]*Log{log})
+}
+
+// StoreLogs writes logs in one batch. Every ID must be greater than the
+// current last ID and than the ID before it, otherwise ErrLessLogID is
+// returned and nothing is committed.
+func (s *GoLevelDBStore) StoreLogs(logs []*Log) error {
+	s.m.Lock()
+	defer s.m.Unlock()
+
+	w := s.db.NewWriteBatch()
+	defer w.Rollback()
+
+	last, err := s.lastID()
+	if err != nil {
+		return err
+	}
+
+	// invalidate the cached last ID until the batch has been committed
+	s.last = InvalidLogID
+
+	var buf bytes.Buffer
+	for _, log := range logs {
+		buf.Reset()
+
+		if log.ID <= last {
+			return ErrLessLogID
+		}
+
+		last = log.ID
+		key := num.Uint64ToBytes(log.ID)
+
+		if err := log.Encode(&buf); err != nil {
+			return err
+		}
+		w.Put(key, buf.Bytes())
+	}
+
+	if err := w.Commit(); err != nil {
+		return err
+	}
+
+	s.last = last
+	return nil
+}
+
+// Purge deletes the IDs from the first ID up to, but not including,
+// min(last ID, first ID + n).
+func (s *GoLevelDBStore) Purge(n uint64) error {
+	s.m.Lock()
+	defer s.m.Unlock()
+
+	var first, last uint64
+	var err error
+
+	first, err = s.firstID()
+	if err != nil {
+		return err
+	}
+
+	last, err = s.lastID()
+	if err != nil {
+		return err
+	}
+
+	start := first
+	stop := num.MinUint64(last, first+n)
+
+	w := s.db.NewWriteBatch()
+	defer w.Rollback()
+
+	s.reset()
+
+	for i := start; i < stop; i++ {
+		w.Delete(num.Uint64ToBytes(i))
+	}
+
+	if err = w.Commit(); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// PurgeExpired deletes logs from the front of the store until it meets one
+// whose CreateTime is later than now minus n seconds. n must be positive.
+func (s *GoLevelDBStore) PurgeExpired(n int64) error {
+	if n <= 0 {
+		return fmt.Errorf("invalid expired time %d", n)
+	}
+
+	t := uint32(time.Now().Unix() - int64(n))
+
+	s.m.Lock()
+	defer s.m.Unlock()
+
+	s.reset()
+
+	it := s.db.NewIterator()
+	// release the iterator on every return path, as firstID, lastID and
+	// SeekLog already do
+	defer it.Close()
+	it.SeekToFirst()
+
+	w := 
s.db.NewWriteBatch() + defer w.Rollback() + + l := new(Log) + for ; it.Valid(); it.Next() { + v := it.RawValue() + + if err := l.Unmarshal(v); err != nil { + return err + } else if l.CreateTime > t { + break + } else { + w.Delete(it.RawKey()) + } + } + + if err := w.Commit(); err != nil { + return err + } + + return nil +} + +func (s *GoLevelDBStore) Clear() error { + s.m.Lock() + defer s.m.Unlock() + + if s.db != nil { + s.db.Close() + } + + s.reset() + os.RemoveAll(s.cfg.DBPath) + + return s.open() +} + +func (s *GoLevelDBStore) reset() { + s.first = InvalidLogID + s.last = InvalidLogID +} + +func (s *GoLevelDBStore) Close() error { + s.m.Lock() + defer s.m.Unlock() + + if s.db == nil { + return nil + } + + err := s.db.Close() + s.db = nil + return err +} + +func (s *GoLevelDBStore) open() error { + var err error + + s.first = InvalidLogID + s.last = InvalidLogID + + s.db, err = store.Open(s.cfg) + return err +} + +func NewGoLevelDBStore(base string) (*GoLevelDBStore, error) { + cfg := new(config.Config) + cfg.DBName = "goleveldb" + cfg.DBPath = base + cfg.LevelDB.BlockSize = 4 * 1024 * 1024 + cfg.LevelDB.CacheSize = 16 * 1024 * 1024 + cfg.LevelDB.WriteBufferSize = 4 * 1024 * 1024 + cfg.LevelDB.Compression = false + + s := new(GoLevelDBStore) + s.cfg = cfg + + if err := s.open(); err != nil { + return nil, err + } + + return s, nil +} diff --git a/rpl/log.go b/rpl/log.go new file mode 100644 index 0000000..261e852 --- /dev/null +++ b/rpl/log.go @@ -0,0 +1,102 @@ +package rpl + +import ( + "bytes" + "encoding/binary" + "io" +) + +type Log struct { + ID uint64 + CreateTime uint32 + Compression uint8 + + Data []byte +} + +func (l *Log) HeadSize() int { + return 17 +} + +func (l *Log) Size() int { + return l.HeadSize() + len(l.Data) +} + +func (l *Log) Marshal() ([]byte, error) { + buf := bytes.NewBuffer(make([]byte, l.HeadSize()+len(l.Data))) + buf.Reset() + + if err := l.Encode(buf); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func (l *Log) 
Unmarshal(b []byte) error { + buf := bytes.NewBuffer(b) + + return l.Decode(buf) +} + +func (l *Log) Encode(w io.Writer) error { + buf := make([]byte, l.HeadSize()) + + pos := 0 + binary.BigEndian.PutUint64(buf[pos:], l.ID) + pos += 8 + + binary.BigEndian.PutUint32(buf[pos:], l.CreateTime) + pos += 4 + + buf[pos] = l.Compression + pos++ + + binary.BigEndian.PutUint32(buf[pos:], uint32(len(l.Data))) + + if n, err := w.Write(buf); err != nil { + return err + } else if n != len(buf) { + return io.ErrShortWrite + } + + if n, err := w.Write(l.Data); err != nil { + return err + } else if n != len(l.Data) { + return io.ErrShortWrite + } + return nil +} + +func (l *Log) Decode(r io.Reader) error { + buf := make([]byte, l.HeadSize()) + + if _, err := io.ReadFull(r, buf); err != nil { + return err + } + + pos := 0 + l.ID = binary.BigEndian.Uint64(buf[pos:]) + pos += 8 + + l.CreateTime = binary.BigEndian.Uint32(buf[pos:]) + pos += 4 + + l.Compression = uint8(buf[pos]) + pos++ + + length := binary.BigEndian.Uint32(buf[pos:]) + + l.Data = l.Data[0:0] + + if cap(l.Data) >= int(length) { + l.Data = l.Data[0:length] + } else { + l.Data = make([]byte, length) + } + if _, err := io.ReadFull(r, l.Data); err != nil { + return err + } + + return nil +} diff --git a/rpl/log_test.go b/rpl/log_test.go new file mode 100644 index 0000000..7ef008a --- /dev/null +++ b/rpl/log_test.go @@ -0,0 +1,39 @@ +package rpl + +import ( + "bytes" + "reflect" + "testing" +) + +func TestLog(t *testing.T) { + l1 := &Log{ID: 1, CreateTime: 100, Data: []byte("hello world")} + + var buf bytes.Buffer + + if err := l1.Encode(&buf); err != nil { + t.Fatal(err) + } + + l2 := &Log{} + + if err := l2.Decode(&buf); err != nil { + t.Fatal(err) + } + + if !reflect.DeepEqual(l1, l2) { + t.Fatal("must equal") + } + + if buf, err := l1.Marshal(); err != nil { + t.Fatal(err) + } else { + if err = l2.Unmarshal(buf); err != nil { + t.Fatal(err) + } + } + + if !reflect.DeepEqual(l1, l2) { + t.Fatal("must equal") + } +} diff 
--git a/rpl/rpl.go b/rpl/rpl.go
new file mode 100644
index 0000000..3eaad9a
--- /dev/null
+++ b/rpl/rpl.go
@@ -0,0 +1,274 @@
+package rpl
+
+import (
+	"encoding/binary"
+	"github.com/siddontang/go/log"
+	"github.com/siddontang/go/snappy"
+	"github.com/siddontang/ledisdb/config"
+	"os"
+	"path"
+	"sync"
+	"time"
+)
+
+// Stat is a snapshot of the first and last stored log IDs and the commit ID.
+type Stat struct {
+	FirstID  uint64
+	LastID   uint64
+	CommitID uint64
+}
+
+// Replication couples a LogStore with a commit.log file that holds the ID of
+// the last committed log.
+type Replication struct {
+	m sync.Mutex
+
+	cfg *config.Config
+
+	s LogStore
+
+	commitID  uint64
+	commitLog *os.File
+
+	quit chan struct{}
+
+	wg sync.WaitGroup
+
+	nc chan struct{}
+}
+
+// NewReplication opens the log store under <Replication.Path>/wal and the
+// commit log <Replication.Path>/commit.log, and starts the background purge
+// goroutine. An empty Replication.Path defaults to <DataDir>/rpl. On error
+// everything opened so far is closed again.
+func NewReplication(cfg *config.Config) (*Replication, error) {
+	if len(cfg.Replication.Path) == 0 {
+		cfg.Replication.Path = path.Join(cfg.DataDir, "rpl")
+	}
+
+	base := cfg.Replication.Path
+
+	r := new(Replication)
+
+	r.quit = make(chan struct{})
+	r.nc = make(chan struct{})
+
+	r.cfg = cfg
+
+	var err error
+	if r.s, err = NewGoLevelDBStore(path.Join(base, "wal")); err != nil {
+		return nil, err
+	}
+
+	if r.commitLog, err = os.OpenFile(path.Join(base, "commit.log"), os.O_RDWR|os.O_CREATE, 0644); err != nil {
+		// do not leak the store opened above
+		r.s.Close()
+		return nil, err
+	}
+
+	// An empty commit log means nothing has been committed yet; r.commitID
+	// then keeps its zero value. The Stat error is checked so that a failed
+	// Stat is reported instead of dereferencing a nil FileInfo.
+	st, err := r.commitLog.Stat()
+	if err == nil && st.Size() > 0 {
+		err = binary.Read(r.commitLog, binary.BigEndian, &r.commitID)
+	}
+	if err != nil {
+		r.commitLog.Close()
+		r.s.Close()
+		return nil, err
+	}
+
+	// Register the goroutine with the WaitGroup before starting it, so the
+	// Wait in Close cannot run before the goroutine has been counted.
+	// onPurgeExpired does its own Add/Done as well; that pair stays balanced.
+	r.wg.Add(1)
+	go func() {
+		defer r.wg.Done()
+		r.onPurgeExpired()
+	}()
+
+	return r, nil
+}
+
+// Close stops the purge goroutine, waits for it, and closes the log store and
+// the commit log.
+func (r *Replication) Close() error {
+	close(r.quit)
+
+	r.wg.Wait()
+
+	if r.s != nil {
+		r.s.Close()
+		r.s = nil
+	}
+
+	if r.commitLog != nil {
+		r.commitLog.Close()
+		r.commitLog = nil
+	}
+
+	return nil
+}
+
+func (r *Replication) Log(data []byte) (*Log, error) {
+	if r.cfg.Replication.Compression {
+		//todo optimize
+		var err error
+		if data, err = snappy.Encode(nil, data); err != nil {
+			return nil, err
+		}
+	}
+
+	r.m.Lock()
+	defer r.m.Unlock()
+
+	lastID, err := r.s.LastID()
+	if err != nil {
+		return nil, err
+	}
+
+	commitId := r.commitID
+	if lastID < commitId {
+		lastID = commitId
+	}
+
+	l := new(Log)
+	l.ID = 
lastID + 1 + l.CreateTime = uint32(time.Now().Unix()) + + if r.cfg.Replication.Compression { + l.Compression = 1 + } else { + l.Compression = 0 + } + + l.Data = data + + if err = r.s.StoreLog(l); err != nil { + return nil, err + } + + close(r.nc) + r.nc = make(chan struct{}) + + return l, nil +} + +func (r *Replication) WaitLog() <-chan struct{} { + return r.nc +} + +func (r *Replication) StoreLog(log *Log) error { + return r.StoreLogs([]*Log{log}) +} + +func (r *Replication) StoreLogs(logs []*Log) error { + r.m.Lock() + defer r.m.Unlock() + + return r.s.StoreLogs(logs) +} + +func (r *Replication) FirstLogID() (uint64, error) { + r.m.Lock() + defer r.m.Unlock() + id, err := r.s.FirstID() + return id, err +} + +func (r *Replication) LastLogID() (uint64, error) { + r.m.Lock() + defer r.m.Unlock() + id, err := r.s.LastID() + return id, err +} + +func (r *Replication) LastCommitID() (uint64, error) { + r.m.Lock() + id := r.commitID + r.m.Unlock() + return id, nil +} + +func (r *Replication) UpdateCommitID(id uint64) error { + r.m.Lock() + defer r.m.Unlock() + + return r.updateCommitID(id) +} + +func (r *Replication) Stat() (*Stat, error) { + r.m.Lock() + defer r.m.Unlock() + + s := &Stat{} + var err error + + if s.FirstID, err = r.s.FirstID(); err != nil { + return nil, err + } + + if s.LastID, err = r.s.LastID(); err != nil { + return nil, err + } + + s.CommitID = r.commitID + return s, nil +} + +func (r *Replication) updateCommitID(id uint64) error { + if _, err := r.commitLog.Seek(0, os.SEEK_SET); err != nil { + return err + } + + if err := binary.Write(r.commitLog, binary.BigEndian, id); err != nil { + return err + } + + r.commitID = id + + return nil +} + +func (r *Replication) CommitIDBehind() (bool, error) { + r.m.Lock() + defer r.m.Unlock() + + id, err := r.s.LastID() + if err != nil { + return false, err + } + + return id > r.commitID, nil +} + +func (r *Replication) GetLog(id uint64, log *Log) error { + return r.s.GetLog(id, log) +} + +func (r *Replication) 
NextNeedCommitLog(log *Log) error { + r.m.Lock() + defer r.m.Unlock() + + id, err := r.s.LastID() + if err != nil { + return err + } + + if id <= r.commitID { + return ErrNoBehindLog + } + + return r.s.GetLog(r.commitID+1, log) + +} + +func (r *Replication) Clear() error { + return r.ClearWithCommitID(0) +} + +func (r *Replication) ClearWithCommitID(id uint64) error { + r.m.Lock() + defer r.m.Unlock() + + if err := r.s.Clear(); err != nil { + return err + } + + return r.updateCommitID(id) +} + +func (r *Replication) onPurgeExpired() { + r.wg.Add(1) + defer r.wg.Done() + + for { + select { + case <-time.After(1 * time.Hour): + n := (r.cfg.Replication.ExpiredLogDays * 24 * 3600) + r.m.Lock() + if err := r.s.PurgeExpired(int64(n)); err != nil { + log.Error("purge expired log error %s", err.Error()) + } + r.m.Unlock() + case <-r.quit: + return + } + } +} diff --git a/rpl/rpl_test.go b/rpl/rpl_test.go new file mode 100644 index 0000000..06fcf7d --- /dev/null +++ b/rpl/rpl_test.go @@ -0,0 +1,44 @@ +package rpl + +import ( + "github.com/siddontang/ledisdb/config" + "io/ioutil" + "os" + "testing" +) + +func TestReplication(t *testing.T) { + dir, err := ioutil.TempDir("", "rpl") + if err != nil { + t.Fatalf("err: %v ", err) + } + defer os.RemoveAll(dir) + + c := new(config.Config) + c.Replication.Path = dir + + r, err := NewReplication(c) + if err != nil { + t.Fatal(err) + } + + if l1, err := r.Log([]byte("hello world")); err != nil { + t.Fatal(err) + } else if l1.ID != 1 { + t.Fatal(l1.ID) + } + + if b, _ := r.CommitIDBehind(); !b { + t.Fatal("must backward") + } + + if err := r.UpdateCommitID(1); err != nil { + t.Fatal(err) + } + + if b, _ := r.CommitIDBehind(); b { + t.Fatal("must not backward") + } + + r.Close() +} diff --git a/rpl/store.go b/rpl/store.go new file mode 100644 index 0000000..8d5e8ec --- /dev/null +++ b/rpl/store.go @@ -0,0 +1,40 @@ +package rpl + +import ( + "errors" +) + +const ( + InvalidLogID uint64 = 0 +) + +var ( + ErrLogNotFound = errors.New("log 
not found") + ErrLessLogID = errors.New("log id is less") + ErrNoBehindLog = errors.New("no behind commit log") +) + +type LogStore interface { + GetLog(id uint64, log *Log) error + + // Get the first log which ID is equal or larger than id + SeekLog(id uint64, log *Log) error + + FirstID() (uint64, error) + LastID() (uint64, error) + + // if log id is less than current last id, return error + StoreLog(log *Log) error + StoreLogs(logs []*Log) error + + // Delete first n logs + Purge(n uint64) error + + // Delete logs before n seconds + PurgeExpired(n int64) error + + // Clear all logs + Clear() error + + Close() error +} diff --git a/rpl/store_test.go b/rpl/store_test.go new file mode 100644 index 0000000..ddb43f0 --- /dev/null +++ b/rpl/store_test.go @@ -0,0 +1,189 @@ +package rpl + +import ( + "io/ioutil" + "os" + "testing" + "time" +) + +func TestGoLevelDBStore(t *testing.T) { + // Create a test dir + dir, err := ioutil.TempDir("", "wal") + if err != nil { + t.Fatalf("err: %v ", err) + } + defer os.RemoveAll(dir) + + // New level + l, err := NewGoLevelDBStore(dir) + if err != nil { + t.Fatalf("err: %v ", err) + } + defer l.Close() + + testLogs(t, l) +} + +func testLogs(t *testing.T, l LogStore) { + // Should be no first index + idx, err := l.FirstID() + if err != nil { + t.Fatalf("err: %v ", err) + } + if idx != 0 { + t.Fatalf("bad idx: %d", idx) + } + + // Should be no last index + idx, err = l.LastID() + if err != nil { + t.Fatalf("err: %v ", err) + } + if idx != 0 { + t.Fatalf("bad idx: %d", idx) + } + + // Try a filed fetch + var out Log + if err := l.GetLog(10, &out); err.Error() != "log not found" { + t.Fatalf("err: %v ", err) + } + + // Write out a log + log := Log{ + ID: 1, + Data: []byte("first"), + } + for i := 1; i <= 10; i++ { + log.ID = uint64(i) + if err := l.StoreLog(&log); err != nil { + t.Fatalf("err: %v", err) + } + } + + // Attempt to write multiple logs + var logs []*Log + for i := 11; i <= 20; i++ { + nl := &Log{ + ID: uint64(i), + Data: 
[]byte("first"),
+		}
+		logs = append(logs, nl)
+	}
+	if err := l.StoreLogs(logs); err != nil {
+		t.Fatalf("err: %v", err)
+	}
+
+	// Try to fetch
+	if err := l.GetLog(10, &out); err != nil {
+		t.Fatalf("err: %v ", err)
+	}
+
+	// Try to fetch
+	if err := l.GetLog(20, &out); err != nil {
+		t.Fatalf("err: %v ", err)
+	}
+
+	// Check the lowest index
+	idx, err = l.FirstID()
+	if err != nil {
+		t.Fatalf("err: %v ", err)
+	}
+	if idx != 1 {
+		t.Fatalf("bad idx: %d", idx)
+	}
+
+	// Check the highest index
+	idx, err = l.LastID()
+	if err != nil {
+		t.Fatalf("err: %v ", err)
+	}
+	if idx != 20 {
+		t.Fatalf("bad idx: %d", idx)
+	}
+
+	// Delete a prefix: the first 5 logs
+	if err := l.Purge(5); err != nil {
+		t.Fatalf("err: %v ", err)
+	}
+
+	// Verify they are all deleted
+	for i := 1; i <= 5; i++ {
+		if err := l.GetLog(uint64(i), &out); err != ErrLogNotFound {
+			t.Fatalf("err: %v ", err)
+		}
+	}
+
+	// First index should now be 6, last index still 20
+	idx, err = l.FirstID()
+	if err != nil {
+		t.Fatalf("err: %v ", err)
+	}
+	if idx != 6 {
+		t.Fatalf("bad idx: %d", idx)
+	}
+	idx, err = l.LastID()
+	if err != nil {
+		t.Fatalf("err: %v ", err)
+	}
+	if idx != 20 {
+		t.Fatalf("bad idx: %d", idx)
+	}
+
+	// Should not be able to fetch
+	if err := l.GetLog(5, &out); err != ErrLogNotFound {
+		t.Fatalf("err: %v ", err)
+	}
+
+	if err := l.Clear(); err != nil {
+		t.Fatal(err)
+	}
+
+	idx, err = l.FirstID()
+	if err != nil {
+		t.Fatalf("err: %v ", err)
+	}
+	if idx != 0 {
+		t.Fatalf("bad idx: %d", idx)
+	}
+
+	idx, err = l.LastID()
+	if err != nil {
+		t.Fatalf("err: %v ", err)
+	}
+	if idx != 0 {
+		t.Fatalf("bad idx: %d", idx)
+	}
+
+	// Store 20 logs created 20 seconds ago, then purge everything older than
+	// one second: the store must end up empty.
+	now := uint32(time.Now().Unix())
+	logs = []*Log{}
+	for i := 1; i <= 20; i++ {
+		nl := &Log{
+			ID:         uint64(i),
+			CreateTime: now - 20,
+			Data:       []byte("first"),
+		}
+		logs = append(logs, nl)
+	}
+
+	// Without this store the PurgeExpired call below runs on an empty store
+	// and the following checks pass without testing anything.
+	if err := l.StoreLogs(logs); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := l.PurgeExpired(1); err != nil {
+		t.Fatal(err)
+	}
+
+	idx, err = l.FirstID()
+	if err != nil {
+		t.Fatalf("err: %v ", err)
+	}
+	if idx != 0 {
+		t.Fatalf("bad idx: %d", idx)
+	}
+
+	idx, 
err = l.LastID() + if err != nil { + t.Fatalf("err: %v ", err) + } + if idx != 0 { + t.Fatalf("bad idx: %d", idx) + } +} diff --git a/server/accesslog.go b/server/accesslog.go index 9e517a8..4c41f1e 100644 --- a/server/accesslog.go +++ b/server/accesslog.go @@ -1,7 +1,7 @@ package server import ( - "github.com/siddontang/go-log/log" + "github.com/siddontang/go/log" ) const ( diff --git a/server/app.go b/server/app.go index edd65c8..dbf12e5 100644 --- a/server/app.go +++ b/server/app.go @@ -7,6 +7,7 @@ import ( "net/http" "path" "strings" + "sync" ) type App struct { @@ -29,6 +30,10 @@ type App struct { info *info s *script + + // handle slaves + slock sync.Mutex + slaves map[*client]struct{} } func netType(s string) string { @@ -53,6 +58,8 @@ func NewApp(cfg *config.Config) (*App, error) { app.cfg = cfg + app.slaves = make(map[*client]struct{}) + var err error if app.info, err = newInfo(app); err != nil { @@ -81,7 +88,13 @@ func NewApp(cfg *config.Config) (*App, error) { } } - if app.ldb, err = ledis.Open(cfg); err != nil { + flag := ledis.RDWRMode + if len(app.cfg.SlaveOf) > 0 { + //slave must readonly + flag = ledis.ROnlyMode + } + + if app.ldb, err = ledis.Open2(cfg, flag); err != nil { return nil, err } @@ -89,6 +102,8 @@ func NewApp(cfg *config.Config) (*App, error) { app.openScript() + app.ldb.AddNewLogEventHandler(app.publishNewLog) + return app, nil } @@ -120,7 +135,7 @@ func (app *App) Close() { func (app *App) Run() { if len(app.cfg.SlaveOf) > 0 { - app.slaveof(app.cfg.SlaveOf) + app.slaveof(app.cfg.SlaveOf, false) } go app.httpServe() diff --git a/server/client.go b/server/client.go index 27e08b1..ef9de76 100644 --- a/server/client.go +++ b/server/client.go @@ -43,6 +43,11 @@ type responseWriter interface { flush() } +type syncAck struct { + id uint64 + ch chan uint64 +} + type client struct { app *App ldb *ledis.Ledis @@ -55,14 +60,18 @@ type client struct { resp responseWriter - syncBuf bytes.Buffer - compressBuf []byte + syncBuf bytes.Buffer + + 
lastLogID uint64 + + ack *syncAck reqErr chan error buf bytes.Buffer - tx *ledis.Tx + tx *ledis.Tx + script *ledis.Multi } @@ -73,7 +82,6 @@ func newClient(app *App) *client { c.ldb = app.ldb c.db, _ = app.ldb.Select(0) //use default db - c.compressBuf = []byte{} c.reqErr = make(chan error) return c diff --git a/server/client_http.go b/server/client_http.go index 0db0843..057ba6b 100644 --- a/server/client_http.go +++ b/server/client_http.go @@ -3,8 +3,9 @@ package server import ( "encoding/json" "fmt" - "github.com/siddontang/go-bson/bson" - "github.com/siddontang/go-log/log" + "github.com/siddontang/go/bson" + "github.com/siddontang/go/hack" + "github.com/siddontang/go/log" "github.com/siddontang/ledisdb/ledis" "github.com/ugorji/go/codec" "io" @@ -154,7 +155,7 @@ func (w *httpWriter) writeBulk(b []byte) { if b == nil { w.genericWrite(nil) } else { - w.genericWrite(ledis.String(b)) + w.genericWrite(hack.String(b)) } } @@ -168,7 +169,7 @@ func (w *httpWriter) writeSliceArray(lst [][]byte) { if elem == nil { arr[i] = nil } else { - arr[i] = ledis.String(elem) + arr[i] = hack.String(elem) } } w.genericWrite(arr) @@ -177,7 +178,7 @@ func (w *httpWriter) writeSliceArray(lst [][]byte) { func (w *httpWriter) writeFVPairArray(lst []ledis.FVPair) { m := make(map[string]string) for _, elem := range lst { - m[ledis.String(elem.Field)] = ledis.String(elem.Value) + m[hack.String(elem.Field)] = hack.String(elem.Value) } w.genericWrite(m) } @@ -187,13 +188,13 @@ func (w *httpWriter) writeScorePairArray(lst []ledis.ScorePair, withScores bool) if withScores { arr = make([]string, 2*len(lst)) for i, data := range lst { - arr[2*i] = ledis.String(data.Member) + arr[2*i] = hack.String(data.Member) arr[2*i+1] = strconv.FormatInt(data.Score, 10) } } else { arr = make([]string, len(lst)) for i, data := range lst { - arr[i] = ledis.String(data.Member) + arr[i] = hack.String(data.Member) } } w.genericWrite(arr) diff --git a/server/client_resp.go b/server/client_resp.go index 
e8fb1ff..078a02c 100644 --- a/server/client_resp.go +++ b/server/client_resp.go @@ -3,7 +3,9 @@ package server import ( "bufio" "errors" - "github.com/siddontang/go-log/log" + "github.com/siddontang/go/hack" + "github.com/siddontang/go/log" + "github.com/siddontang/go/num" "github.com/siddontang/ledisdb/ledis" "io" "net" @@ -57,6 +59,8 @@ func (c *respClient) run() { c.tx.Rollback() c.tx = nil } + + c.app.removeSlave(c.client) }() for { @@ -83,7 +87,7 @@ func (c *respClient) readRequest() ([][]byte, error) { } var nparams int - if nparams, err = strconv.Atoi(ledis.String(l[1:])); err != nil { + if nparams, err = strconv.Atoi(hack.String(l[1:])); err != nil { return nil, err } else if nparams <= 0 { return nil, errReadRequest @@ -100,7 +104,7 @@ func (c *respClient) readRequest() ([][]byte, error) { return nil, errReadRequest } else if l[0] == '$' { //handle resp string - if n, err = strconv.Atoi(ledis.String(l[1:])); err != nil { + if n, err = strconv.Atoi(hack.String(l[1:])); err != nil { return nil, err } else if n == -1 { req = append(req, nil) @@ -133,7 +137,7 @@ func (c *respClient) handleRequest(reqData [][]byte) { c.cmd = "" c.args = reqData[0:0] } else { - c.cmd = strings.ToLower(ledis.String(reqData[0])) + c.cmd = strings.ToLower(hack.String(reqData[0])) c.args = reqData[1:] } if c.cmd == "quit" { @@ -157,23 +161,23 @@ func newWriterRESP(conn net.Conn) *respWriter { } func (w *respWriter) writeError(err error) { - w.buff.Write(ledis.Slice("-ERR")) + w.buff.Write(hack.Slice("-ERR")) if err != nil { w.buff.WriteByte(' ') - w.buff.Write(ledis.Slice(err.Error())) + w.buff.Write(hack.Slice(err.Error())) } w.buff.Write(Delims) } func (w *respWriter) writeStatus(status string) { w.buff.WriteByte('+') - w.buff.Write(ledis.Slice(status)) + w.buff.Write(hack.Slice(status)) w.buff.Write(Delims) } func (w *respWriter) writeInteger(n int64) { w.buff.WriteByte(':') - w.buff.Write(ledis.StrPutInt64(n)) + w.buff.Write(num.FormatInt64ToSlice(n)) w.buff.Write(Delims) } @@ 
-182,7 +186,7 @@ func (w *respWriter) writeBulk(b []byte) { if b == nil { w.buff.Write(NullBulk) } else { - w.buff.Write(ledis.Slice(strconv.Itoa(len(b)))) + w.buff.Write(hack.Slice(strconv.Itoa(len(b)))) w.buff.Write(Delims) w.buff.Write(b) } @@ -196,7 +200,7 @@ func (w *respWriter) writeArray(lst []interface{}) { w.buff.Write(NullArray) w.buff.Write(Delims) } else { - w.buff.Write(ledis.Slice(strconv.Itoa(len(lst)))) + w.buff.Write(hack.Slice(strconv.Itoa(len(lst)))) w.buff.Write(Delims) for i := 0; i < len(lst); i++ { @@ -224,7 +228,7 @@ func (w *respWriter) writeSliceArray(lst [][]byte) { w.buff.Write(NullArray) w.buff.Write(Delims) } else { - w.buff.Write(ledis.Slice(strconv.Itoa(len(lst)))) + w.buff.Write(hack.Slice(strconv.Itoa(len(lst)))) w.buff.Write(Delims) for i := 0; i < len(lst); i++ { @@ -239,7 +243,7 @@ func (w *respWriter) writeFVPairArray(lst []ledis.FVPair) { w.buff.Write(NullArray) w.buff.Write(Delims) } else { - w.buff.Write(ledis.Slice(strconv.Itoa(len(lst) * 2))) + w.buff.Write(hack.Slice(strconv.Itoa(len(lst) * 2))) w.buff.Write(Delims) for i := 0; i < len(lst); i++ { @@ -256,10 +260,10 @@ func (w *respWriter) writeScorePairArray(lst []ledis.ScorePair, withScores bool) w.buff.Write(Delims) } else { if withScores { - w.buff.Write(ledis.Slice(strconv.Itoa(len(lst) * 2))) + w.buff.Write(hack.Slice(strconv.Itoa(len(lst) * 2))) w.buff.Write(Delims) } else { - w.buff.Write(ledis.Slice(strconv.Itoa(len(lst)))) + w.buff.Write(hack.Slice(strconv.Itoa(len(lst)))) w.buff.Write(Delims) } @@ -268,7 +272,7 @@ func (w *respWriter) writeScorePairArray(lst []ledis.ScorePair, withScores bool) w.writeBulk(lst[i].Member) if withScores { - w.writeBulk(ledis.StrPutInt64(lst[i].Score)) + w.writeBulk(num.FormatInt64ToSlice(lst[i].Score)) } } } @@ -276,7 +280,7 @@ func (w *respWriter) writeScorePairArray(lst []ledis.ScorePair, withScores bool) func (w *respWriter) writeBulkFrom(n int64, rb io.Reader) { w.buff.WriteByte('$') - 
w.buff.Write(ledis.Slice(strconv.FormatInt(n, 10))) + w.buff.Write(hack.Slice(strconv.FormatInt(n, 10))) w.buff.Write(Delims) io.Copy(w.buff, rb) diff --git a/server/cmd_bit.go b/server/cmd_bit.go index 1f83bdc..22d34fe 100644 --- a/server/cmd_bit.go +++ b/server/cmd_bit.go @@ -1,6 +1,8 @@ package server import ( + "github.com/siddontang/go/hack" + "github.com/siddontang/ledisdb/ledis" "strings" ) @@ -173,7 +175,7 @@ func boptCommand(c *client) error { return ErrCmdParams } - opDesc := strings.ToLower(ledis.String(args[0])) + opDesc := strings.ToLower(hack.String(args[0])) dstKey := args[1] srcKeys := args[2:] diff --git a/server/cmd_kv.go b/server/cmd_kv.go index c170601..f7a90d8 100644 --- a/server/cmd_kv.go +++ b/server/cmd_kv.go @@ -1,6 +1,7 @@ package server import ( + "github.com/siddontang/go/hack" "github.com/siddontang/ledisdb/ledis" "strconv" "strings" @@ -292,26 +293,26 @@ func parseScanArgs(c *client) (key []byte, match string, count int, err error) { } if len(args) == 3 { - switch strings.ToLower(ledis.String(args[1])) { + switch strings.ToLower(hack.String(args[1])) { case "match": - match = ledis.String(args[2]) + match = hack.String(args[2]) case "count": - count, err = strconv.Atoi(ledis.String(args[2])) + count, err = strconv.Atoi(hack.String(args[2])) default: err = ErrCmdParams return } } else if len(args) == 5 { - if strings.ToLower(ledis.String(args[1])) != "match" { + if strings.ToLower(hack.String(args[1])) != "match" { err = ErrCmdParams return - } else if strings.ToLower(ledis.String(args[3])) != "count" { + } else if strings.ToLower(hack.String(args[3])) != "count" { err = ErrCmdParams return } - match = ledis.String(args[2]) - count, err = strconv.Atoi(ledis.String(args[4])) + match = hack.String(args[2]) + count, err = strconv.Atoi(hack.String(args[4])) } if count <= 0 { diff --git a/server/cmd_replication.go b/server/cmd_replication.go index ec501f6..aa6ede4 100644 --- a/server/cmd_replication.go +++ b/server/cmd_replication.go @@ -1,9 
+1,8 @@
 package server
 
 import (
-	"encoding/binary"
 	"fmt"
-	"github.com/siddontang/go-snappy/snappy"
+	"github.com/siddontang/go/hack"
 	"github.com/siddontang/ledisdb/ledis"
 	"io/ioutil"
 	"os"
@@ -14,24 +13,32 @@ import (
 func slaveofCommand(c *client) error {
 	args := c.args
 
-	if len(args) != 2 {
+	if len(args) != 2 && len(args) != 3 { // "no one", "host port" or "host port restart"; the former "||" was always true
 		return ErrCmdParams
 	}
 
 	masterAddr := ""
+	restart := false
 
-	if strings.ToLower(ledis.String(args[0])) == "no" &&
-		strings.ToLower(ledis.String(args[1])) == "one" {
+	if strings.ToLower(hack.String(args[0])) == "no" &&
+		strings.ToLower(hack.String(args[1])) == "one" {
 		//stop replication, use master = ""
+		if len(args) != 2 {
+			return ErrCmdParams
+		}
 	} else {
-		if _, err := strconv.ParseInt(ledis.String(args[1]), 10, 16); err != nil {
+		if _, err := strconv.ParseUint(hack.String(args[1]), 10, 16); err != nil { // unsigned 16 bit: accepts ports 0-65535, a signed parse rejected 32768 and above
 			return err
 		}
 
 		masterAddr = fmt.Sprintf("%s:%s", args[0], args[1])
+
+		if len(args) == 3 && strings.ToLower(hack.String(args[2])) == "restart" {
+			restart = true
+		}
 	}
 
-	if err := c.app.slaveof(masterAddr); err != nil {
+	if err := c.app.slaveof(masterAddr, restart); err != nil {
 		return err
 	}
 
@@ -66,55 +73,38 @@ func fullsyncCommand(c *client) error {
 	return nil
 }
 
-var reserveInfoSpace = make([]byte, 16)
-
 func syncCommand(c *client) error {
 	args := c.args
-	if len(args) != 2 {
+	if len(args) != 1 {
 		return ErrCmdParams
 	}
 
-	var logIndex int64
-	var logPos int64
+	var logId uint64
 	var err error
-	logIndex, err = ledis.StrInt64(args[0], nil)
-	if err != nil {
+
+	if logId, err = ledis.StrUint64(args[0], nil); err != nil {
 		return ErrCmdParams
 	}
 
-	logPos, err = ledis.StrInt64(args[1], nil)
-	if err != nil {
-		return ErrCmdParams
+	c.lastLogID = logId - 1
+
+	if c.ack != nil && logId > c.ack.id {
+		asyncNotifyUint64(c.ack.ch, logId)
+		c.ack = nil
 	}
 
 	c.syncBuf.Reset()
 
-	//reserve space to write binlog anchor
-	if _, err := c.syncBuf.Write(reserveInfoSpace); err != nil {
-		return err
-	}
-
-	m := &ledis.BinLogAnchor{logIndex, logPos}
-
-	if 
_, err := c.app.ldb.ReadEventsToTimeout(m, &c.syncBuf, 5); err != nil { + if _, _, err := c.app.ldb.ReadLogsToTimeout(logId, &c.syncBuf, 30); err != nil { return err } else { buf := c.syncBuf.Bytes() - binary.BigEndian.PutUint64(buf[0:], uint64(m.LogFileIndex)) - binary.BigEndian.PutUint64(buf[8:], uint64(m.LogPos)) - - if len(c.compressBuf) < snappy.MaxEncodedLen(len(buf)) { - c.compressBuf = make([]byte, snappy.MaxEncodedLen(len(buf))) - } - - if buf, err = snappy.Encode(c.compressBuf, buf); err != nil { - return err - } - c.resp.writeBulk(buf) } + c.app.addSlave(c) + return nil } diff --git a/server/cmd_replication_test.go b/server/cmd_replication_test.go index 3e7e285..76bf2c2 100644 --- a/server/cmd_replication_test.go +++ b/server/cmd_replication_test.go @@ -1,25 +1,32 @@ package server import ( - "bytes" "fmt" "github.com/siddontang/ledisdb/config" - "github.com/siddontang/ledisdb/store" "os" + "reflect" "testing" "time" ) func checkDataEqual(master *App, slave *App) error { - it := master.ldb.DataDB().RangeLimitIterator(nil, nil, store.RangeClose, 0, -1) - for ; it.Valid(); it.Next() { - key := it.Key() - value := it.Value() + mdb, _ := master.ldb.Select(0) + sdb, _ := slave.ldb.Select(0) - if v, err := slave.ldb.DataDB().Get(key); err != nil { - return err - } else if !bytes.Equal(v, value) { - return fmt.Errorf("replication error %d != %d", len(v), len(value)) + mkeys, _ := mdb.Scan(nil, 100, true, "") + skeys, _ := sdb.Scan(nil, 100, true, "") + + if len(mkeys) != len(skeys) { + return fmt.Errorf("keys number not equal %d != %d", len(mkeys), len(skeys)) + } else if !reflect.DeepEqual(mkeys, skeys) { + return fmt.Errorf("keys not equal") + } else { + for _, k := range mkeys { + v1, _ := mdb.Get(k) + v2, _ := sdb.Get(k) + if !reflect.DeepEqual(v1, v2) { + return fmt.Errorf("value not equal") + } } } @@ -33,8 +40,9 @@ func TestReplication(t *testing.T) { masterCfg := new(config.Config) masterCfg.DataDir = fmt.Sprintf("%s/master", data_dir) masterCfg.Addr = 
"127.0.0.1:11182" - masterCfg.BinLog.MaxFileSize = 1 * 1024 * 1024 - masterCfg.BinLog.MaxFileNum = 10 + masterCfg.UseReplication = true + masterCfg.Replication.Sync = true + masterCfg.Replication.WaitSyncTime = 5 var master *App var slave *App @@ -49,6 +57,7 @@ func TestReplication(t *testing.T) { slaveCfg.DataDir = fmt.Sprintf("%s/slave", data_dir) slaveCfg.Addr = "127.0.0.1:11183" slaveCfg.SlaveOf = masterCfg.Addr + slaveCfg.UseReplication = true slave, err = NewApp(slaveCfg) if err != nil { @@ -58,54 +67,67 @@ func TestReplication(t *testing.T) { go master.Run() + time.Sleep(1 * time.Second) + go slave.Run() + db, _ := master.ldb.Select(0) value := make([]byte, 10) db.Set([]byte("a"), value) db.Set([]byte("b"), value) - db.HSet([]byte("a"), []byte("1"), value) - db.HSet([]byte("b"), []byte("2"), value) - - go slave.Run() + db.Set([]byte("c"), value) + db.Set([]byte("d"), value) time.Sleep(1 * time.Second) - if err = checkDataEqual(master, slave); err != nil { t.Fatal(err) } db.Set([]byte("a1"), value) db.Set([]byte("b1"), value) - db.HSet([]byte("a1"), []byte("1"), value) - db.HSet([]byte("b1"), []byte("2"), value) + db.Set([]byte("c1"), value) + db.Set([]byte("d1"), value) + + //time.Sleep(1 * time.Second) + slave.ldb.WaitReplication() - time.Sleep(1 * time.Second) if err = checkDataEqual(master, slave); err != nil { t.Fatal(err) } - slave.slaveof("") + slave.slaveof("", false) db.Set([]byte("a2"), value) db.Set([]byte("b2"), value) - db.HSet([]byte("a2"), []byte("1"), value) - db.HSet([]byte("b2"), []byte("2"), value) + db.Set([]byte("c2"), value) + db.Set([]byte("d2"), value) db.Set([]byte("a3"), value) db.Set([]byte("b3"), value) - db.HSet([]byte("a3"), []byte("1"), value) - db.HSet([]byte("b3"), []byte("2"), value) + db.Set([]byte("c3"), value) + db.Set([]byte("d3"), value) if err = checkDataEqual(master, slave); err == nil { t.Fatal("must error") } - slave.slaveof(masterCfg.Addr) + slave.slaveof(masterCfg.Addr, false) + time.Sleep(1 * time.Second) if err = 
checkDataEqual(master, slave); err != nil { t.Fatal(err) } + slave.tryReSlaveof() + + time.Sleep(1 * time.Second) + + slave.ldb.WaitReplication() + + if err = checkDataEqual(master, slave); err != nil { + t.Fatal(err) + } + } diff --git a/server/cmd_script.go b/server/cmd_script.go index e7d62a4..a35c153 100644 --- a/server/cmd_script.go +++ b/server/cmd_script.go @@ -6,7 +6,8 @@ import ( "crypto/sha1" "encoding/hex" "fmt" - "github.com/siddontang/ledisdb/ledis" + "github.com/siddontang/go/hack" + "github.com/siddontang/ledisdb/lua" "strconv" "strings" @@ -20,7 +21,7 @@ func parseEvalArgs(l *lua.State, c *client) error { args = args[1:] - n, err := strconv.Atoi(ledis.String(args[0])) + n, err := strconv.Atoi(hack.String(args[0])) if err != nil { return err } @@ -72,7 +73,7 @@ func evalGenericCommand(c *client, evalSha1 bool) error { h := sha1.Sum(c.args[0]) key = hex.EncodeToString(h[0:20]) } else { - key = strings.ToLower(ledis.String(c.args[0])) + key = strings.ToLower(hack.String(c.args[0])) } l.GetGlobal(key) @@ -84,7 +85,7 @@ func evalGenericCommand(c *client, evalSha1 bool) error { return fmt.Errorf("missing %s script", key) } - if r := l.LoadString(ledis.String(c.args[0])); r != 0 { + if r := l.LoadString(hack.String(c.args[0])); r != 0 { err := fmt.Errorf("%s", l.ToString(-1)) l.Pop(1) return err @@ -139,7 +140,7 @@ func scriptCommand(c *client) error { return ErrCmdParams } - switch strings.ToLower(ledis.String(args[0])) { + switch strings.ToLower(hack.String(args[0])) { case "load": return scriptLoadCommand(c) case "exists": @@ -164,7 +165,7 @@ func scriptLoadCommand(c *client) error { h := sha1.Sum(c.args[1]) key := hex.EncodeToString(h[0:20]) - if r := l.LoadString(ledis.String(c.args[1])); r != 0 { + if r := l.LoadString(hack.String(c.args[1])); r != 0 { err := fmt.Errorf("%s", l.ToString(-1)) l.Pop(1) return err @@ -175,7 +176,7 @@ func scriptLoadCommand(c *client) error { s.chunks[key] = struct{}{} } - c.resp.writeBulk(ledis.Slice(key)) + 
c.resp.writeBulk(hack.Slice(key)) return nil } @@ -188,7 +189,7 @@ func scriptExistsCommand(c *client) error { ay := make([]interface{}, len(c.args[1:])) for i, n := range c.args[1:] { - if _, ok := s.chunks[ledis.String(n)]; ok { + if _, ok := s.chunks[hack.String(n)]; ok { ay[i] = int64(1) } else { ay[i] = int64(0) diff --git a/server/cmd_server.go b/server/cmd_server.go new file mode 100644 index 0000000..198003c --- /dev/null +++ b/server/cmd_server.go @@ -0,0 +1,95 @@ +package server + +import ( + "github.com/siddontang/go/hack" + "strconv" + "strings" +) + +func pingCommand(c *client) error { + c.resp.writeStatus(PONG) + return nil +} + +func echoCommand(c *client) error { + if len(c.args) != 1 { + return ErrCmdParams + } + + c.resp.writeBulk(c.args[0]) + return nil +} + +func selectCommand(c *client) error { + if len(c.args) != 1 { + return ErrCmdParams + } + + if index, err := strconv.Atoi(hack.String(c.args[0])); err != nil { + return err + } else { + if c.db.IsInMulti() { + if err := c.script.Select(index); err != nil { + return err + } else { + c.db = c.script.DB + } + } else { + if db, err := c.ldb.Select(index); err != nil { + return err + } else { + c.db = db + } + } + c.resp.writeStatus(OK) + } + + return nil +} + +func infoCommand(c *client) error { + if len(c.args) > 1 { + return ErrCmdParams + } + var section string + if len(c.args) == 1 { + section = strings.ToLower(hack.String(c.args[0])) + } + + buf := c.app.info.Dump(section) + c.resp.writeBulk(buf) + + return nil +} + +func flushallCommand(c *client) error { + err := c.ldb.FlushAll() + if err != nil { + return err + } + + //we will restart the replication from master if possible + c.app.tryReSlaveof() + + c.resp.writeStatus(OK) + return nil +} + +func flushdbCommand(c *client) error { + _, err := c.db.FlushAll() + if err != nil { + return err + } + + c.resp.writeStatus(OK) + return nil +} + +func init() { + register("ping", pingCommand) + register("echo", echoCommand) + register("select", 
selectCommand) + register("info", infoCommand) + register("flushall", flushallCommand) + register("flushdb", flushdbCommand) +} diff --git a/server/cmd_zset.go b/server/cmd_zset.go index ee55faf..3c5abcb 100644 --- a/server/cmd_zset.go +++ b/server/cmd_zset.go @@ -2,7 +2,10 @@ package server import ( "errors" + "github.com/siddontang/go/hack" + "github.com/siddontang/go/num" "github.com/siddontang/ledisdb/ledis" + "github.com/siddontang/ledisdb/store" "math" "strconv" "strings" @@ -73,7 +76,7 @@ func zscoreCommand(c *client) error { return err } } else { - c.resp.writeBulk(ledis.StrPutInt64(s)) + c.resp.writeBulk(num.FormatInt64ToSlice(s)) } return nil @@ -110,14 +113,14 @@ func zincrbyCommand(c *client) error { v, err := c.db.ZIncrBy(key, delta, args[2]) if err == nil { - c.resp.writeBulk(ledis.StrPutInt64(v)) + c.resp.writeBulk(num.FormatInt64ToSlice(v)) } return err } func zparseScoreRange(minBuf []byte, maxBuf []byte) (min int64, max int64, err error) { - if strings.ToLower(ledis.String(minBuf)) == "-inf" { + if strings.ToLower(hack.String(minBuf)) == "-inf" { min = math.MinInt64 } else { @@ -148,7 +151,7 @@ func zparseScoreRange(minBuf []byte, maxBuf []byte) (min int64, max int64, err e } } - if strings.ToLower(ledis.String(maxBuf)) == "+inf" { + if strings.ToLower(hack.String(maxBuf)) == "+inf" { max = math.MaxInt64 } else { var ropen = false @@ -289,11 +292,11 @@ func zremrangebyscoreCommand(c *client) error { } func zparseRange(c *client, a1 []byte, a2 []byte) (start int, stop int, err error) { - if start, err = strconv.Atoi(ledis.String(a1)); err != nil { + if start, err = strconv.Atoi(hack.String(a1)); err != nil { return } - if stop, err = strconv.Atoi(ledis.String(a2)); err != nil { + if stop, err = strconv.Atoi(hack.String(a2)); err != nil { return } @@ -320,7 +323,7 @@ func zrangeGeneric(c *client, reverse bool) error { if len(args) != 1 { return ErrCmdParams } - if strings.ToLower(ledis.String(args[0])) == "withscores" { + if 
strings.ToLower(hack.String(args[0])) == "withscores" { withScores = true } else { return ErrSyntax @@ -370,7 +373,7 @@ func zrangebyscoreGeneric(c *client, reverse bool) error { var withScores bool = false if len(args) > 0 { - if strings.ToLower(ledis.String(args[0])) == "withscores" { + if strings.ToLower(hack.String(args[0])) == "withscores" { withScores = true args = args[1:] } @@ -384,15 +387,15 @@ func zrangebyscoreGeneric(c *client, reverse bool) error { return ErrCmdParams } - if strings.ToLower(ledis.String(args[0])) != "limit" { + if strings.ToLower(hack.String(args[0])) != "limit" { return ErrSyntax } - if offset, err = strconv.Atoi(ledis.String(args[1])); err != nil { + if offset, err = strconv.Atoi(hack.String(args[1])); err != nil { return ErrValue } - if count, err = strconv.Atoi(ledis.String(args[2])); err != nil { + if count, err = strconv.Atoi(hack.String(args[2])); err != nil { return ErrValue } } @@ -523,7 +526,7 @@ func zpersistCommand(c *client) error { func zparseZsetoptStore(args [][]byte) (destKey []byte, srcKeys [][]byte, weights []int64, aggregate byte, err error) { destKey = args[0] - nKeys, err := strconv.Atoi(ledis.String(args[1])) + nKeys, err := strconv.Atoi(hack.String(args[1])) if err != nil { err = ErrValue return @@ -542,7 +545,7 @@ func zparseZsetoptStore(args [][]byte) (destKey []byte, srcKeys [][]byte, weight var aggregateFlag = false for len(args) > 0 { - if strings.ToLower(ledis.String(args[0])) == "weights" { + if strings.ToLower(hack.String(args[0])) == "weights" { if weightsFlag { err = ErrSyntax return @@ -565,7 +568,7 @@ func zparseZsetoptStore(args [][]byte) (destKey []byte, srcKeys [][]byte, weight weightsFlag = true - } else if strings.ToLower(ledis.String(args[0])) == "aggregate" { + } else if strings.ToLower(hack.String(args[0])) == "aggregate" { if aggregateFlag { err = ErrSyntax return @@ -575,11 +578,11 @@ func zparseZsetoptStore(args [][]byte) (destKey []byte, srcKeys [][]byte, weight return } - if 
strings.ToLower(ledis.String(args[1])) == "sum" { + if strings.ToLower(hack.String(args[1])) == "sum" { aggregate = ledis.AggregateSum - } else if strings.ToLower(ledis.String(args[1])) == "min" { + } else if strings.ToLower(hack.String(args[1])) == "min" { aggregate = ledis.AggregateMin - } else if strings.ToLower(ledis.String(args[1])) == "max" { + } else if strings.ToLower(hack.String(args[1])) == "max" { aggregate = ledis.AggregateMax } else { err = ErrSyntax @@ -659,6 +662,128 @@ func zxscanCommand(c *client) error { return nil } +func zparseMemberRange(minBuf []byte, maxBuf []byte) (min []byte, max []byte, rangeType uint8, err error) { + rangeType = store.RangeClose + if strings.ToLower(hack.String(minBuf)) == "-" { + min = nil + } else { + if len(minBuf) == 0 { + err = ErrCmdParams + return + } + + if minBuf[0] == '(' { + rangeType |= store.RangeLOpen + min = minBuf[1:] + } else if minBuf[0] == '[' { + min = minBuf[1:] + } else { + err = ErrCmdParams + return + } + } + + if strings.ToLower(hack.String(maxBuf)) == "+" { + max = nil + } else { + if len(maxBuf) == 0 { + err = ErrCmdParams + return + } + if maxBuf[0] == '(' { + rangeType |= store.RangeROpen + max = maxBuf[1:] + } else if maxBuf[0] == '[' { + max = maxBuf[1:] + } else { + err = ErrCmdParams + return + } + } + + return +} + +func zrangebylexCommand(c *client) error { + args := c.args + if len(args) != 3 && len(args) != 6 { + return ErrCmdParams + } + + min, max, rangeType, err := zparseMemberRange(args[1], args[2]) + if err != nil { + return err + } + + var offset int = 0 + var count int = -1 + + if len(args) == 6 { + if strings.ToLower(hack.String(args[3])) != "limit" { + return ErrSyntax + } + + if offset, err = strconv.Atoi(hack.String(args[4])); err != nil { + return ErrValue + } + + if count, err = strconv.Atoi(hack.String(args[5])); err != nil { + return ErrValue + } + } + + key := args[0] + if ay, err := c.db.ZRangeByLex(key, min, max, rangeType, offset, count); err != nil { + return err + 
} else { + c.resp.writeSliceArray(ay) + } + + return nil +} + +func zremrangebylexCommand(c *client) error { + args := c.args + if len(args) != 3 { + return ErrCmdParams + } + + min, max, rangeType, err := zparseMemberRange(args[1], args[2]) + if err != nil { + return err + } + + key := args[0] + if n, err := c.db.ZRemRangeByLex(key, min, max, rangeType); err != nil { + return err + } else { + c.resp.writeInteger(n) + } + + return nil +} + +func zlexcountCommand(c *client) error { + args := c.args + if len(args) != 3 { + return ErrCmdParams + } + + min, max, rangeType, err := zparseMemberRange(args[1], args[2]) + if err != nil { + return err + } + + key := args[0] + if n, err := c.db.ZLexCount(key, min, max, rangeType); err != nil { + return err + } else { + c.resp.writeInteger(n) + } + + return nil +} + func init() { register("zadd", zaddCommand) register("zcard", zcardCommand) @@ -678,6 +803,10 @@ func init() { register("zunionstore", zunionstoreCommand) register("zinterstore", zinterstoreCommand) + register("zrangebylex", zrangebylexCommand) + register("zremrangebylex", zremrangebylexCommand) + register("zlexcount", zlexcountCommand) + //ledisdb special command register("zclear", zclearCommand) diff --git a/server/cmd_zset_test.go b/server/cmd_zset_test.go index 8c74bdc..59411c5 100644 --- a/server/cmd_zset_test.go +++ b/server/cmd_zset_test.go @@ -3,6 +3,7 @@ package server import ( "fmt" "github.com/siddontang/ledisdb/client/go/ledis" + "reflect" "strconv" "testing" ) @@ -737,3 +738,51 @@ func TestZInterStore(t *testing.T) { } } } + +func TestZSetLex(t *testing.T) { + c := getTestConn() + defer c.Close() + + key := []byte("myzlexset") + if _, err := c.Do("zadd", key, + 0, "a", 0, "b", 0, "c", 0, "d", 0, "e", 0, "f", 0, "g"); err != nil { + t.Fatal(err) + } + + if ay, err := ledis.Strings(c.Do("zrangebylex", key, "-", "[c")); err != nil { + t.Fatal(err) + } else if !reflect.DeepEqual(ay, []string{"a", "b", "c"}) { + t.Fatal("must equal") + } + + if ay, err := 
ledis.Strings(c.Do("zrangebylex", key, "-", "(c")); err != nil { + t.Fatal(err) + } else if !reflect.DeepEqual(ay, []string{"a", "b"}) { + t.Fatal("must equal") + } + + if ay, err := ledis.Strings(c.Do("zrangebylex", key, "[aaa", "(g")); err != nil { + t.Fatal(err) + } else if !reflect.DeepEqual(ay, []string{"b", "c", "d", "e", "f"}) { + t.Fatal("must equal") + } + + if n, err := ledis.Int64(c.Do("zlexcount", key, "-", "(c")); err != nil { + t.Fatal(err) + } else if n != 2 { + t.Fatal(n) + } + + if n, err := ledis.Int64(c.Do("zremrangebylex", key, "[aaa", "(g")); err != nil { + t.Fatal(err) + } else if n != 5 { + t.Fatal(n) + } + + if n, err := ledis.Int64(c.Do("zlexcount", key, "-", "+")); err != nil { + t.Fatal(err) + } else if n != 2 { + t.Fatal(n) + } + +} diff --git a/server/command.go b/server/command.go index 458343b..1c54c90 100644 --- a/server/command.go +++ b/server/command.go @@ -2,8 +2,6 @@ package server import ( "fmt" - "github.com/siddontang/ledisdb/ledis" - "strconv" "strings" ) @@ -18,94 +16,3 @@ func register(name string, f CommandFunc) { regCmds[name] = f } - -func pingCommand(c *client) error { - c.resp.writeStatus(PONG) - return nil -} - -func echoCommand(c *client) error { - if len(c.args) != 1 { - return ErrCmdParams - } - - c.resp.writeBulk(c.args[0]) - return nil -} - -func selectCommand(c *client) error { - if len(c.args) != 1 { - return ErrCmdParams - } - - if index, err := strconv.Atoi(ledis.String(c.args[0])); err != nil { - return err - } else { - if c.db.IsTransaction() { - if err := c.tx.Select(index); err != nil { - return err - } else { - c.db = c.tx.DB - } - } else if c.db.IsInMulti() { - if err := c.script.Select(index); err != nil { - return err - } else { - c.db = c.script.DB - } - } else { - if db, err := c.ldb.Select(index); err != nil { - return err - } else { - c.db = db - } - } - c.resp.writeStatus(OK) - } - - return nil -} - -func infoCommand(c *client) error { - if len(c.args) > 1 { - return ErrSyntax - } - var section 
string - if len(c.args) == 1 { - section = strings.ToLower(ledis.String(c.args[0])) - } - - buf := c.app.info.Dump(section) - c.resp.writeBulk(buf) - - return nil -} - -func flushallCommand(c *client) error { - err := c.ldb.FlushAll() - if err != nil { - return err - } - - c.resp.writeStatus(OK) - return nil -} - -func flushdbCommand(c *client) error { - _, err := c.db.FlushAll() - if err != nil { - return err - } - - c.resp.writeStatus(OK) - return nil -} - -func init() { - register("ping", pingCommand) - register("echo", echoCommand) - register("select", selectCommand) - register("info", infoCommand) - register("flushall", flushallCommand) - register("flushdb", flushdbCommand) -} diff --git a/server/doc.go b/server/doc.go index d893a22..7dc47ff 100644 --- a/server/doc.go +++ b/server/doc.go @@ -24,7 +24,8 @@ // ledis-cli -p 6381 // ledis 127.0.0.1:6381 > slaveof 127.0.0.1 6380 // -// After you send slaveof command, the slave will start to sync master's binlog and replicate from binlog. +// After you send slaveof command, the slave will start to sync master's write ahead log and replicate from it. +// You must notice that use_replication must be set true if you want to use it. 
// // HTTP Interface // diff --git a/server/info.go b/server/info.go index cae6a3f..119c6d3 100644 --- a/server/info.go +++ b/server/info.go @@ -81,6 +81,8 @@ func (i *info) Dump(section string) []byte { i.dumpPersistence(buf) case "goroutine": i.dumpGoroutine(buf) + case "replication": + i.dumpReplication(buf) default: buf.WriteString(fmt.Sprintf("# %s\r\n", section)) } @@ -103,6 +105,8 @@ func (i *info) dumpAll(buf *bytes.Buffer) { i.dumpMem(buf) buf.Write(Delims) i.dumpGoroutine(buf) + buf.Write(Delims) + i.dumpReplication(buf) } func (i *info) dumpServer(buf *bytes.Buffer) { @@ -142,6 +146,29 @@ func (i *info) dumpPersistence(buf *bytes.Buffer) { i.dumpPairs(buf, infoPair{"db_name", i.Persistence.DBName}) } +func (i *info) dumpReplication(buf *bytes.Buffer) { + buf.WriteString("# Replication\r\n") + + p := []infoPair{} + slaves := make([]string, 0, len(i.app.slaves)) + for s, _ := range i.app.slaves { + slaves = append(slaves, s.remoteAddr) + } + + p = append(p, infoPair{"readonly", i.app.ldb.IsReadOnly()}) + + if len(slaves) > 0 { + p = append(p, infoPair{"slave", strings.Join(slaves, ",")}) + } + + s, _ := i.app.ldb.ReplicationStat() + p = append(p, infoPair{"last_log_id", s.LastID}) + p = append(p, infoPair{"first_log_id", s.FirstID}) + p = append(p, infoPair{"commit_log_id", s.CommitID}) + + i.dumpPairs(buf, p...) 
+} + func (i *info) dumpPairs(buf *bytes.Buffer, pairs ...infoPair) { for _, v := range pairs { buf.WriteString(fmt.Sprintf("%s:%v\r\n", v.Key, v.Value)) diff --git a/server/replication.go b/server/replication.go index 445a813..b8b1868 100644 --- a/server/replication.go +++ b/server/replication.go @@ -3,14 +3,13 @@ package server import ( "bufio" "bytes" - "encoding/binary" - "encoding/json" "errors" "fmt" - "github.com/siddontang/go-log/log" - "github.com/siddontang/go-snappy/snappy" + "github.com/siddontang/go/hack" + "github.com/siddontang/go/log" + "github.com/siddontang/go/num" "github.com/siddontang/ledisdb/ledis" - "io/ioutil" + "github.com/siddontang/ledisdb/rpl" "net" "os" "path" @@ -23,52 +22,6 @@ var ( errConnectMaster = errors.New("connect master error") ) -type MasterInfo struct { - Addr string `json:"addr"` - LogFileIndex int64 `json:"log_file_index"` - LogPos int64 `json:"log_pos"` -} - -func (m *MasterInfo) Save(filePath string) error { - data, err := json.Marshal(m) - if err != nil { - return err - } - - filePathBak := fmt.Sprintf("%s.bak", filePath) - - var fd *os.File - fd, err = os.OpenFile(filePathBak, os.O_CREATE|os.O_WRONLY, 0644) - if err != nil { - return err - } - - if _, err = fd.Write(data); err != nil { - fd.Close() - return err - } - - fd.Close() - return os.Rename(filePathBak, filePath) -} - -func (m *MasterInfo) Load(filePath string) error { - data, err := ioutil.ReadFile(filePath) - if err != nil { - if os.IsNotExist(err) { - return nil - } else { - return err - } - } - - if err = json.Unmarshal(data, m); err != nil { - return err - } - - return nil -} - type master struct { sync.Mutex @@ -79,40 +32,24 @@ type master struct { quit chan struct{} - infoName string - - info *MasterInfo + addr string wg sync.WaitGroup syncBuf bytes.Buffer - - compressBuf []byte } func newMaster(app *App) *master { m := new(master) m.app = app - m.infoName = path.Join(m.app.cfg.DataDir, "master.info") - m.quit = make(chan struct{}, 1) - m.compressBuf = 
make([]byte, 256) - - m.info = new(MasterInfo) - - //if load error, we will start a fullsync later - m.loadInfo() - return m } func (m *master) Close() { - select { - case m.quit <- struct{}{}: - default: - } + ledis.AsyncNotify(m.quit) if m.conn != nil { m.conn.Close() @@ -122,16 +59,8 @@ func (m *master) Close() { m.wg.Wait() } -func (m *master) loadInfo() error { - return m.info.Load(m.infoName) -} - -func (m *master) saveInfo() error { - return m.info.Save(m.infoName) -} - func (m *master) connect() error { - if len(m.info.Addr) == 0 { + if len(m.addr) == 0 { return fmt.Errorf("no assign master addr") } @@ -140,7 +69,7 @@ func (m *master) connect() error { m.conn = nil } - if conn, err := net.Dial("tcp", m.info.Addr); err != nil { + if conn, err := net.Dial("tcp", m.addr); err != nil { return err } else { m.conn = conn @@ -150,43 +79,28 @@ func (m *master) connect() error { return nil } -func (m *master) resetInfo(addr string) { - m.info.Addr = addr - m.info.LogFileIndex = 0 - m.info.LogPos = 0 -} - func (m *master) stopReplication() error { m.Close() - if err := m.saveInfo(); err != nil { - log.Error("save master info error %s", err.Error()) - return err - } - return nil } -func (m *master) startReplication(masterAddr string) error { +func (m *master) startReplication(masterAddr string, restart bool) error { //stop last replcation, if avaliable m.Close() - if masterAddr != m.info.Addr { - m.resetInfo(masterAddr) - if err := m.saveInfo(); err != nil { - log.Error("save master info error %s", err.Error()) - return err - } - } + m.addr = masterAddr m.quit = make(chan struct{}, 1) - go m.runReplication() + m.app.ldb.SetReadOnly(true) + + m.wg.Add(1) + go m.runReplication(restart) return nil } -func (m *master) runReplication() { - m.wg.Add(1) +func (m *master) runReplication(restart bool) { defer m.wg.Done() for { @@ -195,34 +109,27 @@ func (m *master) runReplication() { return default: if err := m.connect(); err != nil { - log.Error("connect master %s error %s, 
try 2s later", m.info.Addr, err.Error()) + log.Error("connect master %s error %s, try 2s later", m.addr, err.Error()) time.Sleep(2 * time.Second) continue } } - if m.info.LogFileIndex == 0 { - //try a fullsync + if restart { if err := m.fullSync(); err != nil { if m.conn != nil { //if conn == nil, other close the replication, not error - log.Warn("full sync error %s", err.Error()) + log.Error("restart fullsync error %s", err.Error()) } return } - - if m.info.LogFileIndex == 0 { - //master not support binlog, we cannot sync, so stop replication - m.stopReplication() - return - } } for { if err := m.sync(); err != nil { if m.conn != nil { //if conn == nil, other close the replication, not error - log.Warn("sync error %s", err.Error()) + log.Error("sync error %s", err.Error()) } return } @@ -240,11 +147,13 @@ func (m *master) runReplication() { } var ( - fullSyncCmd = []byte("*1\r\n$8\r\nfullsync\r\n") //fullsync - syncCmdFormat = "*3\r\n$4\r\nsync\r\n$%d\r\n%s\r\n$%d\r\n%s\r\n" //sync index pos + fullSyncCmd = []byte("*1\r\n$8\r\nfullsync\r\n") //fullsync + syncCmdFormat = "*2\r\n$4\r\nsync\r\n$%d\r\n%s\r\n" //sync logid ) func (m *master) fullSync() error { + log.Info("begin full sync") + if _, err := m.conn.Write(fullSyncCmd); err != nil { return err } @@ -264,30 +173,38 @@ func (m *master) fullSync() error { return err } - if err = m.app.ldb.FlushAll(); err != nil { - return err - } - - var head *ledis.BinLogAnchor - head, err = m.app.ldb.LoadDumpFile(dumpPath) - - if err != nil { + if _, err = m.app.ldb.LoadDumpFile(dumpPath); err != nil { log.Error("load dump file error %s", err.Error()) return err } - m.info.LogFileIndex = head.LogFileIndex - m.info.LogPos = head.LogPos + return nil +} - return m.saveInfo() +func (m *master) nextSyncLogID() (uint64, error) { + s, err := m.app.ldb.ReplicationStat() + if err != nil { + return 0, err + } + + if s.LastID > s.CommitID { + return s.LastID + 1, nil + } else { + return s.CommitID + 1, nil + } } func (m *master) sync() 
error { - logIndexStr := strconv.FormatInt(m.info.LogFileIndex, 10) - logPosStr := strconv.FormatInt(m.info.LogPos, 10) + var err error + var syncID uint64 + if syncID, err = m.nextSyncLogID(); err != nil { + return err + } - cmd := ledis.Slice(fmt.Sprintf(syncCmdFormat, len(logIndexStr), - logIndexStr, len(logPosStr), logPosStr)) + logIDStr := strconv.FormatUint(syncID, 10) + + cmd := hack.Slice(fmt.Sprintf(syncCmdFormat, len(logIDStr), + logIDStr)) if _, err := m.conn.Write(cmd); err != nil { return err @@ -295,53 +212,152 @@ func (m *master) sync() error { m.syncBuf.Reset() - err := ReadBulkTo(m.rb, &m.syncBuf) - if err != nil { - return err + if err = ReadBulkTo(m.rb, &m.syncBuf); err != nil { + switch err.Error() { + case ledis.ErrLogMissed.Error(): + return m.fullSync() + case ledis.ErrRplNotSupport.Error(): + m.stopReplication() + return nil + default: + return err + } } - var buf []byte - buf, err = snappy.Decode(m.compressBuf, m.syncBuf.Bytes()) - if err != nil { - return err - } else if len(buf) > len(m.compressBuf) { - m.compressBuf = buf - } + buf := m.syncBuf.Bytes() - if len(buf) < 16 { - return fmt.Errorf("invalid sync data len %d", len(buf)) - } - - m.info.LogFileIndex = int64(binary.BigEndian.Uint64(buf[0:8])) - m.info.LogPos = int64(binary.BigEndian.Uint64(buf[8:16])) - - if m.info.LogFileIndex == 0 { - //master now not support binlog, stop replication - m.stopReplication() + if len(buf) == 0 { return nil - } else if m.info.LogFileIndex == -1 { - //-1 means than binlog index and pos are lost, we must start a full sync instead - return m.fullSync() } - err = m.app.ldb.ReplicateFromData(buf[16:]) - if err != nil { + if err = m.app.ldb.StoreLogsFromData(buf); err != nil { return err } - return m.saveInfo() + return nil } -func (app *App) slaveof(masterAddr string) error { +func (app *App) slaveof(masterAddr string, restart bool) error { app.m.Lock() defer app.m.Unlock() + if !app.ldb.ReplicationUsed() { + return fmt.Errorf("slaveof must enable 
replication") + } + + app.cfg.SlaveOf = masterAddr + if len(masterAddr) == 0 { - return app.m.stopReplication() + if err := app.m.stopReplication(); err != nil { + return err + } + + app.ldb.SetReadOnly(false) } else { - return app.m.startReplication(masterAddr) + return app.m.startReplication(masterAddr, restart) } return nil } + +func (app *App) tryReSlaveof() error { + app.m.Lock() + defer app.m.Unlock() + + if !app.ldb.ReplicationUsed() { + return nil + } + + if len(app.cfg.SlaveOf) == 0 { + return nil + } else { + return app.m.startReplication(app.cfg.SlaveOf, true) + } +} + +func (app *App) addSlave(c *client) { + app.slock.Lock() + defer app.slock.Unlock() + + app.slaves[c] = struct{}{} +} + +func (app *App) removeSlave(c *client) { + app.slock.Lock() + defer app.slock.Unlock() + + delete(app.slaves, c) + + if c.ack != nil { + asyncNotifyUint64(c.ack.ch, c.lastLogID) + } +} + +func asyncNotifyUint64(ch chan uint64, v uint64) { + select { + case ch <- v: + default: + } +} + +func (app *App) publishNewLog(l *rpl.Log) { + if !app.cfg.Replication.Sync { + //no sync replication, we will do async + return + } + + ss := make([]*client, 0, 4) + app.slock.Lock() + + logId := l.ID + for s, _ := range app.slaves { + if s.lastLogID >= logId { + //slave has already this log + ss = []*client{} + break + } else { + ss = append(ss, s) + } + } + + app.slock.Unlock() + + if len(ss) == 0 { + return + } + + ack := &syncAck{ + logId, make(chan uint64, len(ss)), + } + + for _, s := range ss { + s.ack = ack + } + + total := (len(ss) + 1) / 2 + if app.cfg.Replication.WaitMaxSlaveAcks > 0 { + total = num.MinInt(total, app.cfg.Replication.WaitMaxSlaveAcks) + } + + done := make(chan struct{}, 1) + go func(total int) { + n := 0 + for i := 0; i < len(ss); i++ { + id := <-ack.ch + if id > logId { + n++ + if n >= total { + break + } + } + } + done <- struct{}{} + }(total) + + select { + case <-done: + case <-time.After(time.Duration(app.cfg.Replication.WaitSyncTime) * time.Second): + 
log.Info("replication wait timeout") + } +} diff --git a/server/script.go b/server/script.go index f8222c4..711b412 100644 --- a/server/script.go +++ b/server/script.go @@ -5,6 +5,8 @@ package server import ( "encoding/hex" "fmt" + "github.com/siddontang/go/hack" + "github.com/siddontang/go/num" "github.com/siddontang/ledisdb/ledis" "github.com/siddontang/ledisdb/lua" "io" @@ -38,7 +40,7 @@ func (w *luaWriter) writeBulk(b []byte) { if b == nil { w.l.PushBoolean(false) } else { - w.l.PushString(ledis.String(b)) + w.l.PushString(hack.String(b)) } } @@ -81,7 +83,7 @@ func (w *luaWriter) writeSliceArray(lst [][]byte) { w.l.CreateTable(len(lst), 0) for i, v := range lst { - w.l.PushString(ledis.String(v)) + w.l.PushString(hack.String(v)) w.l.RawSeti(-2, i+1) } } @@ -94,10 +96,10 @@ func (w *luaWriter) writeFVPairArray(lst []ledis.FVPair) { w.l.CreateTable(len(lst)*2, 0) for i, v := range lst { - w.l.PushString(ledis.String(v.Field)) + w.l.PushString(hack.String(v.Field)) w.l.RawSeti(-2, 2*i+1) - w.l.PushString(ledis.String(v.Value)) + w.l.PushString(hack.String(v.Value)) w.l.RawSeti(-2, 2*i+2) } } @@ -111,16 +113,16 @@ func (w *luaWriter) writeScorePairArray(lst []ledis.ScorePair, withScores bool) if withScores { w.l.CreateTable(len(lst)*2, 0) for i, v := range lst { - w.l.PushString(ledis.String(v.Member)) + w.l.PushString(hack.String(v.Member)) w.l.RawSeti(-2, 2*i+1) - w.l.PushString(ledis.String(ledis.StrPutInt64(v.Score))) + w.l.PushString(hack.String(num.FormatInt64ToSlice(v.Score))) w.l.RawSeti(-2, 2*i+2) } } else { w.l.CreateTable(len(lst), 0) for i, v := range lst { - w.l.PushString(ledis.String(v.Member)) + w.l.PushString(hack.String(v.Member)) w.l.RawSeti(-2, i+1) } } @@ -280,7 +282,7 @@ func luaSha1Hex(l *lua.State) int { } s := l.ToString(1) - s = hex.EncodeToString(ledis.Slice(s)) + s = hex.EncodeToString(hack.Slice(s)) l.PushString(s) return 1 @@ -333,7 +335,7 @@ func luaSetGlobalArray(l *lua.State, name string, ay [][]byte) { l.NewTable() for i := 0; i < 
len(ay); i++ { - l.PushString(ledis.String(ay[i])) + l.PushString(hack.String(ay[i])) l.RawSeti(-2, i+1) } @@ -348,7 +350,7 @@ func luaReplyToLedisReply(l *lua.State) interface{} { switch l.Type(-1) { case lua.LUA_TSTRING: - return ledis.Slice(l.ToString(-1)) + return hack.Slice(l.ToString(-1)) case lua.LUA_TBOOLEAN: if l.ToBoolean(-1) { return int64(1) diff --git a/server/util.go b/server/util.go index c015b60..abd6536 100644 --- a/server/util.go +++ b/server/util.go @@ -3,7 +3,7 @@ package server import ( "bufio" "errors" - "github.com/siddontang/ledisdb/ledis" + "github.com/siddontang/go/hack" "io" "strconv" ) @@ -36,7 +36,7 @@ func ReadBulkTo(rb *bufio.Reader, w io.Writer) error { } else if l[0] == '$' { var n int //handle resp string - if n, err = strconv.Atoi(ledis.String(l[1:])); err != nil { + if n, err = strconv.Atoi(hack.String(l[1:])); err != nil { return err } else if n == -1 { return nil diff --git a/store/store.go b/store/store.go index aa4b485..2edde30 100644 --- a/store/store.go +++ b/store/store.go @@ -16,7 +16,11 @@ import ( ) func getStorePath(cfg *config.Config) string { - return path.Join(cfg.DataDir, fmt.Sprintf("%s_data", cfg.DBName)) + if len(cfg.DBPath) > 0 { + return cfg.DBPath + } else { + return path.Join(cfg.DataDir, fmt.Sprintf("%s_data", cfg.DBName)) + } } func Open(cfg *config.Config) (*DB, error) { diff --git a/tools/build_config.sh b/tools/build_config.sh index 22b7bf2..c97e13d 100755 --- a/tools/build_config.sh +++ b/tools/build_config.sh @@ -13,6 +13,14 @@ touch $OUTPUT source ./dev.sh +# Test godep install +godep path > /dev/null 2>&1 +if [ "$?" = 0 ]; then + echo "GO=godep go" >> $OUTPUT +else + echo "GO=go" >> $OUTPUT +fi + echo "CGO_CFLAGS=$CGO_CFLAGS" >> $OUTPUT echo "CGO_CXXFLAGS=$CGO_CXXFLAGS" >> $OUTPUT echo "CGO_LDFLAGS=$CGO_LDFLAGS" >> $OUTPUT