2014-09-22 13:50:51 +04:00
|
|
|
package rpl
|
2014-09-15 18:42:25 +04:00
|
|
|
|
|
|
|
import (
|
2014-09-17 13:54:04 +04:00
|
|
|
"fmt"
|
2014-09-24 05:46:36 +04:00
|
|
|
"github.com/siddontang/go/log"
|
2014-11-05 17:35:43 +03:00
|
|
|
"github.com/siddontang/go/num"
|
2014-11-15 16:20:12 +03:00
|
|
|
"github.com/siddontang/ledisdb/config"
|
2014-09-17 13:54:04 +04:00
|
|
|
"io/ioutil"
|
2014-09-15 18:42:25 +04:00
|
|
|
"os"
|
2014-11-06 16:52:18 +03:00
|
|
|
"sort"
|
2014-09-15 18:42:25 +04:00
|
|
|
"sync"
|
2014-11-06 16:52:18 +03:00
|
|
|
"time"
|
2014-09-15 18:42:25 +04:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// defaultMaxLogFileSize is used when cfg.Replication.MaxLogFileSize is 0.
	defaultMaxLogFileSize = int64(256 * 1024 * 1024)

	// maxLogFileSize is the hard upper bound; a configured size is clamped to it.
	maxLogFileSize = int64(1024 * 1024 * 1024)

	// defaultLogNumInFile — presumably the default number of logs kept per
	// table file; its use is not visible in this chunk (TODO confirm).
	defaultLogNumInFile = int64(1024 * 1024)
)
|
|
|
|
|
2014-09-17 13:54:04 +04:00
|
|
|
/*
|
2014-10-31 10:40:47 +03:00
|
|
|
File Store:
|
2014-11-20 12:33:38 +03:00
|
|
|
00000001.data
|
|
|
|
00000001.meta
|
|
|
|
00000002.data
|
|
|
|
00000002.meta
|
2014-10-31 10:40:47 +03:00
|
|
|
|
2014-11-20 12:33:38 +03:00
|
|
|
data: log1 data | log2 data | magic data
|
2014-10-31 10:40:47 +03:00
|
|
|
|
2014-11-03 06:22:13 +03:00
|
|
|
if data has no magic data, it means that we don't close replication gracefully.
|
|
|
|
so we must repair the log data
|
2014-11-05 12:34:14 +03:00
|
|
|
log data: id (bigendian uint64), create time (bigendian uint32), compression (byte), data len(bigendian uint32), data
|
2014-11-06 12:37:22 +03:00
|
|
|
split data = log0 data + [padding 0] -> file % pagesize() == 0
|
2014-11-05 12:34:14 +03:00
|
|
|
|
2014-11-20 12:33:38 +03:00
|
|
|
meta: log1 offset | log2 offset
|
2014-11-05 12:34:14 +03:00
|
|
|
log offset: bigendian uint32 | bigendian uint32
|
|
|
|
|
2014-11-03 12:53:46 +03:00
|
|
|
//sha1 of github.com/siddontang/ledisdb 20 bytes
|
|
|
|
magic data = "\x1c\x1d\xb8\x88\xff\x9e\x45\x55\x40\xf0\x4c\xda\xe0\xce\x47\xde\x65\x48\x71\x17"
|
2014-11-03 06:22:13 +03:00
|
|
|
|
|
|
|
we must guarantee that log ids are strictly monotonically increasing:
if log1's id is 1, then log2's id must be 2
|
2014-09-17 13:54:04 +04:00
|
|
|
*/
|
|
|
|
|
2014-09-15 18:42:25 +04:00
|
|
|
type FileStore struct {
|
2014-09-22 13:50:51 +04:00
|
|
|
LogStore
|
2014-09-15 18:42:25 +04:00
|
|
|
|
2014-11-15 16:20:12 +03:00
|
|
|
cfg *config.Config
|
2014-09-15 18:42:25 +04:00
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
base string
|
2014-09-17 13:54:04 +04:00
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
rm sync.RWMutex
|
|
|
|
wm sync.Mutex
|
2014-09-17 13:54:04 +04:00
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
rs tableReaders
|
|
|
|
w *tableWriter
|
2014-11-13 08:41:07 +03:00
|
|
|
|
|
|
|
quit chan struct{}
|
2014-09-15 18:42:25 +04:00
|
|
|
}
|
|
|
|
|
2014-11-15 16:20:12 +03:00
|
|
|
func NewFileStore(base string, cfg *config.Config) (*FileStore, error) {
|
2014-09-15 18:42:25 +04:00
|
|
|
s := new(FileStore)
|
|
|
|
|
2014-11-13 08:41:07 +03:00
|
|
|
s.quit = make(chan struct{})
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
var err error
|
|
|
|
|
|
|
|
if err = os.MkdirAll(base, 0755); err != nil {
|
2014-09-15 18:42:25 +04:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
s.base = base
|
2014-09-15 18:42:25 +04:00
|
|
|
|
2014-11-15 16:20:12 +03:00
|
|
|
if cfg.Replication.MaxLogFileSize == 0 {
|
|
|
|
cfg.Replication.MaxLogFileSize = defaultMaxLogFileSize
|
2014-11-07 11:35:54 +03:00
|
|
|
}
|
2014-09-15 18:42:25 +04:00
|
|
|
|
2014-11-15 16:20:12 +03:00
|
|
|
cfg.Replication.MaxLogFileSize = num.MinInt64(cfg.Replication.MaxLogFileSize, maxLogFileSize)
|
|
|
|
|
|
|
|
s.cfg = cfg
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
if err = s.load(); err != nil {
|
2014-09-17 13:54:04 +04:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
index := int64(1)
|
|
|
|
if len(s.rs) != 0 {
|
|
|
|
index = s.rs[len(s.rs)-1].index + 1
|
|
|
|
}
|
2014-09-15 18:42:25 +04:00
|
|
|
|
2014-11-20 17:28:08 +03:00
|
|
|
s.w = newTableWriter(s.base, index, cfg.Replication.MaxLogFileSize, cfg.Replication.UseMmap)
|
2014-11-15 16:20:12 +03:00
|
|
|
s.w.SetSyncType(cfg.Replication.SyncLog)
|
2014-11-07 11:35:54 +03:00
|
|
|
|
2014-11-13 08:41:07 +03:00
|
|
|
go s.checkTableReaders()
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
return s, nil
|
2014-09-15 18:42:25 +04:00
|
|
|
}
|
|
|
|
|
2014-11-07 11:35:54 +03:00
|
|
|
func (s *FileStore) GetLog(id uint64, l *Log) error {
|
|
|
|
//first search in table writer
|
|
|
|
if err := s.w.GetLog(id, l); err == nil {
|
|
|
|
return nil
|
|
|
|
} else if err != ErrLogNotFound {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
s.rm.RLock()
|
|
|
|
t := s.rs.Search(id)
|
|
|
|
|
|
|
|
if t == nil {
|
|
|
|
s.rm.RUnlock()
|
|
|
|
|
|
|
|
return ErrLogNotFound
|
|
|
|
}
|
|
|
|
|
|
|
|
err := t.GetLog(id, l)
|
|
|
|
s.rm.RUnlock()
|
|
|
|
|
|
|
|
return err
|
2014-09-15 18:42:25 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
func (s *FileStore) FirstID() (uint64, error) {
|
2014-11-07 11:35:54 +03:00
|
|
|
id := uint64(0)
|
|
|
|
|
|
|
|
s.rm.RLock()
|
|
|
|
if len(s.rs) > 0 {
|
|
|
|
id = s.rs[0].first
|
|
|
|
} else {
|
|
|
|
id = 0
|
|
|
|
}
|
|
|
|
s.rm.RUnlock()
|
|
|
|
|
|
|
|
if id > 0 {
|
|
|
|
return id, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
//if id = 0,
|
|
|
|
|
|
|
|
return s.w.First(), nil
|
2014-09-15 18:42:25 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
func (s *FileStore) LastID() (uint64, error) {
|
2014-11-07 11:35:54 +03:00
|
|
|
id := s.w.Last()
|
|
|
|
if id > 0 {
|
|
|
|
return id, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
//if table writer has no last id, we may find in the last table reader
|
|
|
|
|
|
|
|
s.rm.RLock()
|
|
|
|
if len(s.rs) > 0 {
|
|
|
|
id = s.rs[len(s.rs)-1].last
|
|
|
|
}
|
|
|
|
s.rm.RUnlock()
|
|
|
|
|
|
|
|
return id, nil
|
2014-09-15 18:42:25 +04:00
|
|
|
}
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
func (s *FileStore) StoreLog(l *Log) error {
|
|
|
|
s.wm.Lock()
|
2014-11-19 05:26:50 +03:00
|
|
|
err := s.storeLog(l)
|
|
|
|
s.wm.Unlock()
|
|
|
|
return err
|
|
|
|
}
|
2014-11-06 16:52:18 +03:00
|
|
|
|
2014-11-19 05:26:50 +03:00
|
|
|
func (s *FileStore) storeLog(l *Log) error {
|
2014-11-06 16:52:18 +03:00
|
|
|
err := s.w.StoreLog(l)
|
|
|
|
if err == nil {
|
|
|
|
return nil
|
|
|
|
} else if err != errTableNeedFlush {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
var r *tableReader
|
2014-11-19 05:26:50 +03:00
|
|
|
r, err = s.w.Flush()
|
|
|
|
|
|
|
|
if err != nil {
|
2014-11-21 05:56:56 +03:00
|
|
|
log.Fatal("write table flush error %s, can not store!!!", err.Error())
|
2014-11-06 16:52:18 +03:00
|
|
|
|
|
|
|
s.w.Close()
|
2014-11-07 11:35:54 +03:00
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2014-11-19 05:26:50 +03:00
|
|
|
s.rm.Lock()
|
2014-11-06 16:52:18 +03:00
|
|
|
s.rs = append(s.rs, r)
|
|
|
|
s.rm.Unlock()
|
2014-09-15 18:42:25 +04:00
|
|
|
|
2014-11-19 05:26:50 +03:00
|
|
|
err = s.w.StoreLog(l)
|
|
|
|
|
|
|
|
return err
|
2014-09-17 13:54:04 +04:00
|
|
|
}
|
|
|
|
|
2014-11-15 15:59:40 +03:00
|
|
|
func (s *FileStore) PurgeExpired(n int64) error {
|
2014-11-06 16:52:18 +03:00
|
|
|
s.rm.Lock()
|
2014-09-15 18:42:25 +04:00
|
|
|
|
2014-11-27 15:51:21 +03:00
|
|
|
var purges []*tableReader
|
2014-09-15 18:42:25 +04:00
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
t := uint32(time.Now().Unix() - int64(n))
|
2014-09-17 13:54:04 +04:00
|
|
|
|
2014-11-07 11:35:54 +03:00
|
|
|
for i, r := range s.rs {
|
|
|
|
if r.lastTime > t {
|
2014-11-27 15:51:21 +03:00
|
|
|
purges = append([]*tableReader{}, s.rs[0:i]...)
|
|
|
|
n := copy(s.rs, s.rs[i:])
|
|
|
|
s.rs = s.rs[0:n]
|
2014-11-07 11:35:54 +03:00
|
|
|
break
|
2014-11-06 16:52:18 +03:00
|
|
|
}
|
|
|
|
}
|
2014-09-17 13:54:04 +04:00
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
s.rm.Unlock()
|
|
|
|
|
2014-11-15 16:20:12 +03:00
|
|
|
s.purgeTableReaders(purges)
|
2014-09-17 13:54:04 +04:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-11-11 10:20:26 +03:00
|
|
|
func (s *FileStore) Sync() error {
|
|
|
|
return s.w.Sync()
|
|
|
|
}
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
func (s *FileStore) Clear() error {
|
2014-11-07 11:35:54 +03:00
|
|
|
s.wm.Lock()
|
|
|
|
s.rm.Lock()
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
s.rm.Unlock()
|
|
|
|
s.wm.Unlock()
|
|
|
|
}()
|
|
|
|
|
|
|
|
s.w.Close()
|
|
|
|
|
|
|
|
for i := range s.rs {
|
|
|
|
s.rs[i].Close()
|
|
|
|
}
|
|
|
|
|
|
|
|
s.rs = tableReaders{}
|
|
|
|
|
|
|
|
if err := os.RemoveAll(s.base); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := os.MkdirAll(s.base, 0755); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2014-11-20 17:28:08 +03:00
|
|
|
s.w = newTableWriter(s.base, 1, s.cfg.Replication.MaxLogFileSize, s.cfg.Replication.UseMmap)
|
2014-11-07 11:35:54 +03:00
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
return nil
|
2014-09-17 13:54:04 +04:00
|
|
|
}
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
func (s *FileStore) Close() error {
|
2014-11-13 08:41:07 +03:00
|
|
|
close(s.quit)
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
s.wm.Lock()
|
|
|
|
s.rm.Lock()
|
2014-09-17 13:54:04 +04:00
|
|
|
|
2014-11-07 11:35:54 +03:00
|
|
|
if r, err := s.w.Flush(); err != nil {
|
2014-11-11 10:20:26 +03:00
|
|
|
if err != errNilHandler {
|
|
|
|
log.Error("close err: %s", err.Error())
|
|
|
|
}
|
2014-11-07 11:35:54 +03:00
|
|
|
} else {
|
|
|
|
r.Close()
|
|
|
|
s.w.Close()
|
|
|
|
}
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
for i := range s.rs {
|
|
|
|
s.rs[i].Close()
|
2014-09-17 13:54:04 +04:00
|
|
|
}
|
2014-11-20 12:33:38 +03:00
|
|
|
|
2014-11-07 11:35:54 +03:00
|
|
|
s.rs = tableReaders{}
|
2014-11-06 16:52:18 +03:00
|
|
|
|
|
|
|
s.rm.Unlock()
|
2014-11-07 11:35:54 +03:00
|
|
|
s.wm.Unlock()
|
2014-09-17 13:54:04 +04:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-11-13 08:41:07 +03:00
|
|
|
func (s *FileStore) checkTableReaders() {
|
|
|
|
t := time.NewTicker(60 * time.Second)
|
|
|
|
defer t.Stop()
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-t.C:
|
|
|
|
s.rm.Lock()
|
|
|
|
|
|
|
|
for _, r := range s.rs {
|
|
|
|
if !r.Keepalived() {
|
|
|
|
r.Close()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-15 16:20:12 +03:00
|
|
|
purges := []*tableReader{}
|
|
|
|
maxNum := s.cfg.Replication.MaxLogFileNum
|
|
|
|
num := len(s.rs)
|
|
|
|
if num > maxNum {
|
|
|
|
purges = s.rs[:num-maxNum]
|
|
|
|
s.rs = s.rs[num-maxNum:]
|
|
|
|
}
|
|
|
|
|
2014-11-13 08:41:07 +03:00
|
|
|
s.rm.Unlock()
|
2014-11-15 16:20:12 +03:00
|
|
|
|
|
|
|
s.purgeTableReaders(purges)
|
|
|
|
|
2014-11-13 08:41:07 +03:00
|
|
|
case <-s.quit:
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-15 16:20:12 +03:00
|
|
|
func (s *FileStore) purgeTableReaders(purges []*tableReader) {
|
|
|
|
for _, r := range purges {
|
2014-11-20 12:33:38 +03:00
|
|
|
dataName := fmtTableDataName(r.base, r.index)
|
|
|
|
metaName := fmtTableMetaName(r.base, r.index)
|
2014-11-15 16:20:12 +03:00
|
|
|
r.Close()
|
2014-11-20 12:33:38 +03:00
|
|
|
if err := os.Remove(dataName); err != nil {
|
|
|
|
log.Error("purge table data %s err: %s", dataName, err.Error())
|
2014-11-15 16:20:12 +03:00
|
|
|
}
|
2014-11-20 12:33:38 +03:00
|
|
|
if err := os.Remove(metaName); err != nil {
|
|
|
|
log.Error("purge table meta %s err: %s", metaName, err.Error())
|
|
|
|
}
|
|
|
|
|
2014-11-15 16:20:12 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
func (s *FileStore) load() error {
|
|
|
|
fs, err := ioutil.ReadDir(s.base)
|
|
|
|
if err != nil {
|
2014-09-17 13:54:04 +04:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2014-11-15 15:59:40 +03:00
|
|
|
s.rs = make(tableReaders, 0, len(fs))
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
var r *tableReader
|
|
|
|
var index int64
|
|
|
|
for _, f := range fs {
|
2014-11-20 12:33:38 +03:00
|
|
|
if _, err := fmt.Sscanf(f.Name(), "%08d.data", &index); err == nil {
|
2014-11-20 17:28:08 +03:00
|
|
|
if r, err = newTableReader(s.base, index, s.cfg.Replication.UseMmap); err != nil {
|
2014-11-06 16:52:18 +03:00
|
|
|
log.Error("load table %s err: %s", f.Name(), err.Error())
|
|
|
|
} else {
|
|
|
|
s.rs = append(s.rs, r)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-09-17 13:54:04 +04:00
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
if err := s.rs.check(); err != nil {
|
2014-09-17 13:54:04 +04:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
// tableReaders is the set of sealed tables, kept sorted by first log id.
type tableReaders []*tableReader
|
|
|
|
|
|
|
|
// Len implements sort.Interface.
func (ts tableReaders) Len() int {
	return len(ts)
}
|
|
|
|
|
|
|
|
// Swap implements sort.Interface.
func (ts tableReaders) Swap(i, j int) {
	ts[i], ts[j] = ts[j], ts[i]
}
|
|
|
|
|
|
|
|
// Less implements sort.Interface, ordering tables by their first log id.
func (ts tableReaders) Less(i, j int) bool {
	return ts[i].first < ts[j].first
}
|
|
|
|
|
|
|
|
func (ts tableReaders) Search(id uint64) *tableReader {
|
2014-11-07 11:35:54 +03:00
|
|
|
i, j := 0, len(ts)-1
|
2014-11-06 16:52:18 +03:00
|
|
|
|
2014-11-07 11:35:54 +03:00
|
|
|
for i <= j {
|
|
|
|
h := i + (j-i)/2
|
|
|
|
|
|
|
|
if ts[h].first <= id && id <= ts[h].last {
|
|
|
|
return ts[h]
|
|
|
|
} else if ts[h].last < id {
|
|
|
|
i = h + 1
|
|
|
|
} else {
|
|
|
|
j = h - 1
|
|
|
|
}
|
2014-09-17 13:54:04 +04:00
|
|
|
}
|
2014-11-07 11:35:54 +03:00
|
|
|
|
|
|
|
return nil
|
2014-11-06 16:52:18 +03:00
|
|
|
}
|
2014-09-17 13:54:04 +04:00
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
func (ts tableReaders) check() error {
|
|
|
|
if len(ts) == 0 {
|
|
|
|
return nil
|
2014-09-17 13:54:04 +04:00
|
|
|
}
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
sort.Sort(ts)
|
|
|
|
|
|
|
|
first := ts[0].first
|
|
|
|
last := ts[0].last
|
|
|
|
index := ts[0].index
|
2014-09-17 13:54:04 +04:00
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
if first == 0 || first > last {
|
2014-11-20 12:33:38 +03:00
|
|
|
return fmt.Errorf("invalid log in table %s", ts[0])
|
2014-09-17 13:54:04 +04:00
|
|
|
}
|
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
for i := 1; i < len(ts); i++ {
|
|
|
|
if ts[i].first <= last {
|
2014-11-20 12:33:38 +03:00
|
|
|
return fmt.Errorf("invalid first log id %d in table %s", ts[i].first, ts[i])
|
2014-11-06 16:52:18 +03:00
|
|
|
}
|
2014-09-17 13:54:04 +04:00
|
|
|
|
2014-11-07 11:35:54 +03:00
|
|
|
if ts[i].index <= index {
|
2014-11-20 12:33:38 +03:00
|
|
|
return fmt.Errorf("invalid index %d in table %s", ts[i].index, ts[i])
|
2014-11-06 16:52:18 +03:00
|
|
|
}
|
2014-09-17 13:54:04 +04:00
|
|
|
|
2014-11-06 16:52:18 +03:00
|
|
|
first = ts[i].first
|
|
|
|
last = ts[i].last
|
|
|
|
index = ts[i].index
|
|
|
|
}
|
|
|
|
return nil
|
2014-09-17 13:54:04 +04:00
|
|
|
}
|