ledisdb/rpl/file_store.go

412 lines
6.9 KiB
Go
Raw Normal View History

2014-09-22 13:50:51 +04:00
package rpl
2014-09-15 18:42:25 +04:00
import (
2014-09-17 13:54:04 +04:00
"fmt"
2014-09-24 05:46:36 +04:00
"github.com/siddontang/go/log"
2014-11-05 17:35:43 +03:00
"github.com/siddontang/go/num"
2014-11-15 16:20:12 +03:00
"github.com/siddontang/ledisdb/config"
2014-09-17 13:54:04 +04:00
"io/ioutil"
2014-09-15 18:42:25 +04:00
"os"
2014-11-06 16:52:18 +03:00
"sort"
2014-09-15 18:42:25 +04:00
"sync"
2014-11-06 16:52:18 +03:00
"time"
2014-09-15 18:42:25 +04:00
)
// Size and count limits for a single replication log table file.
const (
	// defaultMaxLogFileSize (1 GiB) is applied by NewFileStore when the
	// configured maximum log file size is zero.
	defaultMaxLogFileSize = int64(1 << 30)

	// maxLogFileSize is the upper bound NewFileStore clamps the configured
	// size to. It stays just below 4 GiB so that an offset inside a file
	// fits in a uint32, which reduces memory usage.
	maxLogFileSize = int64(uint32(1<<32 - 1))

	// maxLogNumInFile is not referenced in this file; presumably it caps how
	// many logs one table file may hold (verify against the table writer).
	maxLogNumInFile = uint64(10000000)
)
2014-09-17 13:54:04 +04:00
/*
2014-10-31 10:40:47 +03:00
File Store:
2014-11-05 12:34:14 +03:00
00000001.ldb
00000002.ldb
2014-10-31 10:40:47 +03:00
2014-11-03 12:53:46 +03:00
log: log1 data | log2 data | split data | log1 offset | log 2 offset | offset start pos | offset length | magic data
2014-10-31 10:40:47 +03:00
2014-11-03 12:53:46 +03:00
a log id can never be 0; id 0 is reserved for the split data
2014-11-03 06:22:13 +03:00
if a file has no trailing magic data, replication was not closed gracefully,
so the log data must be repaired
2014-11-05 12:34:14 +03:00
log data: id (bigendian uint64), create time (bigendian uint32), compression (byte), data len(bigendian uint32), data
split data = log0 data + [padding 0] -> file % pagesize() == 0
log0: id 0, create time 0, compression 0, data len 7, data "ledisdb"
2014-11-05 12:34:14 +03:00
log offset: bigendian uint32 | bigendian uint32
offset start pos: bigendian uint64
offset length: bigendian uint32
2014-11-03 12:53:46 +03:00
//sha1 of github.com/siddontang/ledisdb 20 bytes
magic data = "\x1c\x1d\xb8\x88\xff\x9e\x45\x55\x40\xf0\x4c\xda\xe0\xce\x47\xde\x65\x48\x71\x17"
2014-11-03 06:22:13 +03:00
we must guarantee that log ids increase strictly monotonically, one by one:
if log1's id is 1, log2's id must be 2
2014-09-17 13:54:04 +04:00
*/
2014-09-15 18:42:25 +04:00
type FileStore struct {
2014-09-22 13:50:51 +04:00
LogStore
2014-09-15 18:42:25 +04:00
2014-11-15 16:20:12 +03:00
cfg *config.Config
2014-09-15 18:42:25 +04:00
2014-11-06 16:52:18 +03:00
base string
2014-09-17 13:54:04 +04:00
2014-11-06 16:52:18 +03:00
rm sync.RWMutex
wm sync.Mutex
2014-09-17 13:54:04 +04:00
2014-11-06 16:52:18 +03:00
rs tableReaders
w *tableWriter
quit chan struct{}
2014-09-15 18:42:25 +04:00
}
2014-11-15 16:20:12 +03:00
func NewFileStore(base string, cfg *config.Config) (*FileStore, error) {
2014-09-15 18:42:25 +04:00
s := new(FileStore)
s.quit = make(chan struct{})
2014-11-06 16:52:18 +03:00
var err error
if err = os.MkdirAll(base, 0755); err != nil {
2014-09-15 18:42:25 +04:00
return nil, err
}
2014-11-06 16:52:18 +03:00
s.base = base
2014-09-15 18:42:25 +04:00
2014-11-15 16:20:12 +03:00
if cfg.Replication.MaxLogFileSize == 0 {
cfg.Replication.MaxLogFileSize = defaultMaxLogFileSize
2014-11-07 11:35:54 +03:00
}
2014-09-15 18:42:25 +04:00
2014-11-15 16:20:12 +03:00
cfg.Replication.MaxLogFileSize = num.MinInt64(cfg.Replication.MaxLogFileSize, maxLogFileSize)
s.cfg = cfg
2014-11-06 16:52:18 +03:00
if err = s.load(); err != nil {
2014-09-17 13:54:04 +04:00
return nil, err
}
2014-11-06 16:52:18 +03:00
index := int64(1)
if len(s.rs) != 0 {
index = s.rs[len(s.rs)-1].index + 1
}
2014-09-15 18:42:25 +04:00
2014-11-15 16:20:12 +03:00
s.w = newTableWriter(s.base, index, cfg.Replication.MaxLogFileSize)
s.w.SetSyncType(cfg.Replication.SyncLog)
2014-11-07 11:35:54 +03:00
go s.checkTableReaders()
2014-11-06 16:52:18 +03:00
return s, nil
2014-09-15 18:42:25 +04:00
}
2014-11-07 11:35:54 +03:00
func (s *FileStore) GetLog(id uint64, l *Log) error {
//first search in table writer
if err := s.w.GetLog(id, l); err == nil {
return nil
} else if err != ErrLogNotFound {
return err
}
s.rm.RLock()
t := s.rs.Search(id)
if t == nil {
s.rm.RUnlock()
return ErrLogNotFound
}
err := t.GetLog(id, l)
s.rm.RUnlock()
return err
2014-09-15 18:42:25 +04:00
}
func (s *FileStore) FirstID() (uint64, error) {
2014-11-07 11:35:54 +03:00
id := uint64(0)
s.rm.RLock()
if len(s.rs) > 0 {
id = s.rs[0].first
} else {
id = 0
}
s.rm.RUnlock()
if id > 0 {
return id, nil
}
//if id = 0,
return s.w.First(), nil
2014-09-15 18:42:25 +04:00
}
func (s *FileStore) LastID() (uint64, error) {
2014-11-07 11:35:54 +03:00
id := s.w.Last()
if id > 0 {
return id, nil
}
//if table writer has no last id, we may find in the last table reader
s.rm.RLock()
if len(s.rs) > 0 {
id = s.rs[len(s.rs)-1].last
}
s.rm.RUnlock()
return id, nil
2014-09-15 18:42:25 +04:00
}
2014-11-06 16:52:18 +03:00
// StoreLog appends l to the store while holding the write mutex.
func (s *FileStore) StoreLog(l *Log) error {
	s.wm.Lock()
	defer s.wm.Unlock()

	return s.storeLog(l)
}
2014-11-06 16:52:18 +03:00
func (s *FileStore) storeLog(l *Log) error {
2014-11-06 16:52:18 +03:00
err := s.w.StoreLog(l)
if err == nil {
return nil
} else if err != errTableNeedFlush {
return err
}
var r *tableReader
r, err = s.w.Flush()
if err != nil {
2014-11-06 16:52:18 +03:00
log.Error("write table flush error %s, can not store now", err.Error())
s.w.Close()
2014-11-07 11:35:54 +03:00
2014-11-06 16:52:18 +03:00
return err
}
s.rm.Lock()
2014-11-06 16:52:18 +03:00
s.rs = append(s.rs, r)
s.rm.Unlock()
2014-09-15 18:42:25 +04:00
err = s.w.StoreLog(l)
return err
2014-09-17 13:54:04 +04:00
}
2014-11-15 15:59:40 +03:00
func (s *FileStore) PurgeExpired(n int64) error {
2014-11-06 16:52:18 +03:00
s.rm.Lock()
2014-09-15 18:42:25 +04:00
2014-11-06 16:52:18 +03:00
purges := []*tableReader{}
2014-09-15 18:42:25 +04:00
2014-11-06 16:52:18 +03:00
t := uint32(time.Now().Unix() - int64(n))
2014-09-17 13:54:04 +04:00
2014-11-07 11:35:54 +03:00
for i, r := range s.rs {
if r.lastTime > t {
purges = s.rs[0:i]
s.rs = s.rs[i:]
break
2014-11-06 16:52:18 +03:00
}
}
2014-09-17 13:54:04 +04:00
2014-11-06 16:52:18 +03:00
s.rm.Unlock()
2014-11-15 16:20:12 +03:00
s.purgeTableReaders(purges)
2014-09-17 13:54:04 +04:00
return nil
}
2014-11-11 10:20:26 +03:00
// Sync delegates to the current table writer's Sync and returns its error.
func (s *FileStore) Sync() error {
return s.w.Sync()
}
2014-11-06 16:52:18 +03:00
func (s *FileStore) Clear() error {
2014-11-07 11:35:54 +03:00
s.wm.Lock()
s.rm.Lock()
defer func() {
s.rm.Unlock()
s.wm.Unlock()
}()
s.w.Close()
for i := range s.rs {
s.rs[i].Close()
}
s.rs = tableReaders{}
if err := os.RemoveAll(s.base); err != nil {
return err
}
if err := os.MkdirAll(s.base, 0755); err != nil {
return err
}
2014-11-15 16:20:12 +03:00
s.w = newTableWriter(s.base, 1, s.cfg.Replication.MaxLogFileSize)
2014-11-07 11:35:54 +03:00
2014-11-06 16:52:18 +03:00
return nil
2014-09-17 13:54:04 +04:00
}
2014-11-06 16:52:18 +03:00
func (s *FileStore) Close() error {
close(s.quit)
2014-11-06 16:52:18 +03:00
s.wm.Lock()
s.rm.Lock()
2014-09-17 13:54:04 +04:00
2014-11-07 11:35:54 +03:00
if r, err := s.w.Flush(); err != nil {
2014-11-11 10:20:26 +03:00
if err != errNilHandler {
log.Error("close err: %s", err.Error())
}
2014-11-07 11:35:54 +03:00
} else {
r.Close()
s.w.Close()
}
2014-11-06 16:52:18 +03:00
for i := range s.rs {
s.rs[i].Close()
2014-09-17 13:54:04 +04:00
}
2014-11-07 11:35:54 +03:00
s.rs = tableReaders{}
2014-11-06 16:52:18 +03:00
s.rm.Unlock()
2014-11-07 11:35:54 +03:00
s.wm.Unlock()
2014-09-17 13:54:04 +04:00
return nil
}
func (s *FileStore) checkTableReaders() {
t := time.NewTicker(60 * time.Second)
defer t.Stop()
for {
select {
case <-t.C:
s.rm.Lock()
for _, r := range s.rs {
if !r.Keepalived() {
r.Close()
}
}
2014-11-15 16:20:12 +03:00
purges := []*tableReader{}
maxNum := s.cfg.Replication.MaxLogFileNum
num := len(s.rs)
if num > maxNum {
purges = s.rs[:num-maxNum]
s.rs = s.rs[num-maxNum:]
}
s.rm.Unlock()
2014-11-15 16:20:12 +03:00
s.purgeTableReaders(purges)
case <-s.quit:
return
}
}
}
2014-11-15 16:20:12 +03:00
// purgeTableReaders closes each reader in purges and removes its file.
// A failed removal is logged and does not stop the remaining purges.
func (s *FileStore) purgeTableReaders(purges []*tableReader) {
	for i := range purges {
		r := purges[i]

		// Read the file name before closing the reader.
		path := r.name
		r.Close()

		if err := os.Remove(path); err != nil {
			log.Error("purge table %s err: %s", path, err.Error())
		}
	}
}
2014-11-06 16:52:18 +03:00
func (s *FileStore) load() error {
fs, err := ioutil.ReadDir(s.base)
if err != nil {
2014-09-17 13:54:04 +04:00
return err
}
2014-11-15 15:59:40 +03:00
s.rs = make(tableReaders, 0, len(fs))
2014-11-06 16:52:18 +03:00
var r *tableReader
var index int64
for _, f := range fs {
if _, err := fmt.Sscanf(f.Name(), "%08d.ldb", &index); err == nil {
if r, err = newTableReader(s.base, index); err != nil {
log.Error("load table %s err: %s", f.Name(), err.Error())
} else {
s.rs = append(s.rs, r)
}
}
}
2014-09-17 13:54:04 +04:00
2014-11-06 16:52:18 +03:00
if err := s.rs.check(); err != nil {
2014-09-17 13:54:04 +04:00
return err
}
return nil
}
2014-11-06 16:52:18 +03:00
// tableReaders is a list of table readers. Its Len, Swap and Less methods
// let sort.Sort order it by each reader's first log id.
type tableReaders []*tableReader
// Len returns the number of table readers.
func (ts tableReaders) Len() int {
return len(ts)
}
// Swap exchanges the readers at positions i and j.
func (ts tableReaders) Swap(i, j int) {
ts[i], ts[j] = ts[j], ts[i]
}
func (ts tableReaders) Less(i, j int) bool {
2014-11-07 11:35:54 +03:00
return ts[i].first < ts[j].first
2014-11-06 16:52:18 +03:00
}
func (ts tableReaders) Search(id uint64) *tableReader {
2014-11-07 11:35:54 +03:00
i, j := 0, len(ts)-1
2014-11-06 16:52:18 +03:00
2014-11-07 11:35:54 +03:00
for i <= j {
h := i + (j-i)/2
if ts[h].first <= id && id <= ts[h].last {
return ts[h]
} else if ts[h].last < id {
i = h + 1
} else {
j = h - 1
}
2014-09-17 13:54:04 +04:00
}
2014-11-07 11:35:54 +03:00
return nil
2014-11-06 16:52:18 +03:00
}
2014-09-17 13:54:04 +04:00
2014-11-06 16:52:18 +03:00
func (ts tableReaders) check() error {
if len(ts) == 0 {
return nil
2014-09-17 13:54:04 +04:00
}
2014-11-06 16:52:18 +03:00
sort.Sort(ts)
first := ts[0].first
last := ts[0].last
index := ts[0].index
2014-09-17 13:54:04 +04:00
2014-11-06 16:52:18 +03:00
if first == 0 || first > last {
return fmt.Errorf("invalid log in table %s", ts[0].name)
2014-09-17 13:54:04 +04:00
}
2014-11-06 16:52:18 +03:00
for i := 1; i < len(ts); i++ {
if ts[i].first <= last {
return fmt.Errorf("invalid first log id %d in table %s", ts[i].first, ts[i].name)
}
2014-09-17 13:54:04 +04:00
2014-11-07 11:35:54 +03:00
if ts[i].index <= index {
2014-11-06 16:52:18 +03:00
return fmt.Errorf("invalid index %d in table %s", ts[i].index, ts[i].name)
}
2014-09-17 13:54:04 +04:00
2014-11-06 16:52:18 +03:00
first = ts[i].first
last = ts[i].last
index = ts[i].index
}
return nil
2014-09-17 13:54:04 +04:00
}