ledisdb/server/replication.go

344 lines
5.9 KiB
Go
Raw Normal View History

package server
2014-06-08 12:43:59 +04:00
import (
2014-06-09 13:23:32 +04:00
"bufio"
"bytes"
"encoding/binary"
2014-06-08 12:43:59 +04:00
"encoding/json"
2014-06-09 13:23:32 +04:00
"errors"
"fmt"
"github.com/siddontang/go-log/log"
2014-07-04 13:55:47 +04:00
"github.com/siddontang/go-snappy/snappy"
2014-06-09 13:23:32 +04:00
"github.com/siddontang/ledisdb/ledis"
2014-06-08 12:43:59 +04:00
"io/ioutil"
2014-06-09 13:23:32 +04:00
"net"
2014-06-08 12:43:59 +04:00
"os"
"path"
2014-06-09 13:23:32 +04:00
"strconv"
"sync"
"time"
2014-06-08 12:43:59 +04:00
)
2014-06-09 13:23:32 +04:00
// errConnectMaster carries the message "connect master error".
// Nothing in the visible part of this file returns it; presumably it is
// reserved for connection failures - confirm before relying on it.
var errConnectMaster = errors.New("connect master error")
type master struct {
sync.Mutex
addr string `json:"addr"`
logFileIndex int64 `json:"log_file_index"`
logPos int64 `json:"log_pos"`
c net.Conn
rb *bufio.Reader
app *App
quit chan struct{}
infoName string
infoNameBak string
wg sync.WaitGroup
syncBuf bytes.Buffer
2014-07-04 13:55:47 +04:00
compressBuf []byte
2014-06-09 13:23:32 +04:00
}
func newMaster(app *App) *master {
m := new(master)
m.app = app
m.infoName = path.Join(m.app.cfg.DataDir, "master.info")
m.infoNameBak = fmt.Sprintf("%s.bak", m.infoName)
2014-06-10 06:41:50 +04:00
m.quit = make(chan struct{}, 1)
2014-06-09 13:23:32 +04:00
2014-07-04 13:55:47 +04:00
m.compressBuf = make([]byte, 256)
2014-06-09 13:23:32 +04:00
//if load error, we will start a fullsync later
m.loadInfo()
return m
2014-06-08 12:43:59 +04:00
}
2014-06-09 13:23:32 +04:00
func (m *master) Close() {
2014-06-10 06:41:50 +04:00
select {
case m.quit <- struct{}{}:
default:
}
2014-06-09 13:23:32 +04:00
if m.c != nil {
m.c.Close()
m.c = nil
}
m.wg.Wait()
2014-06-08 12:43:59 +04:00
}
2014-06-09 13:23:32 +04:00
func (m *master) loadInfo() error {
data, err := ioutil.ReadFile(m.infoName)
2014-06-08 12:43:59 +04:00
if err != nil {
if os.IsNotExist(err) {
return nil
} else {
return err
}
}
2014-06-09 13:23:32 +04:00
if err = json.Unmarshal(data, m); err != nil {
2014-06-08 12:43:59 +04:00
return err
}
return nil
}
2014-06-09 13:23:32 +04:00
func (m *master) saveInfo() error {
2014-06-12 17:29:41 +04:00
data, err := json.Marshal(struct {
Addr string `json:"addr"`
LogFileIndex int64 `json:"log_file_index"`
LogPos int64 `json:"log_pos"`
}{
m.addr,
m.logFileIndex,
m.logPos,
})
2014-06-08 12:43:59 +04:00
if err != nil {
return err
}
var fd *os.File
2014-06-09 13:23:32 +04:00
fd, err = os.OpenFile(m.infoNameBak, os.O_CREATE|os.O_WRONLY, os.ModePerm)
2014-06-08 12:43:59 +04:00
if err != nil {
return err
}
if _, err = fd.Write(data); err != nil {
fd.Close()
return err
}
fd.Close()
2014-06-09 13:23:32 +04:00
return os.Rename(m.infoNameBak, m.infoName)
2014-06-08 12:43:59 +04:00
}
2014-06-09 13:23:32 +04:00
func (m *master) connect() error {
if len(m.addr) == 0 {
return fmt.Errorf("no assign master addr")
}
if m.c != nil {
m.c.Close()
m.c = nil
2014-06-08 12:43:59 +04:00
}
2014-06-09 13:23:32 +04:00
if c, err := net.Dial("tcp", m.addr); err != nil {
return err
} else {
m.c = c
m.rb = bufio.NewReaderSize(m.c, 4096)
}
return nil
}
2014-06-09 13:23:32 +04:00
func (m *master) resetInfo(addr string) {
m.addr = addr
m.logFileIndex = 0
m.logPos = 0
2014-06-08 12:43:59 +04:00
}
2014-06-09 13:23:32 +04:00
// stopReplication shuts the replication down via Close and then persists the
// current master info. A save failure is logged and returned.
//
// Close waits for the replication goroutine to finish, so calling this from
// inside that goroutine blocks forever.
func (m *master) stopReplication() error {
	m.Close()

	err := m.saveInfo()
	if err != nil {
		log.Error("save master info error %s", err.Error())
	}
	return err
}
func (m *master) startReplication(masterAddr string) error {
//stop last replcation, if avaliable
m.Close()
if masterAddr != m.addr {
m.resetInfo(masterAddr)
if err := m.saveInfo(); err != nil {
log.Error("save master info error %s", err.Error())
2014-06-08 12:43:59 +04:00
return err
}
2014-06-09 13:23:32 +04:00
}
2014-06-10 06:41:50 +04:00
m.quit = make(chan struct{}, 1)
2014-06-09 13:23:32 +04:00
go m.runReplication()
return nil
}
func (m *master) runReplication() {
m.wg.Add(1)
defer m.wg.Done()
for {
select {
case <-m.quit:
return
default:
if err := m.connect(); err != nil {
log.Error("connect master %s error %s, try 2s later", m.addr, err.Error())
time.Sleep(2 * time.Second)
continue
}
}
if m.logFileIndex == 0 {
//try a fullsync
if err := m.fullSync(); err != nil {
log.Warn("full sync error %s", err.Error())
return
}
if m.logFileIndex == 0 {
//master not support binlog, we cannot sync, so stop replication
m.stopReplication()
return
}
}
for {
2014-06-10 06:41:50 +04:00
for {
lastIndex := m.logFileIndex
lastPos := m.logPos
2014-06-09 13:23:32 +04:00
if err := m.sync(); err != nil {
log.Warn("sync error %s", err.Error())
return
}
2014-06-10 06:41:50 +04:00
if m.logFileIndex == lastIndex && m.logPos == lastPos {
//sync no data, wait 1s and retry
break
}
}
select {
2014-06-09 13:23:32 +04:00
case <-m.quit:
return
2014-06-10 06:41:50 +04:00
case <-time.After(1 * time.Second):
break
2014-06-09 13:23:32 +04:00
}
}
}
return
}
// fullSyncCmd is the request sent for a full sync: one argument ("*1"),
// the 8-byte word "fullsync".
var fullSyncCmd = []byte("*1\r\n$8\r\nfullsync\r\n")

// syncCmdFormat is the fmt template of the incremental sync request
// "sync <index> <pos>": three arguments ("*3"), each preceded by its byte
// length. The verbs are, in order: len(index), index, len(pos), pos.
var syncCmdFormat = "*3\r\n$4\r\nsync\r\n$%d\r\n%s\r\n$%d\r\n%s\r\n"
2014-06-09 13:23:32 +04:00
func (m *master) fullSync() error {
if _, err := m.c.Write(fullSyncCmd); err != nil {
return err
}
dumpPath := path.Join(m.app.cfg.DataDir, "master.dump")
f, err := os.OpenFile(dumpPath, os.O_CREATE|os.O_WRONLY, os.ModePerm)
if err != nil {
return err
}
defer os.Remove(dumpPath)
err = readBulkTo(m.rb, f)
f.Close()
if err != nil {
log.Error("read dump data error %s", err.Error())
return err
}
if err = m.app.ldb.FlushAll(); err != nil {
return err
}
var head *ledis.MasterInfo
head, err = m.app.ldb.LoadDumpFile(dumpPath)
if err != nil {
log.Error("load dump file error %s", err.Error())
return err
}
m.logFileIndex = head.LogFileIndex
m.logPos = head.LogPos
2014-06-12 17:29:41 +04:00
return m.saveInfo()
2014-06-09 13:23:32 +04:00
}
func (m *master) sync() error {
logIndexStr := strconv.FormatInt(m.logFileIndex, 10)
logPosStr := strconv.FormatInt(m.logPos, 10)
2014-06-10 06:41:50 +04:00
cmd := ledis.Slice(fmt.Sprintf(syncCmdFormat, len(logIndexStr),
logIndexStr, len(logPosStr), logPosStr))
if _, err := m.c.Write(cmd); err != nil {
2014-06-09 13:23:32 +04:00
return err
}
m.syncBuf.Reset()
err := readBulkTo(m.rb, &m.syncBuf)
if err != nil {
return err
}
2014-07-04 13:55:47 +04:00
var buf []byte
buf, err = snappy.Decode(m.compressBuf, m.syncBuf.Bytes())
2014-06-09 13:23:32 +04:00
if err != nil {
return err
2014-07-04 13:55:47 +04:00
} else if len(buf) > len(m.compressBuf) {
m.compressBuf = buf
2014-06-09 13:23:32 +04:00
}
2014-07-04 13:55:47 +04:00
if len(buf) < 16 {
return fmt.Errorf("invalid sync data len %d", len(buf))
2014-06-09 13:23:32 +04:00
}
2014-07-04 13:55:47 +04:00
m.logFileIndex = int64(binary.BigEndian.Uint64(buf[0:8]))
m.logPos = int64(binary.BigEndian.Uint64(buf[8:16]))
2014-06-09 13:23:32 +04:00
if m.logFileIndex == 0 {
//master now not support binlog, stop replication
m.stopReplication()
return nil
} else if m.logFileIndex == -1 {
//-1 means than binlog index and pos are lost, we must start a full sync instead
return m.fullSync()
}
2014-07-04 13:55:47 +04:00
err = m.app.ldb.ReplicateFromData(buf[16:])
2014-06-09 13:23:32 +04:00
if err != nil {
return err
}
2014-06-12 17:29:41 +04:00
return m.saveInfo()
2014-06-09 13:23:32 +04:00
}
func (app *App) slaveof(masterAddr string) error {
app.m.Lock()
defer app.m.Unlock()
if len(masterAddr) == 0 {
return app.m.stopReplication()
} else {
return app.m.startReplication(masterAddr)
2014-06-08 12:43:59 +04:00
}
return nil
}