2018-10-29 01:49:45 +03:00
|
|
|
package server
|
2016-03-05 02:08:16 +03:00
|
|
|
|
|
|
|
import (
|
|
|
|
"crypto/md5"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"os"
|
2016-04-01 02:26:36 +03:00
|
|
|
"time"
|
2016-03-05 02:08:16 +03:00
|
|
|
|
2016-04-01 03:42:22 +03:00
|
|
|
"github.com/tidwall/resp"
|
2018-10-11 00:25:40 +03:00
|
|
|
"github.com/tidwall/tile38/internal/log"
|
2016-03-05 02:08:16 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
// checksum performs a simple md5 checksum on the aof file
|
2019-10-30 20:17:59 +03:00
|
|
|
func (s *Server) checksum(pos, size int64) (sum string, err error) {
|
|
|
|
if pos+size > int64(s.aofsz) {
|
2016-03-19 17:16:19 +03:00
|
|
|
return "", io.EOF
|
|
|
|
}
|
2016-03-05 02:08:16 +03:00
|
|
|
var f *os.File
|
2019-10-30 20:17:59 +03:00
|
|
|
f, err = os.Open(s.aof.Name())
|
2016-03-05 02:08:16 +03:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
defer f.Close()
|
2016-04-01 02:26:36 +03:00
|
|
|
sumr := md5.New()
|
2016-03-05 02:08:16 +03:00
|
|
|
err = func() error {
|
|
|
|
if size == 0 {
|
2019-10-30 20:17:59 +03:00
|
|
|
n, err := f.Seek(int64(s.aofsz), 0)
|
2016-03-05 02:08:16 +03:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if pos >= n {
|
|
|
|
return io.EOF
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
_, err = f.Seek(pos, 0)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-04-01 02:26:36 +03:00
|
|
|
_, err = io.CopyN(sumr, f, size)
|
2016-03-05 02:08:16 +03:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}()
|
|
|
|
if err != nil {
|
|
|
|
if err == io.ErrUnexpectedEOF {
|
|
|
|
err = io.EOF
|
|
|
|
}
|
|
|
|
return "", err
|
|
|
|
}
|
2016-04-01 02:26:36 +03:00
|
|
|
return fmt.Sprintf("%x", sumr.Sum(nil)), nil
|
2016-03-05 02:08:16 +03:00
|
|
|
}
|
|
|
|
|
2018-10-29 01:49:45 +03:00
|
|
|
func connAOFMD5(conn *RESPConn, pos, size int64) (sum string, err error) {
|
2016-04-01 02:26:36 +03:00
|
|
|
v, err := conn.Do("aofmd5", pos, size)
|
2016-03-05 02:08:16 +03:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
2016-04-01 02:26:36 +03:00
|
|
|
if v.Error() != nil {
|
2016-04-01 03:42:22 +03:00
|
|
|
errmsg := v.Error().Error()
|
|
|
|
if errmsg == "ERR EOF" || errmsg == "EOF" {
|
|
|
|
return "", io.EOF
|
|
|
|
}
|
2016-04-01 02:26:36 +03:00
|
|
|
return "", v.Error()
|
2016-03-05 02:08:16 +03:00
|
|
|
}
|
2016-04-01 02:26:36 +03:00
|
|
|
sum = v.String()
|
|
|
|
if len(sum) != 32 {
|
2016-03-05 02:08:16 +03:00
|
|
|
return "", errors.New("checksum not ok")
|
|
|
|
}
|
2016-04-01 02:26:36 +03:00
|
|
|
return sum, nil
|
2016-03-05 02:08:16 +03:00
|
|
|
}
|
|
|
|
|
2019-10-30 20:17:59 +03:00
|
|
|
func (s *Server) matchChecksums(conn *RESPConn, pos, size int64) (match bool, err error) {
|
|
|
|
sum, err := s.checksum(pos, size)
|
2016-03-05 02:08:16 +03:00
|
|
|
if err != nil {
|
|
|
|
if err == io.EOF {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
csum, err := connAOFMD5(conn, pos, size)
|
|
|
|
if err != nil {
|
|
|
|
if err == io.EOF {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
return csum == sum, nil
|
|
|
|
}
|
|
|
|
|
2016-04-01 03:42:22 +03:00
|
|
|
// getEndOfLastValuePositionInFile is a very slow operation because it reads the file
|
|
|
|
// backwards on byte at a time. Eek. It seek+read, seek+read, etc.
|
|
|
|
func getEndOfLastValuePositionInFile(fname string, startPos int64) (int64, error) {
|
|
|
|
pos := startPos
|
|
|
|
f, err := os.Open(fname)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
readByte := func() (byte, error) {
|
|
|
|
if pos <= 0 {
|
|
|
|
return 0, io.EOF
|
|
|
|
}
|
|
|
|
pos--
|
|
|
|
if _, err := f.Seek(pos, 0); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
b := make([]byte, 1)
|
|
|
|
if n, err := f.Read(b); err != nil {
|
|
|
|
return 0, err
|
|
|
|
} else if n != 1 {
|
|
|
|
return 0, errors.New("invalid read")
|
|
|
|
}
|
|
|
|
return b[0], nil
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
c, err := readByte()
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if c == '*' {
|
|
|
|
if _, err := f.Seek(pos, 0); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
rd := resp.NewReader(f)
|
|
|
|
_, telnet, n, err := rd.ReadMultiBulk()
|
|
|
|
if err != nil || telnet {
|
|
|
|
continue // keep reading backwards
|
|
|
|
}
|
|
|
|
return pos + int64(n), nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-05 02:08:16 +03:00
|
|
|
// followCheckSome is not a full checksum. It just "checks some" data.
|
|
|
|
// We will do some various checksums on the leader until we find the correct position to start at.
|
2022-09-22 04:44:09 +03:00
|
|
|
func (s *Server) followCheckSome(addr string, followc int, auth string,
|
|
|
|
) (pos int64, err error) {
|
2022-09-25 01:42:07 +03:00
|
|
|
if s.opts.ShowDebugMessages {
|
2016-03-05 02:08:16 +03:00
|
|
|
log.Debug("follow:", addr, ":check some")
|
|
|
|
}
|
2019-10-30 20:17:59 +03:00
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
2022-09-27 20:15:31 +03:00
|
|
|
if int(s.followc.Load()) != followc {
|
2016-03-05 02:08:16 +03:00
|
|
|
return 0, errNoLongerFollowing
|
|
|
|
}
|
2019-10-30 20:17:59 +03:00
|
|
|
if s.aofsz < checksumsz {
|
2016-03-05 02:08:16 +03:00
|
|
|
return 0, nil
|
|
|
|
}
|
2016-04-01 02:26:36 +03:00
|
|
|
|
|
|
|
conn, err := DialTimeout(addr, time.Second*2)
|
2016-03-05 02:08:16 +03:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
defer conn.Close()
|
2016-04-01 02:26:36 +03:00
|
|
|
|
2022-09-22 04:44:09 +03:00
|
|
|
if auth != "" {
|
|
|
|
if err := s.followDoLeaderAuth(conn, auth); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-05 02:08:16 +03:00
|
|
|
min := int64(0)
|
2019-10-30 20:17:59 +03:00
|
|
|
max := int64(s.aofsz) - checksumsz
|
|
|
|
limit := int64(s.aofsz)
|
|
|
|
match, err := s.matchChecksums(conn, min, checksumsz)
|
2016-03-05 02:08:16 +03:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
2016-04-01 03:42:22 +03:00
|
|
|
|
2016-03-05 02:08:16 +03:00
|
|
|
if match {
|
|
|
|
min += checksumsz // bump up the min
|
|
|
|
for {
|
|
|
|
if max < min || max+checksumsz > limit {
|
|
|
|
pos = min
|
|
|
|
break
|
|
|
|
} else {
|
2019-10-30 20:17:59 +03:00
|
|
|
match, err = s.matchChecksums(conn, max, checksumsz)
|
2016-03-05 02:08:16 +03:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if match {
|
|
|
|
min = max + checksumsz
|
|
|
|
} else {
|
|
|
|
limit = max
|
|
|
|
}
|
|
|
|
max = (limit-min)/2 - checksumsz/2 + min // multiply
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fullpos := pos
|
2019-10-30 20:17:59 +03:00
|
|
|
fname := s.aof.Name()
|
2016-03-05 02:08:16 +03:00
|
|
|
if pos == 0 {
|
2019-10-30 20:17:59 +03:00
|
|
|
s.aof.Close()
|
|
|
|
s.aof, err = os.Create(fname)
|
2016-03-05 02:08:16 +03:00
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("could not recreate aof, possible data loss. %s", err.Error())
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
return 0, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// we want to truncate at a command location
|
|
|
|
// search for nearest command
|
2019-10-30 20:17:59 +03:00
|
|
|
pos, err = getEndOfLastValuePositionInFile(s.aof.Name(), fullpos)
|
2016-03-05 02:08:16 +03:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if pos == fullpos {
|
2022-09-25 01:42:07 +03:00
|
|
|
if s.opts.ShowDebugMessages {
|
2016-03-05 02:08:16 +03:00
|
|
|
log.Debug("follow: aof fully intact")
|
|
|
|
}
|
|
|
|
return pos, nil
|
|
|
|
}
|
|
|
|
log.Warnf("truncating aof to %d", pos)
|
|
|
|
// any errror below are fatal.
|
2019-10-30 20:17:59 +03:00
|
|
|
s.aof.Close()
|
2016-03-05 02:08:16 +03:00
|
|
|
if err := os.Truncate(fname, pos); err != nil {
|
|
|
|
log.Fatalf("could not truncate aof, possible data loss. %s", err.Error())
|
|
|
|
return 0, err
|
|
|
|
}
|
2019-10-30 20:17:59 +03:00
|
|
|
s.aof, err = os.OpenFile(fname, os.O_CREATE|os.O_RDWR, 0600)
|
2016-03-05 02:08:16 +03:00
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("could not create aof, possible data loss. %s", err.Error())
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
// reset the entire system.
|
|
|
|
log.Infof("reloading aof commands")
|
2019-10-30 20:17:59 +03:00
|
|
|
s.reset()
|
|
|
|
if err := s.loadAOF(); err != nil {
|
2016-03-05 02:08:16 +03:00
|
|
|
log.Fatalf("could not reload aof, possible data loss. %s", err.Error())
|
|
|
|
return 0, err
|
|
|
|
}
|
2019-10-30 20:17:59 +03:00
|
|
|
if int64(s.aofsz) != pos {
|
2016-03-05 02:08:16 +03:00
|
|
|
log.Fatalf("aof size mismatch during reload, possible data loss.")
|
|
|
|
return 0, errors.New("?")
|
|
|
|
}
|
|
|
|
return pos, nil
|
|
|
|
}
|