redis/cluster.go

460 lines
9.8 KiB
Go
Raw Normal View History

2015-01-24 15:12:48 +03:00
package redis
import (
"math/rand"
"sync"
"sync/atomic"
"time"
2015-12-30 16:53:45 +03:00
2016-04-09 14:52:01 +03:00
"gopkg.in/redis.v4/internal"
"gopkg.in/redis.v4/internal/hashtag"
"gopkg.in/redis.v4/internal/pool"
2015-01-24 15:12:48 +03:00
)
2015-09-12 09:36:03 +03:00
// ClusterClient is a Redis Cluster client representing a pool of zero
// or more underlying connections. It's safe for concurrent use by
// multiple goroutines.
2015-01-24 15:12:48 +03:00
type ClusterClient struct {
commandable
2016-03-17 19:00:47 +03:00
opt *ClusterOptions
slotsMx sync.RWMutex // protects slots and addrs
2015-04-04 16:46:57 +03:00
addrs []string
2015-04-08 12:28:21 +03:00
slots [][]string
2015-03-30 17:10:53 +03:00
2016-03-17 19:00:47 +03:00
clientsMx sync.RWMutex // protects clients and closed
2015-04-04 16:46:57 +03:00
clients map[string]*Client
2015-03-30 17:10:53 +03:00
2016-03-17 19:00:47 +03:00
_closed int32 // atomic
2015-01-24 15:12:48 +03:00
2015-05-01 10:42:58 +03:00
// Reports where slots reloading is in progress.
reloading uint32
2015-01-24 15:12:48 +03:00
}
2015-09-12 09:36:03 +03:00
// NewClusterClient returns a Redis Cluster client as described in
// http://redis.io/topics/cluster-spec.
2015-04-04 16:46:57 +03:00
func NewClusterClient(opt *ClusterOptions) *ClusterClient {
2015-01-24 15:12:48 +03:00
client := &ClusterClient{
2016-03-17 19:00:47 +03:00
opt: opt,
2015-04-08 12:28:50 +03:00
addrs: opt.Addrs,
2015-12-30 16:53:45 +03:00
slots: make([][]string, hashtag.SlotNumber),
2015-04-04 16:46:57 +03:00
clients: make(map[string]*Client),
2015-01-24 15:12:48 +03:00
}
client.commandable.process = client.process
2015-05-01 10:42:58 +03:00
client.reloadSlots()
2015-04-04 16:46:57 +03:00
return client
2015-01-24 15:12:48 +03:00
}
2016-03-17 19:00:47 +03:00
// getClients copies the current address-to-client map while holding the
// read lock and returns the copy, so callers can iterate over it without
// holding clientsMx. Clients in the snapshot may already have been closed.
func (c *ClusterClient) getClients() map[string]*Client {
	c.clientsMx.RLock()
	defer c.clientsMx.RUnlock()

	snapshot := make(map[string]*Client, len(c.clients))
	for addr, cl := range c.clients {
		snapshot[addr] = cl
	}
	return snapshot
}
2016-05-02 15:54:15 +03:00
func (c *ClusterClient) Watch(fn func(*Tx) error, keys ...string) error {
2015-12-30 16:53:45 +03:00
addr := c.slotMasterAddr(hashtag.Slot(keys[0]))
2015-12-16 17:11:52 +03:00
client, err := c.getClient(addr)
if err != nil {
2016-05-02 15:54:15 +03:00
return err
2015-12-16 17:11:52 +03:00
}
2016-05-02 15:54:15 +03:00
return client.Watch(fn, keys...)
2015-12-16 17:11:52 +03:00
}
2016-01-25 16:57:09 +03:00
// PoolStats returns accumulated connection pool stats.
2016-01-19 19:36:40 +03:00
func (c *ClusterClient) PoolStats() *PoolStats {
acc := PoolStats{}
2016-03-17 19:00:47 +03:00
for _, client := range c.getClients() {
s := client.connPool.Stats()
acc.Requests += s.Requests
acc.Hits += s.Hits
acc.Timeouts += s.Timeouts
acc.TotalConns += s.TotalConns
acc.FreeConns += s.FreeConns
2016-01-19 19:36:40 +03:00
}
return &acc
}
2016-03-17 19:00:47 +03:00
// closed reports whether the atomic _closed flag has been set to 1.
func (c *ClusterClient) closed() bool {
	state := atomic.LoadInt32(&c._closed)
	return state == 1
}
// Close closes the cluster client, releasing any open resources.
//
2015-09-12 09:36:03 +03:00
// It is rare to Close a ClusterClient, as the ClusterClient is meant
// to be long-lived and shared between many goroutines.
2015-01-24 15:12:48 +03:00
func (c *ClusterClient) Close() error {
2016-03-17 19:00:47 +03:00
if !atomic.CompareAndSwapInt32(&c._closed, 0, 1) {
return pool.ErrClosed
}
2016-03-17 19:00:47 +03:00
c.clientsMx.Lock()
c.resetClients()
2016-03-17 19:00:47 +03:00
c.clientsMx.Unlock()
c.setSlots(nil)
2015-04-04 16:46:57 +03:00
return nil
2015-01-24 15:12:48 +03:00
}
2015-04-04 16:46:57 +03:00
// getClient returns a Client for a given address.
2015-03-18 13:41:24 +03:00
func (c *ClusterClient) getClient(addr string) (*Client, error) {
2016-03-17 19:00:47 +03:00
if c.closed() {
return nil, pool.ErrClosed
}
2015-03-18 13:41:24 +03:00
if addr == "" {
return c.randomClient()
}
2015-04-04 16:46:57 +03:00
c.clientsMx.RLock()
client, ok := c.clients[addr]
2016-03-17 19:00:47 +03:00
c.clientsMx.RUnlock()
2015-04-04 16:46:57 +03:00
if ok {
2015-03-18 13:41:24 +03:00
return client, nil
2015-01-24 15:12:48 +03:00
}
2015-04-04 16:46:57 +03:00
c.clientsMx.Lock()
client, ok = c.clients[addr]
2015-01-24 15:12:48 +03:00
if !ok {
opt := c.opt.clientOptions()
opt.Addr = addr
2015-05-02 16:19:22 +03:00
client = NewClient(opt)
2015-04-04 16:46:57 +03:00
c.clients[addr] = client
2015-01-24 15:12:48 +03:00
}
2015-04-04 16:46:57 +03:00
c.clientsMx.Unlock()
2015-03-18 13:41:24 +03:00
return client, nil
}
func (c *ClusterClient) slotAddrs(slot int) []string {
c.slotsMx.RLock()
addrs := c.slots[slot]
c.slotsMx.RUnlock()
return addrs
2015-01-24 15:12:48 +03:00
}
// slotMasterAddr returns the first address stored for the given hash slot,
// or the empty string when no address is known for it.
func (c *ClusterClient) slotMasterAddr(slot int) string {
	if nodes := c.slotAddrs(slot); len(nodes) > 0 {
		return nodes[0]
	}
	return ""
}
// randomClient returns a Client for the first live node.
2015-04-04 16:46:57 +03:00
func (c *ClusterClient) randomClient() (client *Client, err error) {
for i := 0; i < 10; i++ {
n := rand.Intn(len(c.addrs))
2015-03-18 13:41:24 +03:00
client, err = c.getClient(c.addrs[n])
if err != nil {
continue
}
err = client.ClusterInfo().Err()
2015-04-04 16:46:57 +03:00
if err == nil {
return client, nil
}
}
return nil, err
}
2015-01-24 15:12:48 +03:00
func (c *ClusterClient) process(cmd Cmder) {
var ask bool
2015-12-30 16:53:45 +03:00
slot := hashtag.Slot(cmd.clusterKey())
2015-01-24 15:12:48 +03:00
addr := c.slotMasterAddr(slot)
2015-03-18 13:41:24 +03:00
client, err := c.getClient(addr)
if err != nil {
cmd.setErr(err)
return
2015-04-04 16:46:57 +03:00
}
for attempt := 0; attempt <= c.opt.getMaxRedirects(); attempt++ {
if attempt > 0 {
cmd.reset()
}
2015-01-24 15:12:48 +03:00
if ask {
2015-04-04 16:46:57 +03:00
pipe := client.Pipeline()
2015-01-24 15:12:48 +03:00
pipe.Process(NewCmd("ASKING"))
pipe.Process(cmd)
_, _ = pipe.Exec()
pipe.Close()
2015-01-24 15:12:48 +03:00
ask = false
} else {
2015-04-04 16:46:57 +03:00
client.Process(cmd)
2015-01-24 15:12:48 +03:00
}
// If there is no (real) error, we are done!
err := cmd.Err()
2016-03-19 17:33:14 +03:00
if err == nil {
2015-01-24 15:12:48 +03:00
return
}
2015-04-07 12:30:06 +03:00
// On network errors try random node.
2016-03-19 17:33:14 +03:00
if shouldRetry(err) {
2015-04-07 12:30:06 +03:00
client, err = c.randomClient()
if err != nil {
return
2015-01-24 15:12:48 +03:00
}
continue
}
2015-03-18 13:41:24 +03:00
var moved bool
var addr string
moved, ask, addr = isMovedError(err)
if moved || ask {
if moved && c.slotMasterAddr(slot) != addr {
2015-05-01 10:42:58 +03:00
c.lazyReloadSlots()
2015-03-18 13:41:24 +03:00
}
client, err = c.getClient(addr)
if err != nil {
return
}
continue
2015-01-24 15:12:48 +03:00
}
2015-03-18 13:41:24 +03:00
break
2015-01-24 15:12:48 +03:00
}
}
2015-04-04 16:46:57 +03:00
// Closes all clients and returns last error if there are any.
2015-12-22 12:44:49 +03:00
func (c *ClusterClient) resetClients() (retErr error) {
2015-04-04 16:46:57 +03:00
for addr, client := range c.clients {
2015-12-22 12:44:49 +03:00
if err := client.Close(); err != nil && retErr == nil {
retErr = err
2015-04-04 16:46:57 +03:00
}
delete(c.clients, addr)
2015-01-24 15:12:48 +03:00
}
2015-12-22 12:44:49 +03:00
return retErr
2015-04-04 16:46:57 +03:00
}
2015-01-24 15:12:48 +03:00
func (c *ClusterClient) setSlots(slots []ClusterSlot) {
2015-03-30 17:53:28 +03:00
c.slotsMx.Lock()
2015-01-24 15:12:48 +03:00
2015-04-07 12:30:06 +03:00
seen := make(map[string]struct{})
2015-04-08 12:28:21 +03:00
for _, addr := range c.addrs {
seen[addr] = struct{}{}
}
2015-12-30 16:53:45 +03:00
for i := 0; i < hashtag.SlotNumber; i++ {
c.slots[i] = c.slots[i][:0]
}
for _, slot := range slots {
var addrs []string
for _, node := range slot.Nodes {
addrs = append(addrs, node.Addr)
2015-04-07 12:30:06 +03:00
}
for i := slot.Start; i <= slot.End; i++ {
c.slots[i] = addrs
}
for _, node := range slot.Nodes {
if _, ok := seen[node.Addr]; !ok {
c.addrs = append(c.addrs, node.Addr)
seen[node.Addr] = struct{}{}
2015-04-08 12:28:21 +03:00
}
2015-01-24 15:12:48 +03:00
}
}
2015-04-04 16:46:57 +03:00
c.slotsMx.Unlock()
2015-01-24 15:12:48 +03:00
}
2015-05-01 10:42:58 +03:00
func (c *ClusterClient) reloadSlots() {
defer atomic.StoreUint32(&c.reloading, 0)
2015-01-24 15:12:48 +03:00
2015-04-04 16:46:57 +03:00
client, err := c.randomClient()
if err != nil {
2016-04-09 14:52:01 +03:00
internal.Logf("randomClient failed: %s", err)
2015-05-01 10:42:58 +03:00
return
2015-01-24 15:12:48 +03:00
}
2015-04-04 16:46:57 +03:00
slots, err := client.ClusterSlots().Result()
if err != nil {
2016-04-09 14:52:01 +03:00
internal.Logf("ClusterSlots failed: %s", err)
2015-05-01 10:42:58 +03:00
return
2015-04-04 16:46:57 +03:00
}
c.setSlots(slots)
2015-01-24 15:12:48 +03:00
}
2015-05-01 10:42:58 +03:00
func (c *ClusterClient) lazyReloadSlots() {
if !atomic.CompareAndSwapUint32(&c.reloading, 0, 1) {
return
}
go c.reloadSlots()
2015-01-24 15:12:48 +03:00
}
// reaper closes idle connections to the cluster.
2016-03-17 19:00:47 +03:00
func (c *ClusterClient) reaper(frequency time.Duration) {
ticker := time.NewTicker(frequency)
defer ticker.Stop()
2016-03-17 19:00:47 +03:00
for _ = range ticker.C {
if c.closed() {
break
}
2016-03-17 19:00:47 +03:00
var n int
for _, client := range c.getClients() {
nn, err := client.connPool.(*pool.ConnPool).ReapStaleConns()
if err != nil {
2016-04-09 14:52:01 +03:00
internal.Logf("ReapStaleConns failed: %s", err)
2016-03-17 19:00:47 +03:00
} else {
n += nn
}
}
2016-03-17 19:00:47 +03:00
s := c.PoolStats()
2016-04-09 14:52:01 +03:00
internal.Logf(
2016-03-17 19:00:47 +03:00
"reaper: removed %d stale conns (TotalConns=%d FreeConns=%d Requests=%d Hits=%d Timeouts=%d)",
n, s.TotalConns, s.FreeConns, s.Requests, s.Hits, s.Timeouts,
)
}
}
// Pipeline returns a new Pipeline whose exec hook is the cluster client's
// pipelineExec.
func (c *ClusterClient) Pipeline() *Pipeline {
	pipe := new(Pipeline)
	pipe.exec = c.pipelineExec
	pipe.commandable.process = pipe.process
	return pipe
}
// Pipelined creates a fresh cluster pipeline and hands fn to its pipelined
// method, returning whatever that method returns.
func (c *ClusterClient) Pipelined(fn func(*Pipeline) error) ([]Cmder, error) {
	pipe := c.Pipeline()
	return pipe.pipelined(fn)
}
// pipelineExec executes the pipelined commands against the cluster.
//
// Commands are grouped by the address of the node owning their hash slot
// (the empty address stands for "any node") and each group is sent over one
// connection of that node. Commands that execClusterCmds hands back for a
// retry are re-sent, grouped by their new target address, for up to
// getMaxRedirects() extra rounds. The loop ends as soon as no command is
// left to retry.
//
// It returns the most recent error seen while obtaining a client or a
// connection, or reported by execClusterCmds; nil if there was none.
func (c *ClusterClient) pipelineExec(cmds []Cmder) error {
	var retErr error

	cmdsMap := make(map[string][]Cmder)
	for _, cmd := range cmds {
		slot := hashtag.Slot(cmd.clusterKey())
		addr := c.slotMasterAddr(slot)
		cmdsMap[addr] = append(cmdsMap[addr], cmd)
	}

	// Stop early once every command has been dealt with instead of spinning
	// (and allocating a map) for the remaining attempts.
	for attempt := 0; attempt <= c.opt.getMaxRedirects() && len(cmdsMap) > 0; attempt++ {
		failedCmds := make(map[string][]Cmder)

		for addr, cmds := range cmdsMap {
			client, err := c.getClient(addr)
			if err != nil {
				setCmdsErr(cmds, err)
				retErr = err
				continue
			}

			cn, err := client.conn()
			if err != nil {
				setCmdsErr(cmds, err)
				retErr = err
				continue
			}

			failedCmds, err = c.execClusterCmds(cn, cmds, failedCmds)
			if err != nil {
				retErr = err
			}
			client.putConn(cn, err, false)
		}

		cmdsMap = failedCmds
	}

	return retErr
}
// execClusterCmds writes cmds to cn and reads their replies in order.
//
// Commands that need another attempt are reset and appended to failedCmds
// under the address they should be retried on:
//
//   - on a network error the current command and all commands after it are
//     queued under the empty address, and reading stops;
//   - on MOVED the command is queued under the reported address and a
//     background slots reload is requested;
//   - on ASK the command is queued under the reported address, preceded by
//     an ASKING command.
//
// It returns failedCmds together with the write error, or otherwise the
// first reply error that was none of the above (nil if there was none).
func (c *ClusterClient) execClusterCmds(
	cn *pool.Conn, cmds []Cmder, failedCmds map[string][]Cmder,
) (map[string][]Cmder, error) {
	if writeErr := writeCmd(cn, cmds...); writeErr != nil {
		setCmdsErr(cmds, writeErr)
		return failedCmds, writeErr
	}

	var firstCmdErr error
	for idx, cmd := range cmds {
		readErr := cmd.readReply(cn)
		if readErr == nil {
			continue
		}

		if isNetworkError(readErr) {
			cmd.reset()
			failedCmds[""] = append(failedCmds[""], cmds[idx:]...)
			break
		}

		moved, ask, addr := isMovedError(readErr)
		switch {
		case moved:
			c.lazyReloadSlots()
			cmd.reset()
			failedCmds[addr] = append(failedCmds[addr], cmd)
		case ask:
			cmd.reset()
			failedCmds[addr] = append(failedCmds[addr], NewCmd("ASKING"), cmd)
		case firstCmdErr == nil:
			firstCmdErr = readErr
		}
	}

	return failedCmds, firstCmdErr
}
2015-01-24 15:12:48 +03:00
//------------------------------------------------------------------------------
// ClusterOptions holds the configuration of a ClusterClient and is passed
// to NewClusterClient.
type ClusterOptions struct {
	// Addrs is the seed list of host:port addresses of cluster nodes.
	Addrs []string

	// MaxRedirects is the maximum number of retries before giving up.
	// A command is retried on network errors and MOVED/ASK redirects.
	// Zero selects the default of 16; -1 disables retries.
	MaxRedirects int

	// The options below are copied into the Options of every per-node
	// client.

	Password     string
	DialTimeout  time.Duration
	ReadTimeout  time.Duration
	WriteTimeout time.Duration

	// PoolSize applies per cluster node and not to the whole cluster.
	PoolSize           int
	PoolTimeout        time.Duration
	IdleTimeout        time.Duration
	IdleCheckFrequency time.Duration
}
func (opt *ClusterOptions) getMaxRedirects() int {
if opt.MaxRedirects == -1 {
return 0
}
if opt.MaxRedirects == 0 {
2015-01-24 15:12:48 +03:00
return 16
}
return opt.MaxRedirects
}
func (opt *ClusterOptions) clientOptions() *Options {
return &Options{
Password: opt.Password,
DialTimeout: opt.DialTimeout,
2015-01-24 15:12:48 +03:00
ReadTimeout: opt.ReadTimeout,
WriteTimeout: opt.WriteTimeout,
PoolSize: opt.PoolSize,
PoolTimeout: opt.PoolTimeout,
2015-01-24 15:12:48 +03:00
IdleTimeout: opt.IdleTimeout,
2016-03-17 19:00:47 +03:00
// IdleCheckFrequency is not copied to disable reaper
2015-01-24 15:12:48 +03:00
}
}