forked from mirror/redis
Rework retrying
This commit is contained in:
parent
4598ed0eac
commit
fa7f64f7f2
cluster.go (252 changes)
@@ -13,10 +13,10 @@ import (
 	"github.com/go-redis/redis/internal/hashtag"
 	"github.com/go-redis/redis/internal/pool"
 	"github.com/go-redis/redis/internal/proto"
+	"github.com/go-redis/redis/internal/singleflight"
 )
 
 var errClusterNoNodes = fmt.Errorf("redis: cluster has no nodes")
-var errNilClusterState = fmt.Errorf("redis: cannot load cluster slots")
 
 // ClusterOptions are used to configure a cluster client and should be
 // passed to NewClusterClient.
@@ -122,8 +122,8 @@ type clusterNode struct {
 	Client *Client
 
 	latency    uint32 // atomic
-	generation uint32
-	loading    int64 // atomic
+	generation uint32 // atomic
+	loading    int64  // atomic
 }
 
 func newClusterNode(clOpt *ClusterOptions, addr string) *clusterNode {
@@ -141,6 +141,14 @@ func newClusterNode(clOpt *ClusterOptions, addr string) *clusterNode {
 	return &node
 }
 
+func (n *clusterNode) Close() error {
+	return n.Client.Close()
+}
+
+func (n *clusterNode) Test() error {
+	return n.Client.ClusterInfo().Err()
+}
+
 func (n *clusterNode) updateLatency() {
 	const probes = 10
 
@@ -154,14 +162,6 @@ func (n *clusterNode) updateLatency() {
 	atomic.StoreUint32(&n.latency, latency)
 }
 
-func (n *clusterNode) Close() error {
-	return n.Client.Close()
-}
-
-func (n *clusterNode) Test() error {
-	return n.Client.ClusterInfo().Err()
-}
-
 func (n *clusterNode) Latency() time.Duration {
 	latency := atomic.LoadUint32(&n.latency)
 	return time.Duration(latency) * time.Microsecond
@@ -186,14 +186,16 @@ func (n *clusterNode) Loading() bool {
 }
 
 func (n *clusterNode) Generation() uint32 {
-	return n.generation
+	return atomic.LoadUint32(&n.generation)
}
 
 func (n *clusterNode) SetGeneration(gen uint32) {
-	if gen < n.generation {
-		panic("gen < n.generation")
+	for {
+		v := atomic.LoadUint32(&n.generation)
+		if gen < v || atomic.CompareAndSwapUint32(&n.generation, v, gen) {
+			break
+		}
 	}
-	n.generation = gen
 }
 
 //------------------------------------------------------------------------------
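Note: the new SetGeneration drops the panic in favor of a lock-free monotonic update, so stale generations are silently ignored instead of crashing. A standalone sketch of the same compare-and-swap pattern (setMax is an illustrative helper, not part of this commit):

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// setMax raises *addr to gen unless another goroutine already stored an
// equal or newer generation; stale updates are dropped, not panicked on.
func setMax(addr *uint32, gen uint32) {
	for {
		v := atomic.LoadUint32(addr)
		if gen < v || atomic.CompareAndSwapUint32(addr, v, gen) {
			return
		}
	}
}

func main() {
	var generation uint32
	var wg sync.WaitGroup
	for i := 1; i <= 100; i++ {
		wg.Add(1)
		go func(gen uint32) {
			defer wg.Done()
			setMax(&generation, gen)
		}(uint32(i))
	}
	wg.Wait()
	fmt.Println(atomic.LoadUint32(&generation)) // always prints 100
}
```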
@@ -201,18 +203,23 @@ func (n *clusterNode) SetGeneration(gen uint32) {
 type clusterNodes struct {
 	opt *ClusterOptions
 
-	mu     sync.RWMutex
-	addrs  []string
-	nodes  map[string]*clusterNode
-	closed bool
+	mu       sync.RWMutex
+	allAddrs []string
+	addrs    []string
+	nodes    map[string]*clusterNode
+	closed   bool
+
+	nodeCreateGroup singleflight.Group
 
 	generation uint32
 }
 
 func newClusterNodes(opt *ClusterOptions) *clusterNodes {
 	return &clusterNodes{
-		opt:   opt,
-		nodes: make(map[string]*clusterNode),
+		opt: opt,
+
+		allAddrs: opt.Addrs,
+		nodes:    make(map[string]*clusterNode),
 	}
 }
@@ -231,6 +238,7 @@ func (c *clusterNodes) Close() error {
 			firstErr = err
 		}
 	}
 
+	c.addrs = nil
 	c.nodes = nil
 
@@ -238,9 +246,16 @@ func (c *clusterNodes) Close() error {
 }
 
 func (c *clusterNodes) Addrs() ([]string, error) {
+	var addrs []string
 	c.mu.RLock()
 	closed := c.closed
-	addrs := c.addrs
+	if !closed {
+		if len(c.addrs) > 0 {
+			addrs = c.addrs
+		} else {
+			addrs = c.allAddrs
+		}
+	}
 	c.mu.RUnlock()
 
 	if closed {
@@ -310,6 +325,14 @@ func (c *clusterNodes) GetOrCreate(addr string) (*clusterNode, error) {
 		return node, nil
 	}
 
+	v, err := c.nodeCreateGroup.Do(addr, func() (interface{}, error) {
+		node := newClusterNode(c.opt, addr)
+		return node, node.Test()
+	})
+	if err != nil {
+		return nil, err
+	}
+
 	c.mu.Lock()
 	defer c.mu.Unlock()
 
@@ -319,12 +342,15 @@ func (c *clusterNodes) GetOrCreate(addr string) (*clusterNode, error) {
 
 	node, ok = c.nodes[addr]
 	if ok {
+		_ = v.(*clusterNode).Close()
 		return node, nil
 	}
+	node = v.(*clusterNode)
 
+	c.allAddrs = appendIfNotExists(c.allAddrs, addr)
 	c.addrs = append(c.addrs, addr)
-	node = newClusterNode(c.opt, addr)
 	c.nodes[addr] = node
 
 	return node, nil
 }
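Note: GetOrCreate now dials and pings the node outside the mutex, with nodeCreateGroup collapsing concurrent calls for the same address into one attempt. A rough sketch of that access pattern, using golang.org/x/sync/singleflight (the maintained upstream of the package vendored below) and a string standing in for the client:

```go
package main

import (
	"fmt"
	"sync"

	"golang.org/x/sync/singleflight"
)

// registry mimics the shape of clusterNodes: a map guarded by a mutex,
// with singleflight collapsing concurrent creates for one address.
type registry struct {
	mu    sync.RWMutex
	conns map[string]string // addr -> fake "connection"
	group singleflight.Group
}

func (r *registry) getOrCreate(addr string) string {
	r.mu.RLock()
	conn, ok := r.conns[addr]
	r.mu.RUnlock()
	if ok {
		return conn
	}

	// Expensive work (dial + ping in the real code) runs once per addr,
	// no matter how many goroutines arrive here concurrently.
	v, _, _ := r.group.Do(addr, func() (interface{}, error) {
		return "conn->" + addr, nil
	})

	r.mu.Lock()
	defer r.mu.Unlock()
	if conn, ok := r.conns[addr]; ok {
		return conn // another goroutine already published its result
	}
	r.conns[addr] = v.(string)
	return v.(string)
}

func main() {
	r := &registry{conns: make(map[string]string)}
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			fmt.Println(r.getOrCreate("127.0.0.1:7000"))
		}()
	}
	wg.Wait()
}
```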
@@ -334,20 +360,8 @@ func (c *clusterNodes) Random() (*clusterNode, error) {
 		return nil, err
 	}
 
-	var nodeErr error
-	for i := 0; i <= c.opt.MaxRedirects; i++ {
-		n := rand.Intn(len(addrs))
-		node, err := c.GetOrCreate(addrs[n])
-		if err != nil {
-			return nil, err
-		}
-
-		nodeErr = node.Test()
-		if nodeErr == nil {
-			return node, nil
-		}
-	}
-	return nil, nodeErr
+	n := rand.Intn(len(addrs))
+	return c.GetOrCreate(addrs[n])
 }
 
 //------------------------------------------------------------------------------
@@ -470,9 +484,12 @@ func (c *clusterState) slotNodes(slot int) []*clusterNode {
 type ClusterClient struct {
 	cmdable
 
-	opt    *ClusterOptions
-	nodes  *clusterNodes
-	_state atomic.Value
+	opt   *ClusterOptions
+	nodes *clusterNodes
+
+	_state     atomic.Value
+	stateErrMu sync.RWMutex
+	stateErr   error
 
 	cmdsInfoOnce internal.Once
 	cmdsInfo     map[string]*CommandInfo
@@ -501,20 +518,7 @@ func NewClusterClient(opt *ClusterOptions) *ClusterClient {
 
 	c.cmdable.setProcessor(c.Process)
 
-	// Add initial nodes.
-	for _, addr := range opt.Addrs {
-		_, _ = c.nodes.GetOrCreate(addr)
-	}
-
-	// Preload cluster slots.
-	for i := 0; i < 10; i++ {
-		state, err := c.reloadState()
-		if err == nil {
-			c._state.Store(state)
-			break
-		}
-	}
-
+	c.reloadState()
 	if opt.IdleCheckFrequency > 0 {
 		go c.reaper(opt.IdleCheckFrequency)
 	}
@@ -531,21 +535,6 @@ func (c *ClusterClient) retryBackoff(attempt int) time.Duration {
 	return internal.RetryBackoff(attempt, c.opt.MinRetryBackoff, c.opt.MaxRetryBackoff)
 }
 
-func (c *ClusterClient) state() (*clusterState, error) {
-	v := c._state.Load()
-	if v != nil {
-		return v.(*clusterState), nil
-	}
-
-	_, err := c.nodes.Addrs()
-	if err != nil {
-		return nil, err
-	}
-
-	c.lazyReloadState()
-	return nil, errNilClusterState
-}
-
 func (c *ClusterClient) cmdInfo(name string) *CommandInfo {
 	err := c.cmdsInfoOnce.Do(func() error {
 		node, err := c.nodes.Random()
@@ -584,7 +573,12 @@ func (c *ClusterClient) cmdSlot(cmd Cmder) int {
 	return cmdSlot(cmd, cmdFirstKeyPos(cmd, cmdInfo))
 }
 
-func (c *ClusterClient) cmdSlotAndNode(state *clusterState, cmd Cmder) (int, *clusterNode, error) {
+func (c *ClusterClient) cmdSlotAndNode(cmd Cmder) (int, *clusterNode, error) {
+	state, err := c.state()
+	if err != nil {
+		return 0, nil, err
+	}
+
 	cmdInfo := c.cmdInfo(cmd.Name())
 	slot := cmdSlot(cmd, cmdFirstKeyPos(cmd, cmdInfo))
 
@@ -602,16 +596,24 @@ func (c *ClusterClient) cmdSlotAndNode(state *clusterState, cmd Cmder) (int, *clusterNode, error) {
 	return slot, node, err
 }
 
+func (c *ClusterClient) slotMasterNode(slot int) (*clusterNode, error) {
+	state, err := c.state()
+	if err != nil {
+		return nil, err
+	}
+
+	nodes := state.slotNodes(slot)
+	if len(nodes) > 0 {
+		return nodes[0], nil
+	}
+	return c.nodes.Random()
+}
+
 func (c *ClusterClient) Watch(fn func(*Tx) error, keys ...string) error {
 	if len(keys) == 0 {
 		return fmt.Errorf("redis: keys don't hash to the same slot")
 	}
 
-	state, err := c.state()
-	if err != nil {
-		return err
-	}
-
 	slot := hashtag.Slot(keys[0])
 	for _, key := range keys[1:] {
 		if hashtag.Slot(key) != slot {
@@ -619,7 +621,7 @@ func (c *ClusterClient) Watch(fn func(*Tx) error, keys ...string) error {
 		}
 	}
 
-	node, err := state.slotMasterNode(slot)
+	node, err := c.slotMasterNode(slot)
 	if err != nil {
 		return err
 	}
@@ -649,10 +651,11 @@ func (c *ClusterClient) Watch(fn func(*Tx) error, keys ...string) error {
 		}
 
 		if err == pool.ErrClosed {
-			node, err = state.slotMasterNode(slot)
+			node, err = c.slotMasterNode(slot)
 			if err != nil {
 				return err
 			}
 			continue
 		}
 
 		return err
@@ -683,13 +686,7 @@ func (c *ClusterClient) Process(cmd Cmder) error {
 }
 
 func (c *ClusterClient) defaultProcess(cmd Cmder) error {
-	state, err := c.state()
-	if err != nil {
-		cmd.setErr(err)
-		return err
-	}
-
-	_, node, err := c.cmdSlotAndNode(state, cmd)
+	_, node, err := c.cmdSlotAndNode(cmd)
 	if err != nil {
 		cmd.setErr(err)
 		return err
@@ -747,11 +744,12 @@ func (c *ClusterClient) defaultProcess(cmd Cmder) error {
 		}
 
 		if err == pool.ErrClosed {
-			_, node, err = c.cmdSlotAndNode(state, cmd)
+			_, node, err = c.cmdSlotAndNode(cmd)
 			if err != nil {
 				cmd.setErr(err)
-				return err
+				break
 			}
 			continue
 		}
 
 		break
@@ -903,31 +901,37 @@ func (c *ClusterClient) lazyReloadState() {
 	if !atomic.CompareAndSwapUint32(&c.reloading, 0, 1) {
 		return
 	}
 
 	go func() {
-		defer atomic.StoreUint32(&c.reloading, 0)
-
-		for {
-			state, err := c.reloadState()
-			if err == pool.ErrClosed {
-				return
-			}
-
-			if err != nil {
-				time.Sleep(time.Millisecond)
-				continue
-			}
-
-			c._state.Store(state)
-			time.Sleep(5 * time.Second)
-			c.nodes.GC(state.generation)
-			break
+		if c.reloadState() {
+			time.Sleep(time.Second)
 		}
+		atomic.StoreUint32(&c.reloading, 0)
 	}()
 }
 
-// Not thread-safe.
-func (c *ClusterClient) reloadState() (*clusterState, error) {
+func (c *ClusterClient) reloadState() bool {
+	for attempt := 0; attempt <= c.opt.MaxRedirects; attempt++ {
+		if attempt > 0 {
+			time.Sleep(c.retryBackoff(attempt))
+		}
+
+		state, err := c.loadState()
+		if err == nil {
+			c._state.Store(state)
+			c.nodes.GC(state.generation)
+			return true
+		}
+
+		c.setStateErr(err)
+		switch err {
+		case pool.ErrClosed, errClusterNoNodes:
+			return false
+		}
+	}
+	return false
+}
+
+func (c *ClusterClient) loadState() (*clusterState, error) {
 	node, err := c.nodes.Random()
 	if err != nil {
 		return nil, err
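Note: reloadState is now the single retry loop, making up to MaxRedirects attempts with exponential backoff and recording the last failure so state() can surface it. internal.RetryBackoff itself is not shown in this diff, so the sketch below substitutes a hypothetical backoff helper in the same spirit:

```go
package main

import (
	"errors"
	"fmt"
	"math/rand"
	"time"
)

// retryBackoff stands in for go-redis's internal.RetryBackoff:
// exponential growth from min, capped at max, with random jitter.
func retryBackoff(attempt int, min, max time.Duration) time.Duration {
	d := min << uint(attempt)
	if d < min || d > max { // d < min catches shift overflow
		d = max
	}
	return time.Duration(rand.Int63n(int64(d)))
}

func loadState() error { return errors.New("cluster not ready") }

func main() {
	const maxRedirects = 3
	var lastErr error
	for attempt := 0; attempt <= maxRedirects; attempt++ {
		if attempt > 0 {
			time.Sleep(retryBackoff(attempt, 8*time.Millisecond, 512*time.Millisecond))
		}
		if err := loadState(); err != nil {
			lastErr = err // kept around so a state() accessor can report it
			continue
		}
		fmt.Println("state loaded")
		return
	}
	fmt.Println("giving up:", lastErr)
}
```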
@@ -941,6 +945,27 @@ func (c *ClusterClient) reloadState() (*clusterState, error) {
 	return newClusterState(c.nodes, slots, node.Client.opt.Addr)
 }
 
+func (c *ClusterClient) state() (*clusterState, error) {
+	v := c._state.Load()
+	if v != nil {
+		return v.(*clusterState), nil
+	}
+	return nil, c.getStateErr()
+}
+
+func (c *ClusterClient) setStateErr(err error) {
+	c.stateErrMu.Lock()
+	c.stateErr = err
+	c.stateErrMu.Unlock()
+}
+
+func (c *ClusterClient) getStateErr() error {
+	c.stateErrMu.RLock()
+	err := c.stateErr
+	c.stateErrMu.RUnlock()
+	return err
+}
+
 // reaper closes idle connections to the cluster.
 func (c *ClusterClient) reaper(idleCheckFrequency time.Duration) {
 	ticker := time.NewTicker(idleCheckFrequency)
@@ -1055,15 +1080,17 @@ func (c *ClusterClient) remapCmds(cmds []Cmder, failedCmds map[*clusterNode][]Cmder) {
 func (c *ClusterClient) pipelineProcessCmds(
 	node *clusterNode, cn *pool.Conn, cmds []Cmder, failedCmds map[*clusterNode][]Cmder,
 ) error {
-	cn.SetWriteTimeout(c.opt.WriteTimeout)
-	if err := writeCmd(cn, cmds...); err != nil {
+	_ = cn.SetWriteTimeout(c.opt.WriteTimeout)
+
+	err := writeCmd(cn, cmds...)
+	if err != nil {
 		setCmdsErr(cmds, err)
 		failedCmds[node] = cmds
 		return err
 	}
 
 	// Set read timeout for all commands.
-	cn.SetReadTimeout(c.opt.ReadTimeout)
+	_ = cn.SetReadTimeout(c.opt.ReadTimeout)
 
 	return c.pipelineReadCmds(cn, cmds, failedCmds)
 }
@@ -1280,12 +1307,7 @@ func (c *ClusterClient) pubSub(channels []string) *PubSub {
 		slot = -1
 	}
 
-	state, err := c.state()
-	if err != nil {
-		return nil, err
-	}
-
-	masterNode, err := state.slotMasterNode(slot)
+	masterNode, err := c.slotMasterNode(slot)
 	if err != nil {
 		return nil, err
 	}
cluster_test.go
@@ -190,13 +190,11 @@ var _ = Describe("ClusterClient", func() {
 
 	assertClusterClient := func() {
 		It("should GET/SET/DEL", func() {
-			val, err := client.Get("A").Result()
+			err := client.Get("A").Err()
 			Expect(err).To(Equal(redis.Nil))
-			Expect(val).To(Equal(""))
 
-			val, err = client.Set("A", "VALUE", 0).Result()
+			err = client.Set("A", "VALUE", 0).Err()
 			Expect(err).NotTo(HaveOccurred())
-			Expect(val).To(Equal("OK"))
 
 			Eventually(func() string {
 				return client.Get("A").Val()
@@ -295,9 +293,9 @@ var _ = Describe("ClusterClient", func() {
 		}
 		wg.Wait()
 
-		n, err := client.Get("key").Int64()
-		Expect(err).NotTo(HaveOccurred())
-		Expect(n).To(Equal(int64(100)))
+		Eventually(func() string {
+			return client.Get("key").Val()
+		}, 30*time.Second).Should(Equal("100"))
 	})
 
 	Describe("pipelining", func() {
@@ -578,12 +576,13 @@ var _ = Describe("ClusterClient", func() {
 			opt = redisClusterOptions()
 			client = cluster.clusterClient(opt)
 
-			_ = client.ForEachMaster(func(master *redis.Client) error {
-				return master.FlushDB().Err()
-			})
-
 			_ = client.ForEachSlave(func(slave *redis.Client) error {
 				defer GinkgoRecover()
 
+				_ = client.ForEachMaster(func(master *redis.Client) error {
+					return master.FlushDB().Err()
+				})
+
 				Eventually(func() int64 {
 					return slave.DBSize().Val()
 				}, 30*time.Second).Should(Equal(int64(0)))
@@ -670,7 +669,7 @@ var _ = Describe("ClusterClient without valid nodes", func() {
 
 	It("returns an error", func() {
 		err := client.Ping().Err()
-		Expect(err).To(MatchError("redis: cannot load cluster slots"))
+		Expect(err).To(MatchError("ERR This instance has cluster support disabled"))
 	})
 
 	It("pipeline returns an error", func() {
@@ -678,7 +677,7 @@ var _ = Describe("ClusterClient without valid nodes", func() {
 			pipe.Ping()
 			return nil
 		})
-		Expect(err).To(MatchError("redis: cannot load cluster slots"))
+		Expect(err).To(MatchError("ERR This instance has cluster support disabled"))
 	})
 })
 
@@ -743,7 +742,8 @@ var _ = Describe("ClusterClient timeout", func() {
 	})
 
 	AfterEach(func() {
-		client.ForEachNode(func(client *redis.Client) error {
+		_ = client.ForEachNode(func(client *redis.Client) error {
+			defer GinkgoRecover()
 			Eventually(func() error {
 				return client.Ping().Err()
 			}, 2*pause).ShouldNot(HaveOccurred())
internal/error.go
@@ -42,6 +42,10 @@ func IsNetworkError(err error) bool {
 	return ok
 }
 
+func IsReadOnlyError(err error) bool {
+	return strings.HasPrefix(err.Error(), "READONLY ")
+}
+
 func IsBadConn(err error, allowTimeout bool) bool {
 	if err == nil {
 		return false
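Note: a READONLY error is what a replica returns when asked to execute a write; the new helper just matches that prefix. A minimal check (the error text mirrors a stock Redis replica reply):

```go
package main

import (
	"errors"
	"fmt"
	"strings"
)

func IsReadOnlyError(err error) bool {
	return strings.HasPrefix(err.Error(), "READONLY ")
}

func main() {
	// Typical reply from a replica that was asked to write:
	err := errors.New("READONLY You can't write against a read only slave.")
	fmt.Println(IsReadOnlyError(err)) // true
}
```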
internal/singleflight/singleflight.go (new file)
@@ -0,0 +1,64 @@
+/*
+Copyright 2013 Google Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package singleflight provides a duplicate function call suppression
+// mechanism.
+package singleflight
+
+import "sync"
+
+// call is an in-flight or completed Do call
+type call struct {
+	wg  sync.WaitGroup
+	val interface{}
+	err error
+}
+
+// Group represents a class of work and forms a namespace in which
+// units of work can be executed with duplicate suppression.
+type Group struct {
+	mu sync.Mutex       // protects m
+	m  map[string]*call // lazily initialized
+}
+
+// Do executes and returns the results of the given function, making
+// sure that only one execution is in-flight for a given key at a
+// time. If a duplicate comes in, the duplicate caller waits for the
+// original to complete and receives the same results.
+func (g *Group) Do(key string, fn func() (interface{}, error)) (interface{}, error) {
+	g.mu.Lock()
+	if g.m == nil {
+		g.m = make(map[string]*call)
+	}
+	if c, ok := g.m[key]; ok {
+		g.mu.Unlock()
+		c.wg.Wait()
+		return c.val, c.err
+	}
+	c := new(call)
+	c.wg.Add(1)
+	g.m[key] = c
+	g.mu.Unlock()
+
+	c.val, c.err = fn()
+	c.wg.Done()
+
+	g.mu.Lock()
+	delete(g.m, key)
+	g.mu.Unlock()
+
+	return c.val, c.err
+}
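Note: a test-style sketch of the contract Do documents, namely that callers arriving while fn is in flight share one execution. It would live beside this file as a hypothetical singleflight_test.go; the 50ms settle sleep is a demo convenience, not a synchronization guarantee:

```go
package singleflight

import (
	"sync"
	"sync/atomic"
	"testing"
	"time"
)

// TestDoDedup: ten goroutines ask for the same key while the first
// call is held open; fn runs once and every caller sees its result.
func TestDoDedup(t *testing.T) {
	var g Group
	var calls int32
	block := make(chan struct{})

	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			v, err := g.Do("key", func() (interface{}, error) {
				atomic.AddInt32(&calls, 1)
				<-block // keep the call in flight while duplicates pile up
				return "value", nil
			})
			if err != nil || v != "value" {
				t.Errorf("Do returned %v, %v", v, err)
			}
		}()
	}

	time.Sleep(50 * time.Millisecond) // let every goroutine reach Do
	close(block)
	wg.Wait()

	if n := atomic.LoadInt32(&calls); n != 1 {
		t.Errorf("fn ran %d times, want 1", n)
	}
}
```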