package redis import ( "context" "crypto/tls" "errors" "net" "strings" "sync" "time" "github.com/go-redis/redis/v9/internal" "github.com/go-redis/redis/v9/internal/pool" "github.com/go-redis/redis/v9/internal/rand" ) //------------------------------------------------------------------------------ // FailoverOptions are used to configure a failover client and should // be passed to NewFailoverClient. type FailoverOptions struct { // The master name. MasterName string // A seed list of host:port addresses of sentinel nodes. SentinelAddrs []string // If specified with SentinelPassword, enables ACL-based authentication (via // AUTH ). SentinelUsername string // Sentinel password from "requirepass " (if enabled) in Sentinel // configuration, or, if SentinelUsername is also supplied, used for ACL-based // authentication. SentinelPassword string // Allows routing read-only commands to the closest master or replica node. // This option only works with NewFailoverClusterClient. RouteByLatency bool // Allows routing read-only commands to the random master or replica node. // This option only works with NewFailoverClusterClient. RouteRandomly bool // Route all commands to replica read-only nodes. ReplicaOnly bool // Use replicas disconnected with master when cannot get connected replicas // Now, this option only works in RandomReplicaAddr function. UseDisconnectedReplicas bool // Following options are copied from Options struct. Dialer func(ctx context.Context, network, addr string) (net.Conn, error) OnConnect func(ctx context.Context, cn *Conn) error Username string Password string DB int MaxRetries int MinRetryBackoff time.Duration MaxRetryBackoff time.Duration DialTimeout time.Duration ReadTimeout time.Duration WriteTimeout time.Duration // PoolFIFO uses FIFO mode for each node connection pool GET/PUT (default LIFO). PoolFIFO bool PoolSize int MinIdleConns int MaxConnAge time.Duration PoolTimeout time.Duration IdleTimeout time.Duration IdleCheckFrequency time.Duration TLSConfig *tls.Config } func (opt *FailoverOptions) clientOptions() *Options { return &Options{ Addr: "FailoverClient", Dialer: opt.Dialer, OnConnect: opt.OnConnect, DB: opt.DB, Username: opt.Username, Password: opt.Password, MaxRetries: opt.MaxRetries, MinRetryBackoff: opt.MinRetryBackoff, MaxRetryBackoff: opt.MaxRetryBackoff, DialTimeout: opt.DialTimeout, ReadTimeout: opt.ReadTimeout, WriteTimeout: opt.WriteTimeout, PoolFIFO: opt.PoolFIFO, PoolSize: opt.PoolSize, PoolTimeout: opt.PoolTimeout, IdleTimeout: opt.IdleTimeout, IdleCheckFrequency: opt.IdleCheckFrequency, MinIdleConns: opt.MinIdleConns, MaxConnAge: opt.MaxConnAge, TLSConfig: opt.TLSConfig, } } func (opt *FailoverOptions) sentinelOptions(addr string) *Options { return &Options{ Addr: addr, Dialer: opt.Dialer, OnConnect: opt.OnConnect, DB: 0, Username: opt.SentinelUsername, Password: opt.SentinelPassword, MaxRetries: opt.MaxRetries, MinRetryBackoff: opt.MinRetryBackoff, MaxRetryBackoff: opt.MaxRetryBackoff, DialTimeout: opt.DialTimeout, ReadTimeout: opt.ReadTimeout, WriteTimeout: opt.WriteTimeout, PoolFIFO: opt.PoolFIFO, PoolSize: opt.PoolSize, PoolTimeout: opt.PoolTimeout, IdleTimeout: opt.IdleTimeout, IdleCheckFrequency: opt.IdleCheckFrequency, MinIdleConns: opt.MinIdleConns, MaxConnAge: opt.MaxConnAge, TLSConfig: opt.TLSConfig, } } func (opt *FailoverOptions) clusterOptions() *ClusterOptions { return &ClusterOptions{ Dialer: opt.Dialer, OnConnect: opt.OnConnect, Username: opt.Username, Password: opt.Password, MaxRedirects: opt.MaxRetries, RouteByLatency: opt.RouteByLatency, RouteRandomly: opt.RouteRandomly, MinRetryBackoff: opt.MinRetryBackoff, MaxRetryBackoff: opt.MaxRetryBackoff, DialTimeout: opt.DialTimeout, ReadTimeout: opt.ReadTimeout, WriteTimeout: opt.WriteTimeout, PoolFIFO: opt.PoolFIFO, PoolSize: opt.PoolSize, PoolTimeout: opt.PoolTimeout, IdleTimeout: opt.IdleTimeout, IdleCheckFrequency: opt.IdleCheckFrequency, MinIdleConns: opt.MinIdleConns, MaxConnAge: opt.MaxConnAge, TLSConfig: opt.TLSConfig, } } // NewFailoverClient returns a Redis client that uses Redis Sentinel // for automatic failover. It's safe for concurrent use by multiple // goroutines. func NewFailoverClient(failoverOpt *FailoverOptions) *Client { if failoverOpt.RouteByLatency { panic("to route commands by latency, use NewFailoverClusterClient") } if failoverOpt.RouteRandomly { panic("to route commands randomly, use NewFailoverClusterClient") } sentinelAddrs := make([]string, len(failoverOpt.SentinelAddrs)) copy(sentinelAddrs, failoverOpt.SentinelAddrs) rand.Shuffle(len(sentinelAddrs), func(i, j int) { sentinelAddrs[i], sentinelAddrs[j] = sentinelAddrs[j], sentinelAddrs[i] }) failover := &sentinelFailover{ opt: failoverOpt, sentinelAddrs: sentinelAddrs, } opt := failoverOpt.clientOptions() opt.Dialer = masterReplicaDialer(failover) opt.init() connPool := newConnPool(opt) failover.mu.Lock() failover.onFailover = func(ctx context.Context, addr string) { _ = connPool.Filter(func(cn *pool.Conn) bool { return cn.RemoteAddr().String() != addr }) } failover.mu.Unlock() c := Client{ baseClient: newBaseClient(opt, connPool), ctx: context.Background(), } c.cmdable = c.Process c.onClose = failover.Close return &c } func masterReplicaDialer( failover *sentinelFailover, ) func(ctx context.Context, network, addr string) (net.Conn, error) { return func(ctx context.Context, network, _ string) (net.Conn, error) { var addr string var err error if failover.opt.ReplicaOnly { addr, err = failover.RandomReplicaAddr(ctx) } else { addr, err = failover.MasterAddr(ctx) if err == nil { failover.trySwitchMaster(ctx, addr) } } if err != nil { return nil, err } if failover.opt.Dialer != nil { return failover.opt.Dialer(ctx, network, addr) } netDialer := &net.Dialer{ Timeout: failover.opt.DialTimeout, KeepAlive: 5 * time.Minute, } if failover.opt.TLSConfig == nil { return netDialer.DialContext(ctx, network, addr) } return tls.DialWithDialer(netDialer, network, addr, failover.opt.TLSConfig) } } //------------------------------------------------------------------------------ // SentinelClient is a client for a Redis Sentinel. type SentinelClient struct { *baseClient hooks } func NewSentinelClient(opt *Options) *SentinelClient { opt.init() c := &SentinelClient{ baseClient: &baseClient{ opt: opt, connPool: newConnPool(opt), }, } return c } func (c *SentinelClient) Process(ctx context.Context, cmd Cmder) error { return c.hooks.process(ctx, cmd, c.baseClient.process) } func (c *SentinelClient) pubSub() *PubSub { pubsub := &PubSub{ opt: c.opt, newConn: func(ctx context.Context, channels []string) (*pool.Conn, error) { return c.newConn(ctx) }, closeConn: c.connPool.CloseConn, } pubsub.init() return pubsub } // Ping is used to test if a connection is still alive, or to // measure latency. func (c *SentinelClient) Ping(ctx context.Context) *StringCmd { cmd := NewStringCmd(ctx, "ping") _ = c.Process(ctx, cmd) return cmd } // Subscribe subscribes the client to the specified channels. // Channels can be omitted to create empty subscription. func (c *SentinelClient) Subscribe(ctx context.Context, channels ...string) *PubSub { pubsub := c.pubSub() if len(channels) > 0 { _ = pubsub.Subscribe(ctx, channels...) } return pubsub } // PSubscribe subscribes the client to the given patterns. // Patterns can be omitted to create empty subscription. func (c *SentinelClient) PSubscribe(ctx context.Context, channels ...string) *PubSub { pubsub := c.pubSub() if len(channels) > 0 { _ = pubsub.PSubscribe(ctx, channels...) } return pubsub } func (c *SentinelClient) GetMasterAddrByName(ctx context.Context, name string) *StringSliceCmd { cmd := NewStringSliceCmd(ctx, "sentinel", "get-master-addr-by-name", name) _ = c.Process(ctx, cmd) return cmd } func (c *SentinelClient) Sentinels(ctx context.Context, name string) *MapStringStringSliceCmd { cmd := NewMapStringStringSliceCmd(ctx, "sentinel", "sentinels", name) _ = c.Process(ctx, cmd) return cmd } // Failover forces a failover as if the master was not reachable, and without // asking for agreement to other Sentinels. func (c *SentinelClient) Failover(ctx context.Context, name string) *StatusCmd { cmd := NewStatusCmd(ctx, "sentinel", "failover", name) _ = c.Process(ctx, cmd) return cmd } // Reset resets all the masters with matching name. The pattern argument is a // glob-style pattern. The reset process clears any previous state in a master // (including a failover in progress), and removes every replica and sentinel // already discovered and associated with the master. func (c *SentinelClient) Reset(ctx context.Context, pattern string) *IntCmd { cmd := NewIntCmd(ctx, "sentinel", "reset", pattern) _ = c.Process(ctx, cmd) return cmd } // FlushConfig forces Sentinel to rewrite its configuration on disk, including // the current Sentinel state. func (c *SentinelClient) FlushConfig(ctx context.Context) *StatusCmd { cmd := NewStatusCmd(ctx, "sentinel", "flushconfig") _ = c.Process(ctx, cmd) return cmd } // Master shows the state and info of the specified master. func (c *SentinelClient) Master(ctx context.Context, name string) *MapStringStringCmd { cmd := NewMapStringStringCmd(ctx, "sentinel", "master", name) _ = c.Process(ctx, cmd) return cmd } // Masters shows a list of monitored masters and their state. func (c *SentinelClient) Masters(ctx context.Context) *SliceCmd { cmd := NewSliceCmd(ctx, "sentinel", "masters") _ = c.Process(ctx, cmd) return cmd } // Replicas shows a list of replicas for the specified master and their state. func (c *SentinelClient) Replicas(ctx context.Context, name string) *MapStringStringSliceCmd { cmd := NewMapStringStringSliceCmd(ctx, "sentinel", "replicas", name) _ = c.Process(ctx, cmd) return cmd } // CkQuorum checks if the current Sentinel configuration is able to reach the // quorum needed to failover a master, and the majority needed to authorize the // failover. This command should be used in monitoring systems to check if a // Sentinel deployment is ok. func (c *SentinelClient) CkQuorum(ctx context.Context, name string) *StringCmd { cmd := NewStringCmd(ctx, "sentinel", "ckquorum", name) _ = c.Process(ctx, cmd) return cmd } // Monitor tells the Sentinel to start monitoring a new master with the specified // name, ip, port, and quorum. func (c *SentinelClient) Monitor(ctx context.Context, name, ip, port, quorum string) *StringCmd { cmd := NewStringCmd(ctx, "sentinel", "monitor", name, ip, port, quorum) _ = c.Process(ctx, cmd) return cmd } // Set is used in order to change configuration parameters of a specific master. func (c *SentinelClient) Set(ctx context.Context, name, option, value string) *StringCmd { cmd := NewStringCmd(ctx, "sentinel", "set", name, option, value) _ = c.Process(ctx, cmd) return cmd } // Remove is used in order to remove the specified master: the master will no // longer be monitored, and will totally be removed from the internal state of // the Sentinel. func (c *SentinelClient) Remove(ctx context.Context, name string) *StringCmd { cmd := NewStringCmd(ctx, "sentinel", "remove", name) _ = c.Process(ctx, cmd) return cmd } //------------------------------------------------------------------------------ type sentinelFailover struct { opt *FailoverOptions sentinelAddrs []string onFailover func(ctx context.Context, addr string) onUpdate func(ctx context.Context) mu sync.RWMutex _masterAddr string sentinel *SentinelClient pubsub *PubSub } func (c *sentinelFailover) Close() error { c.mu.Lock() defer c.mu.Unlock() if c.sentinel != nil { return c.closeSentinel() } return nil } func (c *sentinelFailover) closeSentinel() error { firstErr := c.pubsub.Close() c.pubsub = nil err := c.sentinel.Close() if err != nil && firstErr == nil { firstErr = err } c.sentinel = nil return firstErr } func (c *sentinelFailover) RandomReplicaAddr(ctx context.Context) (string, error) { if c.opt == nil { return "", errors.New("opt is nil") } addresses, err := c.replicaAddrs(ctx, false) if err != nil { return "", err } if len(addresses) == 0 && c.opt.UseDisconnectedReplicas { addresses, err = c.replicaAddrs(ctx, true) if err != nil { return "", err } } if len(addresses) == 0 { return c.MasterAddr(ctx) } return addresses[rand.Intn(len(addresses))], nil } func (c *sentinelFailover) MasterAddr(ctx context.Context) (string, error) { c.mu.RLock() sentinel := c.sentinel c.mu.RUnlock() if sentinel != nil { addr := c.getMasterAddr(ctx, sentinel) if addr != "" { return addr, nil } } c.mu.Lock() defer c.mu.Unlock() if c.sentinel != nil { addr := c.getMasterAddr(ctx, c.sentinel) if addr != "" { return addr, nil } _ = c.closeSentinel() } for i, sentinelAddr := range c.sentinelAddrs { sentinel := NewSentinelClient(c.opt.sentinelOptions(sentinelAddr)) masterAddr, err := sentinel.GetMasterAddrByName(ctx, c.opt.MasterName).Result() if err != nil { internal.Logger.Printf(ctx, "sentinel: GetMasterAddrByName master=%q failed: %s", c.opt.MasterName, err) _ = sentinel.Close() continue } // Push working sentinel to the top. c.sentinelAddrs[0], c.sentinelAddrs[i] = c.sentinelAddrs[i], c.sentinelAddrs[0] c.setSentinel(ctx, sentinel) addr := net.JoinHostPort(masterAddr[0], masterAddr[1]) return addr, nil } return "", errors.New("redis: all sentinels specified in configuration are unreachable") } func (c *sentinelFailover) replicaAddrs(ctx context.Context, useDisconnected bool) ([]string, error) { c.mu.RLock() sentinel := c.sentinel c.mu.RUnlock() if sentinel != nil { addrs := c.getReplicaAddrs(ctx, sentinel) if len(addrs) > 0 { return addrs, nil } } c.mu.Lock() defer c.mu.Unlock() if c.sentinel != nil { addrs := c.getReplicaAddrs(ctx, c.sentinel) if len(addrs) > 0 { return addrs, nil } _ = c.closeSentinel() } var sentinelReachable bool for i, sentinelAddr := range c.sentinelAddrs { sentinel := NewSentinelClient(c.opt.sentinelOptions(sentinelAddr)) replicas, err := sentinel.Replicas(ctx, c.opt.MasterName).Result() if err != nil { internal.Logger.Printf(ctx, "sentinel: Replicas master=%q failed: %s", c.opt.MasterName, err) _ = sentinel.Close() continue } sentinelReachable = true addrs := parseReplicaAddrs(replicas, useDisconnected) if len(addrs) == 0 { continue } // Push working sentinel to the top. c.sentinelAddrs[0], c.sentinelAddrs[i] = c.sentinelAddrs[i], c.sentinelAddrs[0] c.setSentinel(ctx, sentinel) return addrs, nil } if sentinelReachable { return []string{}, nil } return []string{}, errors.New("redis: all sentinels specified in configuration are unreachable") } func (c *sentinelFailover) getMasterAddr(ctx context.Context, sentinel *SentinelClient) string { addr, err := sentinel.GetMasterAddrByName(ctx, c.opt.MasterName).Result() if err != nil { internal.Logger.Printf(ctx, "sentinel: GetMasterAddrByName name=%q failed: %s", c.opt.MasterName, err) return "" } return net.JoinHostPort(addr[0], addr[1]) } func (c *sentinelFailover) getReplicaAddrs(ctx context.Context, sentinel *SentinelClient) []string { addrs, err := sentinel.Replicas(ctx, c.opt.MasterName).Result() if err != nil { internal.Logger.Printf(ctx, "sentinel: Replicas name=%q failed: %s", c.opt.MasterName, err) return []string{} } return parseReplicaAddrs(addrs, false) } func parseReplicaAddrs(addrs []map[string]string, keepDisconnected bool) []string { nodes := make([]string, 0, len(addrs)) for _, node := range addrs { isDown := false if flags, ok := node["flags"]; ok { for _, flag := range strings.Split(flags, ",") { switch flag { case "s_down", "o_down": isDown = true case "disconnected": if !keepDisconnected { isDown = true } } } } if !isDown && node["ip"] != "" && node["port"] != "" { nodes = append(nodes, net.JoinHostPort(node["ip"], node["port"])) } } return nodes } func (c *sentinelFailover) trySwitchMaster(ctx context.Context, addr string) { c.mu.RLock() currentAddr := c._masterAddr //nolint:ifshort c.mu.RUnlock() if addr == currentAddr { return } c.mu.Lock() defer c.mu.Unlock() if addr == c._masterAddr { return } c._masterAddr = addr internal.Logger.Printf(ctx, "sentinel: new master=%q addr=%q", c.opt.MasterName, addr) if c.onFailover != nil { c.onFailover(ctx, addr) } } func (c *sentinelFailover) setSentinel(ctx context.Context, sentinel *SentinelClient) { if c.sentinel != nil { panic("not reached") } c.sentinel = sentinel c.discoverSentinels(ctx) c.pubsub = sentinel.Subscribe(ctx, "+switch-master", "+replica-reconf-done") go c.listen(c.pubsub) } func (c *sentinelFailover) discoverSentinels(ctx context.Context) { sentinels, err := c.sentinel.Sentinels(ctx, c.opt.MasterName).Result() if err != nil { internal.Logger.Printf(ctx, "sentinel: Sentinels master=%q failed: %s", c.opt.MasterName, err) return } for _, sentinel := range sentinels { ip, ok := sentinel["ip"] if !ok { continue } port, ok := sentinel["port"] if !ok { continue } if ip != "" && port != "" { sentinelAddr := net.JoinHostPort(ip, port) if !contains(c.sentinelAddrs, sentinelAddr) { internal.Logger.Printf(ctx, "sentinel: discovered new sentinel=%q for master=%q", sentinelAddr, c.opt.MasterName) c.sentinelAddrs = append(c.sentinelAddrs, sentinelAddr) } } } } func (c *sentinelFailover) listen(pubsub *PubSub) { ctx := context.TODO() if c.onUpdate != nil { c.onUpdate(ctx) } ch := pubsub.Channel() for msg := range ch { if msg.Channel == "+switch-master" { parts := strings.Split(msg.Payload, " ") if parts[0] != c.opt.MasterName { internal.Logger.Printf(pubsub.getContext(), "sentinel: ignore addr for master=%q", parts[0]) continue } addr := net.JoinHostPort(parts[3], parts[4]) c.trySwitchMaster(pubsub.getContext(), addr) } if c.onUpdate != nil { c.onUpdate(ctx) } } } func contains(slice []string, str string) bool { for _, s := range slice { if s == str { return true } } return false } //------------------------------------------------------------------------------ // NewFailoverClusterClient returns a client that supports routing read-only commands // to a replica node. func NewFailoverClusterClient(failoverOpt *FailoverOptions) *ClusterClient { sentinelAddrs := make([]string, len(failoverOpt.SentinelAddrs)) copy(sentinelAddrs, failoverOpt.SentinelAddrs) failover := &sentinelFailover{ opt: failoverOpt, sentinelAddrs: sentinelAddrs, } opt := failoverOpt.clusterOptions() opt.ClusterSlots = func(ctx context.Context) ([]ClusterSlot, error) { masterAddr, err := failover.MasterAddr(ctx) if err != nil { return nil, err } nodes := []ClusterNode{{ Addr: masterAddr, }} replicaAddrs, err := failover.replicaAddrs(ctx, false) if err != nil { return nil, err } for _, replicaAddr := range replicaAddrs { nodes = append(nodes, ClusterNode{ Addr: replicaAddr, }) } slots := []ClusterSlot{ { Start: 0, End: 16383, Nodes: nodes, }, } return slots, nil } c := NewClusterClient(opt) failover.mu.Lock() failover.onUpdate = func(ctx context.Context) { c.ReloadState(ctx) } failover.mu.Unlock() return c }