From 98bb99ddc254be0c30b59d59bc73892d510c0de8 Mon Sep 17 00:00:00 2001 From: Pau Freixes Date: Mon, 4 Oct 2021 12:10:42 +0200 Subject: [PATCH] Fix Redis Cluster issue during roll outs of new nodes with same addr (#1914) * fix: recycle connections in some Redis Cluster scenarios This issue was surfaced in a Cloud Provider solution that used for rolling out new nodes using the same address (hostname) of the nodes that will be replaced in a Redis Cluster, while the former ones once depromoted as Slaves would continue in service during some mintues for redirecting traffic. The solution basically identifies when the connection could be stale since a MOVED response will be returned using the same address (hostname) that is being used by the connection. At that moment we consider the connection as no longer usable forcing to recycle the connection. --- error.go | 26 ++++++++++++++++++++++---- pubsub.go | 2 +- redis.go | 2 +- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/error.go b/error.go index ad9e173..5025215 100644 --- a/error.go +++ b/error.go @@ -65,7 +65,7 @@ func isRedisError(err error) bool { return ok } -func isBadConn(err error, allowTimeout bool) bool { +func isBadConn(err error, allowTimeout bool, addr string) bool { switch err { case nil: return false @@ -74,9 +74,19 @@ func isBadConn(err error, allowTimeout bool) bool { } if isRedisError(err) { - // Close connections in read only state in case domain addr is used - // and domain resolves to a different Redis Server. See #790. - return isReadOnlyError(err) + switch { + case isReadOnlyError(err): + // Close connections in read only state in case domain addr is used + // and domain resolves to a different Redis Server. See #790. + return true + case isMovedSameConnAddr(err, addr): + // Close connections when we are asked to move to the same addr + // of the connection. Force a DNS resolution when all connections + // of the pool are recycled + return true + default: + return false + } } if allowTimeout { @@ -119,6 +129,14 @@ func isReadOnlyError(err error) bool { return strings.HasPrefix(err.Error(), "READONLY ") } +func isMovedSameConnAddr(err error, addr string) bool { + redisError := err.Error() + if !strings.HasPrefix(redisError, "MOVED ") { + return false + } + return strings.HasSuffix(redisError, addr) +} + //------------------------------------------------------------------------------ type timeoutError interface { diff --git a/pubsub.go b/pubsub.go index c591240..efc2354 100644 --- a/pubsub.go +++ b/pubsub.go @@ -141,7 +141,7 @@ func (c *PubSub) releaseConn(ctx context.Context, cn *pool.Conn, err error, allo if c.cn != cn { return } - if isBadConn(err, allowTimeout) { + if isBadConn(err, allowTimeout, c.opt.Addr) { c.reconnect(ctx, err) } } diff --git a/redis.go b/redis.go index 53a4700..32cdabd 100644 --- a/redis.go +++ b/redis.go @@ -261,7 +261,7 @@ func (c *baseClient) releaseConn(ctx context.Context, cn *pool.Conn, err error) c.opt.Limiter.ReportResult(err) } - if isBadConn(err, false) { + if isBadConn(err, false, c.opt.Addr) { c.connPool.Remove(ctx, cn, err) } else { c.connPool.Put(ctx, cn)