Fix Redis Cluster issue during roll outs of new nodes with same addr (#1914)

* fix: recycle connections in some Redis Cluster scenarios

This issue was surfaced in a Cloud Provider solution that used for
rolling out new nodes using the same address (hostname) of the nodes
that will be replaced in a Redis Cluster, while the former ones once
depromoted as Slaves would continue in service during some mintues
for redirecting traffic.

The solution basically identifies when the connection could be stale
since a MOVED response will be returned using the same address (hostname)
that is being used by the connection. At that moment we consider the
connection as no longer usable forcing to recycle the connection.
This commit is contained in:
Pau Freixes 2021-10-04 12:10:42 +02:00 committed by GitHub
parent 507203108a
commit 98bb99ddc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 24 additions and 6 deletions

View File

@ -65,7 +65,7 @@ func isRedisError(err error) bool {
return ok
}
func isBadConn(err error, allowTimeout bool) bool {
func isBadConn(err error, allowTimeout bool, addr string) bool {
switch err {
case nil:
return false
@ -74,9 +74,19 @@ func isBadConn(err error, allowTimeout bool) bool {
}
if isRedisError(err) {
// Close connections in read only state in case domain addr is used
// and domain resolves to a different Redis Server. See #790.
return isReadOnlyError(err)
switch {
case isReadOnlyError(err):
// Close connections in read only state in case domain addr is used
// and domain resolves to a different Redis Server. See #790.
return true
case isMovedSameConnAddr(err, addr):
// Close connections when we are asked to move to the same addr
// of the connection. Force a DNS resolution when all connections
// of the pool are recycled
return true
default:
return false
}
}
if allowTimeout {
@ -119,6 +129,14 @@ func isReadOnlyError(err error) bool {
return strings.HasPrefix(err.Error(), "READONLY ")
}
func isMovedSameConnAddr(err error, addr string) bool {
redisError := err.Error()
if !strings.HasPrefix(redisError, "MOVED ") {
return false
}
return strings.HasSuffix(redisError, addr)
}
//------------------------------------------------------------------------------
type timeoutError interface {

View File

@ -141,7 +141,7 @@ func (c *PubSub) releaseConn(ctx context.Context, cn *pool.Conn, err error, allo
if c.cn != cn {
return
}
if isBadConn(err, allowTimeout) {
if isBadConn(err, allowTimeout, c.opt.Addr) {
c.reconnect(ctx, err)
}
}

View File

@ -261,7 +261,7 @@ func (c *baseClient) releaseConn(ctx context.Context, cn *pool.Conn, err error)
c.opt.Limiter.ReportResult(err)
}
if isBadConn(err, false) {
if isBadConn(err, false, c.opt.Addr) {
c.connPool.Remove(ctx, cn, err)
} else {
c.connPool.Put(ctx, cn)