fix node routing in slotClosestNode (#3043)

* fix node routing when all nodes are failing

* fix minlatency zero value
This commit is contained in:
Srikar Jilugu 2024-07-10 08:56:27 +05:30 committed by GitHub
parent 26e0c49acf
commit 9c1f4f0642
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 33 additions and 11 deletions

View File

@ -341,6 +341,8 @@ func (n *clusterNode) Close() error {
return n.Client.Close()
}
const maximumNodeLatency = 1 * time.Minute
func (n *clusterNode) updateLatency() {
const numProbe = 10
var dur uint64
@ -361,7 +363,7 @@ func (n *clusterNode) updateLatency() {
if successes == 0 {
// If none of the pings worked, set latency to some arbitrarily high value so this node gets
// least priority.
latency = float64((1 * time.Minute) / time.Microsecond)
latency = float64((maximumNodeLatency) / time.Microsecond)
} else {
latency = float64(dur) / float64(successes)
}
@ -735,20 +737,40 @@ func (c *clusterState) slotClosestNode(slot int) (*clusterNode, error) {
return c.nodes.Random()
}
var node *clusterNode
var allNodesFailing = true
var (
closestNonFailingNode *clusterNode
closestNode *clusterNode
minLatency time.Duration
)
// setting the max possible duration as zerovalue for minlatency
minLatency = time.Duration(math.MaxInt64)
for _, n := range nodes {
if n.Failing() {
continue
if closestNode == nil || n.Latency() < minLatency {
closestNode = n
minLatency = n.Latency()
if !n.Failing() {
closestNonFailingNode = n
allNodesFailing = false
}
}
if node == nil || n.Latency() < node.Latency() {
node = n
}
}
if node != nil {
return node, nil
}
// If all nodes are failing - return random node
// pick the healthly node with the lowest latency
if !allNodesFailing && closestNonFailingNode != nil {
return closestNonFailingNode, nil
}
// if all nodes are failing, we will pick the temporarily failing node with lowest latency
if minLatency < maximumNodeLatency && closestNode != nil {
internal.Logger.Printf(context.TODO(), "redis: all nodes are marked as failed, picking the temporarily failing node with lowest latency")
return closestNode, nil
}
// If all nodes are having the maximum latency(all pings are failing) - return a random node across the cluster
internal.Logger.Printf(context.TODO(), "redis: pings to all nodes are failing, picking a random node across the cluster")
return c.nodes.Random()
}