Merge 7c68d27694 into f1ffb55c9a

Only check latencies once every 10 seconds with `routeByLatency` (#2795 )
* Only check latencies once every 10 seconds with `routeByLatency` `routeByLatency` currently checks latencies any time a server returns a MOVED or READONLY reply. When a shard is down, the ClusterClient chooses to issue the request to a random server, which returns a MOVED reply. This causes a state refresh and a latency update on all servers. This can lead to significant ping load to clusters with a large number of clients. This introduces logic to ping only once every 10 seconds, only performing a latency update on a node during the `GC` function if the latency was set later than 10 seconds ago. Fixes https://github.com/redis/go-redis/issues/2782 * use UnixNano instead of Unix for better precision --------- Co-authored-by: ofekshenawa <104765379+ofekshenawa@users.noreply.github.com>
2024-11-21 08:40:26 +01:00 · 2024-11-20 14:36:39 +02:00 · 2024-11-20 13:38:06 +02:00 · 2023-11-19 17:20:38 +01:00
4 changed files with 90 additions and 4 deletions
--- a/README.md
+++ b/README.md
@ -70,7 +70,7 @@ go-redis supports 2 last Go versions and requires a Go version with
 module:
 ```shell
-go mod init github.com/my/repo
+go mod init example.com/greetings
 ```
 Then install go-redis/**v9**:
--- a/osscluster.go
+++ b/osscluster.go
@ -21,6 +21,10 @@ import (
 	"github.com/redis/go-redis/v9/internal/rand"
 )
 const (
 	minLatencyMeasurementInterval = 10 * time.Second
 )
 var errClusterNoNodes = fmt.Errorf("redis: cluster has no nodes")
 // ClusterOptions are used to configure a cluster client and should be
@ -316,6 +320,10 @@ type clusterNode struct {
 	latency    uint32 // atomic
 	generation uint32 // atomic
 	failing    uint32 // atomic
 	// last time the latency measurement was performed for the node, stored in nanoseconds
 	// from epoch
 	lastLatencyMeasurement int64 // atomic
 }
 func newClusterNode(clOpt *ClusterOptions, addr string) *clusterNode {
@ -368,6 +376,7 @@ func (n *clusterNode) updateLatency() {
 		latency = float64(dur) / float64(successes)
 	}
 	atomic.StoreUint32(&n.latency, uint32(latency+0.5))
 	n.SetLastLatencyMeasurement(time.Now())
 }
 func (n *clusterNode) Latency() time.Duration {
@ -397,6 +406,10 @@ func (n *clusterNode) Generation() uint32 {
 	return atomic.LoadUint32(&n.generation)
 }
 func (n *clusterNode) LastLatencyMeasurement() int64 {
 	return atomic.LoadInt64(&n.lastLatencyMeasurement)
 }
 func (n *clusterNode) SetGeneration(gen uint32) {
 	for {
 		v := atomic.LoadUint32(&n.generation)
@ -406,6 +419,15 @@ func (n *clusterNode) SetGeneration(gen uint32) {
 	}
 }
 func (n *clusterNode) SetLastLatencyMeasurement(t time.Time) {
 	for {
 		v := atomic.LoadInt64(&n.lastLatencyMeasurement)
 		if t.UnixNano() < v || atomic.CompareAndSwapInt64(&n.lastLatencyMeasurement, v, t.UnixNano()) {
 			break
 		}
 	}
 }
 //------------------------------------------------------------------------------
 type clusterNodes struct {
@ -493,10 +515,11 @@ func (c *clusterNodes) GC(generation uint32) {
 	c.mu.Lock()
 	c.activeAddrs = c.activeAddrs[:0]
 	now := time.Now()
 	for addr, node := range c.nodes {
 		if node.Generation() >= generation {
 			c.activeAddrs = append(c.activeAddrs, addr)
-			if c.opt.RouteByLatency {
+			if c.opt.RouteByLatency && node.LastLatencyMeasurement() < now.Add(-minLatencyMeasurementInterval).UnixNano() {
 				go node.updateLatency()
 			}
 			continue
--- a/redis.go
+++ b/redis.go
@ -176,8 +176,6 @@ func (hs *hooksMixin) withProcessPipelineHook(
 }
 func (hs *hooksMixin) dialHook(ctx context.Context, network, addr string) (net.Conn, error) {
 	hs.hooksMu.Lock()
 	defer hs.hooksMu.Unlock()
 	return hs.current.dial(ctx, network, addr)
 }
--- a/redis_test.go
+++ b/redis_test.go
@ -6,6 +6,7 @@ import (
 	"errors"
 	"fmt"
 	"net"
 	"sync"
 	"testing"
 	"time"
@ -633,3 +634,67 @@ var _ = Describe("Hook with MinIdleConns", func() {
 		}))
 	})
 })
 var _ = Describe("Dialer connection timeouts", func() {
 	var client *redis.Client
 	const dialSimulatedDelay = 1 * time.Second
 	BeforeEach(func() {
 		options := redisOptions()
 		options.Dialer = func(ctx context.Context, network, addr string) (net.Conn, error) {
 			// Simulated slow dialer.
 			// Note that the following sleep is deliberately not context-aware.
 			time.Sleep(dialSimulatedDelay)
 			return net.Dial("tcp", options.Addr)
 		}
 		options.MinIdleConns = 1
 		client = redis.NewClient(options)
 	})
 	AfterEach(func() {
 		err := client.Close()
 		Expect(err).NotTo(HaveOccurred())
 	})
 	It("does not contend on connection dial for concurrent commands", func() {
 		var wg sync.WaitGroup
 		const concurrency = 10
 		durations := make(chan time.Duration, concurrency)
 		errs := make(chan error, concurrency)
 		start := time.Now()
 		wg.Add(concurrency)
 		for i := 0; i < concurrency; i++ {
 			go func() {
 				defer wg.Done()
 				start := time.Now()
 				err := client.Ping(ctx).Err()
 				durations <- time.Since(start)
 				errs <- err
 			}()
 		}
 		wg.Wait()
 		close(durations)
 		close(errs)
 		// All commands should eventually succeed, after acquiring a connection.
 		for err := range errs {
 			Expect(err).NotTo(HaveOccurred())
 		}
 		// Each individual command should complete within the simulated dial duration bound.
 		for duration := range durations {
 			Expect(duration).To(BeNumerically("<", 2*dialSimulatedDelay))
 		}
 		// Due to concurrent execution, the entire test suite should also complete within
 		// the same dial duration bound applied for individual commands.
 		Expect(time.Since(start)).To(BeNumerically("<", 2*dialSimulatedDelay))
 	})
 })
Author	SHA1	Message	Date
Jonas Kwiedor	2f9b37f940	Merge `7c68d27694` into `f1ffb55c9a`	2024-11-21 08:40:26 +01:00
Justin	f1ffb55c9a	Only check latencies once every 10 seconds with `routeByLatency` (#2795 ) * Only check latencies once every 10 seconds with `routeByLatency` `routeByLatency` currently checks latencies any time a server returns a MOVED or READONLY reply. When a shard is down, the ClusterClient chooses to issue the request to a random server, which returns a MOVED reply. This causes a state refresh and a latency update on all servers. This can lead to significant ping load to clusters with a large number of clients. This introduces logic to ping only once every 10 seconds, only performing a latency update on a node during the `GC` function if the latency was set later than 10 seconds ago. Fixes https://github.com/redis/go-redis/issues/2782 * use UnixNano instead of Unix for better precision --------- Co-authored-by: ofekshenawa <104765379+ofekshenawa@users.noreply.github.com>	2024-11-20 14:36:39 +02:00
LINKIWI	080e051124	Eliminate redundant dial mutex causing unbounded connection queue contention (#3088 ) * Eliminate redundant dial mutex causing unbounded connection queue contention * Dialer connection timeouts unit test --------- Co-authored-by: ofekshenawa <104765379+ofekshenawa@users.noreply.github.com>	2024-11-20 13:38:06 +02:00
Jonas Kwiedor	7c68d27694	docs: Change go mod init command example Doesn't it make sense to use the command from the official Go tutorial for the description? Here is the link to it: https://go.dev/doc/tutorial/create-module	2023-11-19 17:20:38 +01:00