From 91ef777771d04f0fa32c25076f932cfdc4f266e8 Mon Sep 17 00:00:00 2001 From: Alex Roitman <aroitman@housecanary.com> Date: Wed, 25 Mar 2020 13:11:05 -0700 Subject: [PATCH 1/8] Add benchmark for fieldMatch. --- internal/server/scanner_test.go | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 internal/server/scanner_test.go diff --git a/internal/server/scanner_test.go b/internal/server/scanner_test.go new file mode 100644 index 00000000..fc021cb5 --- /dev/null +++ b/internal/server/scanner_test.go @@ -0,0 +1,52 @@ +package server + +import ( + "math" + "math/rand" + "testing" + "time" + + "github.com/tidwall/geojson" + "github.com/tidwall/geojson/geometry" +) + +type testPointItem struct { + object geojson.Object + fields []float64 +} + +func PO(x, y float64) *geojson.Point { + return geojson.NewPoint(geometry.Point{X: x, Y: y}) +} + + +func BenchmarkFieldMatch(t *testing.B) { + rand.Seed(time.Now().UnixNano()) + items := make([]testPointItem, t.N) + for i := 0; i < t.N; i++ { + items[i] = testPointItem{ + PO(rand.Float64()*360-180, rand.Float64()*180-90), + []float64{rand.Float64()*9+1, math.Round(rand.Float64()*30) + 1}, + } + } + sw := &scanWriter{ + wheres: []whereT{ + {"foo", false, 1, false, 3}, + {"bar", false, 10, false, 30}, + }, + whereins: []whereinT{ + {"foo", map[float64]struct{}{1: {}, 2: {}}}, + {"bar", map[float64]struct{}{11: {}, 25: {}}}, + }, + fmap: map[string]int{"foo": 0, "bar": 1}, + farr: []string{"bar", "foo"}, + } + sw.fvals = make([]float64, len(sw.farr)) + t.ResetTimer() + for i := 0; i < t.N; i++ { + // one call is super fast, measurements are not reliable, let's do 100 + for ix := 0; ix < 100; ix++ { + sw.fieldMatch(items[i].fields, items[i].object) + } + } +} From 27c6980f82cbae0119d951d602b75ead40f47cc0 Mon Sep 17 00:00:00 2001 From: Alex Roitman <aroitman@housecanary.com> Date: Tue, 24 Mar 2020 20:12:05 -0700 Subject: [PATCH 2/8] Copy array and only loop if we need to pad. --- internal/server/scanner.go | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/internal/server/scanner.go b/internal/server/scanner.go index bfa6f176..df5cfbf6 100644 --- a/internal/server/scanner.go +++ b/internal/server/scanner.go @@ -248,12 +248,10 @@ func (sw *scanWriter) fieldMatch(fields []float64, o geojson.Object) (fvals []fl } } } else { - for idx := range sw.farr { - var value float64 - if len(fields) > idx { - value = fields[idx] - } - sw.fvals[idx] = value + copy(sw.fvals, fields) + // fields might be shorter for this item, need to pad sw.fvals with zeros + for i := len(fields); i < len(sw.fvals); i++ { + sw.fvals[i] = 0 } for _, where := range sw.wheres { if where.field == "z" { @@ -267,21 +265,13 @@ func (sw *scanWriter) fieldMatch(fields []float64, o geojson.Object) (fvals []fl } continue } - var value float64 - idx, ok := sw.fmap[where.field] - if ok { - value = sw.fvals[idx] - } + value := sw.fvals[sw.fmap[where.field]] if !where.match(value) { return } } for _, wherein := range sw.whereins { - var value float64 - idx, ok := sw.fmap[wherein.field] - if ok { - value = sw.fvals[idx] - } + value := sw.fvals[sw.fmap[wherein.field]] if !wherein.match(value) { return } From 9e7766b346b77aec9fd7c36bd74f6121f3a06637 Mon Sep 17 00:00:00 2001 From: Alex Roitman <aroitman@housecanary.com> Date: Tue, 24 Mar 2020 22:08:55 -0700 Subject: [PATCH 3/8] Array of values instead of map for whereins. --- internal/server/scanner_test.go | 4 ++-- internal/server/token.go | 17 ++++++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/internal/server/scanner_test.go b/internal/server/scanner_test.go index fc021cb5..013d1d0a 100644 --- a/internal/server/scanner_test.go +++ b/internal/server/scanner_test.go @@ -35,8 +35,8 @@ func BenchmarkFieldMatch(t *testing.B) { {"bar", false, 10, false, 30}, }, whereins: []whereinT{ - {"foo", map[float64]struct{}{1: {}, 2: {}}}, - {"bar", map[float64]struct{}{11: {}, 25: {}}}, + {"foo", []float64{1, 2}}, + {"bar", []float64{11, 25}}, }, fmap: map[string]int{"foo": 0, "bar": 1}, farr: []string{"bar", "foo"}, diff --git a/internal/server/token.go b/internal/server/token.go index 52f97774..75cca315 100644 --- a/internal/server/token.go +++ b/internal/server/token.go @@ -159,12 +159,16 @@ func zMinMaxFromWheres(wheres []whereT) (minZ, maxZ float64) { type whereinT struct { field string - valMap map[float64]struct{} + valArr []float64 } func (wherein whereinT) match(value float64) bool { - _, ok := wherein.valMap[value] - return ok + for _, val := range wherein.valArr { + if val == value { + return true + } + } + return false } type whereevalT struct { @@ -342,9 +346,8 @@ func (s *Server) parseSearchScanBaseTokens( err = errInvalidArgument(nvalsStr) return } - valMap := make(map[float64]struct{}) + valArr := make([]float64, nvals) var val float64 - var empty struct{} for i = 0; i < nvals; i++ { if vs, valStr, ok = tokenval(vs); !ok || valStr == "" { err = errInvalidNumberOfArguments @@ -354,9 +357,9 @@ func (s *Server) parseSearchScanBaseTokens( err = errInvalidArgument(valStr) return } - valMap[val] = empty + valArr = append(valArr, val) } - t.whereins = append(t.whereins, whereinT{field, valMap}) + t.whereins = append(t.whereins, whereinT{field, valArr}) continue case "whereevalsha": fallthrough From d5132a9eae6e4c0b455fe2078d051123524f509d Mon Sep 17 00:00:00 2001 From: Alex Roitman <aroitman@housecanary.com> Date: Wed, 25 Mar 2020 11:53:44 -0700 Subject: [PATCH 4/8] Map field names to array indices in scanwriter, once per query. --- internal/server/scanner.go | 33 +++++++++++++++++++-------------- internal/server/scanner_test.go | 8 ++++---- internal/server/token.go | 6 ++++-- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/internal/server/scanner.go b/internal/server/scanner.go index df5cfbf6..30c21076 100644 --- a/internal/server/scanner.go +++ b/internal/server/scanner.go @@ -97,8 +97,6 @@ func (s *Server) newScanWriter( msg: msg, cursor: cursor, limit: limit, - wheres: wheres, - whereins: whereins, whereevals: whereevals, output: output, nofields: nofields, @@ -117,6 +115,19 @@ func (s *Server) newScanWriter( if sw.col != nil { sw.fmap = sw.col.FieldMap() sw.farr = sw.col.FieldArr() + // This fills index value in wheres/whereins + // so we don't have to map string field names for each tested object + var ok bool + for _, where := range wheres { + if where.index, ok = sw.fmap[where.field]; ok { + sw.wheres = append(sw.wheres, where) + } + } + for _, wherein := range whereins { + if wherein.index, ok = sw.fmap[wherein.field]; ok { + sw.whereins = append(sw.whereins, wherein) + } + } } sw.fvals = make([]float64, len(sw.farr)) return sw, nil @@ -212,11 +223,8 @@ func (sw *scanWriter) fieldMatch(fields []float64, o geojson.Object) (fvals []fl continue } var value float64 - idx, ok := sw.fmap[where.field] - if ok { - if len(fields) > idx { - value = fields[idx] - } + if len(fields) > where.index { + value = fields[where.index] } if !where.match(value) { return @@ -224,11 +232,8 @@ func (sw *scanWriter) fieldMatch(fields []float64, o geojson.Object) (fvals []fl } for _, wherein := range sw.whereins { var value float64 - idx, ok := sw.fmap[wherein.field] - if ok { - if len(fields) > idx { - value = fields[idx] - } + if len(fields) > wherein.index { + value = fields[wherein.index] } if !wherein.match(value) { return @@ -265,13 +270,13 @@ func (sw *scanWriter) fieldMatch(fields []float64, o geojson.Object) (fvals []fl } continue } - value := sw.fvals[sw.fmap[where.field]] + value := sw.fvals[where.index] if !where.match(value) { return } } for _, wherein := range sw.whereins { - value := sw.fvals[sw.fmap[wherein.field]] + value := sw.fvals[wherein.index] if !wherein.match(value) { return } diff --git a/internal/server/scanner_test.go b/internal/server/scanner_test.go index 013d1d0a..31f186a2 100644 --- a/internal/server/scanner_test.go +++ b/internal/server/scanner_test.go @@ -31,12 +31,12 @@ func BenchmarkFieldMatch(t *testing.B) { } sw := &scanWriter{ wheres: []whereT{ - {"foo", false, 1, false, 3}, - {"bar", false, 10, false, 30}, + {"foo", 0, false, 1, false, 3}, + {"bar", 1, false, 10, false, 30}, }, whereins: []whereinT{ - {"foo", []float64{1, 2}}, - {"bar", []float64{11, 25}}, + {"foo", 0, []float64{1, 2}}, + {"bar", 1, []float64{11, 25}}, }, fmap: map[string]int{"foo": 0, "bar": 1}, farr: []string{"bar", "foo"}, diff --git a/internal/server/token.go b/internal/server/token.go index 75cca315..6b471b0c 100644 --- a/internal/server/token.go +++ b/internal/server/token.go @@ -116,6 +116,7 @@ func lc(s1, s2 string) bool { type whereT struct { field string + index int minx bool min float64 maxx bool @@ -159,6 +160,7 @@ func zMinMaxFromWheres(wheres []whereT) (minZ, maxZ float64) { type whereinT struct { field string + index int valArr []float64 } @@ -328,7 +330,7 @@ func (s *Server) parseSearchScanBaseTokens( return } } - t.wheres = append(t.wheres, whereT{field, minx, min, maxx, max}) + t.wheres = append(t.wheres, whereT{field, -1, minx, min, maxx, max}) continue case "wherein": vs = nvs @@ -359,7 +361,7 @@ func (s *Server) parseSearchScanBaseTokens( } valArr = append(valArr, val) } - t.whereins = append(t.whereins, whereinT{field, valArr}) + t.whereins = append(t.whereins, whereinT{field, -1, valArr}) continue case "whereevalsha": fallthrough From f3cc365d246ed9c8c01ebe9f04d35d45b0a02abb Mon Sep 17 00:00:00 2001 From: Alex Roitman <aroitman@housecanary.com> Date: Thu, 9 Apr 2020 09:30:38 -0700 Subject: [PATCH 5/8] Pre-allocate where and wherein arrays. --- internal/server/scanner.go | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/internal/server/scanner.go b/internal/server/scanner.go index 30c21076..8d310b8a 100644 --- a/internal/server/scanner.go +++ b/internal/server/scanner.go @@ -118,14 +118,20 @@ func (s *Server) newScanWriter( // This fills index value in wheres/whereins // so we don't have to map string field names for each tested object var ok bool - for _, where := range wheres { - if where.index, ok = sw.fmap[where.field]; ok { - sw.wheres = append(sw.wheres, where) + if len(wheres) > 0 { + sw.wheres = make([]whereT, 0, len(wheres)) + for _, where := range wheres { + if where.index, ok = sw.fmap[where.field]; ok { + sw.wheres = append(sw.wheres, where) + } } } - for _, wherein := range whereins { - if wherein.index, ok = sw.fmap[wherein.field]; ok { - sw.whereins = append(sw.whereins, wherein) + if len(whereins) > 0 { + sw.whereins = make([]whereinT, 0, len(whereins)) + for _, wherein := range whereins { + if wherein.index, ok = sw.fmap[wherein.field]; ok { + sw.whereins = append(sw.whereins, wherein) + } } } } From fe0216c42c05ae1ffb839cdaae5ed805c12c3389 Mon Sep 17 00:00:00 2001 From: Alex Roitman <aroitman@housecanary.com> Date: Thu, 9 Apr 2020 09:51:19 -0700 Subject: [PATCH 6/8] Restore previous behavior where non-existing fields are treated as zero-value. --- internal/server/scanner.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/internal/server/scanner.go b/internal/server/scanner.go index 8d310b8a..7e85a027 100644 --- a/internal/server/scanner.go +++ b/internal/server/scanner.go @@ -120,18 +120,20 @@ func (s *Server) newScanWriter( var ok bool if len(wheres) > 0 { sw.wheres = make([]whereT, 0, len(wheres)) - for _, where := range wheres { + for i, where := range wheres { if where.index, ok = sw.fmap[where.field]; ok { - sw.wheres = append(sw.wheres, where) + where.index = math.MaxInt32 } + sw.wheres[i] = where } } if len(whereins) > 0 { sw.whereins = make([]whereinT, 0, len(whereins)) - for _, wherein := range whereins { + for i, wherein := range whereins { if wherein.index, ok = sw.fmap[wherein.field]; ok { - sw.whereins = append(sw.whereins, wherein) + wherein.index = math.MaxInt32 } + sw.whereins[i] = wherein } } } @@ -276,13 +278,19 @@ func (sw *scanWriter) fieldMatch(fields []float64, o geojson.Object) (fvals []fl } continue } - value := sw.fvals[where.index] + var value float64 + if where.index < len(sw.fvals) { + value = sw.fvals[where.index] + } if !where.match(value) { return } } for _, wherein := range sw.whereins { - value := sw.fvals[wherein.index] + var value float64 + if wherein.index < len(sw.fvals) { + value = sw.fvals[wherein.index] + } if !wherein.match(value) { return } From 914f51de11e4678137a7c25caf810df6901b9606 Mon Sep 17 00:00:00 2001 From: Alex Roitman <aroitman@housecanary.com> Date: Thu, 9 Apr 2020 09:59:24 -0700 Subject: [PATCH 7/8] Fixes --- internal/server/scanner.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/server/scanner.go b/internal/server/scanner.go index 7e85a027..8eed94e4 100644 --- a/internal/server/scanner.go +++ b/internal/server/scanner.go @@ -119,18 +119,18 @@ func (s *Server) newScanWriter( // so we don't have to map string field names for each tested object var ok bool if len(wheres) > 0 { - sw.wheres = make([]whereT, 0, len(wheres)) + sw.wheres = make([]whereT, len(wheres)) for i, where := range wheres { - if where.index, ok = sw.fmap[where.field]; ok { + if where.index, ok = sw.fmap[where.field]; !ok { where.index = math.MaxInt32 } sw.wheres[i] = where } } if len(whereins) > 0 { - sw.whereins = make([]whereinT, 0, len(whereins)) + sw.whereins = make([]whereinT, len(whereins)) for i, wherein := range whereins { - if wherein.index, ok = sw.fmap[wherein.field]; ok { + if wherein.index, ok = sw.fmap[wherein.field]; !ok { wherein.index = math.MaxInt32 } sw.whereins[i] = wherein @@ -231,7 +231,7 @@ func (sw *scanWriter) fieldMatch(fields []float64, o geojson.Object) (fvals []fl continue } var value float64 - if len(fields) > where.index { + if where.index < len(fields) { value = fields[where.index] } if !where.match(value) { @@ -240,7 +240,7 @@ func (sw *scanWriter) fieldMatch(fields []float64, o geojson.Object) (fvals []fl } for _, wherein := range sw.whereins { var value float64 - if len(fields) > wherein.index { + if wherein.index < len(fields) { value = fields[wherein.index] } if !wherein.match(value) { From 25579a052c4f0a62616a7fcb463f970434c12e63 Mon Sep 17 00:00:00 2001 From: Alex Roitman <aroitman@housecanary.com> Date: Sun, 12 Apr 2020 16:04:18 -0700 Subject: [PATCH 8/8] Fix a bug in WHEREIN -- 0 values would always match, incorrectly. --- internal/server/token.go | 2 +- tests/keys_test.go | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/server/token.go b/internal/server/token.go index 6b471b0c..72b70e8a 100644 --- a/internal/server/token.go +++ b/internal/server/token.go @@ -359,7 +359,7 @@ func (s *Server) parseSearchScanBaseTokens( err = errInvalidArgument(valStr) return } - valArr = append(valArr, val) + valArr[i] = val } t.whereins = append(t.whereins, whereinT{field, -1, valArr}) continue diff --git a/tests/keys_test.go b/tests/keys_test.go index 17ad6e51..0170ca27 100644 --- a/tests/keys_test.go +++ b/tests/keys_test.go @@ -394,6 +394,9 @@ func keys_WHEREIN_test(mc *mockServer) error { {"SET", "mykey", "myid_a2", "FIELD", "a", 2, "POINT", 32.99, -115}, {"OK"}, {"SET", "mykey", "myid_a3", "FIELD", "a", 3, "POINT", 33, -115.02}, {"OK"}, {"WITHIN", "mykey", "WHEREIN", "a", 3, 0, 1, 2, "BOUNDS", 32.8, -115.2, 33.2, -114.8}, {`[0 [[myid_a1 {"type":"Point","coordinates":[-115,33]} [a 1]] [myid_a2 {"type":"Point","coordinates":[-115,32.99]} [a 2]]]]`}, + // zero value should not match 1 and 2 + {"SET", "mykey", "myid_a0", "FIELD", "a", 0, "POINT", 33, -115.02}, {"OK"}, + {"WITHIN", "mykey", "WHEREIN", "a", 2, 1, 2, "BOUNDS", 32.8, -115.2, 33.2, -114.8}, {`[0 [[myid_a1 {"type":"Point","coordinates":[-115,33]} [a 1]] [myid_a2 {"type":"Point","coordinates":[-115,32.99]} [a 2]]]]`}, }) }