From 88fcc08f394959ea87ed94a990e8420294220b03 Mon Sep 17 00:00:00 2001 From: gobwas Date: Tue, 2 Feb 2016 14:57:42 +0300 Subject: [PATCH 01/26] progress --- compiler.go | 14 +++++++------- glob.go | 4 ++-- match/any.go | 2 +- match/single.go | 2 +- todo.txt | 27 +++++++++++++++++++++++++++ 5 files changed, 38 insertions(+), 11 deletions(-) create mode 100644 todo.txt diff --git a/compiler.go b/compiler.go index 0a38c8e..d7bcd8d 100644 --- a/compiler.go +++ b/compiler.go @@ -11,7 +11,7 @@ func optimize(matcher match.Matcher) match.Matcher { switch m := matcher.(type) { case match.Any: - if m.Separators == "" { + if len(m.Separators) == 0 { return match.Super{} } @@ -135,15 +135,15 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher { hasSuper bool hasSingle bool min int - separator string + separator []rune ) for i, matcher := range matchers { - var sep string - switch m := matcher.(type) { + var sep []rune + switch m := matcher.(type) { case match.Super: - sep = "" + sep = []rune{} hasSuper = true case match.Any: @@ -486,7 +486,7 @@ func doAnyOf(n *nodeAnyOf, s string) (match.Matcher, error) { return match.AnyOf{matchers}, nil } -func do(leaf node, s string) (m match.Matcher, err error) { +func do(leaf node, s []rune) (m match.Matcher, err error) { switch n := leaf.(type) { case *nodeAnyOf: @@ -659,7 +659,7 @@ func do2(node node, s string) ([]match.Matcher, error) { return result, nil } -func compile(ast *nodePattern, s string) (Glob, error) { +func compile(ast *nodePattern, s []rune) (Glob, error) { // ms, err := do2(ast, s) // if err != nil { // return nil, err diff --git a/glob.go b/glob.go index 8b97961..dee2ef6 100644 --- a/glob.go +++ b/glob.go @@ -33,13 +33,13 @@ type Glob interface { // pattern { `,` pattern } // comma-separated (without spaces) patterns // -func Compile(pattern string, separators ...string) (Glob, error) { +func Compile(pattern string, separators ...rune) (Glob, error) { ast, err := parse(newLexer(pattern)) if err != nil { return nil, err } - matcher, err := compile(ast, strings.Join(separators, "")) + matcher, err := compile(ast, separators) if err != nil { return nil, err } diff --git a/match/any.go b/match/any.go index 6470c62..b00db7e 100644 --- a/match/any.go +++ b/match/any.go @@ -7,7 +7,7 @@ import ( ) type Any struct { - Separators string + Separators []rune } func (self Any) Match(s string) bool { diff --git a/match/single.go b/match/single.go index 5ad3391..f9cf018 100644 --- a/match/single.go +++ b/match/single.go @@ -8,7 +8,7 @@ import ( // single represents ? type Single struct { - Separators string + Separators []rune } func (self Single) Match(s string) bool { diff --git a/todo.txt b/todo.txt new file mode 100644 index 0000000..02bfc16 --- /dev/null +++ b/todo.txt @@ -0,0 +1,27 @@ +benchmark | old ns/op | new ns/op | delta +-----------------------------------------------|-----------|-------------|----------- +BenchmarkAllGlobMatch-4 512 711 +38.87% +BenchmarkMultipleGlobMatch-4 121 417 +244.63% +BenchmarkAlternativesGlobMatch-4 166 300 +80.72% +BenchmarkAlternativesSuffixFirstGlobMatch-4 23.5 292 +1142.55% +BenchmarkAlternativesSuffixSecondGlobMatch-4 29.8 355 +1091.28% +BenchmarkAlternativesCombineLiteGlobMatch-4 161 250 +55.28% +BenchmarkAlternativesCombineHardGlobMatch-4 325 334 +2.77% +BenchmarkPlainGlobMatch-4 7.20 154 +2038.89% +BenchmarkPrefixGlobMatch-4 8.75 113 +1191.43% +BenchmarkSuffixGlobMatch-4 9.07 115 +1167.92% +BenchmarkPrefixSuffixGlobMatch-4 15.1 125 +727.81% +BenchmarkIndexAny-4 887 255 -71.25% +BenchmarkIndexContains-4 492 247 -49.80% +BenchmarkIndexList-4 151 51.1 -66.16% +BenchmarkIndexMax-4 442 92.4 -79.10% +BenchmarkIndexMin-4 516 161 -68.80% +BenchmarkIndexNothing-4 452 92.8 -79.47% +BenchmarkIndexPrefixSuffix-4 84.3 57.2 -32.15% +BenchmarkIndexPrefix-4 85.1 55.9 -34.31% +BenchmarkIndexRange-4 170 60.6 -64.35% +BenchmarkRowIndex-4 172 94.0 -45.35% +BenchmarkIndexSingle-4 61.0 35.8 -41.31% +BenchmarkIndexSuffix-4 84.8 55.7 -34.32% +BenchmarkIndexSuper-4 461 192 -58.35% +BenchmarkIndexText-4 84.6 54.4 -35.70% From 57a5246facb06fc31b84edab1e1b4471cdc5e4a6 Mon Sep 17 00:00:00 2001 From: gobwas Date: Tue, 2 Feb 2016 22:03:37 +0300 Subject: [PATCH 02/26] use pool of segments --- cmd/globdraw/main.go | 15 ++- cmd/globtest/main.go | 12 +- compiler.go | 22 ++-- compiler_test.go | 32 +++--- glob.go | 4 +- glob_test.go | 24 ++-- match/any.go | 22 ++-- match/any_of.go | 23 ++-- match/any_of_test.go | 6 +- match/any_test.go | 27 ++++- match/btree.go | 9 +- match/contains.go | 28 ++--- match/contains_test.go | 19 ++- match/every_of.go | 47 ++++++-- match/every_of_test.go | 2 +- match/list.go | 18 ++- match/list_test.go | 25 +++- match/match.go | 134 ++++++++++++++++++++-- match/match_test.go | 48 ++++++-- match/max.go | 2 +- match/max_test.go | 17 ++- match/min.go | 14 +-- match/min_test.go | 17 ++- match/nothing.go | 4 +- match/nothing_test.go | 19 ++- match/prefix.go | 3 +- match/prefix_suffix.go | 16 +-- match/prefix_suffix_test.go | 17 ++- match/prefix_test.go | 17 ++- match/range.go | 7 +- match/range_test.go | 17 ++- match/row.go | 4 +- match/row_test.go | 50 +++++--- match/single.go | 10 +- match/single_test.go | 23 +++- match/suffix.go | 4 +- match/suffix_test.go | 17 ++- match/super.go | 5 +- match/super_test.go | 17 ++- match/text.go | 10 +- match/text_test.go | 17 ++- runes/runes.go | 128 +++++++++++++++++++++ runes/runes_test.go | 222 ++++++++++++++++++++++++++++++++++++ strings/strings.go | 13 +++ todo.txt | 27 +++-- 45 files changed, 965 insertions(+), 249 deletions(-) create mode 100644 runes/runes.go create mode 100644 runes/runes_test.go create mode 100644 strings/strings.go diff --git a/cmd/globdraw/main.go b/cmd/globdraw/main.go index bc64020..8ba91f1 100644 --- a/cmd/globdraw/main.go +++ b/cmd/globdraw/main.go @@ -9,6 +9,7 @@ import ( "math/rand" "os" "strings" + "unicode/utf8" ) func draw(pattern string, m match.Matcher) string { @@ -60,7 +61,7 @@ func graphviz(m match.Matcher, id string) string { func main() { pattern := flag.String("p", "", "pattern to draw") - sep := flag.String("s", "", "comma separated list of separators") + sep := flag.String("s", "", "comma separated list of separators characters") flag.Parse() if *pattern == "" { @@ -68,7 +69,17 @@ func main() { os.Exit(1) } - glob, err := glob.Compile(*pattern, strings.Split(*sep, ",")...) + var separators []rune + for _, c := range strings.Split(*sep, ",") { + if r, w := utf8.DecodeRuneInString(c); len(c) > w { + fmt.Println("only single charactered separators are allowed") + os.Exit(1) + } else { + separators = append(separators, r) + } + } + + glob, err := glob.Compile(*pattern, separators...) if err != nil { fmt.Println("could not compile pattern:", err) os.Exit(1) diff --git a/cmd/globtest/main.go b/cmd/globtest/main.go index 25c89ca..95c102f 100644 --- a/cmd/globtest/main.go +++ b/cmd/globtest/main.go @@ -7,6 +7,7 @@ import ( "os" "strings" "testing" + "unicode/utf8" ) func benchString(r testing.BenchmarkResult) string { @@ -42,7 +43,16 @@ func main() { os.Exit(1) } - separators := strings.Split(*sep, ",") + var separators []rune + for _, c := range strings.Split(*sep, ",") { + if r, w := utf8.DecodeRuneInString(c); len(c) > w { + fmt.Println("only single charactered separators are allowed") + os.Exit(1) + } else { + separators = append(separators, r) + } + } + g, err := glob.Compile(*pattern, separators...) if err != nil { fmt.Println("could not compile pattern:", err) diff --git a/compiler.go b/compiler.go index d7bcd8d..57b2d4c 100644 --- a/compiler.go +++ b/compiler.go @@ -3,8 +3,8 @@ package glob import ( "fmt" "github.com/gobwas/glob/match" + "github.com/gobwas/glob/runes" "reflect" - "unicode/utf8" ) func optimize(matcher match.Matcher) match.Matcher { @@ -23,8 +23,8 @@ func optimize(matcher match.Matcher) match.Matcher { return m case match.List: - if m.Not == false && utf8.RuneCountInString(m.List) == 1 { - return match.NewText(m.List) + if m.Not == false && len(m.List) == 1 { + return match.NewText(string(m.List)) } return m @@ -172,7 +172,7 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher { separator = sep } - if sep == separator { + if runes.Equal(sep, separator) { continue } @@ -187,7 +187,7 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher { return match.Any{separator} } - if (hasAny || hasSuper) && min > 0 && separator == "" { + if (hasAny || hasSuper) && min > 0 && len(separator) == 0 { return match.Min{min} } @@ -201,8 +201,8 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher { } } - if separator != "" { - every.Add(match.Contains{separator, true}) + if len(separator) > 0 { + every.Add(match.Contains{string(separator), true}) } return every @@ -468,7 +468,7 @@ func compileMatchers(matchers []match.Matcher) (match.Matcher, error) { // return sum * k //} -func doAnyOf(n *nodeAnyOf, s string) (match.Matcher, error) { +func doAnyOf(n *nodeAnyOf, s []rune) (match.Matcher, error) { var matchers []match.Matcher for _, desc := range n.children() { if desc == nil { @@ -532,7 +532,7 @@ func do(leaf node, s []rune) (m match.Matcher, err error) { } case *nodeList: - m = match.List{n.chars, n.not} + m = match.List{[]rune(n.chars), n.not} case *nodeRange: m = match.Range{n.lo, n.hi, n.not} @@ -556,7 +556,7 @@ func do(leaf node, s []rune) (m match.Matcher, err error) { return optimize(m), nil } -func do2(node node, s string) ([]match.Matcher, error) { +func do2(node node, s []rune) ([]match.Matcher, error) { var result []match.Matcher switch n := node.(type) { @@ -631,7 +631,7 @@ func do2(node node, s string) ([]match.Matcher, error) { } case *nodeList: - result = append(result, match.List{n.chars, n.not}) + result = append(result, match.List{[]rune(n.chars), n.not}) case *nodeRange: result = append(result, match.Range{n.lo, n.hi, n.not}) diff --git a/compiler_test.go b/compiler_test.go index 008ddb7..0be7b76 100644 --- a/compiler_test.go +++ b/compiler_test.go @@ -6,7 +6,7 @@ import ( "testing" ) -const separators = "." +var separators = []rune{'.'} func TestGlueMatchers(t *testing.T) { for id, test := range []struct { @@ -27,7 +27,7 @@ func TestGlueMatchers(t *testing.T) { }, match.EveryOf{match.Matchers{ match.Min{1}, - match.Contains{separators, true}, + match.Contains{string(separators), true}, }}, }, { @@ -43,8 +43,8 @@ func TestGlueMatchers(t *testing.T) { }, { []match.Matcher{ - match.List{"a", true}, - match.Any{"a"}, + match.List{[]rune{'a'}, true}, + match.Any{[]rune{'a'}}, }, match.EveryOf{match.Matchers{ match.Min{1}, @@ -101,14 +101,14 @@ func TestCompileMatchers(t *testing.T) { { []match.Matcher{ match.Range{'a', 'c', true}, - match.List{"zte", false}, + match.List{[]rune{'z', 't', 'e'}, false}, match.NewText("c"), match.Single{}, }, match.Row{ Matchers: match.Matchers{ match.Range{'a', 'c', true}, - match.List{"zte", false}, + match.List{[]rune{'z', 't', 'e'}, false}, match.NewText("c"), match.Single{}, }, @@ -136,7 +136,7 @@ func TestConvertMatchers(t *testing.T) { { []match.Matcher{ match.Range{'a', 'c', true}, - match.List{"zte", false}, + match.List{[]rune{'z', 't', 'e'}, false}, match.NewText("c"), match.Single{}, match.Any{}, @@ -145,7 +145,7 @@ func TestConvertMatchers(t *testing.T) { match.Row{ Matchers: match.Matchers{ match.Range{'a', 'c', true}, - match.List{"zte", false}, + match.List{[]rune{'z', 't', 'e'}, false}, match.NewText("c"), match.Single{}, }, @@ -157,7 +157,7 @@ func TestConvertMatchers(t *testing.T) { { []match.Matcher{ match.Range{'a', 'c', true}, - match.List{"zte", false}, + match.List{[]rune{'z', 't', 'e'}, false}, match.NewText("c"), match.Single{}, match.Any{}, @@ -169,7 +169,7 @@ func TestConvertMatchers(t *testing.T) { match.Row{ Matchers: match.Matchers{ match.Range{'a', 'c', true}, - match.List{"zte", false}, + match.List{[]rune{'z', 't', 'e'}, false}, match.NewText("c"), }, RunesLength: 3, @@ -204,7 +204,7 @@ func TestCompiler(t *testing.T) { for id, test := range []struct { ast *nodePattern result Glob - sep string + sep []rune }{ { ast: pattern(&nodeText{text: "abc"}), @@ -241,14 +241,14 @@ func TestCompiler(t *testing.T) { chars: "abc", not: true, }), - result: match.List{"abc", true}, + result: match.List{[]rune{'a', 'b', 'c'}, true}, }, { ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}), sep: separators, result: match.EveryOf{Matchers: match.Matchers{ match.Min{3}, - match.Contains{separators, true}, + match.Contains{string(separators), true}, }}, }, { @@ -349,7 +349,7 @@ func TestCompiler(t *testing.T) { nil, match.AnyOf{Matchers: match.Matchers{ match.Single{}, - match.List{List: "def"}, + match.List{List: []rune{'d', 'e', 'f'}}, match.Nothing{}, }}, ), @@ -390,8 +390,8 @@ func TestCompiler(t *testing.T) { Matchers: match.Matchers{ match.NewText("abc"), match.AnyOf{Matchers: match.Matchers{ - match.List{List: "abc"}, - match.List{List: "def"}, + match.List{List: []rune{'a', 'b', 'c'}}, + match.List{List: []rune{'d', 'e', 'f'}}, }}, match.NewText("ghi"), }, diff --git a/glob.go b/glob.go index dee2ef6..4d1b77b 100644 --- a/glob.go +++ b/glob.go @@ -1,7 +1,5 @@ package glob -import "strings" - // Glob represents compiled glob pattern. type Glob interface { Match(string) bool @@ -48,7 +46,7 @@ func Compile(pattern string, separators ...rune) (Glob, error) { } // MustCompile is the same as Compile, except that if Compile returns error, this will panic -func MustCompile(pattern string, separators ...string) Glob { +func MustCompile(pattern string, separators ...rune) Glob { g, err := Compile(pattern, separators...) if err != nil { panic(err) diff --git a/glob_test.go b/glob_test.go index ce55202..d17e985 100644 --- a/glob_test.go +++ b/glob_test.go @@ -53,10 +53,10 @@ const ( type test struct { pattern, match string should bool - delimiters []string + delimiters []rune } -func glob(s bool, p, m string, d ...string) test { +func glob(s bool, p, m string, d ...rune) test { return test{p, m, s, d} } @@ -68,22 +68,22 @@ func TestGlob(t *testing.T) { glob(true, "a*c", "abc"), glob(true, "a*c", "a12345c"), glob(true, "a?c", "a1c"), - glob(true, "a.b", "a.b", "."), - glob(true, "a.*", "a.b", "."), - glob(true, "a.**", "a.b.c", "."), - glob(true, "a.?.c", "a.b.c", "."), - glob(true, "a.?.?", "a.b.c", "."), + glob(true, "a.b", "a.b", '.'), + glob(true, "a.*", "a.b", '.'), + glob(true, "a.**", "a.b.c", '.'), + glob(true, "a.?.c", "a.b.c", '.'), + glob(true, "a.?.?", "a.b.c", '.'), glob(true, "?at", "cat"), glob(true, "?at", "fat"), glob(true, "*", "abc"), glob(true, `\*`, "*"), - glob(true, "**", "a.b.c", "."), + glob(true, "**", "a.b.c", '.'), glob(false, "?at", "at"), - glob(false, "?at", "fat", "f"), - glob(false, "a.*", "a.b.c", "."), - glob(false, "a.?.c", "a.bb.c", "."), - glob(false, "*", "a.b.c", "."), + glob(false, "?at", "fat", 'f'), + glob(false, "a.*", "a.b.c", '.'), + glob(false, "a.?.c", "a.bb.c", '.'), + glob(false, "*", "a.b.c", '.'), glob(true, "*test", "this is a test"), glob(true, "this*", "this is a test"), diff --git a/match/any.go b/match/any.go index b00db7e..d931eea 100644 --- a/match/any.go +++ b/match/any.go @@ -2,8 +2,7 @@ package match import ( "fmt" - "strings" - "unicode/utf8" + "github.com/gobwas/glob/strings" ) type Any struct { @@ -11,28 +10,25 @@ type Any struct { } func (self Any) Match(s string) bool { - return strings.IndexAny(s, self.Separators) == -1 + return strings.IndexAnyRunes(s, self.Separators) == -1 } -func (self Any) Index(s string) (int, []int) { - var sub string - - found := strings.IndexAny(s, self.Separators) +func (self Any) Index(s string, segments []int) (int, []int) { + found := strings.IndexAnyRunes(s, self.Separators) switch found { case -1: - sub = s case 0: - return 0, []int{0} + segments = append(segments) + return 0, segments default: - sub = s[:found] + s = s[:found] } - segments := make([]int, 0, utf8.RuneCountInString(sub)+1) - for i := range sub { + for i := range s { segments = append(segments, i) } - segments = append(segments, len(sub)) + segments = append(segments, len(s)) return 0, segments } diff --git a/match/any_of.go b/match/any_of.go index 3d14edc..602cd28 100644 --- a/match/any_of.go +++ b/match/any_of.go @@ -23,39 +23,38 @@ func (self AnyOf) Match(s string) bool { return false } -func (self AnyOf) Index(s string) (int, []int) { - if len(self.Matchers) == 0 { - return -1, nil - } - - // segments to merge - var segments [][]int +func (self AnyOf) Index(s string, segments []int) (int, []int) { index := -1 - for _, m := range self.Matchers { - idx, seg := m.Index(s) + in := acquireSegments(len(s)) + idx, seg := m.Index(s, in) if idx == -1 { + releaseSegments(in) continue } if index == -1 || idx < index { index = idx - segments = [][]int{seg} + segments = append(segments[:0], seg...) + releaseSegments(in) continue } if idx > index { + releaseSegments(in) continue } - segments = append(segments, seg) + // here idx == index + segments = appendMerge(segments, seg) + releaseSegments(in) } if index == -1 { return -1, nil } - return index, mergeSegments(segments) + return index, segments } func (self AnyOf) Len() (l int) { diff --git a/match/any_of_test.go b/match/any_of_test.go index 506ddd8..ee3001a 100644 --- a/match/any_of_test.go +++ b/match/any_of_test.go @@ -33,8 +33,8 @@ func TestAnyOfIndex(t *testing.T) { }, { Matchers{ - List{"[def]", false}, - List{"[abc]", false}, + List{[]rune("[def]"), false}, + List{[]rune("[abc]"), false}, }, "abcdef", 0, @@ -42,7 +42,7 @@ func TestAnyOfIndex(t *testing.T) { }, } { everyOf := AnyOf{test.matchers} - index, segments := everyOf.Index(test.fixture) + index, segments := everyOf.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } diff --git a/match/any_test.go b/match/any_test.go index c436267..9239ffa 100644 --- a/match/any_test.go +++ b/match/any_test.go @@ -7,38 +7,53 @@ import ( func TestAnyIndex(t *testing.T) { for id, test := range []struct { - sep string + sep []rune fixture string index int segments []int }{ { - ".", + []rune{'.'}, "abc", 0, []int{0, 1, 2, 3}, }, { - ".", + []rune{'.'}, "abc.def", 0, []int{0, 1, 2, 3}, }, } { p := Any{test.sep} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } if !reflect.DeepEqual(segments, test.segments) { t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments) } + + releaseSegments(segments) } } func BenchmarkIndexAny(b *testing.B) { - p := Any{bench_separators} + m := Any{bench_separators} + + in := acquireSegments(len(bench_pattern)) for i := 0; i < b.N; i++ { - p.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexAnyParallel(b *testing.B) { + m := Any{bench_separators} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/btree.go b/match/btree.go index fe314f2..ad8f5f9 100644 --- a/match/btree.go +++ b/match/btree.go @@ -51,7 +51,7 @@ func (self BTree) Len() int { } // todo? -func (self BTree) Index(s string) (int, []int) { +func (self BTree) Index(s string, segments []int) (int, []int) { return -1, nil } @@ -79,8 +79,10 @@ func (self BTree) Match(s string) bool { for offset < limit { // search for matching part in substring - index, segments := self.Value.Index(s[offset:limit]) + in := acquireSegments(limit - offset) + index, segments := self.Value.Index(s[offset:limit], in) if index == -1 { + releaseSegments(in) return false } @@ -112,11 +114,14 @@ func (self BTree) Match(s string) bool { } if right { + releaseSegments(in) return true } } } + releaseSegments(in) + _, step := utf8.DecodeRuneInString(s[offset+index:]) offset += index + step } diff --git a/match/contains.go b/match/contains.go index 23f51b6..8246c92 100644 --- a/match/contains.go +++ b/match/contains.go @@ -3,7 +3,6 @@ package match import ( "fmt" "strings" - "unicode/utf8" ) type Contains struct { @@ -15,11 +14,8 @@ func (self Contains) Match(s string) bool { return strings.Contains(s, self.Needle) != self.Not } -func (self Contains) Index(s string) (int, []int) { - var ( - sub string - offset int - ) +func (self Contains) Index(s string, segments []int) (int, []int) { + var offset int idx := strings.Index(s, self.Needle) @@ -29,27 +25,19 @@ func (self Contains) Index(s string) (int, []int) { } offset = idx + len(self.Needle) - if len(s) <= offset { - return 0, []int{offset} - } - - sub = s[offset:] - } else { - switch idx { - case -1: - sub = s - default: - sub = s[:idx] + return 0, append(segments, offset) } + s = s[offset:] + } else if idx != -1 { + s = s[:idx] } - segments := make([]int, 0, utf8.RuneCountInString(sub)+1) - for i, _ := range sub { + for i, _ := range s { segments = append(segments, offset+i) } - return 0, append(segments, offset+len(sub)) + return 0, append(segments, offset+len(s)) } func (self Contains) Len() int { diff --git a/match/contains_test.go b/match/contains_test.go index b7e66c7..ba9577b 100644 --- a/match/contains_test.go +++ b/match/contains_test.go @@ -43,7 +43,7 @@ func TestContainsIndex(t *testing.T) { }, } { p := Contains{test.prefix, test.not} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -54,8 +54,21 @@ func TestContainsIndex(t *testing.T) { } func BenchmarkIndexContains(b *testing.B) { - m := Contains{bench_separators, true} + m := Contains{string(bench_separators), true} + + in := acquireSegments(len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexContainsParallel(b *testing.B) { + m := Contains{string(bench_separators), true} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/every_of.go b/match/every_of.go index 5df2fbc..dba0154 100644 --- a/match/every_of.go +++ b/match/every_of.go @@ -25,43 +25,66 @@ func (self EveryOf) Len() (l int) { return } -func (self EveryOf) Index(s string) (int, []int) { +func max(a, b int) int { + if a >= b { + return a + } + + return b +} + +func (self EveryOf) Index(s string, out []int) (int, []int) { var index int var offset int - var segments []int + var current []int sub := s - for _, m := range self.Matchers { - idx, seg := m.Index(sub) + for i, m := range self.Matchers { + in := acquireSegments(len(sub)) + idx, seg := m.Index(sub, in) if idx == -1 { + releaseSegments(in) + if cap(current) > 0 { + releaseSegments(current) + } return -1, nil } - var sum []int - if segments == nil { - sum = seg + next := acquireSegments(max(len(seg), len(current))) + if i == 0 { + next = append(next, seg...) } else { delta := index - (idx + offset) - for _, ex := range segments { + for _, ex := range current { for _, n := range seg { if ex+delta == n { - sum = append(sum, n) + next = append(next, n) } } } } - if len(sum) == 0 { + if cap(current) > 0 { + releaseSegments(current) + } + releaseSegments(in) + + if len(next) == 0 { + releaseSegments(next) return -1, nil } - segments = sum + current = next + index = idx + offset sub = s[index:] offset += idx } - return index, segments + out = append(out, current...) + releaseSegments(current) + + return index, out } func (self EveryOf) Match(s string) bool { diff --git a/match/every_of_test.go b/match/every_of_test.go index c55ef9e..c97ea70 100644 --- a/match/every_of_test.go +++ b/match/every_of_test.go @@ -34,7 +34,7 @@ func TestEveryOfIndex(t *testing.T) { }, } { everyOf := EveryOf{test.matchers} - index, segments := everyOf.Index(test.fixture) + index, segments := everyOf.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } diff --git a/match/list.go b/match/list.go index 3d9f0b6..bcc3612 100644 --- a/match/list.go +++ b/match/list.go @@ -2,24 +2,22 @@ package match import ( "fmt" - "strings" + "github.com/gobwas/glob/runes" "unicode/utf8" ) type List struct { - List string + List []rune Not bool } func (self List) Match(s string) bool { - // if s 100% have two symbols - // _, w := utf8.DecodeRuneInString(s) - // if len(s) > w { - if len(s) > 4 { + r, w := utf8.DecodeRuneInString(s) + if len(s) > w { return false } - inList := strings.Index(self.List, s) != -1 + inList := runes.IndexRune(self.List, r) != -1 return inList == !self.Not } @@ -27,10 +25,10 @@ func (self List) Len() int { return lenOne } -func (self List) Index(s string) (int, []int) { +func (self List) Index(s string, segments []int) (int, []int) { for i, r := range s { - if self.Not == (strings.IndexRune(self.List, r) == -1) { - return i, []int{utf8.RuneLen(r)} + if self.Not == (runes.IndexRune(self.List, r) == -1) { + return i, append(segments, utf8.RuneLen(r)) } } diff --git a/match/list_test.go b/match/list_test.go index a772fdf..8fd13a2 100644 --- a/match/list_test.go +++ b/match/list_test.go @@ -7,21 +7,21 @@ import ( func TestListIndex(t *testing.T) { for id, test := range []struct { - list string + list []rune not bool fixture string index int segments []int }{ { - "ab", + []rune("ab"), false, "abc", 0, []int{1}, }, { - "ab", + []rune("ab"), true, "fffabfff", 0, @@ -29,7 +29,7 @@ func TestListIndex(t *testing.T) { }, } { p := List{test.list, test.not} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -40,8 +40,21 @@ func TestListIndex(t *testing.T) { } func BenchmarkIndexList(b *testing.B) { - m := List{"def", false} + m := List{[]rune("def"), false} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexListParallel(b *testing.B) { + m := List{[]rune("def"), false} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/match.go b/match/match.go index 8d7158c..0a6664b 100644 --- a/match/match.go +++ b/match/match.go @@ -3,6 +3,7 @@ package match import ( "fmt" "strings" + "sync" ) const lenOne = 1 @@ -11,7 +12,7 @@ const lenNo = -1 type Matcher interface { Match(string) bool - Index(string) (int, []int) + Index(string, []int) (int, []int) Len() int String() string } @@ -27,6 +28,58 @@ func (m Matchers) String() string { return fmt.Sprintf("%s", strings.Join(s, ",")) } +var segmentsPools [1024]sync.Pool + +func toPowerOfTwo(v int) int { + v-- + v |= v >> 1 + v |= v >> 2 + v |= v >> 4 + v |= v >> 8 + v |= v >> 16 + v++ + + return v +} + +func init() { + for i := 1024; i >= 1; i >>= 1 { + func(i int) { + segmentsPools[i-1] = sync.Pool{ + New: func() interface{} { + return make([]int, 0, i) + }, + } + }(i) + } +} + +var segmentsPool = sync.Pool{ + New: func() interface{} { + return make([]int, 0, 64) + }, +} + +func getIdx(c int) int { + p := toPowerOfTwo(c) + switch { + case p >= 1024: + return 1023 + case p < 1: + return 0 + default: + return p - 1 + } +} + +func acquireSegments(c int) []int { + return segmentsPools[getIdx(c)].Get().([]int)[:0] +} + +func releaseSegments(s []int) { + segmentsPools[getIdx(cap(s))].Put(s) +} + func appendIfNotAsPrevious(target []int, val int) []int { l := len(target) if l != 0 && target[l-1] == val { @@ -36,16 +89,64 @@ func appendIfNotAsPrevious(target []int, val int) []int { return append(target, val) } -// mergeSegments merges and sorts given already SORTED and UNIQUE segments. -func mergeSegments(segments [][]int) []int { - var current []int - for _, s := range segments { - if current == nil { - current = s - continue +func appendMerge(target, sub []int) []int { + lt, ls := len(target), len(sub) + out := acquireSegments(lt + ls) + + for x, y := 0, 0; x < lt || y < ls; { + if x >= lt { + out = append(out, sub[y:]...) + break } - var next []int + if y >= ls { + out = append(out, target[x:]...) + break + } + + xValue := target[x] + yValue := sub[y] + + switch { + + case xValue == yValue: + out = append(out, xValue) + x++ + y++ + + case xValue < yValue: + out = append(out, xValue) + x++ + + case yValue < xValue: + out = append(out, yValue) + y++ + + } + } + + target = append(target[:0], out...) + releaseSegments(out) + + return target +} + +// mergeSegments merges and sorts given already SORTED and UNIQUE segments. +func mergeSegments(list [][]int, out []int) []int { + var current []int + switch len(list) { + case 0: + return out + case 1: + return list[0] + default: + current = acquireSegments(len(list[0])) + current = append(current, list[0]...) + // releaseSegments(list[0]) + } + + for _, s := range list[1:] { + next := acquireSegments(len(current) + len(s)) for x, y := 0, 0; x < len(current) || y < len(s); { if x >= len(current) { next = append(next, s[y:]...) @@ -78,8 +179,21 @@ func mergeSegments(segments [][]int) []int { } } + releaseSegments(current) current = next } - return current + out = append(out, current...) + releaseSegments(current) + + return out +} + +func reverseSegments(input []int) { + l := len(input) + m := l / 2 + + for i := 0; i < m; i++ { + input[i], input[l-i-1] = input[l-i-1], input[i] + } } diff --git a/match/match_test.go b/match/match_test.go index c3b2985..d60fc7d 100644 --- a/match/match_test.go +++ b/match/match_test.go @@ -5,36 +5,60 @@ import ( "testing" ) -const bench_separators = "." +var bench_separators = []rune{'.'} + const bench_pattern = "abcdefghijklmnopqrstuvwxyz0123456789" -func TestMergeSegments(t *testing.T) { +func TestAppendMerge(t *testing.T) { for id, test := range []struct { - segments [][]int + segments [2][]int exp []int }{ { - [][]int{ + [2][]int{ []int{0, 6, 7}, []int{0, 1, 3}, - []int{2, 4}, }, - []int{0, 1, 2, 3, 4, 6, 7}, + []int{0, 1, 3, 6, 7}, }, { - [][]int{ + [2][]int{ []int{0, 1, 3, 6, 7}, - []int{0, 1, 3}, - []int{2, 4}, - []int{1}, + []int{0, 1, 10}, }, - []int{0, 1, 2, 3, 4, 6, 7}, + []int{0, 1, 3, 6, 7, 10}, }, } { - act := mergeSegments(test.segments) + act := appendMerge(test.segments[0], test.segments[1]) if !reflect.DeepEqual(act, test.exp) { t.Errorf("#%d merge sort segments unexpected:\nact: %v\nexp:%v", id, act, test.exp) continue } } } + +func BenchmarkAppendMerge(b *testing.B) { + s1 := []int{0, 1, 3, 6, 7} + s2 := []int{0, 1, 3} + + for i := 0; i < b.N; i++ { + appendMerge(s1, s2) + } +} + +func BenchmarkAppendMergeParallel(b *testing.B) { + s1 := []int{0, 1, 3, 6, 7} + s2 := []int{0, 1, 3} + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + appendMerge(s1, s2) + } + }) +} + +func BenchmarkReverse(b *testing.B) { + for i := 0; i < b.N; i++ { + reverseSegments([]int{1, 2, 3, 4}) + } +} diff --git a/match/max.go b/match/max.go index af634d3..341264c 100644 --- a/match/max.go +++ b/match/max.go @@ -21,7 +21,7 @@ func (self Max) Match(s string) bool { return true } -func (self Max) Index(s string) (index int, segments []int) { +func (self Max) Index(s string, segments []int) (int, []int) { segments = append(segments, 0) var count int for i, r := range s { diff --git a/match/max_test.go b/match/max_test.go index bbd6de0..2c5cba5 100644 --- a/match/max_test.go +++ b/match/max_test.go @@ -26,7 +26,7 @@ func TestMaxIndex(t *testing.T) { }, } { p := Max{test.limit} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,7 +38,20 @@ func TestMaxIndex(t *testing.T) { func BenchmarkIndexMax(b *testing.B) { m := Max{10} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexMaxParallel(b *testing.B) { + m := Max{10} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/min.go b/match/min.go index cb330b4..9ddfd84 100644 --- a/match/min.go +++ b/match/min.go @@ -21,22 +21,22 @@ func (self Min) Match(s string) bool { return false } -func (self Min) Index(s string) (int, []int) { +func (self Min) Index(s string, segments []int) (int, []int) { var count int + var found bool - c := utf8.RuneCountInString(s) - if c < self.Limit { - return -1, nil - } - - segments := make([]int, 0, c-self.Limit+1) for i, r := range s { count++ if count >= self.Limit { + found = true segments = append(segments, i+utf8.RuneLen(r)) } } + if !found { + return -1, nil + } + return 0, segments } diff --git a/match/min_test.go b/match/min_test.go index c823223..5f9f126 100644 --- a/match/min_test.go +++ b/match/min_test.go @@ -26,7 +26,7 @@ func TestMinIndex(t *testing.T) { }, } { p := Min{test.limit} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,7 +38,20 @@ func TestMinIndex(t *testing.T) { func BenchmarkIndexMin(b *testing.B) { m := Min{10} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexMinParallel(b *testing.B) { + m := Min{10} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/nothing.go b/match/nothing.go index ef5049b..a6b6922 100644 --- a/match/nothing.go +++ b/match/nothing.go @@ -10,8 +10,8 @@ func (self Nothing) Match(s string) bool { return len(s) == 0 } -func (self Nothing) Index(s string) (int, []int) { - return 0, []int{0} +func (self Nothing) Index(s string, segments []int) (int, []int) { + return 0, append(segments, 0) } func (self Nothing) Len() int { diff --git a/match/nothing_test.go b/match/nothing_test.go index 1b96c58..76cb678 100644 --- a/match/nothing_test.go +++ b/match/nothing_test.go @@ -23,7 +23,7 @@ func TestNothingIndex(t *testing.T) { }, } { p := Nothing{} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -34,8 +34,21 @@ func TestNothingIndex(t *testing.T) { } func BenchmarkIndexNothing(b *testing.B) { - m := Max{10} + m := Nothing{} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexNothingParallel(b *testing.B) { + m := Nothing{} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/prefix.go b/match/prefix.go index bf73ae2..2bb1260 100644 --- a/match/prefix.go +++ b/match/prefix.go @@ -10,7 +10,7 @@ type Prefix struct { Prefix string } -func (self Prefix) Index(s string) (int, []int) { +func (self Prefix) Index(s string, segments []int) (int, []int) { idx := strings.Index(s, self.Prefix) if idx == -1 { return -1, nil @@ -24,7 +24,6 @@ func (self Prefix) Index(s string) (int, []int) { sub = "" } - segments := make([]int, 0, utf8.RuneCountInString(sub)+1) segments = append(segments, length) for i, r := range sub { segments = append(segments, length+i+utf8.RuneLen(r)) diff --git a/match/prefix_suffix.go b/match/prefix_suffix.go index d5166de..4b78107 100644 --- a/match/prefix_suffix.go +++ b/match/prefix_suffix.go @@ -9,17 +9,15 @@ type PrefixSuffix struct { Prefix, Suffix string } -func (self PrefixSuffix) Index(s string) (int, []int) { +func (self PrefixSuffix) Index(s string, segments []int) (int, []int) { prefixIdx := strings.Index(s, self.Prefix) if prefixIdx == -1 { return -1, nil } - var resp []int suffixLen := len(self.Suffix) if suffixLen > 0 { - var segments []int for sub := s[prefixIdx:]; ; { suffixIdx := strings.LastIndex(sub, self.Suffix) if suffixIdx == -1 { @@ -30,20 +28,16 @@ func (self PrefixSuffix) Index(s string) (int, []int) { sub = sub[:suffixIdx] } - segLen := len(segments) - if segLen == 0 { + if len(segments) == 0 { return -1, nil } - resp = make([]int, segLen) - for i, s := range segments { - resp[segLen-i-1] = s - } + reverseSegments(segments) } else { - resp = append(resp, len(s)-prefixIdx) + segments = append(segments, len(s)-prefixIdx) } - return prefixIdx, resp + return prefixIdx, segments } func (self PrefixSuffix) Len() int { diff --git a/match/prefix_suffix_test.go b/match/prefix_suffix_test.go index baf9427..23271c0 100644 --- a/match/prefix_suffix_test.go +++ b/match/prefix_suffix_test.go @@ -36,7 +36,7 @@ func TestPrefixSuffixIndex(t *testing.T) { }, } { p := PrefixSuffix{test.prefix, test.suffix} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -48,7 +48,20 @@ func TestPrefixSuffixIndex(t *testing.T) { func BenchmarkIndexPrefixSuffix(b *testing.B) { m := PrefixSuffix{"qew", "sqw"} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexPrefixSuffixParallel(b *testing.B) { + m := PrefixSuffix{"qew", "sqw"} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/prefix_test.go b/match/prefix_test.go index 3ee3012..5b38bfd 100644 --- a/match/prefix_test.go +++ b/match/prefix_test.go @@ -26,7 +26,7 @@ func TestPrefixIndex(t *testing.T) { }, } { p := Prefix{test.prefix} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,7 +38,20 @@ func TestPrefixIndex(t *testing.T) { func BenchmarkIndexPrefix(b *testing.B) { m := Prefix{"qew"} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexPrefixParallel(b *testing.B) { + m := Prefix{"qew"} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/range.go b/match/range.go index f8b6f5d..e669870 100644 --- a/match/range.go +++ b/match/range.go @@ -10,6 +10,9 @@ type Range struct { Not bool } +// todo make factory +// todo make range table inside factory + func (self Range) Len() int { return lenOne } @@ -25,10 +28,10 @@ func (self Range) Match(s string) bool { return inRange == !self.Not } -func (self Range) Index(s string) (int, []int) { +func (self Range) Index(s string, segments []int) (int, []int) { for i, r := range s { if self.Not != (r >= self.Lo && r <= self.Hi) { - return i, []int{utf8.RuneLen(r)} + return i, append(segments, utf8.RuneLen(r)) } } diff --git a/match/range_test.go b/match/range_test.go index e55bccd..a7cdef8 100644 --- a/match/range_test.go +++ b/match/range_test.go @@ -36,7 +36,7 @@ func TestRangeIndex(t *testing.T) { }, } { m := Range{test.lo, test.hi, test.not} - index, segments := m.Index(test.fixture) + index, segments := m.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -48,7 +48,20 @@ func TestRangeIndex(t *testing.T) { func BenchmarkIndexRange(b *testing.B) { m := Range{'0', '9', false} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexRangeParallel(b *testing.B) { + m := Range{'0', '9', false} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/row.go b/match/row.go index a079aa4..8b3e2fc 100644 --- a/match/row.go +++ b/match/row.go @@ -52,7 +52,7 @@ func (self Row) Len() (l int) { return self.RunesLength } -func (self Row) Index(s string) (int, []int) { +func (self Row) Index(s string, segments []int) (int, []int) { if !self.lenOk(s) { return -1, nil } @@ -66,7 +66,7 @@ func (self Row) Index(s string) (int, []int) { } if self.matchAll(s[i:]) { - return i, []int{self.RunesLength} + return i, append(segments, self.RunesLength) } } diff --git a/match/row_test.go b/match/row_test.go index 4b59fe0..ff59cff 100644 --- a/match/row_test.go +++ b/match/row_test.go @@ -5,20 +5,6 @@ import ( "testing" ) -func BenchmarkRowIndex(b *testing.B) { - m := Row{ - Matchers: Matchers{ - NewText("abc"), - NewText("def"), - Single{}, - }, - RunesLength: 7, - } - for i := 0; i < b.N; i++ { - m.Index("abcdefghijk") - } -} - func TestRowIndex(t *testing.T) { for id, test := range []struct { matchers Matchers @@ -54,7 +40,7 @@ func TestRowIndex(t *testing.T) { Matchers: test.matchers, RunesLength: test.length, } - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -63,3 +49,37 @@ func TestRowIndex(t *testing.T) { } } } + +func BenchmarkRowIndex(b *testing.B) { + m := Row{ + Matchers: Matchers{ + NewText("abc"), + NewText("def"), + Single{}, + }, + RunesLength: 7, + } + in := acquireSegments(len(bench_pattern)) + + for i := 0; i < b.N; i++ { + m.Index(bench_pattern, in[:0]) + } +} + +func BenchmarkIndexRowParallel(b *testing.B) { + m := Row{ + Matchers: Matchers{ + NewText("abc"), + NewText("def"), + Single{}, + }, + RunesLength: 7, + } + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/single.go b/match/single.go index f9cf018..e1ad121 100644 --- a/match/single.go +++ b/match/single.go @@ -2,7 +2,7 @@ package match import ( "fmt" - "strings" + "github.com/gobwas/glob/runes" "unicode/utf8" ) @@ -17,17 +17,17 @@ func (self Single) Match(s string) bool { return false } - return strings.IndexRune(self.Separators, r) == -1 + return runes.IndexRune(self.Separators, r) == -1 } func (self Single) Len() int { return lenOne } -func (self Single) Index(s string) (int, []int) { +func (self Single) Index(s string, segments []int) (int, []int) { for i, r := range s { - if strings.IndexRune(self.Separators, r) == -1 { - return i, []int{utf8.RuneLen(r)} + if runes.IndexRune(self.Separators, r) == -1 { + return i, append(segments, utf8.RuneLen(r)) } } diff --git a/match/single_test.go b/match/single_test.go index 1e9ba71..e1e99ac 100644 --- a/match/single_test.go +++ b/match/single_test.go @@ -7,26 +7,26 @@ import ( func TestSingleIndex(t *testing.T) { for id, test := range []struct { - separators string + separators []rune fixture string index int segments []int }{ { - ".", + []rune{'.'}, ".abc", 1, []int{1}, }, { - ".", + []rune{'.'}, ".", -1, nil, }, } { p := Single{test.separators} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,7 +38,20 @@ func TestSingleIndex(t *testing.T) { func BenchmarkIndexSingle(b *testing.B) { m := Single{bench_separators} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexSingleParallel(b *testing.B) { + m := Single{bench_separators} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/suffix.go b/match/suffix.go index d38d71a..ca825f7 100644 --- a/match/suffix.go +++ b/match/suffix.go @@ -9,13 +9,13 @@ type Suffix struct { Suffix string } -func (self Suffix) Index(s string) (int, []int) { +func (self Suffix) Index(s string, segments []int) (int, []int) { idx := strings.Index(s, self.Suffix) if idx == -1 { return -1, nil } - return 0, []int{idx + len(self.Suffix)} + return 0, append(segments, idx+len(self.Suffix)) } func (self Suffix) Len() int { diff --git a/match/suffix_test.go b/match/suffix_test.go index aca6eef..aeda714 100644 --- a/match/suffix_test.go +++ b/match/suffix_test.go @@ -26,7 +26,7 @@ func TestSuffixIndex(t *testing.T) { }, } { p := Suffix{test.prefix} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,7 +38,20 @@ func TestSuffixIndex(t *testing.T) { func BenchmarkIndexSuffix(b *testing.B) { m := Suffix{"qwe"} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexSuffixParallel(b *testing.B) { + m := Suffix{"qwe"} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/super.go b/match/super.go index 19b718f..27d83de 100644 --- a/match/super.go +++ b/match/super.go @@ -2,7 +2,6 @@ package match import ( "fmt" - "unicode/utf8" ) type Super struct{} @@ -15,12 +14,10 @@ func (self Super) Len() int { return lenNo } -func (self Super) Index(s string) (int, []int) { - segments := make([]int, 0, utf8.RuneCountInString(s)+1) +func (self Super) Index(s string, segments []int) (int, []int) { for i := range s { segments = append(segments, i) } - segments = append(segments, len(s)) return 0, segments diff --git a/match/super_test.go b/match/super_test.go index b649fb1..aa68cfb 100644 --- a/match/super_test.go +++ b/match/super_test.go @@ -23,7 +23,7 @@ func TestSuperIndex(t *testing.T) { }, } { p := Super{} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -35,7 +35,20 @@ func TestSuperIndex(t *testing.T) { func BenchmarkIndexSuper(b *testing.B) { m := Super{} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexSuperParallel(b *testing.B) { + m := Super{} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/text.go b/match/text.go index 0b22c78..aff5b97 100644 --- a/match/text.go +++ b/match/text.go @@ -29,15 +29,13 @@ func (self Text) Len() int { return self.RunesLength } -func (self Text) Index(s string) (index int, segments []int) { - index = strings.Index(s, self.Str) +func (self Text) Index(s string, segments []int) (int, []int) { + index := strings.Index(s, self.Str) if index == -1 { - return + return -1, nil } - segments = []int{self.BytesLength} - - return + return index, append(segments, self.BytesLength) } func (self Text) String() string { diff --git a/match/text_test.go b/match/text_test.go index b7e1d5a..b5c6964 100644 --- a/match/text_test.go +++ b/match/text_test.go @@ -26,7 +26,7 @@ func TestTextIndex(t *testing.T) { }, } { m := NewText(test.text) - index, segments := m.Index(test.fixture) + index, segments := m.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,7 +38,20 @@ func TestTextIndex(t *testing.T) { func BenchmarkIndexText(b *testing.B) { m := NewText("foo") + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexTextParallel(b *testing.B) { + m := NewText("foo") + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/runes/runes.go b/runes/runes.go new file mode 100644 index 0000000..8586b16 --- /dev/null +++ b/runes/runes.go @@ -0,0 +1,128 @@ +package runes + +func Index(s, needle []rune) int { + ls, ln := len(s), len(needle) + + switch { + case ln == 0: + return 0 + case ln == 1: + return IndexRune(s, needle[0]) + case ln == ls: + if Equal(s, needle) { + return 0 + } + return -1 + case ln > ls: + return -1 + } + +head: + for i := 0; i < ls && ls-i >= ln; i++ { + for y := 0; y < ln; y++ { + if s[i+y] != needle[y] { + continue head + } + } + + return i + } + + return -1 +} + +func LastIndex(s, needle []rune) int { + ls, ln := len(s), len(needle) + + switch { + case ln == 0: + if ls == 0 { + return 0 + } + return ls + case ln == 1: + return IndexLastRune(s, needle[0]) + case ln == ls: + if Equal(s, needle) { + return 0 + } + return -1 + case ln > ls: + return -1 + } + +head: + for i := ls - 1; i >= 0 && i >= ln; i-- { + for y := ln - 1; y >= 0; y-- { + if s[i-(ln-y-1)] != needle[y] { + continue head + } + } + + return i - ln + 1 + } + + return -1 +} + +// IndexAny returns the index of the first instance of any Unicode code point +// from chars in s, or -1 if no Unicode code point from chars is present in s. +func IndexAny(s, chars []rune) int { + if len(chars) > 0 { + for i, c := range s { + for _, m := range chars { + if c == m { + return i + } + } + } + } + return -1 +} + +func Contains(s, needle []rune) bool { + return Index(s, needle) >= 0 +} + +func IndexRune(s []rune, r rune) int { + for i, c := range s { + if c == r { + return i + } + } + return -1 +} + +func IndexLastRune(s []rune, r rune) int { + for i := len(s) - 1; i >= 0; i-- { + if s[i] == r { + return i + } + } + + return -1 +} + +func Equal(a, b []rune) bool { + if len(a) == len(b) { + for i := 0; i < len(a); i++ { + if a[i] != b[i] { + return false + } + } + + return true + } + + return false +} + +// HasPrefix tests whether the string s begins with prefix. +func HasPrefix(s, prefix []rune) bool { + return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix) +} + +// HasSuffix tests whether the string s ends with suffix. +func HasSuffix(s, suffix []rune) bool { + return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix) +} diff --git a/runes/runes_test.go b/runes/runes_test.go new file mode 100644 index 0000000..54498eb --- /dev/null +++ b/runes/runes_test.go @@ -0,0 +1,222 @@ +package runes + +import ( + "strings" + "testing" +) + +type indexTest struct { + s []rune + sep []rune + out int +} + +type equalTest struct { + a []rune + b []rune + out bool +} + +func newIndexTest(s, sep string, out int) indexTest { + return indexTest{[]rune(s), []rune(sep), out} +} +func newEqualTest(s, sep string, out bool) equalTest { + return equalTest{[]rune(s), []rune(sep), out} +} + +var dots = "1....2....3....4" + +var indexTests = []indexTest{ + newIndexTest("", "", 0), + newIndexTest("", "a", -1), + newIndexTest("", "foo", -1), + newIndexTest("fo", "foo", -1), + newIndexTest("foo", "foo", 0), + newIndexTest("oofofoofooo", "f", 2), + newIndexTest("oofofoofooo", "foo", 4), + newIndexTest("barfoobarfoo", "foo", 3), + newIndexTest("foo", "", 0), + newIndexTest("foo", "o", 1), + newIndexTest("abcABCabc", "A", 3), + // cases with one byte strings - test special case in Index() + newIndexTest("", "a", -1), + newIndexTest("x", "a", -1), + newIndexTest("x", "x", 0), + newIndexTest("abc", "a", 0), + newIndexTest("abc", "b", 1), + newIndexTest("abc", "c", 2), + newIndexTest("abc", "x", -1), +} + +var lastIndexTests = []indexTest{ + newIndexTest("", "", 0), + newIndexTest("", "a", -1), + newIndexTest("", "foo", -1), + newIndexTest("fo", "foo", -1), + newIndexTest("foo", "foo", 0), + newIndexTest("foo", "f", 0), + newIndexTest("oofofoofooo", "f", 7), + newIndexTest("oofofoofooo", "foo", 7), + newIndexTest("barfoobarfoo", "foo", 9), + newIndexTest("foo", "", 3), + newIndexTest("foo", "o", 2), + newIndexTest("abcABCabc", "A", 3), + newIndexTest("abcABCabc", "a", 6), +} + +var indexAnyTests = []indexTest{ + newIndexTest("", "", -1), + newIndexTest("", "a", -1), + newIndexTest("", "abc", -1), + newIndexTest("a", "", -1), + newIndexTest("a", "a", 0), + newIndexTest("aaa", "a", 0), + newIndexTest("abc", "xyz", -1), + newIndexTest("abc", "xcz", 2), + newIndexTest("a☺b☻c☹d", "uvw☻xyz", 3), + newIndexTest("aRegExp*", ".(|)*+?^$[]", 7), + newIndexTest(dots+dots+dots, " ", -1), +} + +// Execute f on each test case. funcName should be the name of f; it's used +// in failure reports. +func runIndexTests(t *testing.T, f func(s, sep []rune) int, funcName string, testCases []indexTest) { + for _, test := range testCases { + actual := f(test.s, test.sep) + if actual != test.out { + t.Errorf("%s(%q,%q) = %v; want %v", funcName, test.s, test.sep, actual, test.out) + } + } +} + +func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) } +func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) } +func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) } + +var equalTests = []equalTest{ + newEqualTest("a", "a", true), + newEqualTest("a", "b", false), + newEqualTest("a☺b☻c☹d", "uvw☻xyz", false), + newEqualTest("a☺b☻c☹d", "a☺b☻c☹d", true), +} + +func TestEqual(t *testing.T) { + for _, test := range equalTests { + actual := Equal(test.a, test.b) + if actual != test.out { + t.Errorf("Equal(%q,%q) = %v; want %v", test.a, test.b, actual, test.out) + } + } +} + +func BenchmarkLastIndexRunes(b *testing.B) { + r := []rune("abcdef") + n := []rune("cd") + + for i := 0; i < b.N; i++ { + LastIndex(r, n) + } +} +func BenchmarkLastIndexStrings(b *testing.B) { + r := "abcdef" + n := "cd" + + for i := 0; i < b.N; i++ { + strings.LastIndex(r, n) + } +} + +func BenchmarkIndexAnyRunes(b *testing.B) { + s := []rune("...b...") + c := []rune("abc") + + for i := 0; i < b.N; i++ { + IndexAny(s, c) + } +} +func BenchmarkIndexAnyStrings(b *testing.B) { + s := "...b..." + c := "abc" + + for i := 0; i < b.N; i++ { + strings.IndexAny(s, c) + } +} + +func BenchmarkIndexRuneRunes(b *testing.B) { + s := []rune("...b...") + r := 'b' + + for i := 0; i < b.N; i++ { + IndexRune(s, r) + } +} +func BenchmarkIndexRuneStrings(b *testing.B) { + s := "...b..." + r := 'b' + + for i := 0; i < b.N; i++ { + strings.IndexRune(s, r) + } +} + +func BenchmarkIndexRunes(b *testing.B) { + r := []rune("abcdef") + n := []rune("cd") + + for i := 0; i < b.N; i++ { + Index(r, n) + } +} +func BenchmarkIndexStrings(b *testing.B) { + r := "abcdef" + n := "cd" + + for i := 0; i < b.N; i++ { + strings.Index(r, n) + } +} + +func BenchmarkEqualRunes(b *testing.B) { + x := []rune("abc") + y := []rune("abc") + + for i := 0; i < b.N; i++ { + if Equal(x, y) { + continue + } + } +} + +func BenchmarkEqualStrings(b *testing.B) { + x := "abc" + y := "abc" + + for i := 0; i < b.N; i++ { + if x == y { + continue + } + } +} + +func BenchmarkNotEqualRunes(b *testing.B) { + x := []rune("abc") + y := []rune("abcd") + + for i := 0; i < b.N; i++ { + if Equal(x, y) { + continue + } + } +} + +func BenchmarkNotEqualStrings(b *testing.B) { + x := "abc" + y := "abcd" + + for i := 0; i < b.N; i++ { + if x == y { + continue + } + } +} diff --git a/strings/strings.go b/strings/strings.go new file mode 100644 index 0000000..1be48f7 --- /dev/null +++ b/strings/strings.go @@ -0,0 +1,13 @@ +package strings + +import "strings" + +func IndexAnyRunes(s string, rs []rune) int { + for _, r := range rs { + if i := strings.IndexRune(s, r); i != -1 { + return i + } + } + + return -1 +} diff --git a/todo.txt b/todo.txt index 02bfc16..63fac05 100644 --- a/todo.txt +++ b/todo.txt @@ -11,17 +11,22 @@ BenchmarkPlainGlobMatch-4 7.20 154 +20 BenchmarkPrefixGlobMatch-4 8.75 113 +1191.43% BenchmarkSuffixGlobMatch-4 9.07 115 +1167.92% BenchmarkPrefixSuffixGlobMatch-4 15.1 125 +727.81% -BenchmarkIndexAny-4 887 255 -71.25% -BenchmarkIndexContains-4 492 247 -49.80% -BenchmarkIndexList-4 151 51.1 -66.16% -BenchmarkIndexMax-4 442 92.4 -79.10% -BenchmarkIndexMin-4 516 161 -68.80% -BenchmarkIndexNothing-4 452 92.8 -79.47% -BenchmarkIndexPrefixSuffix-4 84.3 57.2 -32.15% + + + + + BenchmarkIndexPrefix-4 85.1 55.9 -34.31% -BenchmarkIndexRange-4 170 60.6 -64.35% -BenchmarkRowIndex-4 172 94.0 -45.35% -BenchmarkIndexSingle-4 61.0 35.8 -41.31% +BenchmarkIndexRange-4 170(143) 60.6 -64.35% +BenchmarkRowIndex-4 172(128) 94.0 -45.35% +BenchmarkIndexSingle-4 61.0(16) 35.8 -41.31% BenchmarkIndexSuffix-4 84.8 55.7 -34.32% -BenchmarkIndexSuper-4 461 192 -58.35% +BenchmarkIndexSuper-4 461(180) 192 -58.35% BenchmarkIndexText-4 84.6 54.4 -35.70% +BenchmarkIndexPrefixSuffix-4 84.3 57.2 -32.15% +BenchmarkIndexNothing-4 452(3.31) 92.8 -79.47% XXX +BenchmarkIndexMin-4 516(274) 161 -68.80% +BenchmarkIndexMax-4 442(88) 92.4 -79.10% +BenchmarkIndexList-4 151(41) 51.1 -66.16% +BenchmarkIndexContains-4 492(220) 247 -49.80% +BenchmarkIndexAny-4 887(222) 255 -71.25% From 61a66d485f0d192ec0b17befc1cd20025aff864f Mon Sep 17 00:00:00 2001 From: gobwas Date: Tue, 2 Feb 2016 22:20:26 +0300 Subject: [PATCH 03/26] tune --- glob_test.go | 9 +++++ match/match.go | 93 ++++++++------------------------------------------ todo.txt | 56 ++++++++++++++---------------- 3 files changed, 49 insertions(+), 109 deletions(-) diff --git a/glob_test.go b/glob_test.go index d17e985..d0264f6 100644 --- a/glob_test.go +++ b/glob_test.go @@ -168,6 +168,15 @@ func BenchmarkAllGlobMatch(b *testing.B) { _ = m.Match(fixture_all_match) } } +func BenchmarkAllGlobMatchParallel(b *testing.B) { + m, _ := Compile(pattern_all) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + _ = m.Match(fixture_all_match) + } + }) +} func BenchmarkAllRegexpMatch(b *testing.B) { m := regexp.MustCompile(regexp_all) f := []byte(fixture_all_match) diff --git a/match/match.go b/match/match.go index 0a6664b..f9dcae1 100644 --- a/match/match.go +++ b/match/match.go @@ -42,8 +42,15 @@ func toPowerOfTwo(v int) int { return v } +const ( + minSegment = 32 + minSegmentMinusOne = 31 + maxSegment = 1024 + maxSegmentMinusOne = 1023 +) + func init() { - for i := 1024; i >= 1; i >>= 1 { + for i := maxSegment; i >= minSegment; i >>= 1 { func(i int) { segmentsPools[i-1] = sync.Pool{ New: func() interface{} { @@ -54,41 +61,29 @@ func init() { } } -var segmentsPool = sync.Pool{ - New: func() interface{} { - return make([]int, 0, 64) - }, -} - func getIdx(c int) int { p := toPowerOfTwo(c) switch { - case p >= 1024: - return 1023 - case p < 1: - return 0 + case p >= maxSegment: + return maxSegmentMinusOne + case p <= minSegment: + return minSegmentMinusOne default: return p - 1 } } func acquireSegments(c int) []int { + // fmt.Println("GET", getIdx(c)) return segmentsPools[getIdx(c)].Get().([]int)[:0] } func releaseSegments(s []int) { + // fmt.Println("PUT", getIdx(cap(s))) segmentsPools[getIdx(cap(s))].Put(s) } -func appendIfNotAsPrevious(target []int, val int) []int { - l := len(target) - if l != 0 && target[l-1] == val { - return target - } - - return append(target, val) -} - +// appendMerge merges and sorts given already SORTED and UNIQUE segments. func appendMerge(target, sub []int) []int { lt, ls := len(target), len(sub) out := acquireSegments(lt + ls) @@ -131,64 +126,6 @@ func appendMerge(target, sub []int) []int { return target } -// mergeSegments merges and sorts given already SORTED and UNIQUE segments. -func mergeSegments(list [][]int, out []int) []int { - var current []int - switch len(list) { - case 0: - return out - case 1: - return list[0] - default: - current = acquireSegments(len(list[0])) - current = append(current, list[0]...) - // releaseSegments(list[0]) - } - - for _, s := range list[1:] { - next := acquireSegments(len(current) + len(s)) - for x, y := 0, 0; x < len(current) || y < len(s); { - if x >= len(current) { - next = append(next, s[y:]...) - break - } - - if y >= len(s) { - next = append(next, current[x:]...) - break - } - - xValue := current[x] - yValue := s[y] - - switch { - - case xValue == yValue: - x++ - y++ - next = appendIfNotAsPrevious(next, xValue) - - case xValue < yValue: - next = appendIfNotAsPrevious(next, xValue) - x++ - - case yValue < xValue: - next = appendIfNotAsPrevious(next, yValue) - y++ - - } - } - - releaseSegments(current) - current = next - } - - out = append(out, current...) - releaseSegments(current) - - return out -} - func reverseSegments(input []int) { l := len(input) m := l / 2 diff --git a/todo.txt b/todo.txt index 63fac05..918063e 100644 --- a/todo.txt +++ b/todo.txt @@ -1,32 +1,26 @@ -benchmark | old ns/op | new ns/op | delta ------------------------------------------------|-----------|-------------|----------- -BenchmarkAllGlobMatch-4 512 711 +38.87% -BenchmarkMultipleGlobMatch-4 121 417 +244.63% -BenchmarkAlternativesGlobMatch-4 166 300 +80.72% -BenchmarkAlternativesSuffixFirstGlobMatch-4 23.5 292 +1142.55% -BenchmarkAlternativesSuffixSecondGlobMatch-4 29.8 355 +1091.28% -BenchmarkAlternativesCombineLiteGlobMatch-4 161 250 +55.28% -BenchmarkAlternativesCombineHardGlobMatch-4 325 334 +2.77% -BenchmarkPlainGlobMatch-4 7.20 154 +2038.89% -BenchmarkPrefixGlobMatch-4 8.75 113 +1191.43% -BenchmarkSuffixGlobMatch-4 9.07 115 +1167.92% -BenchmarkPrefixSuffixGlobMatch-4 15.1 125 +727.81% +benchmark old ns/op new ns/op delta - - - - -BenchmarkIndexPrefix-4 85.1 55.9 -34.31% -BenchmarkIndexRange-4 170(143) 60.6 -64.35% -BenchmarkRowIndex-4 172(128) 94.0 -45.35% -BenchmarkIndexSingle-4 61.0(16) 35.8 -41.31% -BenchmarkIndexSuffix-4 84.8 55.7 -34.32% -BenchmarkIndexSuper-4 461(180) 192 -58.35% -BenchmarkIndexText-4 84.6 54.4 -35.70% -BenchmarkIndexPrefixSuffix-4 84.3 57.2 -32.15% -BenchmarkIndexNothing-4 452(3.31) 92.8 -79.47% XXX -BenchmarkIndexMin-4 516(274) 161 -68.80% -BenchmarkIndexMax-4 442(88) 92.4 -79.10% -BenchmarkIndexList-4 151(41) 51.1 -66.16% -BenchmarkIndexContains-4 492(220) 247 -49.80% -BenchmarkIndexAny-4 887(222) 255 -71.25% +BenchmarkAllGlobMatch-4 519 1024 +97.30% +BenchmarkMultipleGlobMatch-4 123 218 +77.24% +BenchmarkAlternativesGlobMatch-4 164 283 +72.56% +BenchmarkAlternativesSuffixFirstGlobMatch-4 23.6 23.5 -0.42% +BenchmarkAlternativesSuffixSecondGlobMatch-4 29.7 30.1 +1.35% +BenchmarkAlternativesCombineLiteGlobMatch-4 161 352 +118.63% +BenchmarkAlternativesCombineHardGlobMatch-4 321 649 +102.18% +BenchmarkPlainGlobMatch-4 7.17 7.09 -1.12% +BenchmarkPrefixGlobMatch-4 8.74 8.64 -1.14% +BenchmarkSuffixGlobMatch-4 10.3 9.06 -12.04% +BenchmarkPrefixSuffixGlobMatch-4 31.0 15.1 -51.29% +BenchmarkIndexAny-4 1414 232 -83.59% +BenchmarkIndexContains-4 557 250 -55.12% +BenchmarkIndexList-4 207 42.6 -79.42% +BenchmarkIndexMax-4 630 111 -82.38% +BenchmarkIndexMin-4 515 328 -36.31% +BenchmarkIndexPrefixSuffix-4 97.9 86.2 -11.95% +BenchmarkIndexPrefix-4 86.1 84.0 -2.44% +BenchmarkIndexRange-4 181 144 -20.44% +BenchmarkRowIndex-4 185 127 -31.35% +BenchmarkIndexSingle-4 82.6 16.0 -80.63% +BenchmarkIndexSuffix-4 85.5 84.9 -0.70% +BenchmarkIndexSuper-4 450 196 -56.44% +BenchmarkIndexText-4 85.3 85.9 +0.70% From 6b71a60e74dbb071e71228fefe31f24c7806c512 Mon Sep 17 00:00:00 2001 From: gobwas Date: Tue, 2 Feb 2016 22:34:10 +0300 Subject: [PATCH 04/26] debug info --- glob_test.go | 9 +++++++++ match/btree.go | 10 ++++++---- match/match.go | 5 +++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/glob_test.go b/glob_test.go index d0264f6..08de6f7 100644 --- a/glob_test.go +++ b/glob_test.go @@ -1,6 +1,8 @@ package glob import ( + "fmt" + "github.com/gobwas/glob/match" "regexp" "testing" ) @@ -150,6 +152,13 @@ func TestGlob(t *testing.T) { } } +func TestAllGlobMatch(t *testing.T) { + + m, _ := Compile(pattern_all) + fmt.Println("HI", m.(match.Matcher).String()) + m.Match(fixture_all_match) +} + func BenchmarkParseGlob(b *testing.B) { for i := 0; i < b.N; i++ { Compile(pattern_all) diff --git a/match/btree.go b/match/btree.go index ad8f5f9..a9a7a15 100644 --- a/match/btree.go +++ b/match/btree.go @@ -77,10 +77,12 @@ func (self BTree) Match(s string) bool { limit = inputLen } + fmt.Println("ACQUIRE") + in := acquireSegments(inputLen) + for offset < limit { // search for matching part in substring - in := acquireSegments(limit - offset) - index, segments := self.Value.Index(s[offset:limit], in) + index, segments := self.Value.Index(s[offset:limit], in[:0]) if index == -1 { releaseSegments(in) return false @@ -120,12 +122,12 @@ func (self BTree) Match(s string) bool { } } - releaseSegments(in) - _, step := utf8.DecodeRuneInString(s[offset+index:]) offset += index + step } + releaseSegments(in) + return false } diff --git a/match/match.go b/match/match.go index f9dcae1..9df55fd 100644 --- a/match/match.go +++ b/match/match.go @@ -54,6 +54,7 @@ func init() { func(i int) { segmentsPools[i-1] = sync.Pool{ New: func() interface{} { + fmt.Println("NEW", i) return make([]int, 0, i) }, } @@ -74,12 +75,12 @@ func getIdx(c int) int { } func acquireSegments(c int) []int { - // fmt.Println("GET", getIdx(c)) + fmt.Println("GET", getIdx(c)) return segmentsPools[getIdx(c)].Get().([]int)[:0] } func releaseSegments(s []int) { - // fmt.Println("PUT", getIdx(cap(s))) + fmt.Println("PUT", getIdx(cap(s))) segmentsPools[getIdx(cap(s))].Put(s) } From ed108ad05abad016d1f45e6a12cb69ffac70482e Mon Sep 17 00:00:00 2001 From: gobwas Date: Fri, 5 Feb 2016 15:15:36 +0300 Subject: [PATCH 05/26] benchmarks --- glob_test.go | 10 +---- match/btree.go | 1 - match/match.go | 3 -- match/segements_test.go | 37 ++++++++++++++++ match/segments.go | 97 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 135 insertions(+), 13 deletions(-) create mode 100644 match/segements_test.go create mode 100644 match/segments.go diff --git a/glob_test.go b/glob_test.go index 08de6f7..35a7267 100644 --- a/glob_test.go +++ b/glob_test.go @@ -1,8 +1,6 @@ package glob import ( - "fmt" - "github.com/gobwas/glob/match" "regexp" "testing" ) @@ -152,13 +150,6 @@ func TestGlob(t *testing.T) { } } -func TestAllGlobMatch(t *testing.T) { - - m, _ := Compile(pattern_all) - fmt.Println("HI", m.(match.Matcher).String()) - m.Match(fixture_all_match) -} - func BenchmarkParseGlob(b *testing.B) { for i := 0; i < b.N; i++ { Compile(pattern_all) @@ -186,6 +177,7 @@ func BenchmarkAllGlobMatchParallel(b *testing.B) { } }) } + func BenchmarkAllRegexpMatch(b *testing.B) { m := regexp.MustCompile(regexp_all) f := []byte(fixture_all_match) diff --git a/match/btree.go b/match/btree.go index a9a7a15..2358eb3 100644 --- a/match/btree.go +++ b/match/btree.go @@ -77,7 +77,6 @@ func (self BTree) Match(s string) bool { limit = inputLen } - fmt.Println("ACQUIRE") in := acquireSegments(inputLen) for offset < limit { diff --git a/match/match.go b/match/match.go index 9df55fd..a8351eb 100644 --- a/match/match.go +++ b/match/match.go @@ -54,7 +54,6 @@ func init() { func(i int) { segmentsPools[i-1] = sync.Pool{ New: func() interface{} { - fmt.Println("NEW", i) return make([]int, 0, i) }, } @@ -75,12 +74,10 @@ func getIdx(c int) int { } func acquireSegments(c int) []int { - fmt.Println("GET", getIdx(c)) return segmentsPools[getIdx(c)].Get().([]int)[:0] } func releaseSegments(s []int) { - fmt.Println("PUT", getIdx(cap(s))) segmentsPools[getIdx(cap(s))].Put(s) } diff --git a/match/segements_test.go b/match/segements_test.go new file mode 100644 index 0000000..2fcbe17 --- /dev/null +++ b/match/segements_test.go @@ -0,0 +1,37 @@ +package match + +import ( + "testing" +) + +func BenchmarkPerfPoolSequenced(b *testing.B) { + pool := NewPoolSequenced(32, 32) + + for i := 0; i < b.N; i++ { + s := pool.Get() + pool.Put(s) + } +} + +func BenchmarkPerfPoolSynced(b *testing.B) { + pool := NewPoolSynced(32) + + for i := 0; i < b.N; i++ { + s := pool.Get() + pool.Put(s) + } +} +func BenchmarkPerfPoolPoolNative(b *testing.B) { + pool := NewPoolNative(32) + + for i := 0; i < b.N; i++ { + s := pool.Get() + pool.Put(s) + } +} + +func BenchmarkPerfMake(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = make([]int, 0, 32) + } +} diff --git a/match/segments.go b/match/segments.go new file mode 100644 index 0000000..bd099e3 --- /dev/null +++ b/match/segments.go @@ -0,0 +1,97 @@ +package match + +import "sync" + +// Pool holds Clients. +type PoolSequenced struct { + size int + pool chan []int +} + +// NewPool creates a new pool of Clients. +func NewPoolSequenced(max, size int) *PoolSequenced { + return &PoolSequenced{ + size: size, + pool: make(chan []int, max), + } +} + +// Borrow a Client from the pool. +func (p *PoolSequenced) Get() []int { + var s []int + select { + case s = <-p.pool: + default: + s = make([]int, 0, p.size) + } + + return s[:0] +} + +// Return returns a Client to the pool. +func (p *PoolSequenced) Put(s []int) { + select { + case p.pool <- s: + default: + // let it go, let it go... + } +} + +type PoolSynced struct { + size int + mu sync.Mutex + list [][]int +} + +func NewPoolSynced(size int) *PoolSynced { + return &PoolSynced{ + size: size, + } +} + +func (p *PoolSynced) Get() []int { + var s []int + + p.mu.Lock() + ll := len(p.list) + if ll > 0 { + s, p.list = p.list[ll-1], p.list[:ll-1] + } + p.mu.Unlock() + + if s == nil { + return make([]int, 0, p.size) + } + + return s[:0] +} + +func (p *PoolSynced) Put(s []int) { + p.mu.Lock() + defer p.mu.Unlock() + p.list = append(p.list, s) +} + +type PoolNative struct { + size int + pool sync.Pool +} + +func NewPoolNative(size int) *PoolNative { + return &PoolNative{ + size: size, + } +} + +func (p *PoolNative) Get() []int { + s := p.pool.Get() + if s == nil { + return make([]int, 0, p.size) + } + + return s.([]int) +} + +func (p *PoolNative) Put(s []int) { + p.pool.Put(s) +} From f843e797074287596287f752b725a69de564359e Mon Sep 17 00:00:00 2001 From: gobwas Date: Fri, 5 Feb 2016 16:57:42 +0300 Subject: [PATCH 06/26] remove acquire --- match/any_of.go | 11 ++++--- match/any_test.go | 6 ++-- match/btree.go | 8 ++---- match/contains_test.go | 4 +-- match/every_of.go | 48 +++++++++++++------------------ match/every_of_test.go | 2 +- match/list_test.go | 4 +-- match/match.go | 57 +------------------------------------ match/max_test.go | 4 +-- match/min_test.go | 4 +-- match/nothing_test.go | 4 +-- match/prefix_suffix_test.go | 4 +-- match/prefix_test.go | 4 +-- match/range_test.go | 4 +-- match/row_test.go | 4 +-- match/single_test.go | 4 +-- match/suffix_test.go | 4 +-- match/super_test.go | 4 +-- match/text_test.go | 4 +-- 19 files changed, 58 insertions(+), 126 deletions(-) diff --git a/match/any_of.go b/match/any_of.go index 602cd28..1b60287 100644 --- a/match/any_of.go +++ b/match/any_of.go @@ -25,29 +25,28 @@ func (self AnyOf) Match(s string) bool { func (self AnyOf) Index(s string, segments []int) (int, []int) { index := -1 + + // create reusable segments + in := make([]int, 0, len(s)) + for _, m := range self.Matchers { - in := acquireSegments(len(s)) - idx, seg := m.Index(s, in) + idx, seg := m.Index(s, in[:0]) if idx == -1 { - releaseSegments(in) continue } if index == -1 || idx < index { index = idx segments = append(segments[:0], seg...) - releaseSegments(in) continue } if idx > index { - releaseSegments(in) continue } // here idx == index segments = appendMerge(segments, seg) - releaseSegments(in) } if index == -1 { diff --git a/match/any_test.go b/match/any_test.go index 9239ffa..2a5b07e 100644 --- a/match/any_test.go +++ b/match/any_test.go @@ -33,15 +33,13 @@ func TestAnyIndex(t *testing.T) { if !reflect.DeepEqual(segments, test.segments) { t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments) } - - releaseSegments(segments) } } func BenchmarkIndexAny(b *testing.B) { m := Any{bench_separators} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) } @@ -49,7 +47,7 @@ func BenchmarkIndexAny(b *testing.B) { func BenchmarkIndexAnyParallel(b *testing.B) { m := Any{bench_separators} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/btree.go b/match/btree.go index 2358eb3..cbd93ee 100644 --- a/match/btree.go +++ b/match/btree.go @@ -77,13 +77,14 @@ func (self BTree) Match(s string) bool { limit = inputLen } - in := acquireSegments(inputLen) + // reusable segments list + // inputLen is the maximum size of output segments values + in := make([]int, 0, inputLen) for offset < limit { // search for matching part in substring index, segments := self.Value.Index(s[offset:limit], in[:0]) if index == -1 { - releaseSegments(in) return false } @@ -115,7 +116,6 @@ func (self BTree) Match(s string) bool { } if right { - releaseSegments(in) return true } } @@ -125,8 +125,6 @@ func (self BTree) Match(s string) bool { offset += index + step } - releaseSegments(in) - return false } diff --git a/match/contains_test.go b/match/contains_test.go index ba9577b..4789c89 100644 --- a/match/contains_test.go +++ b/match/contains_test.go @@ -56,7 +56,7 @@ func TestContainsIndex(t *testing.T) { func BenchmarkIndexContains(b *testing.B) { m := Contains{string(bench_separators), true} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) } @@ -64,7 +64,7 @@ func BenchmarkIndexContains(b *testing.B) { func BenchmarkIndexContainsParallel(b *testing.B) { m := Contains{string(bench_separators), true} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/every_of.go b/match/every_of.go index dba0154..22ea8c8 100644 --- a/match/every_of.go +++ b/match/every_of.go @@ -25,35 +25,32 @@ func (self EveryOf) Len() (l int) { return } -func max(a, b int) int { - if a >= b { - return a - } - - return b -} - func (self EveryOf) Index(s string, out []int) (int, []int) { var index int var offset int - var current []int + + // make `in` with cap as len(s), + // cause it is the maximum size of output segments values + in := make([]int, 0, len(s)) + next := make([]int, 0, len(s)) + current := make([]int, 0, len(s)) sub := s for i, m := range self.Matchers { - in := acquireSegments(len(sub)) - idx, seg := m.Index(sub, in) + idx, seg := m.Index(sub, in[:0]) if idx == -1 { - releaseSegments(in) - if cap(current) > 0 { - releaseSegments(current) - } return -1, nil } - next := acquireSegments(max(len(seg), len(current))) if i == 0 { - next = append(next, seg...) + // we use copy here instead of `current = seg` + // cause seg is a slice from reusable buffer `in` + // and it could be overwritten in next iteration + current = append(current, seg...) } else { + // clear the next + next = next[:0] + delta := index - (idx + offset) for _, ex := range current { for _, n := range seg { @@ -62,27 +59,22 @@ func (self EveryOf) Index(s string, out []int) (int, []int) { } } } - } - if cap(current) > 0 { - releaseSegments(current) - } - releaseSegments(in) + if len(next) == 0 { + return -1, nil + } - if len(next) == 0 { - releaseSegments(next) - return -1, nil + current = append(current[:0], next...) } - current = next - index = idx + offset sub = s[index:] offset += idx } + // copy result in `out` to prevent + // allocation `current` on heap out = append(out, current...) - releaseSegments(current) return index, out } diff --git a/match/every_of_test.go b/match/every_of_test.go index c97ea70..08a1cbe 100644 --- a/match/every_of_test.go +++ b/match/every_of_test.go @@ -18,7 +18,7 @@ func TestEveryOfIndex(t *testing.T) { NewText("b"), NewText("c"), }, - "abc", + "dbc", -1, nil, }, diff --git a/match/list_test.go b/match/list_test.go index 8fd13a2..de1ff0c 100644 --- a/match/list_test.go +++ b/match/list_test.go @@ -41,7 +41,7 @@ func TestListIndex(t *testing.T) { func BenchmarkIndexList(b *testing.B) { m := List{[]rune("def"), false} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) @@ -50,7 +50,7 @@ func BenchmarkIndexList(b *testing.B) { func BenchmarkIndexListParallel(b *testing.B) { m := List{[]rune("def"), false} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/match.go b/match/match.go index a8351eb..dadc5a1 100644 --- a/match/match.go +++ b/match/match.go @@ -3,7 +3,6 @@ package match import ( "fmt" "strings" - "sync" ) const lenOne = 1 @@ -28,63 +27,10 @@ func (m Matchers) String() string { return fmt.Sprintf("%s", strings.Join(s, ",")) } -var segmentsPools [1024]sync.Pool - -func toPowerOfTwo(v int) int { - v-- - v |= v >> 1 - v |= v >> 2 - v |= v >> 4 - v |= v >> 8 - v |= v >> 16 - v++ - - return v -} - -const ( - minSegment = 32 - minSegmentMinusOne = 31 - maxSegment = 1024 - maxSegmentMinusOne = 1023 -) - -func init() { - for i := maxSegment; i >= minSegment; i >>= 1 { - func(i int) { - segmentsPools[i-1] = sync.Pool{ - New: func() interface{} { - return make([]int, 0, i) - }, - } - }(i) - } -} - -func getIdx(c int) int { - p := toPowerOfTwo(c) - switch { - case p >= maxSegment: - return maxSegmentMinusOne - case p <= minSegment: - return minSegmentMinusOne - default: - return p - 1 - } -} - -func acquireSegments(c int) []int { - return segmentsPools[getIdx(c)].Get().([]int)[:0] -} - -func releaseSegments(s []int) { - segmentsPools[getIdx(cap(s))].Put(s) -} - // appendMerge merges and sorts given already SORTED and UNIQUE segments. func appendMerge(target, sub []int) []int { lt, ls := len(target), len(sub) - out := acquireSegments(lt + ls) + out := make([]int, 0, lt+ls) for x, y := 0, 0; x < lt || y < ls; { if x >= lt { @@ -119,7 +65,6 @@ func appendMerge(target, sub []int) []int { } target = append(target[:0], out...) - releaseSegments(out) return target } diff --git a/match/max_test.go b/match/max_test.go index 2c5cba5..f00b061 100644 --- a/match/max_test.go +++ b/match/max_test.go @@ -38,7 +38,7 @@ func TestMaxIndex(t *testing.T) { func BenchmarkIndexMax(b *testing.B) { m := Max{10} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) @@ -47,7 +47,7 @@ func BenchmarkIndexMax(b *testing.B) { func BenchmarkIndexMaxParallel(b *testing.B) { m := Max{10} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/min_test.go b/match/min_test.go index 5f9f126..da86aee 100644 --- a/match/min_test.go +++ b/match/min_test.go @@ -38,7 +38,7 @@ func TestMinIndex(t *testing.T) { func BenchmarkIndexMin(b *testing.B) { m := Min{10} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) @@ -47,7 +47,7 @@ func BenchmarkIndexMin(b *testing.B) { func BenchmarkIndexMinParallel(b *testing.B) { m := Min{10} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/nothing_test.go b/match/nothing_test.go index 76cb678..6bfab94 100644 --- a/match/nothing_test.go +++ b/match/nothing_test.go @@ -35,7 +35,7 @@ func TestNothingIndex(t *testing.T) { func BenchmarkIndexNothing(b *testing.B) { m := Nothing{} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) @@ -44,7 +44,7 @@ func BenchmarkIndexNothing(b *testing.B) { func BenchmarkIndexNothingParallel(b *testing.B) { m := Nothing{} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/prefix_suffix_test.go b/match/prefix_suffix_test.go index 23271c0..57db175 100644 --- a/match/prefix_suffix_test.go +++ b/match/prefix_suffix_test.go @@ -48,7 +48,7 @@ func TestPrefixSuffixIndex(t *testing.T) { func BenchmarkIndexPrefixSuffix(b *testing.B) { m := PrefixSuffix{"qew", "sqw"} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) @@ -57,7 +57,7 @@ func BenchmarkIndexPrefixSuffix(b *testing.B) { func BenchmarkIndexPrefixSuffixParallel(b *testing.B) { m := PrefixSuffix{"qew", "sqw"} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/prefix_test.go b/match/prefix_test.go index 5b38bfd..dfa3c00 100644 --- a/match/prefix_test.go +++ b/match/prefix_test.go @@ -38,7 +38,7 @@ func TestPrefixIndex(t *testing.T) { func BenchmarkIndexPrefix(b *testing.B) { m := Prefix{"qew"} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) @@ -47,7 +47,7 @@ func BenchmarkIndexPrefix(b *testing.B) { func BenchmarkIndexPrefixParallel(b *testing.B) { m := Prefix{"qew"} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/range_test.go b/match/range_test.go index a7cdef8..1a83301 100644 --- a/match/range_test.go +++ b/match/range_test.go @@ -48,7 +48,7 @@ func TestRangeIndex(t *testing.T) { func BenchmarkIndexRange(b *testing.B) { m := Range{'0', '9', false} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) @@ -57,7 +57,7 @@ func BenchmarkIndexRange(b *testing.B) { func BenchmarkIndexRangeParallel(b *testing.B) { m := Range{'0', '9', false} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/row_test.go b/match/row_test.go index ff59cff..b380434 100644 --- a/match/row_test.go +++ b/match/row_test.go @@ -59,7 +59,7 @@ func BenchmarkRowIndex(b *testing.B) { }, RunesLength: 7, } - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) @@ -75,7 +75,7 @@ func BenchmarkIndexRowParallel(b *testing.B) { }, RunesLength: 7, } - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/single_test.go b/match/single_test.go index e1e99ac..095cddc 100644 --- a/match/single_test.go +++ b/match/single_test.go @@ -38,7 +38,7 @@ func TestSingleIndex(t *testing.T) { func BenchmarkIndexSingle(b *testing.B) { m := Single{bench_separators} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) @@ -47,7 +47,7 @@ func BenchmarkIndexSingle(b *testing.B) { func BenchmarkIndexSingleParallel(b *testing.B) { m := Single{bench_separators} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/suffix_test.go b/match/suffix_test.go index aeda714..27cb60f 100644 --- a/match/suffix_test.go +++ b/match/suffix_test.go @@ -38,7 +38,7 @@ func TestSuffixIndex(t *testing.T) { func BenchmarkIndexSuffix(b *testing.B) { m := Suffix{"qwe"} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) @@ -47,7 +47,7 @@ func BenchmarkIndexSuffix(b *testing.B) { func BenchmarkIndexSuffixParallel(b *testing.B) { m := Suffix{"qwe"} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/super_test.go b/match/super_test.go index aa68cfb..02b9a67 100644 --- a/match/super_test.go +++ b/match/super_test.go @@ -35,7 +35,7 @@ func TestSuperIndex(t *testing.T) { func BenchmarkIndexSuper(b *testing.B) { m := Super{} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) @@ -44,7 +44,7 @@ func BenchmarkIndexSuper(b *testing.B) { func BenchmarkIndexSuperParallel(b *testing.B) { m := Super{} - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/text_test.go b/match/text_test.go index b5c6964..5aab330 100644 --- a/match/text_test.go +++ b/match/text_test.go @@ -38,7 +38,7 @@ func TestTextIndex(t *testing.T) { func BenchmarkIndexText(b *testing.B) { m := NewText("foo") - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { m.Index(bench_pattern, in[:0]) @@ -47,7 +47,7 @@ func BenchmarkIndexText(b *testing.B) { func BenchmarkIndexTextParallel(b *testing.B) { m := NewText("foo") - in := acquireSegments(len(bench_pattern)) + in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { From 462ce6e3ac7f829d6e464a85d9597b974a29adfe Mon Sep 17 00:00:00 2001 From: gobwas Date: Fri, 5 Feb 2016 16:57:54 +0300 Subject: [PATCH 07/26] remove files --- match/segements_test.go | 37 ---------------- match/segments.go | 97 ----------------------------------------- 2 files changed, 134 deletions(-) delete mode 100644 match/segements_test.go delete mode 100644 match/segments.go diff --git a/match/segements_test.go b/match/segements_test.go deleted file mode 100644 index 2fcbe17..0000000 --- a/match/segements_test.go +++ /dev/null @@ -1,37 +0,0 @@ -package match - -import ( - "testing" -) - -func BenchmarkPerfPoolSequenced(b *testing.B) { - pool := NewPoolSequenced(32, 32) - - for i := 0; i < b.N; i++ { - s := pool.Get() - pool.Put(s) - } -} - -func BenchmarkPerfPoolSynced(b *testing.B) { - pool := NewPoolSynced(32) - - for i := 0; i < b.N; i++ { - s := pool.Get() - pool.Put(s) - } -} -func BenchmarkPerfPoolPoolNative(b *testing.B) { - pool := NewPoolNative(32) - - for i := 0; i < b.N; i++ { - s := pool.Get() - pool.Put(s) - } -} - -func BenchmarkPerfMake(b *testing.B) { - for i := 0; i < b.N; i++ { - _ = make([]int, 0, 32) - } -} diff --git a/match/segments.go b/match/segments.go deleted file mode 100644 index bd099e3..0000000 --- a/match/segments.go +++ /dev/null @@ -1,97 +0,0 @@ -package match - -import "sync" - -// Pool holds Clients. -type PoolSequenced struct { - size int - pool chan []int -} - -// NewPool creates a new pool of Clients. -func NewPoolSequenced(max, size int) *PoolSequenced { - return &PoolSequenced{ - size: size, - pool: make(chan []int, max), - } -} - -// Borrow a Client from the pool. -func (p *PoolSequenced) Get() []int { - var s []int - select { - case s = <-p.pool: - default: - s = make([]int, 0, p.size) - } - - return s[:0] -} - -// Return returns a Client to the pool. -func (p *PoolSequenced) Put(s []int) { - select { - case p.pool <- s: - default: - // let it go, let it go... - } -} - -type PoolSynced struct { - size int - mu sync.Mutex - list [][]int -} - -func NewPoolSynced(size int) *PoolSynced { - return &PoolSynced{ - size: size, - } -} - -func (p *PoolSynced) Get() []int { - var s []int - - p.mu.Lock() - ll := len(p.list) - if ll > 0 { - s, p.list = p.list[ll-1], p.list[:ll-1] - } - p.mu.Unlock() - - if s == nil { - return make([]int, 0, p.size) - } - - return s[:0] -} - -func (p *PoolSynced) Put(s []int) { - p.mu.Lock() - defer p.mu.Unlock() - p.list = append(p.list, s) -} - -type PoolNative struct { - size int - pool sync.Pool -} - -func NewPoolNative(size int) *PoolNative { - return &PoolNative{ - size: size, - } -} - -func (p *PoolNative) Get() []int { - s := p.pool.Get() - if s == nil { - return make([]int, 0, p.size) - } - - return s.([]int) -} - -func (p *PoolNative) Put(s []int) { - p.pool.Put(s) -} From 4b229a908db5b9aa97705130afabc2e21c063648 Mon Sep 17 00:00:00 2001 From: gobwas Date: Fri, 5 Feb 2016 17:29:41 +0300 Subject: [PATCH 08/26] remove slices --- match/any.go | 6 +++--- match/any_of.go | 7 +++---- match/any_of_test.go | 2 +- match/any_test.go | 8 +++----- match/btree.go | 12 ++++-------- match/contains.go | 5 +++-- match/contains_test.go | 8 +++----- match/every_of.go | 11 +++-------- match/every_of_test.go | 2 +- match/list.go | 4 ++-- match/list_test.go | 8 +++----- match/match.go | 2 +- match/max.go | 4 +++- match/max_test.go | 8 +++----- match/min.go | 7 +++---- match/min_test.go | 8 +++----- match/nothing.go | 4 ++-- match/nothing_test.go | 8 +++----- match/prefix.go | 3 ++- match/prefix_suffix.go | 39 +++++++++++++++++++------------------ match/prefix_suffix_test.go | 8 +++----- match/prefix_test.go | 8 +++----- match/range.go | 4 ++-- match/range_test.go | 8 +++----- match/row.go | 4 ++-- match/row_test.go | 8 +++----- match/single.go | 4 ++-- match/single_test.go | 8 +++----- match/suffix.go | 4 ++-- match/suffix_test.go | 8 +++----- match/super.go | 3 ++- match/super_test.go | 8 +++----- match/text.go | 4 ++-- match/text_test.go | 8 +++----- 34 files changed, 105 insertions(+), 138 deletions(-) diff --git a/match/any.go b/match/any.go index d931eea..6e278ec 100644 --- a/match/any.go +++ b/match/any.go @@ -13,17 +13,17 @@ func (self Any) Match(s string) bool { return strings.IndexAnyRunes(s, self.Separators) == -1 } -func (self Any) Index(s string, segments []int) (int, []int) { +func (self Any) Index(s string) (int, []int) { found := strings.IndexAnyRunes(s, self.Separators) switch found { case -1: case 0: - segments = append(segments) - return 0, segments + return 0, []int{0} default: s = s[:found] } + segments := make([]int, 0, len(s)) for i := range s { segments = append(segments, i) } diff --git a/match/any_of.go b/match/any_of.go index 1b60287..affe8c2 100644 --- a/match/any_of.go +++ b/match/any_of.go @@ -23,14 +23,13 @@ func (self AnyOf) Match(s string) bool { return false } -func (self AnyOf) Index(s string, segments []int) (int, []int) { +func (self AnyOf) Index(s string) (int, []int) { index := -1 - // create reusable segments - in := make([]int, 0, len(s)) + segments := make([]int, 0, len(s)) for _, m := range self.Matchers { - idx, seg := m.Index(s, in[:0]) + idx, seg := m.Index(s) if idx == -1 { continue } diff --git a/match/any_of_test.go b/match/any_of_test.go index ee3001a..41bb6ee 100644 --- a/match/any_of_test.go +++ b/match/any_of_test.go @@ -42,7 +42,7 @@ func TestAnyOfIndex(t *testing.T) { }, } { everyOf := AnyOf{test.matchers} - index, segments := everyOf.Index(test.fixture, []int{}) + index, segments := everyOf.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } diff --git a/match/any_test.go b/match/any_test.go index 2a5b07e..6c166f6 100644 --- a/match/any_test.go +++ b/match/any_test.go @@ -26,7 +26,7 @@ func TestAnyIndex(t *testing.T) { }, } { p := Any{test.sep} - index, segments := p.Index(test.fixture, []int{}) + index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -39,19 +39,17 @@ func TestAnyIndex(t *testing.T) { func BenchmarkIndexAny(b *testing.B) { m := Any{bench_separators} - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexAnyParallel(b *testing.B) { m := Any{bench_separators} - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/btree.go b/match/btree.go index cbd93ee..e669383 100644 --- a/match/btree.go +++ b/match/btree.go @@ -51,7 +51,7 @@ func (self BTree) Len() int { } // todo? -func (self BTree) Index(s string, segments []int) (int, []int) { +func (self BTree) Index(s string) (int, []int) { return -1, nil } @@ -65,8 +65,8 @@ func (self BTree) Match(s string) bool { return false } - // try to cut unnecessary parts - // by knowledge of length of right and left part + // try to cut unnecessary parts + // by knowledge of length of right and left part var offset, limit int if self.LeftLengthRunes >= 0 { offset = self.LeftLengthRunes @@ -77,13 +77,9 @@ func (self BTree) Match(s string) bool { limit = inputLen } - // reusable segments list - // inputLen is the maximum size of output segments values - in := make([]int, 0, inputLen) - for offset < limit { // search for matching part in substring - index, segments := self.Value.Index(s[offset:limit], in[:0]) + index, segments := self.Value.Index(s[offset:limit]) if index == -1 { return false } diff --git a/match/contains.go b/match/contains.go index 8246c92..4d6f888 100644 --- a/match/contains.go +++ b/match/contains.go @@ -14,7 +14,7 @@ func (self Contains) Match(s string) bool { return strings.Contains(s, self.Needle) != self.Not } -func (self Contains) Index(s string, segments []int) (int, []int) { +func (self Contains) Index(s string) (int, []int) { var offset int idx := strings.Index(s, self.Needle) @@ -26,13 +26,14 @@ func (self Contains) Index(s string, segments []int) (int, []int) { offset = idx + len(self.Needle) if len(s) <= offset { - return 0, append(segments, offset) + return 0, []int{offset} } s = s[offset:] } else if idx != -1 { s = s[:idx] } + segments := make([]int, 0, len(s)+1) for i, _ := range s { segments = append(segments, offset+i) } diff --git a/match/contains_test.go b/match/contains_test.go index 4789c89..c2bdbaa 100644 --- a/match/contains_test.go +++ b/match/contains_test.go @@ -43,7 +43,7 @@ func TestContainsIndex(t *testing.T) { }, } { p := Contains{test.prefix, test.not} - index, segments := p.Index(test.fixture, []int{}) + index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -56,19 +56,17 @@ func TestContainsIndex(t *testing.T) { func BenchmarkIndexContains(b *testing.B) { m := Contains{string(bench_separators), true} - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexContainsParallel(b *testing.B) { m := Contains{string(bench_separators), true} - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/every_of.go b/match/every_of.go index 22ea8c8..3a77b43 100644 --- a/match/every_of.go +++ b/match/every_of.go @@ -25,19 +25,18 @@ func (self EveryOf) Len() (l int) { return } -func (self EveryOf) Index(s string, out []int) (int, []int) { +func (self EveryOf) Index(s string) (int, []int) { var index int var offset int // make `in` with cap as len(s), // cause it is the maximum size of output segments values - in := make([]int, 0, len(s)) next := make([]int, 0, len(s)) current := make([]int, 0, len(s)) sub := s for i, m := range self.Matchers { - idx, seg := m.Index(sub, in[:0]) + idx, seg := m.Index(sub) if idx == -1 { return -1, nil } @@ -72,11 +71,7 @@ func (self EveryOf) Index(s string, out []int) (int, []int) { offset += idx } - // copy result in `out` to prevent - // allocation `current` on heap - out = append(out, current...) - - return index, out + return index, current } func (self EveryOf) Match(s string) bool { diff --git a/match/every_of_test.go b/match/every_of_test.go index 08a1cbe..3f70a78 100644 --- a/match/every_of_test.go +++ b/match/every_of_test.go @@ -34,7 +34,7 @@ func TestEveryOfIndex(t *testing.T) { }, } { everyOf := EveryOf{test.matchers} - index, segments := everyOf.Index(test.fixture, []int{}) + index, segments := everyOf.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } diff --git a/match/list.go b/match/list.go index bcc3612..6ab49ba 100644 --- a/match/list.go +++ b/match/list.go @@ -25,10 +25,10 @@ func (self List) Len() int { return lenOne } -func (self List) Index(s string, segments []int) (int, []int) { +func (self List) Index(s string) (int, []int) { for i, r := range s { if self.Not == (runes.IndexRune(self.List, r) == -1) { - return i, append(segments, utf8.RuneLen(r)) + return i, []int{utf8.RuneLen(r)} } } diff --git a/match/list_test.go b/match/list_test.go index de1ff0c..473aa7e 100644 --- a/match/list_test.go +++ b/match/list_test.go @@ -29,7 +29,7 @@ func TestListIndex(t *testing.T) { }, } { p := List{test.list, test.not} - index, segments := p.Index(test.fixture, []int{}) + index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -41,20 +41,18 @@ func TestListIndex(t *testing.T) { func BenchmarkIndexList(b *testing.B) { m := List{[]rune("def"), false} - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexListParallel(b *testing.B) { m := List{[]rune("def"), false} - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/match.go b/match/match.go index dadc5a1..122753e 100644 --- a/match/match.go +++ b/match/match.go @@ -11,7 +11,7 @@ const lenNo = -1 type Matcher interface { Match(string) bool - Index(string, []int) (int, []int) + Index(string) (int, []int) Len() int String() string } diff --git a/match/max.go b/match/max.go index 341264c..5405594 100644 --- a/match/max.go +++ b/match/max.go @@ -21,7 +21,9 @@ func (self Max) Match(s string) bool { return true } -func (self Max) Index(s string, segments []int) (int, []int) { +func (self Max) Index(s string) (int, []int) { + segments := make([]int, 0, self.Limit+1) + segments = append(segments, 0) var count int for i, r := range s { diff --git a/match/max_test.go b/match/max_test.go index f00b061..005e3f0 100644 --- a/match/max_test.go +++ b/match/max_test.go @@ -26,7 +26,7 @@ func TestMaxIndex(t *testing.T) { }, } { p := Max{test.limit} - index, segments := p.Index(test.fixture, []int{}) + index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,20 +38,18 @@ func TestMaxIndex(t *testing.T) { func BenchmarkIndexMax(b *testing.B) { m := Max{10} - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexMaxParallel(b *testing.B) { m := Max{10} - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/min.go b/match/min.go index 9ddfd84..6942e29 100644 --- a/match/min.go +++ b/match/min.go @@ -21,19 +21,18 @@ func (self Min) Match(s string) bool { return false } -func (self Min) Index(s string, segments []int) (int, []int) { +func (self Min) Index(s string) (int, []int) { var count int - var found bool + segments := make([]int, 0, len(s)-self.Limit+1) for i, r := range s { count++ if count >= self.Limit { - found = true segments = append(segments, i+utf8.RuneLen(r)) } } - if !found { + if len(segments) == 0 { return -1, nil } diff --git a/match/min_test.go b/match/min_test.go index da86aee..2bea4ee 100644 --- a/match/min_test.go +++ b/match/min_test.go @@ -26,7 +26,7 @@ func TestMinIndex(t *testing.T) { }, } { p := Min{test.limit} - index, segments := p.Index(test.fixture, []int{}) + index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,20 +38,18 @@ func TestMinIndex(t *testing.T) { func BenchmarkIndexMin(b *testing.B) { m := Min{10} - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexMinParallel(b *testing.B) { m := Min{10} - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/nothing.go b/match/nothing.go index a6b6922..ef5049b 100644 --- a/match/nothing.go +++ b/match/nothing.go @@ -10,8 +10,8 @@ func (self Nothing) Match(s string) bool { return len(s) == 0 } -func (self Nothing) Index(s string, segments []int) (int, []int) { - return 0, append(segments, 0) +func (self Nothing) Index(s string) (int, []int) { + return 0, []int{0} } func (self Nothing) Len() int { diff --git a/match/nothing_test.go b/match/nothing_test.go index 6bfab94..569969c 100644 --- a/match/nothing_test.go +++ b/match/nothing_test.go @@ -23,7 +23,7 @@ func TestNothingIndex(t *testing.T) { }, } { p := Nothing{} - index, segments := p.Index(test.fixture, []int{}) + index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -35,20 +35,18 @@ func TestNothingIndex(t *testing.T) { func BenchmarkIndexNothing(b *testing.B) { m := Nothing{} - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexNothingParallel(b *testing.B) { m := Nothing{} - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/prefix.go b/match/prefix.go index 2bb1260..486d1bf 100644 --- a/match/prefix.go +++ b/match/prefix.go @@ -10,7 +10,7 @@ type Prefix struct { Prefix string } -func (self Prefix) Index(s string, segments []int) (int, []int) { +func (self Prefix) Index(s string) (int, []int) { idx := strings.Index(s, self.Prefix) if idx == -1 { return -1, nil @@ -24,6 +24,7 @@ func (self Prefix) Index(s string, segments []int) (int, []int) { sub = "" } + segments := make([]int, 0, len(sub)+1) segments = append(segments, length) for i, r := range sub { segments = append(segments, length+i+utf8.RuneLen(r)) diff --git a/match/prefix_suffix.go b/match/prefix_suffix.go index 4b78107..1e1795b 100644 --- a/match/prefix_suffix.go +++ b/match/prefix_suffix.go @@ -9,7 +9,7 @@ type PrefixSuffix struct { Prefix, Suffix string } -func (self PrefixSuffix) Index(s string, segments []int) (int, []int) { +func (self PrefixSuffix) Index(s string) (int, []int) { prefixIdx := strings.Index(s, self.Prefix) if prefixIdx == -1 { return -1, nil @@ -17,26 +17,27 @@ func (self PrefixSuffix) Index(s string, segments []int) (int, []int) { suffixLen := len(self.Suffix) - if suffixLen > 0 { - for sub := s[prefixIdx:]; ; { - suffixIdx := strings.LastIndex(sub, self.Suffix) - if suffixIdx == -1 { - break - } - - segments = append(segments, suffixIdx+suffixLen) - sub = sub[:suffixIdx] - } - - if len(segments) == 0 { - return -1, nil - } - - reverseSegments(segments) - } else { - segments = append(segments, len(s)-prefixIdx) + if suffixLen <= 0 { + return prefixIdx, []int{len(s) - prefixIdx} } + segments := make([]int, 0, len(s)-prefixIdx) + for sub := s[prefixIdx:]; ; { + suffixIdx := strings.LastIndex(sub, self.Suffix) + if suffixIdx == -1 { + break + } + + segments = append(segments, suffixIdx+suffixLen) + sub = sub[:suffixIdx] + } + + if len(segments) == 0 { + return -1, nil + } + + reverseSegments(segments) + return prefixIdx, segments } diff --git a/match/prefix_suffix_test.go b/match/prefix_suffix_test.go index 57db175..c4e1c4d 100644 --- a/match/prefix_suffix_test.go +++ b/match/prefix_suffix_test.go @@ -36,7 +36,7 @@ func TestPrefixSuffixIndex(t *testing.T) { }, } { p := PrefixSuffix{test.prefix, test.suffix} - index, segments := p.Index(test.fixture, []int{}) + index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -48,20 +48,18 @@ func TestPrefixSuffixIndex(t *testing.T) { func BenchmarkIndexPrefixSuffix(b *testing.B) { m := PrefixSuffix{"qew", "sqw"} - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexPrefixSuffixParallel(b *testing.B) { m := PrefixSuffix{"qew", "sqw"} - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/prefix_test.go b/match/prefix_test.go index dfa3c00..a202d34 100644 --- a/match/prefix_test.go +++ b/match/prefix_test.go @@ -26,7 +26,7 @@ func TestPrefixIndex(t *testing.T) { }, } { p := Prefix{test.prefix} - index, segments := p.Index(test.fixture, []int{}) + index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,20 +38,18 @@ func TestPrefixIndex(t *testing.T) { func BenchmarkIndexPrefix(b *testing.B) { m := Prefix{"qew"} - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexPrefixParallel(b *testing.B) { m := Prefix{"qew"} - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/range.go b/match/range.go index e669870..5b4ee03 100644 --- a/match/range.go +++ b/match/range.go @@ -28,10 +28,10 @@ func (self Range) Match(s string) bool { return inRange == !self.Not } -func (self Range) Index(s string, segments []int) (int, []int) { +func (self Range) Index(s string) (int, []int) { for i, r := range s { if self.Not != (r >= self.Lo && r <= self.Hi) { - return i, append(segments, utf8.RuneLen(r)) + return i, []int{utf8.RuneLen(r)} } } diff --git a/match/range_test.go b/match/range_test.go index 1a83301..f9a5151 100644 --- a/match/range_test.go +++ b/match/range_test.go @@ -36,7 +36,7 @@ func TestRangeIndex(t *testing.T) { }, } { m := Range{test.lo, test.hi, test.not} - index, segments := m.Index(test.fixture, []int{}) + index, segments := m.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -48,20 +48,18 @@ func TestRangeIndex(t *testing.T) { func BenchmarkIndexRange(b *testing.B) { m := Range{'0', '9', false} - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexRangeParallel(b *testing.B) { m := Range{'0', '9', false} - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/row.go b/match/row.go index 8b3e2fc..a079aa4 100644 --- a/match/row.go +++ b/match/row.go @@ -52,7 +52,7 @@ func (self Row) Len() (l int) { return self.RunesLength } -func (self Row) Index(s string, segments []int) (int, []int) { +func (self Row) Index(s string) (int, []int) { if !self.lenOk(s) { return -1, nil } @@ -66,7 +66,7 @@ func (self Row) Index(s string, segments []int) (int, []int) { } if self.matchAll(s[i:]) { - return i, append(segments, self.RunesLength) + return i, []int{self.RunesLength} } } diff --git a/match/row_test.go b/match/row_test.go index b380434..7d7d7f4 100644 --- a/match/row_test.go +++ b/match/row_test.go @@ -40,7 +40,7 @@ func TestRowIndex(t *testing.T) { Matchers: test.matchers, RunesLength: test.length, } - index, segments := p.Index(test.fixture, []int{}) + index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -59,10 +59,9 @@ func BenchmarkRowIndex(b *testing.B) { }, RunesLength: 7, } - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } @@ -75,11 +74,10 @@ func BenchmarkIndexRowParallel(b *testing.B) { }, RunesLength: 7, } - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/single.go b/match/single.go index e1ad121..c0d3171 100644 --- a/match/single.go +++ b/match/single.go @@ -24,10 +24,10 @@ func (self Single) Len() int { return lenOne } -func (self Single) Index(s string, segments []int) (int, []int) { +func (self Single) Index(s string) (int, []int) { for i, r := range s { if runes.IndexRune(self.Separators, r) == -1 { - return i, append(segments, utf8.RuneLen(r)) + return i, []int{utf8.RuneLen(r)} } } diff --git a/match/single_test.go b/match/single_test.go index 095cddc..5f38623 100644 --- a/match/single_test.go +++ b/match/single_test.go @@ -26,7 +26,7 @@ func TestSingleIndex(t *testing.T) { }, } { p := Single{test.separators} - index, segments := p.Index(test.fixture, []int{}) + index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,20 +38,18 @@ func TestSingleIndex(t *testing.T) { func BenchmarkIndexSingle(b *testing.B) { m := Single{bench_separators} - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexSingleParallel(b *testing.B) { m := Single{bench_separators} - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/suffix.go b/match/suffix.go index ca825f7..d38d71a 100644 --- a/match/suffix.go +++ b/match/suffix.go @@ -9,13 +9,13 @@ type Suffix struct { Suffix string } -func (self Suffix) Index(s string, segments []int) (int, []int) { +func (self Suffix) Index(s string) (int, []int) { idx := strings.Index(s, self.Suffix) if idx == -1 { return -1, nil } - return 0, append(segments, idx+len(self.Suffix)) + return 0, []int{idx + len(self.Suffix)} } func (self Suffix) Len() int { diff --git a/match/suffix_test.go b/match/suffix_test.go index 27cb60f..a395c87 100644 --- a/match/suffix_test.go +++ b/match/suffix_test.go @@ -26,7 +26,7 @@ func TestSuffixIndex(t *testing.T) { }, } { p := Suffix{test.prefix} - index, segments := p.Index(test.fixture, []int{}) + index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,20 +38,18 @@ func TestSuffixIndex(t *testing.T) { func BenchmarkIndexSuffix(b *testing.B) { m := Suffix{"qwe"} - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexSuffixParallel(b *testing.B) { m := Suffix{"qwe"} - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/super.go b/match/super.go index 27d83de..fe70f3c 100644 --- a/match/super.go +++ b/match/super.go @@ -14,7 +14,8 @@ func (self Super) Len() int { return lenNo } -func (self Super) Index(s string, segments []int) (int, []int) { +func (self Super) Index(s string) (int, []int) { + segments := make([]int, 0, len(s)+1) for i := range s { segments = append(segments, i) } diff --git a/match/super_test.go b/match/super_test.go index 02b9a67..1126e83 100644 --- a/match/super_test.go +++ b/match/super_test.go @@ -23,7 +23,7 @@ func TestSuperIndex(t *testing.T) { }, } { p := Super{} - index, segments := p.Index(test.fixture, []int{}) + index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -35,20 +35,18 @@ func TestSuperIndex(t *testing.T) { func BenchmarkIndexSuper(b *testing.B) { m := Super{} - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexSuperParallel(b *testing.B) { m := Super{} - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } diff --git a/match/text.go b/match/text.go index aff5b97..f7f926b 100644 --- a/match/text.go +++ b/match/text.go @@ -29,13 +29,13 @@ func (self Text) Len() int { return self.RunesLength } -func (self Text) Index(s string, segments []int) (int, []int) { +func (self Text) Index(s string) (int, []int) { index := strings.Index(s, self.Str) if index == -1 { return -1, nil } - return index, append(segments, self.BytesLength) + return index, []int{self.BytesLength} } func (self Text) String() string { diff --git a/match/text_test.go b/match/text_test.go index 5aab330..81e31a0 100644 --- a/match/text_test.go +++ b/match/text_test.go @@ -26,7 +26,7 @@ func TestTextIndex(t *testing.T) { }, } { m := NewText(test.text) - index, segments := m.Index(test.fixture, []int{}) + index, segments := m.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,20 +38,18 @@ func TestTextIndex(t *testing.T) { func BenchmarkIndexText(b *testing.B) { m := NewText("foo") - in := make([]int, 0, len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } } func BenchmarkIndexTextParallel(b *testing.B) { m := NewText("foo") - in := make([]int, 0, len(bench_pattern)) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern, in[:0]) + m.Index(bench_pattern) } }) } From 6e346bf8d55da3b611e49ba8612922bdf3fd962a Mon Sep 17 00:00:00 2001 From: gobwas Date: Mon, 22 Feb 2016 22:21:13 +0300 Subject: [PATCH 09/26] test parallel --- match/btree_test.go | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/match/btree_test.go b/match/btree_test.go index 24c10ee..3a96786 100644 --- a/match/btree_test.go +++ b/match/btree_test.go @@ -46,3 +46,45 @@ func TestBTree(t *testing.T) { } } } + +type fakeMatcher struct { + len int + name string +} + +func (f *fakeMatcher) Match(string) bool { + return true +} + +var i = 3 + +func (f *fakeMatcher) Index(s string) (int, []int) { + seg := make([]int, 0, i) + for x := 0; x < i; x++ { + seg = append(seg, x) + } + return 0, seg +} +func (f *fakeMatcher) Len() int { + return f.len +} +func (f *fakeMatcher) String() string { + return f.name +} + +func BenchmarkMatchBTree(b *testing.B) { + l := &fakeMatcher{4, "left_fake"} + r := &fakeMatcher{4, "right_fake"} + v := &fakeMatcher{2, "value_fake"} + + // must be <= len(l + r + v) + fixture := "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij" + + bt := NewBTree(v, l, r) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + bt.Match(fixture) + } + }) +} From a1783e998615f942bb62238fa9a82348323126b9 Mon Sep 17 00:00:00 2001 From: gobwas Date: Mon, 22 Feb 2016 23:47:31 +0300 Subject: [PATCH 10/26] try this --- match/any.go | 2 +- match/any_test.go | 3 +- match/segements_test.go | 65 ++++++++++++++ match/segments.go | 193 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 261 insertions(+), 2 deletions(-) create mode 100644 match/segements_test.go create mode 100644 match/segments.go diff --git a/match/any.go b/match/any.go index 6e278ec..abf25d8 100644 --- a/match/any.go +++ b/match/any.go @@ -23,7 +23,7 @@ func (self Any) Index(s string) (int, []int) { s = s[:found] } - segments := make([]int, 0, len(s)) + segments := acquireSegments(len(s)) for i := range s { segments = append(segments, i) } diff --git a/match/any_test.go b/match/any_test.go index 6c166f6..3b03881 100644 --- a/match/any_test.go +++ b/match/any_test.go @@ -40,7 +40,8 @@ func BenchmarkIndexAny(b *testing.B) { m := Any{bench_separators} for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } diff --git a/match/segements_test.go b/match/segements_test.go new file mode 100644 index 0000000..9eaf49c --- /dev/null +++ b/match/segements_test.go @@ -0,0 +1,65 @@ +package match + +import ( + "testing" +) + +func BenchmarkPerfPoolSequenced(b *testing.B) { + pool := NewPoolSequenced(512, func() []int { + return make([]int, 0, 16) + }) + + b.SetParallelism(32) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + s := pool.Get() + pool.Put(s) + } + }) +} + +func BenchmarkPerfPoolSynced(b *testing.B) { + pool := NewPoolSynced(32) + + b.SetParallelism(32) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + s := pool.Get() + pool.Put(s) + } + }) +} + +func BenchmarkPerfPoolNative(b *testing.B) { + pool := NewPoolNative(func() []int { + return make([]int, 0, 16) + }) + + b.SetParallelism(32) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + s := pool.Get() + pool.Put(s) + } + }) +} + +func BenchmarkPerfPoolStatic(b *testing.B) { + pool := NewPoolStatic(32, func() []int { + return make([]int, 0, 16) + }) + + b.SetParallelism(32) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + i, v := pool.Get() + pool.Put(i, v) + } + }) +} + +func BenchmarkPerfMake(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = make([]int, 0, 32) + } +} diff --git a/match/segments.go b/match/segments.go new file mode 100644 index 0000000..15aadaa --- /dev/null +++ b/match/segments.go @@ -0,0 +1,193 @@ +package match + +import ( + "sync" + "sync/atomic" +) + +var segmentsPools [1024]*PoolNative + +func toPowerOfTwo(v int) int { + v-- + v |= v >> 1 + v |= v >> 2 + v |= v >> 4 + v |= v >> 8 + v |= v >> 16 + v++ + + return v +} + +const ( + minSegment = 4 + minSegmentMinusOne = 3 + maxSegment = 1024 + maxSegmentMinusOne = 1023 +) + +func init() { + for i := maxSegment; i >= minSegment; i >>= 1 { + func(i int) { + segmentsPools[i-1] = NewPoolNative(func() []int { + return make([]int, 0, i) + }) + }(i) + } +} + +func getIdx(c int) int { + p := toPowerOfTwo(c) + switch { + case p >= maxSegment: + return maxSegmentMinusOne + case p <= minSegment: + return minSegmentMinusOne + default: + return p - 1 + } +} + +func acquireSegments(c int) []int { + return segmentsPools[getIdx(c)].Get() +} + +func releaseSegments(s []int) { + segmentsPools[getIdx(cap(s))].Put(s) +} + +type newSegmentsFunc func() []int + +// Pool holds Clients. +type PoolSequenced struct { + new newSegmentsFunc + pool chan []int +} + +// NewPool creates a new pool of Clients. +func NewPoolSequenced(size int, f newSegmentsFunc) *PoolSequenced { + return &PoolSequenced{ + new: f, + pool: make(chan []int, size), + } +} + +// Borrow a Client from the pool. +func (p *PoolSequenced) Get() []int { + var s []int + select { + case s = <-p.pool: + default: + s = p.new() + } + + return s[:0] +} + +// Return returns a Client to the pool. +func (p *PoolSequenced) Put(s []int) { + select { + case p.pool <- s: + default: + // let it go, let it go... + } +} + +type PoolSynced struct { + size int + mu sync.Mutex + list [][]int +} + +func NewPoolSynced(size int) *PoolSynced { + return &PoolSynced{ + size: size, + } +} + +func (p *PoolSynced) Get() []int { + var s []int + + p.mu.Lock() + ll := len(p.list) + if ll > 0 { + s, p.list = p.list[ll-1], p.list[:ll-1] + } + p.mu.Unlock() + + if s == nil { + return make([]int, 0, p.size) + } + + return s[:0] +} + +func (p *PoolSynced) Put(s []int) { + p.mu.Lock() + defer p.mu.Unlock() + p.list = append(p.list, s) +} + +type PoolNative struct { + pool *sync.Pool +} + +func NewPoolNative(f newSegmentsFunc) *PoolNative { + return &PoolNative{ + pool: &sync.Pool{New: func() interface{} { + return f() + }}, + } +} + +func (p *PoolNative) Get() []int { + return p.pool.Get().([]int)[:0] +} + +func (p *PoolNative) Put(s []int) { + p.pool.Put(s) +} + +type segments struct { + data []int + locked int32 +} + +type PoolStatic struct { + f newSegmentsFunc + pool []*segments +} + +func NewPoolStatic(size int, f newSegmentsFunc) *PoolStatic { + p := &PoolStatic{ + f: f, + pool: make([]*segments, 0, size), + } + + for i := 0; i < size; i++ { + p.pool = append(p.pool, &segments{ + data: f(), + }) + } + + return p +} + +func (p *PoolStatic) Get() (int, []int) { + for i, s := range p.pool { + if atomic.CompareAndSwapInt32(&s.locked, 0, 1) { + return i, s.data + } + } + + return -1, p.f() +} + +func (p *PoolStatic) Put(i int, s []int) { + if i < 0 { + return + } + + p.pool[i].data = s + atomic.CompareAndSwapInt32(&(p.pool[i].locked), 1, 0) +} From 71fc92e3a7383b2ee77e74eab2161a7304c6bee7 Mon Sep 17 00:00:00 2001 From: gobwas Date: Tue, 23 Feb 2016 00:26:06 +0300 Subject: [PATCH 11/26] acquire/release segments only when needed --- match/any_of.go | 3 +- match/any_test.go | 3 +- match/btree.go | 4 + match/contains.go | 2 +- match/contains_test.go | 6 +- match/prefix.go | 2 +- match/prefix_suffix.go | 4 +- match/prefix_suffix_test.go | 6 +- match/prefix_test.go | 6 +- match/range_test.go | 6 +- match/row_test.go | 6 +- match/segements_test.go | 100 ++++++++++++--------- match/segments.go | 173 ++++++------------------------------ match/single_test.go | 6 +- match/suffix_test.go | 6 +- match/super_test.go | 6 +- match/text_test.go | 6 +- 17 files changed, 132 insertions(+), 213 deletions(-) diff --git a/match/any_of.go b/match/any_of.go index affe8c2..8cdfed1 100644 --- a/match/any_of.go +++ b/match/any_of.go @@ -26,7 +26,7 @@ func (self AnyOf) Match(s string) bool { func (self AnyOf) Index(s string) (int, []int) { index := -1 - segments := make([]int, 0, len(s)) + segments := acquireSegments(len(s)) for _, m := range self.Matchers { idx, seg := m.Index(s) @@ -49,6 +49,7 @@ func (self AnyOf) Index(s string) (int, []int) { } if index == -1 { + releaseSegments(segments) return -1, nil } diff --git a/match/any_test.go b/match/any_test.go index 3b03881..e15a0c7 100644 --- a/match/any_test.go +++ b/match/any_test.go @@ -50,7 +50,8 @@ func BenchmarkIndexAnyParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } diff --git a/match/btree.go b/match/btree.go index e669383..8827035 100644 --- a/match/btree.go +++ b/match/btree.go @@ -81,6 +81,7 @@ func (self BTree) Match(s string) bool { // search for matching part in substring index, segments := self.Value.Index(s[offset:limit]) if index == -1 { + releaseSegments(segments) return false } @@ -112,6 +113,7 @@ func (self BTree) Match(s string) bool { } if right { + releaseSegments(segments) return true } } @@ -119,6 +121,8 @@ func (self BTree) Match(s string) bool { _, step := utf8.DecodeRuneInString(s[offset+index:]) offset += index + step + + releaseSegments(segments) } return false diff --git a/match/contains.go b/match/contains.go index 4d6f888..be20492 100644 --- a/match/contains.go +++ b/match/contains.go @@ -33,7 +33,7 @@ func (self Contains) Index(s string) (int, []int) { s = s[:idx] } - segments := make([]int, 0, len(s)+1) + segments := acquireSegments(len(s) + 1) for i, _ := range s { segments = append(segments, offset+i) } diff --git a/match/contains_test.go b/match/contains_test.go index c2bdbaa..f840793 100644 --- a/match/contains_test.go +++ b/match/contains_test.go @@ -57,7 +57,8 @@ func BenchmarkIndexContains(b *testing.B) { m := Contains{string(bench_separators), true} for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } @@ -66,7 +67,8 @@ func BenchmarkIndexContainsParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } diff --git a/match/prefix.go b/match/prefix.go index 486d1bf..429ff88 100644 --- a/match/prefix.go +++ b/match/prefix.go @@ -24,7 +24,7 @@ func (self Prefix) Index(s string) (int, []int) { sub = "" } - segments := make([]int, 0, len(sub)+1) + segments := acquireSegments(len(sub) + 1) segments = append(segments, length) for i, r := range sub { segments = append(segments, length+i+utf8.RuneLen(r)) diff --git a/match/prefix_suffix.go b/match/prefix_suffix.go index 1e1795b..620b873 100644 --- a/match/prefix_suffix.go +++ b/match/prefix_suffix.go @@ -21,7 +21,7 @@ func (self PrefixSuffix) Index(s string) (int, []int) { return prefixIdx, []int{len(s) - prefixIdx} } - segments := make([]int, 0, len(s)-prefixIdx) + segments := acquireSegments(len(s) - prefixIdx) for sub := s[prefixIdx:]; ; { suffixIdx := strings.LastIndex(sub, self.Suffix) if suffixIdx == -1 { @@ -33,7 +33,7 @@ func (self PrefixSuffix) Index(s string) (int, []int) { } if len(segments) == 0 { - return -1, nil + return -1, segments } reverseSegments(segments) diff --git a/match/prefix_suffix_test.go b/match/prefix_suffix_test.go index c4e1c4d..aaf5dca 100644 --- a/match/prefix_suffix_test.go +++ b/match/prefix_suffix_test.go @@ -50,7 +50,8 @@ func BenchmarkIndexPrefixSuffix(b *testing.B) { m := PrefixSuffix{"qew", "sqw"} for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } @@ -59,7 +60,8 @@ func BenchmarkIndexPrefixSuffixParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } diff --git a/match/prefix_test.go b/match/prefix_test.go index a202d34..f6b2f04 100644 --- a/match/prefix_test.go +++ b/match/prefix_test.go @@ -40,7 +40,8 @@ func BenchmarkIndexPrefix(b *testing.B) { m := Prefix{"qew"} for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } @@ -49,7 +50,8 @@ func BenchmarkIndexPrefixParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } diff --git a/match/range_test.go b/match/range_test.go index f9a5151..1bde746 100644 --- a/match/range_test.go +++ b/match/range_test.go @@ -50,7 +50,8 @@ func BenchmarkIndexRange(b *testing.B) { m := Range{'0', '9', false} for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } @@ -59,7 +60,8 @@ func BenchmarkIndexRangeParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } diff --git a/match/row_test.go b/match/row_test.go index 7d7d7f4..0bd7bd8 100644 --- a/match/row_test.go +++ b/match/row_test.go @@ -61,7 +61,8 @@ func BenchmarkRowIndex(b *testing.B) { } for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } @@ -77,7 +78,8 @@ func BenchmarkIndexRowParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } diff --git a/match/segements_test.go b/match/segements_test.go index 9eaf49c..1ce1123 100644 --- a/match/segements_test.go +++ b/match/segements_test.go @@ -1,65 +1,83 @@ package match import ( + "sync" "testing" ) -func BenchmarkPerfPoolSequenced(b *testing.B) { - pool := NewPoolSequenced(512, func() []int { - return make([]int, 0, 16) - }) +func benchPool(i int, b *testing.B) { + pool := sync.Pool{New: func() interface{} { + return make([]int, 0, i) + }} - b.SetParallelism(32) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - s := pool.Get() + s := pool.Get().([]int)[:0] pool.Put(s) } }) } -func BenchmarkPerfPoolSynced(b *testing.B) { - pool := NewPoolSynced(32) - - b.SetParallelism(32) +func benchMake(i int, b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - s := pool.Get() - pool.Put(s) + _ = make([]int, 0, i) } }) } -func BenchmarkPerfPoolNative(b *testing.B) { - pool := NewPoolNative(func() []int { - return make([]int, 0, 16) - }) - - b.SetParallelism(32) - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - s := pool.Get() - pool.Put(s) - } - }) +func BenchmarkSegmentsPool_1(b *testing.B) { + benchPool(1, b) +} +func BenchmarkSegmentsPool_2(b *testing.B) { + benchPool(2, b) +} +func BenchmarkSegmentsPool_4(b *testing.B) { + benchPool(4, b) +} +func BenchmarkSegmentsPool_8(b *testing.B) { + benchPool(8, b) +} +func BenchmarkSegmentsPool_16(b *testing.B) { + benchPool(16, b) +} +func BenchmarkSegmentsPool_32(b *testing.B) { + benchPool(32, b) +} +func BenchmarkSegmentsPool_64(b *testing.B) { + benchPool(64, b) +} +func BenchmarkSegmentsPool_128(b *testing.B) { + benchPool(128, b) +} +func BenchmarkSegmentsPool_256(b *testing.B) { + benchPool(256, b) } -func BenchmarkPerfPoolStatic(b *testing.B) { - pool := NewPoolStatic(32, func() []int { - return make([]int, 0, 16) - }) - - b.SetParallelism(32) - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - i, v := pool.Get() - pool.Put(i, v) - } - }) +func BenchmarkSegmentsMake_1(b *testing.B) { + benchMake(1, b) } - -func BenchmarkPerfMake(b *testing.B) { - for i := 0; i < b.N; i++ { - _ = make([]int, 0, 32) - } +func BenchmarkSegmentsMake_2(b *testing.B) { + benchMake(2, b) +} +func BenchmarkSegmentsMake_4(b *testing.B) { + benchMake(4, b) +} +func BenchmarkSegmentsMake_8(b *testing.B) { + benchMake(8, b) +} +func BenchmarkSegmentsMake_16(b *testing.B) { + benchMake(16, b) +} +func BenchmarkSegmentsMake_32(b *testing.B) { + benchMake(32, b) +} +func BenchmarkSegmentsMake_64(b *testing.B) { + benchMake(64, b) +} +func BenchmarkSegmentsMake_128(b *testing.B) { + benchMake(128, b) +} +func BenchmarkSegmentsMake_256(b *testing.B) { + benchMake(256, b) } diff --git a/match/segments.go b/match/segments.go index 15aadaa..0077e4e 100644 --- a/match/segments.go +++ b/match/segments.go @@ -2,10 +2,9 @@ package match import ( "sync" - "sync/atomic" ) -var segmentsPools [1024]*PoolNative +var segmentsPools [1024]*sync.Pool func toPowerOfTwo(v int) int { v-- @@ -20,174 +19,52 @@ func toPowerOfTwo(v int) int { } const ( - minSegment = 4 - minSegmentMinusOne = 3 - maxSegment = 1024 - maxSegmentMinusOne = 1023 + cacheFrom = 16 + cacheToAndHigher = 1024 + cacheFromIndex = 15 + cacheToAndHigherIndex = 1023 ) func init() { - for i := maxSegment; i >= minSegment; i >>= 1 { + for i := cacheToAndHigher; i >= cacheFrom; i >>= 1 { func(i int) { - segmentsPools[i-1] = NewPoolNative(func() []int { + segmentsPools[i-1] = &sync.Pool{New: func() interface{} { return make([]int, 0, i) - }) + }} }(i) } } -func getIdx(c int) int { +func getTableIndex(c int) int { p := toPowerOfTwo(c) switch { - case p >= maxSegment: - return maxSegmentMinusOne - case p <= minSegment: - return minSegmentMinusOne + case p >= cacheToAndHigher: + return cacheToAndHigherIndex + case p <= cacheFrom: + return cacheFromIndex default: return p - 1 } } func acquireSegments(c int) []int { - return segmentsPools[getIdx(c)].Get() + // make []int with less capacity than cacheFrom + // is faster than acquiring it from pool + if c < cacheFrom { + return make([]int, 0, c) + } + + return segmentsPools[getTableIndex(c)].Get().([]int)[:0] } func releaseSegments(s []int) { - segmentsPools[getIdx(cap(s))].Put(s) -} + c := cap(s) -type newSegmentsFunc func() []int - -// Pool holds Clients. -type PoolSequenced struct { - new newSegmentsFunc - pool chan []int -} - -// NewPool creates a new pool of Clients. -func NewPoolSequenced(size int, f newSegmentsFunc) *PoolSequenced { - return &PoolSequenced{ - new: f, - pool: make(chan []int, size), - } -} - -// Borrow a Client from the pool. -func (p *PoolSequenced) Get() []int { - var s []int - select { - case s = <-p.pool: - default: - s = p.new() - } - - return s[:0] -} - -// Return returns a Client to the pool. -func (p *PoolSequenced) Put(s []int) { - select { - case p.pool <- s: - default: - // let it go, let it go... - } -} - -type PoolSynced struct { - size int - mu sync.Mutex - list [][]int -} - -func NewPoolSynced(size int) *PoolSynced { - return &PoolSynced{ - size: size, - } -} - -func (p *PoolSynced) Get() []int { - var s []int - - p.mu.Lock() - ll := len(p.list) - if ll > 0 { - s, p.list = p.list[ll-1], p.list[:ll-1] - } - p.mu.Unlock() - - if s == nil { - return make([]int, 0, p.size) - } - - return s[:0] -} - -func (p *PoolSynced) Put(s []int) { - p.mu.Lock() - defer p.mu.Unlock() - p.list = append(p.list, s) -} - -type PoolNative struct { - pool *sync.Pool -} - -func NewPoolNative(f newSegmentsFunc) *PoolNative { - return &PoolNative{ - pool: &sync.Pool{New: func() interface{} { - return f() - }}, - } -} - -func (p *PoolNative) Get() []int { - return p.pool.Get().([]int)[:0] -} - -func (p *PoolNative) Put(s []int) { - p.pool.Put(s) -} - -type segments struct { - data []int - locked int32 -} - -type PoolStatic struct { - f newSegmentsFunc - pool []*segments -} - -func NewPoolStatic(size int, f newSegmentsFunc) *PoolStatic { - p := &PoolStatic{ - f: f, - pool: make([]*segments, 0, size), - } - - for i := 0; i < size; i++ { - p.pool = append(p.pool, &segments{ - data: f(), - }) - } - - return p -} - -func (p *PoolStatic) Get() (int, []int) { - for i, s := range p.pool { - if atomic.CompareAndSwapInt32(&s.locked, 0, 1) { - return i, s.data - } - } - - return -1, p.f() -} - -func (p *PoolStatic) Put(i int, s []int) { - if i < 0 { + // make []int with less capacity than cacheFrom + // is faster than acquiring it from pool + if c < cacheFrom { return } - p.pool[i].data = s - atomic.CompareAndSwapInt32(&(p.pool[i].locked), 1, 0) + segmentsPools[getTableIndex(cap(s))].Put(s) } diff --git a/match/single_test.go b/match/single_test.go index 5f38623..b08f236 100644 --- a/match/single_test.go +++ b/match/single_test.go @@ -40,7 +40,8 @@ func BenchmarkIndexSingle(b *testing.B) { m := Single{bench_separators} for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } @@ -49,7 +50,8 @@ func BenchmarkIndexSingleParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } diff --git a/match/suffix_test.go b/match/suffix_test.go index a395c87..3e3c250 100644 --- a/match/suffix_test.go +++ b/match/suffix_test.go @@ -40,7 +40,8 @@ func BenchmarkIndexSuffix(b *testing.B) { m := Suffix{"qwe"} for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } @@ -49,7 +50,8 @@ func BenchmarkIndexSuffixParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } diff --git a/match/super_test.go b/match/super_test.go index 1126e83..e7c2624 100644 --- a/match/super_test.go +++ b/match/super_test.go @@ -37,7 +37,8 @@ func BenchmarkIndexSuper(b *testing.B) { m := Super{} for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } @@ -46,7 +47,8 @@ func BenchmarkIndexSuperParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } diff --git a/match/text_test.go b/match/text_test.go index 81e31a0..a3de40e 100644 --- a/match/text_test.go +++ b/match/text_test.go @@ -40,7 +40,8 @@ func BenchmarkIndexText(b *testing.B) { m := NewText("foo") for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } @@ -49,7 +50,8 @@ func BenchmarkIndexTextParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } From 92be27c14ddf5c5823557657a071c14a0ba8da4a Mon Sep 17 00:00:00 2001 From: gobwas Date: Tue, 23 Feb 2016 14:46:20 +0300 Subject: [PATCH 12/26] optimizations with cached segments --- match/any.go | 3 +-- match/any_of_test.go | 4 ++-- match/every_of.go | 5 +++-- match/list.go | 9 ++++++++- match/list_test.go | 6 +++--- match/match.go | 2 ++ match/match_test.go | 26 ++++++++++++++++++++++++++ match/nothing.go | 2 +- match/range.go | 5 +---- match/row.go | 11 ++++++++++- match/segments.go | 27 +++++++++++++++++++++++++++ match/single.go | 2 +- match/super.go | 1 + match/text.go | 4 +++- runes/runes.go | 26 ++++++++++++++++++++++++++ 15 files changed, 115 insertions(+), 18 deletions(-) diff --git a/match/any.go b/match/any.go index abf25d8..02492e8 100644 --- a/match/any.go +++ b/match/any.go @@ -18,7 +18,7 @@ func (self Any) Index(s string) (int, []int) { switch found { case -1: case 0: - return 0, []int{0} + return 0, segments0 default: s = s[:found] } @@ -27,7 +27,6 @@ func (self Any) Index(s string) (int, []int) { for i := range s { segments = append(segments, i) } - segments = append(segments, len(s)) return 0, segments diff --git a/match/any_of_test.go b/match/any_of_test.go index 41bb6ee..44782e9 100644 --- a/match/any_of_test.go +++ b/match/any_of_test.go @@ -33,8 +33,8 @@ func TestAnyOfIndex(t *testing.T) { }, { Matchers{ - List{[]rune("[def]"), false}, - List{[]rune("[abc]"), false}, + NewList([]rune("[def]"), false), + NewList([]rune("[abc]"), false), }, "abcdef", 0, diff --git a/match/every_of.go b/match/every_of.go index 3a77b43..9220f78 100644 --- a/match/every_of.go +++ b/match/every_of.go @@ -31,8 +31,9 @@ func (self EveryOf) Index(s string) (int, []int) { // make `in` with cap as len(s), // cause it is the maximum size of output segments values - next := make([]int, 0, len(s)) - current := make([]int, 0, len(s)) + //todo opti!!! + next := acquireSegments(len(s)) + current := acquireSegments(len(s)) sub := s for i, m := range self.Matchers { diff --git a/match/list.go b/match/list.go index 6ab49ba..00f2d98 100644 --- a/match/list.go +++ b/match/list.go @@ -11,6 +11,13 @@ type List struct { Not bool } +func NewList(list []rune, not bool) List { + return List{ + List: list, + Not: not, + } +} + func (self List) Match(s string) bool { r, w := utf8.DecodeRuneInString(s) if len(s) > w { @@ -28,7 +35,7 @@ func (self List) Len() int { func (self List) Index(s string) (int, []int) { for i, r := range s { if self.Not == (runes.IndexRune(self.List, r) == -1) { - return i, []int{utf8.RuneLen(r)} + return i, segmentsByRuneLength[utf8.RuneLen(r)] } } diff --git a/match/list_test.go b/match/list_test.go index 473aa7e..10a5437 100644 --- a/match/list_test.go +++ b/match/list_test.go @@ -28,7 +28,7 @@ func TestListIndex(t *testing.T) { []int{1}, }, } { - p := List{test.list, test.not} + p := NewList(test.list, test.not) index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -40,7 +40,7 @@ func TestListIndex(t *testing.T) { } func BenchmarkIndexList(b *testing.B) { - m := List{[]rune("def"), false} + m := NewList([]rune("def"), false) for i := 0; i < b.N; i++ { m.Index(bench_pattern) @@ -48,7 +48,7 @@ func BenchmarkIndexList(b *testing.B) { } func BenchmarkIndexListParallel(b *testing.B) { - m := List{[]rune("def"), false} + m := NewList([]rune("def"), false) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/match.go b/match/match.go index 122753e..f80e007 100644 --- a/match/match.go +++ b/match/match.go @@ -1,5 +1,7 @@ package match +// todo common table of rune's length + import ( "fmt" "strings" diff --git a/match/match_test.go b/match/match_test.go index d60fc7d..3b60a7c 100644 --- a/match/match_test.go +++ b/match/match_test.go @@ -3,6 +3,7 @@ package match import ( "reflect" "testing" + "unicode/utf8" ) var bench_separators = []rune{'.'} @@ -62,3 +63,28 @@ func BenchmarkReverse(b *testing.B) { reverseSegments([]int{1, 2, 3, 4}) } } + +func getTable() []int { + table := make([]int, utf8.MaxRune+1) + for i := 0; i <= utf8.MaxRune; i++ { + table[i] = utf8.RuneLen(rune(i)) + } + + return table +} + +var table = getTable() + +const runeToLen = 'q' + +func BenchmarkRuneLenFromTable(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = table[runeToLen] + } +} + +func BenchmarkRuneLenFromUTF8(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = utf8.RuneLen(runeToLen) + } +} diff --git a/match/nothing.go b/match/nothing.go index ef5049b..664ade9 100644 --- a/match/nothing.go +++ b/match/nothing.go @@ -11,7 +11,7 @@ func (self Nothing) Match(s string) bool { } func (self Nothing) Index(s string) (int, []int) { - return 0, []int{0} + return 0, segments0 } func (self Nothing) Len() int { diff --git a/match/range.go b/match/range.go index 5b4ee03..8a90569 100644 --- a/match/range.go +++ b/match/range.go @@ -10,9 +10,6 @@ type Range struct { Not bool } -// todo make factory -// todo make range table inside factory - func (self Range) Len() int { return lenOne } @@ -31,7 +28,7 @@ func (self Range) Match(s string) bool { func (self Range) Index(s string) (int, []int) { for i, r := range s { if self.Not != (r >= self.Lo && r <= self.Hi) { - return i, []int{utf8.RuneLen(r)} + return i, segmentsByRuneLength[utf8.RuneLen(r)] } } diff --git a/match/row.go b/match/row.go index a079aa4..a836a5c 100644 --- a/match/row.go +++ b/match/row.go @@ -7,6 +7,15 @@ import ( type Row struct { Matchers Matchers RunesLength int + Segments []int +} + +func NewRow(m Matchers, len int) Row { + return Row{ + Matchers: m, + RunesLength: len, + Segments: []int{len}, + } } func (self Row) matchAll(s string) bool { @@ -66,7 +75,7 @@ func (self Row) Index(s string) (int, []int) { } if self.matchAll(s[i:]) { - return i, []int{self.RunesLength} + return i, self.Segments } } diff --git a/match/segments.go b/match/segments.go index 0077e4e..63c7b2e 100644 --- a/match/segments.go +++ b/match/segments.go @@ -25,6 +25,23 @@ const ( cacheToAndHigherIndex = 1023 ) +var asciiTable [128]byte + +var segmentsByRuneLength [5][]int + +var ( + segments0 = []int{0} + segments1 = []int{1} + segments2 = []int{2} + segments3 = []int{3} + segments4 = []int{4} +) + +const ( + asciiLo = 0 + asciiHi = 127 +) + func init() { for i := cacheToAndHigher; i >= cacheFrom; i >>= 1 { func(i int) { @@ -33,6 +50,16 @@ func init() { }} }(i) } + + segmentsByRuneLength[0] = segments0 + segmentsByRuneLength[1] = segments1 + segmentsByRuneLength[2] = segments2 + segmentsByRuneLength[3] = segments3 + segmentsByRuneLength[4] = segments4 + + for i := 0; i <= 127; i++ { + asciiTable[i] = 1 + } } func getTableIndex(c int) int { diff --git a/match/single.go b/match/single.go index c0d3171..6911edf 100644 --- a/match/single.go +++ b/match/single.go @@ -27,7 +27,7 @@ func (self Single) Len() int { func (self Single) Index(s string) (int, []int) { for i, r := range s { if runes.IndexRune(self.Separators, r) == -1 { - return i, []int{utf8.RuneLen(r)} + return i, segmentsByRuneLength[utf8.RuneLen(r)] } } diff --git a/match/super.go b/match/super.go index fe70f3c..e7f1aec 100644 --- a/match/super.go +++ b/match/super.go @@ -15,6 +15,7 @@ func (self Super) Len() int { } func (self Super) Index(s string) (int, []int) { + //todo acquire here segments := make([]int, 0, len(s)+1) for i := range s { segments = append(segments, i) diff --git a/match/text.go b/match/text.go index f7f926b..8b93992 100644 --- a/match/text.go +++ b/match/text.go @@ -11,6 +11,7 @@ type Text struct { Str string RunesLength int BytesLength int + Segments []int } func NewText(s string) Text { @@ -18,6 +19,7 @@ func NewText(s string) Text { Str: s, RunesLength: utf8.RuneCountInString(s), BytesLength: len(s), + Segments: []int{len(s)}, } } @@ -35,7 +37,7 @@ func (self Text) Index(s string) (int, []int) { return -1, nil } - return index, []int{self.BytesLength} + return index, self.Segments } func (self Text) String() string { diff --git a/runes/runes.go b/runes/runes.go index 8586b16..a723556 100644 --- a/runes/runes.go +++ b/runes/runes.go @@ -84,6 +84,32 @@ func Contains(s, needle []rune) bool { return Index(s, needle) >= 0 } +func Max(s []rune) (max rune) { + for _, r := range s { + if r > max { + max = r + } + } + + return +} + +func Min(s []rune) rune { + min := rune(-1) + for _, r := range s { + if min == -1 { + min = r + continue + } + + if r < min { + min = r + } + } + + return min +} + func IndexRune(s []rune, r rune) int { for i, c := range s { if c == r { From 8de721a7a4c308eee8c84be05b40735cc55762bc Mon Sep 17 00:00:00 2001 From: gobwas Date: Tue, 23 Feb 2016 14:46:32 +0300 Subject: [PATCH 13/26] opti --- compiler.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler.go b/compiler.go index 57b2d4c..fa9ab91 100644 --- a/compiler.go +++ b/compiler.go @@ -1,5 +1,7 @@ package glob +// TODO use constructor with all matchers, and to their structs private + import ( "fmt" "github.com/gobwas/glob/match" @@ -122,7 +124,7 @@ func glueAsRow(matchers []match.Matcher) match.Matcher { } } - return match.Row{c, l} + return match.NewRow(c, l) } func glueAsEvery(matchers []match.Matcher) match.Matcher { From 3dc92f7b54b9108559e334c2fbd09cbe1eefdfe9 Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 00:49:41 +0300 Subject: [PATCH 14/26] benchmarking script --- bench.sh | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100755 bench.sh diff --git a/bench.sh b/bench.sh new file mode 100755 index 0000000..5264052 --- /dev/null +++ b/bench.sh @@ -0,0 +1,25 @@ +#! /bin/bash + +bench() { + filename="/tmp/$1-$2.bench" + if test -e "${filename}"; + then + echo "Already exists ${filename}" + else + backup=`git rev-parse --abbrev-ref HEAD` + git checkout $1 &>/dev/null + echo -n "Creating ${filename}... " + go test ./... -run=NONE -bench=$2 > "${filename}" -benchmem + echo "OK" + git checkout ${backup} &>/dev/null + fi +} + + +to=$1 +current=`git rev-parse --abbrev-ref HEAD` + +bench ${to} $2 +bench ${current} $2 + +benchcmp $3 "/tmp/${to}-$2.bench" "/tmp/${current}-$2.bench" From f884dfeb2e127a9c3e334c23c866078349925686 Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 11:19:54 +0300 Subject: [PATCH 15/26] t --- match/any.go | 2 +- match/segments.go | 22 ++++++++-------------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/match/any.go b/match/any.go index 02492e8..e7a2953 100644 --- a/match/any.go +++ b/match/any.go @@ -37,5 +37,5 @@ func (self Any) Len() int { } func (self Any) String() string { - return fmt.Sprintf("", self.Separators) + return fmt.Sprintf("", string(self.Separators)) } diff --git a/match/segments.go b/match/segments.go index 63c7b2e..7a349cf 100644 --- a/match/segments.go +++ b/match/segments.go @@ -25,10 +25,6 @@ const ( cacheToAndHigherIndex = 1023 ) -var asciiTable [128]byte - -var segmentsByRuneLength [5][]int - var ( segments0 = []int{0} segments1 = []int{1} @@ -37,6 +33,14 @@ var ( segments4 = []int{4} ) +var segmentsByRuneLength [5][]int = [5][]int{ + 0: segments0, + 1: segments1, + 2: segments2, + 3: segments3, + 4: segments4, +} + const ( asciiLo = 0 asciiHi = 127 @@ -50,16 +54,6 @@ func init() { }} }(i) } - - segmentsByRuneLength[0] = segments0 - segmentsByRuneLength[1] = segments1 - segmentsByRuneLength[2] = segments2 - segmentsByRuneLength[3] = segments3 - segmentsByRuneLength[4] = segments4 - - for i := 0; i <= 127; i++ { - asciiTable[i] = 1 - } } func getTableIndex(c int) int { From 34494ea1161135dfe7c7ce17402347faed571599 Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 12:36:15 +0300 Subject: [PATCH 16/26] Use of constuctors every where, optimizations --- compiler.go | 60 ++++---- compiler_test.go | 278 ++++++++++++++++++------------------ match/any.go | 4 + match/any_of.go | 5 +- match/any_of_test.go | 8 +- match/any_test.go | 6 +- match/btree.go | 15 +- match/btree_test.go | 10 +- match/contains.go | 4 + match/contains_test.go | 6 +- match/every_of.go | 11 +- match/every_of_test.go | 10 +- match/list.go | 7 +- match/max.go | 7 +- match/max_test.go | 12 +- match/min.go | 11 +- match/min_test.go | 12 +- match/nothing.go | 4 + match/nothing_test.go | 12 +- match/prefix.go | 4 + match/prefix_suffix.go | 12 +- match/prefix_suffix_test.go | 6 +- match/prefix_test.go | 6 +- match/range.go | 4 + match/range_test.go | 6 +- match/row.go | 6 +- match/row_test.go | 33 ++--- match/single.go | 6 +- match/single_test.go | 6 +- match/suffix.go | 18 ++- match/suffix_test.go | 6 +- match/super.go | 7 +- match/super_test.go | 6 +- 33 files changed, 341 insertions(+), 267 deletions(-) diff --git a/compiler.go b/compiler.go index fa9ab91..c0d087b 100644 --- a/compiler.go +++ b/compiler.go @@ -14,7 +14,7 @@ func optimize(matcher match.Matcher) match.Matcher { case match.Any: if len(m.Separators) == 0 { - return match.Super{} + return match.NewSuper() } case match.AnyOf: @@ -54,23 +54,23 @@ func optimize(matcher match.Matcher) match.Matcher { rs, rightSuffix := m.Right.(match.Suffix) if leftSuper && rightSuper { - return match.Contains{r.Str, false} + return match.NewContains(r.Str, false) } if leftSuper && rightNil { - return match.Suffix{r.Str} + return match.NewSuffix(r.Str) } if rightSuper && leftNil { - return match.Prefix{r.Str} + return match.NewPrefix(r.Str) } if leftNil && rightSuffix { - return match.PrefixSuffix{Prefix: r.Str, Suffix: rs.Suffix} + return match.NewPrefixSuffix(r.Str, rs.Suffix) } if rightNil && leftPrefix { - return match.PrefixSuffix{Prefix: lp.Prefix, Suffix: r.Str} + return match.NewPrefixSuffix(lp.Prefix, r.Str) } return m @@ -124,7 +124,7 @@ func glueAsRow(matchers []match.Matcher) match.Matcher { } } - return match.NewRow(c, l) + return match.NewRow(l, c...) } func glueAsEvery(matchers []match.Matcher) match.Matcher { @@ -182,29 +182,29 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher { } if hasSuper && !hasAny && !hasSingle { - return match.Super{} + return match.NewSuper() } if hasAny && !hasSuper && !hasSingle { - return match.Any{separator} + return match.NewAny(separator) } if (hasAny || hasSuper) && min > 0 && len(separator) == 0 { - return match.Min{min} + return match.NewMin(min) } - every := match.EveryOf{} + every := match.NewEveryOf() if min > 0 { - every.Add(match.Min{min}) + every.Add(match.NewMin(min)) if !hasAny && !hasSuper { - every.Add(match.Max{min}) + every.Add(match.NewMax(min)) } } if len(separator) > 0 { - every.Add(match.Contains{string(separator), true}) + every.Add(match.NewContains(string(separator), true)) } return every @@ -474,7 +474,7 @@ func doAnyOf(n *nodeAnyOf, s []rune) (match.Matcher, error) { var matchers []match.Matcher for _, desc := range n.children() { if desc == nil { - matchers = append(matchers, match.Nothing{}) + matchers = append(matchers, match.NewNothing()) continue } @@ -485,7 +485,7 @@ func doAnyOf(n *nodeAnyOf, s []rune) (match.Matcher, error) { matchers = append(matchers, optimize(m)) } - return match.AnyOf{matchers}, nil + return match.NewAnyOf(matchers...), nil } func do(leaf node, s []rune) (m match.Matcher, err error) { @@ -500,7 +500,7 @@ func do(leaf node, s []rune) (m match.Matcher, err error) { var matchers []match.Matcher for _, desc := range n.children() { if desc == nil { - matchers = append(matchers, match.Nothing{}) + matchers = append(matchers, match.NewNothing()) continue } @@ -511,12 +511,12 @@ func do(leaf node, s []rune) (m match.Matcher, err error) { matchers = append(matchers, optimize(m)) } - return match.AnyOf{matchers}, nil + return match.NewAnyOf(matchers...), nil case *nodePattern: nodes := leaf.children() if len(nodes) == 0 { - return match.Nothing{}, nil + return match.NewNothing(), nil } var matchers []match.Matcher @@ -534,19 +534,19 @@ func do(leaf node, s []rune) (m match.Matcher, err error) { } case *nodeList: - m = match.List{[]rune(n.chars), n.not} + m = match.NewList([]rune(n.chars), n.not) case *nodeRange: - m = match.Range{n.lo, n.hi, n.not} + m = match.NewRange(n.lo, n.hi, n.not) case *nodeAny: - m = match.Any{s} + m = match.NewAny(s) case *nodeSuper: - m = match.Super{} + m = match.NewSuper() case *nodeSingle: - m = match.Single{s} + m = match.NewSingle(s) case *nodeText: m = match.NewText(n.text) @@ -633,19 +633,19 @@ func do2(node node, s []rune) ([]match.Matcher, error) { } case *nodeList: - result = append(result, match.List{[]rune(n.chars), n.not}) + result = append(result, match.NewList([]rune(n.chars), n.not)) case *nodeRange: - result = append(result, match.Range{n.lo, n.hi, n.not}) + result = append(result, match.NewRange(n.lo, n.hi, n.not)) case *nodeAny: - result = append(result, match.Any{s}) + result = append(result, match.NewAny(s)) case *nodeSuper: - result = append(result, match.Super{}) + result = append(result, match.NewSuper()) case *nodeSingle: - result = append(result, match.Single{s}) + result = append(result, match.NewSingle(s)) case *nodeText: result = append(result, match.NewText(n.text)) @@ -669,7 +669,7 @@ func compile(ast *nodePattern, s []rune) (Glob, error) { // if len(ms) == 1 { // return ms[0], nil // } else { - // return match.AnyOf{ms}, nil + // return match.NewAnyOf(ms), nil // } g, err := do(ast, s) diff --git a/compiler_test.go b/compiler_test.go index 0be7b76..f2d0c70 100644 --- a/compiler_test.go +++ b/compiler_test.go @@ -15,40 +15,40 @@ func TestGlueMatchers(t *testing.T) { }{ { []match.Matcher{ - match.Super{}, - match.Single{}, + match.NewSuper(), + match.NewSingle(nil), }, - match.Min{1}, + match.NewMin(1), }, { []match.Matcher{ - match.Any{separators}, - match.Single{separators}, + match.NewAny(separators), + match.NewSingle(separators), }, match.EveryOf{match.Matchers{ - match.Min{1}, - match.Contains{string(separators), true}, + match.NewMin(1), + match.NewContains(string(separators), true), }}, }, { []match.Matcher{ - match.Single{}, - match.Single{}, - match.Single{}, + match.NewSingle(nil), + match.NewSingle(nil), + match.NewSingle(nil), }, match.EveryOf{match.Matchers{ - match.Min{3}, - match.Max{3}, + match.NewMin(3), + match.NewMax(3), }}, }, { []match.Matcher{ - match.List{[]rune{'a'}, true}, - match.Any{[]rune{'a'}}, + match.NewList([]rune{'a'}, true), + match.NewAny([]rune{'a'}), }, match.EveryOf{match.Matchers{ - match.Min{1}, - match.Contains{"a", true}, + match.NewMin(1), + match.NewContains("a", true), }}, }, } { @@ -59,7 +59,7 @@ func TestGlueMatchers(t *testing.T) { } if !reflect.DeepEqual(act, test.exp) { - t.Errorf("#%d unexpected convert matchers result:\nact: %s;\nexp: %s", id, act, test.exp) + t.Errorf("#%d unexpected convert matchers result:\nact: %#v;\nexp: %#v", id, act, test.exp) continue } } @@ -72,15 +72,15 @@ func TestCompileMatchers(t *testing.T) { }{ { []match.Matcher{ - match.Super{}, - match.Single{separators}, + match.NewSuper(), + match.NewSingle(separators), match.NewText("c"), }, match.NewBTree( match.NewText("c"), match.NewBTree( - match.Single{separators}, - match.Super{}, + match.NewSingle(separators), + match.NewSuper(), nil, ), nil, @@ -88,32 +88,32 @@ func TestCompileMatchers(t *testing.T) { }, { []match.Matcher{ - match.Any{}, + match.NewAny(nil), match.NewText("c"), - match.Any{}, + match.NewAny(nil), }, match.NewBTree( match.NewText("c"), - match.Any{}, - match.Any{}, + match.NewAny(nil), + match.NewAny(nil), ), }, { []match.Matcher{ - match.Range{'a', 'c', true}, - match.List{[]rune{'z', 't', 'e'}, false}, + match.NewRange('a', 'c', true), + match.NewList([]rune{'z', 't', 'e'}, false), match.NewText("c"), - match.Single{}, + match.NewSingle(nil), }, - match.Row{ - Matchers: match.Matchers{ - match.Range{'a', 'c', true}, - match.List{[]rune{'z', 't', 'e'}, false}, + match.NewRow( + 4, + match.Matchers{ + match.NewRange('a', 'c', true), + match.NewList([]rune{'z', 't', 'e'}, false), match.NewText("c"), - match.Single{}, - }, - RunesLength: 4, - }, + match.NewSingle(nil), + }..., + ), }, } { act, err := compileMatchers(test.in) @@ -123,7 +123,7 @@ func TestCompileMatchers(t *testing.T) { } if !reflect.DeepEqual(act, test.exp) { - t.Errorf("#%d unexpected convert matchers result:\nact: %s;\nexp: %s", id, act, test.exp) + t.Errorf("#%d unexpected convert matchers result:\nact: %#v\nexp: %#v", id, act, test.exp) continue } } @@ -135,52 +135,52 @@ func TestConvertMatchers(t *testing.T) { }{ { []match.Matcher{ - match.Range{'a', 'c', true}, - match.List{[]rune{'z', 't', 'e'}, false}, + match.NewRange('a', 'c', true), + match.NewList([]rune{'z', 't', 'e'}, false), match.NewText("c"), - match.Single{}, - match.Any{}, + match.NewSingle(nil), + match.NewAny(nil), }, []match.Matcher{ - match.Row{ - Matchers: match.Matchers{ - match.Range{'a', 'c', true}, - match.List{[]rune{'z', 't', 'e'}, false}, + match.NewRow( + 4, + []match.Matcher{ + match.NewRange('a', 'c', true), + match.NewList([]rune{'z', 't', 'e'}, false), match.NewText("c"), - match.Single{}, - }, - RunesLength: 4, - }, - match.Any{}, + match.NewSingle(nil), + }..., + ), + match.NewAny(nil), }, }, { []match.Matcher{ - match.Range{'a', 'c', true}, - match.List{[]rune{'z', 't', 'e'}, false}, + match.NewRange('a', 'c', true), + match.NewList([]rune{'z', 't', 'e'}, false), match.NewText("c"), - match.Single{}, - match.Any{}, - match.Single{}, - match.Single{}, - match.Any{}, + match.NewSingle(nil), + match.NewAny(nil), + match.NewSingle(nil), + match.NewSingle(nil), + match.NewAny(nil), }, []match.Matcher{ - match.Row{ - Matchers: match.Matchers{ - match.Range{'a', 'c', true}, - match.List{[]rune{'z', 't', 'e'}, false}, + match.NewRow( + 3, + match.Matchers{ + match.NewRange('a', 'c', true), + match.NewList([]rune{'z', 't', 'e'}, false), match.NewText("c"), - }, - RunesLength: 3, - }, - match.Min{3}, + }..., + ), + match.NewMin(3), }, }, } { act := minimizeMatchers(test.in) if !reflect.DeepEqual(act, test.exp) { - t.Errorf("#%d unexpected convert matchers 2 result:\nact: %s;\nexp: %s", id, act, test.exp) + t.Errorf("#%d unexpected convert matchers 2 result:\nact: %#v\nexp: %#v", id, act, test.exp) continue } } @@ -213,20 +213,20 @@ func TestCompiler(t *testing.T) { { ast: pattern(&nodeAny{}), sep: separators, - result: match.Any{separators}, + result: match.NewAny(separators), }, { ast: pattern(&nodeAny{}), - result: match.Super{}, + result: match.NewSuper(), }, { ast: pattern(&nodeSuper{}), - result: match.Super{}, + result: match.NewSuper(), }, { ast: pattern(&nodeSingle{}), sep: separators, - result: match.Single{separators}, + result: match.NewSingle(separators), }, { ast: pattern(&nodeRange{ @@ -234,39 +234,39 @@ func TestCompiler(t *testing.T) { hi: 'z', not: true, }), - result: match.Range{'a', 'z', true}, + result: match.NewRange('a', 'z', true), }, { ast: pattern(&nodeList{ chars: "abc", not: true, }), - result: match.List{[]rune{'a', 'b', 'c'}, true}, + result: match.NewList([]rune{'a', 'b', 'c'}, true), }, { ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}), sep: separators, result: match.EveryOf{Matchers: match.Matchers{ - match.Min{3}, - match.Contains{string(separators), true}, + match.NewMin(3), + match.NewContains(string(separators), true), }}, }, { ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}), - result: match.Min{3}, + result: match.NewMin(3), }, { ast: pattern(&nodeAny{}, &nodeText{text: "abc"}, &nodeSingle{}), sep: separators, result: match.NewBTree( - match.Row{ - Matchers: match.Matchers{ + match.NewRow( + 4, + match.Matchers{ match.NewText("abc"), - match.Single{separators}, - }, - RunesLength: 4, - }, - match.Any{separators}, + match.NewSingle(separators), + }..., + ), + match.NewAny(separators), nil, ), }, @@ -274,49 +274,49 @@ func TestCompiler(t *testing.T) { ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSingle{}), sep: separators, result: match.NewBTree( - match.Row{ - Matchers: match.Matchers{ - match.Single{separators}, + match.NewRow( + 5, + match.Matchers{ + match.NewSingle(separators), match.NewText("abc"), - match.Single{separators}, - }, - RunesLength: 5, - }, - match.Super{}, + match.NewSingle(separators), + }..., + ), + match.NewSuper(), nil, ), }, { ast: pattern(&nodeAny{}, &nodeText{text: "abc"}), - result: match.Suffix{"abc"}, + result: match.NewSuffix("abc"), }, { ast: pattern(&nodeText{text: "abc"}, &nodeAny{}), - result: match.Prefix{"abc"}, + result: match.NewPrefix("abc"), }, { ast: pattern(&nodeText{text: "abc"}, &nodeAny{}, &nodeText{text: "def"}), - result: match.PrefixSuffix{"abc", "def"}, + result: match.NewPrefixSuffix("abc", "def"), }, { ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}), - result: match.Contains{"abc", false}, + result: match.NewContains("abc", false), }, { ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}), sep: separators, result: match.NewBTree( match.NewText("abc"), - match.Any{separators}, - match.Any{separators}, + match.NewAny(separators), + match.NewAny(separators), ), }, { ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSuper{}, &nodeSingle{}), result: match.NewBTree( match.NewText("abc"), - match.Min{1}, - match.Min{1}, + match.NewMin(1), + match.NewMin(1), ), }, { @@ -348,9 +348,9 @@ func TestCompiler(t *testing.T) { match.NewText("abc"), nil, match.AnyOf{Matchers: match.Matchers{ - match.Single{}, - match.List{List: []rune{'d', 'e', 'f'}}, - match.Nothing{}, + match.NewSingle(nil), + match.NewList([]rune{'d', 'e', 'f'}, false), + match.NewNothing(), }}, ), }, @@ -361,15 +361,15 @@ func TestCompiler(t *testing.T) { &nodeAny{}, ), result: match.NewBTree( - match.Row{ - Matchers: match.Matchers{ - match.Range{Lo: 'a', Hi: 'z'}, - match.Range{Lo: 'a', Hi: 'x', Not: true}, - }, - RunesLength: 2, - }, + match.NewRow( + 2, + match.Matchers{ + match.NewRange('a', 'z', false), + match.NewRange('a', 'x', true), + }..., + ), nil, - match.Super{}, + match.NewSuper(), ), }, { @@ -385,17 +385,17 @@ func TestCompiler(t *testing.T) { &nodeText{text: "ghi"}, ), )), - result: match.Row{ - RunesLength: 7, - Matchers: match.Matchers{ + result: match.NewRow( + 7, + match.Matchers{ match.NewText("abc"), match.AnyOf{Matchers: match.Matchers{ - match.List{List: []rune{'a', 'b', 'c'}}, - match.List{List: []rune{'d', 'e', 'f'}}, + match.NewList([]rune{'a', 'b', 'c'}, false), + match.NewList([]rune{'d', 'e', 'f'}, false), }}, match.NewText("ghi"), - }, - }, + }..., + ), }, // { // ast: pattern( @@ -403,21 +403,21 @@ func TestCompiler(t *testing.T) { // anyOf(&nodeText{text: "c"}, &nodeText{text: "d"}), // ), // result: match.AnyOf{Matchers: match.Matchers{ - // match.Row{Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"c", 1}}}, - // match.Row{Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"d"}}}, - // match.Row{Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"c", 1}}}, - // match.Row{Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"d"}}}, + // match.NewRow(Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"c", 1}}), + // match.NewRow(Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"d"}}), + // match.NewRow(Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"c", 1}}), + // match.NewRow(Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"d"}}), // }}, // }, } { - prog, err := compile(test.ast, test.sep) + m, err := compile(test.ast, test.sep) if err != nil { t.Errorf("compilation error: %s", err) continue } - if !reflect.DeepEqual(prog, test.result) { - t.Errorf("#%d results are not equal:\nexp: %s,\nact: %s", id, test.result, prog) + if !reflect.DeepEqual(m, test.result) { + t.Errorf("#%d results are not equal:\nexp: %#v\nact: %#v", id, test.result, m) continue } } @@ -426,105 +426,105 @@ func TestCompiler(t *testing.T) { const complexityString = "abcd" //func BenchmarkComplexityAny(b *testing.B) { -// m := match.Any{} +// m := match.NewAny(nil) // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexityContains(b *testing.B) { -// m := match.Contains{} +// m := match.NewContains() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexityList(b *testing.B) { -// m := match.List{} +// m := match.NewList() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexityMax(b *testing.B) { -// m := match.Max{} +// m := match.NewMax() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexityMin(b *testing.B) { -// m := match.Min{} +// m := match.NewMin() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexityNothing(b *testing.B) { -// m := match.Nothing{} +// m := match.NewNothing() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexityPrefix(b *testing.B) { -// m := match.Prefix{} +// m := match.NewPrefix() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexityPrefixSuffix(b *testing.B) { -// m := match.PrefixSuffix{} +// m := match.NewPrefixSuffix() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexityRange(b *testing.B) { -// m := match.Range{} +// m := match.NewRange() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexityRow(b *testing.B) { -// m := match.Row{} +// m := match.NewRow() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexitySingle(b *testing.B) { -// m := match.Single{} +// m := match.NewSingle(nil) // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexitySuffix(b *testing.B) { -// m := match.Suffix{} +// m := match.NewSuffix() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexitySuper(b *testing.B) { -// m := match.Super{} +// m := match.NewSuper() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexityText(b *testing.B) { -// m := match.Text{} +// m := match.NewText() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) // } //} //func BenchmarkComplexityAnyOf(b *testing.B) { -// m := match.AnyOf{} +// m := match.NewAnyOf() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) @@ -538,7 +538,7 @@ const complexityString = "abcd" // } //} //func BenchmarkComplexityEveryOf(b *testing.B) { -// m := match.EveryOf{} +// m := match.NewEveryOf() // for i := 0; i < b.N; i++ { // _ = m.Match(complexityString) // _, _ = m.Index(complexityString) diff --git a/match/any.go b/match/any.go index e7a2953..1d2d12b 100644 --- a/match/any.go +++ b/match/any.go @@ -9,6 +9,10 @@ type Any struct { Separators []rune } +func NewAny(s []rune) Any { + return Any{s} +} + func (self Any) Match(s string) bool { return strings.IndexAnyRunes(s, self.Separators) == -1 } diff --git a/match/any_of.go b/match/any_of.go index 8cdfed1..ffae2f6 100644 --- a/match/any_of.go +++ b/match/any_of.go @@ -8,6 +8,10 @@ type AnyOf struct { Matchers Matchers } +func NewAnyOf(m ...Matcher) AnyOf { + return AnyOf{Matchers(m)} +} + func (self *AnyOf) Add(m Matcher) error { self.Matchers = append(self.Matchers, m) return nil @@ -27,7 +31,6 @@ func (self AnyOf) Index(s string) (int, []int) { index := -1 segments := acquireSegments(len(s)) - for _, m := range self.Matchers { idx, seg := m.Index(s) if idx == -1 { diff --git a/match/any_of_test.go b/match/any_of_test.go index 44782e9..3b478cf 100644 --- a/match/any_of_test.go +++ b/match/any_of_test.go @@ -14,7 +14,7 @@ func TestAnyOfIndex(t *testing.T) { }{ { Matchers{ - Any{}, + NewAny(nil), NewText("b"), NewText("c"), }, @@ -24,8 +24,8 @@ func TestAnyOfIndex(t *testing.T) { }, { Matchers{ - Prefix{"b"}, - Suffix{"c"}, + NewPrefix("b"), + NewSuffix("c"), }, "abc", 0, @@ -41,7 +41,7 @@ func TestAnyOfIndex(t *testing.T) { []int{1}, }, } { - everyOf := AnyOf{test.matchers} + everyOf := NewAnyOf(test.matchers...) index, segments := everyOf.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) diff --git a/match/any_test.go b/match/any_test.go index e15a0c7..358f553 100644 --- a/match/any_test.go +++ b/match/any_test.go @@ -25,7 +25,7 @@ func TestAnyIndex(t *testing.T) { []int{0, 1, 2, 3}, }, } { - p := Any{test.sep} + p := NewAny(test.sep) index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -37,7 +37,7 @@ func TestAnyIndex(t *testing.T) { } func BenchmarkIndexAny(b *testing.B) { - m := Any{bench_separators} + m := NewAny(bench_separators) for i := 0; i < b.N; i++ { _, s := m.Index(bench_pattern) @@ -46,7 +46,7 @@ func BenchmarkIndexAny(b *testing.B) { } func BenchmarkIndexAnyParallel(b *testing.B) { - m := Any{bench_separators} + m := NewAny(bench_separators) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/btree.go b/match/btree.go index 8827035..a8130e9 100644 --- a/match/btree.go +++ b/match/btree.go @@ -129,5 +129,18 @@ func (self BTree) Match(s string) bool { } func (self BTree) String() string { - return fmt.Sprintf("%s]>", self.Left, self.Value, self.Right) + const n string = "" + var l, r string + if self.Left == nil { + l = n + } else { + l = self.Left.String() + } + if self.Right == nil { + r = n + } else { + r = self.Right.String() + } + + return fmt.Sprintf("%s]>", l, self.Value, r) } diff --git a/match/btree_test.go b/match/btree_test.go index 3a96786..3bd9ea5 100644 --- a/match/btree_test.go +++ b/match/btree_test.go @@ -11,17 +11,17 @@ func TestBTree(t *testing.T) { exp bool }{ { - NewBTree(NewText("abc"), Super{}, Super{}), + NewBTree(NewText("abc"), NewSuper(), NewSuper()), "abc", true, }, { - NewBTree(NewText("a"), Single{}, Single{}), + NewBTree(NewText("a"), NewSingle(nil), NewSingle(nil)), "aaa", true, }, { - NewBTree(NewText("b"), Single{}, nil), + NewBTree(NewText("b"), NewSingle(nil), nil), "bbb", false, }, @@ -29,8 +29,8 @@ func TestBTree(t *testing.T) { NewBTree( NewText("c"), NewBTree( - Single{}, - Super{}, + NewSingle(nil), + NewSuper(), nil, ), nil, diff --git a/match/contains.go b/match/contains.go index be20492..b4f57fc 100644 --- a/match/contains.go +++ b/match/contains.go @@ -10,6 +10,10 @@ type Contains struct { Not bool } +func NewContains(needle string, not bool) Contains { + return Contains{needle, not} +} + func (self Contains) Match(s string) bool { return strings.Contains(s, self.Needle) != self.Not } diff --git a/match/contains_test.go b/match/contains_test.go index f840793..931322e 100644 --- a/match/contains_test.go +++ b/match/contains_test.go @@ -42,7 +42,7 @@ func TestContainsIndex(t *testing.T) { []int{0, 1, 2, 3}, }, } { - p := Contains{test.prefix, test.not} + p := NewContains(test.prefix, test.not) index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -54,7 +54,7 @@ func TestContainsIndex(t *testing.T) { } func BenchmarkIndexContains(b *testing.B) { - m := Contains{string(bench_separators), true} + m := NewContains(string(bench_separators), true) for i := 0; i < b.N; i++ { _, s := m.Index(bench_pattern) @@ -63,7 +63,7 @@ func BenchmarkIndexContains(b *testing.B) { } func BenchmarkIndexContainsParallel(b *testing.B) { - m := Contains{string(bench_separators), true} + m := NewContains(string(bench_separators), true) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/every_of.go b/match/every_of.go index 9220f78..7c968ee 100644 --- a/match/every_of.go +++ b/match/every_of.go @@ -8,6 +8,10 @@ type EveryOf struct { Matchers Matchers } +func NewEveryOf(m ...Matcher) EveryOf { + return EveryOf{Matchers(m)} +} + func (self *EveryOf) Add(m Matcher) error { self.Matchers = append(self.Matchers, m) return nil @@ -31,7 +35,6 @@ func (self EveryOf) Index(s string) (int, []int) { // make `in` with cap as len(s), // cause it is the maximum size of output segments values - //todo opti!!! next := acquireSegments(len(s)) current := acquireSegments(len(s)) @@ -39,6 +42,8 @@ func (self EveryOf) Index(s string) (int, []int) { for i, m := range self.Matchers { idx, seg := m.Index(sub) if idx == -1 { + releaseSegments(next) + releaseSegments(current) return -1, nil } @@ -61,6 +66,8 @@ func (self EveryOf) Index(s string) (int, []int) { } if len(next) == 0 { + releaseSegments(next) + releaseSegments(current) return -1, nil } @@ -72,6 +79,8 @@ func (self EveryOf) Index(s string) (int, []int) { offset += idx } + releaseSegments(next) + return index, current } diff --git a/match/every_of_test.go b/match/every_of_test.go index 3f70a78..eb83f86 100644 --- a/match/every_of_test.go +++ b/match/every_of_test.go @@ -14,7 +14,7 @@ func TestEveryOfIndex(t *testing.T) { }{ { Matchers{ - Any{}, + NewAny(nil), NewText("b"), NewText("c"), }, @@ -24,16 +24,16 @@ func TestEveryOfIndex(t *testing.T) { }, { Matchers{ - Any{}, - Prefix{"b"}, - Suffix{"c"}, + NewAny(nil), + NewPrefix("b"), + NewSuffix("c"), }, "abc", 1, []int{2}, }, } { - everyOf := EveryOf{test.matchers} + everyOf := NewEveryOf(test.matchers...) index, segments := everyOf.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) diff --git a/match/list.go b/match/list.go index 00f2d98..fe0841f 100644 --- a/match/list.go +++ b/match/list.go @@ -12,10 +12,7 @@ type List struct { } func NewList(list []rune, not bool) List { - return List{ - List: list, - Not: not, - } + return List{list, not} } func (self List) Match(s string) bool { @@ -48,5 +45,5 @@ func (self List) String() string { not = "!" } - return fmt.Sprintf("", not, self.List) + return fmt.Sprintf("", not, string(self.List)) } diff --git a/match/max.go b/match/max.go index 5405594..d72f69e 100644 --- a/match/max.go +++ b/match/max.go @@ -9,6 +9,10 @@ type Max struct { Limit int } +func NewMax(l int) Max { + return Max{l} +} + func (self Max) Match(s string) bool { var l int for range s { @@ -22,8 +26,7 @@ func (self Max) Match(s string) bool { } func (self Max) Index(s string) (int, []int) { - segments := make([]int, 0, self.Limit+1) - + segments := acquireSegments(self.Limit + 1) segments = append(segments, 0) var count int for i, r := range s { diff --git a/match/max_test.go b/match/max_test.go index 005e3f0..2367628 100644 --- a/match/max_test.go +++ b/match/max_test.go @@ -25,7 +25,7 @@ func TestMaxIndex(t *testing.T) { []int{0, 1, 2, 3}, }, } { - p := Max{test.limit} + p := NewMax(test.limit) index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -37,19 +37,21 @@ func TestMaxIndex(t *testing.T) { } func BenchmarkIndexMax(b *testing.B) { - m := Max{10} + m := NewMax(10) for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } func BenchmarkIndexMaxParallel(b *testing.B) { - m := Max{10} + m := NewMax(10) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } diff --git a/match/min.go b/match/min.go index 6942e29..db57ac8 100644 --- a/match/min.go +++ b/match/min.go @@ -9,6 +9,10 @@ type Min struct { Limit int } +func NewMin(l int) Min { + return Min{l} +} + func (self Min) Match(s string) bool { var l int for range s { @@ -24,7 +28,12 @@ func (self Min) Match(s string) bool { func (self Min) Index(s string) (int, []int) { var count int - segments := make([]int, 0, len(s)-self.Limit+1) + c := len(s) - self.Limit + 1 + if c <= 0 { + return -1, nil + } + + segments := acquireSegments(c) for i, r := range s { count++ if count >= self.Limit { diff --git a/match/min_test.go b/match/min_test.go index 2bea4ee..ab854ae 100644 --- a/match/min_test.go +++ b/match/min_test.go @@ -25,7 +25,7 @@ func TestMinIndex(t *testing.T) { []int{3, 4}, }, } { - p := Min{test.limit} + p := NewMin(test.limit) index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -37,19 +37,21 @@ func TestMinIndex(t *testing.T) { } func BenchmarkIndexMin(b *testing.B) { - m := Min{10} + m := NewMin(10) for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } func BenchmarkIndexMinParallel(b *testing.B) { - m := Min{10} + m := NewMin(10) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } diff --git a/match/nothing.go b/match/nothing.go index 664ade9..0d4ecd3 100644 --- a/match/nothing.go +++ b/match/nothing.go @@ -6,6 +6,10 @@ import ( type Nothing struct{} +func NewNothing() Nothing { + return Nothing{} +} + func (self Nothing) Match(s string) bool { return len(s) == 0 } diff --git a/match/nothing_test.go b/match/nothing_test.go index 569969c..941c22d 100644 --- a/match/nothing_test.go +++ b/match/nothing_test.go @@ -22,7 +22,7 @@ func TestNothingIndex(t *testing.T) { []int{0}, }, } { - p := Nothing{} + p := NewNothing() index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -34,19 +34,21 @@ func TestNothingIndex(t *testing.T) { } func BenchmarkIndexNothing(b *testing.B) { - m := Nothing{} + m := NewNothing() for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } } func BenchmarkIndexNothingParallel(b *testing.B) { - m := Nothing{} + m := NewNothing() b.RunParallel(func(pb *testing.PB) { for pb.Next() { - m.Index(bench_pattern) + _, s := m.Index(bench_pattern) + releaseSegments(s) } }) } diff --git a/match/prefix.go b/match/prefix.go index 429ff88..a734725 100644 --- a/match/prefix.go +++ b/match/prefix.go @@ -10,6 +10,10 @@ type Prefix struct { Prefix string } +func NewPrefix(p string) Prefix { + return Prefix{p} +} + func (self Prefix) Index(s string) (int, []int) { idx := strings.Index(s, self.Prefix) if idx == -1 { diff --git a/match/prefix_suffix.go b/match/prefix_suffix.go index 620b873..8208085 100644 --- a/match/prefix_suffix.go +++ b/match/prefix_suffix.go @@ -9,6 +9,10 @@ type PrefixSuffix struct { Prefix, Suffix string } +func NewPrefixSuffix(p, s string) PrefixSuffix { + return PrefixSuffix{p, s} +} + func (self PrefixSuffix) Index(s string) (int, []int) { prefixIdx := strings.Index(s, self.Prefix) if prefixIdx == -1 { @@ -16,11 +20,14 @@ func (self PrefixSuffix) Index(s string) (int, []int) { } suffixLen := len(self.Suffix) - if suffixLen <= 0 { return prefixIdx, []int{len(s) - prefixIdx} } + if (len(s) - prefixIdx) <= 0 { + return -1, nil + } + segments := acquireSegments(len(s) - prefixIdx) for sub := s[prefixIdx:]; ; { suffixIdx := strings.LastIndex(sub, self.Suffix) @@ -33,7 +40,8 @@ func (self PrefixSuffix) Index(s string) (int, []int) { } if len(segments) == 0 { - return -1, segments + releaseSegments(segments) + return -1, nil } reverseSegments(segments) diff --git a/match/prefix_suffix_test.go b/match/prefix_suffix_test.go index aaf5dca..79b17b2 100644 --- a/match/prefix_suffix_test.go +++ b/match/prefix_suffix_test.go @@ -35,7 +35,7 @@ func TestPrefixSuffixIndex(t *testing.T) { []int{3}, }, } { - p := PrefixSuffix{test.prefix, test.suffix} + p := NewPrefixSuffix(test.prefix, test.suffix) index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -47,7 +47,7 @@ func TestPrefixSuffixIndex(t *testing.T) { } func BenchmarkIndexPrefixSuffix(b *testing.B) { - m := PrefixSuffix{"qew", "sqw"} + m := NewPrefixSuffix("qew", "sqw") for i := 0; i < b.N; i++ { _, s := m.Index(bench_pattern) @@ -56,7 +56,7 @@ func BenchmarkIndexPrefixSuffix(b *testing.B) { } func BenchmarkIndexPrefixSuffixParallel(b *testing.B) { - m := PrefixSuffix{"qew", "sqw"} + m := NewPrefixSuffix("qew", "sqw") b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/prefix_test.go b/match/prefix_test.go index f6b2f04..22a296e 100644 --- a/match/prefix_test.go +++ b/match/prefix_test.go @@ -25,7 +25,7 @@ func TestPrefixIndex(t *testing.T) { []int{2, 3, 4, 5}, }, } { - p := Prefix{test.prefix} + p := NewPrefix(test.prefix) index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -37,7 +37,7 @@ func TestPrefixIndex(t *testing.T) { } func BenchmarkIndexPrefix(b *testing.B) { - m := Prefix{"qew"} + m := NewPrefix("qew") for i := 0; i < b.N; i++ { _, s := m.Index(bench_pattern) @@ -46,7 +46,7 @@ func BenchmarkIndexPrefix(b *testing.B) { } func BenchmarkIndexPrefixParallel(b *testing.B) { - m := Prefix{"qew"} + m := NewPrefix("qew") b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/range.go b/match/range.go index 8a90569..ce30245 100644 --- a/match/range.go +++ b/match/range.go @@ -10,6 +10,10 @@ type Range struct { Not bool } +func NewRange(lo, hi rune, not bool) Range { + return Range{lo, hi, not} +} + func (self Range) Len() int { return lenOne } diff --git a/match/range_test.go b/match/range_test.go index 1bde746..0dddcfd 100644 --- a/match/range_test.go +++ b/match/range_test.go @@ -35,7 +35,7 @@ func TestRangeIndex(t *testing.T) { []int{1}, }, } { - m := Range{test.lo, test.hi, test.not} + m := NewRange(test.lo, test.hi, test.not) index, segments := m.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -47,7 +47,7 @@ func TestRangeIndex(t *testing.T) { } func BenchmarkIndexRange(b *testing.B) { - m := Range{'0', '9', false} + m := NewRange('0', '9', false) for i := 0; i < b.N; i++ { _, s := m.Index(bench_pattern) @@ -56,7 +56,7 @@ func BenchmarkIndexRange(b *testing.B) { } func BenchmarkIndexRangeParallel(b *testing.B) { - m := Range{'0', '9', false} + m := NewRange('0', '9', false) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/row.go b/match/row.go index a836a5c..d0dcb35 100644 --- a/match/row.go +++ b/match/row.go @@ -10,9 +10,9 @@ type Row struct { Segments []int } -func NewRow(m Matchers, len int) Row { +func NewRow(len int, m ...Matcher) Row { return Row{ - Matchers: m, + Matchers: Matchers(m), RunesLength: len, Segments: []int{len}, } @@ -68,8 +68,6 @@ func (self Row) Index(s string) (int, []int) { for i := range s { // this is not strict check but useful - // when glob will be refactored for usage with []rune - // it will be better if len(s[i:]) < self.RunesLength { break } diff --git a/match/row_test.go b/match/row_test.go index 0bd7bd8..c9e65ef 100644 --- a/match/row_test.go +++ b/match/row_test.go @@ -17,7 +17,7 @@ func TestRowIndex(t *testing.T) { Matchers{ NewText("abc"), NewText("def"), - Single{}, + NewSingle(nil), }, 7, "qweabcdefghij", @@ -28,7 +28,7 @@ func TestRowIndex(t *testing.T) { Matchers{ NewText("abc"), NewText("def"), - Single{}, + NewSingle(nil), }, 7, "abcd", @@ -36,10 +36,7 @@ func TestRowIndex(t *testing.T) { nil, }, } { - p := Row{ - Matchers: test.matchers, - RunesLength: test.length, - } + p := NewRow(test.length, test.matchers...) index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -51,14 +48,14 @@ func TestRowIndex(t *testing.T) { } func BenchmarkRowIndex(b *testing.B) { - m := Row{ - Matchers: Matchers{ + m := NewRow( + 7, + Matchers{ NewText("abc"), NewText("def"), - Single{}, - }, - RunesLength: 7, - } + NewSingle(nil), + }..., + ) for i := 0; i < b.N; i++ { _, s := m.Index(bench_pattern) @@ -67,14 +64,14 @@ func BenchmarkRowIndex(b *testing.B) { } func BenchmarkIndexRowParallel(b *testing.B) { - m := Row{ - Matchers: Matchers{ + m := NewRow( + 7, + Matchers{ NewText("abc"), NewText("def"), - Single{}, - }, - RunesLength: 7, - } + NewSingle(nil), + }..., + ) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/single.go b/match/single.go index 6911edf..33e926d 100644 --- a/match/single.go +++ b/match/single.go @@ -11,6 +11,10 @@ type Single struct { Separators []rune } +func NewSingle(s []rune) Single { + return Single{s} +} + func (self Single) Match(s string) bool { r, w := utf8.DecodeRuneInString(s) if len(s) > w { @@ -35,5 +39,5 @@ func (self Single) Index(s string) (int, []int) { } func (self Single) String() string { - return fmt.Sprintf("", self.Separators) + return fmt.Sprintf("", string(self.Separators)) } diff --git a/match/single_test.go b/match/single_test.go index b08f236..a62d720 100644 --- a/match/single_test.go +++ b/match/single_test.go @@ -25,7 +25,7 @@ func TestSingleIndex(t *testing.T) { nil, }, } { - p := Single{test.separators} + p := NewSingle(test.separators) index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -37,7 +37,7 @@ func TestSingleIndex(t *testing.T) { } func BenchmarkIndexSingle(b *testing.B) { - m := Single{bench_separators} + m := NewSingle(bench_separators) for i := 0; i < b.N; i++ { _, s := m.Index(bench_pattern) @@ -46,7 +46,7 @@ func BenchmarkIndexSingle(b *testing.B) { } func BenchmarkIndexSingleParallel(b *testing.B) { - m := Single{bench_separators} + m := NewSingle(bench_separators) b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/suffix.go b/match/suffix.go index d38d71a..85bea8c 100644 --- a/match/suffix.go +++ b/match/suffix.go @@ -9,13 +9,8 @@ type Suffix struct { Suffix string } -func (self Suffix) Index(s string) (int, []int) { - idx := strings.Index(s, self.Suffix) - if idx == -1 { - return -1, nil - } - - return 0, []int{idx + len(self.Suffix)} +func NewSuffix(s string) Suffix { + return Suffix{s} } func (self Suffix) Len() int { @@ -26,6 +21,15 @@ func (self Suffix) Match(s string) bool { return strings.HasSuffix(s, self.Suffix) } +func (self Suffix) Index(s string) (int, []int) { + idx := strings.Index(s, self.Suffix) + if idx == -1 { + return -1, nil + } + + return 0, []int{idx + len(self.Suffix)} +} + func (self Suffix) String() string { return fmt.Sprintf("", self.Suffix) } diff --git a/match/suffix_test.go b/match/suffix_test.go index 3e3c250..4904763 100644 --- a/match/suffix_test.go +++ b/match/suffix_test.go @@ -25,7 +25,7 @@ func TestSuffixIndex(t *testing.T) { []int{5}, }, } { - p := Suffix{test.prefix} + p := NewSuffix(test.prefix) index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -37,7 +37,7 @@ func TestSuffixIndex(t *testing.T) { } func BenchmarkIndexSuffix(b *testing.B) { - m := Suffix{"qwe"} + m := NewSuffix("qwe") for i := 0; i < b.N; i++ { _, s := m.Index(bench_pattern) @@ -46,7 +46,7 @@ func BenchmarkIndexSuffix(b *testing.B) { } func BenchmarkIndexSuffixParallel(b *testing.B) { - m := Suffix{"qwe"} + m := NewSuffix("qwe") b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/match/super.go b/match/super.go index e7f1aec..3875950 100644 --- a/match/super.go +++ b/match/super.go @@ -6,6 +6,10 @@ import ( type Super struct{} +func NewSuper() Super { + return Super{} +} + func (self Super) Match(s string) bool { return true } @@ -15,8 +19,7 @@ func (self Super) Len() int { } func (self Super) Index(s string) (int, []int) { - //todo acquire here - segments := make([]int, 0, len(s)+1) + segments := acquireSegments(len(s) + 1) for i := range s { segments = append(segments, i) } diff --git a/match/super_test.go b/match/super_test.go index e7c2624..10418dc 100644 --- a/match/super_test.go +++ b/match/super_test.go @@ -22,7 +22,7 @@ func TestSuperIndex(t *testing.T) { []int{0}, }, } { - p := Super{} + p := NewSuper() index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -34,7 +34,7 @@ func TestSuperIndex(t *testing.T) { } func BenchmarkIndexSuper(b *testing.B) { - m := Super{} + m := NewSuper() for i := 0; i < b.N; i++ { _, s := m.Index(bench_pattern) @@ -43,7 +43,7 @@ func BenchmarkIndexSuper(b *testing.B) { } func BenchmarkIndexSuperParallel(b *testing.B) { - m := Super{} + m := NewSuper() b.RunParallel(func(pb *testing.PB) { for pb.Next() { From 72f082815c5056a5e82541904616d436b0eaaa58 Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 13:51:14 +0300 Subject: [PATCH 17/26] tune --- bench.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bench.sh b/bench.sh index 5264052..4856523 100755 --- a/bench.sh +++ b/bench.sh @@ -7,11 +7,11 @@ bench() { echo "Already exists ${filename}" else backup=`git rev-parse --abbrev-ref HEAD` - git checkout $1 &>/dev/null + git checkout $1 echo -n "Creating ${filename}... " go test ./... -run=NONE -bench=$2 > "${filename}" -benchmem echo "OK" - git checkout ${backup} &>/dev/null + git checkout ${backup} fi } From 6dc0cef6914601a040a41f4d6851b45cd62044ca Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 19:38:05 +0300 Subject: [PATCH 18/26] try use pool with channel --- match/segments.go | 77 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 5 deletions(-) diff --git a/match/segments.go b/match/segments.go index 7a349cf..1961caa 100644 --- a/match/segments.go +++ b/match/segments.go @@ -4,7 +4,14 @@ import ( "sync" ) -var segmentsPools [1024]*sync.Pool +type SomePool interface { + Get() []int + Put([]int) +} + +var segmentsPools [1024]SomePool + +//var segmentsPools [1024]*sync.Pool func toPowerOfTwo(v int) int { v-- @@ -49,9 +56,12 @@ const ( func init() { for i := cacheToAndHigher; i >= cacheFrom; i >>= 1 { func(i int) { - segmentsPools[i-1] = &sync.Pool{New: func() interface{} { + // segmentsPools[i-1] = &sync.Pool{New: func() interface{} { + // return make([]int, 0, i) + // }} + segmentsPools[i-1] = newChanPool(func() []int { return make([]int, 0, i) - }} + }) }(i) } } @@ -75,7 +85,8 @@ func acquireSegments(c int) []int { return make([]int, 0, c) } - return segmentsPools[getTableIndex(c)].Get().([]int)[:0] + // return segmentsPools[getTableIndex(c)].Get().([]int)[:0] + return segmentsPools[getTableIndex(c)].Get() } func releaseSegments(s []int) { @@ -87,5 +98,61 @@ func releaseSegments(s []int) { return } - segmentsPools[getTableIndex(cap(s))].Put(s) + segmentsPools[getTableIndex(c)].Put(s) +} + +type maker func() []int + +type syncPool struct { + new maker + pool sync.Pool +} + +func newSyncPool(m maker) *syncPool { + return &syncPool{ + new: m, + pool: sync.Pool{New: func() interface{} { + return m() + }}, + } +} + +func (s *syncPool) Get() []int { + return s.pool.Get().([]int)[:0] +} + +func (s *syncPool) Put(x []int) { + s.pool.Put(x) +} + +type chanPool struct { + pool chan []int + new maker + index int +} + +func newChanPool(m maker) *chanPool { + return &chanPool{ + pool: make(chan []int, 32), + new: m, + } +} + +func (c *chanPool) Get() []int { + select { + case s := <-c.pool: + return s[:0] + default: + // pool is empty + return c.new() + } +} + +func (c *chanPool) Put(s []int) { + select { + case c.pool <- s: + // ok + default: + // pool is full + } } From 0964106bb31fd6e634ab4c9826ca25480bb2d362 Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 19:44:33 +0300 Subject: [PATCH 19/26] try sync pool --- match/segments.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/match/segments.go b/match/segments.go index 1961caa..7d12850 100644 --- a/match/segments.go +++ b/match/segments.go @@ -59,7 +59,10 @@ func init() { // segmentsPools[i-1] = &sync.Pool{New: func() interface{} { // return make([]int, 0, i) // }} - segmentsPools[i-1] = newChanPool(func() []int { + // segmentsPools[i-1] = newChanPool(func() []int { + // return make([]int, 0, i) + // }) + segmentsPools[i-1] = newSyncPool(func() []int { return make([]int, 0, i) }) }(i) @@ -133,7 +136,7 @@ type chanPool struct { func newChanPool(m maker) *chanPool { return &chanPool{ - pool: make(chan []int, 32), + pool: make(chan []int, 16), new: m, } } From ee3b8204de5673aa3a68f0f546c6c6d83b41b750 Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 19:49:44 +0300 Subject: [PATCH 20/26] use sync.pool --- match/segments.go | 73 +++-------------------------------------------- 1 file changed, 4 insertions(+), 69 deletions(-) diff --git a/match/segments.go b/match/segments.go index 7d12850..5406f37 100644 --- a/match/segments.go +++ b/match/segments.go @@ -9,9 +9,7 @@ type SomePool interface { Put([]int) } -var segmentsPools [1024]SomePool - -//var segmentsPools [1024]*sync.Pool +var segmentsPools [1024]sync.Pool func toPowerOfTwo(v int) int { v-- @@ -56,15 +54,9 @@ const ( func init() { for i := cacheToAndHigher; i >= cacheFrom; i >>= 1 { func(i int) { - // segmentsPools[i-1] = &sync.Pool{New: func() interface{} { - // return make([]int, 0, i) - // }} - // segmentsPools[i-1] = newChanPool(func() []int { - // return make([]int, 0, i) - // }) - segmentsPools[i-1] = newSyncPool(func() []int { + segmentsPools[i-1] = sync.Pool{New: func() interface{} { return make([]int, 0, i) - }) + }} }(i) } } @@ -88,8 +80,7 @@ func acquireSegments(c int) []int { return make([]int, 0, c) } - // return segmentsPools[getTableIndex(c)].Get().([]int)[:0] - return segmentsPools[getTableIndex(c)].Get() + return segmentsPools[getTableIndex(c)].Get().([]int)[:0] } func releaseSegments(s []int) { @@ -103,59 +94,3 @@ func releaseSegments(s []int) { segmentsPools[getTableIndex(c)].Put(s) } - -type maker func() []int - -type syncPool struct { - new maker - pool sync.Pool -} - -func newSyncPool(m maker) *syncPool { - return &syncPool{ - new: m, - pool: sync.Pool{New: func() interface{} { - return m() - }}, - } -} - -func (s *syncPool) Get() []int { - return s.pool.Get().([]int)[:0] -} - -func (s *syncPool) Put(x []int) { - s.pool.Put(x) -} - -type chanPool struct { - pool chan []int - new maker - index int -} - -func newChanPool(m maker) *chanPool { - return &chanPool{ - pool: make(chan []int, 16), - new: m, - } -} - -func (c *chanPool) Get() []int { - select { - case s := <-c.pool: - return s[:0] - default: - // pool is empty - return c.new() - } -} - -func (c *chanPool) Put(s []int) { - select { - case c.pool <- s: - // ok - default: - // pool is full - } -} From 543f3e714cfe6fd860bb314548dc8bae1dcea27c Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 19:50:04 +0300 Subject: [PATCH 21/26] rename typo --- match/{segements_test.go => segments_test.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename match/{segements_test.go => segments_test.go} (100%) diff --git a/match/segements_test.go b/match/segments_test.go similarity index 100% rename from match/segements_test.go rename to match/segments_test.go From 2cd1592c0b709ca8ff518fafc31404e6c4f7836a Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 20:16:32 +0300 Subject: [PATCH 22/26] sleep before takes --- bench.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/bench.sh b/bench.sh index 4856523..e033d89 100755 --- a/bench.sh +++ b/bench.sh @@ -20,6 +20,7 @@ to=$1 current=`git rev-parse --abbrev-ref HEAD` bench ${to} $2 +sleep 5 bench ${current} $2 benchcmp $3 "/tmp/${to}-$2.bench" "/tmp/${current}-$2.bench" From 5abd72c5443a38449380926ecc361fa04433e972 Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 20:23:24 +0300 Subject: [PATCH 23/26] fixes --- cmd/globdraw/main.go | 66 +++++++------------------------------------- match/debug/debug.go | 55 ++++++++++++++++++++++++++++++++++++ readme.md | 8 +++--- 3 files changed, 69 insertions(+), 60 deletions(-) create mode 100644 match/debug/debug.go diff --git a/cmd/globdraw/main.go b/cmd/globdraw/main.go index 8ba91f1..585880d 100644 --- a/cmd/globdraw/main.go +++ b/cmd/globdraw/main.go @@ -1,64 +1,16 @@ package main import ( - "bytes" "flag" "fmt" "github.com/gobwas/glob" "github.com/gobwas/glob/match" - "math/rand" + "github.com/gobwas/glob/match/debug" "os" "strings" "unicode/utf8" ) -func draw(pattern string, m match.Matcher) string { - return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, graphviz(m, fmt.Sprintf("%x", rand.Int63()))) -} - -func graphviz(m match.Matcher, id string) string { - buf := &bytes.Buffer{} - - switch matcher := m.(type) { - case match.BTree: - fmt.Fprintf(buf, `"%s"[label="%s"];`, id, matcher.Value.String()) - for _, m := range []match.Matcher{matcher.Left, matcher.Right} { - switch n := m.(type) { - case nil: - rnd := rand.Int63() - fmt.Fprintf(buf, `"%x"[label=""];`, rnd) - fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd) - - default: - sub := fmt.Sprintf("%x", rand.Int63()) - fmt.Fprintf(buf, `"%s"->"%s";`, id, sub) - fmt.Fprintf(buf, graphviz(n, sub)) - } - } - - case match.AnyOf: - fmt.Fprintf(buf, `"%s"[label="AnyOf"];`, id) - for _, m := range matcher.Matchers { - rnd := rand.Int63() - fmt.Fprintf(buf, graphviz(m, fmt.Sprintf("%x", rnd))) - fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd) - } - - case match.EveryOf: - fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id) - for _, m := range matcher.Matchers { - rnd := rand.Int63() - fmt.Fprintf(buf, graphviz(m, fmt.Sprintf("%x", rnd))) - fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd) - } - - default: - fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m.String()) - } - - return buf.String() -} - func main() { pattern := flag.String("p", "", "pattern to draw") sep := flag.String("s", "", "comma separated list of separators characters") @@ -70,12 +22,14 @@ func main() { } var separators []rune - for _, c := range strings.Split(*sep, ",") { - if r, w := utf8.DecodeRuneInString(c); len(c) > w { - fmt.Println("only single charactered separators are allowed") - os.Exit(1) - } else { - separators = append(separators, r) + if len(*sep) > 0 { + for _, c := range strings.Split(*sep, ",") { + if r, w := utf8.DecodeRuneInString(c); len(c) > w { + fmt.Println("only single charactered separators are allowed") + os.Exit(1) + } else { + separators = append(separators, r) + } } } @@ -86,5 +40,5 @@ func main() { } matcher := glob.(match.Matcher) - fmt.Fprint(os.Stdout, draw(*pattern, matcher)) + fmt.Fprint(os.Stdout, debug.Graphviz(*pattern, matcher)) } diff --git a/match/debug/debug.go b/match/debug/debug.go new file mode 100644 index 0000000..5c5dbc1 --- /dev/null +++ b/match/debug/debug.go @@ -0,0 +1,55 @@ +package debug + +import ( + "bytes" + "fmt" + "github.com/gobwas/glob/match" + "math/rand" +) + +func Graphviz(pattern string, m match.Matcher) string { + return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, graphviz_internal(m, fmt.Sprintf("%x", rand.Int63()))) +} + +func graphviz_internal(m match.Matcher, id string) string { + buf := &bytes.Buffer{} + + switch matcher := m.(type) { + case match.BTree: + fmt.Fprintf(buf, `"%s"[label="%s"];`, id, matcher.Value.String()) + for _, m := range []match.Matcher{matcher.Left, matcher.Right} { + switch n := m.(type) { + case nil: + rnd := rand.Int63() + fmt.Fprintf(buf, `"%x"[label=""];`, rnd) + fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd) + + default: + sub := fmt.Sprintf("%x", rand.Int63()) + fmt.Fprintf(buf, `"%s"->"%s";`, id, sub) + fmt.Fprintf(buf, graphviz_internal(n, sub)) + } + } + + case match.AnyOf: + fmt.Fprintf(buf, `"%s"[label="AnyOf"];`, id) + for _, m := range matcher.Matchers { + rnd := rand.Int63() + fmt.Fprintf(buf, graphviz_internal(m, fmt.Sprintf("%x", rnd))) + fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd) + } + + case match.EveryOf: + fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id) + for _, m := range matcher.Matchers { + rnd := rand.Int63() + fmt.Fprintf(buf, graphviz_internal(m, fmt.Sprintf("%x", rnd))) + fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd) + } + + default: + fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m.String()) + } + + return buf.String() +} diff --git a/readme.md b/readme.md index 0f387ee..3e68b52 100644 --- a/readme.md +++ b/readme.md @@ -26,13 +26,13 @@ func main() { g.Match("api.github.com") // true // create new glob with set of delimiters as ["."] - g = glob.MustCompile("api.*.com", ".") + g = glob.MustCompile("api.*.com", '.') g.Match("api.github.com") // true g.Match("api.gi.hub.com") // false // create new glob with set of delimiters as ["."] // but now with super wildcard - g = glob.MustCompile("api.**.com", ".") + g = glob.MustCompile("api.**.com", '.') g.Match("api.github.com") // true g.Match("api.gi.hub.com") // true @@ -42,8 +42,8 @@ func main() { g.Match("fat") // true g.Match("at") // false - // create glob with single symbol wildcard and delimiters ["f"] - g = glob.MustCompile("?at", "f") + // create glob with single symbol wildcard and delimiters ['f'] + g = glob.MustCompile("?at", 'f') g.Match("cat") // true g.Match("fat") // false g.Match("at") // false From 803c985a9acae9b208653b4374f200c3b036a4fe Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 23:10:13 +0300 Subject: [PATCH 24/26] cleanup --- match/segments.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/match/segments.go b/match/segments.go index 5406f37..9ea6f30 100644 --- a/match/segments.go +++ b/match/segments.go @@ -46,11 +46,6 @@ var segmentsByRuneLength [5][]int = [5][]int{ 4: segments4, } -const ( - asciiLo = 0 - asciiHi = 127 -) - func init() { for i := cacheToAndHigher; i >= cacheFrom; i >>= 1 { func(i int) { From d8ec0fa85c04f69c56982e6517a92dc8d32ea54c Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 23:25:01 +0300 Subject: [PATCH 25/26] tune script --- bench.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench.sh b/bench.sh index e033d89..804cf22 100755 --- a/bench.sh +++ b/bench.sh @@ -12,6 +12,7 @@ bench() { go test ./... -run=NONE -bench=$2 > "${filename}" -benchmem echo "OK" git checkout ${backup} + sleep 5 fi } @@ -20,7 +21,6 @@ to=$1 current=`git rev-parse --abbrev-ref HEAD` bench ${to} $2 -sleep 5 bench ${current} $2 benchcmp $3 "/tmp/${to}-$2.bench" "/tmp/${current}-$2.bench" From 7a245c9846ad6d223d599919dbfd4877e9d91c23 Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 23:33:18 +0300 Subject: [PATCH 26/26] update result of benchmarks --- readme.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/readme.md b/readme.md index 3e68b52..dcb4b2a 100644 --- a/readme.md +++ b/readme.md @@ -100,18 +100,18 @@ Run `go test -bench=.` from source root to see the benchmarks: Pattern | Fixture | Match | Operations | Speed (ns/op) --------|---------|-------|------------|-------------- -`[a-z][!a-x]*cat*[h][!b]*eyes*` | `my cat has very bright eyes` | `true` | 2000000 | 527 -`[a-z][!a-x]*cat*[h][!b]*eyes*` | `my dog has very bright eyes` | `false` | 10000000 | 229 -`https://*.google.*` | `https://account.google.com` | `true` | 10000000 | 121 -`https://*.google.*` | `https://google.com` | `false` | 20000000 | 68.6 -`{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}` | `http://yahoo.com` | `true` | 10000000 | 167 -`{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}` | `http://google.com` | `false` | 10000000 | 198 -`{https://*gobwas.com,http://exclude.gobwas.com}` | `https://safe.gobwas.com` | `true` | 100000000 | 23.9 -`{https://*gobwas.com,http://exclude.gobwas.com}` | `http://safe.gobwas.com` | `false` | 50000000 | 24.7 -`abc*` | `abcdef` | `true` | 200000000 | 8.86 -`abc*` | `af` | `false` | 300000000 | 4.99 -`*def` | `abcdef` | `true` | 200000000 | 9.23 -`*def` | `af` | `false` | 300000000 | 5.44 +`[a-z][!a-x]*cat*[h][!b]*eyes*` | `my cat has very bright eyes` | `true` | 2000000 | 432 +`[a-z][!a-x]*cat*[h][!b]*eyes*` | `my dog has very bright eyes` | `false` | 10000000 | 199 +`https://*.google.*` | `https://account.google.com` | `true` | 10000000 | 96 +`https://*.google.*` | `https://google.com` | `false` | 20000000 | 66 +`{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}` | `http://yahoo.com` | `true` | 10000000 | 163 +`{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}` | `http://google.com` | `false` | 10000000 | 197 +`{https://*gobwas.com,http://exclude.gobwas.com}` | `https://safe.gobwas.com` | `true` | 100000000 | 22 +`{https://*gobwas.com,http://exclude.gobwas.com}` | `http://safe.gobwas.com` | `false` | 50000000 | 24 +`abc*` | `abcdef` | `true` | 200000000 | 8.15 +`abc*` | `af` | `false` | 300000000 | 5.68 +`*def` | `abcdef` | `true` | 200000000 | 8.84 +`*def` | `af` | `false` | 300000000 | 5.74 `ab*ef` | `abcdef` | `true` | 100000000 | 15.2 `ab*ef` | `af` | `false` | 100000000 | 10.4