diff --git a/cmd/globdraw/main.go b/cmd/globdraw/main.go index bc64020..8ba91f1 100644 --- a/cmd/globdraw/main.go +++ b/cmd/globdraw/main.go @@ -9,6 +9,7 @@ import ( "math/rand" "os" "strings" + "unicode/utf8" ) func draw(pattern string, m match.Matcher) string { @@ -60,7 +61,7 @@ func graphviz(m match.Matcher, id string) string { func main() { pattern := flag.String("p", "", "pattern to draw") - sep := flag.String("s", "", "comma separated list of separators") + sep := flag.String("s", "", "comma separated list of separators characters") flag.Parse() if *pattern == "" { @@ -68,7 +69,17 @@ func main() { os.Exit(1) } - glob, err := glob.Compile(*pattern, strings.Split(*sep, ",")...) + var separators []rune + for _, c := range strings.Split(*sep, ",") { + if r, w := utf8.DecodeRuneInString(c); len(c) > w { + fmt.Println("only single charactered separators are allowed") + os.Exit(1) + } else { + separators = append(separators, r) + } + } + + glob, err := glob.Compile(*pattern, separators...) if err != nil { fmt.Println("could not compile pattern:", err) os.Exit(1) diff --git a/cmd/globtest/main.go b/cmd/globtest/main.go index 25c89ca..95c102f 100644 --- a/cmd/globtest/main.go +++ b/cmd/globtest/main.go @@ -7,6 +7,7 @@ import ( "os" "strings" "testing" + "unicode/utf8" ) func benchString(r testing.BenchmarkResult) string { @@ -42,7 +43,16 @@ func main() { os.Exit(1) } - separators := strings.Split(*sep, ",") + var separators []rune + for _, c := range strings.Split(*sep, ",") { + if r, w := utf8.DecodeRuneInString(c); len(c) > w { + fmt.Println("only single charactered separators are allowed") + os.Exit(1) + } else { + separators = append(separators, r) + } + } + g, err := glob.Compile(*pattern, separators...) 
if err != nil { fmt.Println("could not compile pattern:", err) diff --git a/compiler.go b/compiler.go index d7bcd8d..57b2d4c 100644 --- a/compiler.go +++ b/compiler.go @@ -3,8 +3,8 @@ package glob import ( "fmt" "github.com/gobwas/glob/match" + "github.com/gobwas/glob/runes" "reflect" - "unicode/utf8" ) func optimize(matcher match.Matcher) match.Matcher { @@ -23,8 +23,8 @@ func optimize(matcher match.Matcher) match.Matcher { return m case match.List: - if m.Not == false && utf8.RuneCountInString(m.List) == 1 { - return match.NewText(m.List) + if m.Not == false && len(m.List) == 1 { + return match.NewText(string(m.List)) } return m @@ -172,7 +172,7 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher { separator = sep } - if sep == separator { + if runes.Equal(sep, separator) { continue } @@ -187,7 +187,7 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher { return match.Any{separator} } - if (hasAny || hasSuper) && min > 0 && separator == "" { + if (hasAny || hasSuper) && min > 0 && len(separator) == 0 { return match.Min{min} } @@ -201,8 +201,8 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher { } } - if separator != "" { - every.Add(match.Contains{separator, true}) + if len(separator) > 0 { + every.Add(match.Contains{string(separator), true}) } return every @@ -468,7 +468,7 @@ func compileMatchers(matchers []match.Matcher) (match.Matcher, error) { // return sum * k //} -func doAnyOf(n *nodeAnyOf, s string) (match.Matcher, error) { +func doAnyOf(n *nodeAnyOf, s []rune) (match.Matcher, error) { var matchers []match.Matcher for _, desc := range n.children() { if desc == nil { @@ -532,7 +532,7 @@ func do(leaf node, s []rune) (m match.Matcher, err error) { } case *nodeList: - m = match.List{n.chars, n.not} + m = match.List{[]rune(n.chars), n.not} case *nodeRange: m = match.Range{n.lo, n.hi, n.not} @@ -556,7 +556,7 @@ func do(leaf node, s []rune) (m match.Matcher, err error) { return optimize(m), nil } -func do2(node node, s string) 
([]match.Matcher, error) { +func do2(node node, s []rune) ([]match.Matcher, error) { var result []match.Matcher switch n := node.(type) { @@ -631,7 +631,7 @@ func do2(node node, s string) ([]match.Matcher, error) { } case *nodeList: - result = append(result, match.List{n.chars, n.not}) + result = append(result, match.List{[]rune(n.chars), n.not}) case *nodeRange: result = append(result, match.Range{n.lo, n.hi, n.not}) diff --git a/compiler_test.go b/compiler_test.go index 008ddb7..0be7b76 100644 --- a/compiler_test.go +++ b/compiler_test.go @@ -6,7 +6,7 @@ import ( "testing" ) -const separators = "." +var separators = []rune{'.'} func TestGlueMatchers(t *testing.T) { for id, test := range []struct { @@ -27,7 +27,7 @@ func TestGlueMatchers(t *testing.T) { }, match.EveryOf{match.Matchers{ match.Min{1}, - match.Contains{separators, true}, + match.Contains{string(separators), true}, }}, }, { @@ -43,8 +43,8 @@ func TestGlueMatchers(t *testing.T) { }, { []match.Matcher{ - match.List{"a", true}, - match.Any{"a"}, + match.List{[]rune{'a'}, true}, + match.Any{[]rune{'a'}}, }, match.EveryOf{match.Matchers{ match.Min{1}, @@ -101,14 +101,14 @@ func TestCompileMatchers(t *testing.T) { { []match.Matcher{ match.Range{'a', 'c', true}, - match.List{"zte", false}, + match.List{[]rune{'z', 't', 'e'}, false}, match.NewText("c"), match.Single{}, }, match.Row{ Matchers: match.Matchers{ match.Range{'a', 'c', true}, - match.List{"zte", false}, + match.List{[]rune{'z', 't', 'e'}, false}, match.NewText("c"), match.Single{}, }, @@ -136,7 +136,7 @@ func TestConvertMatchers(t *testing.T) { { []match.Matcher{ match.Range{'a', 'c', true}, - match.List{"zte", false}, + match.List{[]rune{'z', 't', 'e'}, false}, match.NewText("c"), match.Single{}, match.Any{}, @@ -145,7 +145,7 @@ func TestConvertMatchers(t *testing.T) { match.Row{ Matchers: match.Matchers{ match.Range{'a', 'c', true}, - match.List{"zte", false}, + match.List{[]rune{'z', 't', 'e'}, false}, match.NewText("c"), match.Single{}, }, @@ 
-157,7 +157,7 @@ func TestConvertMatchers(t *testing.T) { { []match.Matcher{ match.Range{'a', 'c', true}, - match.List{"zte", false}, + match.List{[]rune{'z', 't', 'e'}, false}, match.NewText("c"), match.Single{}, match.Any{}, @@ -169,7 +169,7 @@ func TestConvertMatchers(t *testing.T) { match.Row{ Matchers: match.Matchers{ match.Range{'a', 'c', true}, - match.List{"zte", false}, + match.List{[]rune{'z', 't', 'e'}, false}, match.NewText("c"), }, RunesLength: 3, @@ -204,7 +204,7 @@ func TestCompiler(t *testing.T) { for id, test := range []struct { ast *nodePattern result Glob - sep string + sep []rune }{ { ast: pattern(&nodeText{text: "abc"}), @@ -241,14 +241,14 @@ func TestCompiler(t *testing.T) { chars: "abc", not: true, }), - result: match.List{"abc", true}, + result: match.List{[]rune{'a', 'b', 'c'}, true}, }, { ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}), sep: separators, result: match.EveryOf{Matchers: match.Matchers{ match.Min{3}, - match.Contains{separators, true}, + match.Contains{string(separators), true}, }}, }, { @@ -349,7 +349,7 @@ func TestCompiler(t *testing.T) { nil, match.AnyOf{Matchers: match.Matchers{ match.Single{}, - match.List{List: "def"}, + match.List{List: []rune{'d', 'e', 'f'}}, match.Nothing{}, }}, ), @@ -390,8 +390,8 @@ func TestCompiler(t *testing.T) { Matchers: match.Matchers{ match.NewText("abc"), match.AnyOf{Matchers: match.Matchers{ - match.List{List: "abc"}, - match.List{List: "def"}, + match.List{List: []rune{'a', 'b', 'c'}}, + match.List{List: []rune{'d', 'e', 'f'}}, }}, match.NewText("ghi"), }, diff --git a/glob.go b/glob.go index dee2ef6..4d1b77b 100644 --- a/glob.go +++ b/glob.go @@ -1,7 +1,5 @@ package glob -import "strings" - // Glob represents compiled glob pattern. 
type Glob interface { Match(string) bool @@ -48,7 +46,7 @@ func Compile(pattern string, separators ...rune) (Glob, error) { } // MustCompile is the same as Compile, except that if Compile returns error, this will panic -func MustCompile(pattern string, separators ...string) Glob { +func MustCompile(pattern string, separators ...rune) Glob { g, err := Compile(pattern, separators...) if err != nil { panic(err) diff --git a/glob_test.go b/glob_test.go index ce55202..d17e985 100644 --- a/glob_test.go +++ b/glob_test.go @@ -53,10 +53,10 @@ const ( type test struct { pattern, match string should bool - delimiters []string + delimiters []rune } -func glob(s bool, p, m string, d ...string) test { +func glob(s bool, p, m string, d ...rune) test { return test{p, m, s, d} } @@ -68,22 +68,22 @@ func TestGlob(t *testing.T) { glob(true, "a*c", "abc"), glob(true, "a*c", "a12345c"), glob(true, "a?c", "a1c"), - glob(true, "a.b", "a.b", "."), - glob(true, "a.*", "a.b", "."), - glob(true, "a.**", "a.b.c", "."), - glob(true, "a.?.c", "a.b.c", "."), - glob(true, "a.?.?", "a.b.c", "."), + glob(true, "a.b", "a.b", '.'), + glob(true, "a.*", "a.b", '.'), + glob(true, "a.**", "a.b.c", '.'), + glob(true, "a.?.c", "a.b.c", '.'), + glob(true, "a.?.?", "a.b.c", '.'), glob(true, "?at", "cat"), glob(true, "?at", "fat"), glob(true, "*", "abc"), glob(true, `\*`, "*"), - glob(true, "**", "a.b.c", "."), + glob(true, "**", "a.b.c", '.'), glob(false, "?at", "at"), - glob(false, "?at", "fat", "f"), - glob(false, "a.*", "a.b.c", "."), - glob(false, "a.?.c", "a.bb.c", "."), - glob(false, "*", "a.b.c", "."), + glob(false, "?at", "fat", 'f'), + glob(false, "a.*", "a.b.c", '.'), + glob(false, "a.?.c", "a.bb.c", '.'), + glob(false, "*", "a.b.c", '.'), glob(true, "*test", "this is a test"), glob(true, "this*", "this is a test"), diff --git a/match/any.go b/match/any.go index b00db7e..d931eea 100644 --- a/match/any.go +++ b/match/any.go @@ -2,8 +2,7 @@ package match import ( "fmt" - "strings" - "unicode/utf8" + 
"github.com/gobwas/glob/strings" ) type Any struct { @@ -11,28 +10,25 @@ type Any struct { } func (self Any) Match(s string) bool { - return strings.IndexAny(s, self.Separators) == -1 + return strings.IndexAnyRunes(s, self.Separators) == -1 } -func (self Any) Index(s string) (int, []int) { - var sub string - - found := strings.IndexAny(s, self.Separators) +func (self Any) Index(s string, segments []int) (int, []int) { + found := strings.IndexAnyRunes(s, self.Separators) switch found { case -1: - sub = s case 0: - return 0, []int{0} + segments = append(segments) + return 0, segments default: - sub = s[:found] + s = s[:found] } - segments := make([]int, 0, utf8.RuneCountInString(sub)+1) - for i := range sub { + for i := range s { segments = append(segments, i) } - segments = append(segments, len(sub)) + segments = append(segments, len(s)) return 0, segments } diff --git a/match/any_of.go b/match/any_of.go index 3d14edc..602cd28 100644 --- a/match/any_of.go +++ b/match/any_of.go @@ -23,39 +23,38 @@ func (self AnyOf) Match(s string) bool { return false } -func (self AnyOf) Index(s string) (int, []int) { - if len(self.Matchers) == 0 { - return -1, nil - } - - // segments to merge - var segments [][]int +func (self AnyOf) Index(s string, segments []int) (int, []int) { index := -1 - for _, m := range self.Matchers { - idx, seg := m.Index(s) + in := acquireSegments(len(s)) + idx, seg := m.Index(s, in) if idx == -1 { + releaseSegments(in) continue } if index == -1 || idx < index { index = idx - segments = [][]int{seg} + segments = append(segments[:0], seg...) 
+ releaseSegments(in) continue } if idx > index { + releaseSegments(in) continue } - segments = append(segments, seg) + // here idx == index + segments = appendMerge(segments, seg) + releaseSegments(in) } if index == -1 { return -1, nil } - return index, mergeSegments(segments) + return index, segments } func (self AnyOf) Len() (l int) { diff --git a/match/any_of_test.go b/match/any_of_test.go index 506ddd8..ee3001a 100644 --- a/match/any_of_test.go +++ b/match/any_of_test.go @@ -33,8 +33,8 @@ func TestAnyOfIndex(t *testing.T) { }, { Matchers{ - List{"[def]", false}, - List{"[abc]", false}, + List{[]rune("[def]"), false}, + List{[]rune("[abc]"), false}, }, "abcdef", 0, @@ -42,7 +42,7 @@ func TestAnyOfIndex(t *testing.T) { }, } { everyOf := AnyOf{test.matchers} - index, segments := everyOf.Index(test.fixture) + index, segments := everyOf.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } diff --git a/match/any_test.go b/match/any_test.go index c436267..9239ffa 100644 --- a/match/any_test.go +++ b/match/any_test.go @@ -7,38 +7,53 @@ import ( func TestAnyIndex(t *testing.T) { for id, test := range []struct { - sep string + sep []rune fixture string index int segments []int }{ { - ".", + []rune{'.'}, "abc", 0, []int{0, 1, 2, 3}, }, { - ".", + []rune{'.'}, "abc.def", 0, []int{0, 1, 2, 3}, }, } { p := Any{test.sep} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } if !reflect.DeepEqual(segments, test.segments) { t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments) } + + releaseSegments(segments) } } func BenchmarkIndexAny(b *testing.B) { - p := Any{bench_separators} + m := Any{bench_separators} + + in := acquireSegments(len(bench_pattern)) for i := 0; i < b.N; i++ { - p.Index(bench_pattern) + m.Index(bench_pattern, 
in[:0]) } } + +func BenchmarkIndexAnyParallel(b *testing.B) { + m := Any{bench_separators} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/btree.go b/match/btree.go index fe314f2..ad8f5f9 100644 --- a/match/btree.go +++ b/match/btree.go @@ -51,7 +51,7 @@ func (self BTree) Len() int { } // todo? -func (self BTree) Index(s string) (int, []int) { +func (self BTree) Index(s string, segments []int) (int, []int) { return -1, nil } @@ -79,8 +79,10 @@ func (self BTree) Match(s string) bool { for offset < limit { // search for matching part in substring - index, segments := self.Value.Index(s[offset:limit]) + in := acquireSegments(limit - offset) + index, segments := self.Value.Index(s[offset:limit], in) if index == -1 { + releaseSegments(in) return false } @@ -112,11 +114,14 @@ func (self BTree) Match(s string) bool { } if right { + releaseSegments(in) return true } } } + releaseSegments(in) + _, step := utf8.DecodeRuneInString(s[offset+index:]) offset += index + step } diff --git a/match/contains.go b/match/contains.go index 23f51b6..8246c92 100644 --- a/match/contains.go +++ b/match/contains.go @@ -3,7 +3,6 @@ package match import ( "fmt" "strings" - "unicode/utf8" ) type Contains struct { @@ -15,11 +14,8 @@ func (self Contains) Match(s string) bool { return strings.Contains(s, self.Needle) != self.Not } -func (self Contains) Index(s string) (int, []int) { - var ( - sub string - offset int - ) +func (self Contains) Index(s string, segments []int) (int, []int) { + var offset int idx := strings.Index(s, self.Needle) @@ -29,27 +25,19 @@ func (self Contains) Index(s string) (int, []int) { } offset = idx + len(self.Needle) - if len(s) <= offset { - return 0, []int{offset} - } - - sub = s[offset:] - } else { - switch idx { - case -1: - sub = s - default: - sub = s[:idx] + return 0, append(segments, offset) } + s = s[offset:] + } else if idx != -1 { + s = 
s[:idx] } - segments := make([]int, 0, utf8.RuneCountInString(sub)+1) - for i, _ := range sub { + for i, _ := range s { segments = append(segments, offset+i) } - return 0, append(segments, offset+len(sub)) + return 0, append(segments, offset+len(s)) } func (self Contains) Len() int { diff --git a/match/contains_test.go b/match/contains_test.go index b7e66c7..ba9577b 100644 --- a/match/contains_test.go +++ b/match/contains_test.go @@ -43,7 +43,7 @@ func TestContainsIndex(t *testing.T) { }, } { p := Contains{test.prefix, test.not} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -54,8 +54,21 @@ func TestContainsIndex(t *testing.T) { } func BenchmarkIndexContains(b *testing.B) { - m := Contains{bench_separators, true} + m := Contains{string(bench_separators), true} + + in := acquireSegments(len(bench_pattern)) for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexContainsParallel(b *testing.B) { + m := Contains{string(bench_separators), true} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/every_of.go b/match/every_of.go index 5df2fbc..dba0154 100644 --- a/match/every_of.go +++ b/match/every_of.go @@ -25,43 +25,66 @@ func (self EveryOf) Len() (l int) { return } -func (self EveryOf) Index(s string) (int, []int) { +func max(a, b int) int { + if a >= b { + return a + } + + return b +} + +func (self EveryOf) Index(s string, out []int) (int, []int) { var index int var offset int - var segments []int + var current []int sub := s - for _, m := range self.Matchers { - idx, seg := m.Index(sub) + for i, m := range self.Matchers { + in := acquireSegments(len(sub)) + idx, seg := m.Index(sub, in) if idx == -1 { + releaseSegments(in) + if cap(current) > 0 { + 
releaseSegments(current) + } return -1, nil } - var sum []int - if segments == nil { - sum = seg + next := acquireSegments(max(len(seg), len(current))) + if i == 0 { + next = append(next, seg...) } else { delta := index - (idx + offset) - for _, ex := range segments { + for _, ex := range current { for _, n := range seg { if ex+delta == n { - sum = append(sum, n) + next = append(next, n) } } } } - if len(sum) == 0 { + if cap(current) > 0 { + releaseSegments(current) + } + releaseSegments(in) + + if len(next) == 0 { + releaseSegments(next) return -1, nil } - segments = sum + current = next + index = idx + offset sub = s[index:] offset += idx } - return index, segments + out = append(out, current...) + releaseSegments(current) + + return index, out } func (self EveryOf) Match(s string) bool { diff --git a/match/every_of_test.go b/match/every_of_test.go index c55ef9e..c97ea70 100644 --- a/match/every_of_test.go +++ b/match/every_of_test.go @@ -34,7 +34,7 @@ func TestEveryOfIndex(t *testing.T) { }, } { everyOf := EveryOf{test.matchers} - index, segments := everyOf.Index(test.fixture) + index, segments := everyOf.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } diff --git a/match/list.go b/match/list.go index 3d9f0b6..bcc3612 100644 --- a/match/list.go +++ b/match/list.go @@ -2,24 +2,22 @@ package match import ( "fmt" - "strings" + "github.com/gobwas/glob/runes" "unicode/utf8" ) type List struct { - List string + List []rune Not bool } func (self List) Match(s string) bool { - // if s 100% have two symbols - // _, w := utf8.DecodeRuneInString(s) - // if len(s) > w { - if len(s) > 4 { + r, w := utf8.DecodeRuneInString(s) + if len(s) > w { return false } - inList := strings.Index(self.List, s) != -1 + inList := runes.IndexRune(self.List, r) != -1 return inList == !self.Not } @@ -27,10 +25,10 @@ func (self List) Len() int { return lenOne } -func (self List) Index(s string) (int, []int) { +func 
(self List) Index(s string, segments []int) (int, []int) { for i, r := range s { - if self.Not == (strings.IndexRune(self.List, r) == -1) { - return i, []int{utf8.RuneLen(r)} + if self.Not == (runes.IndexRune(self.List, r) == -1) { + return i, append(segments, utf8.RuneLen(r)) } } diff --git a/match/list_test.go b/match/list_test.go index a772fdf..8fd13a2 100644 --- a/match/list_test.go +++ b/match/list_test.go @@ -7,21 +7,21 @@ import ( func TestListIndex(t *testing.T) { for id, test := range []struct { - list string + list []rune not bool fixture string index int segments []int }{ { - "ab", + []rune("ab"), false, "abc", 0, []int{1}, }, { - "ab", + []rune("ab"), true, "fffabfff", 0, @@ -29,7 +29,7 @@ func TestListIndex(t *testing.T) { }, } { p := List{test.list, test.not} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -40,8 +40,21 @@ func TestListIndex(t *testing.T) { } func BenchmarkIndexList(b *testing.B) { - m := List{"def", false} + m := List{[]rune("def"), false} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexListParallel(b *testing.B) { + m := List{[]rune("def"), false} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/match.go b/match/match.go index 8d7158c..0a6664b 100644 --- a/match/match.go +++ b/match/match.go @@ -3,6 +3,7 @@ package match import ( "fmt" "strings" + "sync" ) const lenOne = 1 @@ -11,7 +12,7 @@ const lenNo = -1 type Matcher interface { Match(string) bool - Index(string) (int, []int) + Index(string, []int) (int, []int) Len() int String() string } @@ -27,6 +28,58 @@ func (m Matchers) String() string { return fmt.Sprintf("%s", strings.Join(s, ",")) } +var segmentsPools 
[1024]sync.Pool + +func toPowerOfTwo(v int) int { + v-- + v |= v >> 1 + v |= v >> 2 + v |= v >> 4 + v |= v >> 8 + v |= v >> 16 + v++ + + return v +} + +func init() { + for i := 1024; i >= 1; i >>= 1 { + func(i int) { + segmentsPools[i-1] = sync.Pool{ + New: func() interface{} { + return make([]int, 0, i) + }, + } + }(i) + } +} + +var segmentsPool = sync.Pool{ + New: func() interface{} { + return make([]int, 0, 64) + }, +} + +func getIdx(c int) int { + p := toPowerOfTwo(c) + switch { + case p >= 1024: + return 1023 + case p < 1: + return 0 + default: + return p - 1 + } +} + +func acquireSegments(c int) []int { + return segmentsPools[getIdx(c)].Get().([]int)[:0] +} + +func releaseSegments(s []int) { + segmentsPools[getIdx(cap(s))].Put(s) +} + func appendIfNotAsPrevious(target []int, val int) []int { l := len(target) if l != 0 && target[l-1] == val { @@ -36,16 +89,64 @@ func appendIfNotAsPrevious(target []int, val int) []int { return append(target, val) } -// mergeSegments merges and sorts given already SORTED and UNIQUE segments. -func mergeSegments(segments [][]int) []int { - var current []int - for _, s := range segments { - if current == nil { - current = s - continue +func appendMerge(target, sub []int) []int { + lt, ls := len(target), len(sub) + out := acquireSegments(lt + ls) + + for x, y := 0, 0; x < lt || y < ls; { + if x >= lt { + out = append(out, sub[y:]...) + break } - var next []int + if y >= ls { + out = append(out, target[x:]...) + break + } + + xValue := target[x] + yValue := sub[y] + + switch { + + case xValue == yValue: + out = append(out, xValue) + x++ + y++ + + case xValue < yValue: + out = append(out, xValue) + x++ + + case yValue < xValue: + out = append(out, yValue) + y++ + + } + } + + target = append(target[:0], out...) + releaseSegments(out) + + return target +} + +// mergeSegments merges and sorts given already SORTED and UNIQUE segments. 
+func mergeSegments(list [][]int, out []int) []int { + var current []int + switch len(list) { + case 0: + return out + case 1: + return list[0] + default: + current = acquireSegments(len(list[0])) + current = append(current, list[0]...) + // releaseSegments(list[0]) + } + + for _, s := range list[1:] { + next := acquireSegments(len(current) + len(s)) for x, y := 0, 0; x < len(current) || y < len(s); { if x >= len(current) { next = append(next, s[y:]...) @@ -78,8 +179,21 @@ func mergeSegments(segments [][]int) []int { } } + releaseSegments(current) current = next } - return current + out = append(out, current...) + releaseSegments(current) + + return out +} + +func reverseSegments(input []int) { + l := len(input) + m := l / 2 + + for i := 0; i < m; i++ { + input[i], input[l-i-1] = input[l-i-1], input[i] + } } diff --git a/match/match_test.go b/match/match_test.go index c3b2985..d60fc7d 100644 --- a/match/match_test.go +++ b/match/match_test.go @@ -5,36 +5,60 @@ import ( "testing" ) -const bench_separators = "." 
+var bench_separators = []rune{'.'} + const bench_pattern = "abcdefghijklmnopqrstuvwxyz0123456789" -func TestMergeSegments(t *testing.T) { +func TestAppendMerge(t *testing.T) { for id, test := range []struct { - segments [][]int + segments [2][]int exp []int }{ { - [][]int{ + [2][]int{ []int{0, 6, 7}, []int{0, 1, 3}, - []int{2, 4}, }, - []int{0, 1, 2, 3, 4, 6, 7}, + []int{0, 1, 3, 6, 7}, }, { - [][]int{ + [2][]int{ []int{0, 1, 3, 6, 7}, - []int{0, 1, 3}, - []int{2, 4}, - []int{1}, + []int{0, 1, 10}, }, - []int{0, 1, 2, 3, 4, 6, 7}, + []int{0, 1, 3, 6, 7, 10}, }, } { - act := mergeSegments(test.segments) + act := appendMerge(test.segments[0], test.segments[1]) if !reflect.DeepEqual(act, test.exp) { t.Errorf("#%d merge sort segments unexpected:\nact: %v\nexp:%v", id, act, test.exp) continue } } } + +func BenchmarkAppendMerge(b *testing.B) { + s1 := []int{0, 1, 3, 6, 7} + s2 := []int{0, 1, 3} + + for i := 0; i < b.N; i++ { + appendMerge(s1, s2) + } +} + +func BenchmarkAppendMergeParallel(b *testing.B) { + s1 := []int{0, 1, 3, 6, 7} + s2 := []int{0, 1, 3} + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + appendMerge(s1, s2) + } + }) +} + +func BenchmarkReverse(b *testing.B) { + for i := 0; i < b.N; i++ { + reverseSegments([]int{1, 2, 3, 4}) + } +} diff --git a/match/max.go b/match/max.go index af634d3..341264c 100644 --- a/match/max.go +++ b/match/max.go @@ -21,7 +21,7 @@ func (self Max) Match(s string) bool { return true } -func (self Max) Index(s string) (index int, segments []int) { +func (self Max) Index(s string, segments []int) (int, []int) { segments = append(segments, 0) var count int for i, r := range s { diff --git a/match/max_test.go b/match/max_test.go index bbd6de0..2c5cba5 100644 --- a/match/max_test.go +++ b/match/max_test.go @@ -26,7 +26,7 @@ func TestMaxIndex(t *testing.T) { }, } { p := Max{test.limit} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d 
unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,7 +38,20 @@ func TestMaxIndex(t *testing.T) { func BenchmarkIndexMax(b *testing.B) { m := Max{10} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexMaxParallel(b *testing.B) { + m := Max{10} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/min.go b/match/min.go index cb330b4..9ddfd84 100644 --- a/match/min.go +++ b/match/min.go @@ -21,22 +21,22 @@ func (self Min) Match(s string) bool { return false } -func (self Min) Index(s string) (int, []int) { +func (self Min) Index(s string, segments []int) (int, []int) { var count int + var found bool - c := utf8.RuneCountInString(s) - if c < self.Limit { - return -1, nil - } - - segments := make([]int, 0, c-self.Limit+1) for i, r := range s { count++ if count >= self.Limit { + found = true segments = append(segments, i+utf8.RuneLen(r)) } } + if !found { + return -1, nil + } + return 0, segments } diff --git a/match/min_test.go b/match/min_test.go index c823223..5f9f126 100644 --- a/match/min_test.go +++ b/match/min_test.go @@ -26,7 +26,7 @@ func TestMinIndex(t *testing.T) { }, } { p := Min{test.limit} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,7 +38,20 @@ func TestMinIndex(t *testing.T) { func BenchmarkIndexMin(b *testing.B) { m := Min{10} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexMinParallel(b *testing.B) { + m := Min{10} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) 
+} diff --git a/match/nothing.go b/match/nothing.go index ef5049b..a6b6922 100644 --- a/match/nothing.go +++ b/match/nothing.go @@ -10,8 +10,8 @@ func (self Nothing) Match(s string) bool { return len(s) == 0 } -func (self Nothing) Index(s string) (int, []int) { - return 0, []int{0} +func (self Nothing) Index(s string, segments []int) (int, []int) { + return 0, append(segments, 0) } func (self Nothing) Len() int { diff --git a/match/nothing_test.go b/match/nothing_test.go index 1b96c58..76cb678 100644 --- a/match/nothing_test.go +++ b/match/nothing_test.go @@ -23,7 +23,7 @@ func TestNothingIndex(t *testing.T) { }, } { p := Nothing{} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -34,8 +34,21 @@ func TestNothingIndex(t *testing.T) { } func BenchmarkIndexNothing(b *testing.B) { - m := Max{10} + m := Nothing{} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexNothingParallel(b *testing.B) { + m := Nothing{} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/prefix.go b/match/prefix.go index bf73ae2..2bb1260 100644 --- a/match/prefix.go +++ b/match/prefix.go @@ -10,7 +10,7 @@ type Prefix struct { Prefix string } -func (self Prefix) Index(s string) (int, []int) { +func (self Prefix) Index(s string, segments []int) (int, []int) { idx := strings.Index(s, self.Prefix) if idx == -1 { return -1, nil @@ -24,7 +24,6 @@ func (self Prefix) Index(s string) (int, []int) { sub = "" } - segments := make([]int, 0, utf8.RuneCountInString(sub)+1) segments = append(segments, length) for i, r := range sub { segments = append(segments, length+i+utf8.RuneLen(r)) diff --git a/match/prefix_suffix.go b/match/prefix_suffix.go 
index d5166de..4b78107 100644 --- a/match/prefix_suffix.go +++ b/match/prefix_suffix.go @@ -9,17 +9,15 @@ type PrefixSuffix struct { Prefix, Suffix string } -func (self PrefixSuffix) Index(s string) (int, []int) { +func (self PrefixSuffix) Index(s string, segments []int) (int, []int) { prefixIdx := strings.Index(s, self.Prefix) if prefixIdx == -1 { return -1, nil } - var resp []int suffixLen := len(self.Suffix) if suffixLen > 0 { - var segments []int for sub := s[prefixIdx:]; ; { suffixIdx := strings.LastIndex(sub, self.Suffix) if suffixIdx == -1 { @@ -30,20 +28,16 @@ func (self PrefixSuffix) Index(s string) (int, []int) { sub = sub[:suffixIdx] } - segLen := len(segments) - if segLen == 0 { + if len(segments) == 0 { return -1, nil } - resp = make([]int, segLen) - for i, s := range segments { - resp[segLen-i-1] = s - } + reverseSegments(segments) } else { - resp = append(resp, len(s)-prefixIdx) + segments = append(segments, len(s)-prefixIdx) } - return prefixIdx, resp + return prefixIdx, segments } func (self PrefixSuffix) Len() int { diff --git a/match/prefix_suffix_test.go b/match/prefix_suffix_test.go index baf9427..23271c0 100644 --- a/match/prefix_suffix_test.go +++ b/match/prefix_suffix_test.go @@ -36,7 +36,7 @@ func TestPrefixSuffixIndex(t *testing.T) { }, } { p := PrefixSuffix{test.prefix, test.suffix} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -48,7 +48,20 @@ func TestPrefixSuffixIndex(t *testing.T) { func BenchmarkIndexPrefixSuffix(b *testing.B) { m := PrefixSuffix{"qew", "sqw"} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexPrefixSuffixParallel(b *testing.B) { + m := PrefixSuffix{"qew", "sqw"} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { 
+ m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/prefix_test.go b/match/prefix_test.go index 3ee3012..5b38bfd 100644 --- a/match/prefix_test.go +++ b/match/prefix_test.go @@ -26,7 +26,7 @@ func TestPrefixIndex(t *testing.T) { }, } { p := Prefix{test.prefix} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,7 +38,20 @@ func TestPrefixIndex(t *testing.T) { func BenchmarkIndexPrefix(b *testing.B) { m := Prefix{"qew"} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexPrefixParallel(b *testing.B) { + m := Prefix{"qew"} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/range.go b/match/range.go index f8b6f5d..e669870 100644 --- a/match/range.go +++ b/match/range.go @@ -10,6 +10,9 @@ type Range struct { Not bool } +// todo make factory +// todo make range table inside factory + func (self Range) Len() int { return lenOne } @@ -25,10 +28,10 @@ func (self Range) Match(s string) bool { return inRange == !self.Not } -func (self Range) Index(s string) (int, []int) { +func (self Range) Index(s string, segments []int) (int, []int) { for i, r := range s { if self.Not != (r >= self.Lo && r <= self.Hi) { - return i, []int{utf8.RuneLen(r)} + return i, append(segments, utf8.RuneLen(r)) } } diff --git a/match/range_test.go b/match/range_test.go index e55bccd..a7cdef8 100644 --- a/match/range_test.go +++ b/match/range_test.go @@ -36,7 +36,7 @@ func TestRangeIndex(t *testing.T) { }, } { m := Range{test.lo, test.hi, test.not} - index, segments := m.Index(test.fixture) + index, segments := m.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, 
test.index, index) } @@ -48,7 +48,20 @@ func TestRangeIndex(t *testing.T) { func BenchmarkIndexRange(b *testing.B) { m := Range{'0', '9', false} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexRangeParallel(b *testing.B) { + m := Range{'0', '9', false} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/row.go b/match/row.go index a079aa4..8b3e2fc 100644 --- a/match/row.go +++ b/match/row.go @@ -52,7 +52,7 @@ func (self Row) Len() (l int) { return self.RunesLength } -func (self Row) Index(s string) (int, []int) { +func (self Row) Index(s string, segments []int) (int, []int) { if !self.lenOk(s) { return -1, nil } @@ -66,7 +66,7 @@ func (self Row) Index(s string) (int, []int) { } if self.matchAll(s[i:]) { - return i, []int{self.RunesLength} + return i, append(segments, self.RunesLength) } } diff --git a/match/row_test.go b/match/row_test.go index 4b59fe0..ff59cff 100644 --- a/match/row_test.go +++ b/match/row_test.go @@ -5,20 +5,6 @@ import ( "testing" ) -func BenchmarkRowIndex(b *testing.B) { - m := Row{ - Matchers: Matchers{ - NewText("abc"), - NewText("def"), - Single{}, - }, - RunesLength: 7, - } - for i := 0; i < b.N; i++ { - m.Index("abcdefghijk") - } -} - func TestRowIndex(t *testing.T) { for id, test := range []struct { matchers Matchers @@ -54,7 +40,7 @@ func TestRowIndex(t *testing.T) { Matchers: test.matchers, RunesLength: test.length, } - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -63,3 +49,37 @@ func TestRowIndex(t *testing.T) { } } } + +func BenchmarkRowIndex(b *testing.B) { + m := Row{ + Matchers: Matchers{ + NewText("abc"), + NewText("def"), + Single{}, + }, + RunesLength: 7, + } 
+ in := acquireSegments(len(bench_pattern)) + + for i := 0; i < b.N; i++ { + m.Index(bench_pattern, in[:0]) + } +} + +func BenchmarkIndexRowParallel(b *testing.B) { + m := Row{ + Matchers: Matchers{ + NewText("abc"), + NewText("def"), + Single{}, + }, + RunesLength: 7, + } + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/single.go b/match/single.go index f9cf018..e1ad121 100644 --- a/match/single.go +++ b/match/single.go @@ -2,7 +2,7 @@ package match import ( "fmt" - "strings" + "github.com/gobwas/glob/runes" "unicode/utf8" ) @@ -17,17 +17,17 @@ func (self Single) Match(s string) bool { return false } - return strings.IndexRune(self.Separators, r) == -1 + return runes.IndexRune(self.Separators, r) == -1 } func (self Single) Len() int { return lenOne } -func (self Single) Index(s string) (int, []int) { +func (self Single) Index(s string, segments []int) (int, []int) { for i, r := range s { - if strings.IndexRune(self.Separators, r) == -1 { - return i, []int{utf8.RuneLen(r)} + if runes.IndexRune(self.Separators, r) == -1 { + return i, append(segments, utf8.RuneLen(r)) } } diff --git a/match/single_test.go b/match/single_test.go index 1e9ba71..e1e99ac 100644 --- a/match/single_test.go +++ b/match/single_test.go @@ -7,26 +7,26 @@ import ( func TestSingleIndex(t *testing.T) { for id, test := range []struct { - separators string + separators []rune fixture string index int segments []int }{ { - ".", + []rune{'.'}, ".abc", 1, []int{1}, }, { - ".", + []rune{'.'}, ".", -1, nil, }, } { p := Single{test.separators} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,7 +38,20 @@ func TestSingleIndex(t *testing.T) { func BenchmarkIndexSingle(b *testing.B) { m := Single{bench_separators} + in := 
acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexSingleParallel(b *testing.B) { + m := Single{bench_separators} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/suffix.go b/match/suffix.go index d38d71a..ca825f7 100644 --- a/match/suffix.go +++ b/match/suffix.go @@ -9,13 +9,13 @@ type Suffix struct { Suffix string } -func (self Suffix) Index(s string) (int, []int) { +func (self Suffix) Index(s string, segments []int) (int, []int) { idx := strings.Index(s, self.Suffix) if idx == -1 { return -1, nil } - return 0, []int{idx + len(self.Suffix)} + return 0, append(segments, idx+len(self.Suffix)) } func (self Suffix) Len() int { diff --git a/match/suffix_test.go b/match/suffix_test.go index aca6eef..aeda714 100644 --- a/match/suffix_test.go +++ b/match/suffix_test.go @@ -26,7 +26,7 @@ func TestSuffixIndex(t *testing.T) { }, } { p := Suffix{test.prefix} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,7 +38,20 @@ func TestSuffixIndex(t *testing.T) { func BenchmarkIndexSuffix(b *testing.B) { m := Suffix{"qwe"} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexSuffixParallel(b *testing.B) { + m := Suffix{"qwe"} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/super.go b/match/super.go index 19b718f..27d83de 100644 --- a/match/super.go +++ b/match/super.go @@ -2,7 +2,6 @@ package match import ( "fmt" - "unicode/utf8" ) type Super struct{} @@ -15,12 +14,10 @@ func (self Super) Len() int { 
return lenNo } -func (self Super) Index(s string) (int, []int) { - segments := make([]int, 0, utf8.RuneCountInString(s)+1) +func (self Super) Index(s string, segments []int) (int, []int) { for i := range s { segments = append(segments, i) } - segments = append(segments, len(s)) return 0, segments diff --git a/match/super_test.go b/match/super_test.go index b649fb1..aa68cfb 100644 --- a/match/super_test.go +++ b/match/super_test.go @@ -23,7 +23,7 @@ func TestSuperIndex(t *testing.T) { }, } { p := Super{} - index, segments := p.Index(test.fixture) + index, segments := p.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -35,7 +35,20 @@ func TestSuperIndex(t *testing.T) { func BenchmarkIndexSuper(b *testing.B) { m := Super{} + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexSuperParallel(b *testing.B) { + m := Super{} + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/match/text.go b/match/text.go index 0b22c78..aff5b97 100644 --- a/match/text.go +++ b/match/text.go @@ -29,15 +29,13 @@ func (self Text) Len() int { return self.RunesLength } -func (self Text) Index(s string) (index int, segments []int) { - index = strings.Index(s, self.Str) +func (self Text) Index(s string, segments []int) (int, []int) { + index := strings.Index(s, self.Str) if index == -1 { - return + return -1, nil } - segments = []int{self.BytesLength} - - return + return index, append(segments, self.BytesLength) } func (self Text) String() string { diff --git a/match/text_test.go b/match/text_test.go index b7e1d5a..b5c6964 100644 --- a/match/text_test.go +++ b/match/text_test.go @@ -26,7 +26,7 @@ func TestTextIndex(t *testing.T) { }, } { m := NewText(test.text) - index, segments := m.Index(test.fixture) + 
index, segments := m.Index(test.fixture, []int{}) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } @@ -38,7 +38,20 @@ func TestTextIndex(t *testing.T) { func BenchmarkIndexText(b *testing.B) { m := NewText("foo") + in := acquireSegments(len(bench_pattern)) + for i := 0; i < b.N; i++ { - m.Index(bench_pattern) + m.Index(bench_pattern, in[:0]) } } + +func BenchmarkIndexTextParallel(b *testing.B) { + m := NewText("foo") + in := acquireSegments(len(bench_pattern)) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + m.Index(bench_pattern, in[:0]) + } + }) +} diff --git a/runes/runes.go b/runes/runes.go new file mode 100644 index 0000000..8586b16 --- /dev/null +++ b/runes/runes.go @@ -0,0 +1,128 @@ +package runes + +func Index(s, needle []rune) int { + ls, ln := len(s), len(needle) + + switch { + case ln == 0: + return 0 + case ln == 1: + return IndexRune(s, needle[0]) + case ln == ls: + if Equal(s, needle) { + return 0 + } + return -1 + case ln > ls: + return -1 + } + +head: + for i := 0; i < ls && ls-i >= ln; i++ { + for y := 0; y < ln; y++ { + if s[i+y] != needle[y] { + continue head + } + } + + return i + } + + return -1 +} + +func LastIndex(s, needle []rune) int { + ls, ln := len(s), len(needle) + + switch { + case ln == 0: + if ls == 0 { + return 0 + } + return ls + case ln == 1: + return IndexLastRune(s, needle[0]) + case ln == ls: + if Equal(s, needle) { + return 0 + } + return -1 + case ln > ls: + return -1 + } + +head: + for i := ls - 1; i >= 0 && i >= ln; i-- { + for y := ln - 1; y >= 0; y-- { + if s[i-(ln-y-1)] != needle[y] { + continue head + } + } + + return i - ln + 1 + } + + return -1 +} + +// IndexAny returns the index of the first instance of any Unicode code point +// from chars in s, or -1 if no Unicode code point from chars is present in s. 
+func IndexAny(s, chars []rune) int { + if len(chars) > 0 { + for i, c := range s { + for _, m := range chars { + if c == m { + return i + } + } + } + } + return -1 +} + +func Contains(s, needle []rune) bool { + return Index(s, needle) >= 0 +} + +func IndexRune(s []rune, r rune) int { + for i, c := range s { + if c == r { + return i + } + } + return -1 +} + +func IndexLastRune(s []rune, r rune) int { + for i := len(s) - 1; i >= 0; i-- { + if s[i] == r { + return i + } + } + + return -1 +} + +func Equal(a, b []rune) bool { + if len(a) == len(b) { + for i := 0; i < len(a); i++ { + if a[i] != b[i] { + return false + } + } + + return true + } + + return false +} + +// HasPrefix tests whether the rune slice s begins with prefix. +func HasPrefix(s, prefix []rune) bool { + return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix) +} + +// HasSuffix tests whether the rune slice s ends with suffix. +func HasSuffix(s, suffix []rune) bool { + return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix) +} diff --git a/runes/runes_test.go b/runes/runes_test.go new file mode 100644 index 0000000..54498eb --- /dev/null +++ b/runes/runes_test.go @@ -0,0 +1,222 @@ +package runes + +import ( + "strings" + "testing" +) + +type indexTest struct { + s []rune + sep []rune + out int +} + +type equalTest struct { + a []rune + b []rune + out bool +} + +func newIndexTest(s, sep string, out int) indexTest { + return indexTest{[]rune(s), []rune(sep), out} +} +func newEqualTest(s, sep string, out bool) equalTest { + return equalTest{[]rune(s), []rune(sep), out} +} + +var dots = "1....2....3....4" + +var indexTests = []indexTest{ + newIndexTest("", "", 0), + newIndexTest("", "a", -1), + newIndexTest("", "foo", -1), + newIndexTest("fo", "foo", -1), + newIndexTest("foo", "foo", 0), + newIndexTest("oofofoofooo", "f", 2), + newIndexTest("oofofoofooo", "foo", 4), + newIndexTest("barfoobarfoo", "foo", 3), + newIndexTest("foo", "", 0), + newIndexTest("foo", "o", 1), + newIndexTest("abcABCabc", 
"A", 3), + // cases with one byte strings - test special case in Index() + newIndexTest("", "a", -1), + newIndexTest("x", "a", -1), + newIndexTest("x", "x", 0), + newIndexTest("abc", "a", 0), + newIndexTest("abc", "b", 1), + newIndexTest("abc", "c", 2), + newIndexTest("abc", "x", -1), +} + +var lastIndexTests = []indexTest{ + newIndexTest("", "", 0), + newIndexTest("", "a", -1), + newIndexTest("", "foo", -1), + newIndexTest("fo", "foo", -1), + newIndexTest("foo", "foo", 0), + newIndexTest("foo", "f", 0), + newIndexTest("oofofoofooo", "f", 7), + newIndexTest("oofofoofooo", "foo", 7), + newIndexTest("barfoobarfoo", "foo", 9), + newIndexTest("foo", "", 3), + newIndexTest("foo", "o", 2), + newIndexTest("abcABCabc", "A", 3), + newIndexTest("abcABCabc", "a", 6), +} + +var indexAnyTests = []indexTest{ + newIndexTest("", "", -1), + newIndexTest("", "a", -1), + newIndexTest("", "abc", -1), + newIndexTest("a", "", -1), + newIndexTest("a", "a", 0), + newIndexTest("aaa", "a", 0), + newIndexTest("abc", "xyz", -1), + newIndexTest("abc", "xcz", 2), + newIndexTest("a☺b☻c☹d", "uvw☻xyz", 3), + newIndexTest("aRegExp*", ".(|)*+?^$[]", 7), + newIndexTest(dots+dots+dots, " ", -1), +} + +// Execute f on each test case. funcName should be the name of f; it's used +// in failure reports. 
+func runIndexTests(t *testing.T, f func(s, sep []rune) int, funcName string, testCases []indexTest) { + for _, test := range testCases { + actual := f(test.s, test.sep) + if actual != test.out { + t.Errorf("%s(%q,%q) = %v; want %v", funcName, test.s, test.sep, actual, test.out) + } + } +} + +func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) } +func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) } +func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) } + +var equalTests = []equalTest{ + newEqualTest("a", "a", true), + newEqualTest("a", "b", false), + newEqualTest("a☺b☻c☹d", "uvw☻xyz", false), + newEqualTest("a☺b☻c☹d", "a☺b☻c☹d", true), +} + +func TestEqual(t *testing.T) { + for _, test := range equalTests { + actual := Equal(test.a, test.b) + if actual != test.out { + t.Errorf("Equal(%q,%q) = %v; want %v", test.a, test.b, actual, test.out) + } + } +} + +func BenchmarkLastIndexRunes(b *testing.B) { + r := []rune("abcdef") + n := []rune("cd") + + for i := 0; i < b.N; i++ { + LastIndex(r, n) + } +} +func BenchmarkLastIndexStrings(b *testing.B) { + r := "abcdef" + n := "cd" + + for i := 0; i < b.N; i++ { + strings.LastIndex(r, n) + } +} + +func BenchmarkIndexAnyRunes(b *testing.B) { + s := []rune("...b...") + c := []rune("abc") + + for i := 0; i < b.N; i++ { + IndexAny(s, c) + } +} +func BenchmarkIndexAnyStrings(b *testing.B) { + s := "...b..." + c := "abc" + + for i := 0; i < b.N; i++ { + strings.IndexAny(s, c) + } +} + +func BenchmarkIndexRuneRunes(b *testing.B) { + s := []rune("...b...") + r := 'b' + + for i := 0; i < b.N; i++ { + IndexRune(s, r) + } +} +func BenchmarkIndexRuneStrings(b *testing.B) { + s := "...b..." 
+ r := 'b' + + for i := 0; i < b.N; i++ { + strings.IndexRune(s, r) + } +} + +func BenchmarkIndexRunes(b *testing.B) { + r := []rune("abcdef") + n := []rune("cd") + + for i := 0; i < b.N; i++ { + Index(r, n) + } +} +func BenchmarkIndexStrings(b *testing.B) { + r := "abcdef" + n := "cd" + + for i := 0; i < b.N; i++ { + strings.Index(r, n) + } +} + +func BenchmarkEqualRunes(b *testing.B) { + x := []rune("abc") + y := []rune("abc") + + for i := 0; i < b.N; i++ { + if Equal(x, y) { + continue + } + } +} + +func BenchmarkEqualStrings(b *testing.B) { + x := "abc" + y := "abc" + + for i := 0; i < b.N; i++ { + if x == y { + continue + } + } +} + +func BenchmarkNotEqualRunes(b *testing.B) { + x := []rune("abc") + y := []rune("abcd") + + for i := 0; i < b.N; i++ { + if Equal(x, y) { + continue + } + } +} + +func BenchmarkNotEqualStrings(b *testing.B) { + x := "abc" + y := "abcd" + + for i := 0; i < b.N; i++ { + if x == y { + continue + } + } +} diff --git a/strings/strings.go b/strings/strings.go new file mode 100644 index 0000000..1be48f7 --- /dev/null +++ b/strings/strings.go @@ -0,0 +1,13 @@ +package strings + +import "strings" + +func IndexAnyRunes(s string, rs []rune) int { + for _, r := range rs { + if i := strings.IndexRune(s, r); i != -1 { + return i + } + } + + return -1 +} diff --git a/todo.txt b/todo.txt index 02bfc16..63fac05 100644 --- a/todo.txt +++ b/todo.txt @@ -11,17 +11,22 @@ BenchmarkPlainGlobMatch-4 7.20 154 +20 BenchmarkPrefixGlobMatch-4 8.75 113 +1191.43% BenchmarkSuffixGlobMatch-4 9.07 115 +1167.92% BenchmarkPrefixSuffixGlobMatch-4 15.1 125 +727.81% -BenchmarkIndexAny-4 887 255 -71.25% -BenchmarkIndexContains-4 492 247 -49.80% -BenchmarkIndexList-4 151 51.1 -66.16% -BenchmarkIndexMax-4 442 92.4 -79.10% -BenchmarkIndexMin-4 516 161 -68.80% -BenchmarkIndexNothing-4 452 92.8 -79.47% -BenchmarkIndexPrefixSuffix-4 84.3 57.2 -32.15% + + + + + BenchmarkIndexPrefix-4 85.1 55.9 -34.31% -BenchmarkIndexRange-4 170 60.6 -64.35% -BenchmarkRowIndex-4 172 94.0 
-45.35% -BenchmarkIndexSingle-4 61.0 35.8 -41.31% +BenchmarkIndexRange-4 170(143) 60.6 -64.35% +BenchmarkRowIndex-4 172(128) 94.0 -45.35% +BenchmarkIndexSingle-4 61.0(16) 35.8 -41.31% BenchmarkIndexSuffix-4 84.8 55.7 -34.32% -BenchmarkIndexSuper-4 461 192 -58.35% +BenchmarkIndexSuper-4 461(180) 192 -58.35% BenchmarkIndexText-4 84.6 54.4 -35.70% +BenchmarkIndexPrefixSuffix-4 84.3 57.2 -32.15% +BenchmarkIndexNothing-4 452(3.31) 92.8 -79.47% XXX +BenchmarkIndexMin-4 516(274) 161 -68.80% +BenchmarkIndexMax-4 442(88) 92.4 -79.10% +BenchmarkIndexList-4 151(41) 51.1 -66.16% +BenchmarkIndexContains-4 492(220) 247 -49.80% +BenchmarkIndexAny-4 887(222) 255 -71.25%