Avoid btree Index() call on pattern with separators.

To avoid having the btree matcher call Index() on the given text, we implement
prefix_any and suffix_any matchers that work well in many cases.

BTree matcher Index() will be implemented in upcoming commits to prevent
the same kind of bugs.

Fixes #23
This commit is contained in:
Sergey Kamardin 2018-02-09 00:02:47 +03:00
parent 51eb1ee00b
commit 034ebb20be
8 changed files with 258 additions and 26 deletions

View File

@ -43,37 +43,43 @@ func optimizeMatcher(matcher match.Matcher) match.Matcher {
return m
}
leftNil := m.Left == nil
rightNil := m.Right == nil
var (
leftNil = m.Left == nil
rightNil = m.Right == nil
)
if leftNil && rightNil {
return match.NewText(r.Str)
}
_, leftSuper := m.Left.(match.Super)
lp, leftPrefix := m.Left.(match.Prefix)
la, leftAny := m.Left.(match.Any)
_, rightSuper := m.Right.(match.Super)
rs, rightSuffix := m.Right.(match.Suffix)
ra, rightAny := m.Right.(match.Any)
if leftSuper && rightSuper {
switch {
case leftSuper && rightSuper:
return match.NewContains(r.Str, false)
}
if leftSuper && rightNil {
case leftSuper && rightNil:
return match.NewSuffix(r.Str)
}
if rightSuper && leftNil {
case rightSuper && leftNil:
return match.NewPrefix(r.Str)
}
if leftNil && rightSuffix {
case leftNil && rightSuffix:
return match.NewPrefixSuffix(r.Str, rs.Suffix)
}
if rightNil && leftPrefix {
case rightNil && leftPrefix:
return match.NewPrefixSuffix(lp.Prefix, r.Str)
case rightNil && leftAny:
return match.NewSuffixAny(r.Str, la.Separators)
case leftNil && rightAny:
return match.NewPrefixAny(r.Str, ra.Separators)
}
return m

View File

@ -120,6 +120,16 @@ func TestGlob(t *testing.T) {
glob(true, "/{rate,[0-9][0-9][0-9]}*", "/rate"),
glob(true, "/{rate,[a-z][a-z][a-z]}*", "/usd"),
glob(true, "{*.google.*,*.yandex.*}", "www.google.com", '.'),
glob(true, "{*.google.*,*.yandex.*}", "www.yandex.com", '.'),
glob(false, "{*.google.*,*.yandex.*}", "yandex.com", '.'),
glob(false, "{*.google.*,*.yandex.*}", "google.com", '.'),
glob(true, "{*.google.*,yandex.*}", "www.google.com", '.'),
glob(true, "{*.google.*,yandex.*}", "yandex.com", '.'),
glob(false, "{*.google.*,yandex.*}", "www.yandex.com", '.'),
glob(false, "{*.google.*,yandex.*}", "google.com", '.'),
glob(true, pattern_all, fixture_all_match),
glob(false, pattern_all, fixture_all_mismatch),
@ -149,16 +159,16 @@ func TestGlob(t *testing.T) {
glob(true, pattern_prefix_suffix, fixture_prefix_suffix_match),
glob(false, pattern_prefix_suffix, fixture_prefix_suffix_mismatch),
} {
g, err := Compile(test.pattern, test.delimiters...)
if err != nil {
t.Errorf("parsing pattern %q error: %s", test.pattern, err)
continue
}
t.Run("", func(t *testing.T) {
g := MustCompile(test.pattern, test.delimiters...)
result := g.Match(test.match)
if result != test.should {
t.Errorf("pattern %q matching %q should be %v but got %v\n%s", test.pattern, test.match, test.should, result, g)
t.Errorf(
"pattern %q matching %q should be %v but got %v\n%s",
test.pattern, test.match, test.should, result, g,
)
}
})
}
}

View File

@ -1,8 +1,6 @@
package match
import (
"fmt"
)
import "fmt"
type AnyOf struct {
Matchers Matchers

55
match/prefix_any.go Normal file
View File

@ -0,0 +1,55 @@
package match
import (
"fmt"
"strings"
"unicode/utf8"
sutil "github.com/gobwas/glob/util/strings"
)
// PrefixAny is a matcher for strings that start with Prefix and whose
// remainder contains none of the Separators runes (see Match).
type PrefixAny struct {
// Prefix is the literal text a matching string must start with.
Prefix string
// Separators lists the runes that must not appear after Prefix.
Separators []rune
}
// NewPrefixAny returns a PrefixAny whose Prefix is s and whose Separators
// are sep. The sep slice is stored as given, not copied.
func NewPrefixAny(s string, sep []rune) PrefixAny {
	return PrefixAny{
		Prefix:     s,
		Separators: sep,
	}
}
// Index locates the first occurrence of Prefix in s and reports the possible
// match lengths starting at that occurrence.
//
// It returns -1 and nil segments when Prefix does not occur in s. Otherwise
// the first result is the byte offset of Prefix in s. The segments are byte
// lengths measured from that offset: first len(Prefix) itself, then one
// entry for the end of every rune that follows the prefix, stopping at the
// index reported by IndexAnyRunes for Separators (or at the end of s when it
// reports none).
func (self PrefixAny) Index(s string) (int, []int) {
	idx := strings.Index(s, self.Prefix)
	if idx == -1 {
		return -1, nil
	}

	n := len(self.Prefix)
	tail := s[idx+n:]
	if i := sutil.IndexAnyRunes(tail, self.Separators); i > -1 {
		tail = tail[:i]
	}

	seg := acquireSegments(len(tail) + 1)
	seg = append(seg, n)

	// Advance by the number of bytes each rune really occupies in tail.
	// Ranging over the string and adding utf8.RuneLen(r) is wrong for invalid
	// UTF-8: the range yields U+FFFD (RuneLen 3) while consuming one byte,
	// which would produce segment ends past the actual rune boundary.
	for off := 0; off < len(tail); {
		_, size := utf8.DecodeRuneInString(tail[off:])
		off += size
		seg = append(seg, n+off)
	}

	return idx, seg
}
// Len returns the package-level lenNo value.
// NOTE(review): lenNo presumably signals that this matcher has no fixed
// match length — confirm against its definition.
func (self PrefixAny) Len() int {
return lenNo
}
// Match reports whether s starts with Prefix and IndexAnyRunes finds none of
// the Separators in the part of s that follows the prefix.
func (self PrefixAny) Match(s string) bool {
	if strings.HasPrefix(s, self.Prefix) {
		rest := s[len(self.Prefix):]
		return sutil.IndexAnyRunes(rest, self.Separators) == -1
	}
	return false
}
// String returns a textual description of the matcher containing its prefix
// and its separators.
func (self PrefixAny) String() string {
	seps := string(self.Separators)
	return fmt.Sprintf("<prefix_any:%s![%s]>", self.Prefix, seps)
}

47
match/prefix_any_test.go Normal file
View File

@ -0,0 +1,47 @@
package match
import (
"reflect"
"testing"
)
// TestPrefixAnyIndex checks the index and segments returned by
// PrefixAny.Index for a set of prefix/separators/fixture combinations.
func TestPrefixAnyIndex(t *testing.T) {
	type testCase struct {
		prefix     string
		separators []rune
		fixture    string
		index      int
		segments   []int
	}

	cases := []testCase{
		{
			prefix:     "ab",
			separators: []rune{'.'},
			fixture:    "ab",
			index:      0,
			segments:   []int{2},
		},
		{
			prefix:     "ab",
			separators: []rune{'.'},
			fixture:    "abc",
			index:      0,
			segments:   []int{2, 3},
		},
		{
			prefix:     "ab",
			separators: []rune{'.'},
			fixture:    "qw.abcd.efg",
			index:      3,
			segments:   []int{2, 3, 4},
		},
	}

	for id, tc := range cases {
		matcher := NewPrefixAny(tc.prefix, tc.separators)
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

43
match/suffix_any.go Normal file
View File

@ -0,0 +1,43 @@
package match
import (
"fmt"
"strings"
sutil "github.com/gobwas/glob/util/strings"
)
// SuffixAny is a matcher for strings that end with Suffix and whose leading
// part contains none of the Separators runes (see Match).
type SuffixAny struct {
// Suffix is the literal text a matching string must end with.
Suffix string
// Separators lists the runes that must not appear before Suffix.
Separators []rune
}
// NewSuffixAny returns a SuffixAny whose Suffix is s and whose Separators
// are sep. The sep slice is stored as given, not copied.
func NewSuffixAny(s string, sep []rune) SuffixAny {
	return SuffixAny{
		Suffix:     s,
		Separators: sep,
	}
}
// Index locates the first occurrence of Suffix in s and reports where a
// match ending with that occurrence starts and how long it is.
//
// It returns -1 and nil segments when Suffix does not occur in s. Otherwise
// the returned index is one past the position LastIndexAnyRunes reports for
// Separators in the text before the suffix (0 when it reports none), and the
// single segment is the byte length from that index to the end of the suffix.
func (self SuffixAny) Index(s string) (int, []int) {
	at := strings.Index(s, self.Suffix)
	if at < 0 {
		return -1, nil
	}

	start := 1 + sutil.LastIndexAnyRunes(s[:at], self.Separators)
	length := at + len(self.Suffix) - start
	return start, []int{length}
}
// Len returns the package-level lenNo value.
// NOTE(review): lenNo presumably signals that this matcher has no fixed
// match length — confirm against its definition.
func (self SuffixAny) Len() int {
return lenNo
}
// Match reports whether s ends with Suffix and IndexAnyRunes finds none of
// the Separators in the part of s that precedes the suffix.
func (self SuffixAny) Match(s string) bool {
	if strings.HasSuffix(s, self.Suffix) {
		head := s[:len(s)-len(self.Suffix)]
		return sutil.IndexAnyRunes(head, self.Separators) == -1
	}
	return false
}
// String returns a textual description of the matcher containing its
// separators and its suffix.
func (self SuffixAny) String() string {
	seps := string(self.Separators)
	return fmt.Sprintf("<suffix_any:![%s]%s>", seps, self.Suffix)
}

47
match/suffix_any_test.go Normal file
View File

@ -0,0 +1,47 @@
package match
import (
"reflect"
"testing"
)
// TestSuffixAnyIndex checks the index and segments returned by
// SuffixAny.Index for a set of suffix/separators/fixture combinations.
func TestSuffixAnyIndex(t *testing.T) {
	type testCase struct {
		suffix     string
		separators []rune
		fixture    string
		index      int
		segments   []int
	}

	cases := []testCase{
		{
			suffix:     "ab",
			separators: []rune{'.'},
			fixture:    "ab",
			index:      0,
			segments:   []int{2},
		},
		{
			suffix:     "ab",
			separators: []rune{'.'},
			fixture:    "cab",
			index:      0,
			segments:   []int{3},
		},
		{
			suffix:     "ab",
			separators: []rune{'.'},
			fixture:    "qw.cdab.efg",
			index:      3,
			segments:   []int{4},
		},
	}

	for id, tc := range cases {
		matcher := NewSuffixAny(tc.suffix, tc.separators)
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -1,6 +1,9 @@
package strings
import "strings"
import (
"strings"
"unicode/utf8"
)
func IndexAnyRunes(s string, rs []rune) int {
for _, r := range rs {
@ -11,3 +14,26 @@ func IndexAnyRunes(s string, rs []rune) int {
return -1
}
// LastIndexAnyRunes returns the byte index in s of the last occurrence of any
// rune from rs, or -1 when s contains none of them (including when s or rs
// is empty).
//
// The string is scanned backwards one rune at a time, so the result is the
// greatest index over all runes in rs, not the last index of whichever rune
// happens to be listed first. Invalid UTF-8 bytes in s decode as
// utf8.RuneError and therefore match only when rs contains utf8.RuneError.
func LastIndexAnyRunes(s string, rs []rune) int {
	for end := len(s); end > 0; {
		// Decode the rune that ends at end; size is at least 1 for a
		// non-empty string, so the loop always makes progress.
		c, size := utf8.DecodeLastRuneInString(s[:end])
		end -= size
		for _, r := range rs {
			if r == c {
				return end
			}
		}
	}
	return -1
}