forked from mirror/glob
Avoid btree Index() call on pattern with separators.
To avoid hard Index()'ing of the given text with the btree matcher, we implement prefix_any and suffix_any matchers that can work well in many cases. The BTree matcher's Index() will be implemented in upcoming commits to prevent the same bugs. Fixes #23
This commit is contained in:
parent
51eb1ee00b
commit
034ebb20be
|
@ -43,37 +43,43 @@ func optimizeMatcher(matcher match.Matcher) match.Matcher {
|
|||
return m
|
||||
}
|
||||
|
||||
leftNil := m.Left == nil
|
||||
rightNil := m.Right == nil
|
||||
|
||||
var (
|
||||
leftNil = m.Left == nil
|
||||
rightNil = m.Right == nil
|
||||
)
|
||||
if leftNil && rightNil {
|
||||
return match.NewText(r.Str)
|
||||
}
|
||||
|
||||
_, leftSuper := m.Left.(match.Super)
|
||||
lp, leftPrefix := m.Left.(match.Prefix)
|
||||
la, leftAny := m.Left.(match.Any)
|
||||
|
||||
_, rightSuper := m.Right.(match.Super)
|
||||
rs, rightSuffix := m.Right.(match.Suffix)
|
||||
ra, rightAny := m.Right.(match.Any)
|
||||
|
||||
if leftSuper && rightSuper {
|
||||
switch {
|
||||
case leftSuper && rightSuper:
|
||||
return match.NewContains(r.Str, false)
|
||||
}
|
||||
|
||||
if leftSuper && rightNil {
|
||||
case leftSuper && rightNil:
|
||||
return match.NewSuffix(r.Str)
|
||||
}
|
||||
|
||||
if rightSuper && leftNil {
|
||||
case rightSuper && leftNil:
|
||||
return match.NewPrefix(r.Str)
|
||||
}
|
||||
|
||||
if leftNil && rightSuffix {
|
||||
case leftNil && rightSuffix:
|
||||
return match.NewPrefixSuffix(r.Str, rs.Suffix)
|
||||
}
|
||||
|
||||
if rightNil && leftPrefix {
|
||||
case rightNil && leftPrefix:
|
||||
return match.NewPrefixSuffix(lp.Prefix, r.Str)
|
||||
|
||||
case rightNil && leftAny:
|
||||
return match.NewSuffixAny(r.Str, la.Separators)
|
||||
|
||||
case leftNil && rightAny:
|
||||
return match.NewPrefixAny(r.Str, ra.Separators)
|
||||
}
|
||||
|
||||
return m
|
||||
|
|
24
glob_test.go
24
glob_test.go
|
@ -120,6 +120,16 @@ func TestGlob(t *testing.T) {
|
|||
glob(true, "/{rate,[0-9][0-9][0-9]}*", "/rate"),
|
||||
glob(true, "/{rate,[a-z][a-z][a-z]}*", "/usd"),
|
||||
|
||||
glob(true, "{*.google.*,*.yandex.*}", "www.google.com", '.'),
|
||||
glob(true, "{*.google.*,*.yandex.*}", "www.yandex.com", '.'),
|
||||
glob(false, "{*.google.*,*.yandex.*}", "yandex.com", '.'),
|
||||
glob(false, "{*.google.*,*.yandex.*}", "google.com", '.'),
|
||||
|
||||
glob(true, "{*.google.*,yandex.*}", "www.google.com", '.'),
|
||||
glob(true, "{*.google.*,yandex.*}", "yandex.com", '.'),
|
||||
glob(false, "{*.google.*,yandex.*}", "www.yandex.com", '.'),
|
||||
glob(false, "{*.google.*,yandex.*}", "google.com", '.'),
|
||||
|
||||
glob(true, pattern_all, fixture_all_match),
|
||||
glob(false, pattern_all, fixture_all_mismatch),
|
||||
|
||||
|
@ -149,16 +159,16 @@ func TestGlob(t *testing.T) {
|
|||
glob(true, pattern_prefix_suffix, fixture_prefix_suffix_match),
|
||||
glob(false, pattern_prefix_suffix, fixture_prefix_suffix_mismatch),
|
||||
} {
|
||||
g, err := Compile(test.pattern, test.delimiters...)
|
||||
if err != nil {
|
||||
t.Errorf("parsing pattern %q error: %s", test.pattern, err)
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run("", func(t *testing.T) {
|
||||
g := MustCompile(test.pattern, test.delimiters...)
|
||||
result := g.Match(test.match)
|
||||
if result != test.should {
|
||||
t.Errorf("pattern %q matching %q should be %v but got %v\n%s", test.pattern, test.match, test.should, result, g)
|
||||
t.Errorf(
|
||||
"pattern %q matching %q should be %v but got %v\n%s",
|
||||
test.pattern, test.match, test.should, result, g,
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
import "fmt"
|
||||
|
||||
type AnyOf struct {
|
||||
Matchers Matchers
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
sutil "github.com/gobwas/glob/util/strings"
|
||||
)
|
||||
|
||||
// PrefixAny is a matcher for text that starts with Prefix and whose
// remainder contains none of the Separators runes.
type PrefixAny struct {
	// Prefix is the literal text the input must begin with.
	Prefix string
	// Separators lists the runes that must not appear after the prefix.
	Separators []rune
}

// NewPrefixAny builds a PrefixAny from the literal prefix s and the
// separator runes sep. The sep slice is stored as given, not copied.
func NewPrefixAny(s string, sep []rune) PrefixAny {
	return PrefixAny{
		Prefix:     s,
		Separators: sep,
	}
}
|
||||
|
||||
func (self PrefixAny) Index(s string) (int, []int) {
|
||||
idx := strings.Index(s, self.Prefix)
|
||||
if idx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
n := len(self.Prefix)
|
||||
sub := s[idx+n:]
|
||||
i := sutil.IndexAnyRunes(sub, self.Separators)
|
||||
if i > -1 {
|
||||
sub = sub[:i]
|
||||
}
|
||||
|
||||
seg := acquireSegments(len(sub) + 1)
|
||||
seg = append(seg, n)
|
||||
for i, r := range sub {
|
||||
seg = append(seg, n+i+utf8.RuneLen(r))
|
||||
}
|
||||
|
||||
return idx, seg
|
||||
}
|
||||
|
||||
func (self PrefixAny) Len() int {
|
||||
return lenNo
|
||||
}
|
||||
|
||||
func (self PrefixAny) Match(s string) bool {
|
||||
if !strings.HasPrefix(s, self.Prefix) {
|
||||
return false
|
||||
}
|
||||
return sutil.IndexAnyRunes(s[len(self.Prefix):], self.Separators) == -1
|
||||
}
|
||||
|
||||
func (self PrefixAny) String() string {
|
||||
return fmt.Sprintf("<prefix_any:%s![%s]>", self.Prefix, string(self.Separators))
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPrefixAnyIndex(t *testing.T) {
|
||||
for id, test := range []struct {
|
||||
prefix string
|
||||
separators []rune
|
||||
fixture string
|
||||
index int
|
||||
segments []int
|
||||
}{
|
||||
{
|
||||
"ab",
|
||||
[]rune{'.'},
|
||||
"ab",
|
||||
0,
|
||||
[]int{2},
|
||||
},
|
||||
{
|
||||
"ab",
|
||||
[]rune{'.'},
|
||||
"abc",
|
||||
0,
|
||||
[]int{2, 3},
|
||||
},
|
||||
{
|
||||
"ab",
|
||||
[]rune{'.'},
|
||||
"qw.abcd.efg",
|
||||
3,
|
||||
[]int{2, 3, 4},
|
||||
},
|
||||
} {
|
||||
p := NewPrefixAny(test.prefix, test.separators)
|
||||
index, segments := p.Index(test.fixture)
|
||||
if index != test.index {
|
||||
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
|
||||
}
|
||||
if !reflect.DeepEqual(segments, test.segments) {
|
||||
t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
sutil "github.com/gobwas/glob/util/strings"
|
||||
)
|
||||
|
||||
// SuffixAny is a matcher for text that ends with Suffix and whose leading
// part (everything before the suffix) contains none of the Separators runes.
type SuffixAny struct {
	// Suffix is the literal text the input must end with.
	Suffix string
	// Separators lists the runes that must not appear before the suffix.
	Separators []rune
}

// NewSuffixAny builds a SuffixAny from the literal suffix s and the
// separator runes sep. The sep slice is stored as given, not copied.
func NewSuffixAny(s string, sep []rune) SuffixAny {
	return SuffixAny{
		Suffix:     s,
		Separators: sep,
	}
}
|
||||
|
||||
func (self SuffixAny) Index(s string) (int, []int) {
|
||||
idx := strings.Index(s, self.Suffix)
|
||||
if idx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
i := sutil.LastIndexAnyRunes(s[:idx], self.Separators) + 1
|
||||
|
||||
return i, []int{idx + len(self.Suffix) - i}
|
||||
}
|
||||
|
||||
func (self SuffixAny) Len() int {
|
||||
return lenNo
|
||||
}
|
||||
|
||||
func (self SuffixAny) Match(s string) bool {
|
||||
if !strings.HasSuffix(s, self.Suffix) {
|
||||
return false
|
||||
}
|
||||
return sutil.IndexAnyRunes(s[:len(s)-len(self.Suffix)], self.Separators) == -1
|
||||
}
|
||||
|
||||
func (self SuffixAny) String() string {
|
||||
return fmt.Sprintf("<suffix_any:![%s]%s>", string(self.Separators), self.Suffix)
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSuffixAnyIndex(t *testing.T) {
|
||||
for id, test := range []struct {
|
||||
suffix string
|
||||
separators []rune
|
||||
fixture string
|
||||
index int
|
||||
segments []int
|
||||
}{
|
||||
{
|
||||
"ab",
|
||||
[]rune{'.'},
|
||||
"ab",
|
||||
0,
|
||||
[]int{2},
|
||||
},
|
||||
{
|
||||
"ab",
|
||||
[]rune{'.'},
|
||||
"cab",
|
||||
0,
|
||||
[]int{3},
|
||||
},
|
||||
{
|
||||
"ab",
|
||||
[]rune{'.'},
|
||||
"qw.cdab.efg",
|
||||
3,
|
||||
[]int{4},
|
||||
},
|
||||
} {
|
||||
p := NewSuffixAny(test.suffix, test.separators)
|
||||
index, segments := p.Index(test.fixture)
|
||||
if index != test.index {
|
||||
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
|
||||
}
|
||||
if !reflect.DeepEqual(segments, test.segments) {
|
||||
t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,6 +1,9 @@
|
|||
package strings
|
||||
|
||||
import "strings"
|
||||
import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func IndexAnyRunes(s string, rs []rune) int {
|
||||
for _, r := range rs {
|
||||
|
@ -11,3 +14,26 @@ func IndexAnyRunes(s string, rs []rune) int {
|
|||
|
||||
return -1
|
||||
}
|
||||
|
||||
// LastIndexAnyRunes returns the byte index of the right-most occurrence in s
// of any rune from rs, or -1 when none of them occurs (including when s or
// rs is empty).
//
// Every rune in rs is considered and the greatest index wins, so the result
// does not depend on the order of rs. Runes that are not valid Unicode code
// points can never be found and are skipped.
func LastIndexAnyRunes(s string, rs []rune) int {
	last := -1
	for _, r := range rs {
		var i int
		switch {
		case 0 <= r && r < utf8.RuneSelf:
			// Single-byte rune: a plain byte search is enough.
			i = strings.LastIndexByte(s, byte(r))
		case utf8.ValidRune(r):
			// Multi-byte rune: search for its UTF-8 encoding.
			i = strings.LastIndex(s, string(r))
		default:
			// string(r) would turn an invalid rune into U+FFFD and produce
			// false positives, so treat it as absent.
			continue
		}
		if i > last {
			last = i
		}
	}

	return last
}
|
||||
|
|
Loading…
Reference in New Issue