This commit is contained in:
gobwas 2016-01-09 02:34:41 +03:00
parent 76b6c27015
commit 66fc4deeeb
22 changed files with 471 additions and 119 deletions

View File

@ -62,6 +62,52 @@ func optimize(matcher match.Matcher) match.Matcher {
}
// glueMatchers tries to collapse matchers into a single matcher.
//
// The strategies are tried in order: glueAsEvery first, then glueAsRow. The
// first non-nil result is returned; nil means neither strategy could glue the
// given matchers.
func glueMatchers(matchers []match.Matcher) match.Matcher {
	if m := glueAsEvery(matchers); m != nil {
		return m
	}
	if m := glueAsRow(matchers); m != nil {
		return m
	}
	return nil
}
// glueAsRow tries to combine matchers into a single match.Row.
//
// An empty slice yields nil and a single matcher is returned unchanged.
// Otherwise every matcher is added to a fresh row; if Row.Add rejects any of
// them the result is nil.
func glueAsRow(matchers []match.Matcher) match.Matcher {
	if len(matchers) == 0 {
		return nil
	}
	if len(matchers) == 1 {
		return matchers[0]
	}

	var row match.Row
	for i := range matchers {
		if err := row.Add(matchers[i]); err != nil {
			return nil
		}
	}
	return row
}
func glueAsEvery(matchers []match.Matcher) match.Matcher {
switch len(matchers) {
case 0:
return nil
@ -147,7 +193,28 @@ func glueMatchers(matchers []match.Matcher) match.Matcher {
return every
}
func convertMatchers(matchers []match.Matcher) (match.Matcher, error) {
// convertMatchers greedily groups consecutive matchers into glued matchers and
// appends the groups to result.
//
// It grows a buffer one matcher at a time for as long as glueMatchers can glue
// the buffer. When adding matchers[idx] makes gluing fail, the last successful
// glue is appended to result and the function recurses on matchers[idx:].
// After the loop, the last successful glue (if any) is appended as well.
func convertMatchers(matchers []match.Matcher, result []match.Matcher) []match.Matcher {
	var (
		pending []match.Matcher
		last    match.Matcher
	)

	for idx := range matchers {
		pending = append(pending, matchers[idx])

		glued := glueMatchers(pending)
		if glued == nil {
			return convertMatchers(matchers[idx:], append(result, last))
		}
		last = glued
	}

	if last == nil {
		return result
	}
	return append(result, last)
}
func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
if m := glueMatchers(matchers); m != nil {
return m, nil
}
@ -156,14 +223,14 @@ func convertMatchers(matchers []match.Matcher) (match.Matcher, error) {
val match.Primitive
idx int
)
maxLen := -1
for i, matcher := range matchers {
if p, ok := matcher.(match.Primitive); ok {
l := p.Len()
if l >= maxLen {
maxLen = l
idx = i
val = p
if _, ok := matcher.(match.Raw); ok {
break
}
}
}
@ -181,7 +248,7 @@ func convertMatchers(matchers []match.Matcher) (match.Matcher, error) {
tree := match.BTree{Value: val}
if len(left) > 0 {
l, err := convertMatchers(left)
l, err := compileMatchers(left)
if err != nil {
return nil, err
}
@ -190,7 +257,7 @@ func convertMatchers(matchers []match.Matcher) (match.Matcher, error) {
}
if len(right) > 0 {
r, err := convertMatchers(right)
r, err := compileMatchers(right)
if err != nil {
return nil, err
}
@ -217,7 +284,7 @@ func do(node node, s string) (m match.Matcher, err error) {
if _, ok := node.(*nodeAnyOf); ok {
m = match.AnyOf{matchers}
} else {
m, err = convertMatchers(matchers)
m, err = compileMatchers(convertMatchers(matchers, nil))
if err != nil {
return nil, err
}

View File

@ -52,7 +52,7 @@ func TestGlueMatchers(t *testing.T) {
}},
},
} {
act, err := convertMatchers(test.in)
act, err := compileMatchers(test.in)
if err != nil {
t.Errorf("#%d convert matchers error: %s", id, err)
continue
@ -65,7 +65,7 @@ func TestGlueMatchers(t *testing.T) {
}
}
func TestConvertMatchers(t *testing.T) {
func TestCompileMatchers(t *testing.T) {
for id, test := range []struct {
in []match.Matcher
exp match.Matcher
@ -96,8 +96,22 @@ func TestConvertMatchers(t *testing.T) {
Right: match.Any{},
},
},
{
[]match.Matcher{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c"},
match.Single{},
},
match.Row{Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c"},
match.Single{},
}},
},
} {
act, err := convertMatchers(test.in)
act, err := compileMatchers(test.in)
if err != nil {
t.Errorf("#%d convert matchers error: %s", id, err)
continue
@ -110,6 +124,58 @@ func TestConvertMatchers(t *testing.T) {
}
}
// TestConvertMatchers2 feeds sequences of matchers to convertMatchers and
// compares the returned slice with the expected grouping using
// reflect.DeepEqual.
func TestConvertMatchers2(t *testing.T) {
	cases := []struct {
		in, exp []match.Matcher
	}{
		{
			[]match.Matcher{
				match.Range{'a', 'c', true},
				match.List{"zte", false},
				match.Raw{"c"},
				match.Single{},
				match.Any{},
			},
			[]match.Matcher{
				match.Row{Matchers: match.Matchers{
					match.Range{'a', 'c', true},
					match.List{"zte", false},
					match.Raw{"c"},
					match.Single{},
				}},
				match.Any{},
			},
		},
		{
			[]match.Matcher{
				match.Range{'a', 'c', true},
				match.List{"zte", false},
				match.Raw{"c"},
				match.Single{},
				match.Any{},
				match.Single{},
				match.Single{},
				match.Any{},
			},
			[]match.Matcher{
				match.Row{Matchers: match.Matchers{
					match.Range{'a', 'c', true},
					match.List{"zte", false},
					match.Raw{"c"},
					match.Single{},
				}},
				match.Min{2},
			},
		},
	}

	for id, test := range cases {
		act := convertMatchers(test.in, nil)
		if reflect.DeepEqual(act, test.exp) {
			continue
		}
		t.Errorf("#%d unexpected convert matchers 2 result:\nact: %s;\nexp: %s", id, act, test.exp)
	}
}
func pattern(nodes ...node) *nodePattern {
return &nodePattern{
nodeImpl: nodeImpl{
@ -184,20 +250,22 @@ func TestCompiler(t *testing.T) {
sep: separators,
result: match.BTree{
Left: match.Any{separators},
Value: match.Raw{"abc"},
Right: match.Single{separators},
Value: match.Row{Matchers: match.Matchers{
match.Raw{"abc"},
match.Single{separators},
}},
},
},
{
ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSingle{}),
sep: separators,
result: match.BTree{
Left: match.BTree{
Left: match.Super{},
Value: match.Single{separators},
},
Value: match.Raw{"abc"},
Right: match.Single{separators},
Value: match.Row{Matchers: match.Matchers{
match.Single{separators},
match.Raw{"abc"},
match.Single{separators},
}},
},
},
{
@ -243,6 +311,20 @@ func TestCompiler(t *testing.T) {
}},
}},
},
{
ast: pattern(
&nodeRange{lo: 'a', hi: 'z'},
&nodeRange{lo: 'a', hi: 'x', not: true},
&nodeAny{},
),
result: match.BTree{
Value: match.Row{Matchers: match.Matchers{
match.Range{Lo: 'a', Hi: 'z'},
match.Range{Lo: 'a', Hi: 'x', Not: true},
}},
Right: match.Super{},
},
},
} {
prog, err := compile(test.ast, test.sep)
if err != nil {

View File

@ -16,6 +16,9 @@ const (
pattern_multiple = "https://*.google.*"
fixture_multiple = "https://account.google.com"
pattern_alternatives = "{https://*.google.*,*yahoo.*}"
fixture_alternatives = "http://yahoo.com"
pattern_prefix = "abc*"
pattern_suffix = "*def"
pattern_prefix_suffix = "ab*ef"
@ -39,7 +42,7 @@ func TestCompilePattern(t *testing.T) {
exp match.Matcher
}{
// {
// pattern: "[!a]*****",
// pattern: "{http://*yandex.ru,b}",
// exp: match.Raw{"t"},
// },
} {
@ -104,6 +107,8 @@ func TestIndexByteNonEscaped(t *testing.T) {
func TestGlob(t *testing.T) {
for _, test := range []test{
glob(true, "* ?at * eyes", "my cat has very bright eyes"),
glob(true, "abc", "abc"),
glob(true, "a*c", "abc"),
glob(true, "a*c", "a12345c"),
@ -119,8 +124,6 @@ func TestGlob(t *testing.T) {
glob(true, `\*`, "*"),
glob(true, "**", "a.b.c", "."),
glob(true, "* ?at * eyes", "my cat has very bright eyes"),
glob(false, "?at", "at"),
glob(false, "?at", "fat", "f"),
glob(false, "a.*", "a.b.c", "."),
@ -138,15 +141,16 @@ func TestGlob(t *testing.T) {
glob(false, "*no*", "this is a test"),
glob(true, "[!a]*", "this is a test3"),
// glob(true, "*abc", "abcabc"),
glob(true, "*abc", "abcabc"),
glob(true, "**abc", "abcabc"),
// glob(true, "???", "abc"),
// glob(true, "?*?", "abc"),
// glob(true, "?*?", "ac"),
glob(true, "???", "abc"),
glob(true, "?*?", "abc"),
glob(true, "?*?", "ac"),
glob(true, pattern_all, fixture_all),
glob(true, pattern_plain, fixture_plain),
glob(true, pattern_multiple, fixture_multiple),
glob(true, pattern_alternatives, fixture_alternatives),
glob(true, pattern_prefix, fixture_prefix_suffix),
glob(true, pattern_suffix, fixture_prefix_suffix),
glob(true, pattern_prefix_suffix, fixture_prefix_suffix),
@ -172,6 +176,8 @@ func BenchmarkParse(b *testing.B) {
func BenchmarkAll(b *testing.B) {
m, _ := Compile(pattern_all)
// fmt.Println("tree all:")
// fmt.Println(m)
for i := 0; i < b.N; i++ {
_ = m.Match(fixture_all)
@ -185,6 +191,13 @@ func BenchmarkMultiple(b *testing.B) {
_ = m.Match(fixture_multiple)
}
}
// BenchmarkAlternatives measures matching fixture_alternatives against the
// glob compiled from pattern_alternatives.
func BenchmarkAlternatives(b *testing.B) {
	m, err := Compile(pattern_alternatives)
	if err != nil {
		// Fail here instead of benchmarking an unusable matcher.
		b.Fatalf("compile %q: %s", pattern_alternatives, err)
	}

	// Keep the one-off compilation out of the measured region.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = m.Match(fixture_alternatives)
	}
}
func BenchmarkPlain(b *testing.B) {
m, _ := Compile(pattern_plain)
@ -213,3 +226,11 @@ func BenchmarkPrefixSuffix(b *testing.B) {
_ = m.Match(fixture_prefix_suffix)
}
}
//BenchmarkParse-8 500000 2235 ns/op
//BenchmarkAll-8 20000000 73.1 ns/op
//BenchmarkMultiple-8 10000000 130 ns/op
//BenchmarkPlain-8 200000000 6.70 ns/op
//BenchmarkPrefix-8 200000000 8.36 ns/op
//BenchmarkSuffix-8 200000000 8.35 ns/op
//BenchmarkPrefixSuffix-8 100000000 13.6 ns/op

View File

@ -13,19 +13,15 @@ func (self Any) Match(s string) bool {
return strings.IndexAny(s, self.Separators) == -1
}
func (self Any) Index(s string) (index, min, max int) {
func (self Any) Index(s string) (index int, segments []int) {
index = -1
for i, r := range []rune(s) {
for i, r := range s {
if strings.IndexRune(self.Separators, r) == -1 {
if index == -1 {
index = i
}
max++
continue
}
if index != -1 {
segments = append(segments, i-index)
} else if index != -1 {
break
}
}
@ -33,6 +29,10 @@ func (self Any) Index(s string) (index, min, max int) {
return
}
// Len returns -1, the value Row.Add rejects as not being a fixed length.
func (self Any) Len() int {
return -1
}
// Kind returns KindAny.
func (self Any) Kind() Kind {
return KindAny
}

View File

@ -8,8 +8,9 @@ type AnyOf struct {
Matchers Matchers
}
func (self *AnyOf) Add(m Matcher) {
func (self *AnyOf) Add(m Matcher) error {
self.Matchers = append(self.Matchers, m)
return nil
}
func (self AnyOf) Match(s string) bool {
@ -22,6 +23,10 @@ func (self AnyOf) Match(s string) bool {
return false
}
// Len returns -1, the value Row.Add rejects as not being a fixed length.
func (self AnyOf) Len() int {
return -1
}
// Kind returns KindAnyOf.
func (self AnyOf) Kind() Kind {
return KindAnyOf
}

View File

@ -2,6 +2,7 @@ package match
import (
"fmt"
"unicode/utf8"
)
type BTree struct {
@ -13,37 +14,79 @@ func (self BTree) Kind() Kind {
return KindBTree
}
// len returns the lengths reported by the left branch, the value and the right
// branch of the tree. A nil Left or Right contributes 0. ok is true only when
// all three lengths are greater than -1.
func (self BTree) len() (l, v, r int, ok bool) {
	v = self.Value.Len()
	if left := self.Left; left != nil {
		l = left.Len()
	}
	if right := self.Right; right != nil {
		r = right.Len()
	}
	return l, v, r, l >= 0 && v >= 0 && r >= 0
}
// Len returns the sum of the left, value and right lengths when self.len
// reports all of them as known, and -1 otherwise.
func (self BTree) Len() int {
	left, value, right, known := self.len()
	if !known {
		return -1
	}
	return left + value + right
}
func (self BTree) Match(s string) bool {
runes := []rune(s)
inputLen := len(runes)
inputLen := len(s)
for offset := 0; offset < inputLen; {
index, min, max := self.Value.Index(string(runes[offset:]))
lLen, vLen, rLen, ok := self.len()
if ok && lLen+vLen+rLen > inputLen {
return false
}
var offset, limit int
if lLen >= 0 {
offset = lLen
}
if rLen >= 0 {
limit = inputLen - rLen
} else {
limit = inputLen
}
for offset < limit {
index, segments := self.Value.Index(s[offset:limit])
if index == -1 {
return false
}
for length := min; length <= max; length++ {
var left, right bool
l := string(runes[:offset+index])
l := string(s[:offset+index])
var left bool
if self.Left != nil {
left = self.Left.Match(l)
} else {
left = l == ""
}
if !left {
break
if left {
for i := len(segments) - 1; i >= 0; i-- {
length := segments[i]
if rLen >= 0 && inputLen-(offset+index+length) != rLen {
continue
}
var right bool
var r string
// if there is no string for the right branch
if inputLen <= offset+index+length {
r = ""
} else {
r = string(runes[offset+index+length:])
r = s[offset+index+length:]
}
if self.Right != nil {
@ -52,12 +95,14 @@ func (self BTree) Match(s string) bool {
right = r == ""
}
if left && right {
if right {
return true
}
}
}
offset += index + 1
_, step := utf8.DecodeRuneInString(s[offset+index:])
offset += index + step
}
return false

View File

@ -14,6 +14,10 @@ func (self Contains) Match(s string) bool {
return strings.Contains(s, self.Needle) != self.Not
}
// Len returns -1, the value Row.Add rejects as not being a fixed length.
func (self Contains) Len() int {
return -1
}
// Kind returns KindContains.
func (self Contains) Kind() Kind {
return KindContains
}

View File

@ -8,8 +8,21 @@ type Every struct {
Matchers Matchers
}
func (self *Every) Add(m Matcher) {
func (self *Every) Add(m Matcher) error {
self.Matchers = append(self.Matchers, m)
return nil
}
// Len returns 0 when Matchers is empty and -1 otherwise.
//
// NOTE(review): the condition below tests the accumulator l, which is still 0
// on the first iteration, rather than the matcher length ml. The summing
// branch is therefore never taken and every non-empty Every reports -1. This
// looks like a typo for a check on ml, but confirm the intended semantics
// before changing it: -1 is the value Row.Add treats as "no fixed length".
func (self Every) Len() (l int) {
for _, m := range self.Matchers {
if ml := m.Len(); l > 0 {
l += ml
} else {
return -1
}
}
return
}
func (self Every) Match(s string) bool {

View File

@ -3,6 +3,7 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
)
type List struct {
@ -15,7 +16,7 @@ func (self List) Kind() Kind {
}
func (self List) Match(s string) bool {
if len([]rune(s)) != 1 {
if utf8.RuneCountInString(s) > 1 {
return false
}
@ -24,14 +25,18 @@ func (self List) Match(s string) bool {
return inList == !self.Not
}
func (self List) Index(s string) (index, min, max int) {
for i, r := range []rune(s) {
// Len returns 1 for every List.
func (self List) Len() int {
return 1
}
func (self List) Index(s string) (int, []int) {
for i, r := range s {
if self.Not == (strings.IndexRune(self.List, r) == -1) {
return i, 1, 1
return i, []int{utf8.RuneLen(r)}
}
}
return -1, 0, 0
return -1, nil
}
func (self List) String() string {

View File

@ -29,10 +29,12 @@ const (
// Matcher is the interface shared by the matchers in this package.
type Matcher interface {
// Match takes the string to test and reports the result as a bool.
Match(string) bool
// Len reports the matcher's length as an int. Implementations in this
// package return -1 when they have no fixed length; Row.Add rejects
// matchers that do so.
Len() int
}
type Primitive interface {
Index(string) (int, int, int)
Matcher
Index(string) (int, []int)
}
type Matchers []Matcher

View File

@ -1,13 +1,20 @@
package match
import "fmt"
import (
"fmt"
"unicode/utf8"
)
type Max struct {
Limit int
}
func (self Max) Match(s string) bool {
return len([]rune(s)) <= self.Limit
return utf8.RuneCountInString(s) <= self.Limit
}
// Len returns -1, the value Row.Add rejects as not being a fixed length.
func (self Max) Len() int {
return -1
}
func (self Max) Search(s string) (int, int, bool) {

View File

@ -1,13 +1,20 @@
package match
import "fmt"
import (
"fmt"
"unicode/utf8"
)
type Min struct {
Limit int
}
func (self Min) Match(s string) bool {
return len([]rune(s)) >= self.Limit
return utf8.RuneCountInString(s) >= self.Limit
}
// Len returns -1, the value Row.Add rejects as not being a fixed length.
func (self Min) Len() int {
return -1
}
func (self Min) Search(s string) (int, int, bool) {

View File

@ -1,11 +1,10 @@
package match
import (
"strings"
"fmt"
"strings"
)
type Prefix struct {
Prefix string
}
@ -14,6 +13,10 @@ func (self Prefix) Kind() Kind {
return KindPrefix
}
// Len returns -1, the value Row.Add rejects as not being a fixed length.
func (self Prefix) Len() int {
return -1
}
func (self Prefix) Search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
@ -26,7 +29,6 @@ func (self Prefix) Match(s string) bool {
return strings.HasPrefix(s, self.Prefix)
}
// String returns the prefix formatted as "[prefix:<Prefix>]".
func (self Prefix) String() string {
return fmt.Sprintf("[prefix:%s]", self.Prefix)
}

View File

@ -13,6 +13,10 @@ func (self PrefixSuffix) Kind() Kind {
return KindPrefixSuffix
}
// Len returns -1, the value Row.Add rejects as not being a fixed length.
func (self PrefixSuffix) Len() int {
return -1
}
func (self PrefixSuffix) Search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true

View File

@ -2,6 +2,7 @@ package match
import (
"fmt"
"unicode/utf8"
)
type Range struct {
@ -13,28 +14,31 @@ func (self Range) Kind() Kind {
return KindRange
}
func (self Range) Match(s string) bool {
r := []rune(s)
// Len returns 1 for every Range.
func (self Range) Len() int {
return 1
}
if len(r) != 1 {
func (self Range) Match(s string) bool {
r, w := utf8.DecodeRuneInString(s)
if len(s) > w {
return false
}
inRange := r[0] >= self.Lo && r[0] <= self.Hi
inRange := r >= self.Lo && r <= self.Hi
return inRange == !self.Not
}
func (self Range) Index(s string) (index, min, max int) {
for i, r := range []rune(s) {
func (self Range) Index(s string) (int, []int) {
for i, r := range s {
if self.Not != (r >= self.Lo && r <= self.Hi) {
return i, 1, 1
return i, []int{utf8.RuneLen(r)}
}
}
return -1, 0, 0
return -1, nil
}
func (self Range) String() string {
return fmt.Sprintf("[range_between:%s-%s(%t)]", self.Lo, self.Hi, self.Not)
return fmt.Sprintf("[range:%s-%s(%t)]", string(self.Lo), string(self.Hi), self.Not)
}

View File

@ -14,18 +14,21 @@ func (self Raw) Match(s string) bool {
return self.Str == s
}
// Len returns len(self.Str), the length of the raw string in bytes.
func (self Raw) Len() int {
return len(self.Str)
}
// Kind returns KindRaw.
func (self Raw) Kind() Kind {
return KindRaw
}
func (self Raw) Index(s string) (index, min, max int) {
func (self Raw) Index(s string) (index int, segments []int) {
index = strings.Index(s, self.Str)
if index == -1 {
return
}
min = len(self.Str)
max = min
segments = []int{len(self.Str)}
return
}

66
match/row.go Normal file
View File

@ -0,0 +1,66 @@
package match
import (
"fmt"
)
// Row matches a string against a sequence of fixed-length matchers laid out
// one after another. The Len of each matcher is used as a byte offset when
// slicing the input.
type Row struct {
	Matchers Matchers
}

// Add appends m to the row.
//
// It returns an error when m reports a length of -1, because the row cannot
// slice its input for a matcher without a fixed length.
func (self *Row) Add(m Matcher) error {
	if m.Len() == -1 {
		return fmt.Errorf("matcher should have fixed length")
	}
	self.Matchers = append(self.Matchers, m)
	return nil
}

// Match reports whether the whole of s is matched by the row: s must be
// exactly Len bytes long and every matcher must accept its own slice of s.
func (self Row) Match(s string) bool {
	return len(s) == self.Len() && self.matchPrefix(s)
}

// matchPrefix reports whether s starts with a sequence accepted by the row.
// Bytes of s after the row's length are ignored.
func (self Row) matchPrefix(s string) bool {
	var idx int
	for _, m := range self.Matchers {
		l := m.Len()
		// A row built from a literal may hold a matcher that Add would have
		// rejected; treat it as a mismatch instead of slicing out of range.
		if l < 0 || idx+l > len(s) {
			return false
		}
		if !m.Match(s[idx : idx+l]) {
			return false
		}
		idx += l
	}
	return true
}

// Len returns the sum of the lengths of the row's matchers.
func (self Row) Len() (l int) {
	for _, m := range self.Matchers {
		l += m.Len()
	}
	return l
}

// Index returns the byte offset of the first position in s at which the row
// matches, together with a single segment holding the row's length. It returns
// -1 and nil when the row does not match anywhere in s.
func (self Row) Index(s string) (int, []int) {
	length := self.Len()
	for i := range s {
		if len(s)-i < length {
			// The remainder is already shorter than the row.
			break
		}
		if self.matchPrefix(s[i:]) {
			return i, []int{length}
		}
	}
	return -1, nil
}

// Kind returns KindMin.
//
// NOTE(review): this looks copied from Min; confirm whether Row should have
// its own Kind value.
func (self Row) Kind() Kind {
	return KindMin
}

// String returns the row formatted as "[row:<Matchers>]".
func (self Row) String() string {
	return fmt.Sprintf("[row:%s]", self.Matchers)
}

View File

@ -3,6 +3,7 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
)
// single represents ?
@ -11,17 +12,21 @@ type Single struct {
}
func (self Single) Match(s string) bool {
return len([]rune(s)) == 1 && strings.IndexAny(s, self.Separators) == -1
return utf8.RuneCountInString(s) == 1 && strings.IndexAny(s, self.Separators) == -1
}
func (self Single) Index(s string) (index, min, max int) {
for i, c := range []rune(s) {
if strings.IndexRune(self.Separators, c) == -1 {
return i, 1, 1
// Len returns 1 for every Single.
func (self Single) Len() int {
return 1
}
func (self Single) Index(s string) (int, []int) {
for i, r := range s {
if strings.IndexRune(self.Separators, r) == -1 {
return i, []int{utf8.RuneLen(r)}
}
}
return -1, 0, 0
return -1, nil
}
func (self Single) Kind() Kind {

View File

@ -1,14 +1,10 @@
package match
import (
"strings"
"fmt"
"strings"
)
type Suffix struct {
Suffix string
}
@ -17,6 +13,10 @@ func (self Suffix) Kind() Kind {
return KindSuffix
}
// Len returns -1, the value Row.Add rejects as not being a fixed length.
func (self Suffix) Len() int {
return -1
}
func (self Suffix) Search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
@ -32,5 +32,3 @@ func (self Suffix) Match(s string) bool {
// String returns the suffix formatted as "[suffix:<Suffix>]".
func (self Suffix) String() string {
return fmt.Sprintf("[suffix:%s]", self.Suffix)
}

View File

@ -2,6 +2,7 @@ package match
import (
"fmt"
"unicode/utf8"
)
type Super struct{}
@ -10,8 +11,19 @@ func (self Super) Match(s string) bool {
return true
}
func (self Super) Index(s string) (index, min, max int) {
return 0, 0, len([]rune(s))
// Len returns -1, the value Row.Add rejects as not being a fixed length.
func (self Super) Len() int {
return -1
}
// Index always returns index 0 together with one segment per rune boundary of
// s: the byte offset of every rune start, followed by len(s). For an empty s
// the only segment is 0.
func (self Super) Index(s string) (int, []int) {
	// Allocate with length 0 and capacity for every boundary. Creating the
	// slice with a non-zero length and then appending would leave that many
	// spurious zero segments at the front.
	segments := make([]int, 0, utf8.RuneCountInString(s)+1)
	for i := range s {
		segments = append(segments, i)
	}
	segments = append(segments, len(s))

	return 0, segments
}
func (self Super) Kind() Kind {

View File

@ -3,6 +3,7 @@ package glob
import (
"errors"
"fmt"
"unicode/utf8"
)
type node interface {
@ -172,23 +173,23 @@ func parserRange(tree *tree, lexer *lexer) (parseFn, error) {
not = true
case item_range_lo:
r := []rune(item.s)
if len(r) != 1 {
r, w := utf8.DecodeRuneInString(item.s)
if len(item.s) > w {
return nil, fmt.Errorf("unexpected length of lo character")
}
lo = r[0]
lo = r
case item_range_between:
//
case item_range_hi:
r := []rune(item.s)
if len(r) != 1 {
return nil, fmt.Errorf("unexpected length of hi character")
r, w := utf8.DecodeRuneInString(item.s)
if len(item.s) > w {
return nil, fmt.Errorf("unexpected length of lo character")
}
hi = r[0]
hi = r
if hi < lo {
return nil, fmt.Errorf("hi character '%s' should be greater than lo '%s'", string(hi), string(lo))

View File

@ -1,6 +1,5 @@
package glob
import (
"strings"
)