Fixes, cleanup

This commit is contained in:
s.kamardin 2016-01-15 19:50:12 +03:00
parent d2a191e0f0
commit 55776ffb29
11 changed files with 560 additions and 98 deletions

View File

@ -3,6 +3,7 @@ package glob
import (
"fmt"
"github.com/gobwas/glob/match"
"reflect"
)
func optimize(matcher match.Matcher) match.Matcher {
@ -13,6 +14,13 @@ func optimize(matcher match.Matcher) match.Matcher {
return match.Super{}
}
case match.AnyOf:
if len(m.Matchers) == 1 {
return m.Matchers[0]
}
return m
case match.BTree:
m.Left = optimize(m.Left)
m.Right = optimize(m.Right)
@ -235,6 +243,124 @@ func minimizeMatchers(matchers []match.Matcher) []match.Matcher {
return minimizeMatchers(next)
}
// minimizeAnyOf tries to factor the nodes shared by every alternative of an
// any-of group out of that group.
//
// Every element of children must be a *nodePattern. The leading and trailing
// child nodes that are deeply equal across all alternatives are pulled out, and
// the result is a pattern of the form
//
//	[common prefix] [what is left of each alternative] [common suffix]
//
// where the prefix and suffix parts are only present when non-empty. An
// alternative that consists solely of the common parts contributes a nil entry
// to the remaining alternatives; duplicates among them are removed.
//
// It returns nil when children is empty, when any child is not a *nodePattern,
// or when the alternatives share nothing at either end.
func minimizeAnyOf(children []node) node {
	// Nothing to factor out; this also keeps nodes[idx] below in range.
	if len(children) == 0 {
		return nil
	}

	var (
		nodes  [][]node
		minLen int
		idx    int
	)
	for i, desc := range children {
		pat, ok := desc.(*nodePattern)
		if !ok {
			return nil
		}

		n := pat.children()
		if len(nodes) == 0 || len(n) < minLen {
			minLen = len(n)
			idx = i
		}

		nodes = append(nodes, n)
	}

	// The shortest alternative bounds how much can be common, so it is taken
	// out of the list and used as the reference for the comparisons.
	minNodes := nodes[idx]
	if idx+1 < len(nodes) {
		nodes = append(nodes[:idx], nodes[idx+1:]...)
	} else {
		nodes = nodes[:idx]
	}

	// Longest run of leading nodes equal in every alternative.
	var commonLeft []node
	var commonLeftCount int
	for i, n := range minNodes {
		has := true
		for _, t := range nodes {
			if !reflect.DeepEqual(n, t[i]) {
				has = false
				break
			}
		}
		if !has {
			break
		}

		commonLeft = append(commonLeft, n)
		commonLeftCount++
	}

	// Longest run of trailing nodes equal in every alternative, aligned from
	// the right and never overlapping the prefix found above.
	var commonRightCount int
	for i := minLen - 1; i > commonLeftCount-1; i-- {
		n := minNodes[i]
		has := true
		for _, t := range nodes {
			if !reflect.DeepEqual(n, t[len(t)-(minLen-i)]) {
				has = false
				break
			}
		}
		if !has {
			break
		}

		commonRightCount++
	}

	if commonLeftCount == 0 && commonRightCount == 0 {
		return nil
	}

	// The suffix is discovered back to front; copy it out of the reference
	// alternative so that it keeps its source order in the resulting pattern.
	commonRight := append([]node(nil), minNodes[minLen-commonRightCount:]...)

	// Put the reference alternative back: it is appended and then swapped with
	// the element at idx, which returns it to its original position.
	nodes = append(nodes, minNodes)
	nodes[len(nodes)-1], nodes[idx] = nodes[idx], nodes[len(nodes)-1]

	var result []node
	if commonLeftCount > 0 {
		result = append(result, &nodePattern{nodeImpl: nodeImpl{desc: commonLeft}})
	}

	var anyOf []node
	for _, n := range nodes {
		if commonLeftCount+commonRightCount == len(n) {
			// Fully consumed by the common parts: nothing is left to match.
			anyOf = append(anyOf, nil)
		} else {
			anyOf = append(anyOf, &nodePattern{nodeImpl: nodeImpl{desc: n[commonLeftCount : len(n)-commonRightCount]}})
		}
	}

	anyOf = uniqueNodes(anyOf)
	if len(anyOf) == 1 {
		// A single distinct remainder needs no any-of; a nil remainder adds
		// nothing at all.
		if anyOf[0] != nil {
			result = append(result, &nodePattern{nodeImpl: nodeImpl{desc: anyOf}})
		}
	} else {
		result = append(result, &nodeAnyOf{nodeImpl: nodeImpl{desc: anyOf}})
	}

	if commonRightCount > 0 {
		result = append(result, &nodePattern{nodeImpl: nodeImpl{desc: commonRight}})
	}

	return &nodePattern{nodeImpl: nodeImpl{desc: result}}
}
// uniqueNodes returns the elements of nodes with duplicates dropped, keeping
// the first occurrence of each and preserving their relative order. Two nodes
// are duplicates when reflect.DeepEqual reports them equal.
func uniqueNodes(nodes []node) (result []node) {
	for _, candidate := range nodes {
		seen := false
		for _, kept := range result {
			if reflect.DeepEqual(kept, candidate) {
				seen = true
				break
			}
		}

		if !seen {
			result = append(result, candidate)
		}
	}

	return result
}
func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
if len(matchers) == 0 {
return nil, fmt.Errorf("compile error: need at least one matcher")
@ -287,12 +413,61 @@ func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
return match.NewBTree(val, l, r), nil
}
func do(node node, s string) (m match.Matcher, err error) {
switch n := node.(type) {
//func complexity(m match.Matcher) int {
// var matchers []match.Matcher
// var k int
//
// switch matcher := m.(type) {
//
// case match.Nothing:
// return 0
//
// case match.Max, match.Range, match.Suffix, match.Text:
// return 1
//
// case match.PrefixSuffix, match.Single, match.Row:
// return 2
//
// case match.Any, match.Contains, match.List, match.Min, match.Prefix, match.Super:
// return 4
//
// case match.BTree:
// matchers = append(matchers, matcher.Value)
// if matcher.Left != nil {
// matchers = append(matchers, matcher.Left)
// }
// if matcher.Right != nil {
// matchers = append(matchers, matcher.Right)
// }
// k = 1
//
// case match.AnyOf:
// matchers = matcher.Matchers
// k = 1
// case match.EveryOf:
// matchers = matcher.Matchers
// k = 1
//
// default:
// return 0
// }
//
// var sum int
// for _, m := range matchers {
// sum += complexity(m)
// }
//
// return sum * k
//}
case *nodePattern, *nodeAnyOf:
func doAnyOf(n *nodeAnyOf, s string) (match.Matcher, error) {
var matchers []match.Matcher
for _, desc := range node.children() {
for _, desc := range n.children() {
if desc == nil {
matchers = append(matchers, match.Nothing{})
continue
}
m, err := do(desc, s)
if err != nil {
return nil, err
@ -300,13 +475,52 @@ func do(node node, s string) (m match.Matcher, err error) {
matchers = append(matchers, optimize(m))
}
if _, ok := node.(*nodeAnyOf); ok {
m = match.AnyOf{matchers}
} else {
m, err = compileMatchers(minimizeMatchers(matchers))
return match.AnyOf{matchers}, nil
}
func do(leaf node, s string) (m match.Matcher, err error) {
switch n := leaf.(type) {
case *nodeAnyOf:
// todo this could be faster on pattern_alternatives_combine_lite
if n := minimizeAnyOf(n.children()); n != nil {
return do(n, s)
}
var matchers []match.Matcher
for _, desc := range n.children() {
if desc == nil {
matchers = append(matchers, match.Nothing{})
continue
}
m, err := do(desc, s)
if err != nil {
return nil, err
}
matchers = append(matchers, optimize(m))
}
return match.AnyOf{matchers}, nil
case *nodePattern:
nodes := leaf.children()
if len(nodes) == 0 {
return match.Nothing{}, nil
}
var matchers []match.Matcher
for _, desc := range nodes {
m, err := do(desc, s)
if err != nil {
return nil, err
}
matchers = append(matchers, optimize(m))
}
m, err = compileMatchers(minimizeMatchers(matchers))
if err != nil {
return nil, err
}
case *nodeList:

View File

@ -321,17 +321,38 @@ func TestCompiler(t *testing.T) {
},
{
ast: pattern(anyOf(&nodeText{text: "abc"})),
result: match.AnyOf{match.Matchers{
match.NewText("abc"),
}},
result: match.NewText("abc"),
},
{
ast: pattern(anyOf(pattern(anyOf(pattern(&nodeText{text: "abc"}))))),
result: match.AnyOf{match.Matchers{
match.AnyOf{match.Matchers{
result: match.NewText("abc"),
},
{
ast: pattern(anyOf(
pattern(
&nodeText{text: "abc"},
&nodeSingle{},
),
pattern(
&nodeText{text: "abc"},
&nodeList{chars: "def"},
),
pattern(
&nodeText{text: "abc"},
),
pattern(
&nodeText{text: "abc"},
),
)),
result: match.NewBTree(
match.NewText("abc"),
nil,
match.AnyOf{Matchers: match.Matchers{
match.Single{},
match.List{List: "def"},
match.Nothing{},
}},
}},
),
},
{
ast: pattern(
@ -351,6 +372,31 @@ func TestCompiler(t *testing.T) {
match.Super{},
),
},
{
ast: pattern(anyOf(
pattern(
&nodeText{text: "abc"},
&nodeList{chars: "abc"},
&nodeText{text: "ghi"},
),
pattern(
&nodeText{text: "abc"},
&nodeList{chars: "def"},
&nodeText{text: "ghi"},
),
)),
result: match.Row{
RunesLength: 7,
Matchers: match.Matchers{
match.NewText("abc"),
match.AnyOf{Matchers: match.Matchers{
match.List{List: "abc"},
match.List{List: "def"},
}},
match.NewText("ghi"),
},
},
},
// {
// ast: pattern(
// anyOf(&nodeText{text: "a"}, &nodeText{text: "b"}),
@ -376,3 +422,125 @@ func TestCompiler(t *testing.T) {
}
}
}
const complexityString = "abcd"
//func BenchmarkComplexityAny(b *testing.B) {
// m := match.Any{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityContains(b *testing.B) {
// m := match.Contains{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityList(b *testing.B) {
// m := match.List{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityMax(b *testing.B) {
// m := match.Max{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityMin(b *testing.B) {
// m := match.Min{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityNothing(b *testing.B) {
// m := match.Nothing{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityPrefix(b *testing.B) {
// m := match.Prefix{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityPrefixSuffix(b *testing.B) {
// m := match.PrefixSuffix{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityRange(b *testing.B) {
// m := match.Range{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityRow(b *testing.B) {
// m := match.Row{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexitySingle(b *testing.B) {
// m := match.Single{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexitySuffix(b *testing.B) {
// m := match.Suffix{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexitySuper(b *testing.B) {
// m := match.Super{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityText(b *testing.B) {
// m := match.Text{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityAnyOf(b *testing.B) {
// m := match.AnyOf{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityBTree(b *testing.B) {
// m := match.NewBTree(match.NewText("abc"), match.NewText("d"), match.NewText("e"))
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityEveryOf(b *testing.B) {
// m := match.EveryOf{}
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}

View File

@ -5,7 +5,7 @@ import (
"fmt"
"github.com/gobwas/glob/match"
"math/rand"
"reflect"
"strings"
"testing"
)
@ -22,10 +22,20 @@ const (
pattern_alternatives = "{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}"
fixture_alternatives = "http://yahoo.com"
pattern_alternatives_suffix = "{https://*gobwas.com,http://exclude.gobwas.com}"
fixture_alternatives_suffix_first = "https://safe.gobwas.com"
fixture_alternatives_suffix_second = "http://exclude.gobwas.com"
pattern_prefix = "abc*"
pattern_suffix = "*def"
pattern_prefix_suffix = "ab*ef"
fixture_prefix_suffix = "abcdef"
pattern_alternatives_combine_lite = "{abc*def,abc?def,abc[zte]def}"
fixture_alternatives_combine_lite = "abczdef"
pattern_alternatives_combine_hard = "{abc*[a-c]def,abc?[d-g]def,abc[zte]?def}"
fixture_alternatives_combine_hard = "abczqdef"
)
type test struct {
@ -39,63 +49,66 @@ func glob(s bool, p, m string, d ...string) test {
}
func draw(pattern string, m match.Matcher) string {
if tree, ok := m.(match.BTree); ok {
return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, graphviz(tree, fmt.Sprintf("%x", rand.Int63())))
return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, graphviz(m, fmt.Sprintf("%x", rand.Int63())))
}
return m.String()
}
func graphviz(tree match.BTree, id string) string {
func graphviz(m match.Matcher, id string) string {
buf := &bytes.Buffer{}
fmt.Fprintf(buf, `"%s"[label="%s"];`, id, tree.Value.String())
for _, m := range []match.Matcher{tree.Left, tree.Right} {
switch matcher := m.(type) {
case match.BTree:
fmt.Fprintf(buf, `"%s"[label="%s"];`, id, matcher.Value.String())
for _, m := range []match.Matcher{matcher.Left, matcher.Right} {
switch n := m.(type) {
case nil:
rnd := rand.Int63()
fmt.Fprintf(buf, `"%x"[label="<nil>"];`, rnd)
// fmt.Fprintf(buf, `"%s"->"%x"[label="len = 0"];`, id, rnd)
fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
case match.BTree:
sub := fmt.Sprintf("%x", rand.Int63())
// fmt.Fprintf(buf, `"%s"->"%s"[label="len=%d"];`, id, sub, n.Len())
fmt.Fprintf(buf, `"%s"->"%s";`, id, sub)
fmt.Fprintf(buf, graphviz(n, sub))
default:
sub := fmt.Sprintf("%x", rand.Int63())
fmt.Fprintf(buf, `"%s"->"%s";`, id, sub)
fmt.Fprintf(buf, graphviz(n, sub))
}
}
case match.AnyOf:
fmt.Fprintf(buf, `"%s"[label="AnyOf"];`, id)
for _, m := range matcher.Matchers {
rnd := rand.Int63()
fmt.Fprintf(buf, `"%x"[label="%s"];`, rnd, m.String())
// fmt.Fprintf(buf, `"%s"->"%x"[label="len = %d"];`, id, rnd, m.Len())
fmt.Fprintf(buf, graphviz(m, fmt.Sprintf("%x", rnd)))
fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
}
case match.EveryOf:
fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id)
for _, m := range matcher.Matchers {
rnd := rand.Int63()
fmt.Fprintf(buf, graphviz(m, fmt.Sprintf("%x", rnd)))
fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
}
default:
fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m.String())
}
return buf.String()
}
func TestCompilePattern(t *testing.T) {
func DrawPatterns(t *testing.T) {
for id, test := range []struct {
pattern string
sep string
exp match.Matcher
}{
// {
// pattern: "left*??B*abcd*[!b]??*abc*right",
// exp: match.Raw{"t"},
// },
// {
// pattern: "abc*??def",
// exp: match.Raw{"t"},
// },
{
pattern: "{abc[abc]ghi,abc[def]ghi}",
exp: match.NewBTree(
match.AnyOf{match.Matchers{match.List{"abc", false}, match.List{"qwe", false}}},
match.NewText("abc"),
match.NewText("ghi"),
),
pattern: pattern_alternatives_suffix,
sep: separators,
},
{
pattern: pattern_alternatives_combine_lite,
},
{
pattern: pattern_alternatives_combine_hard,
},
} {
glob, err := Compile(test.pattern, test.sep)
@ -105,10 +118,12 @@ func TestCompilePattern(t *testing.T) {
}
matcher := glob.(match.Matcher)
if !reflect.DeepEqual(test.exp, matcher) {
t.Errorf("#%d unexpected compilation:\nexp: %s\nact: %s", id, test.exp, draw(test.pattern, matcher))
continue
}
fmt.Println(test.pattern)
fmt.Println(strings.Repeat("=", len(test.pattern)))
fmt.Println(draw(test.pattern, matcher))
fmt.Println()
fmt.Println(matcher.String())
fmt.Println()
}
}
@ -208,6 +223,10 @@ func TestGlob(t *testing.T) {
glob(true, pattern_plain, fixture_plain),
glob(true, pattern_multiple, fixture_multiple),
glob(true, pattern_alternatives, fixture_alternatives),
glob(true, pattern_alternatives_suffix, fixture_alternatives_suffix_first),
glob(true, pattern_alternatives_suffix, fixture_alternatives_suffix_second),
glob(true, pattern_alternatives_combine_hard, fixture_alternatives_combine_hard),
glob(true, pattern_alternatives_combine_lite, fixture_alternatives_combine_lite),
glob(true, pattern_prefix, fixture_prefix_suffix),
glob(true, pattern_suffix, fixture_prefix_suffix),
glob(true, pattern_prefix_suffix, fixture_prefix_suffix),
@ -255,6 +274,34 @@ func BenchmarkAlternatives(b *testing.B) {
_ = m.Match(fixture_alternatives)
}
}
// BenchmarkAlternativesSuffixFirst measures matching
// fixture_alternatives_suffix_first against pattern_alternatives_suffix.
func BenchmarkAlternativesSuffixFirst(b *testing.B) {
	m, err := Compile(pattern_alternatives_suffix)
	if err != nil {
		// Fail with the compile error instead of a nil dereference in the loop.
		b.Fatalf("compile %q: %v", pattern_alternatives_suffix, err)
	}

	// Keep the one-off compilation out of the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = m.Match(fixture_alternatives_suffix_first)
	}
}
// BenchmarkAlternativesSuffixSecond measures matching
// fixture_alternatives_suffix_second against pattern_alternatives_suffix.
func BenchmarkAlternativesSuffixSecond(b *testing.B) {
	m, err := Compile(pattern_alternatives_suffix)
	if err != nil {
		// Fail with the compile error instead of a nil dereference in the loop.
		b.Fatalf("compile %q: %v", pattern_alternatives_suffix, err)
	}

	// Keep the one-off compilation out of the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = m.Match(fixture_alternatives_suffix_second)
	}
}
// BenchmarkAlternativesCombineLite measures matching
// fixture_alternatives_combine_lite against pattern_alternatives_combine_lite.
func BenchmarkAlternativesCombineLite(b *testing.B) {
	m, err := Compile(pattern_alternatives_combine_lite)
	if err != nil {
		// Fail with the compile error instead of a nil dereference in the loop.
		b.Fatalf("compile %q: %v", pattern_alternatives_combine_lite, err)
	}

	// Keep the one-off compilation out of the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = m.Match(fixture_alternatives_combine_lite)
	}
}
// BenchmarkAlternativesCombineHard measures matching
// fixture_alternatives_combine_hard against pattern_alternatives_combine_hard.
func BenchmarkAlternativesCombineHard(b *testing.B) {
	m, err := Compile(pattern_alternatives_combine_hard)
	if err != nil {
		// Fail with the compile error instead of a nil dereference in the loop.
		b.Fatalf("compile %q: %v", pattern_alternatives_combine_hard, err)
	}

	// Keep the one-off compilation out of the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = m.Match(fixture_alternatives_combine_hard)
	}
}
func BenchmarkPlain(b *testing.B) {
m, _ := Compile(pattern_plain)

View File

@ -15,8 +15,8 @@ func TestAnyOfIndex(t *testing.T) {
{
Matchers{
Any{},
Text{"b"},
Text{"c"},
NewText("b"),
NewText("c"),
},
"abc",
0,

View File

@ -11,28 +11,30 @@ func TestBTree(t *testing.T) {
exp bool
}{
{
BTree{Value: Text{"abc"}, Left: Super{}, Right: Super{}},
NewBTree(NewText("abc"), Super{}, Super{}),
"abc",
true,
},
{
BTree{Value: Text{"a"}, Left: Single{}, Right: Single{}},
NewBTree(NewText("a"), Single{}, Single{}),
"aaa",
true,
},
{
BTree{Value: Text{"b"}, Left: Single{}},
NewBTree(NewText("b"), Single{}, nil),
"bbb",
false,
},
{
BTree{
Left: BTree{
Left: Super{},
Value: Single{},
},
Value: Text{"c"},
},
NewBTree(
NewText("c"),
NewBTree(
Single{},
Super{},
nil,
),
nil,
),
"abc",
true,
},

View File

@ -15,8 +15,8 @@ func TestEveryOfIndex(t *testing.T) {
{
Matchers{
Any{},
Text{"b"},
Text{"c"},
NewText("b"),
NewText("c"),
},
"abc",
-1,

View File

@ -6,6 +6,7 @@ import (
)
const lenOne = 1
const lenZero = 0
const lenNo = -1
type Matcher interface {

View File

@ -21,12 +21,7 @@ func (self Max) Match(s string) bool {
return true
}
func (self Max) Index(s string) (int, []int) {
if !self.Match(s) {
return -1, nil
}
segments := make([]int, 0, self.Limit+1)
func (self Max) Index(s string) (index int, segments []int) {
segments = append(segments, 0)
var count int
for i, r := range s {

23
match/nothing.go Normal file
View File

@ -0,0 +1,23 @@
package match
import (
"fmt"
)
// Nothing is a matcher that accepts only the empty string.
type Nothing struct{}

// Match reports whether s is empty.
func (Nothing) Match(s string) bool {
	return s == ""
}

// Index ignores s and always returns index 0 together with the slice []int{0}.
func (Nothing) Index(s string) (int, []int) {
	segments := []int{0}
	return 0, segments
}
// Len returns lenZero.
func (Nothing) Len() int {
	return lenZero
}
// String returns the fixed textual form "<nothing>".
func (Nothing) String() string {
	return fmt.Sprintf("<nothing>")
}

View File

@ -15,6 +15,10 @@ func (self PrefixSuffix) Index(s string) (int, []int) {
return -1, nil
}
var resp []int
suffixLen := len(self.Suffix)
if suffixLen > 0 {
var segments []int
for sub := s[prefixIdx:]; ; {
suffixIdx := strings.LastIndex(sub, self.Suffix)
@ -22,8 +26,8 @@ func (self PrefixSuffix) Index(s string) (int, []int) {
break
}
segments = append(segments, suffixIdx+len(self.Suffix))
sub = s[:suffixIdx]
segments = append(segments, suffixIdx+suffixLen)
sub = sub[:suffixIdx]
}
segLen := len(segments)
@ -31,10 +35,13 @@ func (self PrefixSuffix) Index(s string) (int, []int) {
return -1, nil
}
resp := make([]int, segLen)
resp = make([]int, segLen)
for i, s := range segments {
resp[segLen-i-1] = s
}
} else {
resp = append(resp, len(s)-prefixIdx)
}
return prefixIdx, resp
}

View File

@ -12,7 +12,12 @@ type Single struct {
}
func (self Single) Match(s string) bool {
return strings.IndexAny(s, self.Separators) == -1
r, w := utf8.DecodeRuneInString(s)
if len(s) > w {
return false
}
return strings.IndexRune(self.Separators, r) == -1
}
func (self Single) Len() int {