From e4652bc1f446254763a4e547a578fb9ad9305979 Mon Sep 17 00:00:00 2001 From: gobwas Date: Fri, 16 Feb 2018 17:36:02 +0300 Subject: [PATCH] dramatic refactoring --- cmd/globdraw/main.go | 8 +- cmd/globtest/main.go | 3 +- compiler/compiler.go | 502 ++++-------------------------------- compiler/compiler_test.go | 130 +--------- glob_test.go | 23 ++ match/any.go | 21 +- match/any_of.go | 88 +++---- match/any_of_test.go | 6 +- match/btree.go | 185 ------------- match/btree_test.go | 90 ------- match/contains.go | 30 +-- match/contains_test.go | 8 +- match/debug.go | 77 ++++++ match/debug/debug.go | 55 ---- match/every_of.go | 56 ++-- match/every_of_test.go | 4 +- match/list.go | 35 ++- match/match.go | 47 +++- match/max.go | 29 +-- match/min.go | 32 +-- match/nothing.go | 8 +- match/optimize.go | 278 ++++++++++++++++++++ match/prefix.go | 26 +- match/prefix_any.go | 31 +-- match/prefix_suffix.go | 28 +- match/range.go | 4 +- match/row.go | 104 ++++---- match/row_test.go | 39 +-- match/single.go | 33 +-- match/suffix.go | 25 +- match/suffix_any.go | 32 +-- match/super.go | 21 +- match/text.go | 49 ++-- match/tree.go | 154 +++++++++++ match/tree_test.go | 94 +++++++ match/util.go | 11 + syntax/ast/optimize.go | 165 ++++++++++++ syntax/ast/optimize_test.go | 126 +++++++++ syntax/ast/parser.go | 3 +- util/runes/runes.go | 94 +++++++ util/strings/strings.go | 39 --- 41 files changed, 1478 insertions(+), 1315 deletions(-) delete mode 100644 match/btree.go delete mode 100644 match/btree_test.go create mode 100644 match/debug.go delete mode 100644 match/debug/debug.go create mode 100644 match/optimize.go create mode 100644 match/tree.go create mode 100644 match/tree_test.go create mode 100644 match/util.go create mode 100644 syntax/ast/optimize.go create mode 100644 syntax/ast/optimize_test.go delete mode 100644 util/strings/strings.go diff --git a/cmd/globdraw/main.go b/cmd/globdraw/main.go index 585880d..d14abd8 100644 --- a/cmd/globdraw/main.go +++ b/cmd/globdraw/main.go @@ 
-3,12 +3,12 @@ package main import ( "flag" "fmt" - "github.com/gobwas/glob" - "github.com/gobwas/glob/match" - "github.com/gobwas/glob/match/debug" "os" "strings" "unicode/utf8" + + "github.com/gobwas/glob" + "github.com/gobwas/glob/match" ) func main() { @@ -40,5 +40,5 @@ func main() { } matcher := glob.(match.Matcher) - fmt.Fprint(os.Stdout, debug.Graphviz(*pattern, matcher)) + fmt.Fprint(os.Stdout, match.Graphviz(*pattern, matcher)) } diff --git a/cmd/globtest/main.go b/cmd/globtest/main.go index 95c102f..ea1b893 100644 --- a/cmd/globtest/main.go +++ b/cmd/globtest/main.go @@ -3,11 +3,12 @@ package main import ( "flag" "fmt" - "github.com/gobwas/glob" "os" "strings" "testing" "unicode/utf8" + + "github.com/gobwas/glob" ) func benchString(r testing.BenchmarkResult) string { diff --git a/compiler/compiler.go b/compiler/compiler.go index 02e7de8..3c36eb6 100644 --- a/compiler/compiler.go +++ b/compiler/compiler.go @@ -5,467 +5,58 @@ package compiler import ( "fmt" - "reflect" + "os" + "strings" + "sync/atomic" "github.com/gobwas/glob/match" "github.com/gobwas/glob/syntax/ast" - "github.com/gobwas/glob/util/runes" ) -func optimizeMatcher(matcher match.Matcher) match.Matcher { - switch m := matcher.(type) { - - case match.Any: - if len(m.Separators) == 0 { - return match.NewSuper() - } - - case match.AnyOf: - if len(m.Matchers) == 1 { - return m.Matchers[0] - } - - return m - - case match.List: - if m.Not == false && len(m.List) == 1 { - return match.NewText(string(m.List)) - } - - return m - - case match.BTree: - m.Left = optimizeMatcher(m.Left) - m.Right = optimizeMatcher(m.Right) - - r, ok := m.Value.(match.Text) - if !ok { - return m - } - - var ( - leftNil = m.Left == nil - rightNil = m.Right == nil - ) - if leftNil && rightNil { - return match.NewText(r.Str) - } - - _, leftSuper := m.Left.(match.Super) - lp, leftPrefix := m.Left.(match.Prefix) - la, leftAny := m.Left.(match.Any) - - _, rightSuper := m.Right.(match.Super) - rs, rightSuffix := 
m.Right.(match.Suffix) - ra, rightAny := m.Right.(match.Any) - - switch { - case leftSuper && rightSuper: - return match.NewContains(r.Str, false) - - case leftSuper && rightNil: - return match.NewSuffix(r.Str) - - case rightSuper && leftNil: - return match.NewPrefix(r.Str) - - case leftNil && rightSuffix: - return match.NewPrefixSuffix(r.Str, rs.Suffix) - - case rightNil && leftPrefix: - return match.NewPrefixSuffix(lp.Prefix, r.Str) - - case rightNil && leftAny: - return match.NewSuffixAny(r.Str, la.Separators) - - case leftNil && rightAny: - return match.NewPrefixAny(r.Str, ra.Separators) - } - - return m +func Compile(tree *ast.Node, sep []rune) (match.Matcher, error) { + m, err := compile(tree, sep) + if err != nil { + return nil, err } - return matcher + return m, nil } -func compileMatchers(matchers []match.Matcher) (match.Matcher, error) { - if len(matchers) == 0 { - return nil, fmt.Errorf("compile error: need at least one matcher") - } - if len(matchers) == 1 { - return matchers[0], nil - } - if m := glueMatchers(matchers); m != nil { - return m, nil - } - - idx := -1 - maxLen := -1 - var val match.Matcher - for i, matcher := range matchers { - if l := matcher.Len(); l != -1 && l >= maxLen { - maxLen = l - idx = i - val = matcher - } - } - - if val == nil { // not found matcher with static length - r, err := compileMatchers(matchers[1:]) - if err != nil { - return nil, err - } - return match.NewBTree(matchers[0], nil, r), nil - } - - left := matchers[:idx] - var right []match.Matcher - if len(matchers) > idx+1 { - right = matchers[idx+1:] - } - - var l, r match.Matcher - var err error - if len(left) > 0 { - l, err = compileMatchers(left) - if err != nil { - return nil, err - } - } - - if len(right) > 0 { - r, err = compileMatchers(right) - if err != nil { - return nil, err - } - } - - return match.NewBTree(val, l, r), nil -} - -func glueMatchers(matchers []match.Matcher) match.Matcher { - if m := glueMatchersAsEvery(matchers); m != nil { - return m - } - 
if m := glueMatchersAsRow(matchers); m != nil { - return m - } - return nil -} - -func glueMatchersAsRow(matchers []match.Matcher) match.Matcher { - if len(matchers) <= 1 { - return nil - } - - var ( - c []match.Matcher - l int - ) - for _, matcher := range matchers { - if ml := matcher.Len(); ml == -1 { - return nil - } else { - c = append(c, matcher) - l += ml - } - } - return match.NewRow(l, c...) -} - -func glueMatchersAsEvery(matchers []match.Matcher) match.Matcher { - if len(matchers) <= 1 { - return nil - } - - var ( - hasAny bool - hasSuper bool - hasSingle bool - min int - separator []rune - ) - - for i, matcher := range matchers { - var sep []rune - - switch m := matcher.(type) { - case match.Super: - sep = []rune{} - hasSuper = true - - case match.Any: - sep = m.Separators - hasAny = true - - case match.Single: - sep = m.Separators - hasSingle = true - min++ - - case match.List: - if !m.Not { - return nil - } - sep = m.List - hasSingle = true - min++ - - default: - return nil - } - - // initialize - if i == 0 { - separator = sep - } - - if runes.Equal(sep, separator) { - continue - } - - return nil - } - - if hasSuper && !hasAny && !hasSingle { - return match.NewSuper() - } - - if hasAny && !hasSuper && !hasSingle { - return match.NewAny(separator) - } - - if (hasAny || hasSuper) && min > 0 && len(separator) == 0 { - return match.NewMin(min) - } - - every := match.NewEveryOf() - - if min > 0 { - every.Add(match.NewMin(min)) - - if !hasAny && !hasSuper { - every.Add(match.NewMax(min)) - } - } - - if len(separator) > 0 { - every.Add(match.NewContains(string(separator), true)) - } - - return every -} - -func minimizeMatchers(matchers []match.Matcher) []match.Matcher { - var done match.Matcher - var left, right, count int - - for l := 0; l < len(matchers); l++ { - for r := len(matchers); r > l; r-- { - if glued := glueMatchers(matchers[l:r]); glued != nil { - var swap bool - - if done == nil { - swap = true - } else { - cl, gl := done.Len(), glued.Len() - 
swap = cl > -1 && gl > -1 && gl > cl - swap = swap || count < r-l - } - - if swap { - done = glued - left = l - right = r - count = r - l - } - } - } - } - - if done == nil { - return matchers - } - - next := append(append([]match.Matcher{}, matchers[:left]...), done) - if right < len(matchers) { - next = append(next, matchers[right:]...) - } - - if len(next) == len(matchers) { - return next - } - - return minimizeMatchers(next) -} - -// minimizeAnyOf tries to apply some heuristics to minimize number of nodes in given tree -func minimizeTree(tree *ast.Node) *ast.Node { - switch tree.Kind { - case ast.KindAnyOf: - return minimizeTreeAnyOf(tree) - default: - return nil - } -} - -// minimizeAnyOf tries to find common children of given node of AnyOf pattern -// it searches for common children from left and from right -// if any common children are found – then it returns new optimized ast tree -// else it returns nil -func minimizeTreeAnyOf(tree *ast.Node) *ast.Node { - if !areOfSameKind(tree.Children, ast.KindPattern) { - return nil - } - - commonLeft, commonRight := commonChildren(tree.Children) - commonLeftCount, commonRightCount := len(commonLeft), len(commonRight) - if commonLeftCount == 0 && commonRightCount == 0 { // there are no common parts - return nil - } - - var result []*ast.Node - if commonLeftCount > 0 { - result = append(result, ast.NewNode(ast.KindPattern, nil, commonLeft...)) - } - - var anyOf []*ast.Node - for _, child := range tree.Children { - reuse := child.Children[commonLeftCount : len(child.Children)-commonRightCount] - var node *ast.Node - if len(reuse) == 0 { - // this pattern is completely reduced by commonLeft and commonRight patterns - // so it become nothing - node = ast.NewNode(ast.KindNothing, nil) - } else { - node = ast.NewNode(ast.KindPattern, nil, reuse...) 
- } - anyOf = appendIfUnique(anyOf, node) - } - switch { - case len(anyOf) == 1 && anyOf[0].Kind != ast.KindNothing: - result = append(result, anyOf[0]) - case len(anyOf) > 1: - result = append(result, ast.NewNode(ast.KindAnyOf, nil, anyOf...)) - } - - if commonRightCount > 0 { - result = append(result, ast.NewNode(ast.KindPattern, nil, commonRight...)) - } - - return ast.NewNode(ast.KindPattern, nil, result...) -} - -func commonChildren(nodes []*ast.Node) (commonLeft, commonRight []*ast.Node) { - if len(nodes) <= 1 { - return - } - - // find node that has least number of children - idx := leastChildren(nodes) - if idx == -1 { - return - } - tree := nodes[idx] - treeLength := len(tree.Children) - - // allocate max able size for rightCommon slice - // to get ability insert elements in reverse order (from end to start) - // without sorting - commonRight = make([]*ast.Node, treeLength) - lastRight := treeLength // will use this to get results as commonRight[lastRight:] - - var ( - breakLeft bool - breakRight bool - commonTotal int - ) - for i, j := 0, treeLength-1; commonTotal < treeLength && j >= 0 && !(breakLeft && breakRight); i, j = i+1, j-1 { - treeLeft := tree.Children[i] - treeRight := tree.Children[j] - - for k := 0; k < len(nodes) && !(breakLeft && breakRight); k++ { - // skip least children node - if k == idx { - continue - } - - restLeft := nodes[k].Children[i] - restRight := nodes[k].Children[j+len(nodes[k].Children)-treeLength] - - breakLeft = breakLeft || !treeLeft.Equal(restLeft) - - // disable searching for right common parts, if left part is already overlapping - breakRight = breakRight || (!breakLeft && j <= i) - breakRight = breakRight || !treeRight.Equal(restRight) - } - - if !breakLeft { - commonTotal++ - commonLeft = append(commonLeft, treeLeft) - } - if !breakRight { - commonTotal++ - lastRight = j - commonRight[j] = treeRight - } - } - - commonRight = commonRight[lastRight:] - - return -} - -func appendIfUnique(target []*ast.Node, val 
*ast.Node) []*ast.Node { - for _, n := range target { - if reflect.DeepEqual(n, val) { - return target - } - } - return append(target, val) -} - -func areOfSameKind(nodes []*ast.Node, kind ast.Kind) bool { - for _, n := range nodes { - if n.Kind != kind { - return false - } - } - return true -} - -func leastChildren(nodes []*ast.Node) int { - min := -1 - idx := -1 - for i, n := range nodes { - if idx == -1 || (len(n.Children) < min) { - min = len(n.Children) - idx = i - } - } - return idx -} - -func compileTreeChildren(tree *ast.Node, sep []rune) ([]match.Matcher, error) { +func compileNodes(ns []*ast.Node, sep []rune) ([]match.Matcher, error) { var matchers []match.Matcher - for _, desc := range tree.Children { - m, err := compile(desc, sep) + for _, n := range ns { + m, err := compile(n, sep) if err != nil { return nil, err } - matchers = append(matchers, optimizeMatcher(m)) + matchers = append(matchers, m) } return matchers, nil } func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) { + enter() + logf("compiling %s", tree) + defer func() { + logf("result %s", m) + leave() + }() + + // todo this could be faster on pattern_alternatives_combine_lite (see glob_test.go) + if n := ast.Minimize(tree); n != nil { + logf("minimized tree") + logf("\t%s", tree) + logf("\t%s", n) + r, err := compile(n, sep) + if err == nil { + return r, nil + } + logf("compile minimized tree failed: %v", err) + } + switch tree.Kind { case ast.KindAnyOf: - // todo this could be faster on pattern_alternatives_combine_lite (see glob_test.go) - if n := minimizeTree(tree); n != nil { - return compile(n, sep) - } - matchers, err := compileTreeChildren(tree, sep) + matchers, err := compileNodes(tree.Children, sep) if err != nil { return nil, err } @@ -475,11 +66,11 @@ func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) { if len(tree.Children) == 0 { return match.NewNothing(), nil } - matchers, err := compileTreeChildren(tree, sep) + matchers, err := 
compileNodes(tree.Children, sep) if err != nil { return nil, err } - m, err = compileMatchers(minimizeMatchers(matchers)) + m, err = match.Compile(match.Minimize(matchers)) if err != nil { return nil, err } @@ -512,14 +103,25 @@ func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) { return nil, fmt.Errorf("could not compile tree: unknown node type") } - return optimizeMatcher(m), nil + return match.Optimize(m), nil } -func Compile(tree *ast.Node, sep []rune) (match.Matcher, error) { - m, err := compile(tree, sep) - if err != nil { - return nil, err - } +var i = new(int32) - return m, nil +func logf(f string, args ...interface{}) { + n := int(atomic.LoadInt32(i)) + fmt.Fprint(os.Stderr, + strings.Repeat(" ", n), + fmt.Sprintf("(%d) ", n), + fmt.Sprintf(f, args...), + "\n", + ) +} + +func enter() { + atomic.AddInt32(i, 1) +} + +func leave() { + atomic.AddInt32(i, -1) } diff --git a/compiler/compiler_test.go b/compiler/compiler_test.go index b58b1eb..f0ebe69 100644 --- a/compiler/compiler_test.go +++ b/compiler/compiler_test.go @@ -1,140 +1,16 @@ package compiler import ( + "reflect" + "testing" + "github.com/gobwas/glob/match" "github.com/gobwas/glob/match/debug" "github.com/gobwas/glob/syntax/ast" - "reflect" - "testing" ) var separators = []rune{'.'} -func TestCommonChildren(t *testing.T) { - for i, test := range []struct { - nodes []*ast.Node - left []*ast.Node - right []*ast.Node - }{ - { - nodes: []*ast.Node{ - ast.NewNode(ast.KindNothing, nil, - ast.NewNode(ast.KindText, ast.Text{"a"}), - ast.NewNode(ast.KindText, ast.Text{"z"}), - ast.NewNode(ast.KindText, ast.Text{"c"}), - ), - }, - }, - { - nodes: []*ast.Node{ - ast.NewNode(ast.KindNothing, nil, - ast.NewNode(ast.KindText, ast.Text{"a"}), - ast.NewNode(ast.KindText, ast.Text{"z"}), - ast.NewNode(ast.KindText, ast.Text{"c"}), - ), - ast.NewNode(ast.KindNothing, nil, - ast.NewNode(ast.KindText, ast.Text{"a"}), - ast.NewNode(ast.KindText, ast.Text{"b"}), - ast.NewNode(ast.KindText, 
ast.Text{"c"}), - ), - }, - left: []*ast.Node{ - ast.NewNode(ast.KindText, ast.Text{"a"}), - }, - right: []*ast.Node{ - ast.NewNode(ast.KindText, ast.Text{"c"}), - }, - }, - { - nodes: []*ast.Node{ - ast.NewNode(ast.KindNothing, nil, - ast.NewNode(ast.KindText, ast.Text{"a"}), - ast.NewNode(ast.KindText, ast.Text{"b"}), - ast.NewNode(ast.KindText, ast.Text{"c"}), - ast.NewNode(ast.KindText, ast.Text{"d"}), - ), - ast.NewNode(ast.KindNothing, nil, - ast.NewNode(ast.KindText, ast.Text{"a"}), - ast.NewNode(ast.KindText, ast.Text{"b"}), - ast.NewNode(ast.KindText, ast.Text{"c"}), - ast.NewNode(ast.KindText, ast.Text{"c"}), - ast.NewNode(ast.KindText, ast.Text{"d"}), - ), - }, - left: []*ast.Node{ - ast.NewNode(ast.KindText, ast.Text{"a"}), - ast.NewNode(ast.KindText, ast.Text{"b"}), - }, - right: []*ast.Node{ - ast.NewNode(ast.KindText, ast.Text{"c"}), - ast.NewNode(ast.KindText, ast.Text{"d"}), - }, - }, - { - nodes: []*ast.Node{ - ast.NewNode(ast.KindNothing, nil, - ast.NewNode(ast.KindText, ast.Text{"a"}), - ast.NewNode(ast.KindText, ast.Text{"b"}), - ast.NewNode(ast.KindText, ast.Text{"c"}), - ), - ast.NewNode(ast.KindNothing, nil, - ast.NewNode(ast.KindText, ast.Text{"a"}), - ast.NewNode(ast.KindText, ast.Text{"b"}), - ast.NewNode(ast.KindText, ast.Text{"b"}), - ast.NewNode(ast.KindText, ast.Text{"c"}), - ), - }, - left: []*ast.Node{ - ast.NewNode(ast.KindText, ast.Text{"a"}), - ast.NewNode(ast.KindText, ast.Text{"b"}), - }, - right: []*ast.Node{ - ast.NewNode(ast.KindText, ast.Text{"c"}), - }, - }, - { - nodes: []*ast.Node{ - ast.NewNode(ast.KindNothing, nil, - ast.NewNode(ast.KindText, ast.Text{"a"}), - ast.NewNode(ast.KindText, ast.Text{"d"}), - ), - ast.NewNode(ast.KindNothing, nil, - ast.NewNode(ast.KindText, ast.Text{"a"}), - ast.NewNode(ast.KindText, ast.Text{"d"}), - ), - ast.NewNode(ast.KindNothing, nil, - ast.NewNode(ast.KindText, ast.Text{"a"}), - ast.NewNode(ast.KindText, ast.Text{"e"}), - ), - }, - left: []*ast.Node{ - ast.NewNode(ast.KindText, 
ast.Text{"a"}), - }, - right: []*ast.Node{}, - }, - } { - left, right := commonChildren(test.nodes) - if !nodesEqual(left, test.left) { - t.Errorf("[%d] left, right := commonChildren(); left = %v; want %v", i, left, test.left) - } - if !nodesEqual(right, test.right) { - t.Errorf("[%d] left, right := commonChildren(); right = %v; want %v", i, right, test.right) - } - } -} - -func nodesEqual(a, b []*ast.Node) bool { - if len(a) != len(b) { - return false - } - for i, av := range a { - if !av.Equal(b[i]) { - return false - } - } - return true -} - func TestGlueMatchers(t *testing.T) { for id, test := range []struct { in []match.Matcher diff --git a/glob_test.go b/glob_test.go index 810036f..b012fc8 100644 --- a/glob_test.go +++ b/glob_test.go @@ -60,6 +60,24 @@ func glob(s bool, p, m string, d ...rune) test { return test{p, m, s, d} } +func globc(p string, d ...rune) test { + return test{pattern: p, delimiters: d} +} + +func TestCompilation(t *testing.T) { + for _, test := range []test{ + globc("{*,**,?}", '.'), + globc("{*.google.*,yandex.*}", '.'), + } { + t.Run("", func(t *testing.T) { + _, err := Compile(test.pattern, test.delimiters...) + if err != nil { + t.Fatal(err) + } + }) + } +} + func TestGlob(t *testing.T) { for _, test := range []test{ glob(true, "* ?at * eyes", "my cat has very bright eyes"), @@ -164,6 +182,11 @@ func TestGlob(t *testing.T) { glob(false, pattern_prefix_suffix, fixture_prefix_suffix_mismatch), } { t.Run("", func(t *testing.T) { + defer func() { + if thePanic := recover(); thePanic != nil { + t.Fatalf("panic recovered: %v", thePanic) + } + }() g := MustCompile(test.pattern, test.delimiters...) 
result := g.Match(test.match) if result != test.should { diff --git a/match/any.go b/match/any.go index 514a9a5..c8545fc 100644 --- a/match/any.go +++ b/match/any.go @@ -2,23 +2,24 @@ package match import ( "fmt" - "github.com/gobwas/glob/util/strings" + + "github.com/gobwas/glob/util/runes" ) type Any struct { - Separators []rune + sep []rune } func NewAny(s []rune) Any { return Any{s} } -func (self Any) Match(s string) bool { - return strings.IndexAnyRunes(s, self.Separators) == -1 +func (a Any) Match(s string) bool { + return runes.IndexAnyRune(s, a.sep) == -1 } -func (self Any) Index(s string) (int, []int) { - found := strings.IndexAnyRunes(s, self.Separators) +func (a Any) Index(s string) (int, []int) { + found := runes.IndexAnyRune(s, a.sep) switch found { case -1: case 0: @@ -36,10 +37,10 @@ func (self Any) Index(s string) (int, []int) { return 0, segments } -func (self Any) Len() int { - return lenNo +func (a Any) MinLen() int { + return 0 } -func (self Any) String() string { - return fmt.Sprintf("", string(self.Separators)) +func (a Any) String() string { + return fmt.Sprintf("", string(a.sep)) } diff --git a/match/any_of.go b/match/any_of.go index 8e65356..490c63d 100644 --- a/match/any_of.go +++ b/match/any_of.go @@ -1,82 +1,74 @@ package match -import "fmt" +import ( + "fmt" +) type AnyOf struct { - Matchers Matchers + ms []Matcher + min int } -func NewAnyOf(m ...Matcher) AnyOf { - return AnyOf{Matchers(m)} +func NewAnyOf(ms ...Matcher) Matcher { + a := AnyOf{ms, minLen(ms)} + if mis, ok := MatchIndexers(ms); ok { + return IndexedAnyOf{a, mis} + } + return a } -func (self *AnyOf) Add(m Matcher) error { - self.Matchers = append(self.Matchers, m) - return nil -} - -func (self AnyOf) Match(s string) bool { - for _, m := range self.Matchers { +func (a AnyOf) Match(s string) bool { + for _, m := range a.ms { if m.Match(s) { return true } } - return false } -func (self AnyOf) Index(s string) (int, []int) { - index := -1 +func (a AnyOf) MinLen() (n int) { + 
return a.min +} +func (a AnyOf) Content() []Matcher { + return a.ms +} + +func (a AnyOf) String() string { + return fmt.Sprintf("", Matchers(a.ms)) +} + +type IndexedAnyOf struct { + AnyOf + ms []MatchIndexer +} + +func (a IndexedAnyOf) Index(s string) (int, []int) { + index := -1 segments := acquireSegments(len(s)) - for _, m := range self.Matchers { - idx, seg := m.Index(s) - if idx == -1 { + for _, m := range a.ms { + i, seg := m.Index(s) + if i == -1 { continue } - - if index == -1 || idx < index { - index = idx + if index == -1 || i < index { + index = i segments = append(segments[:0], seg...) continue } - - if idx > index { + if i > index { continue } - - // here idx == index + // here i == index segments = appendMerge(segments, seg) } - if index == -1 { releaseSegments(segments) return -1, nil } - return index, segments } -func (self AnyOf) Len() (l int) { - l = -1 - for _, m := range self.Matchers { - ml := m.Len() - switch { - case l == -1: - l = ml - continue - - case ml == -1: - return -1 - - case l != ml: - return -1 - } - } - - return -} - -func (self AnyOf) String() string { - return fmt.Sprintf("", self.Matchers) +func (a IndexedAnyOf) String() string { + return fmt.Sprintf("", a.ms) } diff --git a/match/any_of_test.go b/match/any_of_test.go index 3b478cf..c989ff2 100644 --- a/match/any_of_test.go +++ b/match/any_of_test.go @@ -5,7 +5,7 @@ import ( "testing" ) -func TestAnyOfIndex(t *testing.T) { +func TestIndexedAnyOf(t *testing.T) { for id, test := range []struct { matchers Matchers fixture string @@ -41,8 +41,8 @@ func TestAnyOfIndex(t *testing.T) { []int{1}, }, } { - everyOf := NewAnyOf(test.matchers...) 
- index, segments := everyOf.Index(test.fixture) + a := NewAnyOf(test.matchers...).(IndexedAnyOf) + index, segments := a.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) } diff --git a/match/btree.go b/match/btree.go deleted file mode 100644 index 8302bf8..0000000 --- a/match/btree.go +++ /dev/null @@ -1,185 +0,0 @@ -package match - -import ( - "fmt" - "unicode/utf8" -) - -type BTree struct { - Value Matcher - Left Matcher - Right Matcher - ValueLengthRunes int - LeftLengthRunes int - RightLengthRunes int - LengthRunes int -} - -func NewBTree(Value, Left, Right Matcher) (tree BTree) { - tree.Value = Value - tree.Left = Left - tree.Right = Right - - lenOk := true - if tree.ValueLengthRunes = Value.Len(); tree.ValueLengthRunes == -1 { - lenOk = false - } - - if Left != nil { - if tree.LeftLengthRunes = Left.Len(); tree.LeftLengthRunes == -1 { - lenOk = false - } - } - - if Right != nil { - if tree.RightLengthRunes = Right.Len(); tree.RightLengthRunes == -1 { - lenOk = false - } - } - - if lenOk { - tree.LengthRunes = tree.LeftLengthRunes + tree.ValueLengthRunes + tree.RightLengthRunes - } else { - tree.LengthRunes = -1 - } - - return tree -} - -func (self BTree) Len() int { - return self.LengthRunes -} - -// todo? 
-func (self BTree) Index(s string) (index int, segments []int) { - //inputLen := len(s) - //// try to cut unnecessary parts - //// by knowledge of length of right and left part - //offset, limit := self.offsetLimit(inputLen) - //for offset < limit { - // // search for matching part in substring - // vi, segments := self.Value.Index(s[offset:limit]) - // if index == -1 { - // return -1, nil - // } - // if self.Left == nil { - // if index != offset { - // return -1, nil - // } - // } else { - // left := s[:offset+vi] - // i := self.Left.IndexSuffix(left) - // if i == -1 { - // return -1, nil - // } - // index = i - // } - // if self.Right != nil { - // for _, seg := range segments { - // right := s[:offset+vi+seg] - // } - // } - - // l := s[:offset+index] - // var left bool - // if self.Left != nil { - // left = self.Left.Index(l) - // } else { - // left = l == "" - // } - //} - - return -1, nil -} - -func (self BTree) Match(s string) bool { - inputLen := len(s) - // try to cut unnecessary parts - // by knowledge of length of right and left part - offset, limit := self.offsetLimit(inputLen) - - for offset < limit { - // search for matching part in substring - index, segments := self.Value.Index(s[offset:limit]) - if index == -1 { - releaseSegments(segments) - return false - } - - l := s[:offset+index] - var left bool - if self.Left != nil { - left = self.Left.Match(l) - } else { - left = l == "" - } - - if left { - for i := len(segments) - 1; i >= 0; i-- { - length := segments[i] - - var right bool - var r string - // if there is no string for the right branch - if inputLen <= offset+index+length { - r = "" - } else { - r = s[offset+index+length:] - } - - if self.Right != nil { - right = self.Right.Match(r) - } else { - right = r == "" - } - - if right { - releaseSegments(segments) - return true - } - } - } - - _, step := utf8.DecodeRuneInString(s[offset+index:]) - offset += index + step - - releaseSegments(segments) - } - - return false -} - -func (self BTree) 
offsetLimit(inputLen int) (offset int, limit int) { - // self.Length, self.RLen and self.LLen are values meaning the length of runes for each part - // here we manipulating byte length for better optimizations - // but these checks still works, cause minLen of 1-rune string is 1 byte. - if self.LengthRunes != -1 && self.LengthRunes > inputLen { - return 0, 0 - } - if self.LeftLengthRunes >= 0 { - offset = self.LeftLengthRunes - } - if self.RightLengthRunes >= 0 { - limit = inputLen - self.RightLengthRunes - } else { - limit = inputLen - } - return offset, limit -} - -func (self BTree) String() string { - const n string = "" - var l, r string - if self.Left == nil { - l = n - } else { - l = self.Left.String() - } - if self.Right == nil { - r = n - } else { - r = self.Right.String() - } - - return fmt.Sprintf("%s]>", l, self.Value, r) -} diff --git a/match/btree_test.go b/match/btree_test.go deleted file mode 100644 index 3bd9ea5..0000000 --- a/match/btree_test.go +++ /dev/null @@ -1,90 +0,0 @@ -package match - -import ( - "testing" -) - -func TestBTree(t *testing.T) { - for id, test := range []struct { - tree BTree - str string - exp bool - }{ - { - NewBTree(NewText("abc"), NewSuper(), NewSuper()), - "abc", - true, - }, - { - NewBTree(NewText("a"), NewSingle(nil), NewSingle(nil)), - "aaa", - true, - }, - { - NewBTree(NewText("b"), NewSingle(nil), nil), - "bbb", - false, - }, - { - NewBTree( - NewText("c"), - NewBTree( - NewSingle(nil), - NewSuper(), - nil, - ), - nil, - ), - "abc", - true, - }, - } { - act := test.tree.Match(test.str) - if act != test.exp { - t.Errorf("#%d match %q error: act: %t; exp: %t", id, test.str, act, test.exp) - continue - } - } -} - -type fakeMatcher struct { - len int - name string -} - -func (f *fakeMatcher) Match(string) bool { - return true -} - -var i = 3 - -func (f *fakeMatcher) Index(s string) (int, []int) { - seg := make([]int, 0, i) - for x := 0; x < i; x++ { - seg = append(seg, x) - } - return 0, seg -} -func (f *fakeMatcher) 
Len() int { - return f.len -} -func (f *fakeMatcher) String() string { - return f.name -} - -func BenchmarkMatchBTree(b *testing.B) { - l := &fakeMatcher{4, "left_fake"} - r := &fakeMatcher{4, "right_fake"} - v := &fakeMatcher{2, "value_fake"} - - // must be <= len(l + r + v) - fixture := "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij" - - bt := NewBTree(v, l, r) - - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - bt.Match(fixture) - } - }) -} diff --git a/match/contains.go b/match/contains.go index 0998e95..1178885 100644 --- a/match/contains.go +++ b/match/contains.go @@ -6,29 +6,29 @@ import ( ) type Contains struct { - Needle string - Not bool + s string + not bool } -func NewContains(needle string, not bool) Contains { - return Contains{needle, not} +func NewContains(needle string) Contains { + return Contains{needle, false} } -func (self Contains) Match(s string) bool { - return strings.Contains(s, self.Needle) != self.Not +func (c Contains) Match(s string) bool { + return strings.Contains(s, c.s) != c.not } -func (self Contains) Index(s string) (int, []int) { +func (c Contains) Index(s string) (int, []int) { var offset int - idx := strings.Index(s, self.Needle) + idx := strings.Index(s, c.s) - if !self.Not { + if !c.not { if idx == -1 { return -1, nil } - offset = idx + len(self.Needle) + offset = idx + len(c.s) if len(s) <= offset { return 0, []int{offset} } @@ -45,14 +45,14 @@ func (self Contains) Index(s string) (int, []int) { return 0, append(segments, offset+len(s)) } -func (self Contains) Len() int { - return lenNo +func (c Contains) MinLen() int { + return 0 } -func (self Contains) String() string { +func (c Contains) String() string { var not string - if self.Not { + if c.not { not = "!" 
} - return fmt.Sprintf("", not, self.Needle) + return fmt.Sprintf("", not, c.s) } diff --git a/match/contains_test.go b/match/contains_test.go index 931322e..a2ee14d 100644 --- a/match/contains_test.go +++ b/match/contains_test.go @@ -42,7 +42,7 @@ func TestContainsIndex(t *testing.T) { []int{0, 1, 2, 3}, }, } { - p := NewContains(test.prefix, test.not) + p := Contains{test.prefix, test.not} index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -54,8 +54,7 @@ func TestContainsIndex(t *testing.T) { } func BenchmarkIndexContains(b *testing.B) { - m := NewContains(string(bench_separators), true) - + m := Contains{string(bench_separators), true} for i := 0; i < b.N; i++ { _, s := m.Index(bench_pattern) releaseSegments(s) @@ -63,8 +62,7 @@ func BenchmarkIndexContains(b *testing.B) { } func BenchmarkIndexContainsParallel(b *testing.B) { - m := NewContains(string(bench_separators), true) - + m := Contains{string(bench_separators), true} b.RunParallel(func(pb *testing.PB) { for pb.Next() { _, s := m.Index(bench_pattern) diff --git a/match/debug.go b/match/debug.go new file mode 100644 index 0000000..2dd6a96 --- /dev/null +++ b/match/debug.go @@ -0,0 +1,77 @@ +package match + +import ( + "bytes" + "fmt" + "math/rand" + "os" + "strings" + "sync/atomic" +) + +var i = new(int32) + +func logf(f string, args ...interface{}) { + n := int(atomic.LoadInt32(i)) + fmt.Fprint(os.Stderr, + strings.Repeat(" ", n), + fmt.Sprintf("(%d) ", n), + fmt.Sprintf(f, args...), + "\n", + ) +} + +func enter() { + atomic.AddInt32(i, 1) +} + +func leave() { + atomic.AddInt32(i, -1) +} + +func Graphviz(pattern string, m Matcher) string { + return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, graphviz(m, fmt.Sprintf("%x", rand.Int63()))) +} + +func graphviz(m Matcher, id string) string { + buf := &bytes.Buffer{} + + switch v := m.(type) { + case Tree: + fmt.Fprintf(buf, `"%s"[label="%s"];`, id, v.value) 
+ for _, m := range []Matcher{v.left, v.right} { + switch n := m.(type) { + case nil: + rnd := rand.Int63() + fmt.Fprintf(buf, `"%x"[label=""];`, rnd) + fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd) + + default: + sub := fmt.Sprintf("%x", rand.Int63()) + fmt.Fprintf(buf, `"%s"->"%s";`, id, sub) + fmt.Fprintf(buf, graphviz(n, sub)) + } + } + + case Container: + fmt.Fprintf(buf, `"%s"[label="*AnyOf"];`, id) + for _, m := range v.Content() { + rnd := rand.Int63() + fmt.Fprintf(buf, graphviz(m, fmt.Sprintf("%x", rnd))) + fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd) + } + + case EveryOf: + fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id) + for _, m := range v.ms { + rnd := rand.Int63() + fmt.Fprintf(buf, graphviz(m, fmt.Sprintf("%x", rnd))) + fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd) + } + + default: + fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m) + } + + return buf.String() +} diff --git a/match/debug/debug.go b/match/debug/debug.go deleted file mode 100644 index 5c5dbc1..0000000 --- a/match/debug/debug.go +++ /dev/null @@ -1,55 +0,0 @@ -package debug - -import ( - "bytes" - "fmt" - "github.com/gobwas/glob/match" - "math/rand" -) - -func Graphviz(pattern string, m match.Matcher) string { - return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, graphviz_internal(m, fmt.Sprintf("%x", rand.Int63()))) -} - -func graphviz_internal(m match.Matcher, id string) string { - buf := &bytes.Buffer{} - - switch matcher := m.(type) { - case match.BTree: - fmt.Fprintf(buf, `"%s"[label="%s"];`, id, matcher.Value.String()) - for _, m := range []match.Matcher{matcher.Left, matcher.Right} { - switch n := m.(type) { - case nil: - rnd := rand.Int63() - fmt.Fprintf(buf, `"%x"[label=""];`, rnd) - fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd) - - default: - sub := fmt.Sprintf("%x", rand.Int63()) - fmt.Fprintf(buf, `"%s"->"%s";`, id, sub) - fmt.Fprintf(buf, graphviz_internal(n, sub)) - } - } - - case match.AnyOf: - fmt.Fprintf(buf, `"%s"[label="AnyOf"];`, id) - for _, m := range matcher.Matchers { - rnd 
:= rand.Int63() - fmt.Fprintf(buf, graphviz_internal(m, fmt.Sprintf("%x", rnd))) - fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd) - } - - case match.EveryOf: - fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id) - for _, m := range matcher.Matchers { - rnd := rand.Int63() - fmt.Fprintf(buf, graphviz_internal(m, fmt.Sprintf("%x", rnd))) - fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd) - } - - default: - fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m.String()) - } - - return buf.String() -} diff --git a/match/every_of.go b/match/every_of.go index 7c968ee..89e453f 100644 --- a/match/every_of.go +++ b/match/every_of.go @@ -5,31 +5,41 @@ import ( ) type EveryOf struct { - Matchers Matchers + ms []Matcher + min int } -func NewEveryOf(m ...Matcher) EveryOf { - return EveryOf{Matchers(m)} +func NewEveryOf(ms []Matcher) Matcher { + e := EveryOf{ms, minLen(ms)} + if mis, ok := MatchIndexers(ms); ok { + return IndexedEveryOf{e, mis} + } + return e } -func (self *EveryOf) Add(m Matcher) error { - self.Matchers = append(self.Matchers, m) - return nil +func (e EveryOf) MinLen() (n int) { + return e.min } -func (self EveryOf) Len() (l int) { - for _, m := range self.Matchers { - if ml := m.Len(); l > 0 { - l += ml - } else { - return -1 +func (e EveryOf) Match(s string) bool { + for _, m := range e.ms { + if !m.Match(s) { + return false } } - - return + return true } -func (self EveryOf) Index(s string) (int, []int) { +func (e EveryOf) String() string { + return fmt.Sprintf("", e.ms) +} + +type IndexedEveryOf struct { + EveryOf + ms []MatchIndexer +} + +func (e IndexedEveryOf) Index(s string) (int, []int) { var index int var offset int @@ -39,7 +49,7 @@ func (self EveryOf) Index(s string) (int, []int) { current := acquireSegments(len(s)) sub := s - for i, m := range self.Matchers { + for i, m := range e.ms { idx, seg := m.Index(sub) if idx == -1 { releaseSegments(next) @@ -84,16 +94,6 @@ func (self EveryOf) Index(s string) (int, []int) { return index, current } -func (self EveryOf) Match(s string) 
bool { - for _, m := range self.Matchers { - if !m.Match(s) { - return false - } - } - - return true -} - -func (self EveryOf) String() string { - return fmt.Sprintf("", self.Matchers) +func (e IndexedEveryOf) String() string { + return fmt.Sprintf("", e.ms) } diff --git a/match/every_of_test.go b/match/every_of_test.go index eb83f86..840ae1e 100644 --- a/match/every_of_test.go +++ b/match/every_of_test.go @@ -5,7 +5,7 @@ import ( "testing" ) -func TestEveryOfIndex(t *testing.T) { +func TestIndexedEveryOf(t *testing.T) { for id, test := range []struct { matchers Matchers fixture string @@ -33,7 +33,7 @@ func TestEveryOfIndex(t *testing.T) { []int{2}, }, } { - everyOf := NewEveryOf(test.matchers...) + everyOf := NewEveryOf(test.matchers).(IndexedEveryOf) index, segments := everyOf.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) diff --git a/match/list.go b/match/list.go index 7fd763e..296bbdb 100644 --- a/match/list.go +++ b/match/list.go @@ -2,48 +2,47 @@ package match import ( "fmt" - "github.com/gobwas/glob/util/runes" "unicode/utf8" + + "github.com/gobwas/glob/util/runes" ) type List struct { - List []rune - Not bool + rs []rune + not bool } -func NewList(list []rune, not bool) List { - return List{list, not} +func NewList(rs []rune, not bool) List { + return List{rs, not} } -func (self List) Match(s string) bool { +func (l List) Match(s string) bool { r, w := utf8.DecodeRuneInString(s) if len(s) > w { + // Invalid rune. 
return false } - - inList := runes.IndexRune(self.List, r) != -1 - return inList == !self.Not + inList := runes.IndexRune(l.rs, r) != -1 + return inList == !l.not } -func (self List) Len() int { - return lenOne +func (l List) MinLen() int { + return 1 } -func (self List) Index(s string) (int, []int) { +func (l List) Index(s string) (int, []int) { for i, r := range s { - if self.Not == (runes.IndexRune(self.List, r) == -1) { + if l.not == (runes.IndexRune(l.rs, r) == -1) { return i, segmentsByRuneLength[utf8.RuneLen(r)] } } - return -1, nil } -func (self List) String() string { +func (l List) String() string { var not string - if self.Not { + if l.not { not = "!" } - - return fmt.Sprintf("", not, string(self.List)) + return fmt.Sprintf("", not, string(l.rs)) } diff --git a/match/match.go b/match/match.go index f80e007..bfc401c 100644 --- a/match/match.go +++ b/match/match.go @@ -7,15 +7,50 @@ import ( "strings" ) -const lenOne = 1 -const lenZero = 0 -const lenNo = -1 - type Matcher interface { Match(string) bool + MinLen() int +} + +type Indexer interface { Index(string) (int, []int) - Len() int - String() string +} + +type Sizer interface { + RunesCount() int +} + +type MatchIndexer interface { + Matcher + Indexer +} + +type MatchSizer interface { + Matcher + Sizer +} + +type MatchIndexSizer interface { + Matcher + Indexer + Sizer +} + +type Container interface { + Content() []Matcher +} + +func MatchIndexers(ms []Matcher) ([]MatchIndexer, bool) { + for _, m := range ms { + if _, ok := m.(Indexer); !ok { + return nil, false + } + } + mis := make([]MatchIndexer, len(ms)) + for i := range mis { + mis[i] = ms[i].(MatchIndexer) + } + return mis, true } type Matchers []Matcher diff --git a/match/max.go b/match/max.go index d72f69e..7901843 100644 --- a/match/max.go +++ b/match/max.go @@ -6,32 +6,31 @@ import ( ) type Max struct { - Limit int + n int } -func NewMax(l int) Max { - return Max{l} +func NewMax(n int) Max { + return Max{n} } -func (self Max) Match(s string) 
bool { - var l int +func (m Max) Match(s string) bool { + var n int for range s { - l += 1 - if l > self.Limit { + n += 1 + if n > m.n { return false } } - return true } -func (self Max) Index(s string) (int, []int) { - segments := acquireSegments(self.Limit + 1) +func (m Max) Index(s string) (int, []int) { + segments := acquireSegments(m.n + 1) segments = append(segments, 0) var count int for i, r := range s { count++ - if count > self.Limit { + if count > m.n { break } segments = append(segments, i+utf8.RuneLen(r)) @@ -40,10 +39,10 @@ func (self Max) Index(s string) (int, []int) { return 0, segments } -func (self Max) Len() int { - return lenNo +func (m Max) MinLen() int { + return 0 } -func (self Max) String() string { - return fmt.Sprintf("", self.Limit) +func (m Max) String() string { + return fmt.Sprintf("", m.n) } diff --git a/match/min.go b/match/min.go index db57ac8..55bc2ab 100644 --- a/match/min.go +++ b/match/min.go @@ -6,52 +6,48 @@ import ( ) type Min struct { - Limit int + n int } -func NewMin(l int) Min { - return Min{l} +func NewMin(n int) Min { + return Min{n} } -func (self Min) Match(s string) bool { - var l int +func (m Min) Match(s string) bool { + var n int for range s { - l += 1 - if l >= self.Limit { + n += 1 + if n >= m.n { return true } } - return false } -func (self Min) Index(s string) (int, []int) { +func (m Min) Index(s string) (int, []int) { var count int - c := len(s) - self.Limit + 1 + c := len(s) - m.n + 1 if c <= 0 { return -1, nil } - segments := acquireSegments(c) for i, r := range s { count++ - if count >= self.Limit { + if count >= m.n { segments = append(segments, i+utf8.RuneLen(r)) } } - if len(segments) == 0 { return -1, nil } - return 0, segments } -func (self Min) Len() int { - return lenNo +func (m Min) MinLen() int { + return m.n } -func (self Min) String() string { - return fmt.Sprintf("", self.Limit) +func (m Min) String() string { + return fmt.Sprintf("", m.n) } diff --git a/match/nothing.go b/match/nothing.go index 
0d4ecd3..ba8fb5d 100644 --- a/match/nothing.go +++ b/match/nothing.go @@ -18,8 +18,12 @@ func (self Nothing) Index(s string) (int, []int) { return 0, segments0 } -func (self Nothing) Len() int { - return lenZero +func (self Nothing) MinLen() int { + return 0 +} + +func (self Nothing) RunesCount() int { + return 0 } func (self Nothing) String() string { diff --git a/match/optimize.go b/match/optimize.go new file mode 100644 index 0000000..beb0271 --- /dev/null +++ b/match/optimize.go @@ -0,0 +1,278 @@ +package match + +import ( + "fmt" + + "gopkg.in/readline.v1/runes" +) + +func Optimize(m Matcher) Matcher { + switch v := m.(type) { + case Any: + if len(v.sep) == 0 { + return NewSuper() + } + + case Container: + ms := v.Content() + if len(ms) == 1 { + return ms[0] + } + return m + + case List: + if v.not == false && len(v.rs) == 1 { + return NewText(string(v.rs)) + } + return m + + case Tree: + v.left = Optimize(v.left) + v.right = Optimize(v.right) + + txt, ok := v.value.(Text) + if !ok { + return m + } + + var ( + leftNil = v.left == nil + rightNil = v.right == nil + ) + if leftNil && rightNil { + return NewText(txt.s) + } + + _, leftSuper := v.left.(Super) + lp, leftPrefix := v.left.(Prefix) + la, leftAny := v.left.(Any) + + _, rightSuper := v.right.(Super) + rs, rightSuffix := v.right.(Suffix) + ra, rightAny := v.right.(Any) + + switch { + case leftSuper && rightSuper: + return NewContains(txt.s) + + case leftSuper && rightNil: + return NewSuffix(txt.s) + + case rightSuper && leftNil: + return NewPrefix(txt.s) + + case leftNil && rightSuffix: + return NewPrefixSuffix(txt.s, rs.s) + + case rightNil && leftPrefix: + return NewPrefixSuffix(lp.s, txt.s) + + case rightNil && leftAny: + return NewSuffixAny(txt.s, la.sep) + + case leftNil && rightAny: + return NewPrefixAny(txt.s, ra.sep) + } + } + + return m +} + +func Compile(ms []Matcher) (Matcher, error) { + if len(ms) == 0 { + return nil, fmt.Errorf("compile error: need at least one matcher") + } + if len(ms) == 1 
{ + return ms[0], nil + } + if m := glueMatchers(ms); m != nil { + return m, nil + } + + var ( + idx = -1 + maxLen = -2 + indexer MatchIndexer + ) + for i, m := range ms { + mi, ok := m.(MatchIndexer) + if !ok { + continue + } + if n := m.MinLen(); n > maxLen { + maxLen = n + idx = i + indexer = mi + } + } + if indexer == nil { + return nil, fmt.Errorf("can not index on matchers") + } + + left := ms[:idx] + var right []Matcher + if len(ms) > idx+1 { + right = ms[idx+1:] + } + + var l, r Matcher + var err error + if len(left) > 0 { + l, err = Compile(left) + if err != nil { + return nil, err + } + } + + if len(right) > 0 { + r, err = Compile(right) + if err != nil { + return nil, err + } + } + + return NewTree(indexer, l, r), nil +} + +func glueMatchers(ms []Matcher) Matcher { + if m := glueMatchersAsEvery(ms); m != nil { + return m + } + if m := glueMatchersAsRow(ms); m != nil { + return m + } + return nil +} + +func glueMatchersAsRow(ms []Matcher) Matcher { + if len(ms) <= 1 { + return nil + } + var s []MatchIndexSizer + for _, m := range ms { + rsz, ok := m.(MatchIndexSizer) + if !ok { + return nil + } + s = append(s, rsz) + } + return NewRow(s) +} + +func glueMatchersAsEvery(ms []Matcher) Matcher { + if len(ms) <= 1 { + return nil + } + + var ( + hasAny bool + hasSuper bool + hasSingle bool + min int + separator []rune + ) + + for i, matcher := range ms { + var sep []rune + + switch m := matcher.(type) { + case Super: + sep = []rune{} + hasSuper = true + + case Any: + sep = m.sep + hasAny = true + + case Single: + sep = m.sep + hasSingle = true + min++ + + case List: + if !m.not { + return nil + } + sep = m.rs + hasSingle = true + min++ + + default: + return nil + } + + // initialize + if i == 0 { + separator = sep + } + + if runes.Equal(sep, separator) { + continue + } + + return nil + } + + if hasSuper && !hasAny && !hasSingle { + return NewSuper() + } + + if hasAny && !hasSuper && !hasSingle { + return NewAny(separator) + } + + if (hasAny || hasSuper) && min 
> 0 && len(separator) == 0 { + return NewMin(min) + } + + var every []Matcher + if min > 0 { + every = append(every, NewMin(min)) + if !hasAny && !hasSuper { + every = append(every, NewMax(min)) + } + } + if len(separator) > 0 { + every = append(every, NewAny(separator)) + } + + return NewEveryOf(every) +} + +func Minimize(ms []Matcher) []Matcher { + var ( + result Matcher + left int + right int + count int + ) + for l := 0; l < len(ms); l++ { + for r := len(ms); r > l; r-- { + if glued := glueMatchers(ms[l:r]); glued != nil { + var swap bool + if result == nil { + swap = true + } else { + swap = glued.MinLen() > result.MinLen() || count < r-l + } + if swap { + result = glued + left = l + right = r + count = r - l + } + } + } + } + if result == nil { + return ms + } + next := append(append([]Matcher{}, ms[:left]...), result) + if right < len(ms) { + next = append(next, ms[right:]...) + } + if len(next) == len(ms) { + return next + } + return Minimize(next) +} diff --git a/match/prefix.go b/match/prefix.go index a734725..db8dda4 100644 --- a/match/prefix.go +++ b/match/prefix.go @@ -7,20 +7,24 @@ import ( ) type Prefix struct { - Prefix string + s string + minSize int } func NewPrefix(p string) Prefix { - return Prefix{p} + return Prefix{ + s: p, + minSize: utf8.RuneCountInString(p), + } } -func (self Prefix) Index(s string) (int, []int) { - idx := strings.Index(s, self.Prefix) +func (p Prefix) Index(s string) (int, []int) { + idx := strings.Index(s, p.s) if idx == -1 { return -1, nil } - length := len(self.Prefix) + length := len(p.s) var sub string if len(s) > idx+length { sub = s[idx+length:] @@ -37,14 +41,14 @@ func (self Prefix) Index(s string) (int, []int) { return idx, segments } -func (self Prefix) Len() int { - return lenNo +func (p Prefix) MinLen() int { + return p.minSize } -func (self Prefix) Match(s string) bool { - return strings.HasPrefix(s, self.Prefix) +func (p Prefix) Match(s string) bool { + return strings.HasPrefix(s, p.s) } -func (self Prefix) 
String() string { - return fmt.Sprintf("", self.Prefix) +func (p Prefix) String() string { + return fmt.Sprintf("", p.s) } diff --git a/match/prefix_any.go b/match/prefix_any.go index 8ee58fe..5d9f166 100644 --- a/match/prefix_any.go +++ b/match/prefix_any.go @@ -5,27 +5,28 @@ import ( "strings" "unicode/utf8" - sutil "github.com/gobwas/glob/util/strings" + "github.com/gobwas/glob/util/runes" ) type PrefixAny struct { - Prefix string - Separators []rune + s string + sep []rune + minLen int } func NewPrefixAny(s string, sep []rune) PrefixAny { - return PrefixAny{s, sep} + return PrefixAny{s, sep, utf8.RuneCountInString(s)} } -func (self PrefixAny) Index(s string) (int, []int) { - idx := strings.Index(s, self.Prefix) +func (p PrefixAny) Index(s string) (int, []int) { + idx := strings.Index(s, p.s) if idx == -1 { return -1, nil } - n := len(self.Prefix) + n := len(p.s) sub := s[idx+n:] - i := sutil.IndexAnyRunes(sub, self.Separators) + i := runes.IndexAnyRune(sub, p.sep) if i > -1 { sub = sub[:i] } @@ -39,17 +40,17 @@ func (self PrefixAny) Index(s string) (int, []int) { return idx, seg } -func (self PrefixAny) Len() int { - return lenNo +func (p PrefixAny) MinLen() int { + return p.minLen } -func (self PrefixAny) Match(s string) bool { - if !strings.HasPrefix(s, self.Prefix) { +func (p PrefixAny) Match(s string) bool { + if !strings.HasPrefix(s, p.s) { return false } - return sutil.IndexAnyRunes(s[len(self.Prefix):], self.Separators) == -1 + return runes.IndexAnyRune(s[len(p.s):], p.sep) == -1 } -func (self PrefixAny) String() string { - return fmt.Sprintf("", self.Prefix, string(self.Separators)) +func (p PrefixAny) String() string { + return fmt.Sprintf("", p.s, string(p.sep)) } diff --git a/match/prefix_suffix.go b/match/prefix_suffix.go index 8208085..4ed60de 100644 --- a/match/prefix_suffix.go +++ b/match/prefix_suffix.go @@ -3,23 +3,27 @@ package match import ( "fmt" "strings" + "unicode/utf8" ) type PrefixSuffix struct { - Prefix, Suffix string + p, s string + 
minLen int } func NewPrefixSuffix(p, s string) PrefixSuffix { - return PrefixSuffix{p, s} + pn := utf8.RuneCountInString(p) + sn := utf8.RuneCountInString(s) + return PrefixSuffix{p, s, pn + sn} } -func (self PrefixSuffix) Index(s string) (int, []int) { - prefixIdx := strings.Index(s, self.Prefix) +func (ps PrefixSuffix) Index(s string) (int, []int) { + prefixIdx := strings.Index(s, ps.p) if prefixIdx == -1 { return -1, nil } - suffixLen := len(self.Suffix) + suffixLen := len(ps.s) if suffixLen <= 0 { return prefixIdx, []int{len(s) - prefixIdx} } @@ -30,7 +34,7 @@ func (self PrefixSuffix) Index(s string) (int, []int) { segments := acquireSegments(len(s) - prefixIdx) for sub := s[prefixIdx:]; ; { - suffixIdx := strings.LastIndex(sub, self.Suffix) + suffixIdx := strings.LastIndex(sub, ps.s) if suffixIdx == -1 { break } @@ -49,14 +53,14 @@ func (self PrefixSuffix) Index(s string) (int, []int) { return prefixIdx, segments } -func (self PrefixSuffix) Len() int { - return lenNo +func (ps PrefixSuffix) Match(s string) bool { + return strings.HasPrefix(s, ps.p) && strings.HasSuffix(s, ps.s) } -func (self PrefixSuffix) Match(s string) bool { - return strings.HasPrefix(s, self.Prefix) && strings.HasSuffix(s, self.Suffix) +func (ps PrefixSuffix) MinLen() int { + return ps.minLen } -func (self PrefixSuffix) String() string { - return fmt.Sprintf("", self.Prefix, self.Suffix) +func (ps PrefixSuffix) String() string { + return fmt.Sprintf("", ps.p, ps.s) } diff --git a/match/range.go b/match/range.go index ce30245..da4e940 100644 --- a/match/range.go +++ b/match/range.go @@ -14,8 +14,8 @@ func NewRange(lo, hi rune, not bool) Range { return Range{lo, hi, not} } -func (self Range) Len() int { - return lenOne +func (self Range) MinLen() int { + return 1 } func (self Range) Match(s string) bool { diff --git a/match/row.go b/match/row.go index 4379042..80d566e 100644 --- a/match/row.go +++ b/match/row.go @@ -2,76 +2,72 @@ package match import ( "fmt" + "unicode/utf8" + + 
"github.com/gobwas/glob/util/runes" ) type Row struct { - Matchers Matchers - RunesLength int - Segments []int + ms []MatchIndexSizer + runes int + seg []int } -func NewRow(len int, m ...Matcher) Row { +func NewRow(ms []MatchIndexSizer) Row { + var r int + for _, m := range ms { + r += m.RunesCount() + } return Row{ - Matchers: Matchers(m), - RunesLength: len, - Segments: []int{len}, + ms: ms, + runes: r, + seg: []int{r}, } } -func (self Row) matchAll(s string) bool { - var idx int - for _, m := range self.Matchers { - length := m.Len() - - var next, i int - for next = range s[idx:] { - i++ - if i == length { - break - } - } - - if i < length || !m.Match(s[idx:idx+next+1]) { - return false - } - - idx += next + 1 +func (r Row) Match(s string) bool { + if !runes.ExactlyRunesCount(s, r.runes) { + return false } - - return true + return r.matchAll(s) } -func (self Row) lenOk(s string) bool { - var i int - for range s { - i++ - if i > self.RunesLength { - return false +func (r Row) MinLen() int { + return r.runes +} + +func (r Row) RunesCount() int { + return r.runes +} + +func (r Row) Index(s string) (int, []int) { + for j := 0; j < len(s)-r.runes; { + i, _ := r.ms[0].Index(s[j:]) + if i == -1 { + return -1, nil } - } - return self.RunesLength == i -} - -func (self Row) Match(s string) bool { - return self.lenOk(s) && self.matchAll(s) -} - -func (self Row) Len() (l int) { - return self.RunesLength -} - -func (self Row) Index(s string) (int, []int) { - for i := range s { - if len(s[i:]) < self.RunesLength { - break - } - if self.matchAll(s[i:]) { - return i, self.Segments + if r.matchAll(s[i:]) { + return j + i, r.seg } + _, x := utf8.DecodeRuneInString(s[i:]) + j += x } return -1, nil } -func (self Row) String() string { - return fmt.Sprintf("", self.RunesLength, self.Matchers) +func (r Row) String() string { + return fmt.Sprintf("", r.runes, r.ms) +} + +func (r Row) matchAll(s string) bool { + var i int + for _, m := range r.ms { + n := m.RunesCount() + sub := 
runes.Head(s[i:], n) + if !m.Match(sub) { + return false + } + i += len(sub) + } + return true } diff --git a/match/row_test.go b/match/row_test.go index c9e65ef..c25bdd4 100644 --- a/match/row_test.go +++ b/match/row_test.go @@ -7,36 +7,33 @@ import ( func TestRowIndex(t *testing.T) { for id, test := range []struct { - matchers Matchers - length int + matchers []MatchIndexSizer fixture string index int segments []int }{ { - Matchers{ + []MatchIndexSizer{ NewText("abc"), NewText("def"), NewSingle(nil), }, - 7, "qweabcdefghij", 3, []int{7}, }, { - Matchers{ + []MatchIndexSizer{ NewText("abc"), NewText("def"), NewSingle(nil), }, - 7, "abcd", -1, nil, }, } { - p := NewRow(test.length, test.matchers...) + p := NewRow(test.matchers) index, segments := p.Index(test.fixture) if index != test.index { t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) @@ -48,15 +45,11 @@ func TestRowIndex(t *testing.T) { } func BenchmarkRowIndex(b *testing.B) { - m := NewRow( - 7, - Matchers{ - NewText("abc"), - NewText("def"), - NewSingle(nil), - }..., - ) - + m := NewRow([]MatchIndexSizer{ + NewText("abc"), + NewText("def"), + NewSingle(nil), + }) for i := 0; i < b.N; i++ { _, s := m.Index(bench_pattern) releaseSegments(s) @@ -64,15 +57,11 @@ func BenchmarkRowIndex(b *testing.B) { } func BenchmarkIndexRowParallel(b *testing.B) { - m := NewRow( - 7, - Matchers{ - NewText("abc"), - NewText("def"), - NewSingle(nil), - }..., - ) - + m := NewRow([]MatchIndexSizer{ + NewText("abc"), + NewText("def"), + NewSingle(nil), + }) b.RunParallel(func(pb *testing.PB) { for pb.Next() { _, s := m.Index(bench_pattern) diff --git a/match/single.go b/match/single.go index ee6e395..60d810e 100644 --- a/match/single.go +++ b/match/single.go @@ -2,42 +2,45 @@ package match import ( "fmt" - "github.com/gobwas/glob/util/runes" "unicode/utf8" + + "github.com/gobwas/glob/util/runes" ) // single represents ? 
type Single struct { - Separators []rune + sep []rune } func NewSingle(s []rune) Single { return Single{s} } -func (self Single) Match(s string) bool { - r, w := utf8.DecodeRuneInString(s) - if len(s) > w { +func (s Single) Match(v string) bool { + r, w := utf8.DecodeRuneInString(v) + if len(v) > w { return false } - - return runes.IndexRune(self.Separators, r) == -1 + return runes.IndexRune(s.sep, r) == -1 } -func (self Single) Len() int { - return lenOne +func (s Single) MinLen() int { + return 1 } -func (self Single) Index(s string) (int, []int) { - for i, r := range s { - if runes.IndexRune(self.Separators, r) == -1 { +func (s Single) RunesCount() int { + return 1 +} + +func (s Single) Index(v string) (int, []int) { + for i, r := range v { + if runes.IndexRune(s.sep, r) == -1 { return i, segmentsByRuneLength[utf8.RuneLen(r)] } } - return -1, nil } -func (self Single) String() string { - return fmt.Sprintf("", string(self.Separators)) +func (s Single) String() string { + return fmt.Sprintf("", string(s.sep)) } diff --git a/match/suffix.go b/match/suffix.go index 85bea8c..ae72474 100644 --- a/match/suffix.go +++ b/match/suffix.go @@ -3,33 +3,34 @@ package match import ( "fmt" "strings" + "unicode/utf8" ) type Suffix struct { - Suffix string + s string + minLen int } func NewSuffix(s string) Suffix { - return Suffix{s} + return Suffix{s, utf8.RuneCountInString(s)} } -func (self Suffix) Len() int { - return lenNo +func (s Suffix) MinLen() int { + return s.minLen } -func (self Suffix) Match(s string) bool { - return strings.HasSuffix(s, self.Suffix) +func (s Suffix) Match(v string) bool { + return strings.HasSuffix(v, s.s) } -func (self Suffix) Index(s string) (int, []int) { - idx := strings.Index(s, self.Suffix) +func (s Suffix) Index(v string) (int, []int) { + idx := strings.Index(v, s.s) if idx == -1 { return -1, nil } - - return 0, []int{idx + len(self.Suffix)} + return 0, []int{idx + len(s.s)} } -func (self Suffix) String() string { - return fmt.Sprintf("", 
self.Suffix) +func (s Suffix) String() string { + return fmt.Sprintf("", s.s) } diff --git a/match/suffix_any.go b/match/suffix_any.go index c5106f8..4e424bd 100644 --- a/match/suffix_any.go +++ b/match/suffix_any.go @@ -3,41 +3,43 @@ package match import ( "fmt" "strings" + "unicode/utf8" - sutil "github.com/gobwas/glob/util/strings" + "github.com/gobwas/glob/util/runes" ) type SuffixAny struct { - Suffix string - Separators []rune + s string + sep []rune + minLen int } func NewSuffixAny(s string, sep []rune) SuffixAny { - return SuffixAny{s, sep} + return SuffixAny{s, sep, utf8.RuneCountInString(s)} } -func (self SuffixAny) Index(s string) (int, []int) { - idx := strings.Index(s, self.Suffix) +func (s SuffixAny) Index(v string) (int, []int) { + idx := strings.Index(v, s.s) if idx == -1 { return -1, nil } - i := sutil.LastIndexAnyRunes(s[:idx], self.Separators) + 1 + i := runes.LastIndexAnyRune(v[:idx], s.sep) + 1 - return i, []int{idx + len(self.Suffix) - i} + return i, []int{idx + len(s.s) - i} } -func (self SuffixAny) Len() int { - return lenNo +func (s SuffixAny) MinLen() int { + return s.minLen } -func (self SuffixAny) Match(s string) bool { - if !strings.HasSuffix(s, self.Suffix) { +func (s SuffixAny) Match(v string) bool { + if !strings.HasSuffix(v, s.s) { return false } - return sutil.IndexAnyRunes(s[:len(s)-len(self.Suffix)], self.Separators) == -1 + return runes.IndexAnyRune(v[:len(v)-len(s.s)], s.sep) == -1 } -func (self SuffixAny) String() string { - return fmt.Sprintf("", string(self.Separators), self.Suffix) +func (s SuffixAny) String() string { + return fmt.Sprintf("", string(s.sep), s.s) } diff --git a/match/super.go b/match/super.go index 3875950..0714b48 100644 --- a/match/super.go +++ b/match/super.go @@ -10,24 +10,23 @@ func NewSuper() Super { return Super{} } -func (self Super) Match(s string) bool { +func (s Super) Match(_ string) bool { return true } -func (self Super) Len() int { - return lenNo +func (s Super) MinLen() int { + return 0 } 
-func (self Super) Index(s string) (int, []int) { - segments := acquireSegments(len(s) + 1) - for i := range s { - segments = append(segments, i) +func (s Super) Index(v string) (int, []int) { + seg := acquireSegments(len(v) + 1) + for i := range v { + seg = append(seg, i) } - segments = append(segments, len(s)) - - return 0, segments + seg = append(seg, len(v)) + return 0, seg } -func (self Super) String() string { +func (s Super) String() string { return fmt.Sprintf("") } diff --git a/match/text.go b/match/text.go index 0a17616..5955a57 100644 --- a/match/text.go +++ b/match/text.go @@ -8,38 +8,45 @@ import ( // raw represents raw string to match type Text struct { - Str string - RunesLength int - BytesLength int - Segments []int + s string + runes int + bytes int + seg []int } func NewText(s string) Text { return Text{ - Str: s, - RunesLength: utf8.RuneCountInString(s), - BytesLength: len(s), - Segments: []int{len(s)}, + s: s, + runes: utf8.RuneCountInString(s), + bytes: len(s), + seg: []int{len(s)}, } } -func (self Text) Match(s string) bool { - return self.Str == s +func (t Text) Match(s string) bool { + return t.s == s } -func (self Text) Len() int { - return self.RunesLength -} - -func (self Text) Index(s string) (int, []int) { - index := strings.Index(s, self.Str) - if index == -1 { +func (t Text) Index(s string) (int, []int) { + i := strings.Index(s, t.s) + if i == -1 { return -1, nil } - - return index, self.Segments + return i, t.seg } -func (self Text) String() string { - return fmt.Sprintf("", self.Str) +func (t Text) MinLen() int { + return t.runes +} + +func (t Text) BytesCount() int { + return t.bytes +} + +func (t Text) RunesCount() int { + return t.runes +} + +func (t Text) String() string { + return fmt.Sprintf("", t.s) } diff --git a/match/tree.go b/match/tree.go new file mode 100644 index 0000000..950b5ae --- /dev/null +++ b/match/tree.go @@ -0,0 +1,154 @@ +package match + +import ( + "fmt" + "unicode/utf8" + + 
"github.com/gobwas/glob/util/runes" +) + +type Tree struct { + value MatchIndexer + left Matcher + right Matcher + + minLen int + + runes int + vrunes int + lrunes int + rrunes int +} + +type SizedTree struct { + Tree +} + +func (st SizedTree) RunesCount() int { + return st.Tree.runes +} + +func NewTree(v MatchIndexer, l, r Matcher) Matcher { + tree := Tree{ + value: v, + left: l, + right: r, + } + tree.minLen = v.MinLen() + if l != nil { + tree.minLen += l.MinLen() + } + if r != nil { + tree.minLen += r.MinLen() + } + var ( + ls, lsz = l.(Sizer) + rs, rsz = r.(Sizer) + vs, vsz = v.(Sizer) + ) + if lsz { + tree.lrunes = ls.RunesCount() + } else { + tree.lrunes = -1 + } + if rsz { + tree.rrunes = rs.RunesCount() + } else { + tree.rrunes = -1 + } + if vsz { + tree.vrunes = vs.RunesCount() + } else { + tree.vrunes = -1 + } + if vsz && lsz && rsz { + tree.runes = tree.vrunes + tree.lrunes + tree.rrunes + return SizedTree{tree} + } + tree.runes = -1 + return tree +} + +func (t Tree) MinLen() int { + return t.minLen +} + +func (t Tree) Match(s string) (ok bool) { + enter() + logf("matching %q: %v", s, t) + defer func(s string) { + logf("result: %q -> %v", s, ok) + leave() + }(s) + + offset, limit := t.offsetLimit(s) + q := s[offset : len(s)-limit] + logf("OFFSET/LIMIT: %d/%d %q of %q", offset, limit, q, s) + + for len(q) >= t.vrunes { + // search for matching part in substring + index, segments := t.value.Index(q) + logf("INDEX #%d %q (%v)", index, q, t.value) + if index == -1 { + releaseSegments(segments) + return false + } + + l := s[:offset+index] + var left bool + if t.left != nil { + left = t.left.Match(l) + } else { + left = l == "" + } + logf("LEFT %q %v", l, left) + if left { + for _, seg := range segments { + var ( + right bool + ) + r := s[offset+index+seg:] + if t.right != nil { + right = t.right.Match(r) + } else { + right = r == "" + } + logf("RIGHT %q %v", r, right) + if right { + releaseSegments(segments) + return true + } + } + } + + _, x := 
utf8.DecodeRuneInString(q[index:]) + releaseSegments(segments) + q = q[x:] + offset += x + logf("SLICED TO %q", q) + } + + return false +} + +// Returns substring and offset/limit pair in bytes. +func (t Tree) offsetLimit(s string) (offset, limit int) { + n := utf8.RuneCountInString(s) + if t.runes > n { + return 0, 0 + } + if n := t.lrunes; n > 0 { + offset = len(runes.Head(s, n)) + } + if n := t.rrunes; n > 0 { + limit = len(runes.Tail(s, n)) + } + return +} + +func (t Tree) String() string { + return fmt.Sprintf( + "%v]>", + t.left, t.value, t.right, + ) +} diff --git a/match/tree_test.go b/match/tree_test.go new file mode 100644 index 0000000..3e89287 --- /dev/null +++ b/match/tree_test.go @@ -0,0 +1,94 @@ +package match + +import ( + "fmt" + "testing" +) + +func TestTree(t *testing.T) { + for _, test := range []struct { + tree Matcher + str string + exp bool + }{ + { + NewTree(NewText("abc"), NewSuper(), NewSuper()), + "abc", + true, + }, + { + NewTree(NewText("a"), NewSingle(nil), NewSingle(nil)), + "aaa", + true, + }, + { + NewTree(NewText("b"), NewSingle(nil), nil), + "bbb", + false, + }, + { + NewTree( + NewText("c"), + NewTree( + NewSingle(nil), + NewSuper(), + nil, + ), + nil, + ), + "abc", + true, + }, + } { + t.Run("", func(t *testing.T) { + act := test.tree.Match(test.str) + if act != test.exp { + fmt.Println(Graphviz("NIL", test.tree)) + t.Errorf("match %q error: act: %t; exp: %t", test.str, act, test.exp) + } + }) + } +} + +type fakeMatcher struct { + len int + segn int + name string +} + +func (f *fakeMatcher) Match(string) bool { + return true +} + +func (f *fakeMatcher) Index(s string) (int, []int) { + seg := make([]int, 0, f.segn) + for x := 0; x < f.segn; x++ { + seg = append(seg, f.segn) + } + return 0, seg +} + +func (f *fakeMatcher) MinLen() int { + return f.len +} + +func (f *fakeMatcher) String() string { + return f.name +} + +func BenchmarkMatchTree(b *testing.B) { + l := &fakeMatcher{4, 3, "left_fake"} + r := &fakeMatcher{4, 3, 
"right_fake"} + v := &fakeMatcher{2, 3, "value_fake"} + + // must be <= len(l + r + v) + fixture := "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij" + + bt := NewTree(v, l, r) + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + bt.Match(fixture) + } + }) +} diff --git a/match/util.go b/match/util.go new file mode 100644 index 0000000..6dc60a1 --- /dev/null +++ b/match/util.go @@ -0,0 +1,11 @@ +package match + +func minLen(ms []Matcher) (min int) { + for i, m := range ms { + n := m.MinLen() + if i == 0 || n < min { + min = n + } + } + return min +} diff --git a/syntax/ast/optimize.go b/syntax/ast/optimize.go new file mode 100644 index 0000000..4cc637d --- /dev/null +++ b/syntax/ast/optimize.go @@ -0,0 +1,165 @@ +package ast + +import ( + "reflect" +) + +// Minimize tries to apply some heuristics to minimize number of nodes in given +// t +func Minimize(t *Node) *Node { + switch t.Kind { + case KindAnyOf: + return minimizeAnyOf(t) + default: + return nil + } +} + +// minimizeAnyOf tries to find common children of given node of AnyOf pattern +// it searches for common children from left and from right +// if any common children are found – then it returns new optimized ast t +// else it returns nil +func minimizeAnyOf(t *Node) *Node { + if !SameKind(t.Children, KindPattern) { + return nil + } + + commonLeft, commonRight := CommonChildren(t.Children) + commonLeftCount, commonRightCount := len(commonLeft), len(commonRight) + if commonLeftCount == 0 && commonRightCount == 0 { // there are no common parts + return nil + } + + var result []*Node + if commonLeftCount > 0 { + result = append(result, NewNode(KindPattern, nil, commonLeft...)) + } + + var anyOf []*Node + for _, child := range t.Children { + reuse := child.Children[commonLeftCount : len(child.Children)-commonRightCount] + var node *Node + if len(reuse) == 0 { + // this pattern is completely reduced by commonLeft and commonRight patterns + // so it become nothing + node 
= NewNode(KindNothing, nil) + } else { + node = NewNode(KindPattern, nil, reuse...) + } + anyOf = AppendUnique(anyOf, node) + } + switch { + case len(anyOf) == 1 && anyOf[0].Kind != KindNothing: + result = append(result, anyOf[0]) + case len(anyOf) > 1: + result = append(result, NewNode(KindAnyOf, nil, anyOf...)) + } + + if commonRightCount > 0 { + result = append(result, NewNode(KindPattern, nil, commonRight...)) + } + + return NewNode(KindPattern, nil, result...) +} + +func CommonChildren(nodes []*Node) (commonLeft, commonRight []*Node) { + if len(nodes) <= 1 { + return + } + + // find node that has least number of children + idx := OneWithLeastChildren(nodes) + if idx == -1 { + return + } + tree := nodes[idx] + treeLength := len(tree.Children) + + // allocate max able size for rightCommon slice + // to get ability insert elements in reverse order (from end to start) + // without sorting + commonRight = make([]*Node, treeLength) + lastRight := treeLength // will use this to get results as commonRight[lastRight:] + + var ( + breakLeft bool + breakRight bool + commonTotal int + ) + for i, j := 0, treeLength-1; commonTotal < treeLength && j >= 0 && !(breakLeft && breakRight); i, j = i+1, j-1 { + treeLeft := tree.Children[i] + treeRight := tree.Children[j] + + for k := 0; k < len(nodes) && !(breakLeft && breakRight); k++ { + // skip least children node + if k == idx { + continue + } + + restLeft := nodes[k].Children[i] + restRight := nodes[k].Children[j+len(nodes[k].Children)-treeLength] + + breakLeft = breakLeft || !treeLeft.Equal(restLeft) + + // disable searching for right common parts, if left part is already overlapping + breakRight = breakRight || (!breakLeft && j <= i) + breakRight = breakRight || !treeRight.Equal(restRight) + } + + if !breakLeft { + commonTotal++ + commonLeft = append(commonLeft, treeLeft) + } + if !breakRight { + commonTotal++ + lastRight = j + commonRight[j] = treeRight + } + } + + commonRight = commonRight[lastRight:] + + return +} + +func 
AppendUnique(target []*Node, val *Node) []*Node { + for _, n := range target { + if reflect.DeepEqual(n, val) { + return target + } + } + return append(target, val) +} + +func SameKind(nodes []*Node, kind Kind) bool { + for _, n := range nodes { + if n.Kind != kind { + return false + } + } + return true +} + +func OneWithLeastChildren(nodes []*Node) int { + min := -1 + idx := -1 + for i, n := range nodes { + if idx == -1 || (len(n.Children) < min) { + min = len(n.Children) + idx = i + } + } + return idx +} + +func Equal(a, b []*Node) bool { + if len(a) != len(b) { + return false + } + for i, av := range a { + if !av.Equal(b[i]) { + return false + } + } + return true +} diff --git a/syntax/ast/optimize_test.go b/syntax/ast/optimize_test.go new file mode 100644 index 0000000..dd2e876 --- /dev/null +++ b/syntax/ast/optimize_test.go @@ -0,0 +1,126 @@ +package ast + +import ( + "testing" +) + +func TestCommonChildren(t *testing.T) { + for _, test := range []struct { + nodes []*Node + left []*Node + right []*Node + }{ + { + nodes: []*Node{ + NewNode(KindNothing, nil, + NewNode(KindText, Text{"a"}), + NewNode(KindText, Text{"z"}), + NewNode(KindText, Text{"c"}), + ), + }, + }, + { + nodes: []*Node{ + NewNode(KindNothing, nil, + NewNode(KindText, Text{"a"}), + NewNode(KindText, Text{"z"}), + NewNode(KindText, Text{"c"}), + ), + NewNode(KindNothing, nil, + NewNode(KindText, Text{"a"}), + NewNode(KindText, Text{"b"}), + NewNode(KindText, Text{"c"}), + ), + }, + left: []*Node{ + NewNode(KindText, Text{"a"}), + }, + right: []*Node{ + NewNode(KindText, Text{"c"}), + }, + }, + { + nodes: []*Node{ + NewNode(KindNothing, nil, + NewNode(KindText, Text{"a"}), + NewNode(KindText, Text{"b"}), + NewNode(KindText, Text{"c"}), + NewNode(KindText, Text{"d"}), + ), + NewNode(KindNothing, nil, + NewNode(KindText, Text{"a"}), + NewNode(KindText, Text{"b"}), + NewNode(KindText, Text{"c"}), + NewNode(KindText, Text{"c"}), + NewNode(KindText, Text{"d"}), + ), + }, + left: []*Node{ + 
// Head returns the prefix of s that holds its first r runes, or all of s
// when it has fewer. It returns "" when r <= 0. Every invalid byte counts as
// one rune.
func Head(s string, r int) string {
	if r <= 0 {
		return ""
	}
	var i int
	for ; r > 0 && i < len(s); r-- {
		_, n := utf8.DecodeRuneInString(s[i:])
		i += n
	}
	return s[:i]
}

// Tail returns the suffix of s that holds its last r runes, or all of s when
// it has fewer. It returns "" when r <= 0. Every invalid byte counts as one
// rune.
func Tail(s string, r int) string {
	if r <= 0 {
		return ""
	}
	i := len(s)
	for ; r > 0 && i > 0; r-- {
		// DecodeLastRuneInString reports the width of the last rune, so a
		// literal U+FFFD is stepped over like any other 3-byte rune instead
		// of being mistaken for a decoding error.
		_, n := utf8.DecodeLastRuneInString(s[:i])
		i -= n
	}
	return s[i:]
}

// ExactlyRunesCount reports whether s consists of exactly n runes. It stops
// scanning as soon as the count exceeds n.
func ExactlyRunesCount(s string, n int) bool {
	var m int
	for range s {
		m++
		if m > n {
			return false
		}
	}
	return m == n
}

// AtLeastRunesCount reports whether s consists of at least n runes. It stops
// scanning as soon as n runes were seen; any string has at least n runes
// when n <= 0.
func AtLeastRunesCount(s string, n int) bool {
	if n <= 0 {
		return true
	}
	var m int
	for range s {
		m++
		if m >= n {
			return true
		}
	}
	return false
}

// IndexAnyRune returns the byte index of the first occurrence in s of any
// rune from rs, or -1 when none of them occurs in s.
func IndexAnyRune(s string, rs []rune) int {
	first := -1
	for _, r := range rs {
		i := strings.IndexRune(s, r)
		if i == 0 {
			// Nothing can come earlier.
			return 0
		}
		if i != -1 && (first == -1 || i < first) {
			first = i
		}
	}
	return first
}

// LastIndexAnyRune returns the byte index of the last occurrence in s of any
// rune from rs, or -1 when none of them occurs in s. Values in rs that are
// not valid runes never match.
func LastIndexAnyRune(s string, rs []rune) int {
	last := -1
	for _, r := range rs {
		if !utf8.ValidRune(r) {
			// string(r) would turn into U+FFFD and match unrelated text.
			continue
		}
		if i := strings.LastIndex(s, string(r)); i > last {
			last = i
		}
	}
	return last
}
if len(a) == len(b) { for i := 0; i < len(a); i++ { if a[i] != b[i] { diff --git a/util/strings/strings.go b/util/strings/strings.go deleted file mode 100644 index e8ee192..0000000 --- a/util/strings/strings.go +++ /dev/null @@ -1,39 +0,0 @@ -package strings - -import ( - "strings" - "unicode/utf8" -) - -func IndexAnyRunes(s string, rs []rune) int { - for _, r := range rs { - if i := strings.IndexRune(s, r); i != -1 { - return i - } - } - - return -1 -} - -func LastIndexAnyRunes(s string, rs []rune) int { - for _, r := range rs { - i := -1 - if 0 <= r && r < utf8.RuneSelf { - i = strings.LastIndexByte(s, byte(r)) - } else { - sub := s - for len(sub) > 0 { - j := strings.IndexRune(s, r) - if j == -1 { - break - } - i = j - sub = sub[i+1:] - } - } - if i != -1 { - return i - } - } - return -1 -}