mirror of https://github.com/gobwas/glob.git
dramatic refactoring
This commit is contained in:
parent
e7a84e9525
commit
e4652bc1f4
|
@ -3,12 +3,12 @@ package main
|
|||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/gobwas/glob"
|
||||
"github.com/gobwas/glob/match"
|
||||
"github.com/gobwas/glob/match/debug"
|
||||
"os"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/gobwas/glob"
|
||||
"github.com/gobwas/glob/match"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
@ -40,5 +40,5 @@ func main() {
|
|||
}
|
||||
|
||||
matcher := glob.(match.Matcher)
|
||||
fmt.Fprint(os.Stdout, debug.Graphviz(*pattern, matcher))
|
||||
fmt.Fprint(os.Stdout, match.Graphviz(*pattern, matcher))
|
||||
}
|
||||
|
|
|
@ -3,11 +3,12 @@ package main
|
|||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/gobwas/glob"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/gobwas/glob"
|
||||
)
|
||||
|
||||
func benchString(r testing.BenchmarkResult) string {
|
||||
|
|
|
@ -5,467 +5,58 @@ package compiler
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/gobwas/glob/match"
|
||||
"github.com/gobwas/glob/syntax/ast"
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
)
|
||||
|
||||
func optimizeMatcher(matcher match.Matcher) match.Matcher {
|
||||
switch m := matcher.(type) {
|
||||
|
||||
case match.Any:
|
||||
if len(m.Separators) == 0 {
|
||||
return match.NewSuper()
|
||||
}
|
||||
|
||||
case match.AnyOf:
|
||||
if len(m.Matchers) == 1 {
|
||||
return m.Matchers[0]
|
||||
}
|
||||
|
||||
return m
|
||||
|
||||
case match.List:
|
||||
if m.Not == false && len(m.List) == 1 {
|
||||
return match.NewText(string(m.List))
|
||||
}
|
||||
|
||||
return m
|
||||
|
||||
case match.BTree:
|
||||
m.Left = optimizeMatcher(m.Left)
|
||||
m.Right = optimizeMatcher(m.Right)
|
||||
|
||||
r, ok := m.Value.(match.Text)
|
||||
if !ok {
|
||||
return m
|
||||
}
|
||||
|
||||
var (
|
||||
leftNil = m.Left == nil
|
||||
rightNil = m.Right == nil
|
||||
)
|
||||
if leftNil && rightNil {
|
||||
return match.NewText(r.Str)
|
||||
}
|
||||
|
||||
_, leftSuper := m.Left.(match.Super)
|
||||
lp, leftPrefix := m.Left.(match.Prefix)
|
||||
la, leftAny := m.Left.(match.Any)
|
||||
|
||||
_, rightSuper := m.Right.(match.Super)
|
||||
rs, rightSuffix := m.Right.(match.Suffix)
|
||||
ra, rightAny := m.Right.(match.Any)
|
||||
|
||||
switch {
|
||||
case leftSuper && rightSuper:
|
||||
return match.NewContains(r.Str, false)
|
||||
|
||||
case leftSuper && rightNil:
|
||||
return match.NewSuffix(r.Str)
|
||||
|
||||
case rightSuper && leftNil:
|
||||
return match.NewPrefix(r.Str)
|
||||
|
||||
case leftNil && rightSuffix:
|
||||
return match.NewPrefixSuffix(r.Str, rs.Suffix)
|
||||
|
||||
case rightNil && leftPrefix:
|
||||
return match.NewPrefixSuffix(lp.Prefix, r.Str)
|
||||
|
||||
case rightNil && leftAny:
|
||||
return match.NewSuffixAny(r.Str, la.Separators)
|
||||
|
||||
case leftNil && rightAny:
|
||||
return match.NewPrefixAny(r.Str, ra.Separators)
|
||||
}
|
||||
|
||||
return m
|
||||
func Compile(tree *ast.Node, sep []rune) (match.Matcher, error) {
|
||||
m, err := compile(tree, sep)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return matcher
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
|
||||
if len(matchers) == 0 {
|
||||
return nil, fmt.Errorf("compile error: need at least one matcher")
|
||||
}
|
||||
if len(matchers) == 1 {
|
||||
return matchers[0], nil
|
||||
}
|
||||
if m := glueMatchers(matchers); m != nil {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
idx := -1
|
||||
maxLen := -1
|
||||
var val match.Matcher
|
||||
for i, matcher := range matchers {
|
||||
if l := matcher.Len(); l != -1 && l >= maxLen {
|
||||
maxLen = l
|
||||
idx = i
|
||||
val = matcher
|
||||
}
|
||||
}
|
||||
|
||||
if val == nil { // not found matcher with static length
|
||||
r, err := compileMatchers(matchers[1:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return match.NewBTree(matchers[0], nil, r), nil
|
||||
}
|
||||
|
||||
left := matchers[:idx]
|
||||
var right []match.Matcher
|
||||
if len(matchers) > idx+1 {
|
||||
right = matchers[idx+1:]
|
||||
}
|
||||
|
||||
var l, r match.Matcher
|
||||
var err error
|
||||
if len(left) > 0 {
|
||||
l, err = compileMatchers(left)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if len(right) > 0 {
|
||||
r, err = compileMatchers(right)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return match.NewBTree(val, l, r), nil
|
||||
}
|
||||
|
||||
func glueMatchers(matchers []match.Matcher) match.Matcher {
|
||||
if m := glueMatchersAsEvery(matchers); m != nil {
|
||||
return m
|
||||
}
|
||||
if m := glueMatchersAsRow(matchers); m != nil {
|
||||
return m
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func glueMatchersAsRow(matchers []match.Matcher) match.Matcher {
|
||||
if len(matchers) <= 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
c []match.Matcher
|
||||
l int
|
||||
)
|
||||
for _, matcher := range matchers {
|
||||
if ml := matcher.Len(); ml == -1 {
|
||||
return nil
|
||||
} else {
|
||||
c = append(c, matcher)
|
||||
l += ml
|
||||
}
|
||||
}
|
||||
return match.NewRow(l, c...)
|
||||
}
|
||||
|
||||
func glueMatchersAsEvery(matchers []match.Matcher) match.Matcher {
|
||||
if len(matchers) <= 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
hasAny bool
|
||||
hasSuper bool
|
||||
hasSingle bool
|
||||
min int
|
||||
separator []rune
|
||||
)
|
||||
|
||||
for i, matcher := range matchers {
|
||||
var sep []rune
|
||||
|
||||
switch m := matcher.(type) {
|
||||
case match.Super:
|
||||
sep = []rune{}
|
||||
hasSuper = true
|
||||
|
||||
case match.Any:
|
||||
sep = m.Separators
|
||||
hasAny = true
|
||||
|
||||
case match.Single:
|
||||
sep = m.Separators
|
||||
hasSingle = true
|
||||
min++
|
||||
|
||||
case match.List:
|
||||
if !m.Not {
|
||||
return nil
|
||||
}
|
||||
sep = m.List
|
||||
hasSingle = true
|
||||
min++
|
||||
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
|
||||
// initialize
|
||||
if i == 0 {
|
||||
separator = sep
|
||||
}
|
||||
|
||||
if runes.Equal(sep, separator) {
|
||||
continue
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
if hasSuper && !hasAny && !hasSingle {
|
||||
return match.NewSuper()
|
||||
}
|
||||
|
||||
if hasAny && !hasSuper && !hasSingle {
|
||||
return match.NewAny(separator)
|
||||
}
|
||||
|
||||
if (hasAny || hasSuper) && min > 0 && len(separator) == 0 {
|
||||
return match.NewMin(min)
|
||||
}
|
||||
|
||||
every := match.NewEveryOf()
|
||||
|
||||
if min > 0 {
|
||||
every.Add(match.NewMin(min))
|
||||
|
||||
if !hasAny && !hasSuper {
|
||||
every.Add(match.NewMax(min))
|
||||
}
|
||||
}
|
||||
|
||||
if len(separator) > 0 {
|
||||
every.Add(match.NewContains(string(separator), true))
|
||||
}
|
||||
|
||||
return every
|
||||
}
|
||||
|
||||
func minimizeMatchers(matchers []match.Matcher) []match.Matcher {
|
||||
var done match.Matcher
|
||||
var left, right, count int
|
||||
|
||||
for l := 0; l < len(matchers); l++ {
|
||||
for r := len(matchers); r > l; r-- {
|
||||
if glued := glueMatchers(matchers[l:r]); glued != nil {
|
||||
var swap bool
|
||||
|
||||
if done == nil {
|
||||
swap = true
|
||||
} else {
|
||||
cl, gl := done.Len(), glued.Len()
|
||||
swap = cl > -1 && gl > -1 && gl > cl
|
||||
swap = swap || count < r-l
|
||||
}
|
||||
|
||||
if swap {
|
||||
done = glued
|
||||
left = l
|
||||
right = r
|
||||
count = r - l
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if done == nil {
|
||||
return matchers
|
||||
}
|
||||
|
||||
next := append(append([]match.Matcher{}, matchers[:left]...), done)
|
||||
if right < len(matchers) {
|
||||
next = append(next, matchers[right:]...)
|
||||
}
|
||||
|
||||
if len(next) == len(matchers) {
|
||||
return next
|
||||
}
|
||||
|
||||
return minimizeMatchers(next)
|
||||
}
|
||||
|
||||
// minimizeAnyOf tries to apply some heuristics to minimize number of nodes in given tree
|
||||
func minimizeTree(tree *ast.Node) *ast.Node {
|
||||
switch tree.Kind {
|
||||
case ast.KindAnyOf:
|
||||
return minimizeTreeAnyOf(tree)
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// minimizeAnyOf tries to find common children of given node of AnyOf pattern
|
||||
// it searches for common children from left and from right
|
||||
// if any common children are found – then it returns new optimized ast tree
|
||||
// else it returns nil
|
||||
func minimizeTreeAnyOf(tree *ast.Node) *ast.Node {
|
||||
if !areOfSameKind(tree.Children, ast.KindPattern) {
|
||||
return nil
|
||||
}
|
||||
|
||||
commonLeft, commonRight := commonChildren(tree.Children)
|
||||
commonLeftCount, commonRightCount := len(commonLeft), len(commonRight)
|
||||
if commonLeftCount == 0 && commonRightCount == 0 { // there are no common parts
|
||||
return nil
|
||||
}
|
||||
|
||||
var result []*ast.Node
|
||||
if commonLeftCount > 0 {
|
||||
result = append(result, ast.NewNode(ast.KindPattern, nil, commonLeft...))
|
||||
}
|
||||
|
||||
var anyOf []*ast.Node
|
||||
for _, child := range tree.Children {
|
||||
reuse := child.Children[commonLeftCount : len(child.Children)-commonRightCount]
|
||||
var node *ast.Node
|
||||
if len(reuse) == 0 {
|
||||
// this pattern is completely reduced by commonLeft and commonRight patterns
|
||||
// so it become nothing
|
||||
node = ast.NewNode(ast.KindNothing, nil)
|
||||
} else {
|
||||
node = ast.NewNode(ast.KindPattern, nil, reuse...)
|
||||
}
|
||||
anyOf = appendIfUnique(anyOf, node)
|
||||
}
|
||||
switch {
|
||||
case len(anyOf) == 1 && anyOf[0].Kind != ast.KindNothing:
|
||||
result = append(result, anyOf[0])
|
||||
case len(anyOf) > 1:
|
||||
result = append(result, ast.NewNode(ast.KindAnyOf, nil, anyOf...))
|
||||
}
|
||||
|
||||
if commonRightCount > 0 {
|
||||
result = append(result, ast.NewNode(ast.KindPattern, nil, commonRight...))
|
||||
}
|
||||
|
||||
return ast.NewNode(ast.KindPattern, nil, result...)
|
||||
}
|
||||
|
||||
func commonChildren(nodes []*ast.Node) (commonLeft, commonRight []*ast.Node) {
|
||||
if len(nodes) <= 1 {
|
||||
return
|
||||
}
|
||||
|
||||
// find node that has least number of children
|
||||
idx := leastChildren(nodes)
|
||||
if idx == -1 {
|
||||
return
|
||||
}
|
||||
tree := nodes[idx]
|
||||
treeLength := len(tree.Children)
|
||||
|
||||
// allocate max able size for rightCommon slice
|
||||
// to get ability insert elements in reverse order (from end to start)
|
||||
// without sorting
|
||||
commonRight = make([]*ast.Node, treeLength)
|
||||
lastRight := treeLength // will use this to get results as commonRight[lastRight:]
|
||||
|
||||
var (
|
||||
breakLeft bool
|
||||
breakRight bool
|
||||
commonTotal int
|
||||
)
|
||||
for i, j := 0, treeLength-1; commonTotal < treeLength && j >= 0 && !(breakLeft && breakRight); i, j = i+1, j-1 {
|
||||
treeLeft := tree.Children[i]
|
||||
treeRight := tree.Children[j]
|
||||
|
||||
for k := 0; k < len(nodes) && !(breakLeft && breakRight); k++ {
|
||||
// skip least children node
|
||||
if k == idx {
|
||||
continue
|
||||
}
|
||||
|
||||
restLeft := nodes[k].Children[i]
|
||||
restRight := nodes[k].Children[j+len(nodes[k].Children)-treeLength]
|
||||
|
||||
breakLeft = breakLeft || !treeLeft.Equal(restLeft)
|
||||
|
||||
// disable searching for right common parts, if left part is already overlapping
|
||||
breakRight = breakRight || (!breakLeft && j <= i)
|
||||
breakRight = breakRight || !treeRight.Equal(restRight)
|
||||
}
|
||||
|
||||
if !breakLeft {
|
||||
commonTotal++
|
||||
commonLeft = append(commonLeft, treeLeft)
|
||||
}
|
||||
if !breakRight {
|
||||
commonTotal++
|
||||
lastRight = j
|
||||
commonRight[j] = treeRight
|
||||
}
|
||||
}
|
||||
|
||||
commonRight = commonRight[lastRight:]
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func appendIfUnique(target []*ast.Node, val *ast.Node) []*ast.Node {
|
||||
for _, n := range target {
|
||||
if reflect.DeepEqual(n, val) {
|
||||
return target
|
||||
}
|
||||
}
|
||||
return append(target, val)
|
||||
}
|
||||
|
||||
func areOfSameKind(nodes []*ast.Node, kind ast.Kind) bool {
|
||||
for _, n := range nodes {
|
||||
if n.Kind != kind {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func leastChildren(nodes []*ast.Node) int {
|
||||
min := -1
|
||||
idx := -1
|
||||
for i, n := range nodes {
|
||||
if idx == -1 || (len(n.Children) < min) {
|
||||
min = len(n.Children)
|
||||
idx = i
|
||||
}
|
||||
}
|
||||
return idx
|
||||
}
|
||||
|
||||
func compileTreeChildren(tree *ast.Node, sep []rune) ([]match.Matcher, error) {
|
||||
func compileNodes(ns []*ast.Node, sep []rune) ([]match.Matcher, error) {
|
||||
var matchers []match.Matcher
|
||||
for _, desc := range tree.Children {
|
||||
m, err := compile(desc, sep)
|
||||
for _, n := range ns {
|
||||
m, err := compile(n, sep)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
matchers = append(matchers, optimizeMatcher(m))
|
||||
matchers = append(matchers, m)
|
||||
}
|
||||
return matchers, nil
|
||||
}
|
||||
|
||||
func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) {
|
||||
enter()
|
||||
logf("compiling %s", tree)
|
||||
defer func() {
|
||||
logf("result %s", m)
|
||||
leave()
|
||||
}()
|
||||
|
||||
// todo this could be faster on pattern_alternatives_combine_lite (see glob_test.go)
|
||||
if n := ast.Minimize(tree); n != nil {
|
||||
logf("minimized tree")
|
||||
logf("\t%s", tree)
|
||||
logf("\t%s", n)
|
||||
r, err := compile(n, sep)
|
||||
if err == nil {
|
||||
return r, nil
|
||||
}
|
||||
logf("compile minimized tree failed: %v", err)
|
||||
}
|
||||
|
||||
switch tree.Kind {
|
||||
case ast.KindAnyOf:
|
||||
// todo this could be faster on pattern_alternatives_combine_lite (see glob_test.go)
|
||||
if n := minimizeTree(tree); n != nil {
|
||||
return compile(n, sep)
|
||||
}
|
||||
matchers, err := compileTreeChildren(tree, sep)
|
||||
matchers, err := compileNodes(tree.Children, sep)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -475,11 +66,11 @@ func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) {
|
|||
if len(tree.Children) == 0 {
|
||||
return match.NewNothing(), nil
|
||||
}
|
||||
matchers, err := compileTreeChildren(tree, sep)
|
||||
matchers, err := compileNodes(tree.Children, sep)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m, err = compileMatchers(minimizeMatchers(matchers))
|
||||
m, err = match.Compile(match.Minimize(matchers))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -512,14 +103,25 @@ func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) {
|
|||
return nil, fmt.Errorf("could not compile tree: unknown node type")
|
||||
}
|
||||
|
||||
return optimizeMatcher(m), nil
|
||||
return match.Optimize(m), nil
|
||||
}
|
||||
|
||||
func Compile(tree *ast.Node, sep []rune) (match.Matcher, error) {
|
||||
m, err := compile(tree, sep)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var i = new(int32)
|
||||
|
||||
return m, nil
|
||||
func logf(f string, args ...interface{}) {
|
||||
n := int(atomic.LoadInt32(i))
|
||||
fmt.Fprint(os.Stderr,
|
||||
strings.Repeat(" ", n),
|
||||
fmt.Sprintf("(%d) ", n),
|
||||
fmt.Sprintf(f, args...),
|
||||
"\n",
|
||||
)
|
||||
}
|
||||
|
||||
func enter() {
|
||||
atomic.AddInt32(i, 1)
|
||||
}
|
||||
|
||||
func leave() {
|
||||
atomic.AddInt32(i, -1)
|
||||
}
|
||||
|
|
|
@ -1,140 +1,16 @@
|
|||
package compiler
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/gobwas/glob/match"
|
||||
"github.com/gobwas/glob/match/debug"
|
||||
"github.com/gobwas/glob/syntax/ast"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var separators = []rune{'.'}
|
||||
|
||||
func TestCommonChildren(t *testing.T) {
|
||||
for i, test := range []struct {
|
||||
nodes []*ast.Node
|
||||
left []*ast.Node
|
||||
right []*ast.Node
|
||||
}{
|
||||
{
|
||||
nodes: []*ast.Node{
|
||||
ast.NewNode(ast.KindNothing, nil,
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"z"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"c"}),
|
||||
),
|
||||
},
|
||||
},
|
||||
{
|
||||
nodes: []*ast.Node{
|
||||
ast.NewNode(ast.KindNothing, nil,
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"z"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"c"}),
|
||||
),
|
||||
ast.NewNode(ast.KindNothing, nil,
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"b"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"c"}),
|
||||
),
|
||||
},
|
||||
left: []*ast.Node{
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
},
|
||||
right: []*ast.Node{
|
||||
ast.NewNode(ast.KindText, ast.Text{"c"}),
|
||||
},
|
||||
},
|
||||
{
|
||||
nodes: []*ast.Node{
|
||||
ast.NewNode(ast.KindNothing, nil,
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"b"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"c"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"d"}),
|
||||
),
|
||||
ast.NewNode(ast.KindNothing, nil,
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"b"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"c"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"c"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"d"}),
|
||||
),
|
||||
},
|
||||
left: []*ast.Node{
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"b"}),
|
||||
},
|
||||
right: []*ast.Node{
|
||||
ast.NewNode(ast.KindText, ast.Text{"c"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"d"}),
|
||||
},
|
||||
},
|
||||
{
|
||||
nodes: []*ast.Node{
|
||||
ast.NewNode(ast.KindNothing, nil,
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"b"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"c"}),
|
||||
),
|
||||
ast.NewNode(ast.KindNothing, nil,
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"b"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"b"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"c"}),
|
||||
),
|
||||
},
|
||||
left: []*ast.Node{
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"b"}),
|
||||
},
|
||||
right: []*ast.Node{
|
||||
ast.NewNode(ast.KindText, ast.Text{"c"}),
|
||||
},
|
||||
},
|
||||
{
|
||||
nodes: []*ast.Node{
|
||||
ast.NewNode(ast.KindNothing, nil,
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"d"}),
|
||||
),
|
||||
ast.NewNode(ast.KindNothing, nil,
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"d"}),
|
||||
),
|
||||
ast.NewNode(ast.KindNothing, nil,
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
ast.NewNode(ast.KindText, ast.Text{"e"}),
|
||||
),
|
||||
},
|
||||
left: []*ast.Node{
|
||||
ast.NewNode(ast.KindText, ast.Text{"a"}),
|
||||
},
|
||||
right: []*ast.Node{},
|
||||
},
|
||||
} {
|
||||
left, right := commonChildren(test.nodes)
|
||||
if !nodesEqual(left, test.left) {
|
||||
t.Errorf("[%d] left, right := commonChildren(); left = %v; want %v", i, left, test.left)
|
||||
}
|
||||
if !nodesEqual(right, test.right) {
|
||||
t.Errorf("[%d] left, right := commonChildren(); right = %v; want %v", i, right, test.right)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func nodesEqual(a, b []*ast.Node) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
for i, av := range a {
|
||||
if !av.Equal(b[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func TestGlueMatchers(t *testing.T) {
|
||||
for id, test := range []struct {
|
||||
in []match.Matcher
|
||||
|
|
23
glob_test.go
23
glob_test.go
|
@ -60,6 +60,24 @@ func glob(s bool, p, m string, d ...rune) test {
|
|||
return test{p, m, s, d}
|
||||
}
|
||||
|
||||
func globc(p string, d ...rune) test {
|
||||
return test{pattern: p, delimiters: d}
|
||||
}
|
||||
|
||||
func TestCompilation(t *testing.T) {
|
||||
for _, test := range []test{
|
||||
globc("{*,**,?}", '.'),
|
||||
globc("{*.google.*,yandex.*}", '.'),
|
||||
} {
|
||||
t.Run("", func(t *testing.T) {
|
||||
_, err := Compile(test.pattern, test.delimiters...)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGlob(t *testing.T) {
|
||||
for _, test := range []test{
|
||||
glob(true, "* ?at * eyes", "my cat has very bright eyes"),
|
||||
|
@ -164,6 +182,11 @@ func TestGlob(t *testing.T) {
|
|||
glob(false, pattern_prefix_suffix, fixture_prefix_suffix_mismatch),
|
||||
} {
|
||||
t.Run("", func(t *testing.T) {
|
||||
defer func() {
|
||||
if thePanic := recover(); thePanic != nil {
|
||||
t.Fatalf("panic recovered: %v", thePanic)
|
||||
}
|
||||
}()
|
||||
g := MustCompile(test.pattern, test.delimiters...)
|
||||
result := g.Match(test.match)
|
||||
if result != test.should {
|
||||
|
|
21
match/any.go
21
match/any.go
|
@ -2,23 +2,24 @@ package match
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/gobwas/glob/util/strings"
|
||||
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
)
|
||||
|
||||
type Any struct {
|
||||
Separators []rune
|
||||
sep []rune
|
||||
}
|
||||
|
||||
func NewAny(s []rune) Any {
|
||||
return Any{s}
|
||||
}
|
||||
|
||||
func (self Any) Match(s string) bool {
|
||||
return strings.IndexAnyRunes(s, self.Separators) == -1
|
||||
func (a Any) Match(s string) bool {
|
||||
return runes.IndexAnyRune(s, a.sep) == -1
|
||||
}
|
||||
|
||||
func (self Any) Index(s string) (int, []int) {
|
||||
found := strings.IndexAnyRunes(s, self.Separators)
|
||||
func (a Any) Index(s string) (int, []int) {
|
||||
found := runes.IndexAnyRune(s, a.sep)
|
||||
switch found {
|
||||
case -1:
|
||||
case 0:
|
||||
|
@ -36,10 +37,10 @@ func (self Any) Index(s string) (int, []int) {
|
|||
return 0, segments
|
||||
}
|
||||
|
||||
func (self Any) Len() int {
|
||||
return lenNo
|
||||
func (a Any) MinLen() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (self Any) String() string {
|
||||
return fmt.Sprintf("<any:![%s]>", string(self.Separators))
|
||||
func (a Any) String() string {
|
||||
return fmt.Sprintf("<any:![%s]>", string(a.sep))
|
||||
}
|
||||
|
|
|
@ -1,82 +1,74 @@
|
|||
package match
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type AnyOf struct {
|
||||
Matchers Matchers
|
||||
ms []Matcher
|
||||
min int
|
||||
}
|
||||
|
||||
func NewAnyOf(m ...Matcher) AnyOf {
|
||||
return AnyOf{Matchers(m)}
|
||||
func NewAnyOf(ms ...Matcher) Matcher {
|
||||
a := AnyOf{ms, minLen(ms)}
|
||||
if mis, ok := MatchIndexers(ms); ok {
|
||||
return IndexedAnyOf{a, mis}
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
func (self *AnyOf) Add(m Matcher) error {
|
||||
self.Matchers = append(self.Matchers, m)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (self AnyOf) Match(s string) bool {
|
||||
for _, m := range self.Matchers {
|
||||
func (a AnyOf) Match(s string) bool {
|
||||
for _, m := range a.ms {
|
||||
if m.Match(s) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (self AnyOf) Index(s string) (int, []int) {
|
||||
index := -1
|
||||
func (a AnyOf) MinLen() (n int) {
|
||||
return a.min
|
||||
}
|
||||
|
||||
func (a AnyOf) Content() []Matcher {
|
||||
return a.ms
|
||||
}
|
||||
|
||||
func (a AnyOf) String() string {
|
||||
return fmt.Sprintf("<any_of:[%s]>", Matchers(a.ms))
|
||||
}
|
||||
|
||||
type IndexedAnyOf struct {
|
||||
AnyOf
|
||||
ms []MatchIndexer
|
||||
}
|
||||
|
||||
func (a IndexedAnyOf) Index(s string) (int, []int) {
|
||||
index := -1
|
||||
segments := acquireSegments(len(s))
|
||||
for _, m := range self.Matchers {
|
||||
idx, seg := m.Index(s)
|
||||
if idx == -1 {
|
||||
for _, m := range a.ms {
|
||||
i, seg := m.Index(s)
|
||||
if i == -1 {
|
||||
continue
|
||||
}
|
||||
|
||||
if index == -1 || idx < index {
|
||||
index = idx
|
||||
if index == -1 || i < index {
|
||||
index = i
|
||||
segments = append(segments[:0], seg...)
|
||||
continue
|
||||
}
|
||||
|
||||
if idx > index {
|
||||
if i > index {
|
||||
continue
|
||||
}
|
||||
|
||||
// here idx == index
|
||||
// here i == index
|
||||
segments = appendMerge(segments, seg)
|
||||
}
|
||||
|
||||
if index == -1 {
|
||||
releaseSegments(segments)
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
return index, segments
|
||||
}
|
||||
|
||||
func (self AnyOf) Len() (l int) {
|
||||
l = -1
|
||||
for _, m := range self.Matchers {
|
||||
ml := m.Len()
|
||||
switch {
|
||||
case l == -1:
|
||||
l = ml
|
||||
continue
|
||||
|
||||
case ml == -1:
|
||||
return -1
|
||||
|
||||
case l != ml:
|
||||
return -1
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (self AnyOf) String() string {
|
||||
return fmt.Sprintf("<any_of:[%s]>", self.Matchers)
|
||||
func (a IndexedAnyOf) String() string {
|
||||
return fmt.Sprintf("<indexed_any_of:[%s]>", a.ms)
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ import (
|
|||
"testing"
|
||||
)
|
||||
|
||||
func TestAnyOfIndex(t *testing.T) {
|
||||
func TestIndexedAnyOf(t *testing.T) {
|
||||
for id, test := range []struct {
|
||||
matchers Matchers
|
||||
fixture string
|
||||
|
@ -41,8 +41,8 @@ func TestAnyOfIndex(t *testing.T) {
|
|||
[]int{1},
|
||||
},
|
||||
} {
|
||||
everyOf := NewAnyOf(test.matchers...)
|
||||
index, segments := everyOf.Index(test.fixture)
|
||||
a := NewAnyOf(test.matchers...).(IndexedAnyOf)
|
||||
index, segments := a.Index(test.fixture)
|
||||
if index != test.index {
|
||||
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
|
||||
}
|
||||
|
|
185
match/btree.go
185
match/btree.go
|
@ -1,185 +0,0 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type BTree struct {
|
||||
Value Matcher
|
||||
Left Matcher
|
||||
Right Matcher
|
||||
ValueLengthRunes int
|
||||
LeftLengthRunes int
|
||||
RightLengthRunes int
|
||||
LengthRunes int
|
||||
}
|
||||
|
||||
func NewBTree(Value, Left, Right Matcher) (tree BTree) {
|
||||
tree.Value = Value
|
||||
tree.Left = Left
|
||||
tree.Right = Right
|
||||
|
||||
lenOk := true
|
||||
if tree.ValueLengthRunes = Value.Len(); tree.ValueLengthRunes == -1 {
|
||||
lenOk = false
|
||||
}
|
||||
|
||||
if Left != nil {
|
||||
if tree.LeftLengthRunes = Left.Len(); tree.LeftLengthRunes == -1 {
|
||||
lenOk = false
|
||||
}
|
||||
}
|
||||
|
||||
if Right != nil {
|
||||
if tree.RightLengthRunes = Right.Len(); tree.RightLengthRunes == -1 {
|
||||
lenOk = false
|
||||
}
|
||||
}
|
||||
|
||||
if lenOk {
|
||||
tree.LengthRunes = tree.LeftLengthRunes + tree.ValueLengthRunes + tree.RightLengthRunes
|
||||
} else {
|
||||
tree.LengthRunes = -1
|
||||
}
|
||||
|
||||
return tree
|
||||
}
|
||||
|
||||
func (self BTree) Len() int {
|
||||
return self.LengthRunes
|
||||
}
|
||||
|
||||
// todo?
|
||||
func (self BTree) Index(s string) (index int, segments []int) {
|
||||
//inputLen := len(s)
|
||||
//// try to cut unnecessary parts
|
||||
//// by knowledge of length of right and left part
|
||||
//offset, limit := self.offsetLimit(inputLen)
|
||||
//for offset < limit {
|
||||
// // search for matching part in substring
|
||||
// vi, segments := self.Value.Index(s[offset:limit])
|
||||
// if index == -1 {
|
||||
// return -1, nil
|
||||
// }
|
||||
// if self.Left == nil {
|
||||
// if index != offset {
|
||||
// return -1, nil
|
||||
// }
|
||||
// } else {
|
||||
// left := s[:offset+vi]
|
||||
// i := self.Left.IndexSuffix(left)
|
||||
// if i == -1 {
|
||||
// return -1, nil
|
||||
// }
|
||||
// index = i
|
||||
// }
|
||||
// if self.Right != nil {
|
||||
// for _, seg := range segments {
|
||||
// right := s[:offset+vi+seg]
|
||||
// }
|
||||
// }
|
||||
|
||||
// l := s[:offset+index]
|
||||
// var left bool
|
||||
// if self.Left != nil {
|
||||
// left = self.Left.Index(l)
|
||||
// } else {
|
||||
// left = l == ""
|
||||
// }
|
||||
//}
|
||||
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
func (self BTree) Match(s string) bool {
|
||||
inputLen := len(s)
|
||||
// try to cut unnecessary parts
|
||||
// by knowledge of length of right and left part
|
||||
offset, limit := self.offsetLimit(inputLen)
|
||||
|
||||
for offset < limit {
|
||||
// search for matching part in substring
|
||||
index, segments := self.Value.Index(s[offset:limit])
|
||||
if index == -1 {
|
||||
releaseSegments(segments)
|
||||
return false
|
||||
}
|
||||
|
||||
l := s[:offset+index]
|
||||
var left bool
|
||||
if self.Left != nil {
|
||||
left = self.Left.Match(l)
|
||||
} else {
|
||||
left = l == ""
|
||||
}
|
||||
|
||||
if left {
|
||||
for i := len(segments) - 1; i >= 0; i-- {
|
||||
length := segments[i]
|
||||
|
||||
var right bool
|
||||
var r string
|
||||
// if there is no string for the right branch
|
||||
if inputLen <= offset+index+length {
|
||||
r = ""
|
||||
} else {
|
||||
r = s[offset+index+length:]
|
||||
}
|
||||
|
||||
if self.Right != nil {
|
||||
right = self.Right.Match(r)
|
||||
} else {
|
||||
right = r == ""
|
||||
}
|
||||
|
||||
if right {
|
||||
releaseSegments(segments)
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_, step := utf8.DecodeRuneInString(s[offset+index:])
|
||||
offset += index + step
|
||||
|
||||
releaseSegments(segments)
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (self BTree) offsetLimit(inputLen int) (offset int, limit int) {
|
||||
// self.Length, self.RLen and self.LLen are values meaning the length of runes for each part
|
||||
// here we manipulating byte length for better optimizations
|
||||
// but these checks still works, cause minLen of 1-rune string is 1 byte.
|
||||
if self.LengthRunes != -1 && self.LengthRunes > inputLen {
|
||||
return 0, 0
|
||||
}
|
||||
if self.LeftLengthRunes >= 0 {
|
||||
offset = self.LeftLengthRunes
|
||||
}
|
||||
if self.RightLengthRunes >= 0 {
|
||||
limit = inputLen - self.RightLengthRunes
|
||||
} else {
|
||||
limit = inputLen
|
||||
}
|
||||
return offset, limit
|
||||
}
|
||||
|
||||
func (self BTree) String() string {
|
||||
const n string = "<nil>"
|
||||
var l, r string
|
||||
if self.Left == nil {
|
||||
l = n
|
||||
} else {
|
||||
l = self.Left.String()
|
||||
}
|
||||
if self.Right == nil {
|
||||
r = n
|
||||
} else {
|
||||
r = self.Right.String()
|
||||
}
|
||||
|
||||
return fmt.Sprintf("<btree:[%s<-%s->%s]>", l, self.Value, r)
|
||||
}
|
|
@ -1,90 +0,0 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBTree(t *testing.T) {
|
||||
for id, test := range []struct {
|
||||
tree BTree
|
||||
str string
|
||||
exp bool
|
||||
}{
|
||||
{
|
||||
NewBTree(NewText("abc"), NewSuper(), NewSuper()),
|
||||
"abc",
|
||||
true,
|
||||
},
|
||||
{
|
||||
NewBTree(NewText("a"), NewSingle(nil), NewSingle(nil)),
|
||||
"aaa",
|
||||
true,
|
||||
},
|
||||
{
|
||||
NewBTree(NewText("b"), NewSingle(nil), nil),
|
||||
"bbb",
|
||||
false,
|
||||
},
|
||||
{
|
||||
NewBTree(
|
||||
NewText("c"),
|
||||
NewBTree(
|
||||
NewSingle(nil),
|
||||
NewSuper(),
|
||||
nil,
|
||||
),
|
||||
nil,
|
||||
),
|
||||
"abc",
|
||||
true,
|
||||
},
|
||||
} {
|
||||
act := test.tree.Match(test.str)
|
||||
if act != test.exp {
|
||||
t.Errorf("#%d match %q error: act: %t; exp: %t", id, test.str, act, test.exp)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fakeMatcher is a stub matcher for benchmarks: it matches every input and
// reports a fixed length and name.
type fakeMatcher struct {
	len  int    // value returned by Len
	name string // value returned by String
}

// Match accepts any input.
func (f *fakeMatcher) Match(string) bool {
	return true
}

// i is the number of segments produced by fakeMatcher.Index.
var i = 3

// Index always reports a hit at position 0 with the segments 0, 1, ..., i-1,
// regardless of the input.
func (f *fakeMatcher) Index(s string) (int, []int) {
	seg := make([]int, i)
	for x := range seg {
		seg[x] = x
	}
	return 0, seg
}

// Len returns the configured length.
func (f *fakeMatcher) Len() int {
	return f.len
}

// String returns the configured name.
func (f *fakeMatcher) String() string {
	return f.name
}
|
||||
|
||||
func BenchmarkMatchBTree(b *testing.B) {
|
||||
l := &fakeMatcher{4, "left_fake"}
|
||||
r := &fakeMatcher{4, "right_fake"}
|
||||
v := &fakeMatcher{2, "value_fake"}
|
||||
|
||||
// must be <= len(l + r + v)
|
||||
fixture := "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"
|
||||
|
||||
bt := NewBTree(v, l, r)
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
bt.Match(fixture)
|
||||
}
|
||||
})
|
||||
}
|
|
@ -6,29 +6,29 @@ import (
|
|||
)
|
||||
|
||||
type Contains struct {
|
||||
Needle string
|
||||
Not bool
|
||||
s string
|
||||
not bool
|
||||
}
|
||||
|
||||
func NewContains(needle string, not bool) Contains {
|
||||
return Contains{needle, not}
|
||||
func NewContains(needle string) Contains {
|
||||
return Contains{needle, false}
|
||||
}
|
||||
|
||||
func (self Contains) Match(s string) bool {
|
||||
return strings.Contains(s, self.Needle) != self.Not
|
||||
func (c Contains) Match(s string) bool {
|
||||
return strings.Contains(s, c.s) != c.not
|
||||
}
|
||||
|
||||
func (self Contains) Index(s string) (int, []int) {
|
||||
func (c Contains) Index(s string) (int, []int) {
|
||||
var offset int
|
||||
|
||||
idx := strings.Index(s, self.Needle)
|
||||
idx := strings.Index(s, c.s)
|
||||
|
||||
if !self.Not {
|
||||
if !c.not {
|
||||
if idx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
offset = idx + len(self.Needle)
|
||||
offset = idx + len(c.s)
|
||||
if len(s) <= offset {
|
||||
return 0, []int{offset}
|
||||
}
|
||||
|
@ -45,14 +45,14 @@ func (self Contains) Index(s string) (int, []int) {
|
|||
return 0, append(segments, offset+len(s))
|
||||
}
|
||||
|
||||
func (self Contains) Len() int {
|
||||
return lenNo
|
||||
func (c Contains) MinLen() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (self Contains) String() string {
|
||||
func (c Contains) String() string {
|
||||
var not string
|
||||
if self.Not {
|
||||
if c.not {
|
||||
not = "!"
|
||||
}
|
||||
return fmt.Sprintf("<contains:%s[%s]>", not, self.Needle)
|
||||
return fmt.Sprintf("<contains:%s[%s]>", not, c.s)
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ func TestContainsIndex(t *testing.T) {
|
|||
[]int{0, 1, 2, 3},
|
||||
},
|
||||
} {
|
||||
p := NewContains(test.prefix, test.not)
|
||||
p := Contains{test.prefix, test.not}
|
||||
index, segments := p.Index(test.fixture)
|
||||
if index != test.index {
|
||||
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
|
||||
|
@ -54,8 +54,7 @@ func TestContainsIndex(t *testing.T) {
|
|||
}
|
||||
|
||||
func BenchmarkIndexContains(b *testing.B) {
|
||||
m := NewContains(string(bench_separators), true)
|
||||
|
||||
m := Contains{string(bench_separators), true}
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, s := m.Index(bench_pattern)
|
||||
releaseSegments(s)
|
||||
|
@ -63,8 +62,7 @@ func BenchmarkIndexContains(b *testing.B) {
|
|||
}
|
||||
|
||||
func BenchmarkIndexContainsParallel(b *testing.B) {
|
||||
m := NewContains(string(bench_separators), true)
|
||||
|
||||
m := Contains{string(bench_separators), true}
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_, s := m.Index(bench_pattern)
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
var i = new(int32)
|
||||
|
||||
func logf(f string, args ...interface{}) {
|
||||
n := int(atomic.LoadInt32(i))
|
||||
fmt.Fprint(os.Stderr,
|
||||
strings.Repeat(" ", n),
|
||||
fmt.Sprintf("(%d) ", n),
|
||||
fmt.Sprintf(f, args...),
|
||||
"\n",
|
||||
)
|
||||
}
|
||||
|
||||
func enter() {
|
||||
atomic.AddInt32(i, 1)
|
||||
}
|
||||
|
||||
func leave() {
|
||||
atomic.AddInt32(i, -1)
|
||||
}
|
||||
|
||||
func Graphviz(pattern string, m Matcher) string {
|
||||
return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, graphviz(m, fmt.Sprintf("%x", rand.Int63())))
|
||||
}
|
||||
|
||||
func graphviz(m Matcher, id string) string {
|
||||
buf := &bytes.Buffer{}
|
||||
|
||||
switch v := m.(type) {
|
||||
case Tree:
|
||||
fmt.Fprintf(buf, `"%s"[label="%s"];`, id, v.value)
|
||||
for _, m := range []Matcher{v.left, v.right} {
|
||||
switch n := m.(type) {
|
||||
case nil:
|
||||
rnd := rand.Int63()
|
||||
fmt.Fprintf(buf, `"%x"[label="<nil>"];`, rnd)
|
||||
fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
|
||||
|
||||
default:
|
||||
sub := fmt.Sprintf("%x", rand.Int63())
|
||||
fmt.Fprintf(buf, `"%s"->"%s";`, id, sub)
|
||||
fmt.Fprintf(buf, graphviz(n, sub))
|
||||
}
|
||||
}
|
||||
|
||||
case Container:
|
||||
fmt.Fprintf(buf, `"%s"[label="*AnyOf"];`, id)
|
||||
for _, m := range v.Content() {
|
||||
rnd := rand.Int63()
|
||||
fmt.Fprintf(buf, graphviz(m, fmt.Sprintf("%x", rnd)))
|
||||
fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
|
||||
}
|
||||
|
||||
case EveryOf:
|
||||
fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id)
|
||||
for _, m := range v.ms {
|
||||
rnd := rand.Int63()
|
||||
fmt.Fprintf(buf, graphviz(m, fmt.Sprintf("%x", rnd)))
|
||||
fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
|
||||
}
|
||||
|
||||
default:
|
||||
fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m)
|
||||
}
|
||||
|
||||
return buf.String()
|
||||
}
|
|
@ -1,55 +0,0 @@
|
|||
package debug
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/gobwas/glob/match"
|
||||
"math/rand"
|
||||
)
|
||||
|
||||
func Graphviz(pattern string, m match.Matcher) string {
|
||||
return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, graphviz_internal(m, fmt.Sprintf("%x", rand.Int63())))
|
||||
}
|
||||
|
||||
func graphviz_internal(m match.Matcher, id string) string {
|
||||
buf := &bytes.Buffer{}
|
||||
|
||||
switch matcher := m.(type) {
|
||||
case match.BTree:
|
||||
fmt.Fprintf(buf, `"%s"[label="%s"];`, id, matcher.Value.String())
|
||||
for _, m := range []match.Matcher{matcher.Left, matcher.Right} {
|
||||
switch n := m.(type) {
|
||||
case nil:
|
||||
rnd := rand.Int63()
|
||||
fmt.Fprintf(buf, `"%x"[label="<nil>"];`, rnd)
|
||||
fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
|
||||
|
||||
default:
|
||||
sub := fmt.Sprintf("%x", rand.Int63())
|
||||
fmt.Fprintf(buf, `"%s"->"%s";`, id, sub)
|
||||
fmt.Fprintf(buf, graphviz_internal(n, sub))
|
||||
}
|
||||
}
|
||||
|
||||
case match.AnyOf:
|
||||
fmt.Fprintf(buf, `"%s"[label="AnyOf"];`, id)
|
||||
for _, m := range matcher.Matchers {
|
||||
rnd := rand.Int63()
|
||||
fmt.Fprintf(buf, graphviz_internal(m, fmt.Sprintf("%x", rnd)))
|
||||
fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
|
||||
}
|
||||
|
||||
case match.EveryOf:
|
||||
fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id)
|
||||
for _, m := range matcher.Matchers {
|
||||
rnd := rand.Int63()
|
||||
fmt.Fprintf(buf, graphviz_internal(m, fmt.Sprintf("%x", rnd)))
|
||||
fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
|
||||
}
|
||||
|
||||
default:
|
||||
fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m.String())
|
||||
}
|
||||
|
||||
return buf.String()
|
||||
}
|
|
@ -5,31 +5,41 @@ import (
|
|||
)
|
||||
|
||||
type EveryOf struct {
|
||||
Matchers Matchers
|
||||
ms []Matcher
|
||||
min int
|
||||
}
|
||||
|
||||
func NewEveryOf(m ...Matcher) EveryOf {
|
||||
return EveryOf{Matchers(m)}
|
||||
func NewEveryOf(ms []Matcher) Matcher {
|
||||
e := EveryOf{ms, minLen(ms)}
|
||||
if mis, ok := MatchIndexers(ms); ok {
|
||||
return IndexedEveryOf{e, mis}
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
func (self *EveryOf) Add(m Matcher) error {
|
||||
self.Matchers = append(self.Matchers, m)
|
||||
return nil
|
||||
func (e EveryOf) MinLen() (n int) {
|
||||
return e.min
|
||||
}
|
||||
|
||||
func (self EveryOf) Len() (l int) {
|
||||
for _, m := range self.Matchers {
|
||||
if ml := m.Len(); l > 0 {
|
||||
l += ml
|
||||
} else {
|
||||
return -1
|
||||
func (e EveryOf) Match(s string) bool {
|
||||
for _, m := range e.ms {
|
||||
if !m.Match(s) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
return true
|
||||
}
|
||||
|
||||
func (self EveryOf) Index(s string) (int, []int) {
|
||||
func (e EveryOf) String() string {
|
||||
return fmt.Sprintf("<every_of:[%s]>", e.ms)
|
||||
}
|
||||
|
||||
type IndexedEveryOf struct {
|
||||
EveryOf
|
||||
ms []MatchIndexer
|
||||
}
|
||||
|
||||
func (e IndexedEveryOf) Index(s string) (int, []int) {
|
||||
var index int
|
||||
var offset int
|
||||
|
||||
|
@ -39,7 +49,7 @@ func (self EveryOf) Index(s string) (int, []int) {
|
|||
current := acquireSegments(len(s))
|
||||
|
||||
sub := s
|
||||
for i, m := range self.Matchers {
|
||||
for i, m := range e.ms {
|
||||
idx, seg := m.Index(sub)
|
||||
if idx == -1 {
|
||||
releaseSegments(next)
|
||||
|
@ -84,16 +94,6 @@ func (self EveryOf) Index(s string) (int, []int) {
|
|||
return index, current
|
||||
}
|
||||
|
||||
func (self EveryOf) Match(s string) bool {
|
||||
for _, m := range self.Matchers {
|
||||
if !m.Match(s) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (self EveryOf) String() string {
|
||||
return fmt.Sprintf("<every_of:[%s]>", self.Matchers)
|
||||
func (e IndexedEveryOf) String() string {
|
||||
return fmt.Sprintf("<indexed_every_of:[%s]>", e.ms)
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ import (
|
|||
"testing"
|
||||
)
|
||||
|
||||
func TestEveryOfIndex(t *testing.T) {
|
||||
func TestIndexedEveryOf(t *testing.T) {
|
||||
for id, test := range []struct {
|
||||
matchers Matchers
|
||||
fixture string
|
||||
|
@ -33,7 +33,7 @@ func TestEveryOfIndex(t *testing.T) {
|
|||
[]int{2},
|
||||
},
|
||||
} {
|
||||
everyOf := NewEveryOf(test.matchers...)
|
||||
everyOf := NewEveryOf(test.matchers).(IndexedEveryOf)
|
||||
index, segments := everyOf.Index(test.fixture)
|
||||
if index != test.index {
|
||||
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
|
||||
|
|
|
@ -2,48 +2,47 @@ package match
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
)
|
||||
|
||||
type List struct {
|
||||
List []rune
|
||||
Not bool
|
||||
rs []rune
|
||||
not bool
|
||||
}
|
||||
|
||||
func NewList(list []rune, not bool) List {
|
||||
return List{list, not}
|
||||
func NewList(rs []rune, not bool) List {
|
||||
return List{rs, not}
|
||||
}
|
||||
|
||||
func (self List) Match(s string) bool {
|
||||
func (l List) Match(s string) bool {
|
||||
r, w := utf8.DecodeRuneInString(s)
|
||||
if len(s) > w {
|
||||
// Invalid rune.
|
||||
return false
|
||||
}
|
||||
|
||||
inList := runes.IndexRune(self.List, r) != -1
|
||||
return inList == !self.Not
|
||||
inList := runes.IndexRune(l.rs, r) != -1
|
||||
return inList == !l.not
|
||||
}
|
||||
|
||||
func (self List) Len() int {
|
||||
return lenOne
|
||||
func (l List) MinLen() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
func (self List) Index(s string) (int, []int) {
|
||||
func (l List) Index(s string) (int, []int) {
|
||||
for i, r := range s {
|
||||
if self.Not == (runes.IndexRune(self.List, r) == -1) {
|
||||
if l.not == (runes.IndexRune(l.rs, r) == -1) {
|
||||
return i, segmentsByRuneLength[utf8.RuneLen(r)]
|
||||
}
|
||||
}
|
||||
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
func (self List) String() string {
|
||||
func (l List) String() string {
|
||||
var not string
|
||||
if self.Not {
|
||||
if l.not {
|
||||
not = "!"
|
||||
}
|
||||
|
||||
return fmt.Sprintf("<list:%s[%s]>", not, string(self.List))
|
||||
return fmt.Sprintf("<list:%s[%s]>", not, string(l.rs))
|
||||
}
|
||||
|
|
|
@ -7,15 +7,50 @@ import (
|
|||
"strings"
|
||||
)
|
||||
|
||||
const lenOne = 1
|
||||
const lenZero = 0
|
||||
const lenNo = -1
|
||||
|
||||
type Matcher interface {
|
||||
Match(string) bool
|
||||
MinLen() int
|
||||
}
|
||||
|
||||
type Indexer interface {
|
||||
Index(string) (int, []int)
|
||||
Len() int
|
||||
String() string
|
||||
}
|
||||
|
||||
type Sizer interface {
|
||||
RunesCount() int
|
||||
}
|
||||
|
||||
type MatchIndexer interface {
|
||||
Matcher
|
||||
Indexer
|
||||
}
|
||||
|
||||
type MatchSizer interface {
|
||||
Matcher
|
||||
Sizer
|
||||
}
|
||||
|
||||
type MatchIndexSizer interface {
|
||||
Matcher
|
||||
Indexer
|
||||
Sizer
|
||||
}
|
||||
|
||||
type Container interface {
|
||||
Content() []Matcher
|
||||
}
|
||||
|
||||
func MatchIndexers(ms []Matcher) ([]MatchIndexer, bool) {
|
||||
for _, m := range ms {
|
||||
if _, ok := m.(Indexer); !ok {
|
||||
return nil, false
|
||||
}
|
||||
}
|
||||
mis := make([]MatchIndexer, len(ms))
|
||||
for i := range mis {
|
||||
mis[i] = ms[i].(MatchIndexer)
|
||||
}
|
||||
return mis, true
|
||||
}
|
||||
|
||||
type Matchers []Matcher
|
||||
|
|
29
match/max.go
29
match/max.go
|
@ -6,32 +6,31 @@ import (
|
|||
)
|
||||
|
||||
type Max struct {
|
||||
Limit int
|
||||
n int
|
||||
}
|
||||
|
||||
func NewMax(l int) Max {
|
||||
return Max{l}
|
||||
func NewMax(n int) Max {
|
||||
return Max{n}
|
||||
}
|
||||
|
||||
func (self Max) Match(s string) bool {
|
||||
var l int
|
||||
func (m Max) Match(s string) bool {
|
||||
var n int
|
||||
for range s {
|
||||
l += 1
|
||||
if l > self.Limit {
|
||||
n += 1
|
||||
if n > m.n {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (self Max) Index(s string) (int, []int) {
|
||||
segments := acquireSegments(self.Limit + 1)
|
||||
func (m Max) Index(s string) (int, []int) {
|
||||
segments := acquireSegments(m.n + 1)
|
||||
segments = append(segments, 0)
|
||||
var count int
|
||||
for i, r := range s {
|
||||
count++
|
||||
if count > self.Limit {
|
||||
if count > m.n {
|
||||
break
|
||||
}
|
||||
segments = append(segments, i+utf8.RuneLen(r))
|
||||
|
@ -40,10 +39,10 @@ func (self Max) Index(s string) (int, []int) {
|
|||
return 0, segments
|
||||
}
|
||||
|
||||
func (self Max) Len() int {
|
||||
return lenNo
|
||||
func (m Max) MinLen() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (self Max) String() string {
|
||||
return fmt.Sprintf("<max:%d>", self.Limit)
|
||||
func (m Max) String() string {
|
||||
return fmt.Sprintf("<max:%d>", m.n)
|
||||
}
|
||||
|
|
32
match/min.go
32
match/min.go
|
@ -6,52 +6,48 @@ import (
|
|||
)
|
||||
|
||||
type Min struct {
|
||||
Limit int
|
||||
n int
|
||||
}
|
||||
|
||||
func NewMin(l int) Min {
|
||||
return Min{l}
|
||||
func NewMin(n int) Min {
|
||||
return Min{n}
|
||||
}
|
||||
|
||||
func (self Min) Match(s string) bool {
|
||||
var l int
|
||||
func (m Min) Match(s string) bool {
|
||||
var n int
|
||||
for range s {
|
||||
l += 1
|
||||
if l >= self.Limit {
|
||||
n += 1
|
||||
if n >= m.n {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (self Min) Index(s string) (int, []int) {
|
||||
func (m Min) Index(s string) (int, []int) {
|
||||
var count int
|
||||
|
||||
c := len(s) - self.Limit + 1
|
||||
c := len(s) - m.n + 1
|
||||
if c <= 0 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
segments := acquireSegments(c)
|
||||
for i, r := range s {
|
||||
count++
|
||||
if count >= self.Limit {
|
||||
if count >= m.n {
|
||||
segments = append(segments, i+utf8.RuneLen(r))
|
||||
}
|
||||
}
|
||||
|
||||
if len(segments) == 0 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
return 0, segments
|
||||
}
|
||||
|
||||
func (self Min) Len() int {
|
||||
return lenNo
|
||||
func (m Min) MinLen() int {
|
||||
return m.n
|
||||
}
|
||||
|
||||
func (self Min) String() string {
|
||||
return fmt.Sprintf("<min:%d>", self.Limit)
|
||||
func (m Min) String() string {
|
||||
return fmt.Sprintf("<min:%d>", m.n)
|
||||
}
|
||||
|
|
|
@ -18,8 +18,12 @@ func (self Nothing) Index(s string) (int, []int) {
|
|||
return 0, segments0
|
||||
}
|
||||
|
||||
func (self Nothing) Len() int {
|
||||
return lenZero
|
||||
func (self Nothing) MinLen() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (self Nothing) RunesCount() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (self Nothing) String() string {
|
||||
|
|
|
@ -0,0 +1,278 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"gopkg.in/readline.v1/runes"
|
||||
)
|
||||
|
||||
func Optimize(m Matcher) Matcher {
|
||||
switch v := m.(type) {
|
||||
case Any:
|
||||
if len(v.sep) == 0 {
|
||||
return NewSuper()
|
||||
}
|
||||
|
||||
case Container:
|
||||
ms := v.Content()
|
||||
if len(ms) == 1 {
|
||||
return ms[0]
|
||||
}
|
||||
return m
|
||||
|
||||
case List:
|
||||
if v.not == false && len(v.rs) == 1 {
|
||||
return NewText(string(v.rs))
|
||||
}
|
||||
return m
|
||||
|
||||
case Tree:
|
||||
v.left = Optimize(v.left)
|
||||
v.right = Optimize(v.right)
|
||||
|
||||
txt, ok := v.value.(Text)
|
||||
if !ok {
|
||||
return m
|
||||
}
|
||||
|
||||
var (
|
||||
leftNil = v.left == nil
|
||||
rightNil = v.right == nil
|
||||
)
|
||||
if leftNil && rightNil {
|
||||
return NewText(txt.s)
|
||||
}
|
||||
|
||||
_, leftSuper := v.left.(Super)
|
||||
lp, leftPrefix := v.left.(Prefix)
|
||||
la, leftAny := v.left.(Any)
|
||||
|
||||
_, rightSuper := v.right.(Super)
|
||||
rs, rightSuffix := v.right.(Suffix)
|
||||
ra, rightAny := v.right.(Any)
|
||||
|
||||
switch {
|
||||
case leftSuper && rightSuper:
|
||||
return NewContains(txt.s)
|
||||
|
||||
case leftSuper && rightNil:
|
||||
return NewSuffix(txt.s)
|
||||
|
||||
case rightSuper && leftNil:
|
||||
return NewPrefix(txt.s)
|
||||
|
||||
case leftNil && rightSuffix:
|
||||
return NewPrefixSuffix(txt.s, rs.s)
|
||||
|
||||
case rightNil && leftPrefix:
|
||||
return NewPrefixSuffix(lp.s, txt.s)
|
||||
|
||||
case rightNil && leftAny:
|
||||
return NewSuffixAny(txt.s, la.sep)
|
||||
|
||||
case leftNil && rightAny:
|
||||
return NewPrefixAny(txt.s, ra.sep)
|
||||
}
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
func Compile(ms []Matcher) (Matcher, error) {
|
||||
if len(ms) == 0 {
|
||||
return nil, fmt.Errorf("compile error: need at least one matcher")
|
||||
}
|
||||
if len(ms) == 1 {
|
||||
return ms[0], nil
|
||||
}
|
||||
if m := glueMatchers(ms); m != nil {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
var (
|
||||
idx = -1
|
||||
maxLen = -2
|
||||
indexer MatchIndexer
|
||||
)
|
||||
for i, m := range ms {
|
||||
mi, ok := m.(MatchIndexer)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if n := m.MinLen(); n > maxLen {
|
||||
maxLen = n
|
||||
idx = i
|
||||
indexer = mi
|
||||
}
|
||||
}
|
||||
if indexer == nil {
|
||||
return nil, fmt.Errorf("can not index on matchers")
|
||||
}
|
||||
|
||||
left := ms[:idx]
|
||||
var right []Matcher
|
||||
if len(ms) > idx+1 {
|
||||
right = ms[idx+1:]
|
||||
}
|
||||
|
||||
var l, r Matcher
|
||||
var err error
|
||||
if len(left) > 0 {
|
||||
l, err = Compile(left)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if len(right) > 0 {
|
||||
r, err = Compile(right)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return NewTree(indexer, l, r), nil
|
||||
}
|
||||
|
||||
func glueMatchers(ms []Matcher) Matcher {
|
||||
if m := glueMatchersAsEvery(ms); m != nil {
|
||||
return m
|
||||
}
|
||||
if m := glueMatchersAsRow(ms); m != nil {
|
||||
return m
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func glueMatchersAsRow(ms []Matcher) Matcher {
|
||||
if len(ms) <= 1 {
|
||||
return nil
|
||||
}
|
||||
var s []MatchIndexSizer
|
||||
for _, m := range ms {
|
||||
rsz, ok := m.(MatchIndexSizer)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
s = append(s, rsz)
|
||||
}
|
||||
return NewRow(s)
|
||||
}
|
||||
|
||||
func glueMatchersAsEvery(ms []Matcher) Matcher {
|
||||
if len(ms) <= 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
hasAny bool
|
||||
hasSuper bool
|
||||
hasSingle bool
|
||||
min int
|
||||
separator []rune
|
||||
)
|
||||
|
||||
for i, matcher := range ms {
|
||||
var sep []rune
|
||||
|
||||
switch m := matcher.(type) {
|
||||
case Super:
|
||||
sep = []rune{}
|
||||
hasSuper = true
|
||||
|
||||
case Any:
|
||||
sep = m.sep
|
||||
hasAny = true
|
||||
|
||||
case Single:
|
||||
sep = m.sep
|
||||
hasSingle = true
|
||||
min++
|
||||
|
||||
case List:
|
||||
if !m.not {
|
||||
return nil
|
||||
}
|
||||
sep = m.rs
|
||||
hasSingle = true
|
||||
min++
|
||||
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
|
||||
// initialize
|
||||
if i == 0 {
|
||||
separator = sep
|
||||
}
|
||||
|
||||
if runes.Equal(sep, separator) {
|
||||
continue
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
if hasSuper && !hasAny && !hasSingle {
|
||||
return NewSuper()
|
||||
}
|
||||
|
||||
if hasAny && !hasSuper && !hasSingle {
|
||||
return NewAny(separator)
|
||||
}
|
||||
|
||||
if (hasAny || hasSuper) && min > 0 && len(separator) == 0 {
|
||||
return NewMin(min)
|
||||
}
|
||||
|
||||
var every []Matcher
|
||||
if min > 0 {
|
||||
every = append(every, NewMin(min))
|
||||
if !hasAny && !hasSuper {
|
||||
every = append(every, NewMax(min))
|
||||
}
|
||||
}
|
||||
if len(separator) > 0 {
|
||||
every = append(every, NewAny(separator))
|
||||
}
|
||||
|
||||
return NewEveryOf(every)
|
||||
}
|
||||
|
||||
func Minimize(ms []Matcher) []Matcher {
|
||||
var (
|
||||
result Matcher
|
||||
left int
|
||||
right int
|
||||
count int
|
||||
)
|
||||
for l := 0; l < len(ms); l++ {
|
||||
for r := len(ms); r > l; r-- {
|
||||
if glued := glueMatchers(ms[l:r]); glued != nil {
|
||||
var swap bool
|
||||
if result == nil {
|
||||
swap = true
|
||||
} else {
|
||||
swap = glued.MinLen() > result.MinLen() || count < r-l
|
||||
}
|
||||
if swap {
|
||||
result = glued
|
||||
left = l
|
||||
right = r
|
||||
count = r - l
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if result == nil {
|
||||
return ms
|
||||
}
|
||||
next := append(append([]Matcher{}, ms[:left]...), result)
|
||||
if right < len(ms) {
|
||||
next = append(next, ms[right:]...)
|
||||
}
|
||||
if len(next) == len(ms) {
|
||||
return next
|
||||
}
|
||||
return Minimize(next)
|
||||
}
|
|
@ -7,20 +7,24 @@ import (
|
|||
)
|
||||
|
||||
type Prefix struct {
|
||||
Prefix string
|
||||
s string
|
||||
minSize int
|
||||
}
|
||||
|
||||
func NewPrefix(p string) Prefix {
|
||||
return Prefix{p}
|
||||
return Prefix{
|
||||
s: p,
|
||||
minSize: utf8.RuneCountInString(p),
|
||||
}
|
||||
}
|
||||
|
||||
func (self Prefix) Index(s string) (int, []int) {
|
||||
idx := strings.Index(s, self.Prefix)
|
||||
func (p Prefix) Index(s string) (int, []int) {
|
||||
idx := strings.Index(s, p.s)
|
||||
if idx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
length := len(self.Prefix)
|
||||
length := len(p.s)
|
||||
var sub string
|
||||
if len(s) > idx+length {
|
||||
sub = s[idx+length:]
|
||||
|
@ -37,14 +41,14 @@ func (self Prefix) Index(s string) (int, []int) {
|
|||
return idx, segments
|
||||
}
|
||||
|
||||
func (self Prefix) Len() int {
|
||||
return lenNo
|
||||
func (p Prefix) MinLen() int {
|
||||
return p.minSize
|
||||
}
|
||||
|
||||
func (self Prefix) Match(s string) bool {
|
||||
return strings.HasPrefix(s, self.Prefix)
|
||||
func (p Prefix) Match(s string) bool {
|
||||
return strings.HasPrefix(s, p.s)
|
||||
}
|
||||
|
||||
func (self Prefix) String() string {
|
||||
return fmt.Sprintf("<prefix:%s>", self.Prefix)
|
||||
func (p Prefix) String() string {
|
||||
return fmt.Sprintf("<prefix:%s>", p.s)
|
||||
}
|
||||
|
|
|
@ -5,27 +5,28 @@ import (
|
|||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
sutil "github.com/gobwas/glob/util/strings"
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
)
|
||||
|
||||
type PrefixAny struct {
|
||||
Prefix string
|
||||
Separators []rune
|
||||
s string
|
||||
sep []rune
|
||||
minLen int
|
||||
}
|
||||
|
||||
func NewPrefixAny(s string, sep []rune) PrefixAny {
|
||||
return PrefixAny{s, sep}
|
||||
return PrefixAny{s, sep, utf8.RuneCountInString(s)}
|
||||
}
|
||||
|
||||
func (self PrefixAny) Index(s string) (int, []int) {
|
||||
idx := strings.Index(s, self.Prefix)
|
||||
func (p PrefixAny) Index(s string) (int, []int) {
|
||||
idx := strings.Index(s, p.s)
|
||||
if idx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
n := len(self.Prefix)
|
||||
n := len(p.s)
|
||||
sub := s[idx+n:]
|
||||
i := sutil.IndexAnyRunes(sub, self.Separators)
|
||||
i := runes.IndexAnyRune(sub, p.sep)
|
||||
if i > -1 {
|
||||
sub = sub[:i]
|
||||
}
|
||||
|
@ -39,17 +40,17 @@ func (self PrefixAny) Index(s string) (int, []int) {
|
|||
return idx, seg
|
||||
}
|
||||
|
||||
func (self PrefixAny) Len() int {
|
||||
return lenNo
|
||||
func (p PrefixAny) MinLen() int {
|
||||
return p.minLen
|
||||
}
|
||||
|
||||
func (self PrefixAny) Match(s string) bool {
|
||||
if !strings.HasPrefix(s, self.Prefix) {
|
||||
func (p PrefixAny) Match(s string) bool {
|
||||
if !strings.HasPrefix(s, p.s) {
|
||||
return false
|
||||
}
|
||||
return sutil.IndexAnyRunes(s[len(self.Prefix):], self.Separators) == -1
|
||||
return runes.IndexAnyRune(s[len(p.s):], p.sep) == -1
|
||||
}
|
||||
|
||||
func (self PrefixAny) String() string {
|
||||
return fmt.Sprintf("<prefix_any:%s![%s]>", self.Prefix, string(self.Separators))
|
||||
func (p PrefixAny) String() string {
|
||||
return fmt.Sprintf("<prefix_any:%s![%s]>", p.s, string(p.sep))
|
||||
}
|
||||
|
|
|
@ -3,23 +3,27 @@ package match
|
|||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type PrefixSuffix struct {
|
||||
Prefix, Suffix string
|
||||
p, s string
|
||||
minLen int
|
||||
}
|
||||
|
||||
func NewPrefixSuffix(p, s string) PrefixSuffix {
|
||||
return PrefixSuffix{p, s}
|
||||
pn := utf8.RuneCountInString(p)
|
||||
sn := utf8.RuneCountInString(s)
|
||||
return PrefixSuffix{p, s, pn + sn}
|
||||
}
|
||||
|
||||
func (self PrefixSuffix) Index(s string) (int, []int) {
|
||||
prefixIdx := strings.Index(s, self.Prefix)
|
||||
func (ps PrefixSuffix) Index(s string) (int, []int) {
|
||||
prefixIdx := strings.Index(s, ps.p)
|
||||
if prefixIdx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
suffixLen := len(self.Suffix)
|
||||
suffixLen := len(ps.s)
|
||||
if suffixLen <= 0 {
|
||||
return prefixIdx, []int{len(s) - prefixIdx}
|
||||
}
|
||||
|
@ -30,7 +34,7 @@ func (self PrefixSuffix) Index(s string) (int, []int) {
|
|||
|
||||
segments := acquireSegments(len(s) - prefixIdx)
|
||||
for sub := s[prefixIdx:]; ; {
|
||||
suffixIdx := strings.LastIndex(sub, self.Suffix)
|
||||
suffixIdx := strings.LastIndex(sub, ps.s)
|
||||
if suffixIdx == -1 {
|
||||
break
|
||||
}
|
||||
|
@ -49,14 +53,14 @@ func (self PrefixSuffix) Index(s string) (int, []int) {
|
|||
return prefixIdx, segments
|
||||
}
|
||||
|
||||
func (self PrefixSuffix) Len() int {
|
||||
return lenNo
|
||||
func (ps PrefixSuffix) Match(s string) bool {
|
||||
return strings.HasPrefix(s, ps.p) && strings.HasSuffix(s, ps.s)
|
||||
}
|
||||
|
||||
func (self PrefixSuffix) Match(s string) bool {
|
||||
return strings.HasPrefix(s, self.Prefix) && strings.HasSuffix(s, self.Suffix)
|
||||
func (ps PrefixSuffix) MinLen() int {
|
||||
return ps.minLen
|
||||
}
|
||||
|
||||
func (self PrefixSuffix) String() string {
|
||||
return fmt.Sprintf("<prefix_suffix:[%s,%s]>", self.Prefix, self.Suffix)
|
||||
func (ps PrefixSuffix) String() string {
|
||||
return fmt.Sprintf("<prefix_suffix:[%s,%s]>", ps.p, ps.s)
|
||||
}
|
||||
|
|
|
@ -14,8 +14,8 @@ func NewRange(lo, hi rune, not bool) Range {
|
|||
return Range{lo, hi, not}
|
||||
}
|
||||
|
||||
func (self Range) Len() int {
|
||||
return lenOne
|
||||
func (self Range) MinLen() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
func (self Range) Match(s string) bool {
|
||||
|
|
104
match/row.go
104
match/row.go
|
@ -2,76 +2,72 @@ package match
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
)
|
||||
|
||||
type Row struct {
|
||||
Matchers Matchers
|
||||
RunesLength int
|
||||
Segments []int
|
||||
ms []MatchIndexSizer
|
||||
runes int
|
||||
seg []int
|
||||
}
|
||||
|
||||
func NewRow(len int, m ...Matcher) Row {
|
||||
func NewRow(ms []MatchIndexSizer) Row {
|
||||
var r int
|
||||
for _, m := range ms {
|
||||
r += m.RunesCount()
|
||||
}
|
||||
return Row{
|
||||
Matchers: Matchers(m),
|
||||
RunesLength: len,
|
||||
Segments: []int{len},
|
||||
ms: ms,
|
||||
runes: r,
|
||||
seg: []int{r},
|
||||
}
|
||||
}
|
||||
|
||||
func (self Row) matchAll(s string) bool {
|
||||
var idx int
|
||||
for _, m := range self.Matchers {
|
||||
length := m.Len()
|
||||
|
||||
var next, i int
|
||||
for next = range s[idx:] {
|
||||
i++
|
||||
if i == length {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if i < length || !m.Match(s[idx:idx+next+1]) {
|
||||
return false
|
||||
}
|
||||
|
||||
idx += next + 1
|
||||
func (r Row) Match(s string) bool {
|
||||
if !runes.ExactlyRunesCount(s, r.runes) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
return r.matchAll(s)
|
||||
}
|
||||
|
||||
func (self Row) lenOk(s string) bool {
|
||||
var i int
|
||||
for range s {
|
||||
i++
|
||||
if i > self.RunesLength {
|
||||
return false
|
||||
func (r Row) MinLen() int {
|
||||
return r.runes
|
||||
}
|
||||
|
||||
func (r Row) RunesCount() int {
|
||||
return r.runes
|
||||
}
|
||||
|
||||
func (r Row) Index(s string) (int, []int) {
|
||||
for j := 0; j < len(s)-r.runes; {
|
||||
i, _ := r.ms[0].Index(s[j:])
|
||||
if i == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
}
|
||||
return self.RunesLength == i
|
||||
}
|
||||
|
||||
func (self Row) Match(s string) bool {
|
||||
return self.lenOk(s) && self.matchAll(s)
|
||||
}
|
||||
|
||||
func (self Row) Len() (l int) {
|
||||
return self.RunesLength
|
||||
}
|
||||
|
||||
func (self Row) Index(s string) (int, []int) {
|
||||
for i := range s {
|
||||
if len(s[i:]) < self.RunesLength {
|
||||
break
|
||||
}
|
||||
if self.matchAll(s[i:]) {
|
||||
return i, self.Segments
|
||||
if r.matchAll(s[i:]) {
|
||||
return j + i, r.seg
|
||||
}
|
||||
_, x := utf8.DecodeRuneInString(s[i:])
|
||||
j += x
|
||||
}
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
func (self Row) String() string {
|
||||
return fmt.Sprintf("<row_%d:[%s]>", self.RunesLength, self.Matchers)
|
||||
func (r Row) String() string {
|
||||
return fmt.Sprintf("<row_%d:[%s]>", r.runes, r.ms)
|
||||
}
|
||||
|
||||
func (r Row) matchAll(s string) bool {
|
||||
var i int
|
||||
for _, m := range r.ms {
|
||||
n := m.RunesCount()
|
||||
sub := runes.Head(s[i:], n)
|
||||
if !m.Match(sub) {
|
||||
return false
|
||||
}
|
||||
i += len(sub)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
|
|
@ -7,36 +7,33 @@ import (
|
|||
|
||||
func TestRowIndex(t *testing.T) {
|
||||
for id, test := range []struct {
|
||||
matchers Matchers
|
||||
length int
|
||||
matchers []MatchIndexSizer
|
||||
fixture string
|
||||
index int
|
||||
segments []int
|
||||
}{
|
||||
{
|
||||
Matchers{
|
||||
[]MatchIndexSizer{
|
||||
NewText("abc"),
|
||||
NewText("def"),
|
||||
NewSingle(nil),
|
||||
},
|
||||
7,
|
||||
"qweabcdefghij",
|
||||
3,
|
||||
[]int{7},
|
||||
},
|
||||
{
|
||||
Matchers{
|
||||
[]MatchIndexSizer{
|
||||
NewText("abc"),
|
||||
NewText("def"),
|
||||
NewSingle(nil),
|
||||
},
|
||||
7,
|
||||
"abcd",
|
||||
-1,
|
||||
nil,
|
||||
},
|
||||
} {
|
||||
p := NewRow(test.length, test.matchers...)
|
||||
p := NewRow(test.matchers)
|
||||
index, segments := p.Index(test.fixture)
|
||||
if index != test.index {
|
||||
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
|
||||
|
@ -48,15 +45,11 @@ func TestRowIndex(t *testing.T) {
|
|||
}
|
||||
|
||||
func BenchmarkRowIndex(b *testing.B) {
|
||||
m := NewRow(
|
||||
7,
|
||||
Matchers{
|
||||
NewText("abc"),
|
||||
NewText("def"),
|
||||
NewSingle(nil),
|
||||
}...,
|
||||
)
|
||||
|
||||
m := NewRow([]MatchIndexSizer{
|
||||
NewText("abc"),
|
||||
NewText("def"),
|
||||
NewSingle(nil),
|
||||
})
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, s := m.Index(bench_pattern)
|
||||
releaseSegments(s)
|
||||
|
@ -64,15 +57,11 @@ func BenchmarkRowIndex(b *testing.B) {
|
|||
}
|
||||
|
||||
func BenchmarkIndexRowParallel(b *testing.B) {
|
||||
m := NewRow(
|
||||
7,
|
||||
Matchers{
|
||||
NewText("abc"),
|
||||
NewText("def"),
|
||||
NewSingle(nil),
|
||||
}...,
|
||||
)
|
||||
|
||||
m := NewRow([]MatchIndexSizer{
|
||||
NewText("abc"),
|
||||
NewText("def"),
|
||||
NewSingle(nil),
|
||||
})
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_, s := m.Index(bench_pattern)
|
||||
|
|
|
@ -2,42 +2,45 @@ package match
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
)
|
||||
|
||||
// single represents ?
|
||||
type Single struct {
|
||||
Separators []rune
|
||||
sep []rune
|
||||
}
|
||||
|
||||
func NewSingle(s []rune) Single {
|
||||
return Single{s}
|
||||
}
|
||||
|
||||
func (self Single) Match(s string) bool {
|
||||
r, w := utf8.DecodeRuneInString(s)
|
||||
if len(s) > w {
|
||||
func (s Single) Match(v string) bool {
|
||||
r, w := utf8.DecodeRuneInString(v)
|
||||
if len(v) > w {
|
||||
return false
|
||||
}
|
||||
|
||||
return runes.IndexRune(self.Separators, r) == -1
|
||||
return runes.IndexRune(s.sep, r) == -1
|
||||
}
|
||||
|
||||
func (self Single) Len() int {
|
||||
return lenOne
|
||||
func (s Single) MinLen() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
func (self Single) Index(s string) (int, []int) {
|
||||
for i, r := range s {
|
||||
if runes.IndexRune(self.Separators, r) == -1 {
|
||||
func (s Single) RunesCount() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
func (s Single) Index(v string) (int, []int) {
|
||||
for i, r := range v {
|
||||
if runes.IndexRune(s.sep, r) == -1 {
|
||||
return i, segmentsByRuneLength[utf8.RuneLen(r)]
|
||||
}
|
||||
}
|
||||
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
func (self Single) String() string {
|
||||
return fmt.Sprintf("<single:![%s]>", string(self.Separators))
|
||||
func (s Single) String() string {
|
||||
return fmt.Sprintf("<single:![%s]>", string(s.sep))
|
||||
}
|
||||
|
|
|
@ -3,33 +3,34 @@ package match
|
|||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type Suffix struct {
|
||||
Suffix string
|
||||
s string
|
||||
minLen int
|
||||
}
|
||||
|
||||
func NewSuffix(s string) Suffix {
|
||||
return Suffix{s}
|
||||
return Suffix{s, utf8.RuneCountInString(s)}
|
||||
}
|
||||
|
||||
func (self Suffix) Len() int {
|
||||
return lenNo
|
||||
func (s Suffix) MinLen() int {
|
||||
return s.minLen
|
||||
}
|
||||
|
||||
func (self Suffix) Match(s string) bool {
|
||||
return strings.HasSuffix(s, self.Suffix)
|
||||
func (s Suffix) Match(v string) bool {
|
||||
return strings.HasSuffix(v, s.s)
|
||||
}
|
||||
|
||||
func (self Suffix) Index(s string) (int, []int) {
|
||||
idx := strings.Index(s, self.Suffix)
|
||||
func (s Suffix) Index(v string) (int, []int) {
|
||||
idx := strings.Index(v, s.s)
|
||||
if idx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
return 0, []int{idx + len(self.Suffix)}
|
||||
return 0, []int{idx + len(s.s)}
|
||||
}
|
||||
|
||||
func (self Suffix) String() string {
|
||||
return fmt.Sprintf("<suffix:%s>", self.Suffix)
|
||||
func (s Suffix) String() string {
|
||||
return fmt.Sprintf("<suffix:%s>", s.s)
|
||||
}
|
||||
|
|
|
@ -3,41 +3,43 @@ package match
|
|||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
sutil "github.com/gobwas/glob/util/strings"
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
)
|
||||
|
||||
type SuffixAny struct {
|
||||
Suffix string
|
||||
Separators []rune
|
||||
s string
|
||||
sep []rune
|
||||
minLen int
|
||||
}
|
||||
|
||||
func NewSuffixAny(s string, sep []rune) SuffixAny {
|
||||
return SuffixAny{s, sep}
|
||||
return SuffixAny{s, sep, utf8.RuneCountInString(s)}
|
||||
}
|
||||
|
||||
func (self SuffixAny) Index(s string) (int, []int) {
|
||||
idx := strings.Index(s, self.Suffix)
|
||||
func (s SuffixAny) Index(v string) (int, []int) {
|
||||
idx := strings.Index(v, s.s)
|
||||
if idx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
i := sutil.LastIndexAnyRunes(s[:idx], self.Separators) + 1
|
||||
i := runes.LastIndexAnyRune(v[:idx], s.sep) + 1
|
||||
|
||||
return i, []int{idx + len(self.Suffix) - i}
|
||||
return i, []int{idx + len(s.s) - i}
|
||||
}
|
||||
|
||||
func (self SuffixAny) Len() int {
|
||||
return lenNo
|
||||
func (s SuffixAny) MinLen() int {
|
||||
return s.minLen
|
||||
}
|
||||
|
||||
func (self SuffixAny) Match(s string) bool {
|
||||
if !strings.HasSuffix(s, self.Suffix) {
|
||||
func (s SuffixAny) Match(v string) bool {
|
||||
if !strings.HasSuffix(v, s.s) {
|
||||
return false
|
||||
}
|
||||
return sutil.IndexAnyRunes(s[:len(s)-len(self.Suffix)], self.Separators) == -1
|
||||
return runes.IndexAnyRune(v[:len(v)-len(s.s)], s.sep) == -1
|
||||
}
|
||||
|
||||
func (self SuffixAny) String() string {
|
||||
return fmt.Sprintf("<suffix_any:![%s]%s>", string(self.Separators), self.Suffix)
|
||||
func (s SuffixAny) String() string {
|
||||
return fmt.Sprintf("<suffix_any:![%s]%s>", string(s.sep), s.s)
|
||||
}
|
||||
|
|
|
@ -10,24 +10,23 @@ func NewSuper() Super {
|
|||
return Super{}
|
||||
}
|
||||
|
||||
func (self Super) Match(s string) bool {
|
||||
func (s Super) Match(_ string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (self Super) Len() int {
|
||||
return lenNo
|
||||
func (s Super) MinLen() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (self Super) Index(s string) (int, []int) {
|
||||
segments := acquireSegments(len(s) + 1)
|
||||
for i := range s {
|
||||
segments = append(segments, i)
|
||||
func (s Super) Index(v string) (int, []int) {
|
||||
seg := acquireSegments(len(v) + 1)
|
||||
for i := range v {
|
||||
seg = append(seg, i)
|
||||
}
|
||||
segments = append(segments, len(s))
|
||||
|
||||
return 0, segments
|
||||
seg = append(seg, len(v))
|
||||
return 0, seg
|
||||
}
|
||||
|
||||
func (self Super) String() string {
|
||||
func (s Super) String() string {
|
||||
return fmt.Sprintf("<super>")
|
||||
}
|
||||
|
|
|
@ -8,38 +8,45 @@ import (
|
|||
|
||||
// raw represents raw string to match
|
||||
type Text struct {
|
||||
Str string
|
||||
RunesLength int
|
||||
BytesLength int
|
||||
Segments []int
|
||||
s string
|
||||
runes int
|
||||
bytes int
|
||||
seg []int
|
||||
}
|
||||
|
||||
func NewText(s string) Text {
|
||||
return Text{
|
||||
Str: s,
|
||||
RunesLength: utf8.RuneCountInString(s),
|
||||
BytesLength: len(s),
|
||||
Segments: []int{len(s)},
|
||||
s: s,
|
||||
runes: utf8.RuneCountInString(s),
|
||||
bytes: len(s),
|
||||
seg: []int{len(s)},
|
||||
}
|
||||
}
|
||||
|
||||
func (self Text) Match(s string) bool {
|
||||
return self.Str == s
|
||||
func (t Text) Match(s string) bool {
|
||||
return t.s == s
|
||||
}
|
||||
|
||||
func (self Text) Len() int {
|
||||
return self.RunesLength
|
||||
}
|
||||
|
||||
func (self Text) Index(s string) (int, []int) {
|
||||
index := strings.Index(s, self.Str)
|
||||
if index == -1 {
|
||||
func (t Text) Index(s string) (int, []int) {
|
||||
i := strings.Index(s, t.s)
|
||||
if i == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
return index, self.Segments
|
||||
return i, t.seg
|
||||
}
|
||||
|
||||
func (self Text) String() string {
|
||||
return fmt.Sprintf("<text:`%v`>", self.Str)
|
||||
func (t Text) MinLen() int {
|
||||
return t.runes
|
||||
}
|
||||
|
||||
func (t Text) BytesCount() int {
|
||||
return t.bytes
|
||||
}
|
||||
|
||||
func (t Text) RunesCount() int {
|
||||
return t.runes
|
||||
}
|
||||
|
||||
func (t Text) String() string {
|
||||
return fmt.Sprintf("<text:`%v`>", t.s)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
)
|
||||
|
||||
type Tree struct {
|
||||
value MatchIndexer
|
||||
left Matcher
|
||||
right Matcher
|
||||
|
||||
minLen int
|
||||
|
||||
runes int
|
||||
vrunes int
|
||||
lrunes int
|
||||
rrunes int
|
||||
}
|
||||
|
||||
type SizedTree struct {
|
||||
Tree
|
||||
}
|
||||
|
||||
func (st SizedTree) RunesCount() int {
|
||||
return st.Tree.runes
|
||||
}
|
||||
|
||||
func NewTree(v MatchIndexer, l, r Matcher) Matcher {
|
||||
tree := Tree{
|
||||
value: v,
|
||||
left: l,
|
||||
right: r,
|
||||
}
|
||||
tree.minLen = v.MinLen()
|
||||
if l != nil {
|
||||
tree.minLen += l.MinLen()
|
||||
}
|
||||
if r != nil {
|
||||
tree.minLen += r.MinLen()
|
||||
}
|
||||
var (
|
||||
ls, lsz = l.(Sizer)
|
||||
rs, rsz = r.(Sizer)
|
||||
vs, vsz = v.(Sizer)
|
||||
)
|
||||
if lsz {
|
||||
tree.lrunes = ls.RunesCount()
|
||||
} else {
|
||||
tree.lrunes = -1
|
||||
}
|
||||
if rsz {
|
||||
tree.rrunes = rs.RunesCount()
|
||||
} else {
|
||||
tree.rrunes = -1
|
||||
}
|
||||
if vsz {
|
||||
tree.vrunes = vs.RunesCount()
|
||||
} else {
|
||||
tree.vrunes = -1
|
||||
}
|
||||
if vsz && lsz && rsz {
|
||||
tree.runes = tree.vrunes + tree.lrunes + tree.rrunes
|
||||
return SizedTree{tree}
|
||||
}
|
||||
tree.runes = -1
|
||||
return tree
|
||||
}
|
||||
|
||||
func (t Tree) MinLen() int {
|
||||
return t.minLen
|
||||
}
|
||||
|
||||
func (t Tree) Match(s string) (ok bool) {
|
||||
enter()
|
||||
logf("matching %q: %v", s, t)
|
||||
defer func(s string) {
|
||||
logf("result: %q -> %v", s, ok)
|
||||
leave()
|
||||
}(s)
|
||||
|
||||
offset, limit := t.offsetLimit(s)
|
||||
q := s[offset : len(s)-limit]
|
||||
logf("OFFSET/LIMIT: %d/%d %q of %q", offset, limit, q, s)
|
||||
|
||||
for len(q) >= t.vrunes {
|
||||
// search for matching part in substring
|
||||
index, segments := t.value.Index(q)
|
||||
logf("INDEX #%d %q (%v)", index, q, t.value)
|
||||
if index == -1 {
|
||||
releaseSegments(segments)
|
||||
return false
|
||||
}
|
||||
|
||||
l := s[:offset+index]
|
||||
var left bool
|
||||
if t.left != nil {
|
||||
left = t.left.Match(l)
|
||||
} else {
|
||||
left = l == ""
|
||||
}
|
||||
logf("LEFT %q %v", l, left)
|
||||
if left {
|
||||
for _, seg := range segments {
|
||||
var (
|
||||
right bool
|
||||
)
|
||||
r := s[offset+index+seg:]
|
||||
if t.right != nil {
|
||||
right = t.right.Match(r)
|
||||
} else {
|
||||
right = r == ""
|
||||
}
|
||||
logf("RIGHT %q %v", r, right)
|
||||
if right {
|
||||
releaseSegments(segments)
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_, x := utf8.DecodeRuneInString(q[index:])
|
||||
releaseSegments(segments)
|
||||
q = q[x:]
|
||||
offset += x
|
||||
logf("SLICED TO %q", q)
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// Retuns substring and offset/limit pair in bytes.
|
||||
func (t Tree) offsetLimit(s string) (offset, limit int) {
|
||||
n := utf8.RuneCountInString(s)
|
||||
if t.runes > n {
|
||||
return 0, 0
|
||||
}
|
||||
if n := t.lrunes; n > 0 {
|
||||
offset = len(runes.Head(s, n))
|
||||
}
|
||||
if n := t.rrunes; n > 0 {
|
||||
limit = len(runes.Tail(s, n))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (t Tree) String() string {
|
||||
return fmt.Sprintf(
|
||||
"<btree:[%v<-%s->%v]>",
|
||||
t.left, t.value, t.right,
|
||||
)
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTree(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
tree Matcher
|
||||
str string
|
||||
exp bool
|
||||
}{
|
||||
{
|
||||
NewTree(NewText("abc"), NewSuper(), NewSuper()),
|
||||
"abc",
|
||||
true,
|
||||
},
|
||||
{
|
||||
NewTree(NewText("a"), NewSingle(nil), NewSingle(nil)),
|
||||
"aaa",
|
||||
true,
|
||||
},
|
||||
{
|
||||
NewTree(NewText("b"), NewSingle(nil), nil),
|
||||
"bbb",
|
||||
false,
|
||||
},
|
||||
{
|
||||
NewTree(
|
||||
NewText("c"),
|
||||
NewTree(
|
||||
NewSingle(nil),
|
||||
NewSuper(),
|
||||
nil,
|
||||
),
|
||||
nil,
|
||||
),
|
||||
"abc",
|
||||
true,
|
||||
},
|
||||
} {
|
||||
t.Run("", func(t *testing.T) {
|
||||
act := test.tree.Match(test.str)
|
||||
if act != test.exp {
|
||||
fmt.Println(Graphviz("NIL", test.tree))
|
||||
t.Errorf("match %q error: act: %t; exp: %t", test.str, act, test.exp)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
type fakeMatcher struct {
|
||||
len int
|
||||
segn int
|
||||
name string
|
||||
}
|
||||
|
||||
func (f *fakeMatcher) Match(string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (f *fakeMatcher) Index(s string) (int, []int) {
|
||||
seg := make([]int, 0, f.segn)
|
||||
for x := 0; x < f.segn; x++ {
|
||||
seg = append(seg, f.segn)
|
||||
}
|
||||
return 0, seg
|
||||
}
|
||||
|
||||
func (f *fakeMatcher) MinLen() int {
|
||||
return f.len
|
||||
}
|
||||
|
||||
func (f *fakeMatcher) String() string {
|
||||
return f.name
|
||||
}
|
||||
|
||||
func BenchmarkMatchTree(b *testing.B) {
|
||||
l := &fakeMatcher{4, 3, "left_fake"}
|
||||
r := &fakeMatcher{4, 3, "right_fake"}
|
||||
v := &fakeMatcher{2, 3, "value_fake"}
|
||||
|
||||
// must be <= len(l + r + v)
|
||||
fixture := "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"
|
||||
|
||||
bt := NewTree(v, l, r)
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
bt.Match(fixture)
|
||||
}
|
||||
})
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
package match
|
||||
|
||||
func minLen(ms []Matcher) (min int) {
|
||||
for i, m := range ms {
|
||||
n := m.MinLen()
|
||||
if i == 0 || n < min {
|
||||
min = n
|
||||
}
|
||||
}
|
||||
return min
|
||||
}
|
|
@ -0,0 +1,165 @@
|
|||
package ast
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
)
|
||||
|
||||
// Minimize tries to apply some heuristics to minimize number of nodes in given
|
||||
// t
|
||||
func Minimize(t *Node) *Node {
|
||||
switch t.Kind {
|
||||
case KindAnyOf:
|
||||
return minimizeAnyOf(t)
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// minimizeAnyOf tries to find common children of given node of AnyOf pattern
|
||||
// it searches for common children from left and from right
|
||||
// if any common children are found – then it returns new optimized ast t
|
||||
// else it returns nil
|
||||
func minimizeAnyOf(t *Node) *Node {
|
||||
if !SameKind(t.Children, KindPattern) {
|
||||
return nil
|
||||
}
|
||||
|
||||
commonLeft, commonRight := CommonChildren(t.Children)
|
||||
commonLeftCount, commonRightCount := len(commonLeft), len(commonRight)
|
||||
if commonLeftCount == 0 && commonRightCount == 0 { // there are no common parts
|
||||
return nil
|
||||
}
|
||||
|
||||
var result []*Node
|
||||
if commonLeftCount > 0 {
|
||||
result = append(result, NewNode(KindPattern, nil, commonLeft...))
|
||||
}
|
||||
|
||||
var anyOf []*Node
|
||||
for _, child := range t.Children {
|
||||
reuse := child.Children[commonLeftCount : len(child.Children)-commonRightCount]
|
||||
var node *Node
|
||||
if len(reuse) == 0 {
|
||||
// this pattern is completely reduced by commonLeft and commonRight patterns
|
||||
// so it become nothing
|
||||
node = NewNode(KindNothing, nil)
|
||||
} else {
|
||||
node = NewNode(KindPattern, nil, reuse...)
|
||||
}
|
||||
anyOf = AppendUnique(anyOf, node)
|
||||
}
|
||||
switch {
|
||||
case len(anyOf) == 1 && anyOf[0].Kind != KindNothing:
|
||||
result = append(result, anyOf[0])
|
||||
case len(anyOf) > 1:
|
||||
result = append(result, NewNode(KindAnyOf, nil, anyOf...))
|
||||
}
|
||||
|
||||
if commonRightCount > 0 {
|
||||
result = append(result, NewNode(KindPattern, nil, commonRight...))
|
||||
}
|
||||
|
||||
return NewNode(KindPattern, nil, result...)
|
||||
}
|
||||
|
||||
func CommonChildren(nodes []*Node) (commonLeft, commonRight []*Node) {
|
||||
if len(nodes) <= 1 {
|
||||
return
|
||||
}
|
||||
|
||||
// find node that has least number of children
|
||||
idx := OneWithLeastChildren(nodes)
|
||||
if idx == -1 {
|
||||
return
|
||||
}
|
||||
tree := nodes[idx]
|
||||
treeLength := len(tree.Children)
|
||||
|
||||
// allocate max able size for rightCommon slice
|
||||
// to get ability insert elements in reverse order (from end to start)
|
||||
// without sorting
|
||||
commonRight = make([]*Node, treeLength)
|
||||
lastRight := treeLength // will use this to get results as commonRight[lastRight:]
|
||||
|
||||
var (
|
||||
breakLeft bool
|
||||
breakRight bool
|
||||
commonTotal int
|
||||
)
|
||||
for i, j := 0, treeLength-1; commonTotal < treeLength && j >= 0 && !(breakLeft && breakRight); i, j = i+1, j-1 {
|
||||
treeLeft := tree.Children[i]
|
||||
treeRight := tree.Children[j]
|
||||
|
||||
for k := 0; k < len(nodes) && !(breakLeft && breakRight); k++ {
|
||||
// skip least children node
|
||||
if k == idx {
|
||||
continue
|
||||
}
|
||||
|
||||
restLeft := nodes[k].Children[i]
|
||||
restRight := nodes[k].Children[j+len(nodes[k].Children)-treeLength]
|
||||
|
||||
breakLeft = breakLeft || !treeLeft.Equal(restLeft)
|
||||
|
||||
// disable searching for right common parts, if left part is already overlapping
|
||||
breakRight = breakRight || (!breakLeft && j <= i)
|
||||
breakRight = breakRight || !treeRight.Equal(restRight)
|
||||
}
|
||||
|
||||
if !breakLeft {
|
||||
commonTotal++
|
||||
commonLeft = append(commonLeft, treeLeft)
|
||||
}
|
||||
if !breakRight {
|
||||
commonTotal++
|
||||
lastRight = j
|
||||
commonRight[j] = treeRight
|
||||
}
|
||||
}
|
||||
|
||||
commonRight = commonRight[lastRight:]
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func AppendUnique(target []*Node, val *Node) []*Node {
|
||||
for _, n := range target {
|
||||
if reflect.DeepEqual(n, val) {
|
||||
return target
|
||||
}
|
||||
}
|
||||
return append(target, val)
|
||||
}
|
||||
|
||||
func SameKind(nodes []*Node, kind Kind) bool {
|
||||
for _, n := range nodes {
|
||||
if n.Kind != kind {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func OneWithLeastChildren(nodes []*Node) int {
|
||||
min := -1
|
||||
idx := -1
|
||||
for i, n := range nodes {
|
||||
if idx == -1 || (len(n.Children) < min) {
|
||||
min = len(n.Children)
|
||||
idx = i
|
||||
}
|
||||
}
|
||||
return idx
|
||||
}
|
||||
|
||||
func Equal(a, b []*Node) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
for i, av := range a {
|
||||
if !av.Equal(b[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
|
@ -0,0 +1,126 @@
|
|||
package ast
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCommonChildren(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
nodes []*Node
|
||||
left []*Node
|
||||
right []*Node
|
||||
}{
|
||||
{
|
||||
nodes: []*Node{
|
||||
NewNode(KindNothing, nil,
|
||||
NewNode(KindText, Text{"a"}),
|
||||
NewNode(KindText, Text{"z"}),
|
||||
NewNode(KindText, Text{"c"}),
|
||||
),
|
||||
},
|
||||
},
|
||||
{
|
||||
nodes: []*Node{
|
||||
NewNode(KindNothing, nil,
|
||||
NewNode(KindText, Text{"a"}),
|
||||
NewNode(KindText, Text{"z"}),
|
||||
NewNode(KindText, Text{"c"}),
|
||||
),
|
||||
NewNode(KindNothing, nil,
|
||||
NewNode(KindText, Text{"a"}),
|
||||
NewNode(KindText, Text{"b"}),
|
||||
NewNode(KindText, Text{"c"}),
|
||||
),
|
||||
},
|
||||
left: []*Node{
|
||||
NewNode(KindText, Text{"a"}),
|
||||
},
|
||||
right: []*Node{
|
||||
NewNode(KindText, Text{"c"}),
|
||||
},
|
||||
},
|
||||
{
|
||||
nodes: []*Node{
|
||||
NewNode(KindNothing, nil,
|
||||
NewNode(KindText, Text{"a"}),
|
||||
NewNode(KindText, Text{"b"}),
|
||||
NewNode(KindText, Text{"c"}),
|
||||
NewNode(KindText, Text{"d"}),
|
||||
),
|
||||
NewNode(KindNothing, nil,
|
||||
NewNode(KindText, Text{"a"}),
|
||||
NewNode(KindText, Text{"b"}),
|
||||
NewNode(KindText, Text{"c"}),
|
||||
NewNode(KindText, Text{"c"}),
|
||||
NewNode(KindText, Text{"d"}),
|
||||
),
|
||||
},
|
||||
left: []*Node{
|
||||
NewNode(KindText, Text{"a"}),
|
||||
NewNode(KindText, Text{"b"}),
|
||||
},
|
||||
right: []*Node{
|
||||
NewNode(KindText, Text{"c"}),
|
||||
NewNode(KindText, Text{"d"}),
|
||||
},
|
||||
},
|
||||
{
|
||||
nodes: []*Node{
|
||||
NewNode(KindNothing, nil,
|
||||
NewNode(KindText, Text{"a"}),
|
||||
NewNode(KindText, Text{"b"}),
|
||||
NewNode(KindText, Text{"c"}),
|
||||
),
|
||||
NewNode(KindNothing, nil,
|
||||
NewNode(KindText, Text{"a"}),
|
||||
NewNode(KindText, Text{"b"}),
|
||||
NewNode(KindText, Text{"b"}),
|
||||
NewNode(KindText, Text{"c"}),
|
||||
),
|
||||
},
|
||||
left: []*Node{
|
||||
NewNode(KindText, Text{"a"}),
|
||||
NewNode(KindText, Text{"b"}),
|
||||
},
|
||||
right: []*Node{
|
||||
NewNode(KindText, Text{"c"}),
|
||||
},
|
||||
},
|
||||
{
|
||||
nodes: []*Node{
|
||||
NewNode(KindNothing, nil,
|
||||
NewNode(KindText, Text{"a"}),
|
||||
NewNode(KindText, Text{"d"}),
|
||||
),
|
||||
NewNode(KindNothing, nil,
|
||||
NewNode(KindText, Text{"a"}),
|
||||
NewNode(KindText, Text{"d"}),
|
||||
),
|
||||
NewNode(KindNothing, nil,
|
||||
NewNode(KindText, Text{"a"}),
|
||||
NewNode(KindText, Text{"e"}),
|
||||
),
|
||||
},
|
||||
left: []*Node{
|
||||
NewNode(KindText, Text{"a"}),
|
||||
},
|
||||
right: []*Node{},
|
||||
},
|
||||
} {
|
||||
t.Run("", func(t *testing.T) {
|
||||
left, right := CommonChildren(test.nodes)
|
||||
if !Equal(left, test.left) {
|
||||
t.Errorf(
|
||||
"left, right := commonChildren(); left = %v; want %v",
|
||||
left, test.left,
|
||||
)
|
||||
}
|
||||
if !Equal(right, test.right) {
|
||||
t.Errorf(
|
||||
"left, right := commonChildren(); right = %v; want %v",
|
||||
right, test.right,
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
|
@ -3,8 +3,9 @@ package ast
|
|||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/gobwas/glob/syntax/lexer"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/gobwas/glob/syntax/lexer"
|
||||
)
|
||||
|
||||
type Lexer interface {
|
||||
|
|
|
@ -1,5 +1,98 @@
|
|||
package runes
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Head returns the prefix of s that holds its first r runes. When s has
// fewer than r runes the whole of s is returned, and r == 0 yields the empty
// string. Invalid UTF-8 bytes count as one rune each.
//
// A negative r is never reached by the counter, so the whole of s is
// returned in that case.
func Head(s string, r int) string {
	if r == 0 {
		// Without this guard the counter below would step past zero and the
		// whole string would be returned.
		return ""
	}
	var i, m int
	for i < len(s) {
		_, n := utf8.DecodeRuneInString(s[i:])
		i += n
		m++
		if m == r {
			break
		}
	}
	return s[:i]
}
|
||||
|
||||
// Tail returns the suffix of s that holds its last r runes. When s has fewer
// than r runes the whole of s is returned, and r == 0 yields the empty
// string. Invalid UTF-8 bytes count as one rune each, which mirrors Head.
//
// A negative r is never reached by the counter, so the whole of s is
// returned in that case.
func Tail(s string, r int) string {
	if r == 0 {
		return ""
	}
	i := len(s)
	for n := 0; i > 0; {
		// Decoding from the end handles every rune, including a correctly
		// encoded U+FFFD, without guessing the width of the encoding.
		_, w := utf8.DecodeLastRuneInString(s[:i])
		i -= w
		n++
		if n == r {
			break
		}
	}
	return s[i:]
}
|
||||
|
||||
// ExactlyRunesCount reports whether s consists of exactly n runes. It stops
// scanning as soon as more than n runes have been seen.
func ExactlyRunesCount(s string, n int) bool {
	seen := 0
	for range s {
		if seen++; seen > n {
			return false
		}
	}
	return seen == n
}
|
||||
|
||||
// AtLeastRunesCount reports whether s holds at least n runes. It stops
// scanning as soon as n runes have been seen. Any string, including the
// empty one, holds at least n runes when n <= 0.
func AtLeastRunesCount(s string, n int) bool {
	if n <= 0 {
		// The loop below never runs for an empty s, which used to make the
		// answer for ("", 0) false.
		return true
	}
	var m int
	for range s {
		m++
		if m >= n {
			return true
		}
	}
	return false
}
|
||||
|
||||
// IndexAnyRune returns the byte index in s of the first occurrence of the
// first rune from rs that occurs in s, or -1 when none of them occur.
//
// Runes are tried in the order they appear in rs, so the result is not
// necessarily the smallest index over all of rs.
func IndexAnyRune(s string, rs []rune) int {
	for k := 0; k < len(rs); k++ {
		pos := strings.IndexRune(s, rs[k])
		if pos == -1 {
			continue
		}
		return pos
	}
	return -1
}
|
||||
|
||||
// LastIndexAnyRune returns the byte index in s of the last occurrence of the
// first rune from rs that occurs in s, or -1 when none of them occur.
//
// Runes are tried in the order they appear in rs, so the result is not
// necessarily the greatest index over all of rs. Runes that are not valid
// Unicode code points never match. utf8.RuneError is matched only where s
// holds a properly encoded U+FFFD.
//
// NOTE(review): callers that want the last position of any rune in rs would
// need the maximum over all runes; confirm which of the two is intended.
func LastIndexAnyRune(s string, rs []rune) int {
	for _, r := range rs {
		i := -1
		switch {
		case 0 <= r && r < utf8.RuneSelf:
			// Single-byte rune: a plain byte search is enough.
			i = strings.LastIndexByte(s, byte(r))
		case utf8.ValidRune(r):
			// Multi-byte rune: search for its UTF-8 encoding from the end.
			i = strings.LastIndex(s, string(r))
		}
		if i != -1 {
			return i
		}
	}
	return -1
}
|
||||
|
||||
func Index(s, needle []rune) int {
|
||||
ls, ln := len(s), len(needle)
|
||||
|
||||
|
@ -130,6 +223,7 @@ func IndexLastRune(s []rune, r rune) int {
|
|||
}
|
||||
|
||||
func Equal(a, b []rune) bool {
|
||||
// TODO use bytes.Equal with unsafe.
|
||||
if len(a) == len(b) {
|
||||
for i := 0; i < len(a); i++ {
|
||||
if a[i] != b[i] {
|
||||
|
|
|
@ -1,39 +0,0 @@
|
|||
package strings
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func IndexAnyRunes(s string, rs []rune) int {
|
||||
for _, r := range rs {
|
||||
if i := strings.IndexRune(s, r); i != -1 {
|
||||
return i
|
||||
}
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
func LastIndexAnyRunes(s string, rs []rune) int {
|
||||
for _, r := range rs {
|
||||
i := -1
|
||||
if 0 <= r && r < utf8.RuneSelf {
|
||||
i = strings.LastIndexByte(s, byte(r))
|
||||
} else {
|
||||
sub := s
|
||||
for len(sub) > 0 {
|
||||
j := strings.IndexRune(s, r)
|
||||
if j == -1 {
|
||||
break
|
||||
}
|
||||
i = j
|
||||
sub = sub[i+1:]
|
||||
}
|
||||
}
|
||||
if i != -1 {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
Loading…
Reference in New Issue