Compare commits

..

12 Commits

Author SHA1 Message Date
gobwas bcbbef0a8d fix tests 2019-03-12 22:15:41 +03:00
gobwas cd9e75ee86 fix tests 2019-03-12 22:04:16 +03:00
gobwas fa2b8d5017 travis: update 2019-03-12 21:29:42 +03:00
gobwas c8369fd9fe wip 2019-02-10 21:25:05 +03:00
gobwas 1c85fdee98 wip 2019-02-06 23:59:37 +03:00
gobwas 9bf042f426 wip 2019-02-06 23:51:47 +03:00
gobwas 6f2a897df2 tune bench script 2019-02-06 23:48:06 +03:00
gobwas 4a52abd846 wip 2019-02-06 23:43:38 +03:00
Sergey Kamardin abc7140723 wip 2018-11-24 21:47:12 +03:00
Sergey Kamardin 2b9d056d0d debug mechanics 2018-10-02 22:52:57 +03:00
Sergey Kamardin 9cd1b6671f wip 2018-10-02 22:11:52 +03:00
gobwas e4652bc1f4 dramatic refactoring 2018-10-02 22:11:19 +03:00
53 changed files with 2269 additions and 1683 deletions

1
.gitignore vendored
View File

@ -6,3 +6,4 @@ glob.iml
*.dot
*.png
*.svg
patterns.txt

View File

@ -1,10 +1,9 @@
language: go
go:
- "1.7.X"
- "1.8.X"
- "1.9.X"
- "1.10.X"
- master
- 1.7.x
- 1.8.x
- 1.9.x
- 1.x
matrix:
allow_failures:

View File

@ -1,15 +1,21 @@
#! /bin/bash
rnd=$(head -c4 </dev/urandom|xxd -p)
bench() {
filename="/tmp/$1-$2.bench"
# Benchmark name filter handed to `go test -bench`; defaults to every
# benchmark and is overridden by the second argument when one is given.
local exp=".*"
if [[ -n $2 ]]; then
    # Plain assignment: no leading `$` and no spaces around `=`. The previous
    # `$exp = $2` expanded to the command `.* = ...` instead of assigning.
    exp=$2
fi
filename=$(echo "$rnd-$1.bench" | tr "/" "_")
if test -e "${filename}";
then
echo "Already exists ${filename}"
else
backup=`git rev-parse --abbrev-ref HEAD`
git checkout $1
git checkout "$1"
echo -n "Creating ${filename}... "
go test ./... -run=NONE -bench=$2 > "${filename}" -benchmem
go test ./... -run=NONE -bench="$exp" > "${filename}" -benchmem
echo "OK"
git checkout ${backup}
sleep 5
@ -23,4 +29,4 @@ current=`git rev-parse --abbrev-ref HEAD`
bench ${to} $2
bench ${current} $2
benchcmp $3 "/tmp/${to}-$2.bench" "/tmp/${current}-$2.bench"
benchcmp $3 "$rnd-${to}.bench" "$rnd-${current}.bench"

View File

@ -1,45 +1,116 @@
package main
import (
"bufio"
"flag"
"fmt"
"os"
"os/exec"
"strings"
"unicode/utf8"
"git.internal/re/glob"
"git.internal/re/glob/match"
"git.internal/re/glob/match/debug"
"github.com/gobwas/glob"
"github.com/gobwas/glob/match"
)
func main() {
pattern := flag.String("p", "", "pattern to draw")
sep := flag.String("s", "", "comma separated list of separators characters")
var (
pattern = flag.String("p", "", "pattern to draw")
sep = flag.String("s", "", "comma separated list of separators characters")
filepath = flag.String("file", "", "path for patterns file")
auto = flag.Bool("auto", false, "autoopen result")
offset = flag.Int("offset", 0, "patterns to skip")
)
flag.Parse()
if *pattern == "" {
flag.Usage()
os.Exit(1)
var patterns []string
if *pattern != "" {
patterns = append(patterns, *pattern)
}
if *filepath != "" {
file, err := os.Open(*filepath)
if err != nil {
fmt.Printf("could not open file: %v\n", err)
os.Exit(1)
}
s := bufio.NewScanner(file)
for s.Scan() {
fmt.Println(*offset)
if *offset > 0 {
*offset--
fmt.Println("skipped")
continue
}
patterns = append(patterns, s.Text())
}
file.Close()
}
if len(patterns) == 0 {
return
}
var separators []rune
if len(*sep) > 0 {
for _, c := range strings.Split(*sep, ",") {
if r, w := utf8.DecodeRuneInString(c); len(c) > w {
fmt.Println("only single charactered separators are allowed")
r, w := utf8.DecodeRuneInString(c)
if len(c) > w {
fmt.Printf("only single charactered separators are allowed: %+q\n", c)
os.Exit(1)
} else {
separators = append(separators, r)
}
separators = append(separators, r)
}
}
glob, err := glob.Compile(*pattern, separators...)
if err != nil {
fmt.Println("could not compile pattern:", err)
os.Exit(1)
br := bufio.NewReader(os.Stdin)
for _, p := range patterns {
g, err := glob.Compile(p, separators...)
if err != nil {
fmt.Printf("could not compile pattern %+q: %v\n", p, err)
os.Exit(1)
}
s := match.Graphviz(p, g.(match.Matcher))
if *auto {
fmt.Fprintf(os.Stdout, "pattern: %+q: ", p)
if err := open(s); err != nil {
fmt.Printf("could not open graphviz: %v", err)
os.Exit(1)
}
if !next(br) {
return
}
} else {
fmt.Fprintln(os.Stdout, s)
}
}
matcher := glob.(match.Matcher)
fmt.Fprint(os.Stdout, debug.Graphviz(*pattern, matcher))
}
// open renders the Graphviz source s into the file glob.graphviz.png by
// piping it through `dot -Tpng`, then passes that file to the `open` command.
// It returns the first error reported by file creation, dot, the file sync or
// the `open` command.
func open(s string) error {
	out, err := os.Create("glob.graphviz.png")
	if err != nil {
		return err
	}
	defer out.Close()

	render := exec.Command("dot", "-Tpng")
	render.Stdin = strings.NewReader(s)
	render.Stdout = out
	if err = render.Run(); err != nil {
		return err
	}
	// Sync the rendered image before launching the `open` command on it.
	if err = out.Sync(); err != nil {
		return err
	}
	return exec.Command("open", out.Name()).Run()
}
// next prints a "cancel?" prompt on stdout and reads one line of the answer
// from in. It returns false (stop) when the answer starts with 'Y' or 'y', or
// when reading fails (including EOF before a newline). Any other answer,
// including an empty line, returns true (continue).
func next(in *bufio.Reader) bool {
	fmt.Fprint(os.Stdout, "cancel? [Y/n]: ")
	answer, err := in.ReadBytes('\n')
	if err != nil {
		return false
	}
	// On success ReadBytes includes the delimiter, so answer is never empty;
	// the length check only keeps the index below obviously safe.
	if len(answer) == 0 {
		return true
	}
	switch answer[0] {
	case 'Y', 'y': // accept either case, as the [Y/n] prompt suggests
		return false
	}
	return true
}

View File

@ -8,7 +8,7 @@ import (
"testing"
"unicode/utf8"
"git.internal/re/glob"
"github.com/gobwas/glob"
)
func benchString(r testing.BenchmarkResult) string {

View File

@ -5,481 +5,80 @@ package compiler
import (
"fmt"
"reflect"
"git.internal/re/glob/match"
"git.internal/re/glob/syntax/ast"
"git.internal/re/glob/util/runes"
"github.com/gobwas/glob/internal/debug"
"github.com/gobwas/glob/match"
"github.com/gobwas/glob/syntax/ast"
)
func optimizeMatcher(matcher match.Matcher) match.Matcher {
switch m := matcher.(type) {
case match.Any:
if len(m.Separators) == 0 {
return match.NewSuper()
}
case match.AnyOf:
if len(m.Matchers) == 1 {
return m.Matchers[0]
}
return m
case match.List:
if m.Not == false && len(m.List) == 1 {
return match.NewText(string(m.List))
}
return m
case match.BTree:
m.Left = optimizeMatcher(m.Left)
m.Right = optimizeMatcher(m.Right)
r, ok := m.Value.(match.Text)
if !ok {
return m
}
var (
leftNil = m.Left == nil
rightNil = m.Right == nil
)
if leftNil && rightNil {
return match.NewText(r.Str)
}
_, leftSuper := m.Left.(match.Super)
lp, leftPrefix := m.Left.(match.Prefix)
la, leftAny := m.Left.(match.Any)
_, rightSuper := m.Right.(match.Super)
rs, rightSuffix := m.Right.(match.Suffix)
ra, rightAny := m.Right.(match.Any)
switch {
case leftSuper && rightSuper:
return match.NewContains(r.Str, false)
case leftSuper && rightNil:
return match.NewSuffix(r.Str)
case rightSuper && leftNil:
return match.NewPrefix(r.Str)
case leftNil && rightSuffix:
return match.NewPrefixSuffix(r.Str, rs.Suffix)
case rightNil && leftPrefix:
return match.NewPrefixSuffix(lp.Prefix, r.Str)
case rightNil && leftAny:
return match.NewSuffixAny(r.Str, la.Separators)
case leftNil && rightAny:
return match.NewPrefixAny(r.Str, ra.Separators)
}
return m
func Compile(tree *ast.Node, sep []rune) (match.Matcher, error) {
m, err := compile(tree, sep)
if err != nil {
return nil, err
}
return matcher
return m, nil
}
// compileMatchers folds a sequence of matchers into a single matcher.
//
// An empty sequence is an error and a single matcher is returned as is. If
// glueMatchers can merge the whole sequence, that result is used. Otherwise
// the matcher with the largest static length (Len() != -1; the right-most one
// wins ties) becomes the value of a BTree whose left and right branches are
// built recursively from the matchers before and after it. When no matcher
// has a static length, the first matcher becomes the BTree value and the
// remaining ones are compiled into its right branch.
func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
	switch len(matchers) {
	case 0:
		return nil, fmt.Errorf("compile error: need at least one matcher")
	case 1:
		return matchers[0], nil
	}
	if glued := glueMatchers(matchers); glued != nil {
		return glued, nil
	}

	// Locate the pivot: the last matcher holding the maximal static length.
	pivot, longest := -1, -1
	for i := range matchers {
		if n := matchers[i].Len(); n != -1 && n >= longest {
			pivot, longest = i, n
		}
	}
	if pivot == -1 {
		// No matcher has a static length.
		rest, err := compileMatchers(matchers[1:])
		if err != nil {
			return nil, err
		}
		return match.NewBTree(matchers[0], nil, rest), nil
	}

	var (
		left, right match.Matcher
		err         error
	)
	if before := matchers[:pivot]; len(before) > 0 {
		if left, err = compileMatchers(before); err != nil {
			return nil, err
		}
	}
	if after := matchers[pivot+1:]; len(after) > 0 {
		if right, err = compileMatchers(after); err != nil {
			return nil, err
		}
	}
	return match.NewBTree(matchers[pivot], left, right), nil
}
// glueMatchers tries to merge the whole sequence into one matcher, first via
// glueMatchersAsEvery and then via glueMatchersAsRow. It returns nil when
// neither strategy applies.
func glueMatchers(matchers []match.Matcher) match.Matcher {
	if every := glueMatchersAsEvery(matchers); every != nil {
		return every
	}
	return glueMatchersAsRow(matchers)
}
// glueMatchersAsRow merges two or more matchers into a single Row whose
// length is the sum of their lengths. It returns nil when fewer than two
// matchers are given or when any of them reports Len() == -1.
func glueMatchersAsRow(matchers []match.Matcher) match.Matcher {
	if len(matchers) < 2 {
		return nil
	}
	var (
		row   []match.Matcher
		total int
	)
	for _, m := range matchers {
		n := m.Len()
		if n == -1 {
			return nil
		}
		row = append(row, m)
		total += n
	}
	return match.NewRow(total, row...)
}
// glueMatchersAsEvery collapses a sequence made only of Super, Any, Single
// and negated List matchers that all share one separator set.
//
// It returns nil when fewer than two matchers are given, when a matcher of
// another type (or a non-negated List) is present, or when the separator sets
// differ. Otherwise the result is:
//   - Super, if only Super matchers were seen;
//   - Any(separator), if only Any matchers were seen;
//   - Min(n), if Any/Super are mixed with n single-character matchers and the
//     separator set is empty;
//   - an EveryOf combining Min(n) (plus Max(n) when there is no Any/Super)
//     and, for a non-empty separator set, a negated Contains.
func glueMatchersAsEvery(matchers []match.Matcher) match.Matcher {
	if len(matchers) < 2 {
		return nil
	}

	var (
		sawAny, sawSuper, sawSingle bool

		singles   int    // matchers that consume exactly one character
		separator []rune // separator set of the first matcher
	)
	for i, matcher := range matchers {
		var sep []rune
		switch m := matcher.(type) {
		case match.Super:
			sep = []rune{}
			sawSuper = true
		case match.Any:
			sep = m.Separators
			sawAny = true
		case match.Single:
			sep = m.Separators
			sawSingle = true
			singles++
		case match.List:
			if !m.Not {
				return nil
			}
			sep = m.List
			sawSingle = true
			singles++
		default:
			return nil
		}

		// The first matcher defines the separator set the others must share.
		if i == 0 {
			separator = sep
		}
		if !runes.Equal(sep, separator) {
			return nil
		}
	}

	switch {
	case sawSuper && !sawAny && !sawSingle:
		return match.NewSuper()
	case sawAny && !sawSuper && !sawSingle:
		return match.NewAny(separator)
	case (sawAny || sawSuper) && singles > 0 && len(separator) == 0:
		return match.NewMin(singles)
	}

	every := match.NewEveryOf()
	if singles > 0 {
		every.Add(match.NewMin(singles))
		if !sawAny && !sawSuper {
			every.Add(match.NewMax(singles))
		}
	}
	if len(separator) > 0 {
		every.Add(match.NewContains(string(separator), true))
	}
	return every
}
// minimizeMatchers shortens a matcher sequence by replacing one contiguous
// run with the single matcher glueMatchers produces for it, then repeating
// the process on the shortened sequence.
//
// It returns the input unchanged when no run can be glued. The input slice is
// not modified; a replacement sequence is built in a fresh slice.
func minimizeMatchers(matchers []match.Matcher) []match.Matcher {
// done is the best glued matcher found so far; left/right delimit the run
// matchers[left:right] it replaces and count is that run's size.
var done match.Matcher
var left, right, count int
// Try every run matchers[l:r], longest first for each start position.
for l := 0; l < len(matchers); l++ {
for r := len(matchers); r > l; r-- {
if glued := glueMatchers(matchers[l:r]); glued != nil {
var swap bool
if done == nil {
// First candidate always wins.
swap = true
} else {
// Prefer the candidate when both lengths are static and it is longer,
// or when it covers more source matchers than the current best.
cl, gl := done.Len(), glued.Len()
swap = cl > -1 && gl > -1 && gl > cl
swap = swap || count < r-l
}
if swap {
done = glued
left = l
right = r
count = r - l
}
}
}
}
if done == nil {
return matchers
}
// Rebuild the sequence as prefix + glued matcher + suffix.
next := append(append([]match.Matcher{}, matchers[:left]...), done)
if right < len(matchers) {
next = append(next, matchers[right:]...)
}
// Stop when the replacement did not shrink the sequence; otherwise look for
// further runs to glue.
if len(next) == len(matchers) {
return next
}
return minimizeMatchers(next)
}
// minimizeTree applies heuristics that reduce the number of nodes in the
// given tree. Only AnyOf nodes are handled (see minimizeTreeAnyOf); for every
// other node kind it returns nil.
func minimizeTree(tree *ast.Node) *ast.Node {
	if tree.Kind == ast.KindAnyOf {
		return minimizeTreeAnyOf(tree)
	}
	return nil
}
// minimizeTreeAnyOf factors out the children shared by all alternatives of an
// AnyOf node. It looks for common children at the left and at the right end
// of the alternatives (see commonChildren).
//
// It returns nil when some alternative is not a Pattern node or when nothing
// is shared. Otherwise it returns a new Pattern node consisting of the common
// left part, the remaining (deduplicated) alternatives and the common right
// part, in that order.
func minimizeTreeAnyOf(tree *ast.Node) *ast.Node {
if !areOfSameKind(tree.Children, ast.KindPattern) {
return nil
}
commonLeft, commonRight := commonChildren(tree.Children)
commonLeftCount, commonRightCount := len(commonLeft), len(commonRight)
if commonLeftCount == 0 && commonRightCount == 0 { // there are no common parts
return nil
}
var result []*ast.Node
if commonLeftCount > 0 {
result = append(result, ast.NewNode(ast.KindPattern, nil, commonLeft...))
}
// Collect what is left of every alternative once the shared prefix and
// suffix are cut off; identical leftovers are kept only once.
var anyOf []*ast.Node
for _, child := range tree.Children {
reuse := child.Children[commonLeftCount : len(child.Children)-commonRightCount]
var node *ast.Node
if len(reuse) == 0 {
// This alternative is fully covered by the common left and right parts,
// so nothing of it remains.
node = ast.NewNode(ast.KindNothing, nil)
} else {
node = ast.NewNode(ast.KindPattern, nil, reuse...)
}
anyOf = appendIfUnique(anyOf, node)
}
// A single non-empty leftover is inlined; several leftovers stay wrapped in
// an AnyOf node; a single Nothing leftover is dropped altogether.
switch {
case len(anyOf) == 1 && anyOf[0].Kind != ast.KindNothing:
result = append(result, anyOf[0])
case len(anyOf) > 1:
result = append(result, ast.NewNode(ast.KindAnyOf, nil, anyOf...))
}
if commonRightCount > 0 {
result = append(result, ast.NewNode(ast.KindPattern, nil, commonRight...))
}
return ast.NewNode(ast.KindPattern, nil, result...)
}
// commonChildren returns the children shared by all given nodes at the start
// (commonLeft) and at the end (commonRight) of their child lists, comparing
// children with Node.Equal.
//
// Both results are nil when fewer than two nodes are given. The node with the
// fewest children is used as the reference the other nodes are compared to.
func commonChildren(nodes []*ast.Node) (commonLeft, commonRight []*ast.Node) {
if len(nodes) <= 1 {
return
}
// find node that has least number of children
idx := leastChildren(nodes)
if idx == -1 {
return
}
tree := nodes[idx]
treeLength := len(tree.Children)
// Allocate commonRight at its maximum possible size so that elements can be
// stored by index from the end towards the start, without reordering later.
commonRight = make([]*ast.Node, treeLength)
lastRight := treeLength // will use this to get results as commonRight[lastRight:]
var (
breakLeft bool
breakRight bool
commonTotal int
)
// i walks the reference children from the start and j from the end. The scan
// stops once every reference child has been counted, j runs out, or both
// directions have hit a mismatch.
for i, j := 0, treeLength-1; commonTotal < treeLength && j >= 0 && !(breakLeft && breakRight); i, j = i+1, j-1 {
treeLeft := tree.Children[i]
treeRight := tree.Children[j]
for k := 0; k < len(nodes) && !(breakLeft && breakRight); k++ {
// skip least children node
if k == idx {
continue
}
restLeft := nodes[k].Children[i]
// Shift j by the length difference so that it addresses the child at the
// same distance from the end of nodes[k].
restRight := nodes[k].Children[j+len(nodes[k].Children)-treeLength]
breakLeft = breakLeft || !treeLeft.Equal(restLeft)
// disable searching for right common parts, if left part is already overlapping
breakRight = breakRight || (!breakLeft && j <= i)
breakRight = breakRight || !treeRight.Equal(restRight)
}
if !breakLeft {
commonTotal++
commonLeft = append(commonLeft, treeLeft)
}
if !breakRight {
commonTotal++
lastRight = j
commonRight[j] = treeRight
}
}
// Only the tail starting at the left-most stored index holds results.
commonRight = commonRight[lastRight:]
return
}
// appendIfUnique appends val to target unless target already holds a node
// that is reflect.DeepEqual to it, in which case target is returned as is.
func appendIfUnique(target []*ast.Node, val *ast.Node) []*ast.Node {
	duplicate := false
	for i := 0; i < len(target) && !duplicate; i++ {
		duplicate = reflect.DeepEqual(target[i], val)
	}
	if duplicate {
		return target
	}
	return append(target, val)
}
// areOfSameKind reports whether every node in nodes has the given kind. It
// is true for an empty list.
func areOfSameKind(nodes []*ast.Node, kind ast.Kind) bool {
	for i := 0; i < len(nodes); i++ {
		if nodes[i].Kind != kind {
			return false
		}
	}
	return true
}
// leastChildren returns the index of the node with the fewest children; the
// first such node wins ties. It returns -1 for an empty list.
func leastChildren(nodes []*ast.Node) int {
	if len(nodes) == 0 {
		return -1
	}
	best, fewest := 0, len(nodes[0].Children)
	for i := 1; i < len(nodes); i++ {
		if c := len(nodes[i].Children); c < fewest {
			best, fewest = i, c
		}
	}
	return best
}
func compileTreeChildren(tree *ast.Node, sep []rune) ([]match.Matcher, error) {
func compileNodes(ns []*ast.Node, sep []rune) ([]match.Matcher, error) {
var matchers []match.Matcher
for _, desc := range tree.Children {
m, err := compile(desc, sep)
for _, n := range ns {
m, err := compile(n, sep)
if err != nil {
return nil, err
}
matchers = append(matchers, optimizeMatcher(m))
matchers = append(matchers, m)
}
return matchers, nil
}
func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) {
switch tree.Kind {
case ast.KindAnyOf:
// todo this could be faster on pattern_alternatives_combine_lite (see glob_test.go)
if n := minimizeTree(tree); n != nil {
return compile(n, sep)
func compile(node *ast.Node, sep []rune) (m match.Matcher, err error) {
if debug.Enabled {
debug.EnterPrefix("compiler: compiling %s", node)
defer func() {
if err != nil {
debug.Logf("->! %v", err)
} else {
debug.Logf("-> %s", m)
}
debug.LeavePrefix()
}()
}
// todo this could be faster on pattern_alternatives_combine_lite (see glob_test.go)
if n := ast.Minimize(node); n != nil {
debug.Logf("minimized tree -> %s", node, n)
r, err := compile(n, sep)
if debug.Enabled {
if err != nil {
debug.Logf("compiler: compile minimized tree failed: %v", err)
} else {
debug.Logf("compiler: minimized tree")
debug.Logf("compiler: \t%s", node)
debug.Logf("compiler: \t%s", n)
}
}
matchers, err := compileTreeChildren(tree, sep)
if err == nil {
return r, nil
}
}
switch node.Kind {
case ast.KindAnyOf:
matchers, err := compileNodes(node.Children, sep)
if err != nil {
return nil, err
}
return match.NewAnyOf(matchers...), nil
case ast.KindPattern:
if len(tree.Children) == 0 {
if len(node.Children) == 0 {
return match.NewNothing(), nil
}
matchers, err := compileTreeChildren(tree, sep)
matchers, err := compileNodes(node.Children, sep)
if err != nil {
return nil, err
}
m, err = compileMatchers(minimizeMatchers(matchers))
m, err = match.Compile(match.Minimize(matchers))
if err != nil {
return nil, err
}
@ -497,29 +96,20 @@ func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) {
m = match.NewNothing()
case ast.KindList:
l := tree.Value.(ast.List)
l := node.Value.(ast.List)
m = match.NewList([]rune(l.Chars), l.Not)
case ast.KindRange:
r := tree.Value.(ast.Range)
r := node.Value.(ast.Range)
m = match.NewRange(r.Lo, r.Hi, r.Not)
case ast.KindText:
t := tree.Value.(ast.Text)
t := node.Value.(ast.Text)
m = match.NewText(t.Text)
default:
return nil, fmt.Errorf("could not compile tree: unknown node type")
}
return optimizeMatcher(m), nil
}
func Compile(tree *ast.Node, sep []rune) (match.Matcher, error) {
m, err := compile(tree, sep)
if err != nil {
return nil, err
}
return m, nil
return match.Optimize(m), nil
}

View File

@ -4,355 +4,58 @@ import (
"reflect"
"testing"
"git.internal/re/glob/match"
"git.internal/re/glob/match/debug"
"git.internal/re/glob/syntax/ast"
"github.com/gobwas/glob/match"
"github.com/gobwas/glob/syntax/ast"
)
var separators = []rune{'.'}
// TestCommonChildren checks the common left and right children reported by
// commonChildren for several sets of nodes whose children are Text nodes.
func TestCommonChildren(t *testing.T) {
for i, test := range []struct {
nodes []*ast.Node
left []*ast.Node
right []*ast.Node
}{
// A single node: nothing to compare with, so no common parts are expected.
{
nodes: []*ast.Node{
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"z"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
),
},
},
// Same length, only the middle child differs.
{
nodes: []*ast.Node{
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"z"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
),
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
),
},
left: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"a"}),
},
right: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"c"}),
},
},
// Different lengths: the shorter node is fully split between left and right.
{
nodes: []*ast.Node{
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
ast.NewNode(ast.KindText, ast.Text{"d"}),
),
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
ast.NewNode(ast.KindText, ast.Text{"d"}),
),
},
left: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
},
right: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"c"}),
ast.NewNode(ast.KindText, ast.Text{"d"}),
},
},
// Different lengths with an odd number of children in the shorter node.
{
nodes: []*ast.Node{
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
),
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
),
},
left: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
},
right: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"c"}),
},
},
// Three nodes sharing only the first child.
{
nodes: []*ast.Node{
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"d"}),
),
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"d"}),
),
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"e"}),
),
},
left: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"a"}),
},
right: []*ast.Node{},
},
} {
left, right := commonChildren(test.nodes)
if !nodesEqual(left, test.left) {
t.Errorf("[%d] left, right := commonChildren(); left = %v; want %v", i, left, test.left)
}
if !nodesEqual(right, test.right) {
t.Errorf("[%d] left, right := commonChildren(); right = %v; want %v", i, right, test.right)
}
}
}
// nodesEqual reports whether a and b have the same length and every node of
// a is Equal to the node at the same position in b.
func nodesEqual(a, b []*ast.Node) bool {
	if len(a) == len(b) {
		for i := 0; i < len(a); i++ {
			if !a[i].Equal(b[i]) {
				return false
			}
		}
		return true
	}
	return false
}
// TestGlueMatchers checks sequences that are expected to be glued into a
// single Min or EveryOf matcher. Note that it drives them through
// compileMatchers rather than calling glueMatchers directly.
func TestGlueMatchers(t *testing.T) {
for id, test := range []struct {
in []match.Matcher
exp match.Matcher
}{
// Super followed by one separator-less Single.
{
[]match.Matcher{
match.NewSuper(),
match.NewSingle(nil),
},
match.NewMin(1),
},
// Any and Single sharing the same separators.
{
[]match.Matcher{
match.NewAny(separators),
match.NewSingle(separators),
},
match.EveryOf{match.Matchers{
match.NewMin(1),
match.NewContains(string(separators), true),
}},
},
// Three separator-less Singles: exactly three characters.
{
[]match.Matcher{
match.NewSingle(nil),
match.NewSingle(nil),
match.NewSingle(nil),
},
match.EveryOf{match.Matchers{
match.NewMin(3),
match.NewMax(3),
}},
},
// Negated List and Any over the same rune set.
{
[]match.Matcher{
match.NewList([]rune{'a'}, true),
match.NewAny([]rune{'a'}),
},
match.EveryOf{match.Matchers{
match.NewMin(1),
match.NewContains("a", true),
}},
},
} {
act, err := compileMatchers(test.in)
if err != nil {
t.Errorf("#%d convert matchers error: %s", id, err)
continue
}
if !reflect.DeepEqual(act, test.exp) {
t.Errorf("#%d unexpected convert matchers result:\nact: %#v;\nexp: %#v", id, act, test.exp)
continue
}
}
}
// TestCompileMatchers checks the BTree and Row structures compileMatchers
// builds for sequences that cannot be glued into one Min/EveryOf matcher.
func TestCompileMatchers(t *testing.T) {
for id, test := range []struct {
in []match.Matcher
exp match.Matcher
}{
// Text is the BTree value; the preceding matchers form a nested left BTree.
{
[]match.Matcher{
match.NewSuper(),
match.NewSingle(separators),
match.NewText("c"),
},
match.NewBTree(
match.NewText("c"),
match.NewBTree(
match.NewSingle(separators),
match.NewSuper(),
nil,
),
nil,
),
},
// Text in the middle with an Any on each side.
{
[]match.Matcher{
match.NewAny(nil),
match.NewText("c"),
match.NewAny(nil),
},
match.NewBTree(
match.NewText("c"),
match.NewAny(nil),
match.NewAny(nil),
),
},
// Four matchers expected to collapse into one Row of length 4.
{
[]match.Matcher{
match.NewRange('a', 'c', true),
match.NewList([]rune{'z', 't', 'e'}, false),
match.NewText("c"),
match.NewSingle(nil),
},
match.NewRow(
4,
match.Matchers{
match.NewRange('a', 'c', true),
match.NewList([]rune{'z', 't', 'e'}, false),
match.NewText("c"),
match.NewSingle(nil),
}...,
),
},
} {
act, err := compileMatchers(test.in)
if err != nil {
t.Errorf("#%d convert matchers error: %s", id, err)
continue
}
if !reflect.DeepEqual(act, test.exp) {
t.Errorf("#%d unexpected convert matchers result:\nact: %#v\nexp: %#v", id, act, test.exp)
continue
}
}
}
// TestConvertMatchers checks how minimizeMatchers shortens a matcher sequence
// by replacing runs of matchers with a single glued matcher.
func TestConvertMatchers(t *testing.T) {
for id, test := range []struct {
in, exp []match.Matcher
}{
// The four leading matchers become one Row; the trailing Any is kept.
{
[]match.Matcher{
match.NewRange('a', 'c', true),
match.NewList([]rune{'z', 't', 'e'}, false),
match.NewText("c"),
match.NewSingle(nil),
match.NewAny(nil),
},
[]match.Matcher{
match.NewRow(
4,
[]match.Matcher{
match.NewRange('a', 'c', true),
match.NewList([]rune{'z', 't', 'e'}, false),
match.NewText("c"),
match.NewSingle(nil),
}...,
),
match.NewAny(nil),
},
},
// The Single/Any tail becomes Min(3); the three matchers before it form a Row.
{
[]match.Matcher{
match.NewRange('a', 'c', true),
match.NewList([]rune{'z', 't', 'e'}, false),
match.NewText("c"),
match.NewSingle(nil),
match.NewAny(nil),
match.NewSingle(nil),
match.NewSingle(nil),
match.NewAny(nil),
},
[]match.Matcher{
match.NewRow(
3,
match.Matchers{
match.NewRange('a', 'c', true),
match.NewList([]rune{'z', 't', 'e'}, false),
match.NewText("c"),
}...,
),
match.NewMin(3),
},
},
} {
act := minimizeMatchers(test.in)
if !reflect.DeepEqual(act, test.exp) {
t.Errorf("#%d unexpected convert matchers 2 result:\nact: %#v\nexp: %#v", id, act, test.exp)
continue
}
}
}
func TestCompiler(t *testing.T) {
for id, test := range []struct {
ast *ast.Node
result match.Matcher
sep []rune
for _, test := range []struct {
name string
ast *ast.Node
exp match.Matcher
sep []rune
}{
{
// #0
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindText, ast.Text{"abc"}),
),
result: match.NewText("abc"),
exp: match.NewText("abc"),
},
{
// #1
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindAny, nil),
),
sep: separators,
result: match.NewAny(separators),
sep: separators,
exp: match.NewAny(separators),
},
{
// #2
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindAny, nil),
),
result: match.NewSuper(),
exp: match.NewSuper(),
},
{
// #3
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindSuper, nil),
),
result: match.NewSuper(),
exp: match.NewSuper(),
},
{
// #4
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindSingle, nil),
),
sep: separators,
result: match.NewSingle(separators),
sep: separators,
exp: match.NewSingle(separators),
},
{
// #5
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindRange, ast.Range{
Lo: 'a',
@ -360,18 +63,20 @@ func TestCompiler(t *testing.T) {
Not: true,
}),
),
result: match.NewRange('a', 'z', true),
exp: match.NewRange('a', 'z', true),
},
{
// #6
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindList, ast.List{
Chars: "abc",
Not: true,
}),
),
result: match.NewList([]rune{'a', 'b', 'c'}, true),
exp: match.NewList([]rune{'a', 'b', 'c'}, true),
},
{
// #7
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindAny, nil),
ast.NewNode(ast.KindSingle, nil),
@ -379,40 +84,40 @@ func TestCompiler(t *testing.T) {
ast.NewNode(ast.KindSingle, nil),
),
sep: separators,
result: match.EveryOf{Matchers: match.Matchers{
exp: match.NewEveryOf([]match.Matcher{
match.NewMin(3),
match.NewContains(string(separators), true),
}},
match.NewAny(separators),
}),
},
{
// #8
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindAny, nil),
ast.NewNode(ast.KindSingle, nil),
ast.NewNode(ast.KindSingle, nil),
ast.NewNode(ast.KindSingle, nil),
),
result: match.NewMin(3),
exp: match.NewMin(3),
},
{
// #9
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindAny, nil),
ast.NewNode(ast.KindText, ast.Text{"abc"}),
ast.NewNode(ast.KindSingle, nil),
),
sep: separators,
result: match.NewBTree(
match.NewRow(
4,
match.Matchers{
match.NewText("abc"),
match.NewSingle(separators),
}...,
),
exp: match.NewTree(
match.NewRow([]match.MatchIndexSizer{
match.NewText("abc"),
match.NewSingle(separators),
}),
match.NewAny(separators),
nil,
match.Nothing{},
),
},
{
// #10
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindText, ast.Text{"/"}),
ast.NewNode(ast.KindAnyOf, nil,
@ -422,17 +127,21 @@ func TestCompiler(t *testing.T) {
ast.NewNode(ast.KindSuper, nil),
),
sep: separators,
result: match.NewBTree(
exp: match.NewTree(
match.NewText("/"),
nil,
match.NewBTree(
match.NewAnyOf(match.NewText("z"), match.NewText("ab")),
nil,
match.Nothing{},
match.NewTree(
match.MustIndexedAnyOf(
match.NewText("z"),
match.NewText("ab"),
),
match.Nothing{},
match.NewSuper(),
),
),
},
{
// #11
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindSuper, nil),
ast.NewNode(ast.KindSingle, nil),
@ -440,42 +149,43 @@ func TestCompiler(t *testing.T) {
ast.NewNode(ast.KindSingle, nil),
),
sep: separators,
result: match.NewBTree(
match.NewRow(
5,
match.Matchers{
match.NewSingle(separators),
match.NewText("abc"),
match.NewSingle(separators),
}...,
),
exp: match.NewTree(
match.NewRow([]match.MatchIndexSizer{
match.NewSingle(separators),
match.NewText("abc"),
match.NewSingle(separators),
}),
match.NewSuper(),
nil,
match.Nothing{},
),
},
{
// #12
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindAny, nil),
ast.NewNode(ast.KindText, ast.Text{"abc"}),
),
result: match.NewSuffix("abc"),
exp: match.NewSuffix("abc"),
},
{
// #13
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindText, ast.Text{"abc"}),
ast.NewNode(ast.KindAny, nil),
),
result: match.NewPrefix("abc"),
exp: match.NewPrefix("abc"),
},
{
// #14
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindText, ast.Text{"abc"}),
ast.NewNode(ast.KindAny, nil),
ast.NewNode(ast.KindText, ast.Text{"def"}),
),
result: match.NewPrefixSuffix("abc", "def"),
exp: match.NewPrefixSuffix("abc", "def"),
},
{
// #15
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindAny, nil),
ast.NewNode(ast.KindAny, nil),
@ -484,9 +194,10 @@ func TestCompiler(t *testing.T) {
ast.NewNode(ast.KindAny, nil),
ast.NewNode(ast.KindAny, nil),
),
result: match.NewContains("abc", false),
exp: match.NewContains("abc"),
},
{
// #16
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindAny, nil),
ast.NewNode(ast.KindAny, nil),
@ -496,13 +207,15 @@ func TestCompiler(t *testing.T) {
ast.NewNode(ast.KindAny, nil),
),
sep: separators,
result: match.NewBTree(
exp: match.NewTree(
match.NewText("abc"),
match.NewAny(separators),
match.NewAny(separators),
),
},
{
// #17
// pattern: "**?abc**?"
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindSuper, nil),
ast.NewNode(ast.KindSingle, nil),
@ -510,19 +223,21 @@ func TestCompiler(t *testing.T) {
ast.NewNode(ast.KindSuper, nil),
ast.NewNode(ast.KindSingle, nil),
),
result: match.NewBTree(
exp: match.NewTree(
match.NewText("abc"),
match.NewMin(1),
match.NewMin(1),
),
},
{
// #18
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindText, ast.Text{"abc"}),
),
result: match.NewText("abc"),
exp: match.NewText("abc"),
},
{
// #19
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindAnyOf, nil,
ast.NewNode(ast.KindPattern, nil,
@ -534,9 +249,10 @@ func TestCompiler(t *testing.T) {
),
),
),
result: match.NewText("abc"),
exp: match.NewText("abc"),
},
{
// #20
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindAnyOf, nil,
ast.NewNode(ast.KindPattern, nil,
@ -555,35 +271,34 @@ func TestCompiler(t *testing.T) {
),
),
),
result: match.NewBTree(
exp: match.NewTree(
match.NewText("abc"),
nil,
match.AnyOf{Matchers: match.Matchers{
match.Nothing{},
match.NewAnyOf(
match.NewSingle(nil),
match.NewList([]rune{'d', 'e', 'f'}, false),
match.NewNothing(),
}},
),
),
},
{
// #21
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindRange, ast.Range{Lo: 'a', Hi: 'z'}),
ast.NewNode(ast.KindRange, ast.Range{Lo: 'a', Hi: 'x', Not: true}),
ast.NewNode(ast.KindAny, nil),
),
result: match.NewBTree(
match.NewRow(
2,
match.Matchers{
match.NewRange('a', 'z', false),
match.NewRange('a', 'x', true),
}...,
),
nil,
exp: match.NewTree(
match.NewRow([]match.MatchIndexSizer{
match.NewRange('a', 'z', false),
match.NewRange('a', 'x', true),
}),
match.Nothing{},
match.NewSuper(),
),
},
{
// #22
ast: ast.NewNode(ast.KindPattern, nil,
ast.NewNode(ast.KindAnyOf, nil,
ast.NewNode(ast.KindPattern, nil,
@ -598,28 +313,29 @@ func TestCompiler(t *testing.T) {
),
),
),
result: match.NewRow(
7,
match.Matchers{
match.NewText("abc"),
match.AnyOf{Matchers: match.Matchers{
match.NewList([]rune{'a', 'b', 'c'}, false),
match.NewList([]rune{'d', 'e', 'f'}, false),
}},
match.NewText("ghi"),
}...,
),
exp: match.NewRow([]match.MatchIndexSizer{
match.NewText("abc"),
match.MustIndexedSizedAnyOf(
match.NewList([]rune{'a', 'b', 'c'}, false),
match.NewList([]rune{'d', 'e', 'f'}, false),
),
match.NewText("ghi"),
}),
},
} {
m, err := Compile(test.ast, test.sep)
if err != nil {
t.Errorf("compilation error: %s", err)
continue
}
if !reflect.DeepEqual(m, test.result) {
t.Errorf("[%d] Compile():\nexp: %#v\nact: %#v\n\ngraphviz:\nexp:\n%s\nact:\n%s\n", id, test.result, m, debug.Graphviz("", test.result.(match.Matcher)), debug.Graphviz("", m.(match.Matcher)))
continue
}
t.Run(test.name, func(t *testing.T) {
act, err := Compile(test.ast, test.sep)
if err != nil {
t.Fatalf("compilation error: %s", err)
}
if !reflect.DeepEqual(act, test.exp) {
t.Errorf(
"Compile():\nact: %#v\nexp: %#v\n\ngraphviz:\n%s\n%s\n",
act, test.exp,
match.Graphviz("act", act.(match.Matcher)),
match.Graphviz("exp", test.exp.(match.Matcher)),
)
}
})
}
}

43
glob.go
View File

@ -1,8 +1,8 @@
package glob
import (
"git.internal/re/glob/compiler"
"git.internal/re/glob/syntax"
"github.com/gobwas/glob/compiler"
"github.com/gobwas/glob/syntax"
)
// Glob represents compiled glob pattern.
@ -13,28 +13,29 @@ type Glob interface {
// Compile creates Glob for given pattern and strings (if any present after pattern) as separators.
// The pattern syntax is:
//
// pattern:
// { term }
// pattern:
// { term }
//
// term:
// `*` matches any sequence of non-separator characters
// `**` matches any sequence of characters
// `?` matches any single non-separator character
// `[` [ `!` ] { character-range } `]`
// character class (must be non-empty)
// `{` pattern-list `}`
// pattern alternatives
// c matches character c (c != `*`, `**`, `?`, `\`, `[`, `{`, `}`)
// `\` c matches character c
// term:
// `*` matches any sequence of non-separator characters
// `**` matches any sequence of characters
// `?` matches any single non-separator character
// `[` [ `!` ] { character-range } `]`
// character class (must be non-empty)
// `{` pattern-list `}`
// pattern alternatives
// c matches character c (c != `*`, `**`, `?`, `\`, `[`, `{`, `}`)
// `\` c matches character c
//
// character-range:
// c matches character c (c != `\\`, `-`, `]`)
// `\` c matches character c
// lo `-` hi matches character c for lo <= c <= hi
// character-range:
// c matches character c (c != `\\`, `-`, `]`)
// `\` c matches character c
// lo `-` hi matches character c for lo <= c <= hi
//
// pattern-list:
// pattern { `,` pattern }
// comma-separated (without spaces) patterns
//
// pattern-list:
// pattern { `,` pattern }
// comma-separated (without spaces) patterns
func Compile(pattern string, separators ...rune) (Glob, error) {
ast, err := syntax.Parse(pattern)
if err != nil {

View File

@ -1,8 +1,11 @@
package glob
import (
"fmt"
"regexp"
"testing"
"github.com/gobwas/glob/match"
)
const (
@ -57,7 +60,30 @@ type test struct {
}
func glob(s bool, p, m string, d ...rune) test {
return test{p, m, s, d}
return test{
should: s,
pattern: p,
match: m,
delimiters: d,
}
}
func globc(p string, d ...rune) test {
return test{pattern: p, delimiters: d}
}
func TestCompilation(t *testing.T) {
for _, test := range []test{
globc("{*,**,?}", '.'),
globc("{*.google.*,yandex.*}", '.'),
} {
t.Run("", func(t *testing.T) {
_, err := Compile(test.pattern, test.delimiters...)
if err != nil {
t.Fatal(err)
}
})
}
}
func TestGlob(t *testing.T) {
@ -134,6 +160,8 @@ func TestGlob(t *testing.T) {
glob(true, "*//{,*.}example.com", "http://example.com"),
glob(false, "*//{,*.}example.com", "http://example.com.net"),
glob(true, "{a*,b}c", "abc", '.'),
glob(true, pattern_all, fixture_all_match),
glob(false, pattern_all, fixture_all_mismatch),
@ -164,6 +192,11 @@ func TestGlob(t *testing.T) {
glob(false, pattern_prefix_suffix, fixture_prefix_suffix_mismatch),
} {
t.Run("", func(t *testing.T) {
defer func() {
if thePanic := recover(); thePanic != nil {
t.Fatalf("panic recovered: %v", thePanic)
}
}()
g := MustCompile(test.pattern, test.delimiters...)
result := g.Match(test.match)
if result != test.should {
@ -220,20 +253,10 @@ func BenchmarkParseRegexp(b *testing.B) {
func BenchmarkAllGlobMatch(b *testing.B) {
m, _ := Compile(pattern_all)
for i := 0; i < b.N; i++ {
_ = m.Match(fixture_all_match)
}
}
func BenchmarkAllGlobMatchParallel(b *testing.B) {
m, _ := Compile(pattern_all)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_ = m.Match(fixture_all_match)
}
})
}
func BenchmarkAllRegexpMatch(b *testing.B) {
m := regexp.MustCompile(regexp_all)
@ -243,22 +266,16 @@ func BenchmarkAllRegexpMatch(b *testing.B) {
_ = m.Match(f)
}
}
func BenchmarkAllGlobMismatch(b *testing.B) {
m, _ := Compile(pattern_all)
m := MustCompile(pattern_all)
fmt.Println(match.Graphviz(pattern_all, m.(match.Matcher)))
for i := 0; i < b.N; i++ {
_ = m.Match(fixture_all_mismatch)
}
}
func BenchmarkAllGlobMismatchParallel(b *testing.B) {
m, _ := Compile(pattern_all)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_ = m.Match(fixture_all_mismatch)
}
})
}
func BenchmarkAllRegexpMismatch(b *testing.B) {
m := regexp.MustCompile(regexp_all)
f := []byte(fixture_all_mismatch)

3
go.mod
View File

@ -1,3 +0,0 @@
module git.internal/re/glob
go 1.19

View File

@ -0,0 +1,17 @@
// +build !globdebug
package debug
// Enabled reports whether debug tracing is compiled in. This file is built
// when the globdebug build tag is absent, so tracing is disabled.
const Enabled = false
// Logf is a no-op when tracing is disabled.
func Logf(string, ...interface{}) {}
// Enter is a no-op when tracing is disabled.
func Enter() {}
// Leave is a no-op when tracing is disabled.
func Leave() {}
// EnterPrefix is a no-op when tracing is disabled.
func EnterPrefix(string, ...interface{}) {}
// LeavePrefix is a no-op when tracing is disabled.
func LeavePrefix() {}
// Indexing always panics when tracing is disabled.
// NOTE(review): callers are presumably expected to guard the call with
// `if debug.Enabled` — confirm at every call site.
func Indexing(n, s string) func(int, []int) {
panic("must never be called")
}
// Matching always panics when tracing is disabled.
// NOTE(review): callers are presumably expected to guard the call with
// `if debug.Enabled` — confirm at every call site.
func Matching(n, s string) func(bool) {
panic("must never be called")
}

View File

@ -0,0 +1,64 @@
// +build globdebug
package debug
import (
"fmt"
"os"
"strings"
)
// Enabled reports whether debug tracing is compiled in. This file is built
// only with the globdebug build tag, so tracing is enabled.
const Enabled = true
var (
// i is the current nesting depth: Enter increments it, Leave decrements it.
// NOTE(review): plain package-level state with no synchronization visible here.
i = 0
// prefix maps a nesting depth to the message prefix installed by
// EnterPrefix for that depth; LeavePrefix resets the entry to "".
prefix = map[int]string{}
)
// Logf writes one trace line to standard error.
//
// The line consists of one space per nesting level, the nesting level in
// parentheses, the prefix registered for the current level (if any) and the
// formatted message. When both the prefix and the format are non-empty the
// message is separated from the prefix by ": ".
func Logf(f string, args ...interface{}) {
	current := prefix[i]
	if current != "" && f != "" {
		f = ": " + f
	}
	indent := strings.Repeat(" ", i)
	level := fmt.Sprintf("(%d) ", i)
	message := fmt.Sprintf(f, args...)
	fmt.Fprint(os.Stderr, indent+level+current+message+"\n")
}
// Indexing opens a trace scope for an Index call of the matcher called name
// on input s and returns a function that must be called with the result: it
// logs the index and segments and closes the scope again.
func Indexing(name, s string) func(int, []int) {
EnterPrefix("%s: index: %q", name, s)
return func(index int, segments []int) {
Logf("-> %d, %v", index, segments)
LeavePrefix()
}
}
// Matching opens a trace scope for a Match call of the matcher called name
// on input s and returns a function that must be called with the result: it
// logs the boolean outcome and closes the scope again.
func Matching(name, s string) func(bool) {
EnterPrefix("%s: match %q", name, s)
return func(ok bool) {
Logf("-> %t", ok)
LeavePrefix()
}
}
// EnterPrefix descends one nesting level, registers the formatted string as
// the prefix for that level and logs a line consisting of the prefix alone.
func EnterPrefix(s string, args ...interface{}) {
Enter()
prefix[i] = fmt.Sprintf(s, args...)
// An empty format makes Logf print just the freshly installed prefix.
Logf("")
}
// LeavePrefix clears the prefix of the current nesting level and then
// ascends one level. It is the counterpart of EnterPrefix.
func LeavePrefix() {
prefix[i] = ""
Leave()
}
// Enter increases the nesting depth used to indent trace output by one.
func Enter() {
i++
}
// Leave decreases the nesting depth used to indent trace output by one.
func Leave() {
i--
}

View File

@ -3,23 +3,23 @@ package match
import (
"fmt"
"git.internal/re/glob/util/strings"
"github.com/gobwas/glob/util/runes"
)
type Any struct {
Separators []rune
sep []rune
}
func NewAny(s []rune) Any {
return Any{s}
}
func (self Any) Match(s string) bool {
return strings.IndexAnyRunes(s, self.Separators) == -1
func (a Any) Match(s string) bool {
return runes.IndexAnyRune(s, a.sep) == -1
}
func (self Any) Index(s string) (int, []int) {
found := strings.IndexAnyRunes(s, self.Separators)
func (a Any) Index(s string) (int, []int) {
found := runes.IndexAnyRune(s, a.sep)
switch found {
case -1:
case 0:
@ -37,10 +37,10 @@ func (self Any) Index(s string) (int, []int) {
return 0, segments
}
func (self Any) Len() int {
return lenNo
func (a Any) MinLen() int {
return 0
}
func (self Any) String() string {
return fmt.Sprintf("<any:![%s]>", string(self.Separators))
func (a Any) String() string {
return fmt.Sprintf("<any:![%s]>", string(a.sep))
}

View File

@ -1,82 +1,122 @@
package match
import "fmt"
import (
"fmt"
"github.com/gobwas/glob/internal/debug"
)
type AnyOf struct {
Matchers Matchers
ms []Matcher
min int
}
func NewAnyOf(m ...Matcher) AnyOf {
return AnyOf{Matchers(m)}
func NewAnyOf(ms ...Matcher) Matcher {
a := AnyOf{ms, minLen(ms)}
if mis, ok := MatchIndexers(ms); ok {
x := IndexedAnyOf{a, mis}
if msz, ok := MatchIndexSizers(ms); ok {
sz := -1
for _, m := range msz {
n := m.RunesCount()
if sz == -1 {
sz = n
} else if sz != n {
sz = -1
break
}
}
if sz != -1 {
return IndexedSizedAnyOf{x, sz}
}
}
return x
}
return a
}
func (self *AnyOf) Add(m Matcher) error {
self.Matchers = append(self.Matchers, m)
return nil
func MustIndexedAnyOf(ms ...Matcher) MatchIndexer {
return NewAnyOf(ms...).(MatchIndexer)
}
func (self AnyOf) Match(s string) bool {
for _, m := range self.Matchers {
func MustIndexedSizedAnyOf(ms ...Matcher) MatchIndexSizer {
return NewAnyOf(ms...).(MatchIndexSizer)
}
func (a AnyOf) Match(s string) (ok bool) {
if debug.Enabled {
done := debug.Matching("any_of", s)
defer func() { done(ok) }()
}
for _, m := range a.ms {
if m.Match(s) {
return true
}
}
return false
}
func (self AnyOf) Index(s string) (int, []int) {
index := -1
func (a AnyOf) MinLen() (n int) {
return a.min
}
segments := acquireSegments(len(s))
for _, m := range self.Matchers {
idx, seg := m.Index(s)
if idx == -1 {
func (a AnyOf) Content(cb func(Matcher)) {
for _, m := range a.ms {
cb(m)
}
}
func (a AnyOf) String() string {
return fmt.Sprintf("<any_of:[%s]>", Matchers(a.ms))
}
type IndexedAnyOf struct {
AnyOf
ms []MatchIndexer
}
func (a IndexedAnyOf) Index(s string) (index int, segments []int) {
if debug.Enabled {
done := debug.Indexing("any_of", s)
defer func() { done(index, segments) }()
}
index = -1
segments = acquireSegments(len(s))
for _, m := range a.ms {
if debug.Enabled {
debug.Logf("indexing: any_of: trying %s", m)
}
i, seg := m.Index(s)
if i == -1 {
continue
}
if index == -1 || idx < index {
index = idx
if index == -1 || i < index {
index = i
segments = append(segments[:0], seg...)
continue
}
if idx > index {
if i > index {
continue
}
// here idx == index
// here i == index
segments = appendMerge(segments, seg)
}
if index == -1 {
releaseSegments(segments)
return -1, nil
}
return index, segments
}
func (self AnyOf) Len() (l int) {
l = -1
for _, m := range self.Matchers {
ml := m.Len()
switch {
case l == -1:
l = ml
continue
case ml == -1:
return -1
case l != ml:
return -1
}
}
return
func (a IndexedAnyOf) String() string {
return fmt.Sprintf("<indexed_any_of:[%s]>", a.ms)
}
func (self AnyOf) String() string {
return fmt.Sprintf("<any_of:[%s]>", self.Matchers)
type IndexedSizedAnyOf struct {
IndexedAnyOf
runes int
}
func (a IndexedSizedAnyOf) RunesCount() int {
return a.runes
}

View File

@ -5,7 +5,7 @@ import (
"testing"
)
func TestAnyOfIndex(t *testing.T) {
func TestIndexedAnyOf(t *testing.T) {
for id, test := range []struct {
matchers Matchers
fixture string
@ -41,13 +41,15 @@ func TestAnyOfIndex(t *testing.T) {
[]int{1},
},
} {
everyOf := NewAnyOf(test.matchers...)
index, segments := everyOf.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
}
if !reflect.DeepEqual(segments, test.segments) {
t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments)
}
t.Run("", func(t *testing.T) {
a := NewAnyOf(test.matchers...).(Indexer)
index, segments := a.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
}
if !reflect.DeepEqual(segments, test.segments) {
t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments)
}
})
}
}

View File

@ -1,185 +0,0 @@
package match
import (
"fmt"
"unicode/utf8"
)
// BTree is a matcher made of a central Value matcher with optional Left and
// Right matchers that must match the text before and after the part matched
// by Value.
type BTree struct {
// Value is located inside the input with Index; it must not be nil.
Value Matcher
// Left must match everything before Value's match; nil means "empty".
Left Matcher
// Right must match everything after Value's match; nil means "empty".
Right Matcher
// The *LengthRunes fields cache the Len() results gathered by NewBTree;
// -1 marks a length that is not known.
ValueLengthRunes int
LeftLengthRunes int
RightLengthRunes int
// LengthRunes is the sum of the three lengths above, or -1 when any of
// them is unknown.
LengthRunes int
}
// NewBTree builds a BTree around Value with the optional Left and Right
// branches (either may be nil) and caches the lengths they report.
//
// The total length is the sum of the three cached lengths, or -1 as soon as
// one of the present matchers reports -1 from Len.
func NewBTree(Value, Left, Right Matcher) (tree BTree) {
	tree.Value, tree.Left, tree.Right = Value, Left, Right

	tree.ValueLengthRunes = Value.Len()
	known := tree.ValueLengthRunes != -1

	if Left != nil {
		tree.LeftLengthRunes = Left.Len()
		known = known && tree.LeftLengthRunes != -1
	}
	if Right != nil {
		tree.RightLengthRunes = Right.Len()
		known = known && tree.RightLengthRunes != -1
	}

	tree.LengthRunes = -1
	if known {
		tree.LengthRunes = tree.LeftLengthRunes + tree.ValueLengthRunes + tree.RightLengthRunes
	}
	return tree
}
// Len returns the cached total length of the tree as computed by NewBTree,
// or -1 when it is not known.
func (self BTree) Len() int {
return self.LengthRunes
}
// Index is not implemented for BTree: it always returns -1 and nil segments.
//
// TODO: the commented-out body below is an unfinished sketch of a real
// implementation and is kept for reference only.
func (self BTree) Index(s string) (index int, segments []int) {
//inputLen := len(s)
//// try to cut unnecessary parts
//// by knowledge of length of right and left part
//offset, limit := self.offsetLimit(inputLen)
//for offset < limit {
// // search for matching part in substring
// vi, segments := self.Value.Index(s[offset:limit])
// if index == -1 {
// return -1, nil
// }
// if self.Left == nil {
// if index != offset {
// return -1, nil
// }
// } else {
// left := s[:offset+vi]
// i := self.Left.IndexSuffix(left)
// if i == -1 {
// return -1, nil
// }
// index = i
// }
// if self.Right != nil {
// for _, seg := range segments {
// right := s[:offset+vi+seg]
// }
// }
// l := s[:offset+index]
// var left bool
// if self.Left != nil {
// left = self.Left.Index(l)
// } else {
// left = l == ""
// }
//}
return -1, nil
}
// Match reports whether s matches the tree: some occurrence of Value must be
// found such that Left matches everything before it and Right matches
// everything after it. A nil Left or Right only accepts the empty string.
func (self BTree) Match(s string) bool {
inputLen := len(s)
// try to cut unnecessary parts
// by knowledge of length of right and left part
offset, limit := self.offsetLimit(inputLen)
for offset < limit {
// search for matching part in substring
index, segments := self.Value.Index(s[offset:limit])
if index == -1 {
releaseSegments(segments)
return false
}
// index is relative to offset, so the left part ends at offset+index.
l := s[:offset+index]
var left bool
if self.Left != nil {
left = self.Left.Match(l)
} else {
left = l == ""
}
if left {
// Try every candidate length of the Value match, walking the
// segments slice from its last element to its first.
for i := len(segments) - 1; i >= 0; i-- {
length := segments[i]
var right bool
var r string
// if there is no string for the right branch
if inputLen <= offset+index+length {
r = ""
} else {
r = s[offset+index+length:]
}
if self.Right != nil {
right = self.Right.Match(r)
} else {
right = r == ""
}
if right {
releaseSegments(segments)
return true
}
}
}
// No luck at this position: restart the search one rune past the
// position where Value was found.
_, step := utf8.DecodeRuneInString(s[offset+index:])
offset += index + step
releaseSegments(segments)
}
return false
}
// offsetLimit returns the window [offset, limit) of an input of inputLen
// bytes in which the Value matcher has to be searched.
//
// The cached lengths are counted in runes while inputLen is counted in
// bytes. The comparison is still safe because a rune occupies at least one
// byte. A (0, 0) window means the input is too short to match at all.
func (t BTree) offsetLimit(inputLen int) (offset int, limit int) {
	if total := t.LengthRunes; total != -1 && total > inputLen {
		return 0, 0
	}

	limit = inputLen
	if right := t.RightLengthRunes; right >= 0 {
		limit -= right
	}
	if left := t.LeftLengthRunes; left >= 0 {
		offset = left
	}
	return offset, limit
}
// String renders the tree as "<btree:[left<-value->right]>". A missing
// branch is printed as "<nil>".
func (t BTree) String() string {
	describe := func(m Matcher) string {
		if m == nil {
			return "<nil>"
		}
		return m.String()
	}
	return fmt.Sprintf("<btree:[%s<-%s->%s]>", describe(t.Left), t.Value, describe(t.Right))
}

View File

@ -1,90 +0,0 @@
package match
import (
"testing"
)
// TestBTree checks BTree.Match against a table of trees, inputs and the
// expected match result.
func TestBTree(t *testing.T) {
for id, test := range []struct {
tree BTree
str string
exp bool
}{
{
NewBTree(NewText("abc"), NewSuper(), NewSuper()),
"abc",
true,
},
{
NewBTree(NewText("a"), NewSingle(nil), NewSingle(nil)),
"aaa",
true,
},
{
// No right branch: nothing may follow the matched value.
NewBTree(NewText("b"), NewSingle(nil), nil),
"bbb",
false,
},
{
// Nested tree used as the left branch.
NewBTree(
NewText("c"),
NewBTree(
NewSingle(nil),
NewSuper(),
nil,
),
nil,
),
"abc",
true,
},
} {
act := test.tree.Match(test.str)
if act != test.exp {
t.Errorf("#%d match %q error: act: %t; exp: %t", id, test.str, act, test.exp)
continue
}
}
}
// fakeMatcher is a benchmark stub: it matches everything, reports a fixed
// length and always finds itself at index 0.
type fakeMatcher struct {
	len  int
	name string
}

// Match accepts every input.
func (f *fakeMatcher) Match(string) bool { return true }

// i is the number of segments reported by fakeMatcher.Index.
var i = 3

// Index reports a match at position 0 with the segments 0, 1, ..., i-1.
func (f *fakeMatcher) Index(s string) (int, []int) {
	seg := make([]int, i)
	for x := range seg {
		seg[x] = x
	}
	return 0, seg
}

// Len returns the length the stub was configured with.
func (f *fakeMatcher) Len() int { return f.len }

// String returns the name the stub was configured with.
func (f *fakeMatcher) String() string { return f.name }
// BenchmarkMatchBTree measures BTree.Match on a tree built from fakeMatcher
// stubs, calling it in parallel via b.RunParallel.
func BenchmarkMatchBTree(b *testing.B) {
l := &fakeMatcher{4, "left_fake"}
r := &fakeMatcher{4, "right_fake"}
v := &fakeMatcher{2, "value_fake"}
// must be <= len(l + r + v)
fixture := "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"
bt := NewBTree(v, l, r)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
bt.Match(fixture)
}
})
}

View File

@ -6,29 +6,33 @@ import (
)
type Contains struct {
Needle string
Not bool
s string
not bool
}
func NewContains(needle string, not bool) Contains {
return Contains{needle, not}
func NewContains(needle string) Contains {
return Contains{needle, false}
}
func (self Contains) Match(s string) bool {
return strings.Contains(s, self.Needle) != self.Not
func NewNotContains(needle string) Contains {
return Contains{needle, true}
}
func (self Contains) Index(s string) (int, []int) {
func (c Contains) Match(s string) bool {
return strings.Contains(s, c.s) != c.not
}
func (c Contains) Index(s string) (int, []int) {
var offset int
idx := strings.Index(s, self.Needle)
idx := strings.Index(s, c.s)
if !self.Not {
if !c.not {
if idx == -1 {
return -1, nil
}
offset = idx + len(self.Needle)
offset = idx + len(c.s)
if len(s) <= offset {
return 0, []int{offset}
}
@ -45,14 +49,14 @@ func (self Contains) Index(s string) (int, []int) {
return 0, append(segments, offset+len(s))
}
func (self Contains) Len() int {
return lenNo
func (c Contains) MinLen() int {
return 0
}
func (self Contains) String() string {
func (c Contains) String() string {
var not string
if self.Not {
if c.not {
not = "!"
}
return fmt.Sprintf("<contains:%s[%s]>", not, self.Needle)
return fmt.Sprintf("<contains:%s[%s]>", not, c.s)
}

View File

@ -42,7 +42,7 @@ func TestContainsIndex(t *testing.T) {
[]int{0, 1, 2, 3},
},
} {
p := NewContains(test.prefix, test.not)
p := Contains{test.prefix, test.not}
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -54,8 +54,7 @@ func TestContainsIndex(t *testing.T) {
}
func BenchmarkIndexContains(b *testing.B) {
m := NewContains(string(bench_separators), true)
m := Contains{string(bench_separators), true}
for i := 0; i < b.N; i++ {
_, s := m.Index(bench_pattern)
releaseSegments(s)
@ -63,8 +62,7 @@ func BenchmarkIndexContains(b *testing.B) {
}
func BenchmarkIndexContainsParallel(b *testing.B) {
m := NewContains(string(bench_separators), true)
m := Contains{string(bench_separators), true}
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)

69
match/debug.go Normal file
View File

@ -0,0 +1,69 @@
package match
import (
"bytes"
"fmt"
"math/rand"
"os"
"strings"
"sync/atomic"
)
// i is the current nesting depth of the trace output. It is only accessed
// through sync/atomic by logf, enter and leave.
var i = new(int32)
// logf writes one trace line to standard error: one space per nesting
// level, the nesting level in parentheses and the formatted message.
func logf(f string, args ...interface{}) {
	depth := int(atomic.LoadInt32(i))
	indent := strings.Repeat(" ", depth)
	level := fmt.Sprintf("(%d) ", depth)
	message := fmt.Sprintf(f, args...)
	fmt.Fprint(os.Stderr, indent+level+message+"\n")
}
// enter atomically increases the trace nesting depth by one.
func enter() {
atomic.AddInt32(i, 1)
}
// leave atomically decreases the trace nesting depth by one.
func leave() {
atomic.AddInt32(i, -1)
}
// Graphviz returns a Graphviz DOT digraph describing the matcher m. The
// pattern is used verbatim as the graph label; node names are random.
func Graphviz(pattern string, m Matcher) string {
	root := fmt.Sprintf("%x", rand.Int63())
	body := graphviz(m, root)
	return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, body)
}
// graphviz renders m, and recursively every matcher reachable from it, as a
// sequence of Graphviz DOT statements. The node representing m is named id;
// child nodes get random hexadecimal names.
//
//   - A Tree becomes a node labelled with its value plus one edge per branch;
//     a nil branch is drawn as a "<nil>" leaf.
//   - A Container becomes a node labelled with its Go type plus one edge per
//     matcher reported by Content.
//   - Any other matcher becomes a single node labelled with its %s form.
//
// Output of recursive calls is appended verbatim. It must never be used as
// a format string, because matcher labels may contain '%'.
func graphviz(m Matcher, id string) string {
	buf := &bytes.Buffer{}
	switch v := m.(type) {
	case Tree:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, v.value)
		for _, m := range []Matcher{v.left, v.right} {
			switch n := m.(type) {
			case nil:
				rnd := rand.Int63()
				fmt.Fprintf(buf, `"%x"[label="<nil>"];`, rnd)
				fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
			default:
				sub := fmt.Sprintf("%x", rand.Int63())
				fmt.Fprintf(buf, `"%s"->"%s";`, id, sub)
				buf.WriteString(graphviz(n, sub))
			}
		}
	case Container:
		fmt.Fprintf(buf, `"%s"[label="Container(%T)"];`, id, m)
		v.Content(func(m Matcher) {
			rnd := rand.Int63()
			buf.WriteString(graphviz(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		})
	default:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m)
	}
	return buf.String()
}

View File

@ -1,56 +0,0 @@
package debug
import (
"bytes"
"fmt"
"math/rand"
"git.internal/re/glob/match"
)
// Graphviz returns a Graphviz DOT digraph describing the matcher m. The
// pattern is used verbatim as the graph label; node names are random.
func Graphviz(pattern string, m match.Matcher) string {
	root := fmt.Sprintf("%x", rand.Int63())
	body := graphviz_internal(m, root)
	return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, body)
}
// graphviz_internal renders m, and recursively every matcher reachable from
// it, as a sequence of Graphviz DOT statements. The node representing m is
// named id; child nodes get random hexadecimal names.
//
//   - A match.BTree becomes a node labelled with its value plus one edge per
//     branch; a nil branch is drawn as a "<nil>" leaf.
//   - match.AnyOf and match.EveryOf become a node labelled with the type
//     name plus one edge per contained matcher.
//   - Any other matcher becomes a single node labelled with its String form.
//
// Output of recursive calls is appended verbatim. It must never be used as
// a format string, because matcher labels may contain '%'.
func graphviz_internal(m match.Matcher, id string) string {
	buf := &bytes.Buffer{}
	switch matcher := m.(type) {
	case match.BTree:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, matcher.Value.String())
		for _, m := range []match.Matcher{matcher.Left, matcher.Right} {
			switch n := m.(type) {
			case nil:
				rnd := rand.Int63()
				fmt.Fprintf(buf, `"%x"[label="<nil>"];`, rnd)
				fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
			default:
				sub := fmt.Sprintf("%x", rand.Int63())
				fmt.Fprintf(buf, `"%s"->"%s";`, id, sub)
				buf.WriteString(graphviz_internal(n, sub))
			}
		}
	case match.AnyOf:
		fmt.Fprintf(buf, `"%s"[label="AnyOf"];`, id)
		for _, m := range matcher.Matchers {
			rnd := rand.Int63()
			buf.WriteString(graphviz_internal(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}
	case match.EveryOf:
		fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id)
		for _, m := range matcher.Matchers {
			rnd := rand.Int63()
			buf.WriteString(graphviz_internal(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}
	default:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m.String())
	}
	return buf.String()
}

View File

@ -5,31 +5,50 @@ import (
)
type EveryOf struct {
Matchers Matchers
ms []Matcher
min int
}
func NewEveryOf(m ...Matcher) EveryOf {
return EveryOf{Matchers(m)}
func NewEveryOf(ms []Matcher) Matcher {
e := EveryOf{
ms: ms,
min: maxLen(ms),
}
if mis, ok := MatchIndexers(ms); ok {
return IndexedEveryOf{e, mis}
}
return e
}
func (self *EveryOf) Add(m Matcher) error {
self.Matchers = append(self.Matchers, m)
return nil
func (e EveryOf) MinLen() (n int) {
return e.min
}
func (self EveryOf) Len() (l int) {
for _, m := range self.Matchers {
if ml := m.Len(); l > 0 {
l += ml
} else {
return -1
func (e EveryOf) Match(s string) bool {
for _, m := range e.ms {
if !m.Match(s) {
return false
}
}
return
return true
}
func (self EveryOf) Index(s string) (int, []int) {
func (e EveryOf) Content(cb func(Matcher)) {
for _, m := range e.ms {
cb(m)
}
}
func (e EveryOf) String() string {
return fmt.Sprintf("<every_of:[%s]>", e.ms)
}
type IndexedEveryOf struct {
EveryOf
ms []MatchIndexer
}
func (e IndexedEveryOf) Index(s string) (int, []int) {
var index int
var offset int
@ -39,7 +58,7 @@ func (self EveryOf) Index(s string) (int, []int) {
current := acquireSegments(len(s))
sub := s
for i, m := range self.Matchers {
for i, m := range e.ms {
idx, seg := m.Index(sub)
if idx == -1 {
releaseSegments(next)
@ -84,16 +103,6 @@ func (self EveryOf) Index(s string) (int, []int) {
return index, current
}
func (self EveryOf) Match(s string) bool {
for _, m := range self.Matchers {
if !m.Match(s) {
return false
}
}
return true
}
func (self EveryOf) String() string {
return fmt.Sprintf("<every_of:[%s]>", self.Matchers)
func (e IndexedEveryOf) String() string {
return fmt.Sprintf("<indexed_every_of:[%s]>", e.ms)
}

View File

@ -5,7 +5,7 @@ import (
"testing"
)
func TestEveryOfIndex(t *testing.T) {
func TestIndexedEveryOf(t *testing.T) {
for id, test := range []struct {
matchers Matchers
fixture string
@ -33,7 +33,7 @@ func TestEveryOfIndex(t *testing.T) {
[]int{2},
},
} {
everyOf := NewEveryOf(test.matchers...)
everyOf := NewEveryOf(test.matchers).(IndexedEveryOf)
index, segments := everyOf.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)

View File

@ -4,47 +4,49 @@ import (
"fmt"
"unicode/utf8"
"git.internal/re/glob/util/runes"
"github.com/gobwas/glob/util/runes"
)
type List struct {
List []rune
Not bool
rs []rune
not bool
}
func NewList(list []rune, not bool) List {
return List{list, not}
func NewList(rs []rune, not bool) List {
return List{rs, not}
}
func (self List) Match(s string) bool {
func (l List) Match(s string) bool {
r, w := utf8.DecodeRuneInString(s)
if len(s) > w {
// Invalid rune.
return false
}
inList := runes.IndexRune(self.List, r) != -1
return inList == !self.Not
inList := runes.IndexRune(l.rs, r) != -1
return inList == !l.not
}
func (self List) Len() int {
return lenOne
func (l List) MinLen() int {
return 1
}
func (self List) Index(s string) (int, []int) {
func (l List) RunesCount() int {
return 1
}
func (l List) Index(s string) (int, []int) {
for i, r := range s {
if self.Not == (runes.IndexRune(self.List, r) == -1) {
if l.not == (runes.IndexRune(l.rs, r) == -1) {
return i, segmentsByRuneLength[utf8.RuneLen(r)]
}
}
return -1, nil
}
func (self List) String() string {
func (l List) String() string {
var not string
if self.Not {
if l.not {
not = "!"
}
return fmt.Sprintf("<list:%s[%s]>", not, string(self.List))
return fmt.Sprintf("<list:%s[%s]>", not, string(l.rs))
}

View File

@ -7,15 +7,63 @@ import (
"strings"
)
const lenOne = 1
const lenZero = 0
const lenNo = -1
type Matcher interface {
Match(string) bool
MinLen() int
}
type Indexer interface {
Index(string) (int, []int)
Len() int
String() string
}
type Sizer interface {
RunesCount() int
}
type MatchIndexer interface {
Matcher
Indexer
}
type MatchSizer interface {
Matcher
Sizer
}
type MatchIndexSizer interface {
Matcher
Indexer
Sizer
}
type Container interface {
Content(func(Matcher))
}
func MatchIndexers(ms []Matcher) ([]MatchIndexer, bool) {
for _, m := range ms {
if _, ok := m.(MatchIndexer); !ok {
return nil, false
}
}
r := make([]MatchIndexer, len(ms))
for i := range r {
r[i] = ms[i].(MatchIndexer)
}
return r, true
}
func MatchIndexSizers(ms []Matcher) ([]MatchIndexSizer, bool) {
for _, m := range ms {
if _, ok := m.(MatchIndexSizer); !ok {
return nil, false
}
}
r := make([]MatchIndexSizer, len(ms))
for i := range r {
r[i] = ms[i].(MatchIndexSizer)
}
return r, true
}
type Matchers []Matcher

View File

@ -6,32 +6,31 @@ import (
)
type Max struct {
Limit int
n int
}
func NewMax(l int) Max {
return Max{l}
func NewMax(n int) Max {
return Max{n}
}
func (self Max) Match(s string) bool {
var l int
func (m Max) Match(s string) bool {
var n int
for range s {
l += 1
if l > self.Limit {
n += 1
if n > m.n {
return false
}
}
return true
}
func (self Max) Index(s string) (int, []int) {
segments := acquireSegments(self.Limit + 1)
func (m Max) Index(s string) (int, []int) {
segments := acquireSegments(m.n + 1)
segments = append(segments, 0)
var count int
for i, r := range s {
count++
if count > self.Limit {
if count > m.n {
break
}
segments = append(segments, i+utf8.RuneLen(r))
@ -40,10 +39,10 @@ func (self Max) Index(s string) (int, []int) {
return 0, segments
}
func (self Max) Len() int {
return lenNo
func (m Max) MinLen() int {
return 0
}
func (self Max) String() string {
return fmt.Sprintf("<max:%d>", self.Limit)
func (m Max) String() string {
return fmt.Sprintf("<max:%d>", m.n)
}

View File

@ -6,52 +6,48 @@ import (
)
type Min struct {
Limit int
n int
}
func NewMin(l int) Min {
return Min{l}
func NewMin(n int) Min {
return Min{n}
}
func (self Min) Match(s string) bool {
var l int
func (m Min) Match(s string) bool {
var n int
for range s {
l += 1
if l >= self.Limit {
n += 1
if n >= m.n {
return true
}
}
return false
}
func (self Min) Index(s string) (int, []int) {
func (m Min) Index(s string) (int, []int) {
var count int
c := len(s) - self.Limit + 1
c := len(s) - m.n + 1
if c <= 0 {
return -1, nil
}
segments := acquireSegments(c)
for i, r := range s {
count++
if count >= self.Limit {
if count >= m.n {
segments = append(segments, i+utf8.RuneLen(r))
}
}
if len(segments) == 0 {
return -1, nil
}
return 0, segments
}
func (self Min) Len() int {
return lenNo
func (m Min) MinLen() int {
return m.n
}
func (self Min) String() string {
return fmt.Sprintf("<min:%d>", self.Limit)
func (m Min) String() string {
return fmt.Sprintf("<min:%d>", m.n)
}

View File

@ -18,8 +18,12 @@ func (self Nothing) Index(s string) (int, []int) {
return 0, segments0
}
func (self Nothing) Len() int {
return lenZero
func (self Nothing) MinLen() int {
return 0
}
func (self Nothing) RunesCount() int {
return 0
}
func (self Nothing) String() string {

422
match/optimize.go Normal file
View File

@ -0,0 +1,422 @@
package match
import (
"fmt"
"github.com/gobwas/glob/internal/debug"
"github.com/gobwas/glob/util/runes"
)
// Optimize returns a simpler matcher equivalent to m when one of the known
// rewrites applies, and m itself otherwise:
//
//   - an Any without separators becomes a Super;
//   - a non-negated List holding exactly one rune becomes a Text;
//   - a Tree whose value is a Text and whose branches are empty, Super, Any,
//     Prefix or Suffix in the combinations listed below becomes the matching
//     Text/Contains/Prefix/Suffix/PrefixSuffix/PrefixAny/SuffixAny matcher;
//   - a Container reporting exactly one matcher becomes that matcher.
//
// With debug tracing enabled, a line is logged whenever the %s form of the
// result differs from the %s form of the input.
func Optimize(m Matcher) (opt Matcher) {
if debug.Enabled {
defer func() {
a := fmt.Sprintf("%s", m)
b := fmt.Sprintf("%s", opt)
if a != b {
debug.EnterPrefix("optimized %s: -> %s", a, b)
debug.LeavePrefix()
}
}()
}
switch v := m.(type) {
case Any:
if len(v.sep) == 0 {
return NewSuper()
}
case List:
if v.not == false && len(v.rs) == 1 {
return NewText(string(v.rs))
}
return m
case Tree:
// v is the value bound by the type switch, so the optimized branches are
// stored on v only and serve the pattern detection below.
// NOTE(review): every fall-through path returns m, not v, so optimized
// branches are dropped when no rewrite applies — confirm this is intended.
v.left = Optimize(v.left)
v.right = Optimize(v.right)
txt, ok := v.value.(Text)
if !ok {
return m
}
var (
leftNil = v.left == nil || v.left == Nothing{}
rightNil = v.right == nil || v.right == Nothing{}
)
if leftNil && rightNil {
return NewText(txt.s)
}
_, leftSuper := v.left.(Super)
lp, leftPrefix := v.left.(Prefix)
la, leftAny := v.left.(Any)
_, rightSuper := v.right.(Super)
rs, rightSuffix := v.right.(Suffix)
ra, rightAny := v.right.(Any)
switch {
case leftSuper && rightSuper:
return NewContains(txt.s)
case leftSuper && rightNil:
return NewSuffix(txt.s)
case rightSuper && leftNil:
return NewPrefix(txt.s)
case leftNil && rightSuffix:
return NewPrefixSuffix(txt.s, rs.s)
case rightNil && leftPrefix:
return NewPrefixSuffix(lp.s, txt.s)
case rightNil && leftAny:
return NewSuffixAny(txt.s, la.sep)
case leftNil && rightAny:
return NewPrefixAny(txt.s, ra.sep)
}
case Container:
var (
first Matcher
n int
)
// first is overwritten on every callback; it is only used when n == 1,
// in which case it is the sole child.
v.Content(func(m Matcher) {
first = m
n++
})
if n == 1 {
return first
}
return m
}
return m
}
// Compile combines the sequence ms into a single matcher.
//
// An empty sequence is an error and a single matcher is returned as is.
// Otherwise the sequence is first offered to glueMatchers. If that fails,
// one MatchIndexer is chosen as pivot, the matchers before and after it are
// compiled recursively and the three parts are joined with NewTree. An
// empty side is represented by Nothing{}.
//
// It returns an error when no matcher in ms implements MatchIndexer.
func Compile(ms []Matcher) (m Matcher, err error) {
if debug.Enabled {
debug.EnterPrefix("compiling %s", ms)
defer func() {
debug.Logf("-> %s, %v", m, err)
debug.LeavePrefix()
}()
}
if len(ms) == 0 {
return nil, fmt.Errorf("compile error: need at least one matcher")
}
if len(ms) == 1 {
return ms[0], nil
}
if m := glueMatchers(ms); m != nil {
return m, nil
}
var (
x = -1
max = -2
wantText bool
indexer MatchIndexer
)
// Pivot selection: a Text always beats a non-Text candidate. Among
// candidates of the same kind the one with the strictly greater MinLen
// wins, so ties keep the earliest candidate.
for i, m := range ms {
mx, ok := m.(MatchIndexer)
if !ok {
continue
}
_, isText := m.(Text)
if wantText && !isText {
continue
}
n := m.MinLen()
if (!wantText && isText) || n > max {
max = n
x = i
indexer = mx
wantText = isText
}
}
if indexer == nil {
return nil, fmt.Errorf("can not index on matchers")
}
left := ms[:x]
var right []Matcher
if len(ms) > x+1 {
right = ms[x+1:]
}
var (
l Matcher = Nothing{}
r Matcher = Nothing{}
)
if len(left) > 0 {
l, err = Compile(left)
if err != nil {
return nil, err
}
}
if len(right) > 0 {
r, err = Compile(right)
if err != nil {
return nil, err
}
}
return NewTree(indexer, l, r), nil
}
// glueMatchers tries to replace the whole sequence ms by one matcher. The
// strategies are tried in order and the first non-nil result wins; nil means
// that the sequence cannot be glued.
func glueMatchers(ms []Matcher) Matcher {
	strategies := []func([]Matcher) Matcher{
		glueMatchersAsEvery,
		glueMatchersAsRow,
	}
	for _, glue := range strategies {
		if glued := glue(ms); glued != nil {
			return glued
		}
	}
	return nil
}
// glueMatchersAsRow combines ms into a single Row matcher. It returns nil
// when ms has fewer than two elements or when any element does not
// implement MatchIndexSizer.
func glueMatchersAsRow(ms []Matcher) Matcher {
	if len(ms) < 2 {
		return nil
	}
	row := make([]MatchIndexSizer, len(ms))
	for i, m := range ms {
		sized, ok := m.(MatchIndexSizer)
		if !ok {
			return nil
		}
		row[i] = sized
	}
	return NewRow(row)
}
// glueMatchersAsEvery tries to express a sequence made only of Super, Any,
// Single and negated List matchers that all share the same separator set as
// a combination of Min, Max and Any constraints.
//
// It returns nil when ms has fewer than two elements, contains any other
// kind of matcher (including a non-negated List), or mixes different
// separator sets.
func glueMatchersAsEvery(ms []Matcher) Matcher {
if len(ms) <= 1 {
return nil
}
var (
hasAny bool
hasSuper bool
hasSingle bool
min int
separator []rune
)
for i, matcher := range ms {
var sep []rune
switch m := matcher.(type) {
case Super:
sep = []rune{}
hasSuper = true
case Any:
sep = m.sep
hasAny = true
case Single:
sep = m.sep
hasSingle = true
min++
case List:
if !m.not {
return nil
}
// A negated list is treated as a single rune whose excluded runes act
// as the separator set.
sep = m.rs
hasSingle = true
min++
default:
return nil
}
// initialize
if i == 0 {
separator = sep
}
if runes.Equal(sep, separator) {
continue
}
return nil
}
if hasSuper && !hasAny && !hasSingle {
return NewSuper()
}
if hasAny && !hasSuper && !hasSingle {
return NewAny(separator)
}
if (hasAny || hasSuper) && min > 0 && len(separator) == 0 {
return NewMin(min)
}
var every []Matcher
if min > 0 {
every = append(every, NewMin(min))
// Without Any/Super the sequence consists of single-rune matchers only,
// so the length is bounded from above as well.
if !hasAny && !hasSuper {
every = append(every, NewMax(min))
}
}
if len(separator) > 0 {
every = append(every, NewAny(separator))
}
return NewEveryOf(every)
}
// result is one candidate produced by minimize together with the metrics
// that compareResult uses to rank candidates.
type result struct {
// ms is the candidate sequence of matchers.
ms []Matcher
// matchers is matchersCount(ms).
matchers int
// maxMinLen is maxMinLen(ms).
maxMinLen int
// sumMinLen is sumMinLen(ms).
sumMinLen int
// nesting is maxNestingDepth(ms).
nesting int
}
// compareResult orders two candidates. A negative value means a is better
// than b, a positive value means b is better, zero means they are equal.
//
// The criteria are applied in order and the first difference decides:
// greater sumMinLen, fewer top-level matchers, smaller nesting, fewer
// matchers in total, greater maxMinLen.
func compareResult(a, b result) int {
	criteria := [...]int{
		b.sumMinLen - a.sumMinLen,
		len(a.ms) - len(b.ms),
		a.nesting - b.nesting,
		a.matchers - b.matchers,
		b.maxMinLen - a.maxMinLen,
	}
	for _, diff := range criteria {
		if diff != 0 {
			return diff
		}
	}
	return 0
}
// collapse returns a copy of ms in which the elements ms[i:j] are replaced
// by the single matcher x. The input slice is left untouched.
func collapse(ms []Matcher, x Matcher, i, j int) (cp []Matcher) {
	cp = make([]Matcher, 0, len(ms)-(j-i)+1)
	cp = append(cp, ms[:i]...)
	cp = append(cp, x)
	cp = append(cp, ms[j:]...)
	return cp
}
// matchersCount returns the number of matchers in ms plus the number of
// matchers nested inside them at any depth.
func matchersCount(ms []Matcher) (n int) {
	for i := range ms {
		n += 1 + countNestedMatchers(ms[i])
	}
	return n
}
// countNestedMatchers returns how many matchers are nested inside m at any
// depth, not counting m itself. A matcher that is not a Container has none.
func countNestedMatchers(m Matcher) (n int) {
	container, ok := m.(Container)
	if !ok {
		return 0
	}
	container.Content(func(child Matcher) {
		n++
		n += countNestedMatchers(child)
	})
	return n
}
// nestingDepth returns how deeply Containers are nested in m: 0 for a
// matcher that is not a Container, otherwise one more than the deepest of
// its children.
func nestingDepth(m Matcher) (depth int) {
	container, isContainer := m.(Container)
	if !isContainer {
		return 0
	}
	deepest := 0
	container.Content(func(child Matcher) {
		d := nestingDepth(child)
		if d > deepest {
			deepest = d
		}
	})
	return deepest + 1
}
// maxMinLen returns the greatest MinLen among ms, or 0 when ms is empty or
// no MinLen is positive.
func maxMinLen(ms []Matcher) (max int) {
	for i := range ms {
		n := ms[i].MinLen()
		if n > max {
			max = n
		}
	}
	return max
}
// sumMinLen returns the total of MinLen over all matchers in ms.
func sumMinLen(ms []Matcher) (sum int) {
	for i := range ms {
		sum = sum + ms[i].MinLen()
	}
	return sum
}
// maxNestingDepth returns the greatest nestingDepth among ms, or 0 when ms
// is empty.
func maxNestingDepth(ms []Matcher) (max int) {
	for i := range ms {
		depth := nestingDepth(ms[i])
		if depth > max {
			max = depth
		}
	}
	return max
}
// minimize searches for the best way to glue sub-sequences of ms together.
//
// It visits the windows ms[i:j], starting with windows of two elements.
// Whenever glueMatchers succeeds on a window, the window is collapsed into
// the glued matcher, the resulting sequence is compared against best with
// compareResult, and the search continues recursively on the collapsed
// sequence. After that the next window of the original sequence is tried.
//
// Callers start the search with i == 0, j == 0 and best == nil. The return
// value is nil when no window could be glued at all.
func minimize(ms []Matcher, i, j int, best *result) *result {
// The window ran past the end: move on to the next start position.
if j > len(ms) {
j = 0
i++
}
// Fewer than two elements remain from i on, so nothing is left to glue.
if i > len(ms)-2 {
return best
}
// j == 0 marks a fresh start position; the smallest window has two elements.
if j == 0 {
j = i + 2
}
if g := glueMatchers(ms[i:j]); g != nil {
cp := collapse(ms, g, i, j)
r := result{
ms: cp,
matchers: matchersCount(cp),
sumMinLen: sumMinLen(cp),
maxMinLen: maxMinLen(cp),
nesting: maxNestingDepth(cp),
}
if debug.Enabled {
debug.EnterPrefix(
"intermediate: %s (matchers:%d, summinlen:%d, maxminlen:%d, nesting:%d)",
cp, r.matchers, r.sumMinLen, r.maxMinLen, r.nesting,
)
}
if best == nil {
best = new(result)
}
if best.ms == nil || compareResult(r, *best) < 0 {
*best = r
if debug.Enabled {
debug.Logf("new best result")
}
}
// Keep gluing on top of the collapsed sequence.
best = minimize(cp, 0, 0, best)
if debug.Enabled {
debug.LeavePrefix()
}
}
return minimize(ms, i, j+1, best)
}
// Minimize returns the best sequence found by gluing neighbouring matchers
// of ms together, or ms itself when nothing could be glued. With debug
// tracing enabled, the input and the result are logged.
func Minimize(ms []Matcher) (m []Matcher) {
	if debug.Enabled {
		debug.EnterPrefix("minimizing %s", ms)
		defer func() {
			debug.Logf("-> %s", m)
			debug.LeavePrefix()
		}()
	}
	if best := minimize(ms, 0, 0, nil); best != nil {
		return best.ms
	}
	return ms
}

165
match/optimize_test.go Normal file
View File

@ -0,0 +1,165 @@
package match
import (
"reflect"
"testing"
)
// separators is the separator set shared by the test cases in this file.
var separators = []rune{'.'}
// TestCompile is a table-driven test for Compile. Each entry pairs a list of
// matchers (in) with the single matcher Compile is expected to produce (exp).
// Results are compared with reflect.DeepEqual; on mismatch both values are
// printed together with their Graphviz renderings.
func TestCompile(t *testing.T) {
for _, test := range []struct {
in []Matcher
exp Matcher
}{
{
[]Matcher{
NewSuper(),
NewSingle(nil),
},
NewMin(1),
},
{
[]Matcher{
NewAny(separators),
NewSingle(separators),
},
NewEveryOf([]Matcher{
NewMin(1),
NewAny(separators),
}),
},
{
[]Matcher{
NewSingle(nil),
NewSingle(nil),
NewSingle(nil),
},
NewEveryOf([]Matcher{
NewMin(3),
NewMax(3),
}),
},
{
[]Matcher{
NewList([]rune{'a'}, true),
NewAny([]rune{'a'}),
},
NewEveryOf([]Matcher{
NewMin(1),
NewAny([]rune{'a'}),
}),
},
{
[]Matcher{
NewSuper(),
NewSingle(separators),
NewText("c"),
},
NewTree(
NewText("c"),
NewTree(
NewSingle(separators),
NewSuper(),
Nothing{},
),
Nothing{},
),
},
{
[]Matcher{
NewAny(nil),
NewText("c"),
NewAny(nil),
},
NewTree(
NewText("c"),
NewAny(nil),
NewAny(nil),
),
},
{
[]Matcher{
NewRange('a', 'c', true),
NewList([]rune{'z', 't', 'e'}, false),
NewText("c"),
NewSingle(nil),
},
NewRow([]MatchIndexSizer{
NewRange('a', 'c', true),
NewList([]rune{'z', 't', 'e'}, false),
NewText("c"),
NewSingle(nil),
}),
},
} {
t.Run("", func(t *testing.T) {
act, err := Compile(test.in)
if err != nil {
t.Fatalf("Compile() error: %s", err)
}
if !reflect.DeepEqual(act, test.exp) {
t.Errorf(
"Compile():\nact: %#v;\nexp: %#v;\ngraphviz:\n%s\n%s",
act, test.exp,
Graphviz("act", act), Graphviz("exp", test.exp),
)
}
})
}
}
// TestMinimize is a table-driven test for Minimize. Each entry pairs an input
// list of matchers (in) with the minimized list that is expected back (exp);
// the two are compared with reflect.DeepEqual.
func TestMinimize(t *testing.T) {
for _, test := range []struct {
in, exp []Matcher
}{
{
in: []Matcher{
NewRange('a', 'c', true),
NewList([]rune{'z', 't', 'e'}, false),
NewText("c"),
NewSingle(nil),
NewAny(nil),
},
exp: []Matcher{
NewRow([]MatchIndexSizer{
NewRange('a', 'c', true),
NewList([]rune{'z', 't', 'e'}, false),
NewText("c"),
}),
NewMin(1),
},
},
{
in: []Matcher{
NewRange('a', 'c', true),
NewList([]rune{'z', 't', 'e'}, false),
NewText("c"),
NewSingle(nil),
NewAny(nil),
NewSingle(nil),
NewSingle(nil),
NewAny(nil),
},
exp: []Matcher{
NewRow([]MatchIndexSizer{
NewRange('a', 'c', true),
NewList([]rune{'z', 't', 'e'}, false),
NewText("c"),
}),
NewMin(3),
},
},
} {
t.Run("", func(t *testing.T) {
act := Minimize(test.in)
if !reflect.DeepEqual(act, test.exp) {
t.Errorf(
"Minimize():\nact: %#v;\nexp: %#v",
act, test.exp,
)
}
})
}
}

View File

@ -7,20 +7,24 @@ import (
)
type Prefix struct {
Prefix string
s string
minSize int
}
func NewPrefix(p string) Prefix {
return Prefix{p}
return Prefix{
s: p,
minSize: utf8.RuneCountInString(p),
}
}
func (self Prefix) Index(s string) (int, []int) {
idx := strings.Index(s, self.Prefix)
func (p Prefix) Index(s string) (int, []int) {
idx := strings.Index(s, p.s)
if idx == -1 {
return -1, nil
}
length := len(self.Prefix)
length := len(p.s)
var sub string
if len(s) > idx+length {
sub = s[idx+length:]
@ -37,14 +41,14 @@ func (self Prefix) Index(s string) (int, []int) {
return idx, segments
}
func (self Prefix) Len() int {
return lenNo
func (p Prefix) MinLen() int {
return p.minSize
}
func (self Prefix) Match(s string) bool {
return strings.HasPrefix(s, self.Prefix)
func (p Prefix) Match(s string) bool {
return strings.HasPrefix(s, p.s)
}
func (self Prefix) String() string {
return fmt.Sprintf("<prefix:%s>", self.Prefix)
func (p Prefix) String() string {
return fmt.Sprintf("<prefix:%s>", p.s)
}

View File

@ -5,27 +5,28 @@ import (
"strings"
"unicode/utf8"
sutil "git.internal/re/glob/util/strings"
"github.com/gobwas/glob/util/runes"
)
type PrefixAny struct {
Prefix string
Separators []rune
s string
sep []rune
minLen int
}
func NewPrefixAny(s string, sep []rune) PrefixAny {
return PrefixAny{s, sep}
return PrefixAny{s, sep, utf8.RuneCountInString(s)}
}
func (self PrefixAny) Index(s string) (int, []int) {
idx := strings.Index(s, self.Prefix)
func (p PrefixAny) Index(s string) (int, []int) {
idx := strings.Index(s, p.s)
if idx == -1 {
return -1, nil
}
n := len(self.Prefix)
n := len(p.s)
sub := s[idx+n:]
i := sutil.IndexAnyRunes(sub, self.Separators)
i := runes.IndexAnyRune(sub, p.sep)
if i > -1 {
sub = sub[:i]
}
@ -39,17 +40,17 @@ func (self PrefixAny) Index(s string) (int, []int) {
return idx, seg
}
func (self PrefixAny) Len() int {
return lenNo
func (p PrefixAny) MinLen() int {
return p.minLen
}
func (self PrefixAny) Match(s string) bool {
if !strings.HasPrefix(s, self.Prefix) {
func (p PrefixAny) Match(s string) bool {
if !strings.HasPrefix(s, p.s) {
return false
}
return sutil.IndexAnyRunes(s[len(self.Prefix):], self.Separators) == -1
return runes.IndexAnyRune(s[len(p.s):], p.sep) == -1
}
func (self PrefixAny) String() string {
return fmt.Sprintf("<prefix_any:%s![%s]>", self.Prefix, string(self.Separators))
func (p PrefixAny) String() string {
return fmt.Sprintf("<prefix_any:%s![%s]>", p.s, string(p.sep))
}

View File

@ -3,23 +3,27 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
)
type PrefixSuffix struct {
Prefix, Suffix string
p, s string
minLen int
}
func NewPrefixSuffix(p, s string) PrefixSuffix {
return PrefixSuffix{p, s}
pn := utf8.RuneCountInString(p)
sn := utf8.RuneCountInString(s)
return PrefixSuffix{p, s, pn + sn}
}
func (self PrefixSuffix) Index(s string) (int, []int) {
prefixIdx := strings.Index(s, self.Prefix)
func (ps PrefixSuffix) Index(s string) (int, []int) {
prefixIdx := strings.Index(s, ps.p)
if prefixIdx == -1 {
return -1, nil
}
suffixLen := len(self.Suffix)
suffixLen := len(ps.s)
if suffixLen <= 0 {
return prefixIdx, []int{len(s) - prefixIdx}
}
@ -30,7 +34,7 @@ func (self PrefixSuffix) Index(s string) (int, []int) {
segments := acquireSegments(len(s) - prefixIdx)
for sub := s[prefixIdx:]; ; {
suffixIdx := strings.LastIndex(sub, self.Suffix)
suffixIdx := strings.LastIndex(sub, ps.s)
if suffixIdx == -1 {
break
}
@ -49,14 +53,14 @@ func (self PrefixSuffix) Index(s string) (int, []int) {
return prefixIdx, segments
}
func (self PrefixSuffix) Len() int {
return lenNo
func (ps PrefixSuffix) Match(s string) bool {
return strings.HasPrefix(s, ps.p) && strings.HasSuffix(s, ps.s)
}
func (self PrefixSuffix) Match(s string) bool {
return strings.HasPrefix(s, self.Prefix) && strings.HasSuffix(s, self.Suffix)
func (ps PrefixSuffix) MinLen() int {
return ps.minLen
}
func (self PrefixSuffix) String() string {
return fmt.Sprintf("<prefix_suffix:[%s,%s]>", self.Prefix, self.Suffix)
func (ps PrefixSuffix) String() string {
return fmt.Sprintf("<prefix_suffix:[%s,%s]>", ps.p, ps.s)
}

View File

@ -3,6 +3,8 @@ package match
import (
"fmt"
"unicode/utf8"
"github.com/gobwas/glob/internal/debug"
)
type Range struct {
@ -14,11 +16,19 @@ func NewRange(lo, hi rune, not bool) Range {
return Range{lo, hi, not}
}
func (self Range) Len() int {
return lenOne
func (self Range) MinLen() int {
return 1
}
func (self Range) Match(s string) bool {
func (self Range) RunesCount() int {
return 1
}
func (self Range) Match(s string) (ok bool) {
if debug.Enabled {
done := debug.Matching("range", s)
defer func() { done(ok) }()
}
r, w := utf8.DecodeRuneInString(s)
if len(s) > w {
return false
@ -29,7 +39,11 @@ func (self Range) Match(s string) bool {
return inRange == !self.Not
}
func (self Range) Index(s string) (int, []int) {
func (self Range) Index(s string) (index int, segments []int) {
if debug.Enabled {
done := debug.Indexing("range", s)
defer func() { done(index, segments) }()
}
for i, r := range s {
if self.Not != (r >= self.Lo && r <= self.Hi) {
return i, segmentsByRuneLength[utf8.RuneLen(r)]

View File

@ -2,76 +2,89 @@ package match
import (
"fmt"
"unicode/utf8"
"github.com/gobwas/glob/internal/debug"
"github.com/gobwas/glob/util/runes"
)
type Row struct {
Matchers Matchers
RunesLength int
Segments []int
ms []MatchIndexSizer
runes int
seg []int
}
func NewRow(len int, m ...Matcher) Row {
func NewRow(ms []MatchIndexSizer) Row {
var r int
for _, m := range ms {
r += m.RunesCount()
}
return Row{
Matchers: Matchers(m),
RunesLength: len,
Segments: []int{len},
ms: ms,
runes: r,
seg: []int{r},
}
}
func (self Row) matchAll(s string) bool {
var idx int
for _, m := range self.Matchers {
length := m.Len()
func (r Row) Match(s string) (ok bool) {
if debug.Enabled {
done := debug.Matching("row", s)
defer func() { done(ok) }()
}
if !runes.ExactlyRunesCount(s, r.runes) {
return false
}
return r.matchAll(s)
}
var next, i int
for next = range s[idx:] {
i++
if i == length {
break
}
}
func (r Row) MinLen() int {
return r.runes
}
if i < length || !m.Match(s[idx:idx+next+1]) {
return false
}
func (r Row) RunesCount() int {
return r.runes
}
idx += next + 1
func (r Row) Index(s string) (index int, segments []int) {
if debug.Enabled {
done := debug.Indexing("row", s)
debug.Logf("row: %d vs %d", len(s), r.runes)
defer func() { done(index, segments) }()
}
return true
}
func (self Row) lenOk(s string) bool {
var i int
for range s {
i++
if i > self.RunesLength {
return false
for j := 0; j <= len(s)-r.runes; { // NOTE: using len() here to avoid counting runes.
i, _ := r.ms[0].Index(s[j:])
if i == -1 {
return -1, nil
}
}
return self.RunesLength == i
}
func (self Row) Match(s string) bool {
return self.lenOk(s) && self.matchAll(s)
}
func (self Row) Len() (l int) {
return self.RunesLength
}
func (self Row) Index(s string) (int, []int) {
for i := range s {
if len(s[i:]) < self.RunesLength {
break
}
if self.matchAll(s[i:]) {
return i, self.Segments
if r.matchAll(s[i:]) {
return j + i, r.seg
}
_, x := utf8.DecodeRuneInString(s[i:])
j += x
}
return -1, nil
}
func (self Row) String() string {
return fmt.Sprintf("<row_%d:[%s]>", self.RunesLength, self.Matchers)
func (r Row) Content(cb func(Matcher)) {
for _, m := range r.ms {
cb(m)
}
}
func (r Row) String() string {
return fmt.Sprintf("<row_%d:%s>", r.runes, r.ms)
}
func (r Row) matchAll(s string) bool {
var i int
for _, m := range r.ms {
n := m.RunesCount()
sub := runes.Head(s[i:], n)
if !m.Match(sub) {
return false
}
i += len(sub)
}
return true
}

View File

@ -7,36 +7,33 @@ import (
func TestRowIndex(t *testing.T) {
for id, test := range []struct {
matchers Matchers
length int
matchers []MatchIndexSizer
fixture string
index int
segments []int
}{
{
Matchers{
[]MatchIndexSizer{
NewText("abc"),
NewText("def"),
NewSingle(nil),
},
7,
"qweabcdefghij",
3,
[]int{7},
},
{
Matchers{
[]MatchIndexSizer{
NewText("abc"),
NewText("def"),
NewSingle(nil),
},
7,
"abcd",
-1,
nil,
},
} {
p := NewRow(test.length, test.matchers...)
p := NewRow(test.matchers)
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -48,15 +45,11 @@ func TestRowIndex(t *testing.T) {
}
func BenchmarkRowIndex(b *testing.B) {
m := NewRow(
7,
Matchers{
NewText("abc"),
NewText("def"),
NewSingle(nil),
}...,
)
m := NewRow([]MatchIndexSizer{
NewText("abc"),
NewText("def"),
NewSingle(nil),
})
for i := 0; i < b.N; i++ {
_, s := m.Index(bench_pattern)
releaseSegments(s)
@ -64,15 +57,11 @@ func BenchmarkRowIndex(b *testing.B) {
}
func BenchmarkIndexRowParallel(b *testing.B) {
m := NewRow(
7,
Matchers{
NewText("abc"),
NewText("def"),
NewSingle(nil),
}...,
)
m := NewRow([]MatchIndexSizer{
NewText("abc"),
NewText("def"),
NewSingle(nil),
})
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)

View File

@ -4,41 +4,46 @@ import (
"fmt"
"unicode/utf8"
"git.internal/re/glob/util/runes"
"github.com/gobwas/glob/util/runes"
)
// single represents ?
type Single struct {
Separators []rune
sep []rune
}
func NewSingle(s []rune) Single {
return Single{s}
}
func (self Single) Match(s string) bool {
r, w := utf8.DecodeRuneInString(s)
if len(s) > w {
func (s Single) Match(v string) bool {
r, w := utf8.DecodeRuneInString(v)
if len(v) > w {
return false
}
return runes.IndexRune(self.Separators, r) == -1
return runes.IndexRune(s.sep, r) == -1
}
func (self Single) Len() int {
return lenOne
func (s Single) MinLen() int {
return 1
}
func (self Single) Index(s string) (int, []int) {
for i, r := range s {
if runes.IndexRune(self.Separators, r) == -1 {
func (s Single) RunesCount() int {
return 1
}
func (s Single) Index(v string) (int, []int) {
for i, r := range v {
if runes.IndexRune(s.sep, r) == -1 {
return i, segmentsByRuneLength[utf8.RuneLen(r)]
}
}
return -1, nil
}
func (self Single) String() string {
return fmt.Sprintf("<single:![%s]>", string(self.Separators))
func (s Single) String() string {
if len(s.sep) == 0 {
return "<single>"
}
return fmt.Sprintf("<single:![%s]>", string(s.sep))
}

View File

@ -3,33 +3,34 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
)
type Suffix struct {
Suffix string
s string
minLen int
}
func NewSuffix(s string) Suffix {
return Suffix{s}
return Suffix{s, utf8.RuneCountInString(s)}
}
func (self Suffix) Len() int {
return lenNo
func (s Suffix) MinLen() int {
return s.minLen
}
func (self Suffix) Match(s string) bool {
return strings.HasSuffix(s, self.Suffix)
func (s Suffix) Match(v string) bool {
return strings.HasSuffix(v, s.s)
}
func (self Suffix) Index(s string) (int, []int) {
idx := strings.Index(s, self.Suffix)
func (s Suffix) Index(v string) (int, []int) {
idx := strings.Index(v, s.s)
if idx == -1 {
return -1, nil
}
return 0, []int{idx + len(self.Suffix)}
return 0, []int{idx + len(s.s)}
}
func (self Suffix) String() string {
return fmt.Sprintf("<suffix:%s>", self.Suffix)
func (s Suffix) String() string {
return fmt.Sprintf("<suffix:%s>", s.s)
}

View File

@ -3,41 +3,43 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
sutil "git.internal/re/glob/util/strings"
"github.com/gobwas/glob/util/runes"
)
type SuffixAny struct {
Suffix string
Separators []rune
s string
sep []rune
minLen int
}
func NewSuffixAny(s string, sep []rune) SuffixAny {
return SuffixAny{s, sep}
return SuffixAny{s, sep, utf8.RuneCountInString(s)}
}
func (self SuffixAny) Index(s string) (int, []int) {
idx := strings.Index(s, self.Suffix)
func (s SuffixAny) Index(v string) (int, []int) {
idx := strings.Index(v, s.s)
if idx == -1 {
return -1, nil
}
i := sutil.LastIndexAnyRunes(s[:idx], self.Separators) + 1
i := runes.LastIndexAnyRune(v[:idx], s.sep) + 1
return i, []int{idx + len(self.Suffix) - i}
return i, []int{idx + len(s.s) - i}
}
func (self SuffixAny) Len() int {
return lenNo
func (s SuffixAny) MinLen() int {
return s.minLen
}
func (self SuffixAny) Match(s string) bool {
if !strings.HasSuffix(s, self.Suffix) {
func (s SuffixAny) Match(v string) bool {
if !strings.HasSuffix(v, s.s) {
return false
}
return sutil.IndexAnyRunes(s[:len(s)-len(self.Suffix)], self.Separators) == -1
return runes.IndexAnyRune(v[:len(v)-len(s.s)], s.sep) == -1
}
func (self SuffixAny) String() string {
return fmt.Sprintf("<suffix_any:![%s]%s>", string(self.Separators), self.Suffix)
func (s SuffixAny) String() string {
return fmt.Sprintf("<suffix_any:![%s]%s>", string(s.sep), s.s)
}

View File

@ -10,24 +10,23 @@ func NewSuper() Super {
return Super{}
}
func (self Super) Match(s string) bool {
func (s Super) Match(_ string) bool {
return true
}
func (self Super) Len() int {
return lenNo
func (s Super) MinLen() int {
return 0
}
func (self Super) Index(s string) (int, []int) {
segments := acquireSegments(len(s) + 1)
for i := range s {
segments = append(segments, i)
func (s Super) Index(v string) (int, []int) {
seg := acquireSegments(len(v) + 1)
for i := range v {
seg = append(seg, i)
}
segments = append(segments, len(s))
return 0, segments
seg = append(seg, len(v))
return 0, seg
}
func (self Super) String() string {
func (s Super) String() string {
return fmt.Sprintf("<super>")
}

View File

@ -8,38 +8,45 @@ import (
// raw represents raw string to match
type Text struct {
Str string
RunesLength int
BytesLength int
Segments []int
s string
runes int
bytes int
seg []int
}
func NewText(s string) Text {
return Text{
Str: s,
RunesLength: utf8.RuneCountInString(s),
BytesLength: len(s),
Segments: []int{len(s)},
s: s,
runes: utf8.RuneCountInString(s),
bytes: len(s),
seg: []int{len(s)},
}
}
func (self Text) Match(s string) bool {
return self.Str == s
func (t Text) Match(s string) bool {
return t.s == s
}
func (self Text) Len() int {
return self.RunesLength
}
func (self Text) Index(s string) (int, []int) {
index := strings.Index(s, self.Str)
if index == -1 {
func (t Text) Index(s string) (int, []int) {
i := strings.Index(s, t.s)
if i == -1 {
return -1, nil
}
return index, self.Segments
return i, t.seg
}
func (self Text) String() string {
return fmt.Sprintf("<text:`%v`>", self.Str)
func (t Text) MinLen() int {
return t.runes
}
func (t Text) BytesCount() int {
return t.bytes
}
func (t Text) RunesCount() int {
return t.runes
}
func (t Text) String() string {
return fmt.Sprintf("<text:`%v`>", t.s)
}

173
match/tree.go Normal file
View File

@ -0,0 +1,173 @@
package match
import (
"fmt"
"unicode/utf8"
"github.com/gobwas/glob/internal/debug"
"github.com/gobwas/glob/util/runes"
)
// Tree is a matcher built around a pivot matcher (value) with one matcher for
// the text before the pivot (left) and one for the text after it (right).
// Instances are created by NewTree, which fills in the cached sizes below.
type Tree struct {
// value is the pivot; Match locates it in the input through Index.
value MatchIndexer
// left must match the input before the pivot, right the input after it.
left Matcher
right Matcher
// minLen is MinLen() of value plus MinLen() of each non-nil side.
minLen int
// runes is vrunes+lrunes+rrunes. NewTree sets it only when value, left
// and right all implement Sizer; otherwise it stays zero.
runes int
// vrunes, lrunes and rrunes cache RunesCount() of value, left and right
// for the parts that implement Sizer; they stay zero for the others.
vrunes int
lrunes int
rrunes int
}
// SizedTree is a Tree whose total rune count is known. NewTree returns it
// instead of a plain Tree when value, left and right all implement Sizer.
type SizedTree struct {
Tree
}
// IndexedTree holds a pivot and two sides that are all MatchIndexer values.
// NOTE(review): nothing in the visible part of this file constructs or uses
// this type; it looks like a placeholder for future work — confirm before
// relying on it.
type IndexedTree struct {
value MatchIndexer
left MatchIndexer
right MatchIndexer
}
// RunesCount reports the rune count cached in the embedded Tree, i.e. the sum
// of the rune counts of value, left and right as computed by NewTree.
func (st SizedTree) RunesCount() int {
	return st.runes
}
// NewTree builds a tree matcher with pivot v, left side l and right side r.
//
// l and r may be nil as far as construction is concerned; v must not be nil.
// When v, l and r all implement Sizer, the total rune count is known and a
// SizedTree is returned; otherwise a plain Tree is returned.
func NewTree(v MatchIndexer, l, r Matcher) Matcher {
	t := Tree{value: v, left: l, right: r}

	// The minimal length of a match is the sum of the parts' minimal lengths.
	t.minLen = v.MinLen()
	if l != nil {
		t.minLen += l.MinLen()
	}
	if r != nil {
		t.minLen += r.MinLen()
	}

	// Cache the exact rune count of every part that can report one.
	leftSizer, leftSized := l.(Sizer)
	rightSizer, rightSized := r.(Sizer)
	valueSizer, valueSized := v.(Sizer)
	if leftSized {
		t.lrunes = leftSizer.RunesCount()
	}
	if rightSized {
		t.rrunes = rightSizer.RunesCount()
	}
	if valueSized {
		t.vrunes = valueSizer.RunesCount()
	}

	if !(valueSized && leftSized && rightSized) {
		return t
	}
	t.runes = t.vrunes + t.lrunes + t.rrunes
	return SizedTree{t}
}
// MinLen returns the minimal length cached by NewTree: MinLen() of the pivot
// plus MinLen() of each non-nil side.
func (t Tree) MinLen() int {
return t.minLen
}
// Content calls cb for the tree's parts in the order left, value, right.
// Nil sides are skipped; cb is always called for value.
func (t Tree) Content(cb func(Matcher)) {
	if l := t.left; l != nil {
		cb(l)
	}
	cb(t.value)
	if r := t.right; r != nil {
		cb(r)
	}
}
// Match reports whether s is matched by the tree.
//
// It repeatedly locates the pivot (value) inside the window left over after
// reserving room for the sides (see offsetLimit). For every occurrence the
// text before the pivot must match left, and for at least one of the segment
// lengths returned by the pivot's Index the text after the pivot must match
// right. When an occurrence fails, the search resumes one rune further.
//
// NewTree and Content both accept a nil left or right side, so Match accepts
// them too instead of dereferencing nil: a nil side matches only the empty
// string.
func (t Tree) Match(s string) (ok bool) {
	if debug.Enabled {
		done := debug.Matching("tree", s)
		defer func() { done(ok) }()
	}

	n := len(s)
	offset, limit := t.offsetLimit(s)

	// NOTE: the left-hand side is a byte count and vrunes a rune count; bytes
	// are never fewer than runes, so this is only a cheap lower-bound check.
	for len(s)-offset-limit >= t.vrunes {
		if debug.Enabled {
			debug.Logf(
				"value %s indexing: %q (offset=%d; limit=%d)",
				t.value, s[offset:n-limit], offset, limit,
			)
		}
		index, segments := t.value.Index(s[offset : n-limit])
		if debug.Enabled {
			debug.Logf(
				"value %s index: %d; %v",
				t.value, index, segments,
			)
		}
		if index == -1 {
			releaseSegments(segments)
			return false
		}

		head := s[:offset+index]
		if debug.Enabled {
			debug.Logf("matching left: %q", head)
		}
		var left bool
		if t.left != nil {
			left = t.left.Match(head)
		} else {
			left = head == ""
		}
		if debug.Enabled {
			debug.Logf("matching left: -> %t", left)
		}

		if left {
			// Every segment is a candidate byte length of the pivot's match.
			for _, seg := range segments {
				tail := s[offset+index+seg:]
				if debug.Enabled {
					debug.Logf("matching right: %q", tail)
				}
				var right bool
				if t.right != nil {
					right = t.right.Match(tail)
				} else {
					right = tail == ""
				}
				if debug.Enabled {
					debug.Logf("matching right: -> %t", right)
				}
				if right {
					releaseSegments(segments)
					return true
				}
			}
		}

		releaseSegments(segments)

		// Resume the search one rune after the start of this occurrence.
		_, x := utf8.DecodeRuneInString(s[offset+index:])
		if x == 0 {
			// No progress.
			break
		}
		offset = offset + index + x
	}

	return false
}
// offsetLimit returns how many bytes at the start (offset) and at the end
// (limit) of s are reserved for the left and right sides, based on their
// cached rune counts. Both are zero when s has fewer runes than the tree's
// known total, and each stays zero when the matching side has no cached
// rune count.
func (t Tree) offsetLimit(s string) (offset, limit int) {
	if utf8.RuneCountInString(s) < t.runes {
		return 0, 0
	}
	if t.lrunes > 0 {
		offset = len(runes.Head(s, t.lrunes))
	}
	if t.rrunes > 0 {
		limit = len(runes.Tail(s, t.rrunes))
	}
	return offset, limit
}
// String renders the tree as "<btree:[left<-value->right]>".
func (t Tree) String() string {
	const layout = "<btree:[%v<-%s->%v]>"
	return fmt.Sprintf(layout, t.left, t.value, t.right)
}

104
match/tree_test.go Normal file
View File

@ -0,0 +1,104 @@
package match
import (
"fmt"
"testing"
)
// TestTree is a table-driven test for the matcher returned by NewTree. Each
// entry holds a tree, an input string and the expected result of Match. On
// mismatch the tree's Graphviz rendering is printed to stdout.
//
// NOTE(review): the second and fourth entries are identical (pivot "a" with a
// Single on each side against "aaa"); one of them is redundant.
func TestTree(t *testing.T) {
for _, test := range []struct {
tree Matcher
str string
exp bool
}{
{
NewTree(NewText("x"), NewText("y"), NewText("z")),
"0000x1111",
false,
},
{
NewTree(NewText("a"), NewSingle(nil), NewSingle(nil)),
"aaa",
true,
},
{
NewTree(NewText("abc"), NewSuper(), NewSuper()),
"abc",
true,
},
{
NewTree(NewText("a"), NewSingle(nil), NewSingle(nil)),
"aaa",
true,
},
{
NewTree(NewText("b"), NewSingle(nil), Nothing{}),
"bbb",
false,
},
{
NewTree(
NewText("c"),
NewTree(
NewSingle(nil),
NewSuper(),
Nothing{},
),
Nothing{},
),
"abc",
true,
},
} {
t.Run("", func(t *testing.T) {
act := test.tree.Match(test.str)
if act != test.exp {
fmt.Println(Graphviz("NIL", test.tree))
t.Errorf("match %q error: act: %t; exp: %t", test.str, act, test.exp)
}
})
}
}
// fakeMatcher is a stub matcher for benchmarks: it accepts every input and
// reports fixed, configurable values.
type fakeMatcher struct {
	len  int    // value reported by MinLen
	segn int    // number of segments returned by Index, and the value of each
	name string // value reported by String
}

// Match accepts any input.
func (f *fakeMatcher) Match(string) bool { return true }

// Index always reports a match at position 0 with segn segments, each of
// which has the value segn.
func (f *fakeMatcher) Index(s string) (int, []int) {
	segments := make([]int, 0, f.segn)
	for len(segments) < f.segn {
		segments = append(segments, f.segn)
	}
	return 0, segments
}

// MinLen returns the configured minimal length.
func (f *fakeMatcher) MinLen() int { return f.len }

// String returns the configured name.
func (f *fakeMatcher) String() string { return f.name }
// BenchmarkMatchTree measures Match on a tree built from three fakeMatcher
// stubs, run from parallel goroutines.
func BenchmarkMatchTree(b *testing.B) {
	// must be <= len(l + r + v)
	const fixture = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"

	left := &fakeMatcher{4, 3, "left_fake"}
	right := &fakeMatcher{4, 3, "right_fake"}
	value := &fakeMatcher{2, 3, "value_fake"}
	tree := NewTree(value, left, right)

	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			tree.Match(fixture)
		}
	})
}

21
match/util.go Normal file
View File

@ -0,0 +1,21 @@
package match
// minLen reports the smallest MinLen() among ms, or zero when ms is empty.
func minLen(ms []Matcher) (min int) {
	if len(ms) == 0 {
		return 0
	}
	min = ms[0].MinLen()
	for _, m := range ms[1:] {
		if n := m.MinLen(); n < min {
			min = n
		}
	}
	return min
}
// maxLen reports the largest MinLen() among ms, or zero when ms is empty.
// Despite its name it is computed from MinLen(), not from a maximal length.
func maxLen(ms []Matcher) (max int) {
	if len(ms) == 0 {
		return 0
	}
	max = ms[0].MinLen()
	for _, m := range ms[1:] {
		if n := m.MinLen(); n > max {
			max = n
		}
	}
	return max
}

View File

@ -7,7 +7,7 @@
## Install
```shell
go get git.internal/re/glob
go get github.com/gobwas/glob
```
## Example
@ -16,7 +16,7 @@
package main
import "git.internal/re/glob"
import "github.com/gobwas/glob"
func main() {
var g glob.Glob
@ -137,8 +137,8 @@ Pattern | Fixture | Match | Speed (ns/op)
`^ab.*ef$` | `abcdef` | `true` | 375
`^ab.*ef$` | `af` | `false` | 145
[godoc-image]: https://godoc.org/git.internal/re/glob?status.svg
[godoc-url]: https://godoc.org/git.internal/re/glob
[godoc-image]: https://godoc.org/github.com/gobwas/glob?status.svg
[godoc-url]: https://godoc.org/github.com/gobwas/glob
[travis-image]: https://travis-ci.org/gobwas/glob.svg?branch=master
[travis-url]: https://travis-ci.org/gobwas/glob

165
syntax/ast/optimize.go Normal file
View File

@ -0,0 +1,165 @@
package ast
import (
"reflect"
)
// Minimize applies heuristics that try to reduce the number of nodes in the
// given tree t. Only nodes of kind KindAnyOf are handled; for every other
// kind, and whenever no reduction is found, it returns nil.
func Minimize(t *Node) *Node {
	if t.Kind != KindAnyOf {
		return nil
	}
	return minimizeAnyOf(t)
}
// minimizeAnyOf factors out the children that all alternatives of an AnyOf
// node share at their left and right ends.
//
// It returns nil when some alternative is not a pattern node or when the
// alternatives have nothing in common. Otherwise it returns a new pattern
// node made of: the common left part, the de-duplicated remainders of the
// alternatives, and the common right part.
func minimizeAnyOf(t *Node) *Node {
	if !SameKind(t.Children, KindPattern) {
		return nil
	}

	prefix, suffix := CommonChildren(t.Children)
	nPrefix, nSuffix := len(prefix), len(suffix)
	if nPrefix == 0 && nSuffix == 0 {
		// The alternatives share nothing at either end.
		return nil
	}

	var parts []*Node
	if nPrefix > 0 {
		parts = append(parts, NewNode(KindPattern, nil, prefix...))
	}

	var alternatives []*Node
	for _, child := range t.Children {
		middle := child.Children[nPrefix : len(child.Children)-nSuffix]

		var alt *Node
		if len(middle) > 0 {
			alt = NewNode(KindPattern, nil, middle...)
		} else {
			// The common parts consumed this alternative completely, so what
			// is left of it matches nothing.
			alt = NewNode(KindNothing, nil)
		}
		alternatives = AppendUnique(alternatives, alt)
	}

	if n := len(alternatives); n > 1 {
		parts = append(parts, NewNode(KindAnyOf, nil, alternatives...))
	} else if n == 1 && alternatives[0].Kind != KindNothing {
		parts = append(parts, alternatives[0])
	}

	if nSuffix > 0 {
		parts = append(parts, NewNode(KindPattern, nil, suffix...))
	}

	return NewNode(KindPattern, nil, parts...)
}
// CommonChildren finds the children shared by all given nodes at the start
// (commonLeft) and at the end (commonRight) of their child lists. Children are
// compared with Node.Equal.
//
// Both results are nil when fewer than two nodes are given. The node with the
// fewest children serves as the reference: it is scanned from both ends at
// once and every other node is compared against it at the same distance from
// the respective end.
func CommonChildren(nodes []*Node) (commonLeft, commonRight []*Node) {
if len(nodes) <= 1 {
return
}
// find node that has least number of children
idx := OneWithLeastChildren(nodes)
if idx == -1 {
return
}
tree := nodes[idx]
treeLength := len(tree.Children)
// allocate max able size for rightCommon slice
// to get ability insert elements in reverse order (from end to start)
// without sorting
commonRight = make([]*Node, treeLength)
lastRight := treeLength // will use this to get results as commonRight[lastRight:]
var (
breakLeft bool
breakRight bool
commonTotal int
)
// i walks the reference node from the left, j from the right. The scan stops
// once every child of the reference node has been claimed by one side or
// both sides have hit a mismatch.
for i, j := 0, treeLength-1; commonTotal < treeLength && j >= 0 && !(breakLeft && breakRight); i, j = i+1, j-1 {
treeLeft := tree.Children[i]
treeRight := tree.Children[j]
for k := 0; k < len(nodes) && !(breakLeft && breakRight); k++ {
// skip least children node
if k == idx {
continue
}
restLeft := nodes[k].Children[i]
// same distance from the end as j has in the reference node
restRight := nodes[k].Children[j+len(nodes[k].Children)-treeLength]
breakLeft = breakLeft || !treeLeft.Equal(restLeft)
// disable searching for right common parts, if left part is already overlapping
breakRight = breakRight || (!breakLeft && j <= i)
breakRight = breakRight || !treeRight.Equal(restRight)
}
if !breakLeft {
commonTotal++
commonLeft = append(commonLeft, treeLeft)
}
if !breakRight {
commonTotal++
lastRight = j
commonRight[j] = treeRight
}
}
commonRight = commonRight[lastRight:]
return
}
// AppendUnique appends val to target unless target already holds a node that
// is reflect.DeepEqual to it, in which case target is returned unchanged.
func AppendUnique(target []*Node, val *Node) []*Node {
	for i := range target {
		if reflect.DeepEqual(target[i], val) {
			return target
		}
	}
	return append(target, val)
}
// SameKind reports whether every node in nodes has the given kind. It is true
// for an empty list.
func SameKind(nodes []*Node, kind Kind) bool {
	for i := 0; i < len(nodes); i++ {
		if nodes[i].Kind != kind {
			return false
		}
	}
	return true
}
// OneWithLeastChildren returns the index of the node with the fewest
// children, preferring the earliest one on ties. It returns -1 for an empty
// list.
func OneWithLeastChildren(nodes []*Node) int {
	if len(nodes) == 0 {
		return -1
	}
	idx, least := 0, len(nodes[0].Children)
	for i := 1; i < len(nodes); i++ {
		if c := len(nodes[i].Children); c < least {
			least, idx = c, i
		}
	}
	return idx
}
// Equal reports whether a and b have the same length and their nodes are
// pairwise equal according to Node.Equal.
func Equal(a, b []*Node) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if !a[i].Equal(b[i]) {
			return false
		}
	}
	return true
}

126
syntax/ast/optimize_test.go Normal file
View File

@ -0,0 +1,126 @@
package ast
import (
"testing"
)
// TestCommonChildren is a table-driven test for CommonChildren. Each entry
// lists the nodes to analyse and the common left and right children that are
// expected back; results are compared with Equal.
func TestCommonChildren(t *testing.T) {
for _, test := range []struct {
nodes []*Node
left []*Node
right []*Node
}{
{
nodes: []*Node{
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"z"}),
NewNode(KindText, Text{"c"}),
),
},
},
{
nodes: []*Node{
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"z"}),
NewNode(KindText, Text{"c"}),
),
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
NewNode(KindText, Text{"c"}),
),
},
left: []*Node{
NewNode(KindText, Text{"a"}),
},
right: []*Node{
NewNode(KindText, Text{"c"}),
},
},
{
nodes: []*Node{
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
NewNode(KindText, Text{"c"}),
NewNode(KindText, Text{"d"}),
),
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
NewNode(KindText, Text{"c"}),
NewNode(KindText, Text{"c"}),
NewNode(KindText, Text{"d"}),
),
},
left: []*Node{
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
},
right: []*Node{
NewNode(KindText, Text{"c"}),
NewNode(KindText, Text{"d"}),
},
},
{
nodes: []*Node{
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
NewNode(KindText, Text{"c"}),
),
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
NewNode(KindText, Text{"b"}),
NewNode(KindText, Text{"c"}),
),
},
left: []*Node{
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
},
right: []*Node{
NewNode(KindText, Text{"c"}),
},
},
{
nodes: []*Node{
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"d"}),
),
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"d"}),
),
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"e"}),
),
},
left: []*Node{
NewNode(KindText, Text{"a"}),
},
right: []*Node{},
},
} {
t.Run("", func(t *testing.T) {
left, right := CommonChildren(test.nodes)
if !Equal(left, test.left) {
t.Errorf(
"left, right := commonChildren(); left = %v; want %v",
left, test.left,
)
}
if !Equal(right, test.right) {
t.Errorf(
"left, right := commonChildren(); right = %v; want %v",
right, test.right,
)
}
})
}
}

View File

@ -5,7 +5,7 @@ import (
"fmt"
"unicode/utf8"
"git.internal/re/glob/syntax/lexer"
"github.com/gobwas/glob/syntax/lexer"
)
type Lexer interface {

View File

@ -4,7 +4,7 @@ import (
"reflect"
"testing"
"git.internal/re/glob/syntax/lexer"
"github.com/gobwas/glob/syntax/lexer"
)
type stubLexer struct {
@ -27,7 +27,7 @@ func TestParseString(t *testing.T) {
tree *Node
}{
{
// pattern: "abc",
//pattern: "abc",
tokens: []lexer.Token{
{lexer.Text, "abc"},
{lexer.EOF, ""},
@ -37,7 +37,7 @@ func TestParseString(t *testing.T) {
),
},
{
// pattern: "a*c",
//pattern: "a*c",
tokens: []lexer.Token{
{lexer.Text, "a"},
{lexer.Any, "*"},
@ -51,7 +51,7 @@ func TestParseString(t *testing.T) {
),
},
{
// pattern: "a**c",
//pattern: "a**c",
tokens: []lexer.Token{
{lexer.Text, "a"},
{lexer.Super, "**"},
@ -65,7 +65,7 @@ func TestParseString(t *testing.T) {
),
},
{
// pattern: "a?c",
//pattern: "a?c",
tokens: []lexer.Token{
{lexer.Text, "a"},
{lexer.Single, "?"},
@ -79,7 +79,7 @@ func TestParseString(t *testing.T) {
),
},
{
// pattern: "[!a-z]",
//pattern: "[!a-z]",
tokens: []lexer.Token{
{lexer.RangeOpen, "["},
{lexer.Not, "!"},
@ -94,7 +94,7 @@ func TestParseString(t *testing.T) {
),
},
{
// pattern: "[az]",
//pattern: "[az]",
tokens: []lexer.Token{
{lexer.RangeOpen, "["},
{lexer.Text, "az"},
@ -106,7 +106,7 @@ func TestParseString(t *testing.T) {
),
},
{
// pattern: "{a,z}",
//pattern: "{a,z}",
tokens: []lexer.Token{
{lexer.TermsOpen, "{"},
{lexer.Text, "a"},
@ -127,7 +127,7 @@ func TestParseString(t *testing.T) {
),
},
{
// pattern: "/{z,ab}*",
//pattern: "/{z,ab}*",
tokens: []lexer.Token{
{lexer.Text, "/"},
{lexer.TermsOpen, "{"},
@ -152,7 +152,7 @@ func TestParseString(t *testing.T) {
),
},
{
// pattern: "{a,{x,y},?,[a-z],[!qwe]}",
//pattern: "{a,{x,y},?,[a-z],[!qwe]}",
tokens: []lexer.Token{
{lexer.TermsOpen, "{"},
{lexer.Text, "a"},

View File

@ -3,9 +3,8 @@ package lexer
import (
"bytes"
"fmt"
"github.com/gobwas/glob/util/runes"
"unicode/utf8"
"git.internal/re/glob/util/runes"
)
const (
@ -147,10 +146,8 @@ func (l *lexer) termsLeave() {
l.termsLevel--
}
var (
inTextBreakers = []rune{char_single, char_any, char_range_open, char_terms_open}
inTermsBreakers = append(inTextBreakers, char_terms_close, char_comma)
)
var inTextBreakers = []rune{char_single, char_any, char_range_open, char_terms_open}
var inTermsBreakers = append(inTextBreakers, char_terms_close, char_comma)
func (l *lexer) fetchItem() {
r := l.read()

View File

@ -1,8 +1,8 @@
package syntax
import (
"git.internal/re/glob/syntax/ast"
"git.internal/re/glob/syntax/lexer"
"github.com/gobwas/glob/syntax/ast"
"github.com/gobwas/glob/syntax/lexer"
)
func Parse(s string) (*ast.Node, error) {

View File

@ -1,5 +1,98 @@
package runes
import (
"strings"
"unicode/utf8"
)
// Head returns the prefix of s that holds its first r runes. When s has fewer
// than r runes the whole string is returned; when r is zero or negative the
// result is empty. Every invalid UTF-8 byte counts as one rune.
func Head(s string, r int) string {
	if r <= 0 {
		// Without this guard the loop below would never hit its stop
		// condition and the whole string would be returned.
		return ""
	}
	var i int
	for n := 0; n < r && i < len(s); n++ {
		_, w := utf8.DecodeRuneInString(s[i:])
		i += w
	}
	return s[:i]
}
// Tail returns the suffix of s that holds its last r runes. When s has fewer
// than r runes the whole string is returned; when r is zero or negative the
// result is empty. Every invalid UTF-8 byte counts as one rune, which mirrors
// how Head counts.
func Tail(s string, r int) string {
	i := len(s)
	for n := 0; n < r && i > 0; n++ {
		// Decode backwards from the current cut point. This also handles a
		// literal U+FFFD correctly, which cannot be told apart from a
		// decoding failure by inspecting the decoded rune alone.
		_, w := utf8.DecodeLastRuneInString(s[:i])
		i -= w
	}
	return s[i:]
}
// ExactlyRunesCount reports whether s consists of exactly n runes. It stops
// scanning as soon as s turns out to be longer than that.
func ExactlyRunesCount(s string, n int) bool {
	seen := 0
	for range s {
		if seen >= n {
			// One more rune would exceed n.
			return false
		}
		seen++
	}
	return seen == n
}
// AtLeastRunesCount reports whether s consists of at least n runes. It stops
// scanning as soon as n runes have been seen. Every string, including the
// empty one, has at least zero runes, so the result is true for n <= 0.
func AtLeastRunesCount(s string, n int) bool {
	if n <= 0 {
		return true
	}
	var m int
	for range s {
		m++
		if m >= n {
			return true
		}
	}
	return false
}
// IndexAnyRune returns the byte index of the first occurrence in s of any of
// the runes in rs, or -1 when none of them occurs. The result is the earliest
// position in s regardless of the order of rs.
func IndexAnyRune(s string, rs []rune) int {
	first := -1
	// Only the text before the best hit so far can hold an earlier hit, so
	// the searched window shrinks after every match.
	window := s
	for _, r := range rs {
		if i := strings.IndexRune(window, r); i != -1 {
			first = i
			window = window[:i]
		}
	}
	return first
}
// LastIndexAnyRune returns the byte index of the last occurrence in s of any
// of the runes in rs, or -1 when none of them occurs. The result is the
// right-most position in s regardless of the order of rs.
func LastIndexAnyRune(s string, rs []rune) int {
	last := -1
	for _, r := range rs {
		i := -1
		switch {
		case 0 <= r && r < utf8.RuneSelf:
			// ASCII bytes never occur inside a multi-byte sequence, so a
			// plain byte search is sufficient.
			i = strings.LastIndexByte(s, byte(r))
		case utf8.ValidRune(r):
			i = strings.LastIndex(s, string(r))
		}
		if i > last {
			last = i
		}
	}
	return last
}
func Index(s, needle []rune) int {
ls, ln := len(s), len(needle)
@ -130,6 +223,7 @@ func IndexLastRune(s []rune, r rune) int {
}
func Equal(a, b []rune) bool {
// TODO use bytes.Equal with unsafe.
if len(a) == len(b) {
for i := 0; i < len(a); i++ {
if a[i] != b[i] {

View File

@ -1,39 +0,0 @@
package strings
import (
"strings"
"unicode/utf8"
)
func IndexAnyRunes(s string, rs []rune) int {
for _, r := range rs {
if i := strings.IndexRune(s, r); i != -1 {
return i
}
}
return -1
}
func LastIndexAnyRunes(s string, rs []rune) int {
for _, r := range rs {
i := -1
if 0 <= r && r < utf8.RuneSelf {
i = strings.LastIndexByte(s, byte(r))
} else {
sub := s
for len(sub) > 0 {
j := strings.IndexRune(s, r)
if j == -1 {
break
}
i = j
sub = sub[i+1:]
}
}
if i != -1 {
return i
}
}
return -1
}