glob/compiler/compiler.go

526 lines
11 KiB
Go
Raw Normal View History

2016-05-30 19:35:53 +03:00
package compiler
2016-01-08 20:14:31 +03:00
2016-02-23 14:46:32 +03:00
// TODO use constructors for all matchers, and make their structs private
2016-02-25 00:41:52 +03:00
// TODO glue multiple Text nodes (like after QuoteMeta)
2016-02-23 14:46:32 +03:00
2016-01-08 20:14:31 +03:00
import (
"fmt"
2016-12-07 10:55:55 +03:00
"reflect"
2022-12-12 17:24:35 +03:00
"git.internal/re/glob/match"
"git.internal/re/glob/syntax/ast"
"git.internal/re/glob/util/runes"
2016-01-08 20:14:31 +03:00
)
2016-05-30 19:35:53 +03:00
func optimizeMatcher(matcher match.Matcher) match.Matcher {
2016-01-08 20:14:31 +03:00
switch m := matcher.(type) {
case match.Any:
2016-02-02 14:57:42 +03:00
if len(m.Separators) == 0 {
return match.NewSuper()
2016-01-08 20:14:31 +03:00
}
2016-01-15 19:50:12 +03:00
case match.AnyOf:
if len(m.Matchers) == 1 {
return m.Matchers[0]
}
return m
2016-01-19 20:52:25 +03:00
case match.List:
2016-02-02 22:03:37 +03:00
if m.Not == false && len(m.List) == 1 {
return match.NewText(string(m.List))
2016-01-19 20:52:25 +03:00
}
return m
2016-01-08 20:14:31 +03:00
case match.BTree:
2016-05-30 19:35:53 +03:00
m.Left = optimizeMatcher(m.Left)
m.Right = optimizeMatcher(m.Right)
2016-01-08 20:14:31 +03:00
2016-01-14 21:32:02 +03:00
r, ok := m.Value.(match.Text)
2016-01-08 20:14:31 +03:00
if !ok {
return m
}
var (
leftNil = m.Left == nil
rightNil = m.Right == nil
)
2016-01-08 20:14:31 +03:00
if leftNil && rightNil {
2016-01-14 21:32:02 +03:00
return match.NewText(r.Str)
2016-01-08 20:14:31 +03:00
}
_, leftSuper := m.Left.(match.Super)
lp, leftPrefix := m.Left.(match.Prefix)
la, leftAny := m.Left.(match.Any)
2016-01-08 20:14:31 +03:00
_, rightSuper := m.Right.(match.Super)
rs, rightSuffix := m.Right.(match.Suffix)
ra, rightAny := m.Right.(match.Any)
2016-01-08 20:14:31 +03:00
switch {
case leftSuper && rightSuper:
return match.NewContains(r.Str, false)
2016-01-08 20:14:31 +03:00
case leftSuper && rightNil:
return match.NewSuffix(r.Str)
2016-01-08 20:14:31 +03:00
case rightSuper && leftNil:
return match.NewPrefix(r.Str)
2016-01-08 20:14:31 +03:00
case leftNil && rightSuffix:
return match.NewPrefixSuffix(r.Str, rs.Suffix)
2016-01-08 20:14:31 +03:00
case rightNil && leftPrefix:
return match.NewPrefixSuffix(lp.Prefix, r.Str)
case rightNil && leftAny:
return match.NewSuffixAny(r.Str, la.Separators)
case leftNil && rightAny:
return match.NewPrefixAny(r.Str, ra.Separators)
2016-01-08 20:14:31 +03:00
}
return m
}
return matcher
}
2016-05-30 19:35:53 +03:00
func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
if len(matchers) == 0 {
return nil, fmt.Errorf("compile error: need at least one matcher")
}
if len(matchers) == 1 {
return matchers[0], nil
}
if m := glueMatchers(matchers); m != nil {
return m, nil
}
idx := -1
2016-01-09 02:34:41 +03:00
maxLen := -1
2016-05-30 19:35:53 +03:00
var val match.Matcher
for i, matcher := range matchers {
if l := matcher.Len(); l != -1 && l >= maxLen {
maxLen = l
idx = i
val = matcher
}
}
2016-01-09 02:34:41 +03:00
2016-05-30 19:35:53 +03:00
if val == nil { // not found matcher with static length
r, err := compileMatchers(matchers[1:])
if err != nil {
return nil, err
}
return match.NewBTree(matchers[0], nil, r), nil
2016-01-09 02:34:41 +03:00
}
2016-05-30 19:35:53 +03:00
left := matchers[:idx]
var right []match.Matcher
if len(matchers) > idx+1 {
right = matchers[idx+1:]
2016-01-09 02:34:41 +03:00
}
2016-05-30 19:35:53 +03:00
var l, r match.Matcher
var err error
if len(left) > 0 {
l, err = compileMatchers(left)
if err != nil {
return nil, err
}
}
if len(right) > 0 {
r, err = compileMatchers(right)
if err != nil {
return nil, err
2016-01-09 02:34:41 +03:00
}
}
2016-05-30 19:35:53 +03:00
return match.NewBTree(val, l, r), nil
}
func glueMatchers(matchers []match.Matcher) match.Matcher {
if m := glueMatchersAsEvery(matchers); m != nil {
return m
}
if m := glueMatchersAsRow(matchers); m != nil {
return m
}
return nil
2016-01-09 02:34:41 +03:00
}
2016-05-30 19:35:53 +03:00
func glueMatchersAsRow(matchers []match.Matcher) match.Matcher {
2016-01-13 20:13:11 +03:00
if len(matchers) <= 1 {
2016-01-09 02:34:41 +03:00
return nil
}
2016-01-14 18:29:13 +03:00
var (
c []match.Matcher
l int
)
2016-01-09 02:34:41 +03:00
for _, matcher := range matchers {
2016-01-14 18:29:13 +03:00
if ml := matcher.Len(); ml == -1 {
2016-01-09 02:34:41 +03:00
return nil
2016-01-14 18:29:13 +03:00
} else {
c = append(c, matcher)
l += ml
2016-01-09 02:34:41 +03:00
}
}
return match.NewRow(l, c...)
2016-01-09 02:34:41 +03:00
}
2016-05-30 19:35:53 +03:00
func glueMatchersAsEvery(matchers []match.Matcher) match.Matcher {
2016-01-13 20:13:11 +03:00
if len(matchers) <= 1 {
2016-01-08 20:14:31 +03:00
return nil
}
var (
hasAny bool
hasSuper bool
hasSingle bool
min int
2016-02-02 14:57:42 +03:00
separator []rune
2016-01-08 20:14:31 +03:00
)
for i, matcher := range matchers {
2016-02-02 14:57:42 +03:00
var sep []rune
2016-01-08 20:14:31 +03:00
2016-02-02 14:57:42 +03:00
switch m := matcher.(type) {
2016-01-08 20:14:31 +03:00
case match.Super:
2016-02-02 14:57:42 +03:00
sep = []rune{}
2016-01-08 20:14:31 +03:00
hasSuper = true
case match.Any:
sep = m.Separators
hasAny = true
case match.Single:
sep = m.Separators
hasSingle = true
min++
case match.List:
if !m.Not {
return nil
}
sep = m.List
hasSingle = true
min++
default:
return nil
}
// initialize
if i == 0 {
separator = sep
}
2016-02-02 22:03:37 +03:00
if runes.Equal(sep, separator) {
2016-01-08 20:14:31 +03:00
continue
}
return nil
}
if hasSuper && !hasAny && !hasSingle {
return match.NewSuper()
2016-01-08 20:14:31 +03:00
}
if hasAny && !hasSuper && !hasSingle {
return match.NewAny(separator)
2016-01-08 20:14:31 +03:00
}
2016-02-02 22:03:37 +03:00
if (hasAny || hasSuper) && min > 0 && len(separator) == 0 {
return match.NewMin(min)
2016-01-08 20:14:31 +03:00
}
every := match.NewEveryOf()
2016-01-08 20:14:31 +03:00
if min > 0 {
every.Add(match.NewMin(min))
2016-01-08 20:14:31 +03:00
if !hasAny && !hasSuper {
every.Add(match.NewMax(min))
2016-01-08 20:14:31 +03:00
}
}
2016-02-02 22:03:37 +03:00
if len(separator) > 0 {
every.Add(match.NewContains(string(separator), true))
2016-01-08 20:14:31 +03:00
}
return every
}
2016-01-14 18:29:13 +03:00
func minimizeMatchers(matchers []match.Matcher) []match.Matcher {
2016-01-13 20:13:11 +03:00
var done match.Matcher
var left, right, count int
for l := 0; l < len(matchers); l++ {
for r := len(matchers); r > l; r-- {
if glued := glueMatchers(matchers[l:r]); glued != nil {
2016-01-13 20:26:39 +03:00
var swap bool
if done == nil {
swap = true
} else {
cl, gl := done.Len(), glued.Len()
swap = cl > -1 && gl > -1 && gl > cl
swap = swap || count < r-l
}
if swap {
2016-01-13 20:13:11 +03:00
done = glued
left = l
right = r
count = r - l
}
}
2016-01-09 02:34:41 +03:00
}
}
2016-01-13 20:13:11 +03:00
if done == nil {
return matchers
2016-01-09 02:34:41 +03:00
}
2016-01-13 20:13:11 +03:00
next := append(append([]match.Matcher{}, matchers[:left]...), done)
if right < len(matchers) {
next = append(next, matchers[right:]...)
}
if len(next) == len(matchers) {
return next
}
2016-01-14 18:29:13 +03:00
return minimizeMatchers(next)
2016-01-09 02:34:41 +03:00
}
2016-05-30 19:35:53 +03:00
// minimizeAnyOf tries to apply some heuristics to minimize number of nodes in given tree
func minimizeTree(tree *ast.Node) *ast.Node {
switch tree.Kind {
case ast.KindAnyOf:
return minimizeTreeAnyOf(tree)
default:
return nil
2016-01-15 19:50:12 +03:00
}
2016-05-30 19:35:53 +03:00
}
2016-01-15 19:50:12 +03:00
2016-05-30 19:35:53 +03:00
// minimizeAnyOf tries to find common children of given node of AnyOf pattern
// it searches for common children from left and from right
// if any common children are found then it returns new optimized ast tree
// else it returns nil
func minimizeTreeAnyOf(tree *ast.Node) *ast.Node {
if !areOfSameKind(tree.Children, ast.KindPattern) {
return nil
2016-01-15 19:50:12 +03:00
}
2016-05-30 19:35:53 +03:00
commonLeft, commonRight := commonChildren(tree.Children)
commonLeftCount, commonRightCount := len(commonLeft), len(commonRight)
if commonLeftCount == 0 && commonRightCount == 0 { // there are no common parts
2016-01-15 19:50:12 +03:00
return nil
}
2016-05-30 19:35:53 +03:00
var result []*ast.Node
2016-01-15 19:50:12 +03:00
if commonLeftCount > 0 {
2016-05-30 19:35:53 +03:00
result = append(result, ast.NewNode(ast.KindPattern, nil, commonLeft...))
2016-01-15 19:50:12 +03:00
}
2016-05-30 19:35:53 +03:00
var anyOf []*ast.Node
for _, child := range tree.Children {
reuse := child.Children[commonLeftCount : len(child.Children)-commonRightCount]
var node *ast.Node
if len(reuse) == 0 {
// this pattern is completely reduced by commonLeft and commonRight patterns
// so it become nothing
node = ast.NewNode(ast.KindNothing, nil)
2016-01-15 19:50:12 +03:00
} else {
2016-05-30 19:35:53 +03:00
node = ast.NewNode(ast.KindPattern, nil, reuse...)
2016-01-15 19:50:12 +03:00
}
2016-05-30 19:35:53 +03:00
anyOf = appendIfUnique(anyOf, node)
2016-01-15 19:50:12 +03:00
}
2016-05-30 19:35:53 +03:00
switch {
case len(anyOf) == 1 && anyOf[0].Kind != ast.KindNothing:
result = append(result, anyOf[0])
case len(anyOf) > 1:
result = append(result, ast.NewNode(ast.KindAnyOf, nil, anyOf...))
2016-01-15 19:50:12 +03:00
}
if commonRightCount > 0 {
2016-05-30 19:35:53 +03:00
result = append(result, ast.NewNode(ast.KindPattern, nil, commonRight...))
2016-01-15 19:50:12 +03:00
}
2016-05-30 19:35:53 +03:00
return ast.NewNode(ast.KindPattern, nil, result...)
2016-01-15 19:50:12 +03:00
}
2016-05-30 19:35:53 +03:00
func commonChildren(nodes []*ast.Node) (commonLeft, commonRight []*ast.Node) {
2016-05-31 11:28:02 +03:00
if len(nodes) <= 1 {
return
}
2016-05-30 19:35:53 +03:00
// find node that has least number of children
idx := leastChildren(nodes)
if idx == -1 {
2016-05-31 11:28:02 +03:00
return
2016-01-15 19:50:12 +03:00
}
2016-05-30 19:35:53 +03:00
tree := nodes[idx]
2016-05-31 11:28:02 +03:00
treeLength := len(tree.Children)
// allocate max able size for rightCommon slice
// to get ability insert elements in reverse order (from end to start)
// without sorting
commonRight = make([]*ast.Node, treeLength)
lastRight := treeLength // will use this to get results as commonRight[lastRight:]
2016-01-15 19:50:12 +03:00
2016-05-30 19:35:53 +03:00
var (
2016-05-31 11:28:02 +03:00
breakLeft bool
breakRight bool
commonTotal int
2016-05-30 19:35:53 +03:00
)
2016-12-07 10:55:55 +03:00
for i, j := 0, treeLength-1; commonTotal < treeLength && j >= 0 && !(breakLeft && breakRight); i, j = i+1, j-1 {
2016-05-30 19:35:53 +03:00
treeLeft := tree.Children[i]
treeRight := tree.Children[j]
2016-01-15 19:50:12 +03:00
2016-12-07 10:55:55 +03:00
for k := 0; k < len(nodes) && !(breakLeft && breakRight); k++ {
2016-05-30 19:35:53 +03:00
// skip least children node
if k == idx {
continue
}
2016-01-13 20:13:11 +03:00
2016-05-30 19:35:53 +03:00
restLeft := nodes[k].Children[i]
2016-05-31 11:28:02 +03:00
restRight := nodes[k].Children[j+len(nodes[k].Children)-treeLength]
2016-01-08 20:14:31 +03:00
2016-05-30 19:35:53 +03:00
breakLeft = breakLeft || !treeLeft.Equal(restLeft)
// disable searching for right common parts, if left part is already overlapping
breakRight = breakRight || (!breakLeft && j <= i)
breakRight = breakRight || !treeRight.Equal(restRight)
}
2016-05-31 11:28:02 +03:00
2016-05-30 19:35:53 +03:00
if !breakLeft {
2016-05-31 11:28:02 +03:00
commonTotal++
2016-05-30 19:35:53 +03:00
commonLeft = append(commonLeft, treeLeft)
}
if !breakRight {
2016-05-31 11:28:02 +03:00
commonTotal++
lastRight = j
commonRight[j] = treeRight
2016-01-08 20:14:31 +03:00
}
}
2016-05-31 11:28:02 +03:00
commonRight = commonRight[lastRight:]
2016-05-30 19:35:53 +03:00
return
}
2016-01-08 20:14:31 +03:00
2016-05-30 19:35:53 +03:00
func appendIfUnique(target []*ast.Node, val *ast.Node) []*ast.Node {
for _, n := range target {
if reflect.DeepEqual(n, val) {
return target
2016-05-14 21:38:00 +03:00
}
}
2016-05-30 19:35:53 +03:00
return append(target, val)
}
2016-05-14 21:38:00 +03:00
2016-05-30 19:35:53 +03:00
func areOfSameKind(nodes []*ast.Node, kind ast.Kind) bool {
for _, n := range nodes {
if n.Kind != kind {
return false
}
2016-01-08 20:14:31 +03:00
}
2016-05-30 19:35:53 +03:00
return true
}
2016-01-08 20:14:31 +03:00
2016-05-30 19:35:53 +03:00
func leastChildren(nodes []*ast.Node) int {
min := -1
idx := -1
for i, n := range nodes {
if idx == -1 || (len(n.Children) < min) {
min = len(n.Children)
idx = i
2016-01-08 20:14:31 +03:00
}
}
2016-05-30 19:35:53 +03:00
return idx
}
2016-01-08 20:14:31 +03:00
2016-05-30 19:35:53 +03:00
func compileTreeChildren(tree *ast.Node, sep []rune) ([]match.Matcher, error) {
var matchers []match.Matcher
for _, desc := range tree.Children {
m, err := compile(desc, sep)
2016-01-08 20:14:31 +03:00
if err != nil {
return nil, err
}
2016-05-30 19:35:53 +03:00
matchers = append(matchers, optimizeMatcher(m))
2016-01-08 20:14:31 +03:00
}
2016-05-30 19:35:53 +03:00
return matchers, nil
2016-01-08 20:14:31 +03:00
}
2016-05-30 19:35:53 +03:00
func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) {
switch tree.Kind {
case ast.KindAnyOf:
// todo this could be faster on pattern_alternatives_combine_lite (see glob_test.go)
if n := minimizeTree(tree); n != nil {
return compile(n, sep)
2016-01-15 19:50:12 +03:00
}
2016-05-30 19:35:53 +03:00
matchers, err := compileTreeChildren(tree, sep)
if err != nil {
return nil, err
2016-01-08 20:14:31 +03:00
}
return match.NewAnyOf(matchers...), nil
2016-01-15 19:50:12 +03:00
2016-05-30 19:35:53 +03:00
case ast.KindPattern:
if len(tree.Children) == 0 {
return match.NewNothing(), nil
2016-01-15 19:50:12 +03:00
}
2016-05-30 19:35:53 +03:00
matchers, err := compileTreeChildren(tree, sep)
if err != nil {
return nil, err
2016-01-15 19:50:12 +03:00
}
m, err = compileMatchers(minimizeMatchers(matchers))
if err != nil {
return nil, err
2016-01-08 20:14:31 +03:00
}
2016-05-30 19:35:53 +03:00
case ast.KindAny:
m = match.NewAny(sep)
2016-01-08 20:14:31 +03:00
2016-05-30 19:35:53 +03:00
case ast.KindSuper:
m = match.NewSuper()
2016-01-08 20:14:31 +03:00
2016-05-30 19:35:53 +03:00
case ast.KindSingle:
m = match.NewSingle(sep)
2016-01-08 20:14:31 +03:00
2016-05-30 19:35:53 +03:00
case ast.KindNothing:
m = match.NewNothing()
case ast.KindList:
l := tree.Value.(ast.List)
m = match.NewList([]rune(l.Chars), l.Not)
2016-01-08 20:14:31 +03:00
2016-05-30 19:35:53 +03:00
case ast.KindRange:
r := tree.Value.(ast.Range)
m = match.NewRange(r.Lo, r.Hi, r.Not)
2016-01-08 20:14:31 +03:00
2016-05-30 19:35:53 +03:00
case ast.KindText:
t := tree.Value.(ast.Text)
m = match.NewText(t.Text)
2016-01-08 20:14:31 +03:00
default:
return nil, fmt.Errorf("could not compile tree: unknown node type")
}
2016-05-30 19:35:53 +03:00
return optimizeMatcher(m), nil
2016-01-08 20:14:31 +03:00
}
2016-05-30 19:35:53 +03:00
func Compile(tree *ast.Node, sep []rune) (match.Matcher, error) {
m, err := compile(tree, sep)
2016-01-08 20:14:31 +03:00
if err != nil {
return nil, err
}
2016-05-30 19:35:53 +03:00
return m, nil
2016-01-08 20:14:31 +03:00
}