glob/compiler.go

682 lines
12 KiB
Go
Raw Normal View History

2016-01-08 20:14:31 +03:00
package glob
2016-02-23 14:46:32 +03:00
// TODO: use constructors for all matchers and make their structs private.
2016-01-08 20:14:31 +03:00
import (
"fmt"
"github.com/gobwas/glob/match"
2016-02-02 22:03:37 +03:00
"github.com/gobwas/glob/runes"
2016-01-15 19:50:12 +03:00
"reflect"
2016-01-08 20:14:31 +03:00
)
func optimize(matcher match.Matcher) match.Matcher {
switch m := matcher.(type) {
case match.Any:
2016-02-02 14:57:42 +03:00
if len(m.Separators) == 0 {
return match.NewSuper()
2016-01-08 20:14:31 +03:00
}
2016-01-15 19:50:12 +03:00
case match.AnyOf:
if len(m.Matchers) == 1 {
return m.Matchers[0]
}
return m
2016-01-19 20:52:25 +03:00
case match.List:
2016-02-02 22:03:37 +03:00
if m.Not == false && len(m.List) == 1 {
return match.NewText(string(m.List))
2016-01-19 20:52:25 +03:00
}
return m
2016-01-08 20:14:31 +03:00
case match.BTree:
m.Left = optimize(m.Left)
m.Right = optimize(m.Right)
2016-01-14 21:32:02 +03:00
r, ok := m.Value.(match.Text)
2016-01-08 20:14:31 +03:00
if !ok {
return m
}
leftNil := m.Left == nil
rightNil := m.Right == nil
if leftNil && rightNil {
2016-01-14 21:32:02 +03:00
return match.NewText(r.Str)
2016-01-08 20:14:31 +03:00
}
_, leftSuper := m.Left.(match.Super)
lp, leftPrefix := m.Left.(match.Prefix)
_, rightSuper := m.Right.(match.Super)
rs, rightSuffix := m.Right.(match.Suffix)
if leftSuper && rightSuper {
return match.NewContains(r.Str, false)
2016-01-08 20:14:31 +03:00
}
if leftSuper && rightNil {
return match.NewSuffix(r.Str)
2016-01-08 20:14:31 +03:00
}
if rightSuper && leftNil {
return match.NewPrefix(r.Str)
2016-01-08 20:14:31 +03:00
}
if leftNil && rightSuffix {
return match.NewPrefixSuffix(r.Str, rs.Suffix)
2016-01-08 20:14:31 +03:00
}
if rightNil && leftPrefix {
return match.NewPrefixSuffix(lp.Prefix, r.Str)
2016-01-08 20:14:31 +03:00
}
return m
}
return matcher
}
func glueMatchers(matchers []match.Matcher) match.Matcher {
2016-01-09 02:34:41 +03:00
var (
glued []match.Matcher
winner match.Matcher
)
maxLen := -1
if m := glueAsEvery(matchers); m != nil {
glued = append(glued, m)
return m
}
if m := glueAsRow(matchers); m != nil {
glued = append(glued, m)
return m
}
for _, g := range glued {
if l := g.Len(); l > maxLen {
maxLen = l
winner = g
}
}
return winner
}
func glueAsRow(matchers []match.Matcher) match.Matcher {
2016-01-13 20:13:11 +03:00
if len(matchers) <= 1 {
2016-01-09 02:34:41 +03:00
return nil
}
2016-01-14 18:29:13 +03:00
var (
c []match.Matcher
l int
)
2016-01-09 02:34:41 +03:00
for _, matcher := range matchers {
2016-01-14 18:29:13 +03:00
if ml := matcher.Len(); ml == -1 {
2016-01-09 02:34:41 +03:00
return nil
2016-01-14 18:29:13 +03:00
} else {
c = append(c, matcher)
l += ml
2016-01-09 02:34:41 +03:00
}
}
return match.NewRow(l, c...)
2016-01-09 02:34:41 +03:00
}
func glueAsEvery(matchers []match.Matcher) match.Matcher {
2016-01-13 20:13:11 +03:00
if len(matchers) <= 1 {
2016-01-08 20:14:31 +03:00
return nil
}
var (
hasAny bool
hasSuper bool
hasSingle bool
min int
2016-02-02 14:57:42 +03:00
separator []rune
2016-01-08 20:14:31 +03:00
)
for i, matcher := range matchers {
2016-02-02 14:57:42 +03:00
var sep []rune
2016-01-08 20:14:31 +03:00
2016-02-02 14:57:42 +03:00
switch m := matcher.(type) {
2016-01-08 20:14:31 +03:00
case match.Super:
2016-02-02 14:57:42 +03:00
sep = []rune{}
2016-01-08 20:14:31 +03:00
hasSuper = true
case match.Any:
sep = m.Separators
hasAny = true
case match.Single:
sep = m.Separators
hasSingle = true
min++
case match.List:
if !m.Not {
return nil
}
sep = m.List
hasSingle = true
min++
default:
return nil
}
// initialize
if i == 0 {
separator = sep
}
2016-02-02 22:03:37 +03:00
if runes.Equal(sep, separator) {
2016-01-08 20:14:31 +03:00
continue
}
return nil
}
if hasSuper && !hasAny && !hasSingle {
return match.NewSuper()
2016-01-08 20:14:31 +03:00
}
if hasAny && !hasSuper && !hasSingle {
return match.NewAny(separator)
2016-01-08 20:14:31 +03:00
}
2016-02-02 22:03:37 +03:00
if (hasAny || hasSuper) && min > 0 && len(separator) == 0 {
return match.NewMin(min)
2016-01-08 20:14:31 +03:00
}
every := match.NewEveryOf()
2016-01-08 20:14:31 +03:00
if min > 0 {
every.Add(match.NewMin(min))
2016-01-08 20:14:31 +03:00
if !hasAny && !hasSuper {
every.Add(match.NewMax(min))
2016-01-08 20:14:31 +03:00
}
}
2016-02-02 22:03:37 +03:00
if len(separator) > 0 {
every.Add(match.NewContains(string(separator), true))
2016-01-08 20:14:31 +03:00
}
return every
}
2016-01-14 18:29:13 +03:00
func minimizeMatchers(matchers []match.Matcher) []match.Matcher {
2016-01-13 20:13:11 +03:00
var done match.Matcher
var left, right, count int
for l := 0; l < len(matchers); l++ {
for r := len(matchers); r > l; r-- {
if glued := glueMatchers(matchers[l:r]); glued != nil {
2016-01-13 20:26:39 +03:00
var swap bool
if done == nil {
swap = true
} else {
cl, gl := done.Len(), glued.Len()
swap = cl > -1 && gl > -1 && gl > cl
swap = swap || count < r-l
}
if swap {
2016-01-13 20:13:11 +03:00
done = glued
left = l
right = r
count = r - l
}
}
2016-01-09 02:34:41 +03:00
}
}
2016-01-13 20:13:11 +03:00
if done == nil {
return matchers
2016-01-09 02:34:41 +03:00
}
2016-01-13 20:13:11 +03:00
next := append(append([]match.Matcher{}, matchers[:left]...), done)
if right < len(matchers) {
next = append(next, matchers[right:]...)
}
if len(next) == len(matchers) {
return next
}
2016-01-14 18:29:13 +03:00
return minimizeMatchers(next)
2016-01-09 02:34:41 +03:00
}
2016-01-15 19:50:12 +03:00
// minimizeAnyOf factors the common leading and trailing nodes out of a set of
// alternative patterns. Given alternatives that all start and/or end with
// deeply-equal nodes, it returns a pattern of the form
//
//	pattern(commonLeft) , anyOf(remaining middles) , pattern(commonRight)
//
// It returns nil when there are no children, when any child is not a
// *nodePattern, or when the alternatives share neither a prefix nor a suffix.
// An alternative that is fully consumed by the common parts is represented by
// a nil entry inside the resulting any-of node.
//
// Changes from the previous version:
//   - the common suffix is now kept in source order (it used to be collected
//     right-to-left and emitted reversed when longer than one node);
//   - an empty children slice no longer panics;
//   - children() is evaluated once per alternative.
func minimizeAnyOf(children []node) node {
	if len(children) == 0 {
		return nil
	}

	var (
		nodes  [][]node
		minLen int
		idx    int
	)
	for i, desc := range children {
		pat, ok := desc.(*nodePattern)
		if !ok {
			return nil
		}

		n := pat.children()
		if ln := len(n); len(nodes) == 0 || ln < minLen {
			minLen = ln
			idx = i
		}
		nodes = append(nodes, n)
	}

	// Take the shortest alternative out of the set; it bounds how many nodes
	// can possibly be shared and is compared against all the others.
	minNodes := nodes[idx]
	if idx+1 < len(nodes) {
		nodes = append(nodes[:idx], nodes[idx+1:]...)
	} else {
		nodes = nodes[:idx]
	}

	var commonLeft []node
	var commonLeftCount int
	for i, n := range minNodes {
		has := true
		for _, t := range nodes {
			if !reflect.DeepEqual(n, t[i]) {
				has = false
				break
			}
		}
		if !has {
			break
		}
		commonLeft = append(commonLeft, n)
		commonLeftCount++
	}

	// Walk from the right end, never crossing into the already matched prefix.
	var commonRight []node
	var commonRightCount int
	for i := minLen - 1; i > commonLeftCount-1; i-- {
		n := minNodes[i]
		has := true
		for _, t := range nodes {
			if !reflect.DeepEqual(n, t[len(t)-(minLen-i)]) {
				has = false
				break
			}
		}
		if !has {
			break
		}
		// Prepend: nodes are visited right-to-left but must be emitted in
		// their original left-to-right order.
		commonRight = append([]node{n}, commonRight...)
		commonRightCount++
	}

	if commonLeftCount == 0 && commonRightCount == 0 {
		return nil
	}

	// Put the shortest alternative back at its original index.
	nodes = append(nodes, minNodes)
	nodes[len(nodes)-1], nodes[idx] = nodes[idx], nodes[len(nodes)-1]

	var result []node
	if commonLeftCount > 0 {
		result = append(result, &nodePattern{nodeImpl: nodeImpl{desc: commonLeft}})
	}

	var anyOf []node
	for _, n := range nodes {
		if commonLeftCount+commonRightCount == len(n) {
			// Nothing is left of this alternative once the common parts are
			// removed.
			anyOf = append(anyOf, nil)
		} else {
			anyOf = append(anyOf, &nodePattern{nodeImpl: nodeImpl{desc: n[commonLeftCount : len(n)-commonRightCount]}})
		}
	}

	anyOf = uniqueNodes(anyOf)
	if len(anyOf) == 1 {
		if anyOf[0] != nil {
			result = append(result, &nodePattern{nodeImpl: nodeImpl{desc: anyOf}})
		}
	} else {
		result = append(result, &nodeAnyOf{nodeImpl: nodeImpl{desc: anyOf}})
	}

	if commonRightCount > 0 {
		result = append(result, &nodePattern{nodeImpl: nodeImpl{desc: commonRight}})
	}

	return &nodePattern{nodeImpl: nodeImpl{desc: result}}
}
// uniqueNodes returns the input nodes with duplicates removed, keeping the
// first occurrence of each and preserving order. Two nodes are considered
// duplicates when reflect.DeepEqual reports them equal. The result is nil
// when nodes is empty.
func uniqueNodes(nodes []node) (result []node) {
	for _, candidate := range nodes {
		seen := false
		for _, kept := range result {
			if reflect.DeepEqual(kept, candidate) {
				seen = true
				break
			}
		}
		if !seen {
			result = append(result, candidate)
		}
	}
	return result
}
2016-01-09 02:34:41 +03:00
func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
2016-01-13 01:26:48 +03:00
if len(matchers) == 0 {
return nil, fmt.Errorf("compile error: need at least one matcher")
}
2016-01-13 20:13:11 +03:00
if len(matchers) == 1 {
return matchers[0], nil
}
2016-01-08 20:14:31 +03:00
if m := glueMatchers(matchers); m != nil {
return m, nil
}
var (
2016-01-12 14:06:59 +03:00
val match.Matcher
2016-01-08 20:14:31 +03:00
idx int
)
2016-01-09 02:34:41 +03:00
maxLen := -1
2016-01-08 20:14:31 +03:00
for i, matcher := range matchers {
2016-01-12 14:06:59 +03:00
l := matcher.Len()
if l >= maxLen {
maxLen = l
idx = i
val = matcher
2016-01-08 20:14:31 +03:00
}
}
left := matchers[:idx]
var right []match.Matcher
if len(matchers) > idx+1 {
right = matchers[idx+1:]
}
2016-01-14 18:29:13 +03:00
var l, r match.Matcher
var err error
2016-01-08 20:14:31 +03:00
if len(left) > 0 {
2016-01-14 18:29:13 +03:00
l, err = compileMatchers(left)
2016-01-08 20:14:31 +03:00
if err != nil {
return nil, err
}
}
if len(right) > 0 {
2016-01-14 18:29:13 +03:00
r, err = compileMatchers(right)
2016-01-08 20:14:31 +03:00
if err != nil {
return nil, err
}
}
2016-01-14 18:29:13 +03:00
return match.NewBTree(val, l, r), nil
2016-01-08 20:14:31 +03:00
}
2016-01-15 19:50:12 +03:00
//func complexity(m match.Matcher) int {
// var matchers []match.Matcher
// var k int
//
// switch matcher := m.(type) {
//
// case match.Nothing:
// return 0
//
// case match.Max, match.Range, match.Suffix, match.Text:
// return 1
//
// case match.PrefixSuffix, match.Single, match.Row:
// return 2
//
// case match.Any, match.Contains, match.List, match.Min, match.Prefix, match.Super:
// return 4
//
// case match.BTree:
// matchers = append(matchers, matcher.Value)
// if matcher.Left != nil {
// matchers = append(matchers, matcher.Left)
// }
// if matcher.Right != nil {
// matchers = append(matchers, matcher.Right)
// }
// k = 1
//
// case match.AnyOf:
// matchers = matcher.Matchers
// k = 1
// case match.EveryOf:
// matchers = matcher.Matchers
// k = 1
//
// default:
// return 0
// }
//
// var sum int
// for _, m := range matchers {
// sum += complexity(m)
// }
//
// return sum * k
//}
2016-02-02 22:03:37 +03:00
func doAnyOf(n *nodeAnyOf, s []rune) (match.Matcher, error) {
2016-01-15 19:50:12 +03:00
var matchers []match.Matcher
for _, desc := range n.children() {
if desc == nil {
matchers = append(matchers, match.NewNothing())
2016-01-15 19:50:12 +03:00
continue
}
m, err := do(desc, s)
if err != nil {
return nil, err
}
matchers = append(matchers, optimize(m))
}
return match.NewAnyOf(matchers...), nil
2016-01-15 19:50:12 +03:00
}
2016-02-02 14:57:42 +03:00
func do(leaf node, s []rune) (m match.Matcher, err error) {
2016-01-15 19:50:12 +03:00
switch n := leaf.(type) {
case *nodeAnyOf:
// todo this could be faster on pattern_alternatives_combine_lite
if n := minimizeAnyOf(n.children()); n != nil {
return do(n, s)
}
2016-01-08 20:14:31 +03:00
var matchers []match.Matcher
2016-01-15 19:50:12 +03:00
for _, desc := range n.children() {
if desc == nil {
matchers = append(matchers, match.NewNothing())
2016-01-15 19:50:12 +03:00
continue
}
2016-01-08 20:14:31 +03:00
m, err := do(desc, s)
if err != nil {
return nil, err
}
matchers = append(matchers, optimize(m))
}
return match.NewAnyOf(matchers...), nil
2016-01-15 19:50:12 +03:00
case *nodePattern:
nodes := leaf.children()
if len(nodes) == 0 {
return match.NewNothing(), nil
2016-01-15 19:50:12 +03:00
}
var matchers []match.Matcher
for _, desc := range nodes {
m, err := do(desc, s)
2016-01-08 20:14:31 +03:00
if err != nil {
return nil, err
}
2016-01-15 19:50:12 +03:00
matchers = append(matchers, optimize(m))
}
m, err = compileMatchers(minimizeMatchers(matchers))
if err != nil {
return nil, err
2016-01-08 20:14:31 +03:00
}
case *nodeList:
m = match.NewList([]rune(n.chars), n.not)
2016-01-08 20:14:31 +03:00
case *nodeRange:
m = match.NewRange(n.lo, n.hi, n.not)
2016-01-08 20:14:31 +03:00
case *nodeAny:
m = match.NewAny(s)
2016-01-08 20:14:31 +03:00
case *nodeSuper:
m = match.NewSuper()
2016-01-08 20:14:31 +03:00
case *nodeSingle:
m = match.NewSingle(s)
2016-01-08 20:14:31 +03:00
case *nodeText:
2016-01-14 21:32:02 +03:00
m = match.NewText(n.text)
2016-01-08 20:14:31 +03:00
default:
return nil, fmt.Errorf("could not compile tree: unknown node type")
}
return optimize(m), nil
}
2016-02-02 22:03:37 +03:00
func do2(node node, s []rune) ([]match.Matcher, error) {
2016-01-11 10:17:19 +03:00
var result []match.Matcher
switch n := node.(type) {
case *nodePattern:
ways := [][]match.Matcher{[]match.Matcher{}}
for _, desc := range node.children() {
variants, err := do2(desc, s)
if err != nil {
return nil, err
}
fmt.Println("variants pat", variants)
for i, l := 0, len(ways); i < l; i++ {
for i := 0; i < len(variants); i++ {
o := optimize(variants[i])
if i == len(variants)-1 {
ways[i] = append(ways[i], o)
} else {
var w []match.Matcher
copy(w, ways[i])
ways = append(ways, append(w, o))
}
}
}
fmt.Println("ways pat", ways)
}
for _, matchers := range ways {
2016-01-14 18:29:13 +03:00
c, err := compileMatchers(minimizeMatchers(matchers))
2016-01-11 10:17:19 +03:00
if err != nil {
return nil, err
}
result = append(result, c)
}
case *nodeAnyOf:
ways := make([][]match.Matcher, len(node.children()))
for _, desc := range node.children() {
variants, err := do2(desc, s)
if err != nil {
return nil, err
}
fmt.Println("variants any", variants)
for x, l := 0, len(ways); x < l; x++ {
for i := 0; i < len(variants); i++ {
o := optimize(variants[i])
if i == len(variants)-1 {
ways[x] = append(ways[x], o)
} else {
var w []match.Matcher
copy(w, ways[x])
ways = append(ways, append(w, o))
}
}
}
fmt.Println("ways any", ways)
}
for _, matchers := range ways {
2016-01-14 18:29:13 +03:00
c, err := compileMatchers(minimizeMatchers(matchers))
2016-01-11 10:17:19 +03:00
if err != nil {
return nil, err
}
result = append(result, c)
}
case *nodeList:
result = append(result, match.NewList([]rune(n.chars), n.not))
2016-01-11 10:17:19 +03:00
case *nodeRange:
result = append(result, match.NewRange(n.lo, n.hi, n.not))
2016-01-11 10:17:19 +03:00
case *nodeAny:
result = append(result, match.NewAny(s))
2016-01-11 10:17:19 +03:00
case *nodeSuper:
result = append(result, match.NewSuper())
2016-01-11 10:17:19 +03:00
case *nodeSingle:
result = append(result, match.NewSingle(s))
2016-01-11 10:17:19 +03:00
case *nodeText:
2016-01-14 21:32:02 +03:00
result = append(result, match.NewText(n.text))
2016-01-11 10:17:19 +03:00
default:
return nil, fmt.Errorf("could not compile tree: unknown node type")
}
for i, m := range result {
result[i] = optimize(m)
}
return result, nil
}
2016-02-02 14:57:42 +03:00
func compile(ast *nodePattern, s []rune) (Glob, error) {
2016-01-11 10:17:19 +03:00
// ms, err := do2(ast, s)
// if err != nil {
// return nil, err
// }
// if len(ms) == 1 {
// return ms[0], nil
// } else {
// return match.NewAnyOf(ms), nil
2016-01-11 10:17:19 +03:00
// }
2016-01-08 20:14:31 +03:00
g, err := do(ast, s)
if err != nil {
return nil, err
}
return g, nil
}