glob/match/optimize.go

423 lines
6.9 KiB
Go
Raw Permalink Normal View History

2018-02-16 17:36:02 +03:00
package match
import (
"fmt"
2019-02-06 23:43:38 +03:00
"github.com/gobwas/glob/internal/debug"
2018-10-02 22:02:43 +03:00
"github.com/gobwas/glob/util/runes"
2018-02-16 17:36:02 +03:00
)
2019-02-06 23:43:38 +03:00
func Optimize(m Matcher) (opt Matcher) {
if debug.Enabled {
defer func() {
a := fmt.Sprintf("%s", m)
b := fmt.Sprintf("%s", opt)
if a != b {
debug.EnterPrefix("optimized %s: -> %s", a, b)
debug.LeavePrefix()
}
}()
}
2018-02-16 17:36:02 +03:00
switch v := m.(type) {
case Any:
if len(v.sep) == 0 {
return NewSuper()
}
case List:
if v.not == false && len(v.rs) == 1 {
return NewText(string(v.rs))
}
return m
case Tree:
v.left = Optimize(v.left)
v.right = Optimize(v.right)
txt, ok := v.value.(Text)
if !ok {
return m
}
var (
2019-03-12 22:15:41 +03:00
leftNil = v.left == nil || v.left == Nothing{}
rightNil = v.right == nil || v.right == Nothing{}
2018-02-16 17:36:02 +03:00
)
if leftNil && rightNil {
return NewText(txt.s)
}
_, leftSuper := v.left.(Super)
lp, leftPrefix := v.left.(Prefix)
la, leftAny := v.left.(Any)
_, rightSuper := v.right.(Super)
rs, rightSuffix := v.right.(Suffix)
ra, rightAny := v.right.(Any)
switch {
case leftSuper && rightSuper:
return NewContains(txt.s)
case leftSuper && rightNil:
return NewSuffix(txt.s)
case rightSuper && leftNil:
return NewPrefix(txt.s)
case leftNil && rightSuffix:
return NewPrefixSuffix(txt.s, rs.s)
case rightNil && leftPrefix:
return NewPrefixSuffix(lp.s, txt.s)
case rightNil && leftAny:
return NewSuffixAny(txt.s, la.sep)
case leftNil && rightAny:
return NewPrefixAny(txt.s, ra.sep)
}
2019-02-06 23:43:38 +03:00
case Container:
var (
first Matcher
n int
)
v.Content(func(m Matcher) {
first = m
n++
})
if n == 1 {
return first
}
return m
2018-02-16 17:36:02 +03:00
}
return m
}
2019-02-06 23:43:38 +03:00
func Compile(ms []Matcher) (m Matcher, err error) {
if debug.Enabled {
debug.EnterPrefix("compiling %s", ms)
defer func() {
debug.Logf("-> %s, %v", m, err)
debug.LeavePrefix()
}()
}
2018-02-16 17:36:02 +03:00
if len(ms) == 0 {
return nil, fmt.Errorf("compile error: need at least one matcher")
}
if len(ms) == 1 {
return ms[0], nil
}
if m := glueMatchers(ms); m != nil {
return m, nil
}
var (
2019-02-06 23:43:38 +03:00
x = -1
max = -2
wantText bool
indexer MatchIndexer
2018-02-16 17:36:02 +03:00
)
for i, m := range ms {
2019-02-06 23:43:38 +03:00
mx, ok := m.(MatchIndexer)
2018-02-16 17:36:02 +03:00
if !ok {
continue
}
2019-02-06 23:43:38 +03:00
_, isText := m.(Text)
if wantText && !isText {
continue
}
n := m.MinLen()
if (!wantText && isText) || n > max {
max = n
x = i
indexer = mx
wantText = isText
2018-02-16 17:36:02 +03:00
}
}
if indexer == nil {
return nil, fmt.Errorf("can not index on matchers")
}
2019-02-06 23:43:38 +03:00
left := ms[:x]
2018-02-16 17:36:02 +03:00
var right []Matcher
2019-02-06 23:43:38 +03:00
if len(ms) > x+1 {
right = ms[x+1:]
2018-02-16 17:36:02 +03:00
}
2019-02-10 21:25:05 +03:00
var (
l Matcher = Nothing{}
r Matcher = Nothing{}
)
2018-02-16 17:36:02 +03:00
if len(left) > 0 {
l, err = Compile(left)
if err != nil {
return nil, err
}
}
if len(right) > 0 {
r, err = Compile(right)
if err != nil {
return nil, err
}
}
return NewTree(indexer, l, r), nil
}
func glueMatchers(ms []Matcher) Matcher {
if m := glueMatchersAsEvery(ms); m != nil {
return m
}
if m := glueMatchersAsRow(ms); m != nil {
return m
}
return nil
}
func glueMatchersAsRow(ms []Matcher) Matcher {
if len(ms) <= 1 {
return nil
}
var s []MatchIndexSizer
for _, m := range ms {
rsz, ok := m.(MatchIndexSizer)
if !ok {
return nil
}
s = append(s, rsz)
}
return NewRow(s)
}
func glueMatchersAsEvery(ms []Matcher) Matcher {
if len(ms) <= 1 {
return nil
}
var (
hasAny bool
hasSuper bool
hasSingle bool
min int
separator []rune
)
for i, matcher := range ms {
var sep []rune
switch m := matcher.(type) {
case Super:
sep = []rune{}
hasSuper = true
case Any:
sep = m.sep
hasAny = true
case Single:
sep = m.sep
hasSingle = true
min++
case List:
if !m.not {
return nil
}
sep = m.rs
hasSingle = true
min++
default:
return nil
}
// initialize
if i == 0 {
separator = sep
}
if runes.Equal(sep, separator) {
continue
}
return nil
}
if hasSuper && !hasAny && !hasSingle {
return NewSuper()
}
if hasAny && !hasSuper && !hasSingle {
return NewAny(separator)
}
if (hasAny || hasSuper) && min > 0 && len(separator) == 0 {
return NewMin(min)
}
var every []Matcher
if min > 0 {
every = append(every, NewMin(min))
if !hasAny && !hasSuper {
every = append(every, NewMax(min))
}
}
if len(separator) > 0 {
every = append(every, NewAny(separator))
}
return NewEveryOf(every)
}
2019-02-06 23:43:38 +03:00
type result struct {
2019-03-12 22:04:16 +03:00
ms []Matcher
matchers int
maxMinLen int
sumMinLen int
nesting int
2019-02-06 23:43:38 +03:00
}
func compareResult(a, b result) int {
2019-03-12 22:04:16 +03:00
if x := b.sumMinLen - a.sumMinLen; x != 0 {
2019-02-06 23:43:38 +03:00
return x
}
2019-03-12 22:04:16 +03:00
if x := len(a.ms) - len(b.ms); x != 0 {
2019-02-06 23:43:38 +03:00
return x
}
2019-02-10 21:25:05 +03:00
if x := a.nesting - b.nesting; x != 0 {
2019-02-06 23:43:38 +03:00
return x
}
2019-03-12 22:04:16 +03:00
if x := a.matchers - b.matchers; x != 0 {
return x
}
if x := b.maxMinLen - a.maxMinLen; x != 0 {
2019-02-06 23:43:38 +03:00
return x
}
return 0
}
func collapse(ms []Matcher, x Matcher, i, j int) (cp []Matcher) {
cp = make([]Matcher, len(ms)-(j-i)+1)
copy(cp[0:i], ms[0:i])
copy(cp[i+1:], ms[j:])
cp[i] = x
return cp
}
func matchersCount(ms []Matcher) (n int) {
n = len(ms)
for _, m := range ms {
n += countNestedMatchers(m)
}
return n
}
func countNestedMatchers(m Matcher) (n int) {
if c, _ := m.(Container); c != nil {
c.Content(func(m Matcher) {
n += 1 + countNestedMatchers(m)
})
}
return n
}
func nestingDepth(m Matcher) (depth int) {
c, ok := m.(Container)
if !ok {
return 0
}
var max int
c.Content(func(m Matcher) {
if d := nestingDepth(m); d > max {
max = d
}
})
return max + 1
}
func maxMinLen(ms []Matcher) (max int) {
for _, m := range ms {
if n := m.MinLen(); n > max {
max = n
2018-02-16 17:36:02 +03:00
}
}
2019-02-06 23:43:38 +03:00
return max
}
2019-03-12 22:04:16 +03:00
func sumMinLen(ms []Matcher) (sum int) {
for _, m := range ms {
sum += m.MinLen()
}
return sum
}
2019-02-06 23:43:38 +03:00
func maxNestingDepth(ms []Matcher) (max int) {
for _, m := range ms {
if n := nestingDepth(m); n > max {
max = n
}
2018-02-16 17:36:02 +03:00
}
2019-02-06 23:43:38 +03:00
return
}
func minimize(ms []Matcher, i, j int, best *result) *result {
if j > len(ms) {
j = 0
i++
}
if i > len(ms)-2 {
return best
}
if j == 0 {
j = i + 2
2018-02-16 17:36:02 +03:00
}
2019-02-06 23:43:38 +03:00
if g := glueMatchers(ms[i:j]); g != nil {
cp := collapse(ms, g, i, j)
r := result{
2019-03-12 22:04:16 +03:00
ms: cp,
matchers: matchersCount(cp),
sumMinLen: sumMinLen(cp),
maxMinLen: maxMinLen(cp),
nesting: maxNestingDepth(cp),
2019-02-06 23:43:38 +03:00
}
if debug.Enabled {
debug.EnterPrefix(
2019-03-12 22:04:16 +03:00
"intermediate: %s (matchers:%d, summinlen:%d, maxminlen:%d, nesting:%d)",
cp, r.matchers, r.sumMinLen, r.maxMinLen, r.nesting,
2019-02-06 23:43:38 +03:00
)
}
if best == nil {
best = new(result)
}
if best.ms == nil || compareResult(r, *best) < 0 {
*best = r
if debug.Enabled {
debug.Logf("new best result")
}
}
best = minimize(cp, 0, 0, best)
if debug.Enabled {
debug.LeavePrefix()
}
}
return minimize(ms, i, j+1, best)
}
func Minimize(ms []Matcher) (m []Matcher) {
if debug.Enabled {
debug.EnterPrefix("minimizing %s", ms)
defer func() {
debug.Logf("-> %s", m)
debug.LeavePrefix()
}()
}
best := minimize(ms, 0, 0, nil)
if best == nil {
return ms
2018-02-16 17:36:02 +03:00
}
2019-02-06 23:43:38 +03:00
return best.ms
2018-02-16 17:36:02 +03:00
}