dramatic refactoring

This commit is contained in:
gobwas 2018-02-16 17:36:02 +03:00 committed by Sergey Kamardin
parent e7a84e9525
commit e4652bc1f4
41 changed files with 1478 additions and 1315 deletions

View File

@ -3,12 +3,12 @@ package main
import (
"flag"
"fmt"
"github.com/gobwas/glob"
"github.com/gobwas/glob/match"
"github.com/gobwas/glob/match/debug"
"os"
"strings"
"unicode/utf8"
"github.com/gobwas/glob"
"github.com/gobwas/glob/match"
)
func main() {
@ -40,5 +40,5 @@ func main() {
}
matcher := glob.(match.Matcher)
fmt.Fprint(os.Stdout, debug.Graphviz(*pattern, matcher))
fmt.Fprint(os.Stdout, match.Graphviz(*pattern, matcher))
}

View File

@ -3,11 +3,12 @@ package main
import (
"flag"
"fmt"
"github.com/gobwas/glob"
"os"
"strings"
"testing"
"unicode/utf8"
"github.com/gobwas/glob"
)
func benchString(r testing.BenchmarkResult) string {

View File

@ -5,467 +5,58 @@ package compiler
import (
"fmt"
"reflect"
"os"
"strings"
"sync/atomic"
"github.com/gobwas/glob/match"
"github.com/gobwas/glob/syntax/ast"
"github.com/gobwas/glob/util/runes"
)
func optimizeMatcher(matcher match.Matcher) match.Matcher {
switch m := matcher.(type) {
case match.Any:
if len(m.Separators) == 0 {
return match.NewSuper()
}
case match.AnyOf:
if len(m.Matchers) == 1 {
return m.Matchers[0]
}
return m
case match.List:
if m.Not == false && len(m.List) == 1 {
return match.NewText(string(m.List))
}
return m
case match.BTree:
m.Left = optimizeMatcher(m.Left)
m.Right = optimizeMatcher(m.Right)
r, ok := m.Value.(match.Text)
if !ok {
return m
}
var (
leftNil = m.Left == nil
rightNil = m.Right == nil
)
if leftNil && rightNil {
return match.NewText(r.Str)
}
_, leftSuper := m.Left.(match.Super)
lp, leftPrefix := m.Left.(match.Prefix)
la, leftAny := m.Left.(match.Any)
_, rightSuper := m.Right.(match.Super)
rs, rightSuffix := m.Right.(match.Suffix)
ra, rightAny := m.Right.(match.Any)
switch {
case leftSuper && rightSuper:
return match.NewContains(r.Str, false)
case leftSuper && rightNil:
return match.NewSuffix(r.Str)
case rightSuper && leftNil:
return match.NewPrefix(r.Str)
case leftNil && rightSuffix:
return match.NewPrefixSuffix(r.Str, rs.Suffix)
case rightNil && leftPrefix:
return match.NewPrefixSuffix(lp.Prefix, r.Str)
case rightNil && leftAny:
return match.NewSuffixAny(r.Str, la.Separators)
case leftNil && rightAny:
return match.NewPrefixAny(r.Str, ra.Separators)
}
return m
func Compile(tree *ast.Node, sep []rune) (match.Matcher, error) {
m, err := compile(tree, sep)
if err != nil {
return nil, err
}
return matcher
return m, nil
}
// compileMatchers combines a sequence of matchers into a single matcher.
//
// An empty sequence is an error and a single matcher is returned as is. If the
// whole sequence can be glued into one matcher, that matcher is returned.
// Otherwise the last matcher with the greatest known length (Len() != -1)
// becomes the value of a BTree, and the matchers on either side of it are
// compiled recursively into the left and right branches. When no matcher has
// a known length, the first matcher becomes the value and the rest become the
// right branch.
func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
	switch len(matchers) {
	case 0:
		return nil, fmt.Errorf("compile error: need at least one matcher")
	case 1:
		return matchers[0], nil
	}
	if glued := glueMatchers(matchers); glued != nil {
		return glued, nil
	}

	// Pick the pivot: ties on length are resolved in favour of the later matcher.
	var (
		pivot    match.Matcher
		pivotIdx = -1
		longest  = -1
	)
	for i := range matchers {
		n := matchers[i].Len()
		if n == -1 || n < longest {
			continue
		}
		pivot, pivotIdx, longest = matchers[i], i, n
	}

	if pivot == nil { // not found matcher with static length
		rest, err := compileMatchers(matchers[1:])
		if err != nil {
			return nil, err
		}
		return match.NewBTree(matchers[0], nil, rest), nil
	}

	// side compiles one flank of the pivot; an empty flank yields a nil branch.
	side := func(part []match.Matcher) (match.Matcher, error) {
		if len(part) == 0 {
			return nil, nil
		}
		return compileMatchers(part)
	}

	left, err := side(matchers[:pivotIdx])
	if err != nil {
		return nil, err
	}
	right, err := side(matchers[pivotIdx+1:])
	if err != nil {
		return nil, err
	}
	return match.NewBTree(pivot, left, right), nil
}
// glueMatchers tries to replace the given matchers with one equivalent
// matcher. It tries glueMatchersAsEvery first, then glueMatchersAsRow, and
// returns nil when neither strategy applies.
func glueMatchers(matchers []match.Matcher) match.Matcher {
	strategies := []func([]match.Matcher) match.Matcher{
		glueMatchersAsEvery,
		glueMatchersAsRow,
	}
	for _, glue := range strategies {
		if glued := glue(matchers); glued != nil {
			return glued
		}
	}
	return nil
}
// glueMatchersAsRow joins matchers into a single match.Row whose length is
// the sum of the individual lengths. It returns nil when fewer than two
// matchers are given or when any matcher reports an unknown length (-1).
func glueMatchersAsRow(matchers []match.Matcher) match.Matcher {
	if len(matchers) < 2 {
		return nil
	}
	total := 0
	row := make([]match.Matcher, 0, len(matchers))
	for i := range matchers {
		n := matchers[i].Len()
		if n == -1 {
			return nil
		}
		row = append(row, matchers[i])
		total += n
	}
	return match.NewRow(total, row...)
}
// glueMatchersAsEvery tries to collapse a run of Super, Any, Single and
// negated List matchers into one simpler matcher.
//
// It returns nil when fewer than two matchers are given, when any matcher is
// of another type (including a non-negated List), or when the matchers do not
// all share the same separator set as the first one.
//
// Otherwise the result is, in order of preference:
//   - Super, when only Super matchers are present;
//   - Any with the shared separators, when only Any matchers are present;
//   - Min(min), when an Any/Super is present together with at least one
//     Single/negated List and the separator set is empty;
//   - an EveryOf combining Min(min), Max(min) (only when there is no Any or
//     Super) and a negated Contains over the separators (only when the
//     separator set is non-empty).
func glueMatchersAsEvery(matchers []match.Matcher) match.Matcher {
	if len(matchers) <= 1 {
		return nil
	}
	var (
		hasAny    bool
		hasSuper  bool
		hasSingle bool
		min       int    // number of Single and negated List matchers seen
		separator []rune // separator set of the first matcher; all others must equal it
	)
	for i, matcher := range matchers {
		var sep []rune
		switch m := matcher.(type) {
		case match.Super:
			// Super carries no separators; use an empty set for the comparison below.
			sep = []rune{}
			hasSuper = true
		case match.Any:
			sep = m.Separators
			hasAny = true
		case match.Single:
			sep = m.Separators
			hasSingle = true
			min++
		case match.List:
			// Only a negated list is handled here; its runes are used as the separator set.
			if !m.Not {
				return nil
			}
			sep = m.List
			hasSingle = true
			min++
		default:
			return nil
		}
		// The first matcher defines the separator set the rest are compared against.
		if i == 0 {
			separator = sep
		}
		if runes.Equal(sep, separator) {
			continue
		}
		// Separator sets differ, so the matchers cannot be glued.
		return nil
	}
	if hasSuper && !hasAny && !hasSingle {
		return match.NewSuper()
	}
	if hasAny && !hasSuper && !hasSingle {
		return match.NewAny(separator)
	}
	if (hasAny || hasSuper) && min > 0 && len(separator) == 0 {
		return match.NewMin(min)
	}
	every := match.NewEveryOf()
	if min > 0 {
		every.Add(match.NewMin(min))
		// Without Any/Super nothing can extend the match, so the length is also capped.
		if !hasAny && !hasSuper {
			every.Add(match.NewMax(min))
		}
	}
	if len(separator) > 0 {
		// Second argument is the "not" flag: the input must not contain the separator string.
		every.Add(match.NewContains(string(separator), true))
	}
	return every
}
// minimizeMatchers repeatedly replaces a sub-range of matchers with a single
// glued matcher until no further reduction is possible.
//
// Every sub-range [lo, hi) is tried. A candidate replaces the current best
// when there is no best yet, when both have a known length and the
// candidate's is greater, or when the candidate covers more matchers. If
// nothing can be glued the input slice is returned unchanged. If the
// replacement does not shorten the list, the new list is returned without
// recursing further.
func minimizeMatchers(matchers []match.Matcher) []match.Matcher {
	var (
		best     match.Matcher
		from, to int
		width    int
	)
	for lo := range matchers {
		for hi := len(matchers); hi > lo; hi-- {
			glued := glueMatchers(matchers[lo:hi])
			if glued == nil {
				continue
			}
			take := best == nil
			if !take {
				bestLen, gluedLen := best.Len(), glued.Len()
				longer := bestLen > -1 && gluedLen > -1 && gluedLen > bestLen
				take = longer || width < hi-lo
			}
			if take {
				best, from, to, width = glued, lo, hi, hi-lo
			}
		}
	}
	if best == nil {
		return matchers
	}

	// Rebuild the list as: untouched head, glued matcher, untouched tail.
	reduced := make([]match.Matcher, 0, from+1+len(matchers)-to)
	reduced = append(reduced, matchers[:from]...)
	reduced = append(reduced, best)
	reduced = append(reduced, matchers[to:]...)

	if len(reduced) == len(matchers) {
		return reduced
	}
	return minimizeMatchers(reduced)
}
// minimizeTree tries to reduce the number of nodes in the given tree.
// Only AnyOf nodes are handled (via minimizeTreeAnyOf); for any other kind
// it returns nil.
func minimizeTree(tree *ast.Node) *ast.Node {
	if tree.Kind == ast.KindAnyOf {
		return minimizeTreeAnyOf(tree)
	}
	return nil
}
// minimizeTreeAnyOf factors out the children shared by every alternative of
// an AnyOf node, looking for a common run on the left and on the right.
//
// It returns nil when the alternatives are not all Pattern nodes or when they
// share nothing. Otherwise it returns a new Pattern node made of the common
// left part, the de-duplicated remainders, and the common right part. An
// alternative fully consumed by the common parts becomes a Nothing node.
func minimizeTreeAnyOf(tree *ast.Node) *ast.Node {
	alternatives := tree.Children
	if !areOfSameKind(alternatives, ast.KindPattern) {
		return nil
	}

	prefix, suffix := commonChildren(alternatives)
	nPrefix, nSuffix := len(prefix), len(suffix)
	if nPrefix+nSuffix == 0 { // there are no common parts
		return nil
	}

	var parts []*ast.Node
	if nPrefix != 0 {
		parts = append(parts, ast.NewNode(ast.KindPattern, nil, prefix...))
	}

	var rest []*ast.Node
	for _, alt := range alternatives {
		middle := alt.Children[nPrefix : len(alt.Children)-nSuffix]
		var node *ast.Node
		switch len(middle) {
		case 0:
			// The alternative is fully covered by the common parts.
			node = ast.NewNode(ast.KindNothing, nil)
		default:
			node = ast.NewNode(ast.KindPattern, nil, middle...)
		}
		rest = appendIfUnique(rest, node)
	}

	// A lone Nothing remainder adds nothing to the result and is dropped.
	if len(rest) > 1 {
		parts = append(parts, ast.NewNode(ast.KindAnyOf, nil, rest...))
	} else if len(rest) == 1 && rest[0].Kind != ast.KindNothing {
		parts = append(parts, rest[0])
	}

	if nSuffix != 0 {
		parts = append(parts, ast.NewNode(ast.KindPattern, nil, suffix...))
	}
	return ast.NewNode(ast.KindPattern, nil, parts...)
}
// commonChildren finds the children that all given nodes share at their left
// and right ends.
//
// The node with the fewest children is used as the reference. Its children
// are compared from both ends at once against the children at the same
// distance from the matching end of every other node. Scanning on a side
// stops at the first mismatch on that side. Both results are nil when fewer
// than two nodes are given.
func commonChildren(nodes []*ast.Node) (commonLeft, commonRight []*ast.Node) {
	if len(nodes) <= 1 {
		return
	}
	// find the node that has the least number of children
	idx := leastChildren(nodes)
	if idx == -1 {
		return
	}
	tree := nodes[idx]
	treeLength := len(tree.Children)
	// Allocate the maximum possible size for commonRight so that elements can
	// be stored in reverse order (from end to start) without sorting afterwards.
	commonRight = make([]*ast.Node, treeLength)
	lastRight := treeLength // will use this to get results as commonRight[lastRight:]
	var (
		breakLeft   bool // set once the left scan hit a mismatch
		breakRight  bool // set once the right scan hit a mismatch or met the left scan
		commonTotal int  // number of reference children already claimed by either side
	)
	// i walks the reference children from the left, j from the right.
	for i, j := 0, treeLength-1; commonTotal < treeLength && j >= 0 && !(breakLeft && breakRight); i, j = i+1, j-1 {
		treeLeft := tree.Children[i]
		treeRight := tree.Children[j]
		for k := 0; k < len(nodes) && !(breakLeft && breakRight); k++ {
			// skip the reference node itself
			if k == idx {
				continue
			}
			restLeft := nodes[k].Children[i]
			// Same distance from the right end, adjusted for the longer child list.
			restRight := nodes[k].Children[j+len(nodes[k].Children)-treeLength]
			breakLeft = breakLeft || !treeLeft.Equal(restLeft)
			// disable searching for right common parts, if left part is already overlapping
			breakRight = breakRight || (!breakLeft && j <= i)
			breakRight = breakRight || !treeRight.Equal(restRight)
		}
		if !breakLeft {
			commonTotal++
			commonLeft = append(commonLeft, treeLeft)
		}
		if !breakRight {
			commonTotal++
			lastRight = j
			commonRight[j] = treeRight
		}
	}
	// Keep only the filled tail of the preallocated slice.
	commonRight = commonRight[lastRight:]
	return
}
// appendIfUnique appends val to target unless target already holds a node
// that is deeply equal to it (reflect.DeepEqual), in which case target is
// returned unchanged.
func appendIfUnique(target []*ast.Node, val *ast.Node) []*ast.Node {
	seen := false
	for i := 0; i < len(target) && !seen; i++ {
		seen = reflect.DeepEqual(target[i], val)
	}
	if seen {
		return target
	}
	return append(target, val)
}
// areOfSameKind reports whether every node in nodes has the given kind.
// It reports true for an empty slice.
func areOfSameKind(nodes []*ast.Node, kind ast.Kind) bool {
	for i := 0; i < len(nodes); i++ {
		if nodes[i].Kind != kind {
			return false
		}
	}
	return true
}
// leastChildren returns the index of the node with the fewest children.
// The first such node wins on ties; -1 is returned for an empty slice.
func leastChildren(nodes []*ast.Node) int {
	if len(nodes) == 0 {
		return -1
	}
	best, fewest := 0, len(nodes[0].Children)
	for i := 1; i < len(nodes); i++ {
		if n := len(nodes[i].Children); n < fewest {
			best, fewest = i, n
		}
	}
	return best
}
func compileTreeChildren(tree *ast.Node, sep []rune) ([]match.Matcher, error) {
func compileNodes(ns []*ast.Node, sep []rune) ([]match.Matcher, error) {
var matchers []match.Matcher
for _, desc := range tree.Children {
m, err := compile(desc, sep)
for _, n := range ns {
m, err := compile(n, sep)
if err != nil {
return nil, err
}
matchers = append(matchers, optimizeMatcher(m))
matchers = append(matchers, m)
}
return matchers, nil
}
func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) {
enter()
logf("compiling %s", tree)
defer func() {
logf("result %s", m)
leave()
}()
// todo this could be faster on pattern_alternatives_combine_lite (see glob_test.go)
if n := ast.Minimize(tree); n != nil {
logf("minimized tree")
logf("\t%s", tree)
logf("\t%s", n)
r, err := compile(n, sep)
if err == nil {
return r, nil
}
logf("compile minimized tree failed: %v", err)
}
switch tree.Kind {
case ast.KindAnyOf:
// todo this could be faster on pattern_alternatives_combine_lite (see glob_test.go)
if n := minimizeTree(tree); n != nil {
return compile(n, sep)
}
matchers, err := compileTreeChildren(tree, sep)
matchers, err := compileNodes(tree.Children, sep)
if err != nil {
return nil, err
}
@ -475,11 +66,11 @@ func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) {
if len(tree.Children) == 0 {
return match.NewNothing(), nil
}
matchers, err := compileTreeChildren(tree, sep)
matchers, err := compileNodes(tree.Children, sep)
if err != nil {
return nil, err
}
m, err = compileMatchers(minimizeMatchers(matchers))
m, err = match.Compile(match.Minimize(matchers))
if err != nil {
return nil, err
}
@ -512,14 +103,25 @@ func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) {
return nil, fmt.Errorf("could not compile tree: unknown node type")
}
return optimizeMatcher(m), nil
return match.Optimize(m), nil
}
func Compile(tree *ast.Node, sep []rune) (match.Matcher, error) {
m, err := compile(tree, sep)
if err != nil {
return nil, err
}
var i = new(int32)
return m, nil
func logf(f string, args ...interface{}) {
n := int(atomic.LoadInt32(i))
fmt.Fprint(os.Stderr,
strings.Repeat(" ", n),
fmt.Sprintf("(%d) ", n),
fmt.Sprintf(f, args...),
"\n",
)
}
func enter() {
atomic.AddInt32(i, 1)
}
func leave() {
atomic.AddInt32(i, -1)
}

View File

@ -1,140 +1,16 @@
package compiler
import (
"reflect"
"testing"
"github.com/gobwas/glob/match"
"github.com/gobwas/glob/match/debug"
"github.com/gobwas/glob/syntax/ast"
"reflect"
"testing"
)
var separators = []rune{'.'}
func TestCommonChildren(t *testing.T) {
for i, test := range []struct {
nodes []*ast.Node
left []*ast.Node
right []*ast.Node
}{
{
nodes: []*ast.Node{
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"z"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
),
},
},
{
nodes: []*ast.Node{
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"z"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
),
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
),
},
left: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"a"}),
},
right: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"c"}),
},
},
{
nodes: []*ast.Node{
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
ast.NewNode(ast.KindText, ast.Text{"d"}),
),
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
ast.NewNode(ast.KindText, ast.Text{"d"}),
),
},
left: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
},
right: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"c"}),
ast.NewNode(ast.KindText, ast.Text{"d"}),
},
},
{
nodes: []*ast.Node{
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
),
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
ast.NewNode(ast.KindText, ast.Text{"c"}),
),
},
left: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"b"}),
},
right: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"c"}),
},
},
{
nodes: []*ast.Node{
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"d"}),
),
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"d"}),
),
ast.NewNode(ast.KindNothing, nil,
ast.NewNode(ast.KindText, ast.Text{"a"}),
ast.NewNode(ast.KindText, ast.Text{"e"}),
),
},
left: []*ast.Node{
ast.NewNode(ast.KindText, ast.Text{"a"}),
},
right: []*ast.Node{},
},
} {
left, right := commonChildren(test.nodes)
if !nodesEqual(left, test.left) {
t.Errorf("[%d] left, right := commonChildren(); left = %v; want %v", i, left, test.left)
}
if !nodesEqual(right, test.right) {
t.Errorf("[%d] left, right := commonChildren(); right = %v; want %v", i, right, test.right)
}
}
}
// nodesEqual reports whether a and b have the same length and each pair of
// nodes at the same position is equal according to Node.Equal.
func nodesEqual(a, b []*ast.Node) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if same := a[i].Equal(b[i]); !same {
			return false
		}
	}
	return true
}
func TestGlueMatchers(t *testing.T) {
for id, test := range []struct {
in []match.Matcher

View File

@ -60,6 +60,24 @@ func glob(s bool, p, m string, d ...rune) test {
return test{p, m, s, d}
}
// globc builds a test case that carries only a pattern and its delimiters;
// the match fixture and the expected result are left at their zero values.
func globc(p string, d ...rune) test {
	return test{pattern: p, delimiters: d}
}
// TestCompilation checks that each listed pattern compiles without error.
// Each pattern runs as its own subtest, named after the pattern so that a
// failure report identifies the offending glob.
func TestCompilation(t *testing.T) {
	for _, tc := range []test{
		globc("{*,**,?}", '.'),
		globc("{*.google.*,yandex.*}", '.'),
	} {
		t.Run(tc.pattern, func(t *testing.T) {
			if _, err := Compile(tc.pattern, tc.delimiters...); err != nil {
				t.Fatal(err)
			}
		})
	}
}
func TestGlob(t *testing.T) {
for _, test := range []test{
glob(true, "* ?at * eyes", "my cat has very bright eyes"),
@ -164,6 +182,11 @@ func TestGlob(t *testing.T) {
glob(false, pattern_prefix_suffix, fixture_prefix_suffix_mismatch),
} {
t.Run("", func(t *testing.T) {
defer func() {
if thePanic := recover(); thePanic != nil {
t.Fatalf("panic recovered: %v", thePanic)
}
}()
g := MustCompile(test.pattern, test.delimiters...)
result := g.Match(test.match)
if result != test.should {

View File

@ -2,23 +2,24 @@ package match
import (
"fmt"
"github.com/gobwas/glob/util/strings"
"github.com/gobwas/glob/util/runes"
)
type Any struct {
Separators []rune
sep []rune
}
func NewAny(s []rune) Any {
return Any{s}
}
func (self Any) Match(s string) bool {
return strings.IndexAnyRunes(s, self.Separators) == -1
func (a Any) Match(s string) bool {
return runes.IndexAnyRune(s, a.sep) == -1
}
func (self Any) Index(s string) (int, []int) {
found := strings.IndexAnyRunes(s, self.Separators)
func (a Any) Index(s string) (int, []int) {
found := runes.IndexAnyRune(s, a.sep)
switch found {
case -1:
case 0:
@ -36,10 +37,10 @@ func (self Any) Index(s string) (int, []int) {
return 0, segments
}
func (self Any) Len() int {
return lenNo
func (a Any) MinLen() int {
return 0
}
func (self Any) String() string {
return fmt.Sprintf("<any:![%s]>", string(self.Separators))
func (a Any) String() string {
return fmt.Sprintf("<any:![%s]>", string(a.sep))
}

View File

@ -1,82 +1,74 @@
package match
import "fmt"
import (
"fmt"
)
type AnyOf struct {
Matchers Matchers
ms []Matcher
min int
}
func NewAnyOf(m ...Matcher) AnyOf {
return AnyOf{Matchers(m)}
func NewAnyOf(ms ...Matcher) Matcher {
a := AnyOf{ms, minLen(ms)}
if mis, ok := MatchIndexers(ms); ok {
return IndexedAnyOf{a, mis}
}
return a
}
func (self *AnyOf) Add(m Matcher) error {
self.Matchers = append(self.Matchers, m)
return nil
}
func (self AnyOf) Match(s string) bool {
for _, m := range self.Matchers {
func (a AnyOf) Match(s string) bool {
for _, m := range a.ms {
if m.Match(s) {
return true
}
}
return false
}
func (self AnyOf) Index(s string) (int, []int) {
index := -1
func (a AnyOf) MinLen() (n int) {
return a.min
}
func (a AnyOf) Content() []Matcher {
return a.ms
}
func (a AnyOf) String() string {
return fmt.Sprintf("<any_of:[%s]>", Matchers(a.ms))
}
type IndexedAnyOf struct {
AnyOf
ms []MatchIndexer
}
func (a IndexedAnyOf) Index(s string) (int, []int) {
index := -1
segments := acquireSegments(len(s))
for _, m := range self.Matchers {
idx, seg := m.Index(s)
if idx == -1 {
for _, m := range a.ms {
i, seg := m.Index(s)
if i == -1 {
continue
}
if index == -1 || idx < index {
index = idx
if index == -1 || i < index {
index = i
segments = append(segments[:0], seg...)
continue
}
if idx > index {
if i > index {
continue
}
// here idx == index
// here i == index
segments = appendMerge(segments, seg)
}
if index == -1 {
releaseSegments(segments)
return -1, nil
}
return index, segments
}
func (self AnyOf) Len() (l int) {
l = -1
for _, m := range self.Matchers {
ml := m.Len()
switch {
case l == -1:
l = ml
continue
case ml == -1:
return -1
case l != ml:
return -1
}
}
return
}
func (self AnyOf) String() string {
return fmt.Sprintf("<any_of:[%s]>", self.Matchers)
func (a IndexedAnyOf) String() string {
return fmt.Sprintf("<indexed_any_of:[%s]>", a.ms)
}

View File

@ -5,7 +5,7 @@ import (
"testing"
)
func TestAnyOfIndex(t *testing.T) {
func TestIndexedAnyOf(t *testing.T) {
for id, test := range []struct {
matchers Matchers
fixture string
@ -41,8 +41,8 @@ func TestAnyOfIndex(t *testing.T) {
[]int{1},
},
} {
everyOf := NewAnyOf(test.matchers...)
index, segments := everyOf.Index(test.fixture)
a := NewAnyOf(test.matchers...).(IndexedAnyOf)
index, segments := a.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
}

View File

@ -1,185 +0,0 @@
package match
import (
"fmt"
"unicode/utf8"
)
// BTree is a matcher made of a central Value matcher with optional Left and
// Right matchers for the input before and after the part matched by Value.
type BTree struct {
	Value Matcher // matcher located inside the input
	Left  Matcher // matches everything before Value's match; nil means "must be empty"
	Right Matcher // matches everything after Value's match; nil means "must be empty"
	// Lengths as reported by Len() of the corresponding matcher when the tree
	// is built by NewBTree; -1 means the length is not fixed. For a nil Left
	// or Right the length stays 0.
	ValueLengthRunes int
	LeftLengthRunes  int
	RightLengthRunes int
	// LengthRunes is the sum of the three lengths above, or -1 if any of them
	// is -1.
	LengthRunes int
}
// NewBTree builds a BTree from a value matcher and optional left and right
// matchers (either may be nil). It records the Len() of every non-nil part.
// LengthRunes is the sum of those lengths, or -1 when any part reports an
// unknown length (-1).
func NewBTree(Value, Left, Right Matcher) (tree BTree) {
	tree = BTree{Value: Value, Left: Left, Right: Right}

	tree.ValueLengthRunes = Value.Len()
	fixed := tree.ValueLengthRunes != -1

	if Left != nil {
		tree.LeftLengthRunes = Left.Len()
		fixed = fixed && tree.LeftLengthRunes != -1
	}
	if Right != nil {
		tree.RightLengthRunes = Right.Len()
		fixed = fixed && tree.RightLengthRunes != -1
	}

	tree.LengthRunes = -1
	if fixed {
		tree.LengthRunes = tree.LeftLengthRunes + tree.ValueLengthRunes + tree.RightLengthRunes
	}
	return tree
}
// Len returns the total length of the tree as computed by NewBTree, or -1
// when the length is not fixed.
func (self BTree) Len() int {
	return self.LengthRunes
}
// Index is not implemented for BTree: it always returns -1 and nil segments.
// The commented-out body is an unfinished draft kept for reference.
// todo?
func (self BTree) Index(s string) (index int, segments []int) {
	//inputLen := len(s)
	//// try to cut unnecessary parts
	//// by knowledge of length of right and left part
	//offset, limit := self.offsetLimit(inputLen)
	//for offset < limit {
	//	// search for matching part in substring
	//	vi, segments := self.Value.Index(s[offset:limit])
	//	if index == -1 {
	//		return -1, nil
	//	}
	//	if self.Left == nil {
	//		if index != offset {
	//			return -1, nil
	//		}
	//	} else {
	//		left := s[:offset+vi]
	//		i := self.Left.IndexSuffix(left)
	//		if i == -1 {
	//			return -1, nil
	//		}
	//		index = i
	//	}
	//	if self.Right != nil {
	//		for _, seg := range segments {
	//			right := s[:offset+vi+seg]
	//		}
	//	}
	//	l := s[:offset+index]
	//	var left bool
	//	if self.Left != nil {
	//		left = self.Left.Index(l)
	//	} else {
	//		left = l == ""
	//	}
	//}
	return -1, nil
}
// Match reports whether s is matched by the tree.
//
// It repeatedly looks for Value inside the window of s left over after
// reserving room for the fixed-length Left and Right parts. For each position
// where Value is found, the text before it must satisfy Left (or be empty if
// Left is nil), and for at least one of the candidate match lengths returned
// by Value.Index the text after it must satisfy Right (or be empty if Right
// is nil). If no candidate works, the search resumes one rune past the
// position just tried.
func (self BTree) Match(s string) bool {
	inputLen := len(s)
	// try to cut unnecessary parts
	// by knowledge of length of right and left part
	offset, limit := self.offsetLimit(inputLen)
	for offset < limit {
		// search for matching part in substring
		index, segments := self.Value.Index(s[offset:limit])
		if index == -1 {
			releaseSegments(segments)
			return false
		}
		// l is everything before the place where Value matched.
		l := s[:offset+index]
		var left bool
		if self.Left != nil {
			left = self.Left.Match(l)
		} else {
			left = l == ""
		}
		if left {
			// Try the candidate match lengths from the last segment to the first.
			for i := len(segments) - 1; i >= 0; i-- {
				length := segments[i]
				var right bool
				var r string
				// if there is no string for the right branch
				if inputLen <= offset+index+length {
					r = ""
				} else {
					r = s[offset+index+length:]
				}
				if self.Right != nil {
					right = self.Right.Match(r)
				} else {
					right = r == ""
				}
				if right {
					releaseSegments(segments)
					return true
				}
			}
		}
		// Move past the first rune of the position just tried and search again.
		_, step := utf8.DecodeRuneInString(s[offset+index:])
		offset += index + step
		releaseSegments(segments)
	}
	return false
}
// offsetLimit returns the byte window [offset, limit) of the input in which
// the Value matcher may be searched, given the input length in bytes.
//
// The stored lengths are rune counts while inputLen is a byte count. The
// checks still hold because a rune occupies at least one byte. When the
// total length is known and exceeds inputLen, an empty window (0, 0) is
// returned.
func (self BTree) offsetLimit(inputLen int) (offset int, limit int) {
	if total := self.LengthRunes; total != -1 && total > inputLen {
		return 0, 0
	}
	limit = inputLen
	if right := self.RightLengthRunes; right >= 0 {
		limit -= right
	}
	if left := self.LeftLengthRunes; left >= 0 {
		offset = left
	}
	return offset, limit
}
// String renders the tree as "<btree:[LEFT<-VALUE->RIGHT]>", printing
// "<nil>" in place of a missing left or right matcher.
func (self BTree) String() string {
	describe := func(m Matcher) string {
		if m == nil {
			return "<nil>"
		}
		return m.String()
	}
	left, right := describe(self.Left), describe(self.Right)
	return fmt.Sprintf("<btree:[%s<-%s->%s]>", left, self.Value, right)
}

View File

@ -1,90 +0,0 @@
package match
import (
"testing"
)
func TestBTree(t *testing.T) {
for id, test := range []struct {
tree BTree
str string
exp bool
}{
{
NewBTree(NewText("abc"), NewSuper(), NewSuper()),
"abc",
true,
},
{
NewBTree(NewText("a"), NewSingle(nil), NewSingle(nil)),
"aaa",
true,
},
{
NewBTree(NewText("b"), NewSingle(nil), nil),
"bbb",
false,
},
{
NewBTree(
NewText("c"),
NewBTree(
NewSingle(nil),
NewSuper(),
nil,
),
nil,
),
"abc",
true,
},
} {
act := test.tree.Match(test.str)
if act != test.exp {
t.Errorf("#%d match %q error: act: %t; exp: %t", id, test.str, act, test.exp)
continue
}
}
}
type fakeMatcher struct {
len int
name string
}
func (f *fakeMatcher) Match(string) bool {
return true
}
var i = 3
func (f *fakeMatcher) Index(s string) (int, []int) {
seg := make([]int, 0, i)
for x := 0; x < i; x++ {
seg = append(seg, x)
}
return 0, seg
}
func (f *fakeMatcher) Len() int {
return f.len
}
func (f *fakeMatcher) String() string {
return f.name
}
func BenchmarkMatchBTree(b *testing.B) {
l := &fakeMatcher{4, "left_fake"}
r := &fakeMatcher{4, "right_fake"}
v := &fakeMatcher{2, "value_fake"}
// must be <= len(l + r + v)
fixture := "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"
bt := NewBTree(v, l, r)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
bt.Match(fixture)
}
})
}

View File

@ -6,29 +6,29 @@ import (
)
type Contains struct {
Needle string
Not bool
s string
not bool
}
func NewContains(needle string, not bool) Contains {
return Contains{needle, not}
func NewContains(needle string) Contains {
return Contains{needle, false}
}
func (self Contains) Match(s string) bool {
return strings.Contains(s, self.Needle) != self.Not
func (c Contains) Match(s string) bool {
return strings.Contains(s, c.s) != c.not
}
func (self Contains) Index(s string) (int, []int) {
func (c Contains) Index(s string) (int, []int) {
var offset int
idx := strings.Index(s, self.Needle)
idx := strings.Index(s, c.s)
if !self.Not {
if !c.not {
if idx == -1 {
return -1, nil
}
offset = idx + len(self.Needle)
offset = idx + len(c.s)
if len(s) <= offset {
return 0, []int{offset}
}
@ -45,14 +45,14 @@ func (self Contains) Index(s string) (int, []int) {
return 0, append(segments, offset+len(s))
}
func (self Contains) Len() int {
return lenNo
func (c Contains) MinLen() int {
return 0
}
func (self Contains) String() string {
func (c Contains) String() string {
var not string
if self.Not {
if c.not {
not = "!"
}
return fmt.Sprintf("<contains:%s[%s]>", not, self.Needle)
return fmt.Sprintf("<contains:%s[%s]>", not, c.s)
}

View File

@ -42,7 +42,7 @@ func TestContainsIndex(t *testing.T) {
[]int{0, 1, 2, 3},
},
} {
p := NewContains(test.prefix, test.not)
p := Contains{test.prefix, test.not}
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -54,8 +54,7 @@ func TestContainsIndex(t *testing.T) {
}
func BenchmarkIndexContains(b *testing.B) {
m := NewContains(string(bench_separators), true)
m := Contains{string(bench_separators), true}
for i := 0; i < b.N; i++ {
_, s := m.Index(bench_pattern)
releaseSegments(s)
@ -63,8 +62,7 @@ func BenchmarkIndexContains(b *testing.B) {
}
func BenchmarkIndexContainsParallel(b *testing.B) {
m := NewContains(string(bench_separators), true)
m := Contains{string(bench_separators), true}
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)

77
match/debug.go Normal file
View File

@ -0,0 +1,77 @@
package match
import (
"bytes"
"fmt"
"math/rand"
"os"
"strings"
"sync/atomic"
)
// i holds the current nesting depth used by logf. It is only read and
// modified through the atomic operations in logf, enter and leave.
var i = new(int32)

// logf writes one formatted debug line to standard error. The line is
// indented by one space per nesting level and prefixed with the level in
// parentheses.
func logf(f string, args ...interface{}) {
	depth := int(atomic.LoadInt32(i))
	indent := strings.Repeat(" ", depth)
	message := fmt.Sprintf(f, args...)
	fmt.Fprintf(os.Stderr, "%s(%d) %s\n", indent, depth, message)
}

// enter increases the nesting depth by one.
func enter() {
	atomic.AddInt32(i, +1)
}

// leave decreases the nesting depth by one.
func leave() {
	atomic.AddInt32(i, -1)
}
// Graphviz renders the matcher m as a Graphviz "digraph" description whose
// graph label is the given pattern. The root node gets a random identifier.
//
// NOTE(review): pattern is inserted into a double-quoted label without
// escaping; a pattern containing a double quote would presumably yield
// invalid output — confirm before relying on it for arbitrary patterns.
func Graphviz(pattern string, m Matcher) string {
	rootID := fmt.Sprintf("%x", rand.Int63())
	body := graphviz(m, rootID)
	return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, body)
}
// graphviz renders the matcher m as Graphviz statements, using id as the
// identifier of the node that represents m.
//
// A Tree becomes a node labelled with its value plus edges to its left and
// right children (a missing child is drawn as a "<nil>" node). A Container
// and an EveryOf become a node with one edge per inner matcher. Any other
// matcher becomes a single labelled node. Child identifiers are random.
func graphviz(m Matcher, id string) string {
	buf := &bytes.Buffer{}
	switch v := m.(type) {
	case Tree:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, v.value)
		for _, m := range []Matcher{v.left, v.right} {
			switch n := m.(type) {
			case nil:
				rnd := rand.Int63()
				fmt.Fprintf(buf, `"%x"[label="<nil>"];`, rnd)
				fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
			default:
				sub := fmt.Sprintf("%x", rand.Int63())
				fmt.Fprintf(buf, `"%s"->"%s";`, id, sub)
				// Already-rendered output is written verbatim: it must not be
				// used as a format string, or any '%' in a label is mangled.
				buf.WriteString(graphviz(n, sub))
			}
		}
	case Container:
		fmt.Fprintf(buf, `"%s"[label="*AnyOf"];`, id)
		for _, m := range v.Content() {
			rnd := rand.Int63()
			buf.WriteString(graphviz(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}
	case EveryOf:
		fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id)
		for _, m := range v.ms {
			rnd := rand.Int63()
			buf.WriteString(graphviz(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}
	default:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m)
	}
	return buf.String()
}

View File

@ -1,55 +0,0 @@
package debug
import (
"bytes"
"fmt"
"github.com/gobwas/glob/match"
"math/rand"
)
// Graphviz renders the matcher m as a Graphviz "digraph" description whose
// graph label is the given pattern. The root node gets a random identifier.
func Graphviz(pattern string, m match.Matcher) string {
	return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, graphviz_internal(m, fmt.Sprintf("%x", rand.Int63())))
}
// graphviz_internal renders the matcher m as Graphviz statements, using id
// as the identifier of the node that represents m.
//
// A BTree becomes a node labelled with its value plus edges to its left and
// right children (a missing child is drawn as a "<nil>" node). AnyOf and
// EveryOf become a node with one edge per inner matcher. Any other matcher
// becomes a single labelled node. Child identifiers are random.
func graphviz_internal(m match.Matcher, id string) string {
	buf := &bytes.Buffer{}
	switch matcher := m.(type) {
	case match.BTree:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, matcher.Value.String())
		for _, m := range []match.Matcher{matcher.Left, matcher.Right} {
			switch n := m.(type) {
			case nil:
				rnd := rand.Int63()
				fmt.Fprintf(buf, `"%x"[label="<nil>"];`, rnd)
				fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
			default:
				sub := fmt.Sprintf("%x", rand.Int63())
				fmt.Fprintf(buf, `"%s"->"%s";`, id, sub)
				// Already-rendered output is written verbatim: it must not be
				// used as a format string, or any '%' in a label is mangled.
				buf.WriteString(graphviz_internal(n, sub))
			}
		}
	case match.AnyOf:
		fmt.Fprintf(buf, `"%s"[label="AnyOf"];`, id)
		for _, m := range matcher.Matchers {
			rnd := rand.Int63()
			buf.WriteString(graphviz_internal(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}
	case match.EveryOf:
		fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id)
		for _, m := range matcher.Matchers {
			rnd := rand.Int63()
			buf.WriteString(graphviz_internal(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}
	default:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m.String())
	}
	return buf.String()
}

View File

@ -5,31 +5,41 @@ import (
)
type EveryOf struct {
Matchers Matchers
ms []Matcher
min int
}
func NewEveryOf(m ...Matcher) EveryOf {
return EveryOf{Matchers(m)}
func NewEveryOf(ms []Matcher) Matcher {
e := EveryOf{ms, minLen(ms)}
if mis, ok := MatchIndexers(ms); ok {
return IndexedEveryOf{e, mis}
}
return e
}
func (self *EveryOf) Add(m Matcher) error {
self.Matchers = append(self.Matchers, m)
return nil
func (e EveryOf) MinLen() (n int) {
return e.min
}
func (self EveryOf) Len() (l int) {
for _, m := range self.Matchers {
if ml := m.Len(); l > 0 {
l += ml
} else {
return -1
func (e EveryOf) Match(s string) bool {
for _, m := range e.ms {
if !m.Match(s) {
return false
}
}
return
return true
}
func (self EveryOf) Index(s string) (int, []int) {
func (e EveryOf) String() string {
return fmt.Sprintf("<every_of:[%s]>", e.ms)
}
type IndexedEveryOf struct {
EveryOf
ms []MatchIndexer
}
func (e IndexedEveryOf) Index(s string) (int, []int) {
var index int
var offset int
@ -39,7 +49,7 @@ func (self EveryOf) Index(s string) (int, []int) {
current := acquireSegments(len(s))
sub := s
for i, m := range self.Matchers {
for i, m := range e.ms {
idx, seg := m.Index(sub)
if idx == -1 {
releaseSegments(next)
@ -84,16 +94,6 @@ func (self EveryOf) Index(s string) (int, []int) {
return index, current
}
func (self EveryOf) Match(s string) bool {
for _, m := range self.Matchers {
if !m.Match(s) {
return false
}
}
return true
}
func (self EveryOf) String() string {
return fmt.Sprintf("<every_of:[%s]>", self.Matchers)
func (e IndexedEveryOf) String() string {
return fmt.Sprintf("<indexed_every_of:[%s]>", e.ms)
}

View File

@ -5,7 +5,7 @@ import (
"testing"
)
func TestEveryOfIndex(t *testing.T) {
func TestIndexedEveryOf(t *testing.T) {
for id, test := range []struct {
matchers Matchers
fixture string
@ -33,7 +33,7 @@ func TestEveryOfIndex(t *testing.T) {
[]int{2},
},
} {
everyOf := NewEveryOf(test.matchers...)
everyOf := NewEveryOf(test.matchers).(IndexedEveryOf)
index, segments := everyOf.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)

View File

@ -2,48 +2,47 @@ package match
import (
"fmt"
"github.com/gobwas/glob/util/runes"
"unicode/utf8"
"github.com/gobwas/glob/util/runes"
)
type List struct {
List []rune
Not bool
rs []rune
not bool
}
func NewList(list []rune, not bool) List {
return List{list, not}
func NewList(rs []rune, not bool) List {
return List{rs, not}
}
func (self List) Match(s string) bool {
func (l List) Match(s string) bool {
r, w := utf8.DecodeRuneInString(s)
if len(s) > w {
// Invalid rune.
return false
}
inList := runes.IndexRune(self.List, r) != -1
return inList == !self.Not
inList := runes.IndexRune(l.rs, r) != -1
return inList == !l.not
}
func (self List) Len() int {
return lenOne
func (l List) MinLen() int {
return 1
}
func (self List) Index(s string) (int, []int) {
func (l List) Index(s string) (int, []int) {
for i, r := range s {
if self.Not == (runes.IndexRune(self.List, r) == -1) {
if l.not == (runes.IndexRune(l.rs, r) == -1) {
return i, segmentsByRuneLength[utf8.RuneLen(r)]
}
}
return -1, nil
}
func (self List) String() string {
func (l List) String() string {
var not string
if self.Not {
if l.not {
not = "!"
}
return fmt.Sprintf("<list:%s[%s]>", not, string(self.List))
return fmt.Sprintf("<list:%s[%s]>", not, string(l.rs))
}

View File

@ -7,15 +7,50 @@ import (
"strings"
)
const lenOne = 1
const lenZero = 0
const lenNo = -1
type Matcher interface {
Match(string) bool
MinLen() int
}
type Indexer interface {
Index(string) (int, []int)
Len() int
String() string
}
type Sizer interface {
RunesCount() int
}
type MatchIndexer interface {
Matcher
Indexer
}
type MatchSizer interface {
Matcher
Sizer
}
type MatchIndexSizer interface {
Matcher
Indexer
Sizer
}
type Container interface {
Content() []Matcher
}
func MatchIndexers(ms []Matcher) ([]MatchIndexer, bool) {
for _, m := range ms {
if _, ok := m.(Indexer); !ok {
return nil, false
}
}
mis := make([]MatchIndexer, len(ms))
for i := range mis {
mis[i] = ms[i].(MatchIndexer)
}
return mis, true
}
type Matchers []Matcher

View File

@ -6,32 +6,31 @@ import (
)
type Max struct {
Limit int
n int
}
func NewMax(l int) Max {
return Max{l}
func NewMax(n int) Max {
return Max{n}
}
func (self Max) Match(s string) bool {
var l int
func (m Max) Match(s string) bool {
var n int
for range s {
l += 1
if l > self.Limit {
n += 1
if n > m.n {
return false
}
}
return true
}
func (self Max) Index(s string) (int, []int) {
segments := acquireSegments(self.Limit + 1)
func (m Max) Index(s string) (int, []int) {
segments := acquireSegments(m.n + 1)
segments = append(segments, 0)
var count int
for i, r := range s {
count++
if count > self.Limit {
if count > m.n {
break
}
segments = append(segments, i+utf8.RuneLen(r))
@ -40,10 +39,10 @@ func (self Max) Index(s string) (int, []int) {
return 0, segments
}
func (self Max) Len() int {
return lenNo
func (m Max) MinLen() int {
return 0
}
func (self Max) String() string {
return fmt.Sprintf("<max:%d>", self.Limit)
func (m Max) String() string {
return fmt.Sprintf("<max:%d>", m.n)
}

View File

@ -6,52 +6,48 @@ import (
)
type Min struct {
Limit int
n int
}
func NewMin(l int) Min {
return Min{l}
func NewMin(n int) Min {
return Min{n}
}
func (self Min) Match(s string) bool {
var l int
func (m Min) Match(s string) bool {
var n int
for range s {
l += 1
if l >= self.Limit {
n += 1
if n >= m.n {
return true
}
}
return false
}
func (self Min) Index(s string) (int, []int) {
func (m Min) Index(s string) (int, []int) {
var count int
c := len(s) - self.Limit + 1
c := len(s) - m.n + 1
if c <= 0 {
return -1, nil
}
segments := acquireSegments(c)
for i, r := range s {
count++
if count >= self.Limit {
if count >= m.n {
segments = append(segments, i+utf8.RuneLen(r))
}
}
if len(segments) == 0 {
return -1, nil
}
return 0, segments
}
func (self Min) Len() int {
return lenNo
func (m Min) MinLen() int {
return m.n
}
func (self Min) String() string {
return fmt.Sprintf("<min:%d>", self.Limit)
func (m Min) String() string {
return fmt.Sprintf("<min:%d>", m.n)
}

View File

@ -18,8 +18,12 @@ func (self Nothing) Index(s string) (int, []int) {
return 0, segments0
}
func (self Nothing) Len() int {
return lenZero
func (self Nothing) MinLen() int {
return 0
}
func (self Nothing) RunesCount() int {
return 0
}
func (self Nothing) String() string {

278
match/optimize.go Normal file
View File

@ -0,0 +1,278 @@
package match
import (
"fmt"
"gopkg.in/readline.v1/runes"
)
// Optimize returns a cheaper matcher equivalent to m when one of the known
// rewrites applies, and m itself otherwise.
//
// Rewrites:
//   - Any without separators becomes Super;
//   - a Container holding exactly one matcher is replaced by that matcher;
//   - a non-negated List of a single rune becomes Text;
//   - a Tree has both children optimized first; when its value is Text and
//     the children have one of the recognised shapes the whole tree collapses
//     into Text, Contains, Suffix, Prefix, PrefixSuffix, SuffixAny or
//     PrefixAny.
//
// A Tree that cannot be collapsed is rebuilt through NewTree from the
// optimized children. Returning the original m here would silently drop the
// work done on the children, because the type switch operates on a copy.
func Optimize(m Matcher) Matcher {
	switch v := m.(type) {
	case Any:
		if len(v.sep) == 0 {
			return NewSuper()
		}

	case Container:
		ms := v.Content()
		if len(ms) == 1 {
			return ms[0]
		}
		return m

	case List:
		if !v.not && len(v.rs) == 1 {
			return NewText(string(v.rs))
		}
		return m

	case Tree:
		// Optimize(nil) yields nil, so absent children stay absent.
		left := Optimize(v.left)
		right := Optimize(v.right)

		txt, ok := v.value.(Text)
		if !ok {
			return NewTree(v.value, left, right)
		}

		var (
			leftNil  = left == nil
			rightNil = right == nil
		)
		if leftNil && rightNil {
			return NewText(txt.s)
		}

		// Comma-ok assertions are safe on nil interfaces: they report false.
		_, leftSuper := left.(Super)
		lp, leftPrefix := left.(Prefix)
		la, leftAny := left.(Any)

		_, rightSuper := right.(Super)
		rs, rightSuffix := right.(Suffix)
		ra, rightAny := right.(Any)

		switch {
		case leftSuper && rightSuper:
			return NewContains(txt.s)
		case leftSuper && rightNil:
			return NewSuffix(txt.s)
		case rightSuper && leftNil:
			return NewPrefix(txt.s)
		case leftNil && rightSuffix:
			return NewPrefixSuffix(txt.s, rs.s)
		case rightNil && leftPrefix:
			return NewPrefixSuffix(lp.s, txt.s)
		case rightNil && leftAny:
			return NewSuffixAny(txt.s, la.sep)
		case leftNil && rightAny:
			return NewPrefixAny(txt.s, ra.sep)
		}

		// No collapse applies: keep the tree, but with the optimized children
		// and with the cached sizes recomputed for them.
		return NewTree(v.value, left, right)
	}
	return m
}
// Compile folds the sequence ms into one matcher.
//
// An empty sequence is an error and a single matcher is returned unchanged.
// Otherwise the whole sequence is glued into one matcher when glueMatchers
// can do so. Failing that, the MatchIndexer with the greatest MinLen (the
// first one on ties) becomes the value of a Tree whose left and right
// children are compiled recursively from the matchers before and after it.
// An error is returned when no matcher in ms is a MatchIndexer.
func Compile(ms []Matcher) (Matcher, error) {
	switch len(ms) {
	case 0:
		return nil, fmt.Errorf("compile error: need at least one matcher")
	case 1:
		return ms[0], nil
	}
	if glued := glueMatchers(ms); glued != nil {
		return glued, nil
	}

	var (
		pivot MatchIndexer
		at    = -1
		best  = -2 // lower than any length seen so far, so the first indexer wins
	)
	for i, m := range ms {
		mi, ok := m.(MatchIndexer)
		if !ok {
			continue
		}
		if n := m.MinLen(); n > best {
			pivot, at, best = mi, i, n
		}
	}
	if pivot == nil {
		return nil, fmt.Errorf("can not index on matchers")
	}

	var (
		l, r Matcher
		err  error
	)
	if head := ms[:at]; len(head) > 0 {
		if l, err = Compile(head); err != nil {
			return nil, err
		}
	}
	if tail := ms[at+1:]; len(tail) > 0 {
		if r, err = Compile(tail); err != nil {
			return nil, err
		}
	}
	return NewTree(pivot, l, r), nil
}
// glueMatchers tries to merge ms into a single matcher, first with
// glueMatchersAsEvery and then with glueMatchersAsRow. It returns nil when
// neither strategy applies.
func glueMatchers(ms []Matcher) Matcher {
	if every := glueMatchersAsEvery(ms); every != nil {
		return every
	}
	return glueMatchersAsRow(ms)
}
// glueMatchersAsRow merges ms into a Row. It returns nil when ms has fewer
// than two matchers or when any of them is not a MatchIndexSizer.
func glueMatchersAsRow(ms []Matcher) Matcher {
	if len(ms) < 2 {
		return nil
	}
	var row []MatchIndexSizer
	for i := range ms {
		sized, ok := ms[i].(MatchIndexSizer)
		if !ok {
			return nil
		}
		row = append(row, sized)
	}
	return NewRow(row)
}
// glueMatchersAsEvery tries to merge a run of Super, Any, Single and negated
// List matchers that all share one separator set into a single matcher.
//
// It returns nil when ms has fewer than two matchers, when it contains any
// other kind of matcher (including a non-negated List), or when the separator
// sets differ. Otherwise the result is Super, Any, Min, or an EveryOf built
// from Min, Max and Any, depending on which kinds were seen.
func glueMatchersAsEvery(ms []Matcher) Matcher {
if len(ms) <= 1 {
return nil
}
var (
// Which kinds of matcher were seen in ms.
hasAny bool
hasSuper bool
hasSingle bool
// Number of matchers that consume exactly one rune (Single, negated List).
min int
// Separator set of the first matcher; all others must equal it.
separator []rune
)
for i, matcher := range ms {
var sep []rune
switch m := matcher.(type) {
case Super:
// Super has no separators.
sep = []rune{}
hasSuper = true
case Any:
sep = m.sep
hasAny = true
case Single:
sep = m.sep
hasSingle = true
min++
case List:
// Only a negated list is handled; its runes play the role of separators.
if !m.not {
return nil
}
sep = m.rs
hasSingle = true
min++
default:
return nil
}
// The first matcher fixes the separator set used for comparison.
if i == 0 {
separator = sep
}
// NOTE(review): this file imports runes from gopkg.in/readline.v1/runes
// rather than the project's own util/runes package; confirm that is intended.
if runes.Equal(sep, separator) {
continue
}
return nil
}
// Only Super matchers.
if hasSuper && !hasAny && !hasSingle {
return NewSuper()
}
// Only Any matchers.
if hasAny && !hasSuper && !hasSingle {
return NewAny(separator)
}
// Wildcards mixed with single-rune matchers and no separators: only the
// minimum number of runes is constrained.
if (hasAny || hasSuper) && min > 0 && len(separator) == 0 {
return NewMin(min)
}
var every []Matcher
if min > 0 {
every = append(every, NewMin(min))
// Without a wildcard the rune count is also bounded from above.
if !hasAny && !hasSuper {
every = append(every, NewMax(min))
}
}
if len(separator) > 0 {
every = append(every, NewAny(separator))
}
// NOTE(review): every is empty when ms mixes Super with separator-less Any
// and nothing else; confirm NewEveryOf accepts an empty slice.
return NewEveryOf(every)
}
// Minimize shortens ms by replacing one run of adjacent matchers with the
// matcher glueMatchers produces for it, and then recursing on the result.
//
// Every window ms[lo:hi] is tried. A later candidate replaces the current
// choice when its MinLen is greater or when it covers more matchers. ms is
// returned as is when no window can be glued.
func Minimize(ms []Matcher) []Matcher {
	var (
		best     Matcher
		from, to int
		span     int
	)
	for lo := 0; lo < len(ms); lo++ {
		for hi := len(ms); hi > lo; hi-- {
			glued := glueMatchers(ms[lo:hi])
			if glued == nil {
				continue
			}
			width := hi - lo
			if best != nil && !(glued.MinLen() > best.MinLen() || span < width) {
				continue
			}
			best, from, to, span = glued, lo, hi, width
		}
	}
	if best == nil {
		return ms
	}

	// Rebuild the sequence in a fresh slice: prefix, glued matcher, suffix.
	next := append([]Matcher{}, ms[:from]...)
	next = append(next, best)
	next = append(next, ms[to:]...)

	if len(next) == len(ms) {
		return next
	}
	return Minimize(next)
}

View File

@ -7,20 +7,24 @@ import (
)
type Prefix struct {
Prefix string
s string
minSize int
}
func NewPrefix(p string) Prefix {
return Prefix{p}
return Prefix{
s: p,
minSize: utf8.RuneCountInString(p),
}
}
func (self Prefix) Index(s string) (int, []int) {
idx := strings.Index(s, self.Prefix)
func (p Prefix) Index(s string) (int, []int) {
idx := strings.Index(s, p.s)
if idx == -1 {
return -1, nil
}
length := len(self.Prefix)
length := len(p.s)
var sub string
if len(s) > idx+length {
sub = s[idx+length:]
@ -37,14 +41,14 @@ func (self Prefix) Index(s string) (int, []int) {
return idx, segments
}
func (self Prefix) Len() int {
return lenNo
func (p Prefix) MinLen() int {
return p.minSize
}
func (self Prefix) Match(s string) bool {
return strings.HasPrefix(s, self.Prefix)
func (p Prefix) Match(s string) bool {
return strings.HasPrefix(s, p.s)
}
func (self Prefix) String() string {
return fmt.Sprintf("<prefix:%s>", self.Prefix)
func (p Prefix) String() string {
return fmt.Sprintf("<prefix:%s>", p.s)
}

View File

@ -5,27 +5,28 @@ import (
"strings"
"unicode/utf8"
sutil "github.com/gobwas/glob/util/strings"
"github.com/gobwas/glob/util/runes"
)
type PrefixAny struct {
Prefix string
Separators []rune
s string
sep []rune
minLen int
}
func NewPrefixAny(s string, sep []rune) PrefixAny {
return PrefixAny{s, sep}
return PrefixAny{s, sep, utf8.RuneCountInString(s)}
}
func (self PrefixAny) Index(s string) (int, []int) {
idx := strings.Index(s, self.Prefix)
func (p PrefixAny) Index(s string) (int, []int) {
idx := strings.Index(s, p.s)
if idx == -1 {
return -1, nil
}
n := len(self.Prefix)
n := len(p.s)
sub := s[idx+n:]
i := sutil.IndexAnyRunes(sub, self.Separators)
i := runes.IndexAnyRune(sub, p.sep)
if i > -1 {
sub = sub[:i]
}
@ -39,17 +40,17 @@ func (self PrefixAny) Index(s string) (int, []int) {
return idx, seg
}
func (self PrefixAny) Len() int {
return lenNo
func (p PrefixAny) MinLen() int {
return p.minLen
}
func (self PrefixAny) Match(s string) bool {
if !strings.HasPrefix(s, self.Prefix) {
func (p PrefixAny) Match(s string) bool {
if !strings.HasPrefix(s, p.s) {
return false
}
return sutil.IndexAnyRunes(s[len(self.Prefix):], self.Separators) == -1
return runes.IndexAnyRune(s[len(p.s):], p.sep) == -1
}
func (self PrefixAny) String() string {
return fmt.Sprintf("<prefix_any:%s![%s]>", self.Prefix, string(self.Separators))
func (p PrefixAny) String() string {
return fmt.Sprintf("<prefix_any:%s![%s]>", p.s, string(p.sep))
}

View File

@ -3,23 +3,27 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
)
type PrefixSuffix struct {
Prefix, Suffix string
p, s string
minLen int
}
func NewPrefixSuffix(p, s string) PrefixSuffix {
return PrefixSuffix{p, s}
pn := utf8.RuneCountInString(p)
sn := utf8.RuneCountInString(s)
return PrefixSuffix{p, s, pn + sn}
}
func (self PrefixSuffix) Index(s string) (int, []int) {
prefixIdx := strings.Index(s, self.Prefix)
func (ps PrefixSuffix) Index(s string) (int, []int) {
prefixIdx := strings.Index(s, ps.p)
if prefixIdx == -1 {
return -1, nil
}
suffixLen := len(self.Suffix)
suffixLen := len(ps.s)
if suffixLen <= 0 {
return prefixIdx, []int{len(s) - prefixIdx}
}
@ -30,7 +34,7 @@ func (self PrefixSuffix) Index(s string) (int, []int) {
segments := acquireSegments(len(s) - prefixIdx)
for sub := s[prefixIdx:]; ; {
suffixIdx := strings.LastIndex(sub, self.Suffix)
suffixIdx := strings.LastIndex(sub, ps.s)
if suffixIdx == -1 {
break
}
@ -49,14 +53,14 @@ func (self PrefixSuffix) Index(s string) (int, []int) {
return prefixIdx, segments
}
func (self PrefixSuffix) Len() int {
return lenNo
func (ps PrefixSuffix) Match(s string) bool {
return strings.HasPrefix(s, ps.p) && strings.HasSuffix(s, ps.s)
}
func (self PrefixSuffix) Match(s string) bool {
return strings.HasPrefix(s, self.Prefix) && strings.HasSuffix(s, self.Suffix)
func (ps PrefixSuffix) MinLen() int {
return ps.minLen
}
func (self PrefixSuffix) String() string {
return fmt.Sprintf("<prefix_suffix:[%s,%s]>", self.Prefix, self.Suffix)
func (ps PrefixSuffix) String() string {
return fmt.Sprintf("<prefix_suffix:[%s,%s]>", ps.p, ps.s)
}

View File

@ -14,8 +14,8 @@ func NewRange(lo, hi rune, not bool) Range {
return Range{lo, hi, not}
}
func (self Range) Len() int {
return lenOne
func (self Range) MinLen() int {
return 1
}
func (self Range) Match(s string) bool {

View File

@ -2,76 +2,72 @@ package match
import (
"fmt"
"unicode/utf8"
"github.com/gobwas/glob/util/runes"
)
type Row struct {
Matchers Matchers
RunesLength int
Segments []int
ms []MatchIndexSizer
runes int
seg []int
}
func NewRow(len int, m ...Matcher) Row {
func NewRow(ms []MatchIndexSizer) Row {
var r int
for _, m := range ms {
r += m.RunesCount()
}
return Row{
Matchers: Matchers(m),
RunesLength: len,
Segments: []int{len},
ms: ms,
runes: r,
seg: []int{r},
}
}
func (self Row) matchAll(s string) bool {
var idx int
for _, m := range self.Matchers {
length := m.Len()
var next, i int
for next = range s[idx:] {
i++
if i == length {
break
}
}
if i < length || !m.Match(s[idx:idx+next+1]) {
return false
}
idx += next + 1
func (r Row) Match(s string) bool {
if !runes.ExactlyRunesCount(s, r.runes) {
return false
}
return true
return r.matchAll(s)
}
func (self Row) lenOk(s string) bool {
var i int
for range s {
i++
if i > self.RunesLength {
return false
func (r Row) MinLen() int {
return r.runes
}
func (r Row) RunesCount() int {
return r.runes
}
func (r Row) Index(s string) (int, []int) {
for j := 0; j < len(s)-r.runes; {
i, _ := r.ms[0].Index(s[j:])
if i == -1 {
return -1, nil
}
}
return self.RunesLength == i
}
func (self Row) Match(s string) bool {
return self.lenOk(s) && self.matchAll(s)
}
func (self Row) Len() (l int) {
return self.RunesLength
}
func (self Row) Index(s string) (int, []int) {
for i := range s {
if len(s[i:]) < self.RunesLength {
break
}
if self.matchAll(s[i:]) {
return i, self.Segments
if r.matchAll(s[i:]) {
return j + i, r.seg
}
_, x := utf8.DecodeRuneInString(s[i:])
j += x
}
return -1, nil
}
func (self Row) String() string {
return fmt.Sprintf("<row_%d:[%s]>", self.RunesLength, self.Matchers)
func (r Row) String() string {
return fmt.Sprintf("<row_%d:[%s]>", r.runes, r.ms)
}
func (r Row) matchAll(s string) bool {
var i int
for _, m := range r.ms {
n := m.RunesCount()
sub := runes.Head(s[i:], n)
if !m.Match(sub) {
return false
}
i += len(sub)
}
return true
}

View File

@ -7,36 +7,33 @@ import (
func TestRowIndex(t *testing.T) {
for id, test := range []struct {
matchers Matchers
length int
matchers []MatchIndexSizer
fixture string
index int
segments []int
}{
{
Matchers{
[]MatchIndexSizer{
NewText("abc"),
NewText("def"),
NewSingle(nil),
},
7,
"qweabcdefghij",
3,
[]int{7},
},
{
Matchers{
[]MatchIndexSizer{
NewText("abc"),
NewText("def"),
NewSingle(nil),
},
7,
"abcd",
-1,
nil,
},
} {
p := NewRow(test.length, test.matchers...)
p := NewRow(test.matchers)
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -48,15 +45,11 @@ func TestRowIndex(t *testing.T) {
}
func BenchmarkRowIndex(b *testing.B) {
m := NewRow(
7,
Matchers{
NewText("abc"),
NewText("def"),
NewSingle(nil),
}...,
)
m := NewRow([]MatchIndexSizer{
NewText("abc"),
NewText("def"),
NewSingle(nil),
})
for i := 0; i < b.N; i++ {
_, s := m.Index(bench_pattern)
releaseSegments(s)
@ -64,15 +57,11 @@ func BenchmarkRowIndex(b *testing.B) {
}
func BenchmarkIndexRowParallel(b *testing.B) {
m := NewRow(
7,
Matchers{
NewText("abc"),
NewText("def"),
NewSingle(nil),
}...,
)
m := NewRow([]MatchIndexSizer{
NewText("abc"),
NewText("def"),
NewSingle(nil),
})
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)

View File

@ -2,42 +2,45 @@ package match
import (
"fmt"
"github.com/gobwas/glob/util/runes"
"unicode/utf8"
"github.com/gobwas/glob/util/runes"
)
// single represents ?
type Single struct {
Separators []rune
sep []rune
}
func NewSingle(s []rune) Single {
return Single{s}
}
func (self Single) Match(s string) bool {
r, w := utf8.DecodeRuneInString(s)
if len(s) > w {
func (s Single) Match(v string) bool {
r, w := utf8.DecodeRuneInString(v)
if len(v) > w {
return false
}
return runes.IndexRune(self.Separators, r) == -1
return runes.IndexRune(s.sep, r) == -1
}
func (self Single) Len() int {
return lenOne
func (s Single) MinLen() int {
return 1
}
func (self Single) Index(s string) (int, []int) {
for i, r := range s {
if runes.IndexRune(self.Separators, r) == -1 {
func (s Single) RunesCount() int {
return 1
}
func (s Single) Index(v string) (int, []int) {
for i, r := range v {
if runes.IndexRune(s.sep, r) == -1 {
return i, segmentsByRuneLength[utf8.RuneLen(r)]
}
}
return -1, nil
}
func (self Single) String() string {
return fmt.Sprintf("<single:![%s]>", string(self.Separators))
func (s Single) String() string {
return fmt.Sprintf("<single:![%s]>", string(s.sep))
}

View File

@ -3,33 +3,34 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
)
type Suffix struct {
Suffix string
s string
minLen int
}
func NewSuffix(s string) Suffix {
return Suffix{s}
return Suffix{s, utf8.RuneCountInString(s)}
}
func (self Suffix) Len() int {
return lenNo
func (s Suffix) MinLen() int {
return s.minLen
}
func (self Suffix) Match(s string) bool {
return strings.HasSuffix(s, self.Suffix)
func (s Suffix) Match(v string) bool {
return strings.HasSuffix(v, s.s)
}
func (self Suffix) Index(s string) (int, []int) {
idx := strings.Index(s, self.Suffix)
func (s Suffix) Index(v string) (int, []int) {
idx := strings.Index(v, s.s)
if idx == -1 {
return -1, nil
}
return 0, []int{idx + len(self.Suffix)}
return 0, []int{idx + len(s.s)}
}
func (self Suffix) String() string {
return fmt.Sprintf("<suffix:%s>", self.Suffix)
func (s Suffix) String() string {
return fmt.Sprintf("<suffix:%s>", s.s)
}

View File

@ -3,41 +3,43 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
sutil "github.com/gobwas/glob/util/strings"
"github.com/gobwas/glob/util/runes"
)
type SuffixAny struct {
Suffix string
Separators []rune
s string
sep []rune
minLen int
}
func NewSuffixAny(s string, sep []rune) SuffixAny {
return SuffixAny{s, sep}
return SuffixAny{s, sep, utf8.RuneCountInString(s)}
}
func (self SuffixAny) Index(s string) (int, []int) {
idx := strings.Index(s, self.Suffix)
func (s SuffixAny) Index(v string) (int, []int) {
idx := strings.Index(v, s.s)
if idx == -1 {
return -1, nil
}
i := sutil.LastIndexAnyRunes(s[:idx], self.Separators) + 1
i := runes.LastIndexAnyRune(v[:idx], s.sep) + 1
return i, []int{idx + len(self.Suffix) - i}
return i, []int{idx + len(s.s) - i}
}
func (self SuffixAny) Len() int {
return lenNo
func (s SuffixAny) MinLen() int {
return s.minLen
}
func (self SuffixAny) Match(s string) bool {
if !strings.HasSuffix(s, self.Suffix) {
func (s SuffixAny) Match(v string) bool {
if !strings.HasSuffix(v, s.s) {
return false
}
return sutil.IndexAnyRunes(s[:len(s)-len(self.Suffix)], self.Separators) == -1
return runes.IndexAnyRune(v[:len(v)-len(s.s)], s.sep) == -1
}
func (self SuffixAny) String() string {
return fmt.Sprintf("<suffix_any:![%s]%s>", string(self.Separators), self.Suffix)
func (s SuffixAny) String() string {
return fmt.Sprintf("<suffix_any:![%s]%s>", string(s.sep), s.s)
}

View File

@ -10,24 +10,23 @@ func NewSuper() Super {
return Super{}
}
func (self Super) Match(s string) bool {
func (s Super) Match(_ string) bool {
return true
}
func (self Super) Len() int {
return lenNo
func (s Super) MinLen() int {
return 0
}
func (self Super) Index(s string) (int, []int) {
segments := acquireSegments(len(s) + 1)
for i := range s {
segments = append(segments, i)
func (s Super) Index(v string) (int, []int) {
seg := acquireSegments(len(v) + 1)
for i := range v {
seg = append(seg, i)
}
segments = append(segments, len(s))
return 0, segments
seg = append(seg, len(v))
return 0, seg
}
func (self Super) String() string {
func (s Super) String() string {
return fmt.Sprintf("<super>")
}

View File

@ -8,38 +8,45 @@ import (
// raw represents raw string to match
type Text struct {
Str string
RunesLength int
BytesLength int
Segments []int
s string
runes int
bytes int
seg []int
}
func NewText(s string) Text {
return Text{
Str: s,
RunesLength: utf8.RuneCountInString(s),
BytesLength: len(s),
Segments: []int{len(s)},
s: s,
runes: utf8.RuneCountInString(s),
bytes: len(s),
seg: []int{len(s)},
}
}
func (self Text) Match(s string) bool {
return self.Str == s
func (t Text) Match(s string) bool {
return t.s == s
}
func (self Text) Len() int {
return self.RunesLength
}
func (self Text) Index(s string) (int, []int) {
index := strings.Index(s, self.Str)
if index == -1 {
func (t Text) Index(s string) (int, []int) {
i := strings.Index(s, t.s)
if i == -1 {
return -1, nil
}
return index, self.Segments
return i, t.seg
}
func (self Text) String() string {
return fmt.Sprintf("<text:`%v`>", self.Str)
func (t Text) MinLen() int {
return t.runes
}
func (t Text) BytesCount() int {
return t.bytes
}
func (t Text) RunesCount() int {
return t.runes
}
func (t Text) String() string {
return fmt.Sprintf("<text:`%v`>", t.s)
}

154
match/tree.go Normal file
View File

@ -0,0 +1,154 @@
package match
import (
"fmt"
"unicode/utf8"
"github.com/gobwas/glob/util/runes"
)
// Tree matches a string by locating value inside it and checking the text
// before the hit against left and the text after it against right.
type Tree struct {
// value is the matcher searched for inside the input.
value MatchIndexer
// left and right match the text before and after value; either may be nil.
left Matcher
right Matcher
// minLen is value.MinLen() plus the MinLen of each non-nil child.
minLen int
// runes is vrunes+lrunes+rrunes when value and both children are Sizers,
// and -1 otherwise.
runes int
// vrunes, lrunes and rrunes are the RunesCount of value, left and right
// respectively, or -1 for a part that is not a Sizer.
vrunes int
lrunes int
rrunes int
}
// SizedTree is a Tree whose value and both children report a rune count, so
// the tree as a whole has one too. NewTree returns it in that case.
type SizedTree struct {
Tree
}
// RunesCount returns the rune count stored in the embedded Tree.
func (st SizedTree) RunesCount() int {
	return st.runes
}
// NewTree builds a tree that searches for v and matches the text before and
// after each hit with l and r. Either child may be nil.
//
// The minimal length is the sum of the MinLen of v and of the non-nil
// children. For each of l, r and v that implements Sizer the rune count is
// cached; the others are recorded as -1. When all three are Sizers the total
// is stored as well and a SizedTree is returned, otherwise a plain Tree.
func NewTree(v MatchIndexer, l, r Matcher) Matcher {
	min := v.MinLen()
	if l != nil {
		min += l.MinLen()
	}
	if r != nil {
		min += r.MinLen()
	}

	t := Tree{
		value:  v,
		left:   l,
		right:  r,
		minLen: min,
		runes:  -1,
		vrunes: -1,
		lrunes: -1,
		rrunes: -1,
	}

	// A comma-ok assertion on a nil child simply reports false.
	leftSizer, leftSized := l.(Sizer)
	rightSizer, rightSized := r.(Sizer)
	valueSizer, valueSized := v.(Sizer)

	if leftSized {
		t.lrunes = leftSizer.RunesCount()
	}
	if rightSized {
		t.rrunes = rightSizer.RunesCount()
	}
	if valueSized {
		t.vrunes = valueSizer.RunesCount()
	}

	if !(valueSized && leftSized && rightSized) {
		return t
	}
	t.runes = t.vrunes + t.lrunes + t.rrunes
	return SizedTree{t}
}
// MinLen returns the minimal length stored in the tree's minLen field.
func (t Tree) MinLen() int {
return t.minLen
}
// Match reports whether s is matched by the tree: some occurrence of the
// value matcher must split s so that everything before it satisfies the left
// child and everything after it satisfies the right child. A nil child only
// accepts the empty string.
//
// The search window is first narrowed by offsetLimit, which skips the bytes
// that fixed-size children must consume. Three guards keep the scan sound:
//   - when those fixed-size parts overlap, s is too short and is rejected
//     (slicing it would be out of range);
//   - after a failed candidate the window resumes one rune past that
//     candidate, not one rune past the window start, so the same hit is not
//     found again and multi-byte runes are never cut;
//   - a candidate at the very end of the window leaves nothing to step over,
//     so the scan stops instead of spinning.
func (t Tree) Match(s string) (ok bool) {
	enter()
	logf("matching %q: %v", s, t)
	defer func(s string) {
		logf("result: %q -> %v", s, ok)
		leave()
	}(s)

	offset, limit := t.offsetLimit(s)
	if offset+limit > len(s) {
		// The fixed-size left and right parts do not both fit into s.
		return false
	}

	q := s[offset : len(s)-limit]
	logf("OFFSET/LIMIT: %d/%d %q of %q", offset, limit, q, s)

	// len(q) is in bytes and vrunes in runes (or -1), so this is only a cheap
	// lower bound that never rejects a viable window.
	for len(q) >= t.vrunes {
		// Search for the value in the remaining window.
		index, segments := t.value.Index(q)
		logf("INDEX #%d %q (%v)", index, q, t.value)
		if index == -1 {
			releaseSegments(segments)
			return false
		}

		l := s[:offset+index]
		var left bool
		if t.left != nil {
			left = t.left.Match(l)
		} else {
			left = l == ""
		}
		logf("LEFT %q %v", l, left)

		if left {
			// Each segment is a possible length of the value at this index.
			for _, seg := range segments {
				r := s[offset+index+seg:]
				var right bool
				if t.right != nil {
					right = t.right.Match(r)
				} else {
					right = r == ""
				}
				logf("RIGHT %q %v", r, right)
				if right {
					releaseSegments(segments)
					return true
				}
			}
		}

		releaseSegments(segments)

		_, x := utf8.DecodeRuneInString(q[index:])
		if x == 0 {
			// The candidate sat at the end of the window; nothing is left.
			return false
		}
		step := index + x
		q = q[step:]
		offset += step
		logf("SLICED TO %q", q)
	}

	return false
}
// offsetLimit returns, in bytes, how much of the start (offset) and of the
// end (limit) of s can be skipped when searching for the value: the byte
// length of the first lrunes runes and of the last rrunes runes. Both are
// zero when s has fewer runes than the tree's total rune count, and a side
// whose rune count is not positive contributes nothing.
func (t Tree) offsetLimit(s string) (offset, limit int) {
	if utf8.RuneCountInString(s) < t.runes {
		return 0, 0
	}
	if t.lrunes > 0 {
		offset = len(runes.Head(s, t.lrunes))
	}
	if t.rrunes > 0 {
		limit = len(runes.Tail(s, t.rrunes))
	}
	return offset, limit
}
// String renders the tree as "<btree:[left<-value->right]>".
func (t Tree) String() string {
	const layout = "<btree:[%v<-%s->%v]>"
	return fmt.Sprintf(layout, t.left, t.value, t.right)
}

94
match/tree_test.go Normal file
View File

@ -0,0 +1,94 @@
package match
import (
"fmt"
"testing"
)
// TestTree runs Match on a table of trees built with NewTree and compares the
// result with the expected outcome. On a mismatch the Graphviz rendering of
// the tree is printed to help debugging.
func TestTree(t *testing.T) {
for _, test := range []struct {
tree Matcher
str string
exp bool
}{
// Text surrounded by Super on both sides.
{
NewTree(NewText("abc"), NewSuper(), NewSuper()),
"abc",
true,
},
// Text with a Single on each side.
{
NewTree(NewText("a"), NewSingle(nil), NewSingle(nil)),
"aaa",
true,
},
// Single on the left and no right child: expected not to match "bbb".
{
NewTree(NewText("b"), NewSingle(nil), nil),
"bbb",
false,
},
// Nested tree as the left child.
{
NewTree(
NewText("c"),
NewTree(
NewSingle(nil),
NewSuper(),
nil,
),
nil,
),
"abc",
true,
},
} {
t.Run("", func(t *testing.T) {
act := test.tree.Match(test.str)
if act != test.exp {
fmt.Println(Graphviz("NIL", test.tree))
t.Errorf("match %q error: act: %t; exp: %t", test.str, act, test.exp)
}
})
}
}
// fakeMatcher is a configurable stub used by the tree benchmark: it matches
// everything, reports a fixed minimal length and always indexes at position
// zero with segn segments.
type fakeMatcher struct {
	len  int    // value reported by MinLen
	segn int    // number of segments returned by Index, and the value of each
	name string // value reported by String
}

// Match accepts any input.
func (f *fakeMatcher) Match(string) bool {
	return true
}

// Index always reports a hit at offset 0 with segn segments, each equal to
// segn.
func (f *fakeMatcher) Index(s string) (int, []int) {
	seg := make([]int, f.segn)
	for i := range seg {
		seg[i] = f.segn
	}
	return 0, seg
}

// MinLen returns the configured length.
func (f *fakeMatcher) MinLen() int {
	return f.len
}

// String returns the configured name.
func (f *fakeMatcher) String() string {
	return f.name
}
// BenchmarkMatchTree measures Match on a tree assembled from three
// fakeMatcher stubs, run in parallel over a fixed fixture string.
func BenchmarkMatchTree(b *testing.B) {
// Each fake is {MinLen, number of segments, name}.
l := &fakeMatcher{4, 3, "left_fake"}
r := &fakeMatcher{4, 3, "right_fake"}
v := &fakeMatcher{2, 3, "value_fake"}
// must be <= len(l + r + v)
fixture := "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"
bt := NewTree(v, l, r)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
bt.Match(fixture)
}
})
}

11
match/util.go Normal file
View File

@ -0,0 +1,11 @@
package match
// minLen returns the smallest MinLen among ms, or 0 when ms is empty.
func minLen(ms []Matcher) (min int) {
	if len(ms) == 0 {
		return 0
	}
	min = ms[0].MinLen()
	for _, m := range ms[1:] {
		if n := m.MinLen(); n < min {
			min = n
		}
	}
	return min
}

165
syntax/ast/optimize.go Normal file
View File

@ -0,0 +1,165 @@
package ast
import (
"reflect"
)
// Minimize applies heuristics that reduce the number of nodes in the tree
// rooted at t. Only nodes of kind KindAnyOf are handled; for any other kind
// it returns nil.
func Minimize(t *Node) *Node {
	if t.Kind == KindAnyOf {
		return minimizeAnyOf(t)
	}
	return nil
}
// minimizeAnyOf factors out the children that all alternatives of the AnyOf
// node t share at their left and right ends.
//
// It returns nil when some child of t is not a KindPattern node or when the
// alternatives have no common parts. Otherwise it returns a new KindPattern
// node made of the common left part, the remaining alternatives and the
// common right part, in that order.
func minimizeAnyOf(t *Node) *Node {
if !SameKind(t.Children, KindPattern) {
return nil
}
commonLeft, commonRight := CommonChildren(t.Children)
commonLeftCount, commonRightCount := len(commonLeft), len(commonRight)
if commonLeftCount == 0 && commonRightCount == 0 { // there are no common parts
return nil
}
var result []*Node
if commonLeftCount > 0 {
result = append(result, NewNode(KindPattern, nil, commonLeft...))
}
// Collect what is left of each alternative once the common parts are cut
// off; duplicates are dropped by AppendUnique.
var anyOf []*Node
for _, child := range t.Children {
reuse := child.Children[commonLeftCount : len(child.Children)-commonRightCount]
var node *Node
if len(reuse) == 0 {
// This alternative is fully covered by the common left and right
// parts, so nothing of it remains.
node = NewNode(KindNothing, nil)
} else {
node = NewNode(KindPattern, nil, reuse...)
}
anyOf = AppendUnique(anyOf, node)
}
switch {
// A single non-empty remainder needs no AnyOf wrapper; a single
// KindNothing remainder is dropped entirely.
case len(anyOf) == 1 && anyOf[0].Kind != KindNothing:
result = append(result, anyOf[0])
case len(anyOf) > 1:
result = append(result, NewNode(KindAnyOf, nil, anyOf...))
}
if commonRightCount > 0 {
result = append(result, NewNode(KindPattern, nil, commonRight...))
}
return NewNode(KindPattern, nil, result...)
}
// CommonChildren returns the children that every node in nodes shares at the
// start (commonLeft) and at the end (commonRight) of its Children list,
// compared with Node.Equal. Both results are empty when nodes has fewer than
// two elements.
func CommonChildren(nodes []*Node) (commonLeft, commonRight []*Node) {
if len(nodes) <= 1 {
return
}
// The node with the fewest children bounds how much can be shared.
idx := OneWithLeastChildren(nodes)
if idx == -1 {
return
}
tree := nodes[idx]
treeLength := len(tree.Children)
// Allocate the largest possible commonRight so that elements can be stored
// from the end towards the start without reordering afterwards.
commonRight = make([]*Node, treeLength)
lastRight := treeLength // will use this to get results as commonRight[lastRight:]
var (
// breakLeft/breakRight become true once the common prefix/suffix ends.
breakLeft bool
breakRight bool
// commonTotal counts the children already assigned to either side.
commonTotal int
)
// i walks the shortest node from the left, j from the right.
for i, j := 0, treeLength-1; commonTotal < treeLength && j >= 0 && !(breakLeft && breakRight); i, j = i+1, j-1 {
treeLeft := tree.Children[i]
treeRight := tree.Children[j]
for k := 0; k < len(nodes) && !(breakLeft && breakRight); k++ {
// Skip the reference node itself.
if k == idx {
continue
}
restLeft := nodes[k].Children[i]
// Align j to the end of the possibly longer node k.
restRight := nodes[k].Children[j+len(nodes[k].Children)-treeLength]
breakLeft = breakLeft || !treeLeft.Equal(restLeft)
// Stop extending the right side once the left side has reached it.
breakRight = breakRight || (!breakLeft && j <= i)
breakRight = breakRight || !treeRight.Equal(restRight)
}
if !breakLeft {
commonTotal++
commonLeft = append(commonLeft, treeLeft)
}
if !breakRight {
commonTotal++
lastRight = j
commonRight[j] = treeRight
}
}
// Drop the unused leading slots.
commonRight = commonRight[lastRight:]
return
}
// AppendUnique appends val to target unless target already holds a node that
// is reflect.DeepEqual to it, in which case target is returned unchanged.
func AppendUnique(target []*Node, val *Node) []*Node {
	for i := range target {
		if reflect.DeepEqual(target[i], val) {
			return target
		}
	}
	return append(target, val)
}
// SameKind reports whether every node in nodes has the given kind. It is
// true for an empty slice.
func SameKind(nodes []*Node, kind Kind) bool {
	for i := 0; i < len(nodes); i++ {
		if nodes[i].Kind != kind {
			return false
		}
	}
	return true
}
// OneWithLeastChildren returns the index of the first node in nodes that has
// the fewest children, or -1 when nodes is empty.
func OneWithLeastChildren(nodes []*Node) int {
	least := -1
	for i, n := range nodes {
		if least == -1 || len(n.Children) < len(nodes[least].Children) {
			least = i
		}
	}
	return least
}
// Equal reports whether a and b have the same length and each pair of nodes
// at the same position is equal according to Node.Equal.
func Equal(a, b []*Node) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if !a[i].Equal(b[i]) {
			return false
		}
	}
	return true
}

126
syntax/ast/optimize_test.go Normal file
View File

@ -0,0 +1,126 @@
package ast
import (
"testing"
)
// TestCommonChildren checks CommonChildren on a table of node lists: for each
// case the returned left and right slices are compared with the expected
// ones using Equal.
func TestCommonChildren(t *testing.T) {
for _, test := range []struct {
nodes []*Node
left []*Node
right []*Node
}{
// A single node: nothing to compare with, both results empty.
{
nodes: []*Node{
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"z"}),
NewNode(KindText, Text{"c"}),
),
},
},
// Same length, only the middle child differs.
{
nodes: []*Node{
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"z"}),
NewNode(KindText, Text{"c"}),
),
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
NewNode(KindText, Text{"c"}),
),
},
left: []*Node{
NewNode(KindText, Text{"a"}),
},
right: []*Node{
NewNode(KindText, Text{"c"}),
},
},
// Different lengths; the shorter node is split between left and right.
{
nodes: []*Node{
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
NewNode(KindText, Text{"c"}),
NewNode(KindText, Text{"d"}),
),
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
NewNode(KindText, Text{"c"}),
NewNode(KindText, Text{"c"}),
NewNode(KindText, Text{"d"}),
),
},
left: []*Node{
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
},
right: []*Node{
NewNode(KindText, Text{"c"}),
NewNode(KindText, Text{"d"}),
},
},
// Different lengths with an odd number of children in the shorter node.
{
nodes: []*Node{
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
NewNode(KindText, Text{"c"}),
),
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
NewNode(KindText, Text{"b"}),
NewNode(KindText, Text{"c"}),
),
},
left: []*Node{
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"b"}),
},
right: []*Node{
NewNode(KindText, Text{"c"}),
},
},
// Three nodes sharing only the first child.
{
nodes: []*Node{
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"d"}),
),
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"d"}),
),
NewNode(KindNothing, nil,
NewNode(KindText, Text{"a"}),
NewNode(KindText, Text{"e"}),
),
},
left: []*Node{
NewNode(KindText, Text{"a"}),
},
right: []*Node{},
},
} {
t.Run("", func(t *testing.T) {
left, right := CommonChildren(test.nodes)
if !Equal(left, test.left) {
t.Errorf(
"left, right := commonChildren(); left = %v; want %v",
left, test.left,
)
}
if !Equal(right, test.right) {
t.Errorf(
"left, right := commonChildren(); right = %v; want %v",
right, test.right,
)
}
})
}
}

View File

@ -3,8 +3,9 @@ package ast
import (
"errors"
"fmt"
"github.com/gobwas/glob/syntax/lexer"
"unicode/utf8"
"github.com/gobwas/glob/syntax/lexer"
)
type Lexer interface {

View File

@ -1,5 +1,98 @@
package runes
import (
"strings"
"unicode/utf8"
)
// Head returns the prefix of s holding its first r runes. The whole of s is
// returned when it has fewer than r runes, and the empty string when r is
// not positive. Invalid UTF-8 bytes count as one rune each.
func Head(s string, r int) string {
	if r <= 0 {
		// Without this guard the counter would never equal r and the whole
		// string would be returned.
		return ""
	}
	var i int
	for n := 0; n < r && i < len(s); n++ {
		_, w := utf8.DecodeRuneInString(s[i:])
		i += w
	}
	return s[:i]
}
// Tail returns the suffix of s holding its last r runes. The whole of s is
// returned when it has fewer than r runes, and the empty string when r is
// not positive.
//
// Runes are decoded backwards with utf8.DecodeLastRuneInString, so a literal
// U+FFFD and invalid bytes each count as exactly one rune, in line with
// ranging over the string.
func Tail(s string, r int) string {
	if r <= 0 {
		return ""
	}
	i := len(s)
	for n := 0; n < r && i > 0; n++ {
		_, w := utf8.DecodeLastRuneInString(s[:i])
		i -= w
	}
	return s[i:]
}
// ExactlyRunesCount reports whether s consists of exactly n runes. It stops
// scanning as soon as s is known to be longer than n runes.
func ExactlyRunesCount(s string, n int) bool {
	seen := 0
	for range s {
		if seen == n {
			// One more rune than allowed.
			return false
		}
		seen++
	}
	return seen == n
}
// AtLeastRunesCount reports whether s holds n runes or more. It stops
// scanning as soon as n runes have been seen. Any string, including the
// empty one, has at least n runes when n is not positive.
func AtLeastRunesCount(s string, n int) bool {
	if n <= 0 {
		return true
	}
	var m int
	for range s {
		m++
		if m >= n {
			return true
		}
	}
	return false
}
// IndexAnyRune returns the byte index of the earliest occurrence in s of any
// rune from rs, or -1 when none of them occurs.
//
// Every rune of rs is considered: returning the hit for the first rune of rs
// that happens to occur would make the result depend on the order of rs.
func IndexAnyRune(s string, rs []rune) int {
	first := -1
	for _, r := range rs {
		if i := strings.IndexRune(s, r); i != -1 && (first == -1 || i < first) {
			first = i
		}
	}
	return first
}
// LastIndexAnyRune returns the byte index of the latest occurrence in s of
// any rune from rs, or -1 when none of them occurs.
//
// Every rune of rs is considered and the greatest index wins. Non-ASCII
// runes are located with strings.LastIndex on their UTF-8 encoding, which
// yields the last occurrence directly and never slices out of range.
func LastIndexAnyRune(s string, rs []rune) int {
	last := -1
	for _, r := range rs {
		var i int
		if 0 <= r && r < utf8.RuneSelf {
			i = strings.LastIndexByte(s, byte(r))
		} else {
			i = strings.LastIndex(s, string(r))
		}
		if i > last {
			last = i
		}
	}
	return last
}
func Index(s, needle []rune) int {
ls, ln := len(s), len(needle)
@ -130,6 +223,7 @@ func IndexLastRune(s []rune, r rune) int {
}
func Equal(a, b []rune) bool {
// TODO use bytes.Equal with unsafe.
if len(a) == len(b) {
for i := 0; i < len(a); i++ {
if a[i] != b[i] {

View File

@ -1,39 +0,0 @@
package strings
import (
"strings"
"unicode/utf8"
)
// IndexAnyRunes returns the byte index of the earliest occurrence in s of any
// rune from rs, or -1 when none of them occurs.
//
// Every rune of rs is considered, so the result does not depend on the order
// in which the runes are listed.
func IndexAnyRunes(s string, rs []rune) int {
	first := -1
	for _, r := range rs {
		if i := strings.IndexRune(s, r); i != -1 && (first == -1 || i < first) {
			first = i
		}
	}
	return first
}
// LastIndexAnyRunes returns the byte index of the latest occurrence in s of
// any rune from rs, or -1 when none of them occurs.
//
// Every rune of rs is considered and the greatest index wins. Non-ASCII
// runes are located with strings.LastIndex on their UTF-8 encoding, which
// yields the last occurrence directly and never slices out of range.
func LastIndexAnyRunes(s string, rs []rune) int {
	last := -1
	for _, r := range rs {
		var i int
		if 0 <= r && r < utf8.RuneSelf {
			i = strings.LastIndexByte(s, byte(r))
		} else {
			i = strings.LastIndex(s, string(r))
		}
		if i > last {
			last = i
		}
	}
	return last
}