This commit is contained in:
gobwas 2016-01-08 20:14:31 +03:00
parent dbda43cc7c
commit 76b6c27015
26 changed files with 1621 additions and 525 deletions

258
compiler.go Normal file
View File

@ -0,0 +1,258 @@
package glob
import (
"fmt"
"github.com/gobwas/glob/match"
)
// optimize rewrites a matcher into a cheaper equivalent where one is known.
//
// Two shapes are recognised:
//   - an Any with no separators becomes a Super;
//   - a BTree whose value is Raw text and whose (already optimized) branches
//     are nil, Super, Prefix or Suffix collapses into Raw, Contains, Prefix,
//     Suffix, or an Every combining a Prefix with a Suffix.
//
// Every other matcher, including a nil one, is returned unchanged.
//
// NOTE(review): the Prefix+Suffix combinations presumably check both ends
// independently, which would let the two literals overlap in the input —
// confirm against match.Prefix / match.Suffix.
func optimize(matcher match.Matcher) match.Matcher {
	switch node := matcher.(type) {
	case match.Any:
		if node.Separators == "" {
			return match.Super{}
		}
		return matcher

	case match.BTree:
		// Simplify the branches first so the checks below see their final form.
		node.Left = optimize(node.Left)
		node.Right = optimize(node.Right)

		raw, isRaw := node.Value.(match.Raw)
		if !isRaw {
			return node
		}

		var (
			noLeft  = node.Left == nil
			noRight = node.Right == nil
		)
		// Comma-ok assertions are safe on nil branches: they just report false.
		_, superLeft := node.Left.(match.Super)
		_, superRight := node.Right.(match.Super)
		prefix, prefixLeft := node.Left.(match.Prefix)
		suffix, suffixRight := node.Right.(match.Suffix)

		switch {
		case noLeft && noRight:
			return match.Raw{raw.Str}
		case superLeft && superRight:
			return match.Contains{raw.Str, false}
		case superLeft && noRight:
			return match.Suffix{raw.Str}
		case superRight && noLeft:
			return match.Prefix{raw.Str}
		case noLeft && suffixRight:
			return match.Every{match.Matchers{match.Prefix{raw.Str}, suffix}}
		case noRight && prefixLeft:
			return match.Every{match.Matchers{prefix, match.Suffix{raw.Str}}}
		}
		return node
	}

	return matcher
}
// glueMatchers tries to merge a sequence made only of Super, Any, Single and
// negated List matchers into one length/content based matcher.
//
// It returns nil when the sequence is empty, contains any other matcher kind
// (including a non-negated List), or when the matchers do not all share the
// same separator set. A single matcher is returned as is.
func glueMatchers(matchers []match.Matcher) match.Matcher {
	if len(matchers) == 0 {
		return nil
	}
	if len(matchers) == 1 {
		return matchers[0]
	}

	var (
		sawAny, sawSuper, sawSingle bool

		required int    // number of matchers that consume exactly one character
		shared   string // separator set taken from the first matcher
	)

	for pos, item := range matchers {
		var current string

		switch v := item.(type) {
		case match.Super:
			sawSuper = true
		case match.Any:
			current, sawAny = v.Separators, true
		case match.Single:
			current, sawSingle = v.Separators, true
			required++
		case match.List:
			if !v.Not {
				return nil
			}
			// A negated list is treated like a Single whose separators are the listed characters.
			current, sawSingle = v.List, true
			required++
		default:
			return nil
		}

		if pos == 0 {
			shared = current
		}
		if current != shared {
			return nil
		}
	}

	switch {
	case sawSuper && !sawAny && !sawSingle:
		return match.Super{}
	case sawAny && !sawSuper && !sawSingle:
		return match.Any{shared}
	case (sawAny || sawSuper) && required > 0 && shared == "":
		return match.Min{required}
	}

	every := match.Every{}
	if required > 0 {
		every.Add(match.Min{required})
		// Without a wildcard the length is fixed, so bound it from above too.
		if !sawAny && !sawSuper {
			every.Add(match.Max{required})
		}
	}
	if shared != "" {
		every.Add(match.Contains{shared, true})
	}

	return every
}
// convertMatchers turns a flat sequence of matchers into a single matcher.
//
// If glueMatchers can merge the whole sequence, its result is returned.
// Otherwise a pivot primitive is chosen — the first Raw matcher, or failing
// that the last Primitive in the sequence — and a BTree is built around it,
// with the matchers before and after the pivot converted recursively into
// the left and right branches. An error is returned when the sequence holds
// no Primitive at all.
func convertMatchers(matchers []match.Matcher) (match.Matcher, error) {
	if glued := glueMatchers(matchers); glued != nil {
		return glued, nil
	}

	var pivot match.Primitive
	pivotAt := 0
	for i := range matchers {
		prim, isPrim := matchers[i].(match.Primitive)
		if !isPrim {
			continue
		}
		pivot, pivotAt = prim, i
		// Raw text is the preferred pivot: stop at the first one found.
		if _, isRaw := matchers[i].(match.Raw); isRaw {
			break
		}
	}
	if pivot == nil {
		return nil, fmt.Errorf("could not convert matchers %s: need at least one primitive", match.Matchers(matchers))
	}

	tree := match.BTree{Value: pivot}

	if before := matchers[:pivotAt]; len(before) > 0 {
		left, err := convertMatchers(before)
		if err != nil {
			return nil, err
		}
		tree.Left = left
	}

	if after := matchers[pivotAt+1:]; len(after) > 0 {
		right, err := convertMatchers(after)
		if err != nil {
			return nil, err
		}
		tree.Right = right
	}

	return tree, nil
}
// do compiles one AST node into a matcher and returns it in optimized form.
//
// s is the separator set handed to the Any and Single matchers. Pattern and
// any-of nodes compile each child recursively: an any-of node wraps the
// results in match.AnyOf, a pattern node folds them with convertMatchers.
// An unknown node type yields an error.
func do(node node, s string) (m match.Matcher, err error) {
	switch n := node.(type) {
	case *nodeAnyOf, *nodePattern:
		var compiled []match.Matcher
		for _, child := range node.children() {
			sub, subErr := do(child, s)
			if subErr != nil {
				return nil, subErr
			}
			compiled = append(compiled, optimize(sub))
		}

		// Only two node types reach this branch, so "not a pattern" means any-of.
		if _, isPattern := node.(*nodePattern); isPattern {
			if m, err = convertMatchers(compiled); err != nil {
				return nil, err
			}
		} else {
			m = match.AnyOf{compiled}
		}

	case *nodeList:
		m = match.List{n.chars, n.not}

	case *nodeRange:
		m = match.Range{n.lo, n.hi, n.not}

	case *nodeAny:
		m = match.Any{s}

	case *nodeSuper:
		m = match.Super{}

	case *nodeSingle:
		m = match.Single{s}

	case *nodeText:
		m = match.Raw{n.text}

	default:
		return nil, fmt.Errorf("could not compile tree: unknown node type")
	}

	return optimize(m), nil
}
// compile converts a parsed pattern tree into a Glob, using s as the
// separator set. Any error from the tree walk is returned with a nil Glob.
func compile(ast *nodePattern, s string) (Glob, error) {
	matcher, err := do(ast, s)
	if err == nil {
		return matcher, nil
	}
	return nil, err
}

258
compiler_test.go Normal file
View File

@ -0,0 +1,258 @@
package glob
import (
"github.com/gobwas/glob/match"
"reflect"
"testing"
)
const separators = "."
// TestGlueMatchers checks that sequences consisting only of wildcard-like
// matchers are merged by glueMatchers into the expected single matcher.
//
// The function under test is called directly so that a failure points at
// glueMatchers rather than at the convertMatchers fallback path.
func TestGlueMatchers(t *testing.T) {
	for id, test := range []struct {
		in  []match.Matcher
		exp match.Matcher
	}{
		{
			[]match.Matcher{
				match.Super{},
				match.Single{},
			},
			match.Min{1},
		},
		{
			[]match.Matcher{
				match.Any{separators},
				match.Single{separators},
			},
			match.Every{match.Matchers{
				match.Min{1},
				match.Contains{separators, true},
			}},
		},
		{
			[]match.Matcher{
				match.Single{},
				match.Single{},
				match.Single{},
			},
			match.Every{match.Matchers{
				match.Min{3},
				match.Max{3},
			}},
		},
		{
			[]match.Matcher{
				match.List{"a", true},
				match.Any{"a"},
			},
			match.Every{match.Matchers{
				match.Min{1},
				match.Contains{"a", true},
			}},
		},
	} {
		act := glueMatchers(test.in)
		// nil means "cannot be glued", which none of the cases above expects.
		if act == nil {
			t.Errorf("#%d glue matchers error: matchers could not be glued", id)
			continue
		}

		if !reflect.DeepEqual(act, test.exp) {
			t.Errorf("#%d unexpected glue matchers result:\nact: %s;\nexp: %s", id, act, test.exp)
			continue
		}
	}
}
// TestConvertMatchers checks the BTree that convertMatchers builds when the
// input sequence cannot be glued into a single matcher.
func TestConvertMatchers(t *testing.T) {
	type testCase struct {
		in  []match.Matcher
		exp match.Matcher
	}

	cases := []testCase{
		// No Raw present: the last primitive becomes the root, recursively.
		{
			[]match.Matcher{
				match.Super{},
				match.Single{separators},
				match.Raw{"c"},
			},
			match.BTree{
				Left: match.BTree{
					Left:  match.Super{},
					Value: match.Single{separators},
				},
				Value: match.Raw{"c"},
			},
		},
		// Raw in the middle splits the sequence into left and right branches.
		{
			[]match.Matcher{
				match.Any{},
				match.Raw{"c"},
				match.Any{},
			},
			match.BTree{
				Left:  match.Any{},
				Value: match.Raw{"c"},
				Right: match.Any{},
			},
		},
	}

	for id := range cases {
		tc := cases[id]

		act, err := convertMatchers(tc.in)
		if err != nil {
			t.Errorf("#%d convert matchers error: %s", id, err)
			continue
		}

		if reflect.DeepEqual(act, tc.exp) {
			continue
		}
		t.Errorf("#%d unexpected convert matchers result:\nact: %s;\nexp: %s", id, act, tc.exp)
	}
}
// pattern is a test helper that builds a *nodePattern whose descendants are
// the given nodes.
func pattern(nodes ...node) *nodePattern {
	p := new(nodePattern)
	p.nodeImpl = nodeImpl{desc: nodes}
	return p
}
// anyOf is a test helper that builds a *nodeAnyOf whose descendants are the
// given nodes.
func anyOf(nodes ...node) *nodeAnyOf {
	a := new(nodeAnyOf)
	a.nodeImpl = nodeImpl{desc: nodes}
	return a
}
// TestCompiler compiles hand-built pattern trees and compares the resulting
// matcher structure with the expected one. sep is the separator set passed
// to compile; an empty sep means "no separators".
func TestCompiler(t *testing.T) {
	for id, test := range []struct {
		ast    *nodePattern
		result Glob
		sep    string
	}{
		{
			ast:    pattern(&nodeText{text: "abc"}),
			result: match.Raw{"abc"},
		},
		{
			ast:    pattern(&nodeAny{}),
			sep:    separators,
			result: match.Any{separators},
		},
		{
			ast:    pattern(&nodeAny{}),
			result: match.Super{},
		},
		{
			ast:    pattern(&nodeSuper{}),
			result: match.Super{},
		},
		{
			ast:    pattern(&nodeSingle{}),
			sep:    separators,
			result: match.Single{separators},
		},
		{
			ast: pattern(&nodeRange{
				lo:  'a',
				hi:  'z',
				not: true,
			}),
			result: match.Range{'a', 'z', true},
		},
		{
			ast: pattern(&nodeList{
				chars: "abc",
				not:   true,
			}),
			result: match.List{"abc", true},
		},
		{
			ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}),
			sep: separators,
			result: match.Every{Matchers: match.Matchers{
				match.Min{3},
				match.Contains{separators, true},
			}},
		},
		{
			ast:    pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}),
			result: match.Min{3},
		},
		{
			ast: pattern(&nodeAny{}, &nodeText{text: "abc"}, &nodeSingle{}),
			sep: separators,
			result: match.BTree{
				Left:  match.Any{separators},
				Value: match.Raw{"abc"},
				Right: match.Single{separators},
			},
		},
		{
			ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSingle{}),
			sep: separators,
			result: match.BTree{
				Left: match.BTree{
					Left:  match.Super{},
					Value: match.Single{separators},
				},
				Value: match.Raw{"abc"},
				Right: match.Single{separators},
			},
		},
		{
			ast:    pattern(&nodeAny{}, &nodeText{text: "abc"}),
			result: match.Suffix{"abc"},
		},
		{
			ast:    pattern(&nodeText{text: "abc"}, &nodeAny{}),
			result: match.Prefix{"abc"},
		},
		{
			ast:    pattern(&nodeText{text: "abc"}, &nodeAny{}, &nodeText{text: "def"}),
			result: match.Every{match.Matchers{match.Prefix{"abc"}, match.Suffix{"def"}}},
		},
		{
			ast:    pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}),
			result: match.Contains{"abc", false},
		},
		{
			ast:    pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}),
			sep:    separators,
			result: match.BTree{Left: match.Any{separators}, Value: match.Raw{"abc"}, Right: match.Any{separators}},
		},
		{
			ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSuper{}, &nodeSingle{}),
			result: match.BTree{
				Left:  match.Min{1},
				Value: match.Raw{"abc"},
				Right: match.Min{1},
			},
		},
		{
			ast: pattern(anyOf(&nodeText{text: "abc"})),
			result: match.AnyOf{match.Matchers{
				match.Raw{"abc"},
			}},
		},
		{
			ast: pattern(anyOf(pattern(anyOf(pattern(&nodeText{text: "abc"}))))),
			result: match.AnyOf{match.Matchers{
				match.AnyOf{match.Matchers{
					match.Raw{"abc"},
				}},
			}},
		},
	} {
		prog, err := compile(test.ast, test.sep)
		if err != nil {
			// Include the case index so a failing row can be located in the table.
			t.Errorf("#%d compilation error: %s", id, err)
			continue
		}

		if !reflect.DeepEqual(prog, test.result) {
			t.Errorf("#%d results are not equal:\nexp: %s,\nact: %s", id, test.result, prog)
			continue
		}
	}
}

170
glob.go
View File

@ -1,25 +1,6 @@
package glob package glob
import ( import "strings"
"fmt"
"github.com/gobwas/glob/match"
"strings"
)
const (
any = '*'
single = '?'
escape = '\\'
range_open = '['
range_close = ']'
)
const (
inside_range_not = '!'
inside_range_minus = '-'
)
var syntaxPhrases = string([]byte{any, single, escape, range_open, range_close})
// Glob represents compiled glob pattern. // Glob represents compiled glob pattern.
type Glob interface { type Glob interface {
@ -37,156 +18,25 @@ type Glob interface {
// `?` matches any single non-separator character // `?` matches any single non-separator character
// c matches character c (c != `*`, `**`, `?`, `\`) // c matches character c (c != `*`, `**`, `?`, `\`)
// `\` c matches character c // `\` c matches character c
func New(pattern string, separators ...string) (Glob, error) { func Compile(pattern string, separators ...string) (Glob, error) {
chunks, err := parse(pattern, strings.Join(separators, ""), state{}) ast, err := parse(newLexer(pattern))
if err != nil { if err != nil {
return nil, err return nil, err
} }
switch len(chunks) { matcher, err := compile(ast, strings.Join(separators, ""))
case 1:
return chunks[0].matcher, nil
case 2:
if chunks[0].matcher.Kind() == match.KindRaw && chunks[1].matcher.Kind() == match.KindMultipleSuper {
return &match.Prefix{chunks[0].str}, nil
}
if chunks[1].matcher.Kind() == match.KindRaw && chunks[0].matcher.Kind() == match.KindMultipleSuper {
return &match.Suffix{chunks[1].str}, nil
}
case 3:
if chunks[0].matcher.Kind() == match.KindRaw && chunks[1].matcher.Kind() == match.KindMultipleSuper && chunks[2].matcher.Kind() == match.KindRaw {
return &match.PrefixSuffix{chunks[0].str, chunks[2].str}, nil
}
}
var c []match.Matcher
for _, chunk := range chunks {
c = append(c, chunk.matcher)
}
return &match.Composite{c}, nil
}
// parse parsed given pattern into list of tokens
func parse(str string, sep string, st state) ([]token, error) {
if len(str) == 0 {
return st.tokens, nil
}
// if there are no syntax symbols - pattern is simple string
i := strings.IndexAny(str, syntaxPhrases)
if i == -1 {
return append(st.tokens, token{match.Raw{str}, str}), nil
}
c := string(str[i])
// if syntax symbol is not at the start of pattern - add raw part before it
if i > 0 {
st.tokens = append(st.tokens, token{match.Raw{str[0:i]}, str[0:i]})
}
// if we are in escape state
if st.escape {
st.tokens = append(st.tokens, token{match.Raw{c}, c})
st.escape = false
} else {
switch str[i] {
case range_open:
closed := indexByteNonEscaped(str, range_close, escape, 0)
if closed == -1 {
return nil, fmt.Errorf("'%s' should be closed with '%s'", string(range_open), string(range_close))
}
r := str[i+1 : closed]
g, err := parseRange(r)
if err != nil { if err != nil {
return nil, err return nil, err
} }
st.tokens = append(st.tokens, token{g, r})
if closed == len(str)-1 { return matcher, nil
return st.tokens, nil
}
return parse(str[closed+1:], sep, st)
case escape:
st.escape = true
case any:
if len(str) > i+1 && str[i+1] == any {
st.tokens = append(st.tokens, token{match.Any{}, c})
return parse(str[i+len(c)+1:], sep, st)
}
st.tokens = append(st.tokens, token{match.Any{sep}, c})
case single:
st.tokens = append(st.tokens, token{match.Single{sep}, c})
}
}
return parse(str[i+len(c):], sep, st)
} }
func parseRange(def string) (match.Matcher, error) { func MustCompile(pattern string, separators ...string) Glob {
var ( g, err := Compile(pattern, separators...)
not bool if err != nil {
esc bool panic(err)
minus bool
minusIndex int
b []byte
)
for i, c := range []byte(def) {
if esc {
b = append(b, c)
esc = false
continue
} }
switch c { return g
case inside_range_not:
if i == 0 {
not = true
}
case escape:
if i == len(def)-1 {
return nil, fmt.Errorf("there should be any character after '%s'", string(escape))
}
esc = true
case inside_range_minus:
minus = true
minusIndex = len(b)
default:
b = append(b, c)
}
}
if len(b) == 0 {
return nil, fmt.Errorf("range could not be empty")
}
def = string(b)
if minus {
r := []rune(def)
if len(r) != 2 || minusIndex != 1 {
return nil, fmt.Errorf("invalid range syntax: '%s' should be between two characters", string(inside_range_minus))
}
return &match.Between{r[0], r[1], not}, nil
}
return &match.RangeList{def, not}, nil
}
type token struct {
matcher match.Matcher
str string
}
type state struct {
escape bool
tokens []token
} }

View File

@ -1,6 +1,8 @@
package glob package glob
import ( import (
"github.com/gobwas/glob/match"
"reflect"
"testing" "testing"
) )
@ -20,7 +22,6 @@ const (
fixture_prefix_suffix = "abcdef" fixture_prefix_suffix = "abcdef"
) )
type test struct { type test struct {
pattern, match string pattern, match string
should bool should bool
@ -31,6 +32,32 @@ func glob(s bool, p, m string, d ...string) test {
return test{p, m, s, d} return test{p, m, s, d}
} }
// TestCompilePattern compiles textual patterns through the public Compile
// entry point and compares the resulting matcher with the expected one.
// The case table is currently empty; the sample case is kept commented out.
func TestCompilePattern(t *testing.T) {
	for id, test := range []struct {
		pattern string
		sep     string
		exp     match.Matcher
	}{
	//		{
	//			pattern: "[!a]*****",
	//			exp:     match.Raw{"t"},
	//		},
	} {
		g, err := Compile(test.pattern, test.sep)
		if err != nil {
			t.Errorf("#%d compile pattern error: %s", id, err)
			continue
		}

		// Use the comma-ok form so an unexpected Glob implementation fails
		// this case instead of panicking and aborting the whole test run.
		matcher, ok := g.(match.Matcher)
		if !ok {
			t.Errorf("#%d compiled glob of type %T is not a match.Matcher", id, g)
			continue
		}

		if !reflect.DeepEqual(test.exp, matcher) {
			t.Errorf("#%d unexpected compilation:\nexp: %s\nact: %s", id, test.exp, matcher)
			continue
		}
	}
}
func TestIndexByteNonEscaped(t *testing.T) { func TestIndexByteNonEscaped(t *testing.T) {
for _, test := range []struct { for _, test := range []struct {
s string s string
@ -109,7 +136,13 @@ func TestGlob(t *testing.T) {
glob(false, "*is", "this is a test"), glob(false, "*is", "this is a test"),
glob(false, "*no*", "this is a test"), glob(false, "*no*", "this is a test"),
glob(true, "[!a]*", "this is a test"), glob(true, "[!a]*", "this is a test3"),
// glob(true, "*abc", "abcabc"),
glob(true, "**abc", "abcabc"),
// glob(true, "???", "abc"),
// glob(true, "?*?", "abc"),
// glob(true, "?*?", "ac"),
glob(true, pattern_all, fixture_all), glob(true, pattern_all, fixture_all),
glob(true, pattern_plain, fixture_plain), glob(true, pattern_plain, fixture_plain),
@ -118,7 +151,7 @@ func TestGlob(t *testing.T) {
glob(true, pattern_suffix, fixture_prefix_suffix), glob(true, pattern_suffix, fixture_prefix_suffix),
glob(true, pattern_prefix_suffix, fixture_prefix_suffix), glob(true, pattern_prefix_suffix, fixture_prefix_suffix),
} { } {
g, err := New(test.pattern, test.delimiters...) g, err := Compile(test.pattern, test.delimiters...)
if err != nil { if err != nil {
t.Errorf("parsing pattern %q error: %s", test.pattern, err) t.Errorf("parsing pattern %q error: %s", test.pattern, err)
continue continue
@ -131,15 +164,14 @@ func TestGlob(t *testing.T) {
} }
} }
func BenchmarkParse(b *testing.B) { func BenchmarkParse(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
New(pattern_all) Compile(pattern_all)
} }
} }
func BenchmarkAll(b *testing.B) { func BenchmarkAll(b *testing.B) {
m, _ := New(pattern_all) m, _ := Compile(pattern_all)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = m.Match(fixture_all) _ = m.Match(fixture_all)
@ -147,35 +179,35 @@ func BenchmarkAll(b *testing.B) {
} }
func BenchmarkMultiple(b *testing.B) { func BenchmarkMultiple(b *testing.B) {
m, _ := New(pattern_multiple) m, _ := Compile(pattern_multiple)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = m.Match(fixture_multiple) _ = m.Match(fixture_multiple)
} }
} }
func BenchmarkPlain(b *testing.B) { func BenchmarkPlain(b *testing.B) {
m, _ := New(pattern_plain) m, _ := Compile(pattern_plain)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = m.Match(fixture_plain) _ = m.Match(fixture_plain)
} }
} }
func BenchmarkPrefix(b *testing.B) { func BenchmarkPrefix(b *testing.B) {
m, _ := New(pattern_prefix) m, _ := Compile(pattern_prefix)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = m.Match(fixture_prefix_suffix) _ = m.Match(fixture_prefix_suffix)
} }
} }
func BenchmarkSuffix(b *testing.B) { func BenchmarkSuffix(b *testing.B) {
m, _ := New(pattern_suffix) m, _ := Compile(pattern_suffix)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = m.Match(fixture_prefix_suffix) _ = m.Match(fixture_prefix_suffix)
} }
} }
func BenchmarkPrefixSuffix(b *testing.B) { func BenchmarkPrefixSuffix(b *testing.B) {
m, _ := New(pattern_prefix_suffix) m, _ := Compile(pattern_prefix_suffix)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = m.Match(fixture_prefix_suffix) _ = m.Match(fixture_prefix_suffix)

241
lexer.go
View File

@ -6,6 +6,19 @@ import (
"unicode/utf8" "unicode/utf8"
) )
const (
char_any = '*'
char_separator = ','
char_single = '?'
char_escape = '\\'
char_range_open = '['
char_range_close = ']'
char_terms_open = '{'
char_terms_close = '}'
char_range_not = '!'
char_range_between = '-'
)
var eof rune = 0 var eof rune = 0
type stateFn func(*lexer) stateFn type stateFn func(*lexer) stateFn
@ -16,15 +29,19 @@ const (
item_eof itemType = iota item_eof itemType = iota
item_error item_error
item_text item_text
item_char
item_any item_any
item_super
item_single item_single
item_not
item_separator
item_range_open item_range_open
item_range_not
item_range_lo
item_range_minus
item_range_hi
item_range_chars
item_range_close item_range_close
item_range_lo
item_range_hi
item_range_between
item_terms_open
item_terms_close
) )
func (i itemType) String() string { func (i itemType) String() string {
@ -38,32 +55,44 @@ func (i itemType) String() string {
case item_text: case item_text:
return "text" return "text"
case item_char:
return "char"
case item_any: case item_any:
return "any" return "any"
case item_super:
return "super"
case item_single: case item_single:
return "single" return "single"
case item_not:
return "not"
case item_separator:
return "separator"
case item_range_open: case item_range_open:
return "range_open" return "range_open"
case item_range_not: case item_range_close:
return "range_not" return "range_close"
case item_range_lo: case item_range_lo:
return "range_lo" return "range_lo"
case item_range_minus:
return "range_minus"
case item_range_hi: case item_range_hi:
return "range_hi" return "range_hi"
case item_range_chars: case item_range_between:
return "range_chars" return "range_between"
case item_range_close: case item_terms_open:
return "range_close" return "terms_open"
case item_terms_close:
return "terms_close"
default: default:
return "undef" return "undef"
@ -85,6 +114,8 @@ type lexer struct {
pos int pos int
width int width int
runes int runes int
termScopes []int
termPhrases map[int]int
state stateFn state stateFn
items chan item items chan item
} }
@ -94,6 +125,7 @@ func newLexer(source string) *lexer {
input: source, input: source,
state: lexText, state: lexText,
items: make(chan item, 5), items: make(chan item, 5),
termPhrases: make(map[int]int),
} }
return l return l
} }
@ -105,6 +137,23 @@ func (l *lexer) run() {
close(l.items) close(l.items)
} }
// nextItem returns the next lexed item.
//
// It first tries a non-blocking receive from the items channel. When nothing
// is queued it advances the lexer by running the current state function once
// and tries again. Once the state function is nil, an item_eof item is
// returned.
//
// The loop only exits through a return, so no statement is needed after it.
func (l *lexer) nextItem() item {
	for {
		select {
		case it := <-l.items:
			return it
		default:
			if l.state == nil {
				return item{t: item_eof}
			}
			l.state = l.state(l)
		}
	}
}
func (l *lexer) read() (r rune) { func (l *lexer) read() (r rune) {
if l.pos >= len(l.input) { if l.pos >= len(l.input) {
return eof return eof
@ -134,7 +183,9 @@ func (l *lexer) ignore() {
func (l *lexer) lookahead() rune { func (l *lexer) lookahead() rune {
r := l.read() r := l.read()
if r != eof {
l.unread() l.unread()
}
return r return r
} }
@ -153,7 +204,12 @@ func (l *lexer) acceptAll(valid string) {
} }
func (l *lexer) emit(t itemType) { func (l *lexer) emit(t itemType) {
if l.pos == len(l.input) {
l.items <- item{t, l.input[l.start:]}
} else {
l.items <- item{t, l.input[l.start:l.pos]} l.items <- item{t, l.input[l.start:l.pos]}
}
l.start = l.pos l.start = l.pos
l.runes = 0 l.runes = 0
l.width = 0 l.width = 0
@ -169,23 +225,6 @@ func (l *lexer) errorf(format string, args ...interface{}) {
l.items <- item{item_error, fmt.Sprintf(format, args...)} l.items <- item{item_error, fmt.Sprintf(format, args...)}
} }
// nextItem returns the next lexed item.
//
// It first tries a non-blocking receive from the items channel. When nothing
// is queued it advances the lexer by running the current state function once
// and tries again. Once the state function is nil, an item_eof item is
// returned.
//
// The loop only exits through a return, so no statement is needed after it.
func (l *lexer) nextItem() item {
	for {
		select {
		case it := <-l.items:
			return it
		default:
			if l.state == nil {
				return item{t: item_eof}
			}
			l.state = l.state(l)
		}
	}
}
func lexText(l *lexer) stateFn { func lexText(l *lexer) stateFn {
for { for {
c := l.read() c := l.read()
@ -194,23 +233,55 @@ func lexText(l *lexer) stateFn {
} }
switch c { switch c {
case escape: case char_escape:
l.unread()
l.emitMaybe(item_text)
l.read()
l.ignore()
if l.read() == eof { if l.read() == eof {
l.errorf("unclosed '%s' character", string(escape)) l.errorf("unclosed '%s' character", string(char_escape))
return nil return nil
} }
case single:
case char_single:
l.unread() l.unread()
l.emitMaybe(item_text) l.emitMaybe(item_text)
return lexSingle return lexSingle
case any:
case char_any:
var n stateFn
if l.lookahead() == char_any {
n = lexSuper
} else {
n = lexAny
}
l.unread() l.unread()
l.emitMaybe(item_text) l.emitMaybe(item_text)
return lexAny return n
case range_open:
case char_range_open:
l.unread() l.unread()
l.emitMaybe(item_text) l.emitMaybe(item_text)
return lexRangeOpen return lexRangeOpen
case char_terms_open:
l.unread()
l.emitMaybe(item_text)
return lexTermsOpen
case char_terms_close:
l.unread()
l.emitMaybe(item_text)
return lexTermsClose
case char_separator:
l.unread()
l.emitMaybe(item_text)
return lexSeparator
} }
} }
@ -219,6 +290,11 @@ func lexText(l *lexer) stateFn {
l.emit(item_text) l.emit(item_text)
} }
if len(l.termScopes) != 0 {
l.errorf("invalid pattern syntax: unclosed terms")
return nil
}
l.emit(item_eof) l.emit(item_eof)
return nil return nil
@ -233,13 +309,13 @@ func lexInsideRange(l *lexer) stateFn {
} }
switch c { switch c {
case inside_range_not: case char_range_not:
// only first char makes sense // only first char makes sense
if l.pos-l.width == l.start { if l.pos-l.width == l.start {
l.emit(item_range_not) l.emit(item_not)
} }
case inside_range_minus: case char_range_between:
if l.runes != 2 { if l.runes != 2 {
l.errorf("unexpected length of lo char inside range") l.errorf("unexpected length of lo char inside range")
return nil return nil
@ -248,20 +324,14 @@ func lexInsideRange(l *lexer) stateFn {
l.reset() l.reset()
return lexRangeHiLo return lexRangeHiLo
case range_close: case char_range_close:
l.unread() l.unread()
l.emitMaybe(item_range_chars) l.emitMaybe(item_text)
return lexRangeClose return lexRangeClose
} }
} }
} }
// lexAny advances the position by one, emits an item_any item and hands
// control back to lexText.
func lexAny(l *lexer) stateFn {
	l.pos++
	l.emit(item_any)

	return lexText
}
func lexRangeHiLo(l *lexer) stateFn { func lexRangeHiLo(l *lexer) stateFn {
start := l.start start := l.start
@ -273,15 +343,15 @@ func lexRangeHiLo(l *lexer) stateFn {
} }
switch c { switch c {
case inside_range_minus: case char_range_between:
if l.runes != 1 { if l.runes != 1 {
l.errorf("unexpected length of range: single character expected before minus") l.errorf("unexpected length of range: single character expected before minus")
return nil return nil
} }
l.emit(item_range_minus) l.emit(item_range_between)
case range_close: case char_range_close:
l.unread() l.unread()
if l.runes != 1 { if l.runes != 1 {
@ -307,12 +377,81 @@ func lexRangeHiLo(l *lexer) stateFn {
} }
} }
// lexAny advances the position by one, emits an item_any item and hands
// control back to lexText.
func lexAny(l *lexer) stateFn {
	l.pos++
	l.emit(item_any)

	return lexText
}
// lexSuper advances the position by two, emits an item_super item and hands
// control back to lexText.
func lexSuper(l *lexer) stateFn {
	const width = 2

	l.pos += width
	l.emit(item_super)

	return lexText
}
func lexSingle(l *lexer) stateFn { func lexSingle(l *lexer) stateFn {
l.pos += 1 l.pos += 1
l.emit(item_single) l.emit(item_single)
return lexText return lexText
} }
// lexSeparator handles a term separator.
//
// It reports an error (and stops lexing by returning nil) when no terms list
// is open, or when the separator directly follows the position recorded for
// the innermost open list. Otherwise it bumps the phrase counter of that
// list, emits item_separator and continues with lexText.
func lexSeparator(l *lexer) stateFn {
	depth := len(l.termScopes)
	if depth == 0 {
		l.errorf("syntax error: separator not inside terms list")
		return nil
	}

	// Innermost open terms list is the last recorded scope.
	opened := l.termScopes[depth-1]
	if l.pos-opened == 1 {
		l.errorf("syntax error: empty term before separator")
		return nil
	}

	l.termPhrases[opened]++
	l.pos++
	l.emit(item_separator)

	return lexText
}
// lexTermsOpen records the current position as a newly opened terms scope,
// advances by one, emits item_terms_open and continues with lexText.
func lexTermsOpen(l *lexer) stateFn {
	opened := l.pos
	l.termScopes = append(l.termScopes, opened)

	l.pos = opened + 1
	l.emit(item_terms_open)

	return lexText
}
// lexTermsClose handles the end of a terms list.
//
// It reports an error (and stops lexing by returning nil) when no list is
// open, when the closing position directly follows the recorded opening
// position, or when no separator was counted for the innermost list.
// Otherwise it drops the innermost scope and its phrase counter, emits
// item_terms_close and continues with lexText.
func lexTermsClose(l *lexer) stateFn {
	depth := len(l.termScopes)
	if depth == 0 {
		l.errorf("unexpected closing of terms: there is no opened terms")
		return nil
	}

	top := depth - 1
	opened := l.termScopes[top]

	switch {
	case opened == l.pos-1:
		// Nothing between the opening and closing positions.
		l.errorf("term could not be empty")
		return nil
	case l.termPhrases[opened] == 0:
		// No separator was seen inside this list.
		l.errorf("term must contain >1 phrases")
		return nil
	}

	// Pop the scope and forget its counter.
	l.termScopes = l.termScopes[:top]
	delete(l.termPhrases, opened)

	l.pos++
	l.emit(item_terms_close)

	return lexText
}
func lexRangeOpen(l *lexer) stateFn { func lexRangeOpen(l *lexer) stateFn {
l.pos += 1 l.pos += 1
l.emit(item_range_open) l.emit(item_range_open)

View File

@ -5,7 +5,7 @@ import (
) )
func TestLexGood(t *testing.T) { func TestLexGood(t *testing.T) {
for _, test := range []struct { for id, test := range []struct {
pattern string pattern string
items []item items []item
}{ }{
@ -25,9 +25,9 @@ func TestLexGood(t *testing.T) {
}, },
}, },
{ {
pattern: "hello*", pattern: "hellof*",
items: []item{ items: []item{
item{item_text, "hello"}, item{item_text, "hellof"},
item{item_any, "*"}, item{item_any, "*"},
item{item_eof, ""}, item{item_eof, ""},
}, },
@ -36,8 +36,7 @@ func TestLexGood(t *testing.T) {
pattern: "hello**", pattern: "hello**",
items: []item{ items: []item{
item{item_text, "hello"}, item{item_text, "hello"},
item{item_any, "*"}, item{item_super, "**"},
item{item_any, "*"},
item{item_eof, ""}, item{item_eof, ""},
}, },
}, },
@ -46,7 +45,7 @@ func TestLexGood(t *testing.T) {
items: []item{ items: []item{
item{item_range_open, "["}, item{item_range_open, "["},
item{item_range_lo, "日"}, item{item_range_lo, "日"},
item{item_range_minus, "-"}, item{item_range_between, "-"},
item{item_range_hi, "語"}, item{item_range_hi, "語"},
item{item_range_close, "]"}, item{item_range_close, "]"},
item{item_eof, ""}, item{item_eof, ""},
@ -56,9 +55,9 @@ func TestLexGood(t *testing.T) {
pattern: "[!日-語]", pattern: "[!日-語]",
items: []item{ items: []item{
item{item_range_open, "["}, item{item_range_open, "["},
item{item_range_not, "!"}, item{item_not, "!"},
item{item_range_lo, "日"}, item{item_range_lo, "日"},
item{item_range_minus, "-"}, item{item_range_between, "-"},
item{item_range_hi, "語"}, item{item_range_hi, "語"},
item{item_range_close, "]"}, item{item_range_close, "]"},
item{item_eof, ""}, item{item_eof, ""},
@ -68,7 +67,7 @@ func TestLexGood(t *testing.T) {
pattern: "[日本語]", pattern: "[日本語]",
items: []item{ items: []item{
item{item_range_open, "["}, item{item_range_open, "["},
item{item_range_chars, "日本語"}, item{item_text, "日本語"},
item{item_range_close, "]"}, item{item_range_close, "]"},
item{item_eof, ""}, item{item_eof, ""},
}, },
@ -77,22 +76,59 @@ func TestLexGood(t *testing.T) {
pattern: "[!日本語]", pattern: "[!日本語]",
items: []item{ items: []item{
item{item_range_open, "["}, item{item_range_open, "["},
item{item_range_not, "!"}, item{item_not, "!"},
item{item_range_chars, "日本語"}, item{item_text, "日本語"},
item{item_range_close, "]"}, item{item_range_close, "]"},
item{item_eof, ""}, item{item_eof, ""},
}, },
}, },
{
pattern: "{a,b}",
items: []item{
item{item_terms_open, "{"},
item{item_text, "a"},
item{item_separator, ","},
item{item_text, "b"},
item{item_terms_close, "}"},
item{item_eof, ""},
},
},
{
pattern: "{[!日-語],*,?,{a,b,\\c}}",
items: []item{
item{item_terms_open, "{"},
item{item_range_open, "["},
item{item_not, "!"},
item{item_range_lo, "日"},
item{item_range_between, "-"},
item{item_range_hi, "語"},
item{item_range_close, "]"},
item{item_separator, ","},
item{item_any, "*"},
item{item_separator, ","},
item{item_single, "?"},
item{item_separator, ","},
item{item_terms_open, "{"},
item{item_text, "a"},
item{item_separator, ","},
item{item_text, "b"},
item{item_separator, ","},
item{item_text, "c"},
item{item_terms_close, "}"},
item{item_terms_close, "}"},
item{item_eof, ""},
},
},
} { } {
lexer := newLexer(test.pattern) lexer := newLexer(test.pattern)
for _, exp := range test.items { for i, exp := range test.items {
act := lexer.nextItem() act := lexer.nextItem()
if act.t != exp.t { if act.t != exp.t {
t.Errorf("wrong item type: exp: %v; act: %v (%s vs %s)", exp.t, act.t, exp, act) t.Errorf("#%d wrong %d-th item type: exp: %v; act: %v (%s vs %s)", id, i, exp.t, act.t, exp, act)
break break
} }
if act.s != exp.s { if act.s != exp.s {
t.Errorf("wrong item contents: exp: %q; act: %q (%s vs %s)", exp.s, act.s, exp, act) t.Errorf("#%d wrong %d-th item contents: exp: %q; act: %q (%s vs %s)", id, i, exp.s, act.s, exp, act)
break break
} }
} }

42
match/any.go Normal file
View File

@ -0,0 +1,42 @@
package match
import (
"fmt"
"strings"
)
// Any matches any string that contains none of the Separators characters.
type Any struct {
	Separators string
}

// Match reports whether s is free of every character in Separators.
func (a Any) Match(s string) bool {
	return strings.IndexAny(s, a.Separators) == -1
}

// Index locates the first run of non-separator runes in s.
//
// index is the rune offset where that run starts (-1 when s holds no
// non-separator rune), min is always 0, and max is the length of the run in
// runes.
func (a Any) Index(s string) (index, min, max int) {
	index = -1

	for pos, r := range []rune(s) {
		isSeparator := strings.IndexRune(a.Separators, r) != -1

		switch {
		case !isSeparator && index == -1:
			// First rune of the run.
			index, max = pos, 1
		case !isSeparator:
			max++
		case index != -1:
			// A separator after the run has started ends it.
			return index, min, max
		}
	}

	return index, min, max
}
// Kind returns KindAny.
func (Any) Kind() Kind {
	return KindAny
}
// String renders the matcher as "[any:<separators>]".
func (a Any) String() string {
	separators := a.Separators
	return fmt.Sprintf("[any:%s]", separators)
}

31
match/any_of.go Normal file
View File

@ -0,0 +1,31 @@
package match
import (
"fmt"
)
// AnyOf matches a string when at least one of its Matchers matches it.
type AnyOf struct {
	Matchers Matchers
}

// Add appends m to the list of alternatives.
func (a *AnyOf) Add(m Matcher) {
	a.Matchers = append(a.Matchers, m)
}

// Match reports whether any of the alternatives matches s. With no
// alternatives it returns false.
func (a AnyOf) Match(s string) bool {
	for i := range a.Matchers {
		if a.Matchers[i].Match(s) {
			return true
		}
	}

	return false
}

// Kind returns KindAnyOf.
func (AnyOf) Kind() Kind {
	return KindAnyOf
}

// String renders the matcher as "[any_of:<matchers>]".
func (a AnyOf) String() string {
	alternatives := a.Matchers
	return fmt.Sprintf("[any_of:%s]", alternatives)
}

View File

@ -1,38 +0,0 @@
package match
import (
"fmt"
)
// Between matches a string consisting of exactly one rune that lies inside
// the inclusive range [Lo, Hi]. When Not is set the range test is inverted.
type Between struct {
	Lo, Hi rune
	Not    bool
}

// Kind returns KindRangeBetween.
func (b Between) Kind() Kind {
	return KindRangeBetween
}

// Search reports a hit covering all of s (offset 0, length len(s) in bytes)
// when s as a whole matches; otherwise it returns zero values and ok=false.
func (b Between) Search(s string) (i int, l int, ok bool) {
	if b.Match(s) {
		return 0, len(s), true
	}

	return
}

// Match reports whether s is a single rune that satisfies the range test.
func (b Between) Match(s string) bool {
	r := []rune(s)

	if len(r) != 1 {
		return false
	}

	inRange := r[0] >= b.Lo && r[0] <= b.Hi

	return inRange == !b.Not
}

// String renders the matcher as "[range_between:<lo>-<hi>(<not>)]".
//
// The bounds are runes, so they are formatted with %c; %s would print them
// as "%!s(int32=...)".
func (b Between) String() string {
	return fmt.Sprintf("[range_between:%c-%c(%t)]", b.Lo, b.Hi, b.Not)
}

68
match/btree.go Normal file
View File

@ -0,0 +1,68 @@
package match
import (
"fmt"
)
// BTree matches a string by locating Value somewhere inside it and then
// checking the text before the hit against Left and the text after it
// against Right. A nil branch only accepts the empty string.
type BTree struct {
	Value       Primitive
	Left, Right Matcher
}

// Kind returns KindBTree.
func (t BTree) Kind() Kind {
	return KindBTree
}

// Match reports whether s can be split into left part, Value hit and right
// part such that both branches accept their parts.
//
// All offsets are in runes. For every hit reported by Value.Index the
// candidate hit lengths min..max are tried in increasing order; the search
// then resumes one rune after the hit's start.
//
// An empty s never enters the search loop, so Match("") is always false.
// NOTE(review): confirm this is intended for values that can match "".
func (t BTree) Match(s string) bool {
	runes := []rune(s)
	total := len(runes)

	for offset := 0; offset < total; {
		index, min, max := t.Value.Index(string(runes[offset:]))
		if index == -1 {
			return false
		}

		start := offset + index

		// The left part does not depend on the hit length, so check it once
		// per hit instead of once per candidate length.
		before := string(runes[:start])
		leftOK := before == ""
		if t.Left != nil {
			leftOK = t.Left.Match(before)
		}

		if leftOK {
			for length := min; length <= max; length++ {
				// The hit may reach (or exceed) the end of the input, in
				// which case nothing is left for the right branch.
				var after string
				if end := start + length; end < total {
					after = string(runes[end:])
				}

				rightOK := after == ""
				if t.Right != nil {
					rightOK = t.Right.Match(after)
				}

				if rightOK {
					return true
				}
			}
		}

		offset += index + 1
	}

	return false
}

// String renders the tree as "[btree:<left><-<value>-><right>]".
func (t BTree) String() string {
	return fmt.Sprintf("[btree:%s<-%s->%s]", t.Left, t.Value, t.Right)
}

46
match/btree_test.go Normal file
View File

@ -0,0 +1,46 @@
package match
import (
"testing"
)
// TestBTree runs BTree.Match over a table of trees and inputs and compares
// the verdict with the expected one.
func TestBTree(t *testing.T) {
	cases := []struct {
		tree BTree
		str  string
		exp  bool
	}{
		{
			tree: BTree{Value: Raw{"abc"}, Left: Super{}, Right: Super{}},
			str:  "abc",
			exp:  true,
		},
		{
			tree: BTree{Value: Raw{"a"}, Left: Single{}, Right: Single{}},
			str:  "aaa",
			exp:  true,
		},
		{
			tree: BTree{Value: Raw{"b"}, Left: Single{}},
			str:  "bbb",
			exp:  false,
		},
		{
			// Nested tree on the left: Super, then Single, then the literal "c".
			tree: BTree{
				Left: BTree{
					Left:  Super{},
					Value: Single{},
				},
				Value: Raw{"c"},
			},
			str: "abc",
			exp: true,
		},
	}

	for id := range cases {
		tc := cases[id]
		if act := tc.tree.Match(tc.str); act != tc.exp {
			t.Errorf("#%d match %q error: act: %t; exp: %t", id, tc.str, act, tc.exp)
		}
	}
}

View File

@ -1,70 +0,0 @@
package match
import (
"strings"
"fmt"
)
// Composite matches a string against an ordered list of chunk matchers.
type Composite struct {
	Chunks []Matcher
}

// Kind identifies this matcher as KindComposite.
func (c Composite) Kind() Kind {
	return KindComposite
}

// Search reports (0, len(s), true) when the whole of s is accepted by Match
// and (0, 0, false) otherwise.
func (c Composite) Search(s string) (i int, l int, ok bool) {
	if !c.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// m walks chunks from left to right, consuming s as it goes.
//
// A chunk of KindRaw is located with Search; the text in front of it must be
// accepted by the most recent non-raw chunk. Any other chunk is remembered
// until the next raw chunk (or the end of the list) decides which text it
// has to cover. When several non-raw chunks follow each other only the last
// one is kept.
func m(chunks []Matcher, s string) bool {
	var pending Matcher // non-raw chunk waiting for the text it must cover

	for _, chunk := range chunks {
		if chunk.Kind() != KindRaw {
			pending = chunk
			continue
		}

		i, l, ok := chunk.Search(s)
		if !ok {
			return false
		}

		switch {
		case pending != nil:
			if !pending.Match(s[:i]) {
				return false
			}
			pending = nil
		case i != 0:
			// No chunk is available to cover s[:i], so the literal has to
			// start exactly at the current position; previously the skipped
			// text was silently dropped and e.g. [Raw "b"] matched "ab".
			return false
		}

		s = s[i+l:]
	}

	if pending != nil {
		return pending.Match(s)
	}
	return len(s) == 0
}

// Match reports whether s is accepted by the chunk list as a whole.
func (c Composite) Match(s string) bool {
	return m(c.Chunks, s)
}

// String renders the matcher for debugging as "[composite:<chunk>,<chunk>,...]".
func (c Composite) String() string {
	parts := make([]string, 0, len(c.Chunks))
	for _, chunk := range c.Chunks {
		parts = append(parts, fmt.Sprint(chunk))
	}
	return fmt.Sprintf("[composite:%s]", strings.Join(parts, ","))
}

23
match/contains.go Normal file
View File

@ -0,0 +1,23 @@
package match
import (
"fmt"
"strings"
)
// Contains accepts strings that contain Needle as a substring; when Not is
// set it accepts exactly the strings that do not contain it.
type Contains struct {
	Needle string
	Not    bool
}

// Match reports whether the presence of Needle in s agrees with the
// (possibly negated) expectation.
func (c Contains) Match(s string) bool {
	if strings.Contains(s, c.Needle) {
		return !c.Not
	}
	return c.Not
}

// Kind identifies this matcher as KindContains.
func (c Contains) Kind() Kind {
	return KindContains
}

// String renders the matcher for debugging.
func (c Contains) String() string {
	const layout = "[contains:needle=%s not=%t]"
	return fmt.Sprintf(layout, c.Needle, c.Not)
}

31
match/every_of.go Normal file
View File

@ -0,0 +1,31 @@
package match
import (
"fmt"
)
// Every accepts a string only when all of its Matchers accept it.
type Every struct {
	Matchers Matchers
}

// Add appends m to the matchers that must all accept the input.
func (e *Every) Add(m Matcher) {
	e.Matchers = append(e.Matchers, m)
}

// Match reports whether every held matcher accepts s. Matchers are tried in
// order and evaluation stops at the first rejection; with no matchers the
// result is true.
func (e Every) Match(s string) bool {
	for i := 0; i < len(e.Matchers); i++ {
		if accepted := e.Matchers[i].Match(s); !accepted {
			return false
		}
	}
	return true
}

// Kind identifies this matcher as KindEveryOf.
func (e Every) Kind() Kind {
	return KindEveryOf
}

// String renders the matcher for debugging as "[every_of:<matchers>]".
func (e Every) String() string {
	const layout = "[every_of:%s]"
	return fmt.Sprintf(layout, e.Matchers)
}

View File

@ -1,41 +1,39 @@
package match package match
import ( import (
"strings"
"fmt" "fmt"
"strings"
) )
type List struct {
type RangeList struct {
List string List string
Not bool Not bool
} }
func (self RangeList) Kind() Kind { func (self List) Kind() Kind {
return KindRangeList return KindList
} }
func (self RangeList) Search(s string) (i int, l int, ok bool) { func (self List) Match(s string) bool {
if self.Match(s) { if len([]rune(s)) != 1 {
return 0, len(s), true
}
return
}
func (self RangeList) Match(s string) bool {
r := []rune(s)
if (len(r) != 1) {
return false return false
} }
inList := strings.IndexRune(self.List, r[0]) >= 0 inList := strings.Index(self.List, s) != -1
return inList == !self.Not return inList == !self.Not
} }
func (self RangeList) String() string { func (self List) Index(s string) (index, min, max int) {
return fmt.Sprintf("[range_list:%s]", self.List) for i, r := range []rune(s) {
if self.Not == (strings.IndexRune(self.List, r) == -1) {
return i, 1, 1
}
}
return -1, 0, 0
}
func (self List) String() string {
return fmt.Sprintf("[list:list=%s not=%t]", self.List, self.Not)
} }

View File

@ -1,22 +1,47 @@
package match package match
import (
"fmt"
"strings"
)
type Kind int type Kind int
const(
// todo use String for Kind, and self.Kind() in every matcher.String()
const (
KindRaw Kind = iota KindRaw Kind = iota
KindMultipleSeparated KindEveryOf
KindMultipleSuper KindAnyOf
KindAny
KindSuper
KindSingle KindSingle
KindComposite KindComposition
KindPrefix KindPrefix
KindSuffix KindSuffix
KindPrefixSuffix KindPrefixSuffix
KindRangeBetween KindRange
KindRangeList KindList
KindMin
KindMax
KindBTree
KindContains
) )
type Matcher interface { type Matcher interface {
Match(string) bool Match(string) bool
Search(string) (int, int, bool) }
Kind() Kind
// Primitive is implemented by matchers that can locate an occurrence of
// themselves inside a string; BTree uses one as its Value.
type Primitive interface {
// Index returns (index, min, max). BTree.Match treats index == -1 as "no
// occurrence" and otherwise tries every occurrence length from min to max
// (inclusive) starting at index.
// NOTE(review): BTree applies these numbers to a []rune view of the input,
// so rune offsets are presumably expected — confirm for each implementation.
Index(string) (int, int, int)
}
type Matchers []Matcher
func (m Matchers) String() string {
var s []string
for _, matcher := range m {
s = append(s, fmt.Sprint(matcher))
}
return fmt.Sprintf("matchers[%s]", strings.Join(s, ","))
} }

23
match/max.go Normal file
View File

@ -0,0 +1,23 @@
package match
import "fmt"
// Max accepts strings that are at most Limit runes long.
type Max struct {
	Limit int
}

// Match reports whether s contains no more than Limit runes.
func (m Max) Match(s string) bool {
	count := 0
	for range s {
		count++
	}
	return count <= m.Limit
}

// Search always reports "not found": (0, 0, false).
func (m Max) Search(s string) (int, int, bool) {
	return 0, 0, false
}

// Kind identifies this matcher as KindMax.
func (m Max) Kind() Kind {
	return KindMax
}

// String renders the matcher for debugging as "[max:<limit>]".
func (m Max) String() string {
	const layout = "[max:%d]"
	return fmt.Sprintf(layout, m.Limit)
}

23
match/min.go Normal file
View File

@ -0,0 +1,23 @@
package match
import "fmt"
// Min accepts strings that are at least Limit runes long.
type Min struct {
	Limit int
}

// Match reports whether s contains Limit runes or more.
func (m Min) Match(s string) bool {
	count := 0
	for range s {
		count++
	}
	return count >= m.Limit
}

// Search always reports "not found": (0, 0, false).
func (m Min) Search(s string) (int, int, bool) {
	return 0, 0, false
}

// Kind identifies this matcher as KindMin.
func (m Min) Kind() Kind {
	return KindMin
}

// String renders the matcher for debugging as "[min:<limit>]".
func (m Min) String() string {
	const layout = "[min:%d]"
	return fmt.Sprintf(layout, m.Limit)
}

View File

@ -1,34 +0,0 @@
package match
import (
"fmt"
"strings"
)
// Any accepts every string that contains none of the characters listed in
// Separators.
type Any struct {
	Separators string
}

// Match reports whether s is free of separator characters. With an empty
// Separators every string is accepted.
func (a Any) Match(s string) bool {
	return !strings.ContainsAny(s, a.Separators)
}

// Search reports (0, len(s), true) when the whole of s is accepted by Match
// and (0, 0, false) otherwise.
func (a Any) Search(s string) (i, l int, ok bool) {
	if !a.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Kind is KindMultipleSuper when no separators are configured and
// KindMultipleSeparated otherwise.
func (a Any) Kind() Kind {
	if a.Separators != "" {
		return KindMultipleSeparated
	}
	return KindMultipleSuper
}

// String renders the matcher for debugging as "[multiple:<separators>]".
func (a Any) String() string {
	const layout = "[multiple:%s]"
	return fmt.Sprintf(layout, a.Separators)
}

View File

@ -1,21 +1,19 @@
package match package match
import ( import (
"strings"
"fmt" "fmt"
"strings"
) )
type PrefixSuffix struct { type PrefixSuffix struct {
Prefix, Suffix string Prefix, Suffix string
} }
func (self PrefixSuffix) kind() Kind { func (self PrefixSuffix) Kind() Kind {
return KindPrefixSuffix return KindPrefixSuffix
} }
func (self PrefixSuffix) search(s string) (i int, l int, ok bool) { func (self PrefixSuffix) Search(s string) (i int, l int, ok bool) {
if self.Match(s) { if self.Match(s) {
return 0, len(s), true return 0, len(s), true
} }
@ -27,7 +25,6 @@ func (self PrefixSuffix) Match(s string) bool {
return strings.HasPrefix(s, self.Prefix) && strings.HasSuffix(s, self.Suffix) return strings.HasPrefix(s, self.Prefix) && strings.HasSuffix(s, self.Suffix)
} }
func (self PrefixSuffix) String() string { func (self PrefixSuffix) String() string {
return fmt.Sprintf("[prefix_suffix:%s-%s]", self.Prefix, self.Suffix) return fmt.Sprintf("[prefix_suffix:%s-%s]", self.Prefix, self.Suffix)
} }

40
match/range.go Normal file
View File

@ -0,0 +1,40 @@
package match
import (
"fmt"
)
// Range matches a single-rune string whose rune lies inside the inclusive
// interval [Lo, Hi]; when Not is set the test is inverted.
type Range struct {
	Lo, Hi rune // inclusive bounds of the interval
	Not    bool // when true, accept runes outside [Lo, Hi] instead
}

// Kind identifies this matcher as KindRange.
func (r Range) Kind() Kind {
	return KindRange
}

// Match reports whether s consists of exactly one rune and that rune passes
// the (possibly negated) interval test.
func (r Range) Match(s string) bool {
	runes := []rune(s)
	if len(runes) != 1 {
		return false
	}
	inRange := r.Lo <= runes[0] && runes[0] <= r.Hi
	return inRange != r.Not
}

// Index returns the position of the first rune of s that passes the
// (possibly negated) interval test, together with min = max = 1. When no
// rune qualifies it returns (-1, 0, 0). The position is a rune index: the
// string is converted to []rune before iterating.
func (r Range) Index(s string) (index, min, max int) {
	for i, c := range []rune(s) {
		inside := r.Lo <= c && c <= r.Hi
		if inside != r.Not {
			return i, 1, 1
		}
	}
	return -1, 0, 0
}

// String renders the matcher for debugging, e.g. "[range_between:a-z(false)]".
// The bounds are runes, so they are printed with %c; %s would render them as
// "%!s(int32=97)" rather than as the character.
func (r Range) String() string {
	return fmt.Sprintf("[range_between:%c-%c(%t)]", r.Lo, r.Hi, r.Not)
}

View File

@ -1,8 +1,8 @@
package match package match
import ( import (
"strings"
"fmt" "fmt"
"strings"
) )
// raw represents raw string to match // raw represents raw string to match
@ -18,15 +18,14 @@ func (self Raw) Kind() Kind {
return KindRaw return KindRaw
} }
func (self Raw) Search(s string) (i int, l int, ok bool) { func (self Raw) Index(s string) (index, min, max int) {
index := strings.Index(s, self.Str) index = strings.Index(s, self.Str)
if index == -1 { if index == -1 {
return return
} }
i = index min = len(self.Str)
l = len(self.Str) max = min
ok = true
return return
} }

View File

@ -1,11 +1,10 @@
package match package match
import ( import (
"strings"
"fmt" "fmt"
"strings"
) )
// single represents ? // single represents ?
type Single struct { type Single struct {
Separators string Separators string
@ -15,19 +14,20 @@ func (self Single) Match(s string) bool {
return len([]rune(s)) == 1 && strings.IndexAny(s, self.Separators) == -1 return len([]rune(s)) == 1 && strings.IndexAny(s, self.Separators) == -1
} }
func (self Single) Search(s string) (i int, l int, ok bool) { func (self Single) Index(s string) (index, min, max int) {
if self.Match(s) { for i, c := range []rune(s) {
return 0, len(s), true if strings.IndexRune(self.Separators, c) == -1 {
return i, 1, 1
}
} }
return return -1, 0, 0
} }
func (self Single) Kind() Kind { func (self Single) Kind() Kind {
return KindSingle return KindSingle
} }
func (self Single) String() string { func (self Single) String() string {
return fmt.Sprintf("[single:%s]", self.Separators) return fmt.Sprintf("[single:%s]", self.Separators)
} }

23
match/super.go Normal file
View File

@ -0,0 +1,23 @@
package match
import (
"fmt"
)
// Super accepts every string.
type Super struct{}

// Match always reports true.
func (Super) Match(s string) bool {
	return true
}

// Index reports an occurrence at position 0 whose length may be anything
// from 0 up to the number of runes in s.
func (Super) Index(s string) (index, min, max int) {
	runeCount := len([]rune(s))
	return 0, 0, runeCount
}

// Kind identifies this matcher as KindSuper.
func (Super) Kind() Kind {
	return KindSuper
}

// String renders the matcher for debugging as "[super]".
func (Super) String() string {
	return fmt.Sprint("[super]")
}

233
parser.go
View File

@ -3,72 +3,154 @@ package glob
import ( import (
"errors" "errors"
"fmt" "fmt"
"github.com/gobwas/glob/match"
) )
func parseAll(source, separators string) ([]token, error) { type node interface {
lexer := newLexer(source) children() []node
append(node)
}
var tokens []token type nodeImpl struct {
for parser := parserMain; parser != nil; { desc []node
var err error }
tokens, parser, err = parser(lexer, separators)
// append adds c as the last child of the node.
func (impl *nodeImpl) append(c node) {
	impl.desc = append(impl.desc, c)
}
// children returns the node's children in the order they were appended.
// The returned slice is the node's own storage, not a copy.
func (impl *nodeImpl) children() []node {
	return impl.desc
}
// nodeList is the syntax-tree node for a bracket expression given as a list
// of characters; parserRange creates it when the bracket holds text rather
// than a lo-hi pair.
type nodeList struct {
nodeImpl
// not is set when the bracket expression carried a negation item.
not bool
// chars is the text found inside the brackets.
chars string
}
// nodeRange is the syntax-tree node for a bracket expression given as a
// lo-hi pair of runes.
type nodeRange struct {
nodeImpl
// not is set when the bracket expression carried a negation item.
not bool
// lo and hi are the two bounds; parserRange rejects hi < lo.
lo, hi rune
}
// nodeText is the syntax-tree node for a run of literal text.
type nodeText struct {
nodeImpl
text string
}
// nodePattern groups a sequence of nodes; it is the root of every parsed
// tree and also wraps each alternative inside a nodeAnyOf.
type nodePattern struct{ nodeImpl }
// nodeAny is appended by parserMain for an item_any lexer item.
type nodeAny struct{ nodeImpl }
// nodeSuper is appended by parserMain for an item_super lexer item.
type nodeSuper struct{ nodeImpl }
// nodeSingle is appended by parserMain for an item_single lexer item.
type nodeSingle struct{ nodeImpl }
// nodeAnyOf is entered by parserMain on item_terms_open; its children are
// the nodePattern alternatives that follow.
type nodeAnyOf struct{ nodeImpl }
// tree is the builder used while parsing: it keeps the root of the syntax
// tree together with a cursor that enter and leave move up and down.
type tree struct {
// root is the first node ever passed to enter.
root node
// current is the node at the cursor; the parser appends new nodes to it.
current node
// path is the stack of entered nodes below the root (enter does not push
// the root itself); leave pops it.
path []node
}
// enter descends into c and makes it the current node. The very first node
// entered becomes the root; every later node is appended as a child of the
// current node and pushed onto the path.
func (t *tree) enter(c node) {
	if t.root != nil {
		t.current.append(c)
		t.path = append(t.path, c)
	} else {
		t.root = c
	}
	t.current = c
}
// leave moves the cursor one level up. With at most one node on the path
// the cursor returns to the root and the path is cleared; otherwise the
// last path entry is popped and the new last entry becomes current.
func (t *tree) leave() {
	depth := len(t.path)
	if depth <= 1 {
		t.current, t.path = t.root, nil
		return
	}
	t.path = t.path[:depth-1]
	t.current = t.path[depth-2]
}
// parseFn is one state of the parser: it reads items from the lexer, updates
// the tree, and returns the state to run next. parse stops when the returned
// state is nil or the error is non-nil.
type parseFn func(*tree, *lexer) (parseFn, error)
func parse(lexer *lexer) (*nodePattern, error) {
var parser parseFn
root := &nodePattern{}
tree := &tree{}
tree.enter(root)
for parser = parserMain; ; {
next, err := parser(tree, lexer)
if err != nil { if err != nil {
return nil, err return nil, err
} }
}
return tokens, nil if next == nil {
}
type parseFn func(*lexer, string) ([]token, parseFn, error)
func parserMain(lexer *lexer, separators string) ([]token, parseFn, error) {
var (
prev *token
tokens []token
)
for item := lexer.nextItem(); ; {
var t token
if item.t == item_eof {
break break
} }
switch item.t { parser = next
case item_eof:
return tokens, nil, nil
case item_error:
return nil, nil, errors.New(item.s)
case item_text:
t = token{match.Raw{item.s}, item.s}
case item_any:
if prev != nil && prev.matcher.Kind() == match.KindMultipleSeparated {
// remove simple any and replace it with super_any
tokens = tokens[:len(tokens)-1]
t = token{match.Any{""}, item.s}
} else {
t = token{match.Any{separators}, item.s}
} }
case item_single: return root, nil
t = token{match.Single{separators}, item.s}
case item_range_open:
return tokens, parserRange, nil
}
prev = &t
}
return tokens, nil, nil
} }
func parserRange(lexer *lexer, separators string) ([]token, parseFn, error) { func parserMain(tree *tree, lexer *lexer) (parseFn, error) {
for stop := false; !stop; {
item := lexer.nextItem()
switch item.t {
case item_eof:
stop = true
continue
case item_error:
return nil, errors.New(item.s)
case item_text:
tree.current.append(&nodeText{text: item.s})
return parserMain, nil
case item_any:
tree.current.append(&nodeAny{})
return parserMain, nil
case item_super:
tree.current.append(&nodeSuper{})
return parserMain, nil
case item_single:
tree.current.append(&nodeSingle{})
return parserMain, nil
case item_range_open:
return parserRange, nil
case item_terms_open:
tree.enter(&nodeAnyOf{})
tree.enter(&nodePattern{})
return parserMain, nil
case item_separator:
tree.leave()
tree.enter(&nodePattern{})
return parserMain, nil
case item_terms_close:
tree.leave()
tree.leave()
return parserMain, nil
default:
return nil, fmt.Errorf("unexpected token: %s", item)
}
}
return nil, nil
}
func parserRange(tree *tree, lexer *lexer) (parseFn, error) {
var ( var (
not bool not bool
lo rune lo rune
@ -76,60 +158,67 @@ func parserRange(lexer *lexer, separators string) ([]token, parseFn, error) {
chars string chars string
) )
for item := lexer.nextItem(); ; { for {
item := lexer.nextItem()
switch item.t { switch item.t {
case item_eof: case item_eof:
return nil, nil, errors.New("unexpected end") return nil, errors.New("unexpected end")
case item_error: case item_error:
return nil, nil, errors.New(item.s) return nil, errors.New(item.s)
case item_range_not: case item_not:
not = true not = true
case item_range_lo: case item_range_lo:
r := []rune(item.s) r := []rune(item.s)
if len(r) != 1 { if len(r) != 1 {
return nil, nil, fmt.Errorf("unexpected length of lo character") return nil, fmt.Errorf("unexpected length of lo character")
} }
lo = r[0] lo = r[0]
case item_range_minus: case item_range_between:
// //
case item_range_hi: case item_range_hi:
r := []rune(item.s) r := []rune(item.s)
if len(r) != 1 { if len(r) != 1 {
return nil, nil, fmt.Errorf("unexpected length of hi character") return nil, fmt.Errorf("unexpected length of hi character")
}
if hi < lo {
return nil, nil, fmt.Errorf("hi character should be greater than lo")
} }
hi = r[0] hi = r[0]
case item_range_chars: if hi < lo {
return nil, fmt.Errorf("hi character '%s' should be greater than lo '%s'", string(hi), string(lo))
}
case item_text:
chars = item.s chars = item.s
case item_range_close: case item_range_close:
isRange := lo != 0 && hi != 0 isRange := lo != 0 && hi != 0
isChars := chars == "" isChars := chars != ""
if !(isChars != isRange) { if isChars == isRange {
return nil, nil, fmt.Errorf("parse error: unexpected lo, hi, chars in range") return nil, fmt.Errorf("could not parse range")
} }
if isRange { if isRange {
return []token{token{match.Between{lo, hi, not}, ""}}, parserMain, nil tree.current.append(&nodeRange{
lo: lo,
hi: hi,
not: not,
})
} else { } else {
if len(chars) == 0 { tree.current.append(&nodeList{
return nil, nil, fmt.Errorf("chars range should not be empty") chars: chars,
not: not,
})
} }
return []token{token{match.RangeList{chars, not}, ""}}, parserMain, nil return parserMain, nil
}
} }
} }
} }

View File

@ -1,12 +1,219 @@
package glob package glob
import ( import (
"fmt"
"reflect"
"testing" "testing"
) )
func TestParseString(t *testing.T) { func TestParseString(t *testing.T) {
// lexer := newLexer("hello") for id, test := range []struct {
// fmt.Println(lexer.nextItem()) pattern string
// fmt.Println(lexer.nextItem()) tree node
// fmt.Println(lexer.nextItem()) }{
{
pattern: "abc",
tree: &nodePattern{
nodeImpl: nodeImpl{
desc: []node{
&nodeText{text: "abc"},
},
},
},
},
{
pattern: "a*c",
tree: &nodePattern{
nodeImpl: nodeImpl{
desc: []node{
&nodeText{text: "a"},
&nodeAny{},
&nodeText{text: "c"},
},
},
},
},
{
pattern: "a**c",
tree: &nodePattern{
nodeImpl: nodeImpl{
desc: []node{
&nodeText{text: "a"},
&nodeSuper{},
&nodeText{text: "c"},
},
},
},
},
{
pattern: "a?c",
tree: &nodePattern{
nodeImpl: nodeImpl{
desc: []node{
&nodeText{text: "a"},
&nodeSingle{},
&nodeText{text: "c"},
},
},
},
},
{
pattern: "[!a-z]",
tree: &nodePattern{
nodeImpl: nodeImpl{
desc: []node{
&nodeRange{lo: 'a', hi: 'z', not: true},
},
},
},
},
{
pattern: "[az]",
tree: &nodePattern{
nodeImpl: nodeImpl{
desc: []node{
&nodeList{chars: "az"},
},
},
},
},
{
pattern: "{a,z}",
tree: &nodePattern{
nodeImpl: nodeImpl{
desc: []node{
&nodeAnyOf{nodeImpl: nodeImpl{desc: []node{
&nodePattern{
nodeImpl: nodeImpl{desc: []node{
&nodeText{text: "a"},
}},
},
&nodePattern{
nodeImpl: nodeImpl{desc: []node{
&nodeText{text: "z"},
}},
},
}}},
},
},
},
},
{
pattern: "{a,{x,y},?,[a-z],[!qwe]}",
tree: &nodePattern{
nodeImpl: nodeImpl{
desc: []node{
&nodeAnyOf{nodeImpl: nodeImpl{desc: []node{
&nodePattern{
nodeImpl: nodeImpl{desc: []node{
&nodeText{text: "a"},
}},
},
&nodePattern{
nodeImpl: nodeImpl{desc: []node{
&nodeAnyOf{nodeImpl: nodeImpl{desc: []node{
&nodePattern{
nodeImpl: nodeImpl{desc: []node{
&nodeText{text: "x"},
}},
},
&nodePattern{
nodeImpl: nodeImpl{desc: []node{
&nodeText{text: "y"},
}},
},
}}},
}},
},
&nodePattern{
nodeImpl: nodeImpl{desc: []node{
&nodeSingle{},
}},
},
&nodePattern{
nodeImpl: nodeImpl{
desc: []node{
&nodeRange{lo: 'a', hi: 'z', not: false},
},
},
},
&nodePattern{
nodeImpl: nodeImpl{
desc: []node{
&nodeList{chars: "qwe", not: true},
},
},
},
}}},
},
},
},
},
} {
pattern, err := parse(newLexer(test.pattern))
if err != nil {
t.Errorf("#%d %s", id, err)
continue
}
if !reflect.DeepEqual(test.tree, pattern) {
t.Errorf("#%d tries are not equal", id)
if err = nodeEqual(test.tree, pattern); err != nil {
t.Errorf("#%d %s", id, err)
continue
}
}
}
}
const abstractNodeImpl = "nodeImpl"
func nodeEqual(a, b node) error {
if (a == nil || b == nil) && a != b {
return fmt.Errorf("nodes are not equal: exp %s, act %s", a, b)
}
aValue, bValue := reflect.Indirect(reflect.ValueOf(a)), reflect.Indirect(reflect.ValueOf(b))
aType, bType := aValue.Type(), bValue.Type()
if aType != bType {
return fmt.Errorf("nodes are not equal: exp %s, act %s", aValue.Type(), bValue.Type())
}
for i := 0; i < aType.NumField(); i++ {
var eq bool
f := aType.Field(i).Name
if f == abstractNodeImpl {
continue
}
af, bf := aValue.FieldByName(f), bValue.FieldByName(f)
switch af.Kind() {
case reflect.String:
eq = af.String() == bf.String()
case reflect.Bool:
eq = af.Bool() == bf.Bool()
default:
eq = fmt.Sprint(af) == fmt.Sprint(bf)
}
if !eq {
return fmt.Errorf("nodes<%s> %q fields are not equal: exp %q, act %q", aType, f, af, bf)
}
}
for i, aDesc := range a.children() {
if len(b.children())-1 < i {
return fmt.Errorf("node does not have enough children (got %d children, wanted %d-th token)", len(b.children()), i)
}
bDesc := b.children()[i]
if err := nodeEqual(aDesc, bDesc); err != nil {
return err
}
}
return nil
} }