diff --git a/compiler.go b/compiler.go new file mode 100644 index 0000000..67fce2a --- /dev/null +++ b/compiler.go @@ -0,0 +1,258 @@ +package glob + +import ( + "fmt" + "github.com/gobwas/glob/match" +) + +func optimize(matcher match.Matcher) match.Matcher { + switch m := matcher.(type) { + + case match.Any: + if m.Separators == "" { + return match.Super{} + } + + case match.BTree: + m.Left = optimize(m.Left) + m.Right = optimize(m.Right) + + r, ok := m.Value.(match.Raw) + if !ok { + return m + } + + leftNil := m.Left == nil + rightNil := m.Right == nil + + if leftNil && rightNil { + return match.Raw{r.Str} + } + + _, leftSuper := m.Left.(match.Super) + lp, leftPrefix := m.Left.(match.Prefix) + + _, rightSuper := m.Right.(match.Super) + rs, rightSuffix := m.Right.(match.Suffix) + + if leftSuper && rightSuper { + return match.Contains{r.Str, false} + } + + if leftSuper && rightNil { + return match.Suffix{r.Str} + } + + if rightSuper && leftNil { + return match.Prefix{r.Str} + } + + if leftNil && rightSuffix { + return match.Every{match.Matchers{match.Prefix{r.Str}, rs}} + } + + if rightNil && leftPrefix { + return match.Every{match.Matchers{lp, match.Suffix{r.Str}}} + } + + return m + } + + return matcher +} + +func glueMatchers(matchers []match.Matcher) match.Matcher { + switch len(matchers) { + case 0: + return nil + case 1: + return matchers[0] + } + + var ( + hasAny bool + hasSuper bool + hasSingle bool + min int + separator string + ) + + for i, matcher := range matchers { + var sep string + switch m := matcher.(type) { + + case match.Super: + sep = "" + hasSuper = true + + case match.Any: + sep = m.Separators + hasAny = true + + case match.Single: + sep = m.Separators + hasSingle = true + min++ + + case match.List: + if !m.Not { + return nil + } + sep = m.List + hasSingle = true + min++ + + default: + return nil + } + + // initialize + if i == 0 { + separator = sep + } + + if sep == separator { + continue + } + + return nil + } + + if hasSuper && !hasAny && !hasSingle { + return match.Super{} + } + + if hasAny && !hasSuper && !hasSingle { + return match.Any{separator} + } + + if (hasAny || hasSuper) && min > 0 && separator == "" { + return match.Min{min} + } + + every := match.Every{} + + if min > 0 { + every.Add(match.Min{min}) + + if !hasAny && !hasSuper { + every.Add(match.Max{min}) + } + } + + if separator != "" { + every.Add(match.Contains{separator, true}) + } + + return every +} + +func convertMatchers(matchers []match.Matcher) (match.Matcher, error) { + if m := glueMatchers(matchers); m != nil { + return m, nil + } + + var ( + val match.Primitive + idx int + ) + + for i, matcher := range matchers { + if p, ok := matcher.(match.Primitive); ok { + idx = i + val = p + + if _, ok := matcher.(match.Raw); ok { + break + } + } + } + + if val == nil { + return nil, fmt.Errorf("could not convert matchers %s: need at least one primitive", match.Matchers(matchers)) + } + + left := matchers[:idx] + var right []match.Matcher + if len(matchers) > idx+1 { + right = matchers[idx+1:] + } + + tree := match.BTree{Value: val} + + if len(left) > 0 { + l, err := convertMatchers(left) + if err != nil { + return nil, err + } + + tree.Left = l + } + + if len(right) > 0 { + r, err := convertMatchers(right) + if err != nil { + return nil, err + } + + tree.Right = r + } + + return tree, nil +} + +func do(node node, s string) (m match.Matcher, err error) { + switch n := node.(type) { + + case *nodeAnyOf, *nodePattern: + var matchers []match.Matcher + for _, desc := range node.children() { + m, err := do(desc, s) + if err != nil { + return nil, err + } + matchers = append(matchers, optimize(m)) + } + + if _, ok := node.(*nodeAnyOf); ok { + m = match.AnyOf{matchers} + } else { + m, err = convertMatchers(matchers) + if err != nil { + return nil, err + } + } + + case *nodeList: + m = match.List{n.chars, n.not} + + case *nodeRange: + m = match.Range{n.lo, n.hi, n.not} + + case *nodeAny: + m = match.Any{s} + + case *nodeSuper: + m = match.Super{} + + case *nodeSingle: + m = match.Single{s} + + case *nodeText: + m = match.Raw{n.text} + + default: + return nil, fmt.Errorf("could not compile tree: unknown node type") + } + + return optimize(m), nil +} + +func compile(ast *nodePattern, s string) (Glob, error) { + g, err := do(ast, s) + if err != nil { + return nil, err + } + + return g, nil +} diff --git a/compiler_test.go b/compiler_test.go new file mode 100644 index 0000000..13efe01 --- /dev/null +++ b/compiler_test.go @@ -0,0 +1,258 @@ +package glob + +import ( + "github.com/gobwas/glob/match" + "reflect" + "testing" +) + +const separators = "." + +func TestGlueMatchers(t *testing.T) { + for id, test := range []struct { + in []match.Matcher + exp match.Matcher + }{ + { + []match.Matcher{ + match.Super{}, + match.Single{}, + }, + match.Min{1}, + }, + { + []match.Matcher{ + match.Any{separators}, + match.Single{separators}, + }, + match.Every{match.Matchers{ + match.Min{1}, + match.Contains{separators, true}, + }}, + }, + { + []match.Matcher{ + match.Single{}, + match.Single{}, + match.Single{}, + }, + match.Every{match.Matchers{ + match.Min{3}, + match.Max{3}, + }}, + }, + { + []match.Matcher{ + match.List{"a", true}, + match.Any{"a"}, + }, + match.Every{match.Matchers{ + match.Min{1}, + match.Contains{"a", true}, + }}, + }, + } { + act, err := convertMatchers(test.in) + if err != nil { + t.Errorf("#%d convert matchers error: %s", id, err) + continue + } + + if !reflect.DeepEqual(act, test.exp) { + t.Errorf("#%d unexpected convert matchers result:\nact: %s;\nexp: %s", id, act, test.exp) + continue + } + } +} + +func TestConvertMatchers(t *testing.T) { + for id, test := range []struct { + in []match.Matcher + exp match.Matcher + }{ + { + []match.Matcher{ + match.Super{}, + match.Single{separators}, + match.Raw{"c"}, + }, + match.BTree{ + Left: match.BTree{ + Left: match.Super{}, + Value: match.Single{separators}, + }, + Value: match.Raw{"c"}, + }, + }, + { + []match.Matcher{ + match.Any{}, + match.Raw{"c"}, + match.Any{}, + }, + match.BTree{ + Left: match.Any{}, + Value: match.Raw{"c"}, + Right: match.Any{}, + }, + }, + } { + act, err := convertMatchers(test.in) + if err != nil { + t.Errorf("#%d convert matchers error: %s", id, err) + continue + } + + if !reflect.DeepEqual(act, test.exp) { + t.Errorf("#%d unexpected convert matchers result:\nact: %s;\nexp: %s", id, act, test.exp) + continue + } + } +} + +func pattern(nodes ...node) *nodePattern { + return &nodePattern{ + nodeImpl: nodeImpl{ + desc: nodes, + }, + } +} +func anyOf(nodes ...node) *nodeAnyOf { + return &nodeAnyOf{ + nodeImpl: nodeImpl{ + desc: nodes, + }, + } +} +func TestCompiler(t *testing.T) { + for id, test := range []struct { + ast *nodePattern + result Glob + sep string + }{ + { + ast: pattern(&nodeText{text: "abc"}), + result: match.Raw{"abc"}, + }, + { + ast: pattern(&nodeAny{}), + sep: separators, + result: match.Any{separators}, + }, + { + ast: pattern(&nodeAny{}), + result: match.Super{}, + }, + { + ast: pattern(&nodeSuper{}), + result: match.Super{}, + }, + { + ast: pattern(&nodeSingle{}), + sep: separators, + result: match.Single{separators}, + }, + { + ast: pattern(&nodeRange{ + lo: 'a', + hi: 'z', + not: true, + }), + result: match.Range{'a', 'z', true}, + }, + { + ast: pattern(&nodeList{ + chars: "abc", + not: true, + }), + result: match.List{"abc", true}, + }, + { + ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}), + sep: separators, + result: match.Every{Matchers: match.Matchers{ + match.Min{3}, + match.Contains{separators, true}, + }}, + }, + { + ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}), + result: match.Min{3}, + }, + { + ast: pattern(&nodeAny{}, &nodeText{text: "abc"}, &nodeSingle{}), + sep: separators, + result: match.BTree{ + Left: match.Any{separators}, + Value: match.Raw{"abc"}, + Right: match.Single{separators}, + }, + }, + { + ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSingle{}), + sep: separators, + result: match.BTree{ + Left: match.BTree{ + Left: match.Super{}, + Value: match.Single{separators}, + }, + Value: match.Raw{"abc"}, + Right: match.Single{separators}, + }, + }, + { + ast: pattern(&nodeAny{}, &nodeText{text: "abc"}), + result: match.Suffix{"abc"}, + }, + { + ast: pattern(&nodeText{text: "abc"}, &nodeAny{}), + result: match.Prefix{"abc"}, + }, + { + ast: pattern(&nodeText{text: "abc"}, &nodeAny{}, &nodeText{text: "def"}), + result: match.Every{match.Matchers{match.Prefix{"abc"}, match.Suffix{"def"}}}, + }, + { + ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}), + result: match.Contains{"abc", false}, + }, + { + ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}), + sep: separators, + result: match.BTree{Left: match.Any{separators}, Value: match.Raw{"abc"}, Right: match.Any{separators}}, + }, + { + ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSuper{}, &nodeSingle{}), + result: match.BTree{ + Left: match.Min{1}, + Value: match.Raw{"abc"}, + Right: match.Min{1}, + }, + }, + { + ast: pattern(anyOf(&nodeText{text: "abc"})), + result: match.AnyOf{match.Matchers{ + match.Raw{"abc"}, + }}, + }, + { + ast: pattern(anyOf(pattern(anyOf(pattern(&nodeText{text: "abc"}))))), + result: match.AnyOf{match.Matchers{ + match.AnyOf{match.Matchers{ + match.Raw{"abc"}, + }}, + }}, + }, + } { + prog, err := compile(test.ast, test.sep) + if err != nil { + t.Errorf("compilation error: %s", err) + continue + } + + if !reflect.DeepEqual(prog, test.result) { + t.Errorf("#%d results are not equal:\nexp: %s,\nact: %s", id, test.result, prog) + continue + } + } +} diff --git a/glob.go b/glob.go index 0dd2859..f335d78 100644 --- a/glob.go +++ b/glob.go @@ -1,25 +1,6 @@ package glob -import ( - "fmt" - "github.com/gobwas/glob/match" - "strings" -) - -const ( - any = '*' - single = '?' - escape = '\\' - range_open = '[' - range_close = ']' -) - -const ( - inside_range_not = '!' - inside_range_minus = '-' -) - -var syntaxPhrases = string([]byte{any, single, escape, range_open, range_close}) +import "strings" // Glob represents compiled glob pattern. type Glob interface { @@ -37,156 +18,25 @@ type Glob interface { // `?` matches any single non-separator character // c matches character c (c != `*`, `**`, `?`, `\`) // `\` c matches character c -func New(pattern string, separators ...string) (Glob, error) { - chunks, err := parse(pattern, strings.Join(separators, ""), state{}) +func Compile(pattern string, separators ...string) (Glob, error) { + ast, err := parse(newLexer(pattern)) if err != nil { return nil, err } - switch len(chunks) { - case 1: - return chunks[0].matcher, nil - case 2: - if chunks[0].matcher.Kind() == match.KindRaw && chunks[1].matcher.Kind() == match.KindMultipleSuper { - return &match.Prefix{chunks[0].str}, nil - } - if chunks[1].matcher.Kind() == match.KindRaw && chunks[0].matcher.Kind() == match.KindMultipleSuper { - return &match.Suffix{chunks[1].str}, nil - } - case 3: - if chunks[0].matcher.Kind() == match.KindRaw && chunks[1].matcher.Kind() == match.KindMultipleSuper && chunks[2].matcher.Kind() == match.KindRaw { - return &match.PrefixSuffix{chunks[0].str, chunks[2].str}, nil - } + matcher, err := compile(ast, strings.Join(separators, "")) + if err != nil { + return nil, err } - var c []match.Matcher - for _, chunk := range chunks { - c = append(c, chunk.matcher) - } - - return &match.Composite{c}, nil + return matcher, nil } -// parse parsed given pattern into list of tokens -func parse(str string, sep string, st state) ([]token, error) { - if len(str) == 0 { - return st.tokens, nil +func MustCompile(pattern string, separators ...string) Glob { + g, err := Compile(pattern, separators...) + if err != nil { + panic(err) } - // if there are no syntax symbols - pattern is simple string - i := strings.IndexAny(str, syntaxPhrases) - if i == -1 { - return append(st.tokens, token{match.Raw{str}, str}), nil - } - - c := string(str[i]) - - // if syntax symbol is not at the start of pattern - add raw part before it - if i > 0 { - st.tokens = append(st.tokens, token{match.Raw{str[0:i]}, str[0:i]}) - } - - // if we are in escape state - if st.escape { - st.tokens = append(st.tokens, token{match.Raw{c}, c}) - st.escape = false - } else { - switch str[i] { - case range_open: - closed := indexByteNonEscaped(str, range_close, escape, 0) - if closed == -1 { - return nil, fmt.Errorf("'%s' should be closed with '%s'", string(range_open), string(range_close)) - } - - r := str[i+1 : closed] - g, err := parseRange(r) - if err != nil { - return nil, err - } - st.tokens = append(st.tokens, token{g, r}) - - if closed == len(str)-1 { - return st.tokens, nil - } - - return parse(str[closed+1:], sep, st) - - case escape: - st.escape = true - case any: - if len(str) > i+1 && str[i+1] == any { - st.tokens = append(st.tokens, token{match.Any{}, c}) - return parse(str[i+len(c)+1:], sep, st) - } - - st.tokens = append(st.tokens, token{match.Any{sep}, c}) - case single: - st.tokens = append(st.tokens, token{match.Single{sep}, c}) - } - } - - return parse(str[i+len(c):], sep, st) -} - -func parseRange(def string) (match.Matcher, error) { - var ( - not bool - esc bool - minus bool - minusIndex int - b []byte - ) - - for i, c := range []byte(def) { - if esc { - b = append(b, c) - esc = false - continue - } - - switch c { - case inside_range_not: - if i == 0 { - not = true - } - case escape: - if i == len(def)-1 { - return nil, fmt.Errorf("there should be any character after '%s'", string(escape)) - } - - esc = true - case inside_range_minus: - minus = true - minusIndex = len(b) - default: - b = append(b, c) - } - } - - if len(b) == 0 { - return nil, fmt.Errorf("range could not be empty") - } - - def = string(b) - - if minus { - r := []rune(def) - if len(r) != 2 || minusIndex != 1 { - return nil, fmt.Errorf("invalid range syntax: '%s' should be between two characters", string(inside_range_minus)) - } - - return &match.Between{r[0], r[1], not}, nil - } - - return &match.RangeList{def, not}, nil -} - -type token struct { - matcher match.Matcher - str string -} - -type state struct { - escape bool - tokens []token + return g } diff --git a/glob_test.go b/glob_test.go index 8e36ae1..b09035e 100644 --- a/glob_test.go +++ b/glob_test.go @@ -1,6 +1,8 @@ package glob import ( + "github.com/gobwas/glob/match" + "reflect" "testing" ) @@ -14,13 +16,12 @@ const ( pattern_multiple = "https://*.google.*" fixture_multiple = "https://account.google.com" - pattern_prefix = "abc*" - pattern_suffix = "*def" + pattern_prefix = "abc*" + pattern_suffix = "*def" pattern_prefix_suffix = "ab*ef" fixture_prefix_suffix = "abcdef" ) - type test struct { pattern, match string should bool @@ -31,11 +32,37 @@ func glob(s bool, p, m string, d ...string) test { return test{p, m, s, d} } +func TestCompilePattern(t *testing.T) { + for id, test := range []struct { + pattern string + sep string + exp match.Matcher + }{ + // { + // pattern: "[!a]*****", + // exp: match.Raw{"t"}, + // }, + } { + glob, err := Compile(test.pattern, test.sep) + if err != nil { + t.Errorf("#%d compile pattern error: %s", id, err) + continue + } + + matcher := glob.(match.Matcher) + + if !reflect.DeepEqual(test.exp, matcher) { + t.Errorf("#%d unexpected compilation:\nexp: %s\nact: %s", id, test.exp, matcher) + continue + } + } +} + func TestIndexByteNonEscaped(t *testing.T) { for _, test := range []struct { - s string + s string n, e byte - i int + i int }{ { "\\n_n", @@ -109,7 +136,13 @@ func TestGlob(t *testing.T) { glob(false, "*is", "this is a test"), glob(false, "*no*", "this is a test"), - glob(true, "[!a]*", "this is a test"), + glob(true, "[!a]*", "this is a test3"), + + // glob(true, "*abc", "abcabc"), + glob(true, "**abc", "abcabc"), + // glob(true, "???", "abc"), + // glob(true, "?*?", "abc"), + // glob(true, "?*?", "ac"), glob(true, pattern_all, fixture_all), glob(true, pattern_plain, fixture_plain), @@ -118,7 +151,7 @@ func TestGlob(t *testing.T) { glob(true, pattern_suffix, fixture_prefix_suffix), glob(true, pattern_prefix_suffix, fixture_prefix_suffix), } { - g, err := New(test.pattern, test.delimiters...) + g, err := Compile(test.pattern, test.delimiters...) if err != nil { t.Errorf("parsing pattern %q error: %s", test.pattern, err) continue @@ -131,15 +164,14 @@ func TestGlob(t *testing.T) { } } - func BenchmarkParse(b *testing.B) { for i := 0; i < b.N; i++ { - New(pattern_all) + Compile(pattern_all) } } func BenchmarkAll(b *testing.B) { - m, _ := New(pattern_all) + m, _ := Compile(pattern_all) for i := 0; i < b.N; i++ { _ = m.Match(fixture_all) @@ -147,37 +179,37 @@ func BenchmarkAll(b *testing.B) { } func BenchmarkMultiple(b *testing.B) { - m, _ := New(pattern_multiple) + m, _ := Compile(pattern_multiple) for i := 0; i < b.N; i++ { _ = m.Match(fixture_multiple) } } func BenchmarkPlain(b *testing.B) { - m, _ := New(pattern_plain) + m, _ := Compile(pattern_plain) for i := 0; i < b.N; i++ { _ = m.Match(fixture_plain) } } func BenchmarkPrefix(b *testing.B) { - m, _ := New(pattern_prefix) + m, _ := Compile(pattern_prefix) for i := 0; i < b.N; i++ { _ = m.Match(fixture_prefix_suffix) } } func BenchmarkSuffix(b *testing.B) { - m, _ := New(pattern_suffix) + m, _ := Compile(pattern_suffix) for i := 0; i < b.N; i++ { _ = m.Match(fixture_prefix_suffix) } } func BenchmarkPrefixSuffix(b *testing.B) { - m, _ := New(pattern_prefix_suffix) + m, _ := Compile(pattern_prefix_suffix) for i := 0; i < b.N; i++ { _ = m.Match(fixture_prefix_suffix) } -} \ No newline at end of file +} diff --git a/lexer.go b/lexer.go index 76685c9..0747e9e 100644 --- a/lexer.go +++ b/lexer.go @@ -6,6 +6,19 @@ import ( "unicode/utf8" ) +const ( + char_any = '*' + char_separator = ',' + char_single = '?' + char_escape = '\\' + char_range_open = '[' + char_range_close = ']' + char_terms_open = '{' + char_terms_close = '}' + char_range_not = '!' + char_range_between = '-' +) + var eof rune = 0 type stateFn func(*lexer) stateFn @@ -16,15 +29,19 @@ const ( item_eof itemType = iota item_error item_text + item_char item_any + item_super item_single + item_not + item_separator item_range_open - item_range_not - item_range_lo - item_range_minus - item_range_hi - item_range_chars item_range_close + item_range_lo + item_range_hi + item_range_between + item_terms_open + item_terms_close ) func (i itemType) String() string { @@ -38,32 +55,44 @@ func (i itemType) String() string { case item_text: return "text" + case item_char: + return "char" + case item_any: return "any" + case item_super: + return "super" + case item_single: return "single" + case item_not: + return "not" + + case item_separator: + return "separator" + case item_range_open: return "range_open" - case item_range_not: - return "range_not" + case item_range_close: + return "range_close" case item_range_lo: return "range_lo" - case item_range_minus: - return "range_minus" - case item_range_hi: return "range_hi" - case item_range_chars: - return "range_chars" + case item_range_between: + return "range_between" - case item_range_close: - return "range_close" + case item_terms_open: + return "terms_open" + + case item_terms_close: + return "terms_close" default: return "undef" @@ -80,20 +109,23 @@ func (i item) String() string { } type lexer struct { - input string - start int - pos int - width int - runes int - state stateFn - items chan item + input string + start int + pos int + width int + runes int + termScopes []int + termPhrases map[int]int + state stateFn + items chan item } func newLexer(source string) *lexer { l := &lexer{ - input: source, - state: lexText, - items: make(chan item, 5), + input: source, + state: lexText, + items: make(chan item, 5), + termPhrases: make(map[int]int), } return l } @@ -105,6 +137,23 @@ func (l *lexer) run() { close(l.items) } +func (l *lexer) nextItem() item { + for { + select { + case item := <-l.items: + return item + default: + if l.state == nil { + return item{t: item_eof} + } + + l.state = l.state(l) + } + } + + panic("something went wrong") +} + func (l *lexer) read() (r rune) { if l.pos >= len(l.input) { return eof @@ -134,7 +183,9 @@ func (l *lexer) ignore() { func (l *lexer) lookahead() rune { r := l.read() - l.unread() + if r != eof { + l.unread() + } return r } @@ -153,7 +204,12 @@ func (l *lexer) acceptAll(valid string) { } func (l *lexer) emit(t itemType) { - l.items <- item{t, l.input[l.start:l.pos]} + if l.pos == len(l.input) { + l.items <- item{t, l.input[l.start:]} + } else { + l.items <- item{t, l.input[l.start:l.pos]} + } + l.start = l.pos l.runes = 0 l.width = 0 @@ -169,23 +225,6 @@ func (l *lexer) errorf(format string, args ...interface{}) { l.items <- item{item_error, fmt.Sprintf(format, args...)} } -func (l *lexer) nextItem() item { - for { - select { - case item := <-l.items: - return item - default: - if l.state == nil { - return item{t: item_eof} - } - - l.state = l.state(l) - } - } - - panic("something went wrong") -} - func lexText(l *lexer) stateFn { for { c := l.read() @@ -194,23 +233,55 @@ func lexText(l *lexer) stateFn { } switch c { - case escape: + case char_escape: + l.unread() + l.emitMaybe(item_text) + + l.read() + l.ignore() + if l.read() == eof { - l.errorf("unclosed '%s' character", string(escape)) + l.errorf("unclosed '%s' character", string(char_escape)) return nil } - case single: + + case char_single: l.unread() l.emitMaybe(item_text) return lexSingle - case any: + + case char_any: + var n stateFn + if l.lookahead() == char_any { + n = lexSuper + } else { + n = lexAny + } + l.unread() l.emitMaybe(item_text) - return lexAny - case range_open: + return n + + case char_range_open: l.unread() l.emitMaybe(item_text) return lexRangeOpen + + case char_terms_open: + l.unread() + l.emitMaybe(item_text) + return lexTermsOpen + + case char_terms_close: + l.unread() + l.emitMaybe(item_text) + return lexTermsClose + + case char_separator: + l.unread() + l.emitMaybe(item_text) + return lexSeparator + } } @@ -219,6 +290,11 @@ func lexText(l *lexer) stateFn { l.emit(item_text) } + if len(l.termScopes) != 0 { + l.errorf("invalid pattern syntax: unclosed terms") + return nil + } + l.emit(item_eof) return nil @@ -233,13 +309,13 @@ func lexInsideRange(l *lexer) stateFn { } switch c { - case inside_range_not: + case char_range_not: // only first char makes sense if l.pos-l.width == l.start { - l.emit(item_range_not) + l.emit(item_not) } - case inside_range_minus: + case char_range_between: if l.runes != 2 { l.errorf("unexpected length of lo char inside range") return nil @@ -248,20 +324,14 @@ func lexInsideRange(l *lexer) stateFn { l.reset() return lexRangeHiLo - case range_close: + case char_range_close: l.unread() - l.emitMaybe(item_range_chars) + l.emitMaybe(item_text) return lexRangeClose } } } -func lexAny(l *lexer) stateFn { - l.pos += 1 - l.emit(item_any) - return lexText -} - func lexRangeHiLo(l *lexer) stateFn { start := l.start @@ -273,15 +343,15 @@ func lexRangeHiLo(l *lexer) stateFn { } switch c { - case inside_range_minus: + case char_range_between: if l.runes != 1 { l.errorf("unexpected length of range: single character expected before minus") return nil } - l.emit(item_range_minus) + l.emit(item_range_between) - case range_close: + case char_range_close: l.unread() if l.runes != 1 { @@ -307,12 +377,81 @@ func lexRangeHiLo(l *lexer) stateFn { } } +func lexAny(l *lexer) stateFn { + l.pos += 1 + l.emit(item_any) + return lexText +} + +func lexSuper(l *lexer) stateFn { + l.pos += 2 + l.emit(item_super) + return lexText +} + func lexSingle(l *lexer) stateFn { l.pos += 1 l.emit(item_single) return lexText } +func lexSeparator(l *lexer) stateFn { + if len(l.termScopes) == 0 { + l.errorf("syntax error: separator not inside terms list") + return nil + } + + posOpen := l.termScopes[len(l.termScopes)-1] + + if l.pos-posOpen == 1 { + l.errorf("syntax error: empty term before separator") + return nil + } + + l.termPhrases[posOpen] += 1 + l.pos += 1 + l.emit(item_separator) + return lexText +} + +func lexTermsOpen(l *lexer) stateFn { + l.termScopes = append(l.termScopes, l.pos) + l.pos += 1 + l.emit(item_terms_open) + + return lexText +} + +func lexTermsClose(l *lexer) stateFn { + if len(l.termScopes) == 0 { + l.errorf("unexpected closing of terms: there is no opened terms") + return nil + } + + lastOpen := len(l.termScopes) - 1 + posOpen := l.termScopes[lastOpen] + + // if it is empty term + if posOpen == l.pos-1 { + l.errorf("term could not be empty") + return nil + } + + if l.termPhrases[posOpen] == 0 { + l.errorf("term must contain >1 phrases") + return nil + } + + // cleanup + l.termScopes = l.termScopes[:lastOpen] + delete(l.termPhrases, posOpen) + + l.pos += 1 + l.emit(item_terms_close) + + return lexText +} + func lexRangeOpen(l *lexer) stateFn { l.pos += 1 l.emit(item_range_open) diff --git a/lexer_test.go b/lexer_test.go index 63fc5b1..44d18dc 100644 --- a/lexer_test.go +++ b/lexer_test.go @@ -5,7 +5,7 @@ import ( ) func TestLexGood(t *testing.T) { - for _, test := range []struct { + for id, test := range []struct { pattern string items []item }{ @@ -25,9 +25,9 @@ func TestLexGood(t *testing.T) { }, }, { - pattern: "hello*", + pattern: "hellof*", items: []item{ - item{item_text, "hello"}, + item{item_text, "hellof"}, item{item_any, "*"}, item{item_eof, ""}, }, @@ -36,8 +36,7 @@ func TestLexGood(t *testing.T) { pattern: "hello**", items: []item{ item{item_text, "hello"}, - item{item_any, "*"}, - item{item_any, "*"}, + item{item_super, "**"}, item{item_eof, ""}, }, }, @@ -46,7 +45,7 @@ func TestLexGood(t *testing.T) { items: []item{ item{item_range_open, "["}, item{item_range_lo, "日"}, - item{item_range_minus, "-"}, + item{item_range_between, "-"}, item{item_range_hi, "語"}, item{item_range_close, "]"}, item{item_eof, ""}, @@ -56,9 +55,9 @@ func TestLexGood(t *testing.T) { pattern: "[!日-語]", items: []item{ item{item_range_open, "["}, - item{item_range_not, "!"}, + item{item_not, "!"}, item{item_range_lo, "日"}, - item{item_range_minus, "-"}, + item{item_range_between, "-"}, item{item_range_hi, "語"}, item{item_range_close, "]"}, item{item_eof, ""}, @@ -68,7 +67,7 @@ func TestLexGood(t *testing.T) { pattern: "[日本語]", items: []item{ item{item_range_open, "["}, - item{item_range_chars, "日本語"}, + item{item_text, "日本語"}, item{item_range_close, "]"}, item{item_eof, ""}, }, @@ -77,22 +76,59 @@ func TestLexGood(t *testing.T) { pattern: "[!日本語]", items: []item{ item{item_range_open, "["}, - item{item_range_not, "!"}, - item{item_range_chars, "日本語"}, + item{item_not, "!"}, + item{item_text, "日本語"}, item{item_range_close, "]"}, item{item_eof, ""}, }, }, + { + pattern: "{a,b}", + items: []item{ + item{item_terms_open, "{"}, + item{item_text, "a"}, + item{item_separator, ","}, + item{item_text, "b"}, + item{item_terms_close, "}"}, + item{item_eof, ""}, + }, + }, + { + pattern: "{[!日-語],*,?,{a,b,\\c}}", + items: []item{ + item{item_terms_open, "{"}, + item{item_range_open, "["}, + item{item_not, "!"}, + item{item_range_lo, "日"}, + item{item_range_between, "-"}, + item{item_range_hi, "語"}, + item{item_range_close, "]"}, + item{item_separator, ","}, + item{item_any, "*"}, + item{item_separator, ","}, + item{item_single, "?"}, + item{item_separator, ","}, + item{item_terms_open, "{"}, + item{item_text, "a"}, + item{item_separator, ","}, + item{item_text, "b"}, + item{item_separator, ","}, + item{item_text, "c"}, + item{item_terms_close, "}"}, + item{item_terms_close, "}"}, + item{item_eof, ""}, + }, + }, } { lexer := newLexer(test.pattern) - for _, exp := range test.items { + for i, exp := range test.items { act := lexer.nextItem() if act.t != exp.t { - t.Errorf("wrong item type: exp: %v; act: %v (%s vs %s)", exp.t, act.t, exp, act) + t.Errorf("#%d wrong %d-th item type: exp: %v; act: %v (%s vs %s)", id, i, exp.t, act.t, exp, act) break } if act.s != exp.s { - t.Errorf("wrong item contents: exp: %q; act: %q (%s vs %s)", exp.s, act.s, exp, act) + t.Errorf("#%d wrong %d-th item contents: exp: %q; act: %q (%s vs %s)", id, i, exp.s, act.s, exp, act) break } } diff --git a/match/any.go b/match/any.go new file mode 100644 index 0000000..174f810 --- /dev/null +++ b/match/any.go @@ -0,0 +1,42 @@ +package match + +import ( + "fmt" + "strings" +) + +type Any struct { + Separators string +} + +func (self Any) Match(s string) bool { + return strings.IndexAny(s, self.Separators) == -1 +} + +func (self Any) Index(s string) (index, min, max int) { + index = -1 + + for i, r := range []rune(s) { + if strings.IndexRune(self.Separators, r) == -1 { + if index == -1 { + index = i + } + max++ + continue + } + + if index != -1 { + break + } + } + + return +} + +func (self Any) Kind() Kind { + return KindAny +} + +func (self Any) String() string { + return fmt.Sprintf("[any:%s]", self.Separators) +} diff --git a/match/any_of.go b/match/any_of.go new file mode 100644 index 0000000..21d66ee --- /dev/null +++ b/match/any_of.go @@ -0,0 +1,31 @@ +package match + +import ( + "fmt" +) + +type AnyOf struct { + Matchers Matchers +} + +func (self *AnyOf) Add(m Matcher) { + self.Matchers = append(self.Matchers, m) +} + +func (self AnyOf) Match(s string) bool { + for _, m := range self.Matchers { + if m.Match(s) { + return true + } + } + + return false +} + +func (self AnyOf) Kind() Kind { + return KindAnyOf +} + +func (self AnyOf) String() string { + return fmt.Sprintf("[any_of:%s]", self.Matchers) +} diff --git a/match/between.go b/match/between.go deleted file mode 100644 index 1c3e0f0..0000000 --- a/match/between.go +++ /dev/null @@ -1,38 +0,0 @@ -package match - -import ( - "fmt" -) - -type Between struct { - Lo, Hi rune - Not bool -} - -func (self Between) Kind() Kind { - return KindRangeBetween -} - -func (self Between) Search(s string) (i int, l int, ok bool) { - if self.Match(s) { - return 0, len(s), true - } - - return -} - -func (self Between) Match(s string) bool { - r := []rune(s) - - if (len(r) != 1) { - return false - } - - inRange := r[0] >= self.Lo && r[0] <= self.Hi - - return inRange == !self.Not -} - -func (self Between) String() string { - return fmt.Sprintf("[range_between:%s-%s(%t)]", self.Lo, self.Hi, self.Not) -} \ No newline at end of file diff --git a/match/btree.go b/match/btree.go new file mode 100644 index 0000000..b3b7e7e --- /dev/null +++ b/match/btree.go @@ -0,0 +1,68 @@ +package match + +import ( + "fmt" +) + +type BTree struct { + Value Primitive + Left, Right Matcher +} + +func (self BTree) Kind() Kind { + return KindBTree +} + +func (self BTree) Match(s string) bool { + runes := []rune(s) + inputLen := len(runes) + + for offset := 0; offset < inputLen; { + index, min, max := self.Value.Index(string(runes[offset:])) + + if index == -1 { + return false + } + + for length := min; length <= max; length++ { + var left, right bool + + l := string(runes[:offset+index]) + if self.Left != nil { + left = self.Left.Match(l) + } else { + left = l == "" + } + + if !left { + break + } + + var r string + // if there is no string for the right branch + if inputLen <= offset+index+length { + r = "" + } else { + r = string(runes[offset+index+length:]) + } + + if self.Right != nil { + right = self.Right.Match(r) + } else { + right = r == "" + } + + if left && right { + return true + } + } + + offset += index + 1 + } + + return false +} + +func (self BTree) String() string { + return fmt.Sprintf("[btree:%s<-%s->%s]", self.Left, self.Value, self.Right) +} diff --git a/match/btree_test.go b/match/btree_test.go new file mode 100644 index 0000000..459d907 --- /dev/null +++ b/match/btree_test.go @@ -0,0 +1,46 @@ +package match + +import ( + "testing" +) + +func TestBTree(t *testing.T) { + for id, test := range []struct { + tree BTree + str string + exp bool + }{ + { + BTree{Value: Raw{"abc"}, Left: Super{}, Right: Super{}}, + "abc", + true, + }, + { + BTree{Value: Raw{"a"}, Left: Single{}, Right: Single{}}, + "aaa", + true, + }, + { + BTree{Value: Raw{"b"}, Left: Single{}}, + "bbb", + false, + }, + { + BTree{ + Left: BTree{ + Left: Super{}, + Value: Single{}, + }, + Value: Raw{"c"}, + }, + "abc", + true, + }, + } { + act := test.tree.Match(test.str) + if act != test.exp { + t.Errorf("#%d match %q error: act: %t; exp: %t", id, test.str, act, test.exp) + continue + } + } +} diff --git a/match/composite.go b/match/composite.go deleted file mode 100644 index 9072ee1..0000000 --- a/match/composite.go +++ /dev/null @@ -1,70 +0,0 @@ -package match - -import ( - "strings" - "fmt" -) - - - -// composite -type Composite struct { - Chunks []Matcher -} - - -func (self Composite) Kind() Kind { - return KindComposite -} - -func (self Composite) Search(s string) (i int, l int, ok bool) { - if self.Match(s) { - return 0, len(s), true - } - - return -} - -func m(chunks []Matcher, s string) bool { - var prev Matcher - for _, c := range chunks { - if c.Kind() == KindRaw { - i, l, ok := c.Search(s) - if !ok { - return false - } - - if prev != nil { - if !prev.Match(s[:i]) { - return false - } - - prev = nil - } - - s = s[i+l:] - continue - } - - prev = c - } - - if prev != nil { - return prev.Match(s) - } - - return len(s) == 0 -} - -func (self Composite) Match(s string) bool { - return m(self.Chunks, s) -} - -func (self Composite) String() string { - var l []string - for _, c := range self.Chunks { - l = append(l, fmt.Sprint(c)) - } - - return fmt.Sprintf("[composite:%s]", strings.Join(l, ",")) -} diff --git a/match/contains.go b/match/contains.go new file mode 100644 index 0000000..f956e38 --- /dev/null +++ b/match/contains.go @@ -0,0 +1,23 @@ +package match + +import ( + "fmt" + "strings" +) + +type Contains struct { + Needle string + Not bool +} + +func (self Contains) Match(s string) bool { + return strings.Contains(s, self.Needle) != self.Not +} + +func (self Contains) Kind() Kind { + return KindContains +} + +func (self Contains) String() string { + return fmt.Sprintf("[contains:needle=%s not=%t]", self.Needle, self.Not) +} diff --git a/match/every_of.go b/match/every_of.go new file mode 100644 index 0000000..f1d5bf4 --- /dev/null +++ b/match/every_of.go @@ -0,0 +1,31 @@ +package match + +import ( + "fmt" +) + +type Every struct { + Matchers Matchers +} + +func (self *Every) Add(m Matcher) { + self.Matchers = append(self.Matchers, m) +} + +func (self Every) Match(s string) bool { + for _, m := range self.Matchers { + if !m.Match(s) { + return false + } + } + + return true +} + +func (self Every) Kind() Kind { + return KindEveryOf +} + +func (self Every) String() string { + return fmt.Sprintf("[every_of:%s]", self.Matchers) +} diff --git a/match/list.go b/match/list.go index 229405f..d883887 100644 --- a/match/list.go +++ b/match/list.go @@ -1,41 +1,39 @@ package match - import ( - "strings" "fmt" + "strings" ) - -type RangeList struct { +type List struct { List string Not bool } -func (self RangeList) Kind() Kind { - return KindRangeList +func (self List) Kind() Kind { + return KindList } -func (self RangeList) Search(s string) (i int, l int, ok bool) { - if self.Match(s) { - return 0, len(s), true - } - - return -} - -func (self RangeList) Match(s string) bool { - r := []rune(s) - - if (len(r) != 1) { +func (self List) Match(s string) bool { + if len([]rune(s)) != 1 { return false } - inList := strings.IndexRune(self.List, r[0]) >= 0 + inList := strings.Index(self.List, s) != -1 return inList == !self.Not } -func (self RangeList) String() string { - return fmt.Sprintf("[range_list:%s]", self.List) +func (self List) Index(s string) (index, min, max int) { + for i, r := range []rune(s) { + if self.Not == (strings.IndexRune(self.List, r) == -1) { + return i, 1, 1 + } + } + + return -1, 0, 0 +} + +func (self List) String() string { + return fmt.Sprintf("[list:list=%s not=%t]", self.List, self.Not) } diff --git a/match/match.go b/match/match.go index e519b44..54ffee5 100644 --- a/match/match.go +++ b/match/match.go @@ -1,22 +1,47 @@ package match +import ( + "fmt" + "strings" +) + type Kind int -const( + +// todo use String for Kind, and self.Kind() in every matcher.String() +const ( KindRaw Kind = iota - KindMultipleSeparated - KindMultipleSuper + KindEveryOf + KindAnyOf + KindAny + KindSuper KindSingle - KindComposite + KindComposition KindPrefix KindSuffix KindPrefixSuffix - KindRangeBetween - KindRangeList + KindRange + KindList + KindMin + KindMax + KindBTree + KindContains ) - type Matcher interface { Match(string) bool - Search(string) (int, int, bool) - Kind() Kind -} \ No newline at end of file +} + +type Primitive interface { + Index(string) (int, int, int) +} + +type Matchers []Matcher + +func (m Matchers) String() string { + var s []string + for _, matcher := range m { + s = append(s, fmt.Sprint(matcher)) + } + + return fmt.Sprintf("matchers[%s]", strings.Join(s, ",")) +} diff --git a/match/max.go b/match/max.go new file mode 100644 index 0000000..aeccece --- /dev/null +++ b/match/max.go @@ -0,0 +1,23 @@ +package match + +import "fmt" + +type Max struct { + Limit int +} + +func (self Max) Match(s string) bool { + return len([]rune(s)) <= self.Limit +} + +func (self Max) Search(s string) (int, int, bool) { + return 0, 0, false +} + +func (self Max) Kind() Kind { + return KindMax +} + +func (self Max) String() string { + return fmt.Sprintf("[max:%d]", self.Limit) +} diff --git a/match/min.go b/match/min.go new file mode 100644 index 0000000..5216a6d --- /dev/null +++ b/match/min.go @@ -0,0 +1,23 @@ +package match + +import "fmt" + +type Min struct { + Limit int +} + +func (self Min) Match(s string) bool { + return len([]rune(s)) >= self.Limit +} + +func (self Min) Search(s string) (int, int, bool) { + return 0, 0, false +} + +func (self Min) Kind() Kind { + return KindMin +} + +func (self Min) String() string { + return fmt.Sprintf("[min:%d]", self.Limit) +} diff --git a/match/multiple.go b/match/multiple.go deleted file mode 100644 index 42e84f4..0000000 --- a/match/multiple.go +++ /dev/null @@ -1,34 +0,0 @@ -package match - -import ( - "fmt" - "strings" -) - -type Any struct { - Separators string -} - -func (self Any) Match(s string) bool { - return strings.IndexAny(s, self.Separators) == -1 -} - -func (self Any) Search(s string) (i, l int, ok bool) { - if self.Match(s) { - return 0, len(s), true - } - - return -} - -func (self Any) Kind() Kind { - if self.Separators == "" { - return KindMultipleSuper - } else { - return KindMultipleSeparated - } -} - -func (self Any) String() string { - return fmt.Sprintf("[multiple:%s]", self.Separators) -} diff --git a/match/prefix_suffix.go b/match/prefix_suffix.go index 326c68a..870c955 100644 --- a/match/prefix_suffix.go +++ b/match/prefix_suffix.go @@ -1,21 +1,19 @@ package match import ( - "strings" "fmt" + "strings" ) - - type PrefixSuffix struct { Prefix, Suffix string } -func (self PrefixSuffix) kind() Kind { +func (self PrefixSuffix) Kind() Kind { return KindPrefixSuffix } -func (self PrefixSuffix) search(s string) (i int, l int, ok bool) { +func (self PrefixSuffix) Search(s string) (i int, l int, ok bool) { if self.Match(s) { return 0, len(s), true } @@ -27,7 +25,6 @@ func (self PrefixSuffix) Match(s string) bool { return strings.HasPrefix(s, self.Prefix) && strings.HasSuffix(s, self.Suffix) } - func (self PrefixSuffix) String() string { return fmt.Sprintf("[prefix_suffix:%s-%s]", self.Prefix, self.Suffix) -} \ No newline at end of file +} diff --git a/match/range.go b/match/range.go new file mode 100644 index 0000000..9ae0775 --- /dev/null +++ b/match/range.go @@ -0,0 +1,40 @@ +package match + +import ( + "fmt" +) + +type Range struct { + Lo, Hi rune + Not bool +} + +func (self Range) Kind() Kind { + return KindRange +} + +func (self Range) Match(s string) bool { + r := []rune(s) + + if len(r) != 1 { + return false + } + + inRange := r[0] >= self.Lo && r[0] <= self.Hi + + return inRange == !self.Not +} + +func (self Range) Index(s string) (index, min, max int) { + for i, r := range []rune(s) { + if self.Not != (r >= self.Lo && r <= self.Hi) { + return i, 1, 1 + } + } + + return -1, 0, 0 +} + +func (self Range) String() string { + return fmt.Sprintf("[range_between:%s-%s(%t)]", self.Lo, self.Hi, self.Not) +} diff --git a/match/raw.go b/match/raw.go index ccec392..ca717f1 100644 --- a/match/raw.go +++ b/match/raw.go @@ -1,8 +1,8 @@ package match import ( - "strings" "fmt" + "strings" ) // raw represents raw string to match @@ -18,15 +18,14 @@ func (self Raw) Kind() Kind { return KindRaw } -func (self Raw) Search(s string) (i int, l int, ok bool) { - index := strings.Index(s, self.Str) +func (self Raw) Index(s string) (index, min, max int) { + index = strings.Index(s, self.Str) if index == -1 { return } - i = index - l = len(self.Str) - ok = true + min = len(self.Str) + max = min return } diff --git a/match/single.go b/match/single.go index d9744c8..d28718b 100644 --- a/match/single.go +++ b/match/single.go @@ -1,11 +1,10 @@ package match import ( - "strings" "fmt" + "strings" ) - // single represents ? type Single struct { Separators string @@ -15,19 +14,20 @@ func (self Single) Match(s string) bool { return len([]rune(s)) == 1 && strings.IndexAny(s, self.Separators) == -1 } -func (self Single) Search(s string) (i int, l int, ok bool) { - if self.Match(s) { - return 0, len(s), true +func (self Single) Index(s string) (index, min, max int) { + for i, c := range []rune(s) { + if strings.IndexRune(self.Separators, c) == -1 { + return i, 1, 1 + } } - return + return -1, 0, 0 } func (self Single) Kind() Kind { return KindSingle } - func (self Single) String() string { return fmt.Sprintf("[single:%s]", self.Separators) } diff --git a/match/super.go b/match/super.go new file mode 100644 index 0000000..6c3169c --- /dev/null +++ b/match/super.go @@ -0,0 +1,23 @@ +package match + +import ( + "fmt" +) + +type Super struct{} + +func (self Super) Match(s string) bool { + return true +} + +func (self Super) Index(s string) (index, min, max int) { + return 0, 0, len([]rune(s)) +} + +func (self Super) Kind() Kind { + return KindSuper +} + +func (self Super) String() string { + return fmt.Sprintf("[super]") +} diff --git a/parser.go b/parser.go index d5614f7..4509b3d 100644 --- a/parser.go +++ b/parser.go @@ -3,72 +3,154 @@ package glob import ( "errors" "fmt" - "github.com/gobwas/glob/match" ) -func parseAll(source, separators string) ([]token, error) { - lexer := newLexer(source) +type node interface { + children() []node + append(node) +} - var tokens []token - for parser := parserMain; parser != nil; { - var err error - tokens, parser, err = parser(lexer, separators) +type nodeImpl struct { + desc []node +} + +func (n *nodeImpl) append(c node) { + n.desc = append(n.desc, c) +} +func (n *nodeImpl) children() []node { + return n.desc +} + +type nodeList struct { + nodeImpl + not bool + chars string +} +type nodeRange struct { + nodeImpl + not bool + lo, hi rune +} +type nodeText struct { + nodeImpl + text string +} + +type nodePattern struct{ nodeImpl } +type nodeAny struct{ nodeImpl } +type nodeSuper struct{ nodeImpl } +type nodeSingle struct{ nodeImpl } +type nodeAnyOf struct{ nodeImpl } + +type tree struct { + root node + current node + path []node +} + +func (t *tree) enter(c node) { + if t.root == nil { + t.root = c + t.current = c + return + } + + t.current.append(c) + t.path = append(t.path, c) + t.current = c +} + +func (t *tree) leave() { + if len(t.path)-1 <= 0 { + t.current = t.root + t.path = nil + return + } + + t.path = t.path[:len(t.path)-1] + t.current = t.path[len(t.path)-1] +} + +type parseFn func(*tree, *lexer) (parseFn, error) + +func parse(lexer *lexer) (*nodePattern, error) { + var parser parseFn + + root := &nodePattern{} + tree := &tree{} + tree.enter(root) + + for parser = parserMain; ; { + next, err := parser(tree, lexer) if err != nil { return nil, err } - } - return tokens, nil -} - -type parseFn func(*lexer, string) ([]token, parseFn, error) - -func parserMain(lexer *lexer, separators string) ([]token, parseFn, error) { - var ( - prev *token - tokens []token - ) - - for item := lexer.nextItem(); ; { - var t token - - if item.t == item_eof { + if next == nil { break } - switch item.t { - case item_eof: - return tokens, nil, nil - - case item_error: - return nil, nil, errors.New(item.s) - - case item_text: - t = token{match.Raw{item.s}, item.s} - - case item_any: - if prev != nil && prev.matcher.Kind() == match.KindMultipleSeparated { - // remove simple any and replace it with super_any - tokens = tokens[:len(tokens)-1] - t = token{match.Any{""}, item.s} - } else { - t = token{match.Any{separators}, item.s} - } - - case item_single: - t = token{match.Single{separators}, item.s} - - case item_range_open: - return tokens, parserRange, nil - } - - prev = &t + parser = next } - return tokens, nil, nil + return root, nil } -func parserRange(lexer *lexer, separators string) ([]token, parseFn, error) { +func parserMain(tree *tree, lexer *lexer) (parseFn, error) { + for stop := false; !stop; { + item := lexer.nextItem() + + switch item.t { + case item_eof: + stop = true + continue + + case item_error: + return nil, errors.New(item.s) + + case item_text: + tree.current.append(&nodeText{text: item.s}) + return parserMain, nil + + case item_any: + tree.current.append(&nodeAny{}) + return parserMain, nil + + case item_super: + tree.current.append(&nodeSuper{}) + return parserMain, nil + + case item_single: + tree.current.append(&nodeSingle{}) + return parserMain, nil + + case item_range_open: + return parserRange, nil + + case item_terms_open: + tree.enter(&nodeAnyOf{}) + tree.enter(&nodePattern{}) + return parserMain, nil + + case item_separator: + tree.leave() + tree.enter(&nodePattern{}) + return parserMain, nil + + case item_terms_close: + tree.leave() + tree.leave() + return parserMain, nil + + default: + return nil, fmt.Errorf("unexpected token: %s", item) + } + } + + return nil, nil +} + +func parserRange(tree *tree, lexer *lexer) (parseFn, error) { var ( not bool lo rune @@ -76,60 +158,67 @@ func parserRange(lexer *lexer, separators string) ([]token, parseFn, error) { chars string ) - for item := lexer.nextItem(); ; { + for { + item := lexer.nextItem() + switch item.t { case item_eof: - return nil, nil, errors.New("unexpected end") + return nil, errors.New("unexpected end") case item_error: - return nil, nil, errors.New(item.s) + return nil, errors.New(item.s) - case item_range_not: + case item_not: not = true case item_range_lo: r := []rune(item.s) if len(r) != 1 { - return nil, nil, fmt.Errorf("unexpected length of lo character") + return nil, fmt.Errorf("unexpected length of lo character") } lo = r[0] - case item_range_minus: + case item_range_between: // case item_range_hi: r := []rune(item.s) if len(r) != 1 { - return nil, nil, fmt.Errorf("unexpected length of hi character") - } - - if hi < lo { - return nil, nil, fmt.Errorf("hi character should be greater than lo") + return nil, fmt.Errorf("unexpected length of hi character") } hi = r[0] - case item_range_chars: + if hi < lo { + return nil, fmt.Errorf("hi character '%s' should be greater than lo '%s'", string(hi), string(lo)) + } + + case item_text: chars = item.s case item_range_close: isRange := lo != 0 && hi != 0 - isChars := chars == "" + isChars := chars != "" - if !(isChars != isRange) { - return nil, nil, fmt.Errorf("parse error: unexpected lo, hi, chars in range") + if isChars == isRange { + return nil, fmt.Errorf("could not parse range") } if isRange { - return []token{token{match.Between{lo, hi, not}, ""}}, parserMain, nil + tree.current.append(&nodeRange{ + lo: lo, + hi: hi, + not: not, + }) } else { - if len(chars) == 0 { - return nil, nil, fmt.Errorf("chars range should not be empty") - } - - return []token{token{match.RangeList{chars, not}, ""}}, parserMain, nil + tree.current.append(&nodeList{ + chars: chars, + not: not, + }) } + + return parserMain, nil } } } diff --git a/parser_test.go b/parser_test.go index 685d003..159c1ff 100644 --- a/parser_test.go +++ b/parser_test.go @@ -1,12 +1,219 @@ package glob import ( + "fmt" + "reflect" "testing" ) func TestParseString(t *testing.T) { - // lexer := newLexer("hello") - // fmt.Println(lexer.nextItem()) - // fmt.Println(lexer.nextItem()) - // fmt.Println(lexer.nextItem()) + for id, test := range []struct { + pattern string + tree node + }{ + { + pattern: "abc", + tree: &nodePattern{ + nodeImpl: nodeImpl{ + desc: []node{ + &nodeText{text: "abc"}, + }, + }, + }, + }, + { + pattern: "a*c", + tree: &nodePattern{ + nodeImpl: nodeImpl{ + desc: []node{ + &nodeText{text: "a"}, + &nodeAny{}, + &nodeText{text: "c"}, + }, + }, + }, + }, + { + pattern: "a**c", + tree: &nodePattern{ + nodeImpl: nodeImpl{ + desc: []node{ + &nodeText{text: "a"}, + &nodeSuper{}, + &nodeText{text: "c"}, + }, + }, + }, + }, + { + pattern: "a?c", + tree: &nodePattern{ + nodeImpl: nodeImpl{ + desc: []node{ + &nodeText{text: "a"}, + &nodeSingle{}, + &nodeText{text: "c"}, + }, + }, + }, + }, + { + pattern: "[!a-z]", + tree: &nodePattern{ + nodeImpl: nodeImpl{ + desc: []node{ + &nodeRange{lo: 'a', hi: 'z', not: true}, + }, + }, + }, + }, + { + pattern: "[az]", + tree: &nodePattern{ + nodeImpl: nodeImpl{ + desc: []node{ + &nodeList{chars: "az"}, + }, + }, + }, + }, + { + pattern: "{a,z}", + tree: &nodePattern{ + nodeImpl: nodeImpl{ + desc: []node{ + &nodeAnyOf{nodeImpl: nodeImpl{desc: []node{ + &nodePattern{ + nodeImpl: nodeImpl{desc: []node{ + &nodeText{text: "a"}, + }}, + }, + &nodePattern{ + nodeImpl: nodeImpl{desc: []node{ + &nodeText{text: "z"}, + }}, + }, + }}}, + }, + }, + }, + }, + { + pattern: "{a,{x,y},?,[a-z],[!qwe]}", + tree: &nodePattern{ + nodeImpl: nodeImpl{ + desc: []node{ + &nodeAnyOf{nodeImpl: nodeImpl{desc: []node{ + &nodePattern{ + nodeImpl: nodeImpl{desc: []node{ + &nodeText{text: "a"}, + }}, + }, + &nodePattern{ + nodeImpl: nodeImpl{desc: []node{ + &nodeAnyOf{nodeImpl: nodeImpl{desc: []node{ + &nodePattern{ + nodeImpl: nodeImpl{desc: []node{ + &nodeText{text: "x"}, + }}, + }, + &nodePattern{ + nodeImpl: nodeImpl{desc: []node{ + &nodeText{text: "y"}, + }}, + }, + }}}, + }}, + }, + &nodePattern{ + nodeImpl: nodeImpl{desc: []node{ + &nodeSingle{}, + }}, + }, + &nodePattern{ + nodeImpl: nodeImpl{ + desc: []node{ + &nodeRange{lo: 'a', hi: 'z', not: false}, + }, + }, + }, + &nodePattern{ + nodeImpl: nodeImpl{ + desc: []node{ + &nodeList{chars: "qwe", not: true}, + }, + }, + }, + }}}, + }, + }, + }, + }, + } { + pattern, err := parse(newLexer(test.pattern)) + if err != nil { + t.Errorf("#%d %s", id, err) + continue + } + + if !reflect.DeepEqual(test.tree, pattern) { + t.Errorf("#%d tries are not equal", id) + if err = nodeEqual(test.tree, pattern); err != nil { + t.Errorf("#%d %s", id, err) + continue + } + } + } +} + +const abstractNodeImpl = "nodeImpl" + +func nodeEqual(a, b node) error { + if (a == nil || b == nil) && a != b { + return fmt.Errorf("nodes are not equal: exp %s, act %s", a, b) + } + + aValue, bValue := reflect.Indirect(reflect.ValueOf(a)), reflect.Indirect(reflect.ValueOf(b)) + aType, bType := aValue.Type(), bValue.Type() + if aType != bType { + return fmt.Errorf("nodes are not equal: exp %s, act %s", aValue.Type(), bValue.Type()) + } + + for i := 0; i < aType.NumField(); i++ { + var eq bool + + f := aType.Field(i).Name + if f == abstractNodeImpl { + continue + } + + af, bf := aValue.FieldByName(f), bValue.FieldByName(f) + + switch af.Kind() { + case reflect.String: + eq = af.String() == bf.String() + case reflect.Bool: + eq = af.Bool() == bf.Bool() + default: + eq = fmt.Sprint(af) == fmt.Sprint(bf) + } + + if !eq { + return fmt.Errorf("nodes<%s> %q fields are not equal: exp %q, act %q", aType, f, af, bf) + } + } + + for i, aDesc := range a.children() { + if len(b.children())-1 < i { + return fmt.Errorf("node does not have enough children (got %d children, wanted %d-th token)", len(b.children()), i) + } + + bDesc := b.children()[i] + + if err := nodeEqual(aDesc, bDesc); err != nil { + return err + } + } + + return nil }