From 2d733288bc78e37c7ccb6ce1e588debb1bdf3912 Mon Sep 17 00:00:00 2001 From: gobwas Date: Fri, 27 May 2016 20:47:19 +0300 Subject: [PATCH] refactoring --- lexer.go => lexer/lexer.go | 155 +++---------- lexer/lexer_test.go | 192 ++++++++++++++++ lexer/token.go | 88 +++++++ lexer_test.go | 192 ---------------- parser.go | 230 ------------------- parser/ast.go | 48 ++++ parser/parser.go | 144 ++++++++++++ parser/parser_test.go | 332 +++++++++++++++++++++++++++ parser_test.go | 324 -------------------------- {runes => util/runes}/runes.go | 0 {runes => util/runes}/runes_test.go | 0 {strings => util/strings}/strings.go | 0 12 files changed, 832 insertions(+), 873 deletions(-) rename lexer.go => lexer/lexer.go (59%) create mode 100644 lexer/lexer_test.go create mode 100644 lexer/token.go delete mode 100644 lexer_test.go delete mode 100644 parser.go create mode 100644 parser/ast.go create mode 100644 parser/parser.go create mode 100644 parser/parser_test.go delete mode 100644 parser_test.go rename {runes => util/runes}/runes.go (100%) rename {runes => util/runes}/runes_test.go (100%) rename {strings => util/strings}/strings.go (100%) diff --git a/lexer.go b/lexer/lexer.go similarity index 59% rename from lexer.go rename to lexer/lexer.go index 9b756dc..e074b09 100644 --- a/lexer.go +++ b/lexer/lexer.go @@ -1,9 +1,9 @@ -package glob +package lexer import ( "bytes" "fmt" - "github.com/gobwas/glob/runes" + "github.com/gobwas/glob/util/runes" "unicode/utf8" ) @@ -30,123 +30,24 @@ var specials = []byte{ char_terms_close, } -func special(c byte) bool { +func Special(c byte) bool { return bytes.IndexByte(specials, c) != -1 } -type itemType int +type tokens []Token -const ( - item_eof itemType = iota - item_error - item_text - item_char - item_any - item_super - item_single - item_not - item_separator - item_range_open - item_range_close - item_range_lo - item_range_hi - item_range_between - item_terms_open - item_terms_close -) - -func (i itemType) String() string { - switch i { - case item_eof: - return "eof" - - case item_error: - return "error" - - case item_text: - return "text" - - case item_char: - return "char" - - case item_any: - return "any" - - case item_super: - return "super" - - case item_single: - return "single" - - case item_not: - return "not" - - case item_separator: - return "separator" - - case item_range_open: - return "range_open" - - case item_range_close: - return "range_close" - - case item_range_lo: - return "range_lo" - - case item_range_hi: - return "range_hi" - - case item_range_between: - return "range_between" - - case item_terms_open: - return "terms_open" - - case item_terms_close: - return "terms_close" - - default: - return "undef" - } -} - -type item struct { - t itemType - s string -} - -func (i item) String() string { - return fmt.Sprintf("%v<%q>", i.t, i.s) -} - -type stubLexer struct { - Items []item - pos int -} - -func (s *stubLexer) nextItem() (ret item) { - if s.pos == len(s.Items) { - return item{item_eof, ""} - } - ret = s.Items[s.pos] - s.pos++ - return -} - -type items []item - -func (i *items) shift() (ret item) { +func (i *tokens) shift() (ret Token) { ret = (*i)[0] copy(*i, (*i)[1:]) *i = (*i)[:len(*i)-1] return } -func (i *items) push(v item) { +func (i *tokens) push(v Token) { *i = append(*i, v) } -func (i *items) empty() bool { +func (i *tokens) empty() bool { return len(*i) == 0 } @@ -157,7 +58,7 @@ type lexer struct { pos int err error - items items + tokens tokens termsLevel int lastRune rune @@ -167,8 +68,8 @@ type lexer struct { func newLexer(source string) *lexer { l := &lexer{ - data: source, - items: items(make([]item, 0, 4)), + data: source, + tokens: tokens(make([]Token, 0, 4)), } return l } @@ -233,12 +134,12 @@ func (l *lexer) termsLeave() { l.termsLevel-- } -func (l *lexer) nextItem() item { +func (l *lexer) nextItem() Token { if l.err != nil { - return item{item_error, l.err.Error()} + return Token{Error, l.err.Error()} } - if !l.items.empty() { - return l.items.shift() + if !l.tokens.empty() { + return l.tokens.shift() } l.fetchItem() @@ -252,32 +153,32 @@ func (l *lexer) fetchItem() { r := l.read() switch { case r == eof: - l.items.push(item{item_eof, ""}) + l.tokens.push(Token{EOF, ""}) case r == char_terms_open: l.termsEnter() - l.items.push(item{item_terms_open, string(r)}) + l.tokens.push(Token{TermsOpen, string(r)}) case r == char_comma && l.inTerms(): - l.items.push(item{item_separator, string(r)}) + l.tokens.push(Token{Separator, string(r)}) case r == char_terms_close && l.inTerms(): - l.items.push(item{item_terms_close, string(r)}) + l.tokens.push(Token{TermsClose, string(r)}) l.termsLeave() case r == char_range_open: - l.items.push(item{item_range_open, string(r)}) + l.tokens.push(Token{RangeOpen, string(r)}) l.fetchRange() case r == char_single: - l.items.push(item{item_single, string(r)}) + l.tokens.push(Token{Single, string(r)}) case r == char_any: if l.read() == char_any { - l.items.push(item{item_super, string(r) + string(r)}) + l.tokens.push(Token{Super, string(r) + string(r)}) } else { l.unread() - l.items.push(item{item_any, string(r)}) + l.tokens.push(Token{Any, string(r)}) } default: @@ -308,27 +209,27 @@ func (l *lexer) fetchRange() { if r != char_range_close { l.errorf("expected close range character") } else { - l.items.push(item{item_range_close, string(r)}) + l.tokens.push(Token{RangeClose, string(r)}) } return } if wantHi { - l.items.push(item{item_range_hi, string(r)}) + l.tokens.push(Token{RangeHi, string(r)}) wantClose = true continue } if !seenNot && r == char_range_not { - l.items.push(item{item_not, string(r)}) + l.tokens.push(Token{Not, string(r)}) seenNot = true continue } if n, w := l.peek(); n == char_range_between { l.seek(w) - l.items.push(item{item_range_lo, string(r)}) - l.items.push(item{item_range_between, string(n)}) + l.tokens.push(Token{RangeLo, string(r)}) + l.tokens.push(Token{RangeBetween, string(n)}) wantHi = true continue } @@ -367,6 +268,6 @@ reading: } if len(data) > 0 { - l.items.push(item{item_text, string(data)}) + l.tokens.push(Token{Text, string(data)}) } } diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go new file mode 100644 index 0000000..8af3aa6 --- /dev/null +++ b/lexer/lexer_test.go @@ -0,0 +1,192 @@ +package lexer + +import ( + "testing" +) + +func TestLexGood(t *testing.T) { + for id, test := range []struct { + pattern string + items []Token + }{ + { + pattern: "", + items: []Token{ + Token{EOF, ""}, + }, + }, + { + pattern: "hello", + items: []Token{ + Token{Text, "hello"}, + Token{EOF, ""}, + }, + }, + { + pattern: "/{rate,[0-9]]}*", + items: []Token{ + Token{Text, "/"}, + Token{TermsOpen, "{"}, + Token{Text, "rate"}, + Token{Separator, ","}, + Token{RangeOpen, "["}, + Token{RangeLo, "0"}, + Token{RangeBetween, "-"}, + Token{RangeHi, "9"}, + Token{RangeClose, "]"}, + Token{Text, "]"}, + Token{TermsClose, "}"}, + Token{Any, "*"}, + Token{EOF, ""}, + }, + }, + { + pattern: "hello,world", + items: []Token{ + Token{Text, "hello,world"}, + Token{EOF, ""}, + }, + }, + { + pattern: "hello\\,world", + items: []Token{ + Token{Text, "hello,world"}, + Token{EOF, ""}, + }, + }, + { + pattern: "hello\\{world", + items: []Token{ + Token{Text, "hello{world"}, + Token{EOF, ""}, + }, + }, + { + pattern: "hello?", + items: []Token{ + Token{Text, "hello"}, + Token{Single, "?"}, + Token{EOF, ""}, + }, + }, + { + pattern: "hellof*", + items: []Token{ + Token{Text, "hellof"}, + Token{Any, "*"}, + Token{EOF, ""}, + }, + }, + { + pattern: "hello**", + items: []Token{ + Token{Text, "hello"}, + Token{Super, "**"}, + Token{EOF, ""}, + }, + }, + { + pattern: "[日-語]", + items: []Token{ + Token{RangeOpen, "["}, + Token{RangeLo, "日"}, + Token{RangeBetween, "-"}, + Token{RangeHi, "語"}, + Token{RangeClose, "]"}, + Token{EOF, ""}, + }, + }, + { + pattern: "[!日-語]", + items: []Token{ + Token{RangeOpen, "["}, + Token{Not, "!"}, + Token{RangeLo, "日"}, + Token{RangeBetween, "-"}, + Token{RangeHi, "語"}, + Token{RangeClose, "]"}, + Token{EOF, ""}, + }, + }, + { + pattern: "[日本語]", + items: []Token{ + Token{RangeOpen, "["}, + Token{Text, "日本語"}, + Token{RangeClose, "]"}, + Token{EOF, ""}, + }, + }, + { + pattern: "[!日本語]", + items: []Token{ + Token{RangeOpen, "["}, + Token{Not, "!"}, + Token{Text, "日本語"}, + Token{RangeClose, "]"}, + Token{EOF, ""}, + }, + }, + { + pattern: "{a,b}", + items: []Token{ + Token{TermsOpen, "{"}, + Token{Text, "a"}, + Token{Separator, ","}, + Token{Text, "b"}, + Token{TermsClose, "}"}, + Token{EOF, ""}, + }, + }, + { + pattern: "/{z,ab}*", + items: []Token{ + Token{Text, "/"}, + Token{TermsOpen, "{"}, + Token{Text, "z"}, + Token{Separator, ","}, + Token{Text, "ab"}, + Token{TermsClose, "}"}, + Token{Any, "*"}, + Token{EOF, ""}, + }, + }, + { + pattern: "{[!日-語],*,?,{a,b,\\c}}", + items: []Token{ + Token{TermsOpen, "{"}, + Token{RangeOpen, "["}, + Token{Not, "!"}, + Token{RangeLo, "日"}, + Token{RangeBetween, "-"}, + Token{RangeHi, "語"}, + Token{RangeClose, "]"}, + Token{Separator, ","}, + Token{Any, "*"}, + Token{Separator, ","}, + Token{Single, "?"}, + Token{Separator, ","}, + Token{TermsOpen, "{"}, + Token{Text, "a"}, + Token{Separator, ","}, + Token{Text, "b"}, + Token{Separator, ","}, + Token{Text, "c"}, + Token{TermsClose, "}"}, + Token{TermsClose, "}"}, + Token{EOF, ""}, + }, + }, + } { + lexer := newLexer(test.pattern) + for i, exp := range test.items { + act := lexer.nextItem() + if act.Type != exp.Type { + t.Errorf("#%d %q: wrong %d-th item type: exp: %q; act: %q\n\t(%s vs %s)", id, test.pattern, i, exp.Type, act.Type, exp, act) + } + if act.Raw != exp.Raw { + t.Errorf("#%d %q: wrong %d-th item contents: exp: %q; act: %q\n\t(%s vs %s)", id, test.pattern, i, exp.Raw, act.Raw, exp, act) + } + } + } +} diff --git a/lexer/token.go b/lexer/token.go new file mode 100644 index 0000000..2797c4e --- /dev/null +++ b/lexer/token.go @@ -0,0 +1,88 @@ +package lexer + +import "fmt" + +type TokenType int + +const ( + EOF TokenType = iota + Error + Text + Char + Any + Super + Single + Not + Separator + RangeOpen + RangeClose + RangeLo + RangeHi + RangeBetween + TermsOpen + TermsClose +) + +func (tt TokenType) String() string { + switch tt { + case EOF: + return "eof" + + case Error: + return "error" + + case Text: + return "text" + + case Char: + return "char" + + case Any: + return "any" + + case Super: + return "super" + + case Single: + return "single" + + case Not: + return "not" + + case Separator: + return "separator" + + case RangeOpen: + return "range_open" + + case RangeClose: + return "range_close" + + case RangeLo: + return "range_lo" + + case RangeHi: + return "range_hi" + + case RangeBetween: + return "range_between" + + case TermsOpen: + return "terms_open" + + case TermsClose: + return "terms_close" + + default: + return "undef" + } +} + +type Token struct { + Type TokenType + Raw string +} + +func (t Token) String() string { + return fmt.Sprintf("%v<%q>", t.Type, t.Raw) +} diff --git a/lexer_test.go b/lexer_test.go deleted file mode 100644 index 8ede767..0000000 --- a/lexer_test.go +++ /dev/null @@ -1,192 +0,0 @@ -package glob - -import ( - "testing" -) - -func TestLexGood(t *testing.T) { - for id, test := range []struct { - pattern string - items []item - }{ - { - pattern: "", - items: []item{ - item{item_eof, ""}, - }, - }, - { - pattern: "hello", - items: []item{ - item{item_text, "hello"}, - item{item_eof, ""}, - }, - }, - { - pattern: "/{rate,[0-9]]}*", - items: []item{ - item{item_text, "/"}, - item{item_terms_open, "{"}, - item{item_text, "rate"}, - item{item_separator, ","}, - item{item_range_open, "["}, - item{item_range_lo, "0"}, - item{item_range_between, "-"}, - item{item_range_hi, "9"}, - item{item_range_close, "]"}, - item{item_text, "]"}, - item{item_terms_close, "}"}, - item{item_any, "*"}, - item{item_eof, ""}, - }, - }, - { - pattern: "hello,world", - items: []item{ - item{item_text, "hello,world"}, - item{item_eof, ""}, - }, - }, - { - pattern: "hello\\,world", - items: []item{ - item{item_text, "hello,world"}, - item{item_eof, ""}, - }, - }, - { - pattern: "hello\\{world", - items: []item{ - item{item_text, "hello{world"}, - item{item_eof, ""}, - }, - }, - { - pattern: "hello?", - items: []item{ - item{item_text, "hello"}, - item{item_single, "?"}, - item{item_eof, ""}, - }, - }, - { - pattern: "hellof*", - items: []item{ - item{item_text, "hellof"}, - item{item_any, "*"}, - item{item_eof, ""}, - }, - }, - { - pattern: "hello**", - items: []item{ - item{item_text, "hello"}, - item{item_super, "**"}, - item{item_eof, ""}, - }, - }, - { - pattern: "[日-語]", - items: []item{ - item{item_range_open, "["}, - item{item_range_lo, "日"}, - item{item_range_between, "-"}, - item{item_range_hi, "語"}, - item{item_range_close, "]"}, - item{item_eof, ""}, - }, - }, - { - pattern: "[!日-語]", - items: []item{ - item{item_range_open, "["}, - item{item_not, "!"}, - item{item_range_lo, "日"}, - item{item_range_between, "-"}, - item{item_range_hi, "語"}, - item{item_range_close, "]"}, - item{item_eof, ""}, - }, - }, - { - pattern: "[日本語]", - items: []item{ - item{item_range_open, "["}, - item{item_text, "日本語"}, - item{item_range_close, "]"}, - item{item_eof, ""}, - }, - }, - { - pattern: "[!日本語]", - items: []item{ - item{item_range_open, "["}, - item{item_not, "!"}, - item{item_text, "日本語"}, - item{item_range_close, "]"}, - item{item_eof, ""}, - }, - }, - { - pattern: "{a,b}", - items: []item{ - item{item_terms_open, "{"}, - item{item_text, "a"}, - item{item_separator, ","}, - item{item_text, "b"}, - item{item_terms_close, "}"}, - item{item_eof, ""}, - }, - }, - { - pattern: "/{z,ab}*", - items: []item{ - item{item_text, "/"}, - item{item_terms_open, "{"}, - item{item_text, "z"}, - item{item_separator, ","}, - item{item_text, "ab"}, - item{item_terms_close, "}"}, - item{item_any, "*"}, - item{item_eof, ""}, - }, - }, - { - pattern: "{[!日-語],*,?,{a,b,\\c}}", - items: []item{ - item{item_terms_open, "{"}, - item{item_range_open, "["}, - item{item_not, "!"}, - item{item_range_lo, "日"}, - item{item_range_between, "-"}, - item{item_range_hi, "語"}, - item{item_range_close, "]"}, - item{item_separator, ","}, - item{item_any, "*"}, - item{item_separator, ","}, - item{item_single, "?"}, - item{item_separator, ","}, - item{item_terms_open, "{"}, - item{item_text, "a"}, - item{item_separator, ","}, - item{item_text, "b"}, - item{item_separator, ","}, - item{item_text, "c"}, - item{item_terms_close, "}"}, - item{item_terms_close, "}"}, - item{item_eof, ""}, - }, - }, - } { - lexer := newLexer(test.pattern) - for i, exp := range test.items { - act := lexer.nextItem() - if act.t != exp.t { - t.Errorf("#%d %q: wrong %d-th item type: exp: %q; act: %q\n\t(%s vs %s)", id, test.pattern, i, exp.t, act.t, exp, act) - } - if act.s != exp.s { - t.Errorf("#%d %q: wrong %d-th item contents: exp: %q; act: %q\n\t(%s vs %s)", id, test.pattern, i, exp.s, act.s, exp, act) - } - } - } -} diff --git a/parser.go b/parser.go deleted file mode 100644 index 760ec96..0000000 --- a/parser.go +++ /dev/null @@ -1,230 +0,0 @@ -package glob - -import ( - "errors" - "fmt" - "unicode/utf8" -) - -type node interface { - children() []node - append(node) -} - -// todo may be split it into another package -type lexerIface interface { - nextItem() item -} - -type nodeImpl struct { - desc []node -} - -func (n *nodeImpl) append(c node) { - n.desc = append(n.desc, c) -} -func (n *nodeImpl) children() []node { - return n.desc -} - -type nodeList struct { - nodeImpl - not bool - chars string -} -type nodeRange struct { - nodeImpl - not bool - lo, hi rune -} -type nodeText struct { - nodeImpl - text string -} - -type nodePattern struct{ nodeImpl } -type nodeAny struct{ nodeImpl } -type nodeSuper struct{ nodeImpl } -type nodeSingle struct{ nodeImpl } -type nodeAnyOf struct{ nodeImpl } - -type tree struct { - root node - current node - path []node -} - -func (t *tree) enter(c node) { - if t.root == nil { - t.root = c - t.current = c - return - } - - t.current.append(c) - t.path = append(t.path, c) - t.current = c -} - -func (t *tree) leave() { - if len(t.path)-1 <= 0 { - t.current = t.root - t.path = nil - return - } - - t.path = t.path[:len(t.path)-1] - t.current = t.path[len(t.path)-1] -} - -type parseFn func(*tree, lexerIface) (parseFn, error) - -func parse(lexer lexerIface) (*nodePattern, error) { - var parser parseFn - - root := &nodePattern{} - tree := &tree{} - tree.enter(root) - - for parser = parserMain; ; { - next, err := parser(tree, lexer) - if err != nil { - return nil, err - } - - if next == nil { - break - } - - parser = next - } - - return root, nil -} - -func parserMain(tree *tree, lexer lexerIface) (parseFn, error) { - for stop := false; !stop; { - item := lexer.nextItem() - - switch item.t { - case item_eof: - stop = true - continue - - case item_error: - return nil, errors.New(item.s) - - case item_text: - tree.current.append(&nodeText{text: item.s}) - return parserMain, nil - - case item_any: - tree.current.append(&nodeAny{}) - return parserMain, nil - - case item_super: - tree.current.append(&nodeSuper{}) - return parserMain, nil - - case item_single: - tree.current.append(&nodeSingle{}) - return parserMain, nil - - case item_range_open: - return parserRange, nil - - case item_terms_open: - tree.enter(&nodeAnyOf{}) - tree.enter(&nodePattern{}) - return parserMain, nil - - case item_separator: - tree.leave() - tree.enter(&nodePattern{}) - return parserMain, nil - - case item_terms_close: - tree.leave() - tree.leave() - return parserMain, nil - - default: - return nil, fmt.Errorf("unexpected token: %s", item) - } - } - - return nil, nil -} - -func parserRange(tree *tree, lexer lexerIface) (parseFn, error) { - var ( - not bool - lo rune - hi rune - chars string - ) - - for { - item := lexer.nextItem() - - switch item.t { - case item_eof: - return nil, errors.New("unexpected end") - - case item_error: - return nil, errors.New(item.s) - - case item_not: - not = true - - case item_range_lo: - r, w := utf8.DecodeRuneInString(item.s) - if len(item.s) > w { - return nil, fmt.Errorf("unexpected length of lo character") - } - - lo = r - - case item_range_between: - // - - case item_range_hi: - r, w := utf8.DecodeRuneInString(item.s) - if len(item.s) > w { - return nil, fmt.Errorf("unexpected length of lo character") - } - - hi = r - - if hi < lo { - return nil, fmt.Errorf("hi character '%s' should be greater than lo '%s'", string(hi), string(lo)) - } - - case item_text: - chars = item.s - - case item_range_close: - isRange := lo != 0 && hi != 0 - isChars := chars != "" - - if isChars == isRange { - return nil, fmt.Errorf("could not parse range") - } - - if isRange { - tree.current.append(&nodeRange{ - lo: lo, - hi: hi, - not: not, - }) - } else { - tree.current.append(&nodeList{ - chars: chars, - not: not, - }) - } - - return parserMain, nil - } - } -} diff --git a/parser/ast.go b/parser/ast.go new file mode 100644 index 0000000..588a45c --- /dev/null +++ b/parser/ast.go @@ -0,0 +1,48 @@ +package parser + +type Node interface { + Children() []Node + Parent() Node + append(Node) Node +} + +type node struct { + parent Node + children []Node +} + +func (n *node) Children() []Node { + return n.children +} + +func (n *node) Parent() Node { + return n.parent +} + +func (n *node) append(c Node) Node { + n.children = append(n.children, c) + return c +} + +type ListNode struct { + node + Not bool + Chars string +} + +type RangeNode struct { + node + Not bool + Lo, Hi rune +} + +type TextNode struct { + node + Text string +} + +type PatternNode struct{ node } +type AnyNode struct{ node } +type SuperNode struct{ node } +type SingleNode struct{ node } +type AnyOfNode struct{ node } diff --git a/parser/parser.go b/parser/parser.go new file mode 100644 index 0000000..6adea39 --- /dev/null +++ b/parser/parser.go @@ -0,0 +1,144 @@ +package parser + +import ( + "errors" + "fmt" + "github.com/gobwas/glob/lexer" + "unicode/utf8" +) + +type Lexer interface { + Next() lexer.Token +} + +type parseFn func(Node, Lexer) (parseFn, Node, error) + +func Parse(lexer Lexer) (*PatternNode, error) { + var parser parseFn + + root := &PatternNode{} + + var ( + tree Node + err error + ) + for parser, tree = parserMain, root; parser != nil; { + parser, tree, err = parser(tree, lexer) + if err != nil { + return nil, err + } + } + + return root, nil +} + +func parserMain(tree Node, lex Lexer) (parseFn, Node, error) { + for { + token := lex.Next() + switch token.Type { + case lexer.EOF: + return nil, tree, nil + + case lexer.Error: + return nil, tree, errors.New(token.Raw) + + case lexer.Text: + return parserMain, tree.append(&TextNode{Text: token.Raw}), nil + + case lexer.Any: + return parserMain, tree.append(&AnyNode{}), nil + + case lexer.Super: + return parserMain, tree.append(&SuperNode{}), nil + + case lexer.Single: + return parserMain, tree.append(&SingleNode{}), nil + + case lexer.RangeOpen: + return parserRange, tree, nil + + case lexer.TermsOpen: + return parserMain, tree.append(&AnyOfNode{}).append(&PatternNode{}), nil + + case lexer.Separator: + return parserMain, tree.Parent().append(&PatternNode{}), nil + + case lexer.TermsClose: + return parserMain, tree.Parent().Parent(), nil + + default: + return nil, tree, fmt.Errorf("unexpected token: %s", token) + } + } + return nil, tree, fmt.Errorf("unknown error") +} + +func parserRange(tree Node, lex Lexer) (parseFn, Node, error) { + var ( + not bool + lo rune + hi rune + chars string + ) + for { + token := lex.Next() + switch token.Type { + case lexer.EOF: + return nil, tree, errors.New("unexpected end") + + case lexer.Error: + return nil, tree, errors.New(token.Raw) + + case lexer.Not: + not = true + + case lexer.RangeLo: + r, w := utf8.DecodeRuneInString(token.Raw) + if len(token.Raw) > w { + return nil, tree, fmt.Errorf("unexpected length of lo character") + } + lo = r + + case lexer.RangeBetween: + // + + case lexer.RangeHi: + r, w := utf8.DecodeRuneInString(token.Raw) + if len(token.Raw) > w { + return nil, tree, fmt.Errorf("unexpected length of lo character") + } + + hi = r + + if hi < lo { + return nil, tree, fmt.Errorf("hi character '%s' should be greater than lo '%s'", string(hi), string(lo)) + } + + case lexer.Text: + chars = token.Raw + + case lexer.RangeClose: + isRange := lo != 0 && hi != 0 + isChars := chars != "" + + if isChars == isRange { + return nil, tree, fmt.Errorf("could not parse range") + } + + if isRange { + tree = tree.append(&RangeNode{ + Lo: lo, + Hi: hi, + Not: not, + }) + } else { + tree = tree.append(&ListNode{ + Chars: chars, + Not: not, + }) + } + + return parserMain, tree, nil + } + } +} diff --git a/parser/parser_test.go b/parser/parser_test.go new file mode 100644 index 0000000..177d7bf --- /dev/null +++ b/parser/parser_test.go @@ -0,0 +1,332 @@ +package parser + +import ( + "fmt" + "github.com/gobwas/glob/lexer" + "reflect" + "testing" +) + +type stubLexer struct { + tokens []lexer.Token + pos int +} + +func (s *stubLexer) Next() (ret lexer.Token) { + if s.pos == len(s.tokens) { + return lexer.Token{lexer.EOF, ""} + } + ret = s.tokens[s.pos] + s.pos++ + return +} + +func TestParseString(t *testing.T) { + for id, test := range []struct { + tokens []lexer.Token + tree Node + }{ + { + //pattern: "abc", + tokens: []lexer.Token{ + lexer.Token{lexer.Text, "abc"}, + lexer.Token{lexer.EOF, ""}, + }, + tree: &PatternNode{ + node: node{ + children: []Node{ + &TextNode{Text: "abc"}, + }, + }, + }, + }, + { + //pattern: "a*c", + tokens: []lexer.Token{ + lexer.Token{lexer.Text, "a"}, + lexer.Token{lexer.Any, "*"}, + lexer.Token{lexer.Text, "c"}, + lexer.Token{lexer.EOF, ""}, + }, + tree: &PatternNode{ + node: node{ + children: []Node{ + &TextNode{Text: "a"}, + &AnyNode{}, + &TextNode{Text: "c"}, + }, + }, + }, + }, + { + //pattern: "a**c", + tokens: []lexer.Token{ + lexer.Token{lexer.Text, "a"}, + lexer.Token{lexer.Super, "**"}, + lexer.Token{lexer.Text, "c"}, + lexer.Token{lexer.EOF, ""}, + }, + tree: &PatternNode{ + node: node{ + children: []Node{ + &TextNode{Text: "a"}, + &SuperNode{}, + &TextNode{Text: "c"}, + }, + }, + }, + }, + { + //pattern: "a?c", + tokens: []lexer.Token{ + lexer.Token{lexer.Text, "a"}, + lexer.Token{lexer.Single, "?"}, + lexer.Token{lexer.Text, "c"}, + lexer.Token{lexer.EOF, ""}, + }, + tree: &PatternNode{ + node: node{ + children: []Node{ + &TextNode{Text: "a"}, + &SingleNode{}, + &TextNode{Text: "c"}, + }, + }, + }, + }, + { + //pattern: "[!a-z]", + tokens: []lexer.Token{ + lexer.Token{lexer.RangeOpen, "["}, + lexer.Token{lexer.Not, "!"}, + lexer.Token{lexer.RangeLo, "a"}, + lexer.Token{lexer.RangeBetween, "-"}, + lexer.Token{lexer.RangeHi, "z"}, + lexer.Token{lexer.RangeClose, "]"}, + lexer.Token{lexer.EOF, ""}, + }, + tree: &PatternNode{ + node: node{ + children: []Node{ + &RangeNode{Lo: 'a', Hi: 'z', Not: true}, + }, + }, + }, + }, + { + //pattern: "[az]", + tokens: []lexer.Token{ + lexer.Token{lexer.RangeOpen, "["}, + lexer.Token{lexer.Text, "az"}, + lexer.Token{lexer.RangeClose, "]"}, + lexer.Token{lexer.EOF, ""}, + }, + tree: &PatternNode{ + node: node{ + children: []Node{ + &ListNode{Chars: "az"}, + }, + }, + }, + }, + { + //pattern: "{a,z}", + tokens: []lexer.Token{ + lexer.Token{lexer.TermsOpen, "{"}, + lexer.Token{lexer.Text, "a"}, + lexer.Token{lexer.Separator, ","}, + lexer.Token{lexer.Text, "z"}, + lexer.Token{lexer.TermsClose, "}"}, + lexer.Token{lexer.EOF, ""}, + }, + tree: &PatternNode{ + node: node{ + children: []Node{ + &AnyOfNode{node: node{children: []Node{ + &PatternNode{ + node: node{children: []Node{ + &TextNode{Text: "a"}, + }}, + }, + &PatternNode{ + node: node{children: []Node{ + &TextNode{Text: "z"}, + }}, + }, + }}}, + }, + }, + }, + }, + { + //pattern: "/{z,ab}*", + tokens: []lexer.Token{ + lexer.Token{lexer.Text, "/"}, + lexer.Token{lexer.TermsOpen, "{"}, + lexer.Token{lexer.Text, "z"}, + lexer.Token{lexer.Separator, ","}, + lexer.Token{lexer.Text, "ab"}, + lexer.Token{lexer.TermsClose, "}"}, + lexer.Token{lexer.Any, "*"}, + lexer.Token{lexer.EOF, ""}, + }, + tree: &PatternNode{ + node: node{ + children: []Node{ + &TextNode{Text: "/"}, + &AnyOfNode{node: node{children: []Node{ + &PatternNode{ + node: node{children: []Node{ + &TextNode{Text: "z"}, + }}, + }, + &PatternNode{ + node: node{children: []Node{ + &TextNode{Text: "ab"}, + }}, + }, + }}}, + &AnyNode{}, + }, + }, + }, + }, + { + //pattern: "{a,{x,y},?,[a-z],[!qwe]}", + tokens: []lexer.Token{ + lexer.Token{lexer.TermsOpen, "{"}, + lexer.Token{lexer.Text, "a"}, + lexer.Token{lexer.Separator, ","}, + lexer.Token{lexer.TermsOpen, "{"}, + lexer.Token{lexer.Text, "x"}, + lexer.Token{lexer.Separator, ","}, + lexer.Token{lexer.Text, "y"}, + lexer.Token{lexer.TermsClose, "}"}, + lexer.Token{lexer.Separator, ","}, + lexer.Token{lexer.Single, "?"}, + lexer.Token{lexer.Separator, ","}, + lexer.Token{lexer.RangeOpen, "["}, + lexer.Token{lexer.RangeLo, "a"}, + lexer.Token{lexer.RangeBetween, "-"}, + lexer.Token{lexer.RangeHi, "z"}, + lexer.Token{lexer.RangeClose, "]"}, + lexer.Token{lexer.Separator, ","}, + lexer.Token{lexer.RangeOpen, "["}, + lexer.Token{lexer.Not, "!"}, + lexer.Token{lexer.Text, "qwe"}, + lexer.Token{lexer.RangeClose, "]"}, + lexer.Token{lexer.TermsClose, "}"}, + lexer.Token{lexer.EOF, ""}, + }, + tree: &PatternNode{ + node: node{ + children: []Node{ + &AnyOfNode{node: node{children: []Node{ + &PatternNode{ + node: node{children: []Node{ + &TextNode{Text: "a"}, + }}, + }, + &PatternNode{ + node: node{children: []Node{ + &AnyOfNode{node: node{children: []Node{ + &PatternNode{ + node: node{children: []Node{ + &TextNode{Text: "x"}, + }}, + }, + &PatternNode{ + node: node{children: []Node{ + &TextNode{Text: "y"}, + }}, + }, + }}}, + }}, + }, + &PatternNode{ + node: node{children: []Node{ + &SingleNode{}, + }}, + }, + &PatternNode{ + node: node{ + children: []Node{ + &RangeNode{Lo: 'a', Hi: 'z', Not: false}, + }, + }, + }, + &PatternNode{ + node: node{ + children: []Node{ + &ListNode{Chars: "qwe", Not: true}, + }, + }, + }, + }}}, + }, + }, + }, + }, + } { + lexer := &stubLexer{tokens: test.tokens} + result, err := Parse(lexer) + if err != nil { + t.Errorf("[%d] unexpected error: %s", id, err) + } + if !reflect.DeepEqual(test.tree, result) { + t.Errorf("[%d] Parse():\nact:\t%s\nexp:\t%s\n", id, result, test.tree) + } + } +} + +const abstractNodeImpl = "nodeImpl" + +func nodeEqual(a, b Node) error { + if (a == nil || b == nil) && a != b { + return fmt.Errorf("nodes are not equal: exp %s, act %s", a, b) + } + + aValue, bValue := reflect.Indirect(reflect.ValueOf(a)), reflect.Indirect(reflect.ValueOf(b)) + aType, bType := aValue.Type(), bValue.Type() + if aType != bType { + return fmt.Errorf("nodes are not equal: exp %s, act %s", aValue.Type(), bValue.Type()) + } + + for i := 0; i < aType.NumField(); i++ { + var eq bool + + f := aType.Field(i).Name + if f == abstractNodeImpl { + continue + } + + af, bf := aValue.FieldByName(f), bValue.FieldByName(f) + + switch af.Kind() { + case reflect.String: + eq = af.String() == bf.String() + case reflect.Bool: + eq = af.Bool() == bf.Bool() + default: + eq = fmt.Sprint(af) == fmt.Sprint(bf) + } + + if !eq { + return fmt.Errorf("nodes<%s> %q fields are not equal: exp %q, act %q", aType, f, af, bf) + } + } + + for i, aDesc := range a.Children() { + if len(b.Children())-1 < i { + return fmt.Errorf("node does not have enough children (got %d children, wanted %d-th token)", len(b.Children()), i) + } + + bDesc := b.Children()[i] + + if err := nodeEqual(aDesc, bDesc); err != nil { + return err + } + } + + return nil +} diff --git a/parser_test.go b/parser_test.go deleted file mode 100644 index 0e9312f..0000000 --- a/parser_test.go +++ /dev/null @@ -1,324 +0,0 @@ -package glob - -import ( - "fmt" - "reflect" - "testing" -) - -func TestParseString(t *testing.T) { - for id, test := range []struct { - items []item - tree node - }{ - { - //pattern: "abc", - items: []item{ - item{item_text, "abc"}, - item{item_eof, ""}, - }, - tree: &nodePattern{ - nodeImpl: nodeImpl{ - desc: []node{ - &nodeText{text: "abc"}, - }, - }, - }, - }, - { - //pattern: "a*c", - items: []item{ - item{item_text, "a"}, - item{item_any, "*"}, - item{item_text, "c"}, - item{item_eof, ""}, - }, - tree: &nodePattern{ - nodeImpl: nodeImpl{ - desc: []node{ - &nodeText{text: "a"}, - &nodeAny{}, - &nodeText{text: "c"}, - }, - }, - }, - }, - { - //pattern: "a**c", - items: []item{ - item{item_text, "a"}, - item{item_super, "**"}, - item{item_text, "c"}, - item{item_eof, ""}, - }, - tree: &nodePattern{ - nodeImpl: nodeImpl{ - desc: []node{ - &nodeText{text: "a"}, - &nodeSuper{}, - &nodeText{text: "c"}, - }, - }, - }, - }, - { - //pattern: "a?c", - items: []item{ - item{item_text, "a"}, - item{item_single, "?"}, - item{item_text, "c"}, - item{item_eof, ""}, - }, - tree: &nodePattern{ - nodeImpl: nodeImpl{ - desc: []node{ - &nodeText{text: "a"}, - &nodeSingle{}, - &nodeText{text: "c"}, - }, - }, - }, - }, - { - //pattern: "[!a-z]", - items: []item{ - item{item_range_open, "["}, - item{item_not, "!"}, - item{item_range_lo, "a"}, - item{item_range_between, "-"}, - item{item_range_hi, "z"}, - item{item_range_close, "]"}, - item{item_eof, ""}, - }, - tree: &nodePattern{ - nodeImpl: nodeImpl{ - desc: []node{ - &nodeRange{lo: 'a', hi: 'z', not: true}, - }, - }, - }, - }, - { - //pattern: "[az]", - items: []item{ - item{item_range_open, "["}, - item{item_text, "az"}, - item{item_range_close, "]"}, - item{item_eof, ""}, - }, - tree: &nodePattern{ - nodeImpl: nodeImpl{ - desc: []node{ - &nodeList{chars: "az"}, - }, - }, - }, - }, - { - //pattern: "{a,z}", - items: []item{ - item{item_terms_open, "{"}, - item{item_text, "a"}, - item{item_separator, ","}, - item{item_text, "z"}, - item{item_terms_close, "}"}, - item{item_eof, ""}, - }, - tree: &nodePattern{ - nodeImpl: nodeImpl{ - desc: []node{ - &nodeAnyOf{nodeImpl: nodeImpl{desc: []node{ - &nodePattern{ - nodeImpl: nodeImpl{desc: []node{ - &nodeText{text: "a"}, - }}, - }, - &nodePattern{ - nodeImpl: nodeImpl{desc: []node{ - &nodeText{text: "z"}, - }}, - }, - }}}, - }, - }, - }, - }, - { - //pattern: "/{z,ab}*", - items: []item{ - item{item_text, "/"}, - item{item_terms_open, "{"}, - item{item_text, "z"}, - item{item_separator, ","}, - item{item_text, "ab"}, - item{item_terms_close, "}"}, - item{item_any, "*"}, - item{item_eof, ""}, - }, - tree: &nodePattern{ - nodeImpl: nodeImpl{ - desc: []node{ - &nodeText{text: "/"}, - &nodeAnyOf{nodeImpl: nodeImpl{desc: []node{ - &nodePattern{ - nodeImpl: nodeImpl{desc: []node{ - &nodeText{text: "z"}, - }}, - }, - &nodePattern{ - nodeImpl: nodeImpl{desc: []node{ - &nodeText{text: "ab"}, - }}, - }, - }}}, - &nodeAny{}, - }, - }, - }, - }, - { - //pattern: "{a,{x,y},?,[a-z],[!qwe]}", - items: []item{ - item{item_terms_open, "{"}, - item{item_text, "a"}, - item{item_separator, ","}, - item{item_terms_open, "{"}, - item{item_text, "x"}, - item{item_separator, ","}, - item{item_text, "y"}, - item{item_terms_close, "}"}, - item{item_separator, ","}, - item{item_single, "?"}, - item{item_separator, ","}, - item{item_range_open, "["}, - item{item_range_lo, "a"}, - item{item_range_between, "-"}, - item{item_range_hi, "z"}, - item{item_range_close, "]"}, - item{item_separator, ","}, - item{item_range_open, "["}, - item{item_not, "!"}, - item{item_text, "qwe"}, - item{item_range_close, "]"}, - item{item_terms_close, "}"}, - item{item_eof, ""}, - }, - tree: &nodePattern{ - nodeImpl: nodeImpl{ - desc: []node{ - &nodeAnyOf{nodeImpl: nodeImpl{desc: []node{ - &nodePattern{ - nodeImpl: nodeImpl{desc: []node{ - &nodeText{text: "a"}, - }}, - }, - &nodePattern{ - nodeImpl: nodeImpl{desc: []node{ - &nodeAnyOf{nodeImpl: nodeImpl{desc: []node{ - &nodePattern{ - nodeImpl: nodeImpl{desc: []node{ - &nodeText{text: "x"}, - }}, - }, - &nodePattern{ - nodeImpl: nodeImpl{desc: []node{ - &nodeText{text: "y"}, - }}, - }, - }}}, - }}, - }, - &nodePattern{ - nodeImpl: nodeImpl{desc: []node{ - &nodeSingle{}, - }}, - }, - &nodePattern{ - nodeImpl: nodeImpl{ - desc: []node{ - &nodeRange{lo: 'a', hi: 'z', not: false}, - }, - }, - }, - &nodePattern{ - nodeImpl: nodeImpl{ - desc: []node{ - &nodeList{chars: "qwe", not: true}, - }, - }, - }, - }}}, - }, - }, - }, - }, - } { - lexer := &stubLexer{Items: test.items} - pattern, err := parse(lexer) - - if err != nil { - t.Errorf("#%d %s", id, err) - continue - } - - if !reflect.DeepEqual(test.tree, pattern) { - t.Errorf("#%d tries are not equal", id) - if err = nodeEqual(test.tree, pattern); err != nil { - t.Errorf("#%d %s", id, err) - continue - } - } - } -} - -const abstractNodeImpl = "nodeImpl" - -func nodeEqual(a, b node) error { - if (a == nil || b == nil) && a != b { - return fmt.Errorf("nodes are not equal: exp %s, act %s", a, b) - } - - aValue, bValue := reflect.Indirect(reflect.ValueOf(a)), reflect.Indirect(reflect.ValueOf(b)) - aType, bType := aValue.Type(), bValue.Type() - if aType != bType { - return fmt.Errorf("nodes are not equal: exp %s, act %s", aValue.Type(), bValue.Type()) - } - - for i := 0; i < aType.NumField(); i++ { - var eq bool - - f := aType.Field(i).Name - if f == abstractNodeImpl { - continue - } - - af, bf := aValue.FieldByName(f), bValue.FieldByName(f) - - switch af.Kind() { - case reflect.String: - eq = af.String() == bf.String() - case reflect.Bool: - eq = af.Bool() == bf.Bool() - default: - eq = fmt.Sprint(af) == fmt.Sprint(bf) - } - - if !eq { - return fmt.Errorf("nodes<%s> %q fields are not equal: exp %q, act %q", aType, f, af, bf) - } - } - - for i, aDesc := range a.children() { - if len(b.children())-1 < i { - return fmt.Errorf("node does not have enough children (got %d children, wanted %d-th token)", len(b.children()), i) - } - - bDesc := b.children()[i] - - if err := nodeEqual(aDesc, bDesc); err != nil { - return err - } - } - - return nil -} diff --git a/runes/runes.go b/util/runes/runes.go similarity index 100% rename from runes/runes.go rename to util/runes/runes.go diff --git a/runes/runes_test.go b/util/runes/runes_test.go similarity index 100% rename from runes/runes_test.go rename to util/runes/runes_test.go diff --git a/strings/strings.go b/util/strings/strings.go similarity index 100% rename from strings/strings.go rename to util/strings/strings.go