lexer tests

This commit is contained in:
s.kamardin 2015-12-25 19:40:36 +03:00
parent eba73b4b86
commit d400ac872c
5 changed files with 254 additions and 77 deletions

18
glob.go
View File

@ -1,9 +1,9 @@
package glob package glob
import ( import (
"strings"
"github.com/gobwas/glob/match"
"fmt" "fmt"
"github.com/gobwas/glob/match"
"strings"
) )
const ( const (
@ -67,7 +67,6 @@ func New(pattern string, separators ...string) (Glob, error) {
return &match.Composite{c}, nil return &match.Composite{c}, nil
} }
// parse parsed given pattern into list of tokens // parse parsed given pattern into list of tokens
func parse(str string, sep string, st state) ([]token, error) { func parse(str string, sep string, st state) ([]token, error) {
if len(str) == 0 { if len(str) == 0 {
@ -99,14 +98,14 @@ func parse(str string, sep string, st state) ([]token, error) {
return nil, fmt.Errorf("'%s' should be closed with '%s'", string(range_open), string(range_close)) return nil, fmt.Errorf("'%s' should be closed with '%s'", string(range_open), string(range_close))
} }
r := str[i+1:closed] r := str[i+1 : closed]
g, err := parseRange(r) g, err := parseRange(r)
if err != nil { if err != nil {
return nil, err return nil, err
} }
st.tokens = append(st.tokens, token{g, r}) st.tokens = append(st.tokens, token{g, r})
if closed == len(str) -1 { if closed == len(str)-1 {
return st.tokens, nil return st.tokens, nil
} }
@ -116,11 +115,11 @@ func parse(str string, sep string, st state) ([]token, error) {
st.escape = true st.escape = true
case any: case any:
if len(str) > i+1 && str[i+1] == any { if len(str) > i+1 && str[i+1] == any {
st.tokens = append(st.tokens, token{match.Multiple{}, c}) st.tokens = append(st.tokens, token{match.Any{}, c})
return parse(str[i+len(c)+1:], sep, st) return parse(str[i+len(c)+1:], sep, st)
} }
st.tokens = append(st.tokens, token{match.Multiple{sep}, c}) st.tokens = append(st.tokens, token{match.Any{sep}, c})
case single: case single:
st.tokens = append(st.tokens, token{match.Single{sep}, c}) st.tokens = append(st.tokens, token{match.Single{sep}, c})
} }
@ -129,7 +128,6 @@ func parse(str string, sep string, st state) ([]token, error) {
return parse(str[i+len(c):], sep, st) return parse(str[i+len(c):], sep, st)
} }
func parseRange(def string) (match.Matcher, error) { func parseRange(def string) (match.Matcher, error) {
var ( var (
not bool not bool
@ -146,13 +144,13 @@ func parseRange(def string) (match.Matcher, error) {
continue continue
} }
switch c{ switch c {
case inside_range_not: case inside_range_not:
if i == 0 { if i == 0 {
not = true not = true
} }
case escape: case escape:
if i == len(def) - 1 { if i == len(def)-1 {
return nil, fmt.Errorf("there should be any character after '%s'", string(escape)) return nil, fmt.Errorf("there should be any character after '%s'", string(escape))
} }

138
lexer.go
View File

@ -6,7 +6,7 @@ import (
"unicode/utf8" "unicode/utf8"
) )
var eof int = 0 var eof rune = 0
type stateFn func(*lexer) stateFn type stateFn func(*lexer) stateFn
@ -20,7 +20,10 @@ const (
item_single item_single
item_range_open item_range_open
item_range_not item_range_not
item_range_lo
item_range_minus item_range_minus
item_range_hi
item_range_chars
item_range_close item_range_close
) )
@ -29,15 +32,29 @@ type item struct {
s string s string
} }
func (i item) String() string {
return fmt.Sprintf("%v[%s]", i.t, i.s)
}
type lexer struct { type lexer struct {
input string input string
start int start int
pos int pos int
width int width int
runes int runes int
state stateFn
items chan item items chan item
} }
func newLexer(source string) *lexer {
l := &lexer{
input: source,
state: lexText,
items: make(chan item, 5),
}
return l
}
func (l *lexer) run() { func (l *lexer) run() {
for state := lexText; state != nil; { for state := lexText; state != nil; {
state = state(l) state = state(l)
@ -45,12 +62,12 @@ func (l *lexer) run() {
close(l.items) close(l.items)
} }
func (l *lexer) read() (rune int) { func (l *lexer) read() (r rune) {
if l.pos >= len(l.input) { if l.pos >= len(l.input) {
return eof return eof
} }
rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
l.pos += l.width l.pos += l.width
l.runes++ l.runes++
@ -62,12 +79,18 @@ func (l *lexer) unread() {
l.runes-- l.runes--
} }
func (l *lexer) shift(i int) {
l.pos += i
l.start = l.pos
l.runes = 0
}
func (l *lexer) ignore() { func (l *lexer) ignore() {
l.start = l.pos l.start = l.pos
l.runes = 0 l.runes = 0
} }
func (l *lexer) lookahead() int { func (l *lexer) lookahead() rune {
r := l.read() r := l.read()
l.unread() l.unread()
return r return r
@ -101,42 +124,53 @@ func (l *lexer) flush(t itemType) {
} }
func (l *lexer) errorf(format string, args ...interface{}) { func (l *lexer) errorf(format string, args ...interface{}) {
l.emit(item{item_error, fmt.Sprintf(format, args...)}) l.items <- item{item_error, fmt.Sprintf(format, args...)}
} }
func lex(source string) *lexer { func (l *lexer) nextItem() item {
l := &lexer{ for {
input: strings.NewReader(source), select {
items: make(chan item), case item := <-l.items:
return item
default:
if l.state == nil {
return item{t: item_eof}
} }
go l.run() l.state = l.state(l)
}
}
return l panic("something went wrong")
} }
func lexText(l *lexer) stateFn { func lexText(l *lexer) stateFn {
for { for {
switch l.input[l.pos] { c := l.read()
if c == eof {
break
}
switch c {
case escape: case escape:
if l.read() == eof { if l.read() == eof {
l.errorf("unclosed '%s' character", string(escape)) l.errorf("unclosed '%s' character", string(escape))
return nil return nil
} }
case single: case single:
l.unread()
l.flush(item_text) l.flush(item_text)
return lexSingle return lexSingle
case any: case any:
l.unread()
l.flush(item_text) l.flush(item_text)
return lexAny return lexAny
case range_open: case range_open:
l.unread()
l.flush(item_text) l.flush(item_text)
return lexRangeOpen return lexRangeOpen
} }
if l.read() == eof {
break
}
} }
if l.pos > l.start { if l.pos > l.start {
@ -148,16 +182,15 @@ func lexText(l *lexer) stateFn {
return nil return nil
} }
func lexRangeOpen(l *lexer) stateFn {
l.pos += 1
l.emit(item_range_open)
return lexInsideRange
}
func lexInsideRange(l *lexer) stateFn { func lexInsideRange(l *lexer) stateFn {
for { for {
switch l.input[l.pos] { c := l.read()
if c == eof {
l.errorf("unclosed range construction")
return nil
}
switch c {
case inside_range_not: case inside_range_not:
// only first char makes sense // only first char makes sense
if l.pos == l.start { if l.pos == l.start {
@ -165,36 +198,19 @@ func lexInsideRange(l *lexer) stateFn {
} }
case inside_range_minus: case inside_range_minus:
if len(l.runes) != 1 { if l.pos-l.start != 2 {
l.errorf("unexpected character '%s'", string(inside_range_minus)) l.errorf("unexpected length of lo char inside range")
return nil return nil
} }
l.emit(item_text) l.shift(-2)
return lexRangeHiLo
l.pos += 1
l.emit(item_range_minus)
switch l.input[l.pos] {
case eof, range_close:
l.errorf("unexpected end of range: character is expected")
return nil
default:
l.read()
l.emit(item_text)
}
return lexText
case range_close: case range_close:
l.flush(item_text) l.unread()
l.flush(item_range_chars)
return lexRangeClose return lexRangeClose
} }
if l.read() == eof {
l.errorf("unclosed range construction")
return nil
}
} }
} }
@ -205,13 +221,31 @@ func lexAny(l *lexer) stateFn {
} }
func lexRangeHiLo(l *lexer) stateFn { func lexRangeHiLo(l *lexer) stateFn {
for {
c := l.read()
if c == eof {
l.errorf("unexpected end of input")
return nil
}
l.emit(item_text) if l.pos-l.start != 1 {
return lexRangeMinus l.errorf("unexpected length of char inside range")
return nil
}
l.pos += 1 switch c {
case inside_range_minus:
l.emit(item_range_minus) l.emit(item_range_minus)
return lexInsideRange
case range_close:
l.unread()
l.flush(item_range_hi)
return lexRangeClose
default:
l.flush(item_range_lo)
}
}
} }
func lexSingle(l *lexer) stateFn { func lexSingle(l *lexer) stateFn {
@ -220,6 +254,12 @@ func lexSingle(l *lexer) stateFn {
return lexText return lexText
} }
func lexRangeOpen(l *lexer) stateFn {
l.pos += 1
l.emit(item_range_open)
return lexInsideRange
}
func lexRangeClose(l *lexer) stateFn { func lexRangeClose(l *lexer) stateFn {
l.pos += 1 l.pos += 1
l.emit(item_range_close) l.emit(item_range_close)

View File

@ -1,20 +1,19 @@
package match package match
import ( import (
"strings"
"fmt" "fmt"
"strings"
) )
// multiple represents * type Any struct {
type Multiple struct {
Separators string Separators string
} }
func (self Multiple) Match(s string) bool { func (self Any) Match(s string) bool {
return strings.IndexAny(s, self.Separators) == -1 return strings.IndexAny(s, self.Separators) == -1
} }
func (self Multiple) Search(s string) (i, l int, ok bool) { func (self Any) Search(s string) (i, l int, ok bool) {
if self.Match(s) { if self.Match(s) {
return 0, len(s), true return 0, len(s), true
} }
@ -22,7 +21,7 @@ func (self Multiple) Search(s string) (i, l int, ok bool) {
return return
} }
func (self Multiple) Kind() Kind { func (self Any) Kind() Kind {
if self.Separators == "" { if self.Separators == "" {
return KindMultipleSuper return KindMultipleSuper
} else { } else {
@ -30,6 +29,6 @@ func (self Multiple) Kind() Kind {
} }
} }
func (self Multiple) String() string { func (self Any) String() string {
return fmt.Sprintf("[multiple:%s]", self.Separators) return fmt.Sprintf("[multiple:%s]", self.Separators)
} }

127
parser.go Normal file
View File

@ -0,0 +1,127 @@
package glob
import (
"errors"
"fmt"
"github.com/gobwas/glob/match"
)
// parseAll lexes source and runs the parser state machine to completion,
// returning the full ordered token list for the pattern.
// separators is passed through to the sub-parsers so that wildcard
// matchers can exclude separator characters.
func parseAll(source, separators string) ([]token, error) {
	lexer := newLexer(source)
	var tokens []token
	for parser := parserMain; parser != nil; {
		var (
			parsed []token
			err    error
		)
		parsed, parser, err = parser(lexer, separators)
		if err != nil {
			return nil, err
		}
		// Accumulate: each parse state returns only the tokens it produced,
		// so overwriting here would drop everything parsed so far.
		tokens = append(tokens, parsed...)
	}
	return tokens, nil
}
type parseFn func(*lexer, string) ([]token, parseFn, error)
// parserMain consumes lexer items for the top-level pattern, producing one
// token per item. On item_range_open it hands control to parserRange; on
// item_eof it terminates the state machine (nil next parser).
// Two adjacent `*` items are collapsed into a single super-any matcher.
func parserMain(lexer *lexer, separators string) ([]token, parseFn, error) {
	var (
		prev   *token
		tokens []token
	)
	for {
		// Fetch a fresh item each iteration; fetching only once in the
		// loop initializer would spin forever on the same item.
		item := lexer.nextItem()

		var t token
		switch item.t {
		case item_eof:
			return tokens, nil, nil
		case item_error:
			return nil, nil, errors.New(item.s)
		case item_text:
			t = token{match.Raw{item.s}, item.s}
		case item_any:
			if prev != nil && prev.matcher.Kind() == match.KindMultipleSeparated {
				// Two consecutive any tokens (`**`): replace the plain any
				// with a super-any that crosses separators.
				tokens = tokens[:len(tokens)-1]
				t = token{match.Any{""}, item.s}
			} else {
				t = token{match.Any{separators}, item.s}
			}
		case item_single:
			t = token{match.Single{separators}, item.s}
		case item_range_open:
			return tokens, parserRange, nil
		}

		// Record the produced token; without this append the function
		// would always return an empty token list.
		tokens = append(tokens, t)
		prev = &tokens[len(tokens)-1]
	}
}
// parserRange consumes lexer items inside a `[...]` construction and emits
// exactly one token: either a Between matcher (lo-hi range) or a RangeList
// matcher (explicit character set), optionally negated. Control returns to
// parserMain after item_range_close.
func parserRange(lexer *lexer, separators string) ([]token, parseFn, error) {
	var (
		not   bool
		lo    rune
		hi    rune
		chars string
	)
	for {
		// Advance on every iteration; a loop-initializer-only nextItem
		// call would re-process the same item endlessly.
		item := lexer.nextItem()

		switch item.t {
		case item_eof:
			return nil, nil, errors.New("unexpected end")
		case item_error:
			return nil, nil, errors.New(item.s)
		case item_range_not:
			not = true
		case item_range_lo:
			r := []rune(item.s)
			if len(r) != 1 {
				return nil, nil, fmt.Errorf("unexpected length of lo character")
			}
			lo = r[0]
		case item_range_minus:
			// Separator between lo and hi; nothing to record.
		case item_range_hi:
			r := []rune(item.s)
			if len(r) != 1 {
				return nil, nil, fmt.Errorf("unexpected length of hi character")
			}
			hi = r[0]
		case item_range_chars:
			chars = item.s
		case item_range_close:
			isRange := lo != 0 && hi != 0
			// NOTE: the range is valid only when exactly one of
			// (lo-hi pair, char list) was provided. isChars must test for a
			// NON-empty list — `chars == ""` inverted the check and made
			// every well-formed range an error.
			isChars := chars != ""
			if isChars == isRange {
				return nil, nil, fmt.Errorf("parse error: unexpected lo, hi, chars in range")
			}
			if isRange {
				return []token{{match.Between{lo, hi, not}, ""}}, parserMain, nil
			}
			return []token{{match.RangeList{chars, not}, ""}}, parserMain, nil
		}
	}
}

13
parser_test.go Normal file
View File

@ -0,0 +1,13 @@
package glob
import (
"fmt"
"testing"
)
// TestParseString is a smoke test: lex a plain-text pattern and print the
// first three items the lexer yields (text, then eof markers).
func TestParseString(t *testing.T) {
	lexer := newLexer("hello")
	for range [3]struct{}{} {
		fmt.Println(lexer.nextItem())
	}
}