lexer tests

This commit is contained in:
s.kamardin 2015-12-25 19:40:36 +03:00
parent eba73b4b86
commit d400ac872c
5 changed files with 254 additions and 77 deletions

34
glob.go
View File

@ -1,21 +1,21 @@
package glob
import (
"strings"
"github.com/gobwas/glob/match"
"fmt"
"github.com/gobwas/glob/match"
"strings"
)
const (
any = '*'
single = '?'
single = '?'
escape = '\\'
range_open = '['
range_close = ']'
)
const (
inside_range_not = '!'
inside_range_not = '!'
inside_range_minus = '-'
)
@ -67,7 +67,6 @@ func New(pattern string, separators ...string) (Glob, error) {
return &match.Composite{c}, nil
}
// parse parses the given pattern into a list of tokens
func parse(str string, sep string, st state) ([]token, error) {
if len(str) == 0 {
@ -99,14 +98,14 @@ func parse(str string, sep string, st state) ([]token, error) {
return nil, fmt.Errorf("'%s' should be closed with '%s'", string(range_open), string(range_close))
}
r := str[i+1:closed]
r := str[i+1 : closed]
g, err := parseRange(r)
if err != nil {
return nil, err
}
st.tokens = append(st.tokens, token{g, r})
if closed == len(str) -1 {
if closed == len(str)-1 {
return st.tokens, nil
}
@ -116,11 +115,11 @@ func parse(str string, sep string, st state) ([]token, error) {
st.escape = true
case any:
if len(str) > i+1 && str[i+1] == any {
st.tokens = append(st.tokens, token{match.Multiple{}, c})
st.tokens = append(st.tokens, token{match.Any{}, c})
return parse(str[i+len(c)+1:], sep, st)
}
st.tokens = append(st.tokens, token{match.Multiple{sep}, c})
st.tokens = append(st.tokens, token{match.Any{sep}, c})
case single:
st.tokens = append(st.tokens, token{match.Single{sep}, c})
}
@ -129,14 +128,13 @@ func parse(str string, sep string, st state) ([]token, error) {
return parse(str[i+len(c):], sep, st)
}
func parseRange(def string) (match.Matcher, error) {
var (
not bool
esc bool
minus bool
not bool
esc bool
minus bool
minusIndex int
b []byte
b []byte
)
for i, c := range []byte(def) {
@ -146,13 +144,13 @@ func parseRange(def string) (match.Matcher, error) {
continue
}
switch c{
switch c {
case inside_range_not:
if i == 0 {
not = true
}
case escape:
if i == len(def) - 1 {
if i == len(def)-1 {
return nil, fmt.Errorf("there should be any character after '%s'", string(escape))
}
@ -171,7 +169,7 @@ func parseRange(def string) (match.Matcher, error) {
def = string(b)
if minus {
if minus {
r := []rune(def)
if len(r) != 2 || minusIndex != 1 {
return nil, fmt.Errorf("invalid range syntax: '%s' should be between two characters", string(inside_range_minus))
@ -191,4 +189,4 @@ type token struct {
type state struct {
escape bool
tokens []token
}
}

142
lexer.go
View File

@ -6,7 +6,7 @@ import (
"unicode/utf8"
)
var eof int = 0
var eof rune = 0
type stateFn func(*lexer) stateFn
@ -20,7 +20,10 @@ const (
item_single
item_range_open
item_range_not
item_range_lo
item_range_minus
item_range_hi
item_range_chars
item_range_close
)
@ -29,15 +32,29 @@ type item struct {
s string
}
// String renders the item as its type followed by its text in square
// brackets.
func (it item) String() string {
	typ, text := it.t, it.s
	return fmt.Sprintf("%v[%s]", typ, text)
}
// lexer holds the scanning state for one input pattern.
type lexer struct {
// input is the source string being scanned.
input string
// start is set to pos by shift and ignore.
start int
// pos is the byte offset in input where the next read begins.
pos int
// width is the byte width of the rune most recently decoded by read.
width int
// runes is incremented by read, decremented by unread, and reset to
// zero by shift and ignore.
runes int
// state is the state function nextItem runs next; nil means lexing
// has finished.
state stateFn
// items is the channel that lexed items are sent on and nextItem
// receives from.
items chan item
}
// newLexer builds a lexer over source. It starts in the lexText state and
// uses an items channel with a buffer of five.
func newLexer(source string) *lexer {
	return &lexer{
		input: source,
		state: lexText,
		items: make(chan item, 5),
	}
}
func (l *lexer) run() {
for state := lexText; state != nil; {
state = state(l)
@ -45,12 +62,12 @@ func (l *lexer) run() {
close(l.items)
}
func (l *lexer) read() (rune int) {
func (l *lexer) read() (r rune) {
if l.pos >= len(l.input) {
return eof
}
rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
l.pos += l.width
l.runes++
@ -62,12 +79,18 @@ func (l *lexer) unread() {
l.runes--
}
// shift moves the read position by i bytes (a negative i moves it
// backward), then sets start to the new position and resets the rune
// counter.
func (l *lexer) shift(i int) {
	l.pos += i
	l.start, l.runes = l.pos, 0
}
// ignore sets start to the current read position and resets the rune
// counter.
func (l *lexer) ignore() {
	l.start, l.runes = l.pos, 0
}
func (l *lexer) lookahead() int {
func (l *lexer) lookahead() rune {
r := l.read()
l.unread()
return r
@ -101,42 +124,53 @@ func (l *lexer) flush(t itemType) {
}
func (l *lexer) errorf(format string, args ...interface{}) {
l.emit(item{item_error, fmt.Sprintf(format, args...)})
l.items <- item{item_error, fmt.Sprintf(format, args...)}
}
func lex(source string) *lexer {
l := &lexer{
input: strings.NewReader(source),
items: make(chan item),
func (l *lexer) nextItem() item {
for {
select {
case item := <-l.items:
return item
default:
if l.state == nil {
return item{t: item_eof}
}
l.state = l.state(l)
}
}
go l.run()
return l
panic("something went wrong")
}
func lexText(l *lexer) stateFn {
for {
switch l.input[l.pos] {
c := l.read()
if c == eof {
break
}
switch c {
case escape:
if l.read() == eof {
l.errorf("unclosed '%s' character", string(escape))
return nil
}
case single:
l.unread()
l.flush(item_text)
return lexSingle
case any:
l.unread()
l.flush(item_text)
return lexAny
case range_open:
l.unread()
l.flush(item_text)
return lexRangeOpen
}
if l.read() == eof {
break
}
}
if l.pos > l.start {
@ -148,16 +182,15 @@ func lexText(l *lexer) stateFn {
return nil
}
func lexRangeOpen(l *lexer) stateFn {
l.pos += 1
l.emit(item_range_open)
return lexInsideRange
}
func lexInsideRange(l *lexer) stateFn {
for {
switch l.input[l.pos] {
c := l.read()
if c == eof {
l.errorf("unclosed range construction")
return nil
}
switch c {
case inside_range_not:
// only first char makes sense
if l.pos == l.start {
@ -165,36 +198,19 @@ func lexInsideRange(l *lexer) stateFn {
}
case inside_range_minus:
if len(l.runes) != 1 {
l.errorf("unexpected character '%s'", string(inside_range_minus))
if l.pos-l.start != 2 {
l.errorf("unexpected length of lo char inside range")
return nil
}
l.emit(item_text)
l.pos += 1
l.emit(item_range_minus)
switch l.input[l.pos] {
case eof, range_close:
l.errorf("unexpected end of range: character is expected")
return nil
default:
l.read()
l.emit(item_text)
}
return lexText
l.shift(-2)
return lexRangeHiLo
case range_close:
l.flush(item_text)
l.unread()
l.flush(item_range_chars)
return lexRangeClose
}
if l.read() == eof {
l.errorf("unclosed range construction")
return nil
}
}
}
@ -205,13 +221,31 @@ func lexAny(l *lexer) stateFn {
}
func lexRangeHiLo(l *lexer) stateFn {
for {
c := l.read()
if c == eof {
l.errorf("unexpected end of input")
return nil
}
l.emit(item_text)
return lexRangeMinus
if l.pos-l.start != 1 {
l.errorf("unexpected length of char inside range")
return nil
}
l.pos += 1
l.emit(item_range_minus)
return lexInsideRange
switch c {
case inside_range_minus:
l.emit(item_range_minus)
case range_close:
l.unread()
l.flush(item_range_hi)
return lexRangeClose
default:
l.flush(item_range_lo)
}
}
}
func lexSingle(l *lexer) stateFn {
@ -220,6 +254,12 @@ func lexSingle(l *lexer) stateFn {
return lexText
}
// lexRangeOpen advances the position by one byte, emits item_range_open
// and hands control to lexInsideRange.
func lexRangeOpen(l *lexer) stateFn {
	l.pos++
	l.emit(item_range_open)
	return lexInsideRange
}
func lexRangeClose(l *lexer) stateFn {
l.pos += 1
l.emit(item_range_close)

View File

@ -1,20 +1,19 @@
package match
import (
"strings"
"fmt"
"strings"
)
// Any represents *
type Multiple struct {
type Any struct {
Separators string
}
func (self Multiple) Match(s string) bool {
func (self Any) Match(s string) bool {
return strings.IndexAny(s, self.Separators) == -1
}
func (self Multiple) Search(s string) (i, l int, ok bool) {
func (self Any) Search(s string) (i, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
}
@ -22,7 +21,7 @@ func (self Multiple) Search(s string) (i, l int, ok bool) {
return
}
func (self Multiple) Kind() Kind {
func (self Any) Kind() Kind {
if self.Separators == "" {
return KindMultipleSuper
} else {
@ -30,6 +29,6 @@ func (self Multiple) Kind() Kind {
}
}
func (self Multiple) String() string {
func (self Any) String() string {
return fmt.Sprintf("[multiple:%s]", self.Separators)
}
}

127
parser.go Normal file
View File

@ -0,0 +1,127 @@
package glob
import (
"errors"
"fmt"
"github.com/gobwas/glob/match"
)
// parseAll lexes source and runs the parser state machine over the
// resulting items, starting in parserMain. It returns the tokens produced
// by every state in order, or the first error a state reports.
//
// separators is passed through to each parser state unchanged.
func parseAll(source, separators string) ([]token, error) {
	lx := newLexer(source)

	var tokens []token
	for parser := parseFn(parserMain); parser != nil; {
		part, next, err := parser(lx, separators)
		if err != nil {
			return nil, err
		}

		// Each state returns only the tokens it produced itself, so they
		// are accumulated here rather than overwriting the earlier ones.
		tokens = append(tokens, part...)
		parser = next
	}

	return tokens, nil
}
// parseFn is one state of the parser. Given the lexer and the separators
// string it returns a slice of tokens, the next state to run (nil stops
// the loop in parseAll) and an error.
type parseFn func(*lexer, string) ([]token, parseFn, error)
// parserMain is the top-level parser state. It reads items from the lexer
// and turns text, any and single items into tokens until it reaches the
// end of input, an error item, or the start of a range.
//
// It returns the tokens collected during this call together with the next
// state: nil at end of input, parserRange when a range opens. An error
// item is returned as an error carrying the item's text.
func parserMain(lexer *lexer, separators string) ([]token, parseFn, error) {
	var tokens []token

	for {
		// Fetch a fresh item on every iteration; reading it only once
		// would make the loop spin on the same item forever.
		item := lexer.nextItem()

		switch item.t {
		case item_eof:
			return tokens, nil, nil

		case item_error:
			return nil, nil, errors.New(item.s)

		case item_text:
			tokens = append(tokens, token{match.Raw{item.s}, item.s})

		case item_any:
			// An any item directly after a separator-aware any token
			// replaces that token with a separator-less one.
			if n := len(tokens); n > 0 && tokens[n-1].matcher.Kind() == match.KindMultipleSeparated {
				tokens[n-1] = token{match.Any{""}, item.s}
			} else {
				tokens = append(tokens, token{match.Any{separators}, item.s})
			}

		case item_single:
			tokens = append(tokens, token{match.Single{separators}, item.s})

		case item_range_open:
			return tokens, parserRange, nil
		}
	}
}
// parserRange is the parser state entered after a range has been opened.
// It reads items until the range is closed and returns a single token:
// a match.Between when both lo and hi bounds were given, or a
// match.RangeList when a character list was given. The next state is
// parserMain.
//
// It returns an error on an error item, on end of input before the range
// is closed, when a lo/hi item is not exactly one rune, or when the
// range holds neither or both of a lo-hi pair and a character list.
func parserRange(lexer *lexer, separators string) ([]token, parseFn, error) {
	var (
		not   bool
		lo    rune
		hi    rune
		chars string
	)

	for {
		// Fetch a fresh item on every iteration; reading it only once
		// would make the loop spin on the same item forever.
		item := lexer.nextItem()

		switch item.t {
		case item_eof:
			return nil, nil, errors.New("unexpected end")

		case item_error:
			return nil, nil, errors.New(item.s)

		case item_range_not:
			not = true

		case item_range_lo:
			r := []rune(item.s)
			if len(r) != 1 {
				return nil, nil, fmt.Errorf("unexpected length of lo character")
			}
			lo = r[0]

		case item_range_minus:
			// The separator between lo and hi carries no data.

		case item_range_hi:
			r := []rune(item.s)
			if len(r) != 1 {
				return nil, nil, fmt.Errorf("unexpected length of hi character")
			}
			hi = r[0]

		case item_range_chars:
			chars = item.s

		case item_range_close:
			isRange := lo != 0 && hi != 0
			isChars := chars != ""

			// Exactly one of the two forms must be present.
			if isChars == isRange {
				return nil, nil, fmt.Errorf("parse error: unexpected lo, hi, chars in range")
			}

			if isRange {
				return []token{{match.Between{lo, hi, not}, ""}}, parserMain, nil
			}
			return []token{{match.RangeList{chars, not}, ""}}, parserMain, nil
		}
	}
}

13
parser_test.go Normal file
View File

@ -0,0 +1,13 @@
package glob
import (
"fmt"
"testing"
)
// TestParseString lexes the plain string "hello" and prints the first
// three items the lexer yields. It makes no assertions.
func TestParseString(t *testing.T) {
	l := newLexer("hello")
	for n := 0; n < 3; n++ {
		fmt.Println(l.nextItem())
	}
}