glob/lexer/lexer.go

274 lines
4.4 KiB
Go
Raw Normal View History

2016-05-27 20:47:19 +03:00
package lexer
2015-12-24 22:30:20 +03:00
import (
2016-02-24 23:53:19 +03:00
"bytes"
2015-12-24 22:30:20 +03:00
"fmt"
2016-05-27 20:47:19 +03:00
"github.com/gobwas/glob/util/runes"
2015-12-24 22:30:20 +03:00
"unicode/utf8"
)
2016-01-08 20:14:31 +03:00
// Characters that have a special meaning to the lexer.
const (
	char_any           = '*'  // Any; two in a row are lexed as Super
	char_comma         = ','  // Separator, only inside a terms group
	char_single        = '?'  // Single
	char_escape        = '\\' // makes the following rune literal text
	char_range_open    = '['  // RangeOpen
	char_range_close   = ']'  // RangeClose
	char_terms_open    = '{'  // TermsOpen
	char_terms_close   = '}'  // TermsClose, only inside a terms group
	char_range_not     = '!'  // Not, only at the start of a range
	char_range_between = '-'  // RangeBetween, separates the lo and hi runes
)
2016-02-24 23:53:19 +03:00
var specials = []byte{
char_any,
char_single,
char_escape,
char_range_open,
2016-05-15 00:31:14 +03:00
char_range_close,
2016-02-24 23:53:19 +03:00
char_terms_open,
char_terms_close,
}
2016-05-27 20:47:19 +03:00
func Special(c byte) bool {
2016-02-24 23:53:19 +03:00
return bytes.IndexByte(specials, c) != -1
}
2016-05-27 20:47:19 +03:00
// tokens is a first-in, first-out queue of Token values: push appends at the
// back and shift removes from the front.
type tokens []Token
2015-12-24 22:30:20 +03:00
2016-05-27 20:47:19 +03:00
func (i *tokens) shift() (ret Token) {
2016-05-16 01:08:55 +03:00
ret = (*i)[0]
copy(*i, (*i)[1:])
*i = (*i)[:len(*i)-1]
2016-05-16 01:01:08 +03:00
return
}
2016-05-27 20:47:19 +03:00
func (i *tokens) push(v Token) {
2016-05-16 01:01:08 +03:00
*i = append(*i, v)
}
2016-05-27 20:47:19 +03:00
func (i *tokens) empty() bool {
2016-05-16 01:01:08 +03:00
return len(*i) == 0
}
// eof is the rune value peek and read return when no further rune is
// available. NOTE(review): a literal NUL rune in the pattern has the same
// value and is therefore indistinguishable from eof.
var eof = rune(0)
2015-12-24 22:30:20 +03:00
type lexer struct {
2016-05-16 01:01:08 +03:00
data string
pos int
err error
2016-05-27 20:47:19 +03:00
tokens tokens
2016-05-15 00:31:14 +03:00
termsLevel int
2016-05-16 01:01:08 +03:00
lastRune rune
lastRuneSize int
hasRune bool
2015-12-24 22:30:20 +03:00
}
2015-12-25 19:40:36 +03:00
func newLexer(source string) *lexer {
l := &lexer{
2016-05-27 20:47:19 +03:00
data: source,
tokens: tokens(make([]Token, 0, 4)),
2015-12-25 19:40:36 +03:00
}
return l
}
2016-05-16 01:01:08 +03:00
func (l *lexer) peek() (r rune, w int) {
if l.pos == len(l.data) {
return eof, 0
}
r, w = utf8.DecodeRuneInString(l.data[l.pos:])
if r == utf8.RuneError {
l.errorf("could not read rune")
r = eof
w = 0
}
2015-12-24 22:30:20 +03:00
return
}
2016-05-16 01:01:08 +03:00
func (l *lexer) read() rune {
if l.hasRune {
l.hasRune = false
l.seek(l.lastRuneSize)
return l.lastRune
}
r, s := l.peek()
l.seek(s)
l.lastRune = r
l.lastRuneSize = s
return r
2015-12-25 19:40:36 +03:00
}
2016-05-16 01:01:08 +03:00
func (l *lexer) seek(w int) {
l.pos += w
2015-12-24 22:30:20 +03:00
}
2016-05-16 01:01:08 +03:00
// unread steps back over the rune returned by the latest read so that the
// next read returns it again. Only one rune can be pending at a time: a
// second unread without a read in between records an error instead.
func (l *lexer) unread() {
	if !l.hasRune {
		l.hasRune = true
		l.seek(-l.lastRuneSize)
		return
	}
	l.errorf("could not unread rune")
}
func (l *lexer) errorf(f string, v ...interface{}) {
l.err = fmt.Errorf(f, v...)
2015-12-24 22:30:20 +03:00
}
2016-05-15 00:31:14 +03:00
func (l *lexer) inTerms() bool {
return l.termsLevel > 0
2016-05-12 00:17:33 +03:00
}
2016-01-08 20:14:31 +03:00
2016-05-15 00:31:14 +03:00
func (l *lexer) termsEnter() {
l.termsLevel++
2015-12-24 22:30:20 +03:00
}
2016-05-15 00:31:14 +03:00
func (l *lexer) termsLeave() {
l.termsLevel--
2015-12-24 22:30:20 +03:00
}
2016-05-27 20:47:19 +03:00
func (l *lexer) nextItem() Token {
2016-05-16 01:01:08 +03:00
if l.err != nil {
2016-05-27 20:47:19 +03:00
return Token{Error, l.err.Error()}
2015-12-24 22:30:20 +03:00
}
2016-05-27 20:47:19 +03:00
if !l.tokens.empty() {
return l.tokens.shift()
2016-01-08 20:14:31 +03:00
}
2016-05-16 01:01:08 +03:00
l.fetchItem()
return l.nextItem()
}
2016-05-16 01:08:55 +03:00
var (
	// inTextBreakers are the runes that end a run of plain text outside of a
	// terms group.
	inTextBreakers = []rune{char_single, char_any, char_range_open, char_terms_open}

	// inTermsBreakers are the runes that end a run of plain text inside a
	// terms group: everything in inTextBreakers plus the group's closing
	// brace and the separator.
	inTermsBreakers = append(inTextBreakers, char_terms_close, char_comma)
)
2016-05-16 01:01:08 +03:00
func (l *lexer) fetchItem() {
r := l.read()
switch {
case r == eof:
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{EOF, ""})
2016-05-16 01:01:08 +03:00
case r == char_terms_open:
2016-05-15 00:31:14 +03:00
l.termsEnter()
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{TermsOpen, string(r)})
2015-12-24 22:30:20 +03:00
2016-05-16 01:01:08 +03:00
case r == char_comma && l.inTerms():
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{Separator, string(r)})
2016-05-12 00:17:33 +03:00
2016-05-16 01:01:08 +03:00
case r == char_terms_close && l.inTerms():
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{TermsClose, string(r)})
2016-05-16 01:01:08 +03:00
l.termsLeave()
2016-05-12 00:17:33 +03:00
2016-05-16 01:01:08 +03:00
case r == char_range_open:
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{RangeOpen, string(r)})
2016-05-15 00:31:14 +03:00
l.fetchRange()
2016-05-12 00:17:33 +03:00
2016-05-16 01:01:08 +03:00
case r == char_single:
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{Single, string(r)})
2016-05-12 00:17:33 +03:00
2016-05-16 01:01:08 +03:00
case r == char_any:
if l.read() == char_any {
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{Super, string(r) + string(r)})
2016-05-16 01:01:08 +03:00
} else {
l.unread()
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{Any, string(r)})
2016-05-12 00:17:33 +03:00
}
2016-05-16 01:01:08 +03:00
default:
l.unread()
2016-05-16 01:08:55 +03:00
var breakers []rune
2016-05-16 01:01:08 +03:00
if l.inTerms() {
2016-05-16 01:08:55 +03:00
breakers = inTermsBreakers
} else {
breakers = inTextBreakers
2016-05-16 01:01:08 +03:00
}
l.fetchText(breakers)
2016-05-14 22:08:32 +03:00
}
2016-05-12 00:17:33 +03:00
}
2016-05-15 00:31:14 +03:00
func (l *lexer) fetchRange() {
var wantHi bool
var wantClose bool
var seenNot bool
2015-12-24 22:30:20 +03:00
for {
2016-05-16 01:01:08 +03:00
r := l.read()
if r == eof {
l.errorf("unexpected end of input")
2016-05-15 00:31:14 +03:00
return
2015-12-25 19:40:36 +03:00
}
2015-12-24 22:30:20 +03:00
2016-05-15 00:31:14 +03:00
if wantClose {
if r != char_range_close {
2016-05-16 01:01:08 +03:00
l.errorf("expected close range character")
2016-05-15 00:31:14 +03:00
} else {
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{RangeClose, string(r)})
2015-12-24 22:30:20 +03:00
}
2016-05-15 00:31:14 +03:00
return
}
2015-12-24 22:30:20 +03:00
2016-05-15 00:31:14 +03:00
if wantHi {
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{RangeHi, string(r)})
2016-05-15 00:31:14 +03:00
wantClose = true
continue
}
2015-12-24 22:30:20 +03:00
2016-05-15 00:31:14 +03:00
if !seenNot && r == char_range_not {
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{Not, string(r)})
2016-05-15 00:31:14 +03:00
seenNot = true
continue
}
2016-05-12 00:17:33 +03:00
2016-05-16 01:01:08 +03:00
if n, w := l.peek(); n == char_range_between {
l.seek(w)
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{RangeLo, string(r)})
l.tokens.push(Token{RangeBetween, string(n)})
2016-05-15 00:31:14 +03:00
wantHi = true
continue
2015-12-24 22:30:20 +03:00
}
2016-05-15 00:31:14 +03:00
2016-05-16 01:01:08 +03:00
l.unread() // unread first peek and fetch as text
2016-05-15 00:31:14 +03:00
l.fetchText([]rune{char_range_close})
wantClose = true
2015-12-24 22:30:20 +03:00
}
}
2016-05-15 00:31:14 +03:00
func (l *lexer) fetchText(breakers []rune) {
var data []rune
var escaped bool
2015-12-25 21:08:54 +03:00
2016-05-15 00:31:14 +03:00
reading:
2015-12-25 19:40:36 +03:00
for {
2016-05-16 01:01:08 +03:00
r := l.read()
if r == eof {
2016-05-15 00:31:14 +03:00
break
2015-12-25 19:40:36 +03:00
}
2016-05-15 00:31:14 +03:00
if !escaped {
if r == char_escape {
escaped = true
2015-12-25 21:08:54 +03:00
continue
}
2016-05-15 00:31:14 +03:00
if runes.IndexRune(breakers, r) != -1 {
2016-05-16 01:01:08 +03:00
l.unread()
2016-05-15 00:31:14 +03:00
break reading
2015-12-25 21:08:54 +03:00
}
2015-12-25 19:40:36 +03:00
}
2016-01-08 20:14:31 +03:00
2016-05-15 00:31:14 +03:00
escaped = false
data = append(data, r)
2016-01-08 20:14:31 +03:00
}
2016-05-15 00:31:14 +03:00
if len(data) > 0 {
2016-05-27 20:47:19 +03:00
l.tokens.push(Token{Text, string(data)})
2016-01-08 20:14:31 +03:00
}
2015-12-24 22:30:20 +03:00
}