glob/lexer.go

324 lines
5.0 KiB
Go
Raw Normal View History

2015-12-24 22:30:20 +03:00
package glob
import (
2016-05-15 00:31:14 +03:00
"bufio"
2016-02-24 23:53:19 +03:00
"bytes"
2015-12-24 22:30:20 +03:00
"fmt"
2016-05-15 00:31:14 +03:00
"github.com/gobwas/glob/runes"
"io"
2015-12-24 22:30:20 +03:00
"strings"
"unicode/utf8"
)
2016-01-08 20:14:31 +03:00
// Characters that carry special meaning in a glob pattern.
const (
	char_any           = '*'  // wildcard; two in a row are lexed as item_super
	char_comma         = ','  // alternative separator, only inside {...}
	char_single        = '?'  // single-character wildcard
	char_escape        = '\\' // makes the following rune literal text
	char_range_open    = '['  // opens a character range/list
	char_range_close   = ']'  // closes a character range/list
	char_terms_open    = '{'  // opens a list of alternatives
	char_terms_close   = '}'  // closes a list of alternatives
	char_range_not     = '!'  // negation marker at the start of a range
	char_range_between = '-'  // separates the low and high bound of a range
)
2016-02-24 23:53:19 +03:00
var specials = []byte{
char_any,
char_single,
char_escape,
char_range_open,
2016-05-15 00:31:14 +03:00
char_range_close,
2016-02-24 23:53:19 +03:00
char_terms_open,
char_terms_close,
}
// special reports whether c is one of the bytes listed in specials.
func special(c byte) bool {
	idx := bytes.IndexByte(specials, c)
	return idx >= 0
}
2015-12-25 19:40:36 +03:00
// eof is the zero rune.
// NOTE(review): nothing in this file reads it; presumably an end-of-input
// sentinel used elsewhere in the package — confirm before removing.
var eof = rune(0)
2015-12-24 22:30:20 +03:00
// itemType identifies the kind of a lexical item produced by the lexer.
type itemType int

// Item kinds. The numeric values follow declaration order (iota), so new
// kinds must be appended rather than inserted if the values matter to callers.
const (
	item_eof itemType = iota
	item_error
	item_text
	item_char
	item_any
	item_super
	item_single
	item_not
	item_separator
	item_range_open
	item_range_close
	item_range_lo
	item_range_hi
	item_range_between
	item_terms_open
	item_terms_close
)
2015-12-25 21:08:54 +03:00
func (i itemType) String() string {
switch i {
case item_eof:
return "eof"
case item_error:
return "error"
case item_text:
return "text"
2016-01-08 20:14:31 +03:00
case item_char:
return "char"
2015-12-25 21:08:54 +03:00
case item_any:
return "any"
2016-01-08 20:14:31 +03:00
case item_super:
return "super"
2015-12-25 21:08:54 +03:00
case item_single:
return "single"
2016-01-08 20:14:31 +03:00
case item_not:
return "not"
case item_separator:
return "separator"
2015-12-25 21:08:54 +03:00
case item_range_open:
return "range_open"
2016-01-08 20:14:31 +03:00
case item_range_close:
return "range_close"
2015-12-25 21:08:54 +03:00
case item_range_lo:
return "range_lo"
case item_range_hi:
return "range_hi"
2016-01-08 20:14:31 +03:00
case item_range_between:
return "range_between"
2015-12-25 21:08:54 +03:00
2016-01-08 20:14:31 +03:00
case item_terms_open:
return "terms_open"
case item_terms_close:
return "terms_close"
2015-12-25 21:08:54 +03:00
default:
return "undef"
}
}
2015-12-24 22:30:20 +03:00
// item is a single lexical item: its kind plus the text attached to it.
// For item_error items the text is the error message.
// Field order matters: the file builds items with positional literals
// such as item{item_eof, ""}.
type item struct {
// t is the kind of the item.
t itemType
// s is the text carried by the item.
s string
}
2015-12-25 19:40:36 +03:00
func (i item) String() string {
2015-12-25 21:08:54 +03:00
return fmt.Sprintf("%v<%s>", i.t, i.s)
2015-12-25 19:40:36 +03:00
}
2016-05-12 10:46:16 +03:00
// stubLexer replays a fixed list of items through nextItem.
// NOTE(review): presumably a test double for the real lexer — confirm.
type stubLexer struct {
// Items is the sequence handed out by nextItem, in order.
Items []item
// pos is the index of the next element of Items to return.
pos int
}
// nextItem hands out the stored items one by one and, once all of them
// have been returned, keeps answering with an item_eof item.
func (s *stubLexer) nextItem() item {
	if s.pos == len(s.Items) {
		return item{item_eof, ""}
	}

	next := s.Items[s.pos]
	s.pos++

	return next
}
2015-12-24 22:30:20 +03:00
type lexer struct {
2016-05-15 00:31:14 +03:00
data string
start int
pos int
current rune
items []item
termsLevel int
r *bufio.Reader
2015-12-24 22:30:20 +03:00
}
2015-12-25 19:40:36 +03:00
func newLexer(source string) *lexer {
l := &lexer{
2016-05-15 00:31:14 +03:00
r: bufio.NewReader(strings.NewReader(source)),
data: source,
2015-12-25 19:40:36 +03:00
}
return l
}
2016-05-15 00:31:14 +03:00
func (l *lexer) shiftItem() (ret item) {
ret, l.items = l.items[0], l.items[1:]
2015-12-24 22:30:20 +03:00
return
}
2016-05-15 00:31:14 +03:00
func (l *lexer) pushItem(i item) {
l.items = append(l.items, i)
2015-12-25 19:40:36 +03:00
}
2016-05-15 00:31:14 +03:00
func (l *lexer) hasItem() bool {
return len(l.items) > 0
2015-12-24 22:30:20 +03:00
}
2016-05-15 00:31:14 +03:00
func (l *lexer) peekRune() rune {
r, _ := utf8.DecodeRuneInString(l.data[l.start:])
2015-12-24 22:30:20 +03:00
return r
}
2016-05-15 00:31:14 +03:00
func (l *lexer) inTerms() bool {
return l.termsLevel > 0
2016-05-12 00:17:33 +03:00
}
2016-01-08 20:14:31 +03:00
2016-05-15 00:31:14 +03:00
func (l *lexer) termsEnter() {
l.termsLevel++
2015-12-24 22:30:20 +03:00
}
2016-05-15 00:31:14 +03:00
func (l *lexer) termsLeave() {
l.termsLevel--
2015-12-24 22:30:20 +03:00
}
2016-05-15 00:31:14 +03:00
func (l *lexer) nextItem() item {
if l.hasItem() {
return l.shiftItem()
2015-12-24 22:30:20 +03:00
}
2016-05-15 00:31:14 +03:00
r, _, err := l.r.ReadRune()
if err != nil {
switch err {
case io.EOF:
return item{item_eof, ""}
default:
return item{item_error, err.Error()}
}
2016-01-08 20:14:31 +03:00
}
2016-05-15 00:31:14 +03:00
switch r {
case char_terms_open:
l.termsEnter()
return item{item_terms_open, string(r)}
2015-12-24 22:30:20 +03:00
2016-05-15 00:31:14 +03:00
case char_comma:
if l.inTerms() {
return item{item_separator, string(r)}
}
2016-05-12 00:17:33 +03:00
2016-05-15 00:31:14 +03:00
case char_terms_close:
if l.inTerms() {
l.termsLeave()
return item{item_terms_close, string(r)}
}
2016-05-12 00:17:33 +03:00
2016-05-15 00:31:14 +03:00
case char_range_open:
l.fetchRange()
return item{item_range_open, string(r)}
2016-05-12 00:17:33 +03:00
2016-05-15 00:31:14 +03:00
case char_single:
return item{item_single, string(r)}
2016-05-12 00:17:33 +03:00
2016-05-15 00:31:14 +03:00
case char_any:
b, err := l.r.Peek(1)
if err == nil && b[0] == char_any {
l.r.ReadRune()
return item{item_super, string(r) + string(r)}
2016-05-12 00:17:33 +03:00
}
2016-05-15 00:31:14 +03:00
return item{item_any, string(r)}
2016-05-12 00:17:33 +03:00
}
2016-05-15 00:31:14 +03:00
l.r.UnreadRune()
breakers := []rune{char_single, char_any, char_range_open, char_terms_open}
if l.inTerms() {
breakers = append(breakers, char_terms_close, char_comma)
2016-05-14 22:08:32 +03:00
}
2016-05-15 00:31:14 +03:00
l.fetchText(breakers)
2016-05-14 22:08:32 +03:00
2016-05-15 00:31:14 +03:00
return l.nextItem()
2016-05-12 00:17:33 +03:00
}
2016-05-15 00:31:14 +03:00
func (l *lexer) fetchRange() {
var wantHi bool
var wantClose bool
var seenNot bool
2015-12-24 22:30:20 +03:00
for {
2016-05-15 00:31:14 +03:00
r, _, err := l.r.ReadRune()
if err != nil {
l.pushItem(item{item_error, err.Error()})
return
2015-12-25 19:40:36 +03:00
}
2015-12-24 22:30:20 +03:00
2016-05-15 00:31:14 +03:00
if wantClose {
if r != char_range_close {
l.pushItem(item{item_error, "expecting close range character"})
} else {
l.pushItem(item{item_range_close, string(r)})
2015-12-24 22:30:20 +03:00
}
2016-05-15 00:31:14 +03:00
return
}
2015-12-24 22:30:20 +03:00
2016-05-15 00:31:14 +03:00
if wantHi {
l.pushItem(item{item_range_hi, string(r)})
wantClose = true
continue
}
2015-12-24 22:30:20 +03:00
2016-05-15 00:31:14 +03:00
if !seenNot && r == char_range_not {
l.pushItem(item{item_not, string(r)})
seenNot = true
continue
}
2016-05-12 00:17:33 +03:00
2016-05-15 00:31:14 +03:00
b, err := l.r.Peek(1)
if err == nil && b[0] == char_range_between {
l.pushItem(item{item_range_lo, string(r)})
l.r.ReadRune()
l.pushItem(item{item_range_between, string(char_range_between)})
wantHi = true
continue
2015-12-24 22:30:20 +03:00
}
2016-05-15 00:31:14 +03:00
l.r.UnreadRune()
l.fetchText([]rune{char_range_close})
wantClose = true
2015-12-24 22:30:20 +03:00
}
}
2016-05-15 00:31:14 +03:00
func (l *lexer) fetchText(breakers []rune) {
var data []rune
var escaped bool
2015-12-25 21:08:54 +03:00
2016-05-15 00:31:14 +03:00
reading:
2015-12-25 19:40:36 +03:00
for {
2016-05-15 00:31:14 +03:00
r, _, err := l.r.ReadRune()
if err != nil {
break
2015-12-25 19:40:36 +03:00
}
2016-05-15 00:31:14 +03:00
if !escaped {
if r == char_escape {
escaped = true
2015-12-25 21:08:54 +03:00
continue
}
2016-05-15 00:31:14 +03:00
if runes.IndexRune(breakers, r) != -1 {
l.r.UnreadRune()
break reading
2015-12-25 21:08:54 +03:00
}
2015-12-25 19:40:36 +03:00
}
2016-01-08 20:14:31 +03:00
2016-05-15 00:31:14 +03:00
escaped = false
data = append(data, r)
2016-01-08 20:14:31 +03:00
}
2016-05-15 00:31:14 +03:00
if len(data) > 0 {
l.pushItem(item{item_text, string(data)})
2016-01-08 20:14:31 +03:00
}
2015-12-24 22:30:20 +03:00
}