2015-12-24 22:30:20 +03:00
|
|
|
package glob
|
|
|
|
|
|
|
|
import (
|
2016-02-24 23:53:19 +03:00
|
|
|
"bytes"
|
2015-12-24 22:30:20 +03:00
|
|
|
"fmt"
|
|
|
|
"strings"
|
|
|
|
"unicode/utf8"
|
|
|
|
)
|
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
// Characters that carry special meaning in a glob pattern.
const (
	// Wildcards, the list separator and the escape character.
	char_any    = '*'
	char_comma  = ','
	char_single = '?'
	char_escape = '\\'

	// Delimiters of a character range: "[...]".
	char_range_open  = '['
	char_range_close = ']'

	// Delimiters of a list of alternative terms: "{...}".
	char_terms_open  = '{'
	char_terms_close = '}'

	// Characters that are only meaningful inside a range.
	char_range_not     = '!'
	char_range_between = '-'
)
|
|
|
|
|
2016-02-24 23:53:19 +03:00
|
|
|
var specials = []byte{
|
|
|
|
char_any,
|
|
|
|
char_single,
|
|
|
|
char_escape,
|
|
|
|
char_range_open,
|
|
|
|
char_range_close,
|
|
|
|
char_terms_open,
|
|
|
|
char_terms_close,
|
|
|
|
}
|
|
|
|
|
|
|
|
func special(c byte) bool {
|
|
|
|
return bytes.IndexByte(specials, c) != -1
|
|
|
|
}
|
|
|
|
|
2015-12-25 19:40:36 +03:00
|
|
|
// eof is the rune value read returns once the input is exhausted.
var eof = rune(0)
|
2015-12-24 22:30:20 +03:00
|
|
|
|
|
|
|
// stateFn is one state of the lexer: it is called with the lexer and returns
// the state to run next, or nil when lexing should stop.
type stateFn func(*lexer) stateFn
|
|
|
|
|
|
|
|
// itemType identifies the kind of an item produced by the lexer.
type itemType int

// The item kinds. The numeric values follow declaration order, so new kinds
// must only ever be appended.
const (
	item_eof itemType = iota // emitted by lexRaw once the input is exhausted
	item_error               // emitted by errorf; the item text is the message
	item_text                // literal text
	item_char
	item_any    // "*"
	item_super  // "**"
	item_single // "?"
	item_not    // "!" as the first character of a range
	item_separator
	item_range_open
	item_range_close
	item_range_lo
	item_range_hi
	item_range_between
	item_terms_open
	item_terms_close
)
|
|
|
|
|
2015-12-25 21:08:54 +03:00
|
|
|
func (i itemType) String() string {
|
|
|
|
switch i {
|
|
|
|
case item_eof:
|
|
|
|
return "eof"
|
|
|
|
|
|
|
|
case item_error:
|
|
|
|
return "error"
|
|
|
|
|
|
|
|
case item_text:
|
|
|
|
return "text"
|
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
case item_char:
|
|
|
|
return "char"
|
|
|
|
|
2015-12-25 21:08:54 +03:00
|
|
|
case item_any:
|
|
|
|
return "any"
|
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
case item_super:
|
|
|
|
return "super"
|
|
|
|
|
2015-12-25 21:08:54 +03:00
|
|
|
case item_single:
|
|
|
|
return "single"
|
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
case item_not:
|
|
|
|
return "not"
|
|
|
|
|
|
|
|
case item_separator:
|
|
|
|
return "separator"
|
|
|
|
|
2015-12-25 21:08:54 +03:00
|
|
|
case item_range_open:
|
|
|
|
return "range_open"
|
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
case item_range_close:
|
|
|
|
return "range_close"
|
2015-12-25 21:08:54 +03:00
|
|
|
|
|
|
|
case item_range_lo:
|
|
|
|
return "range_lo"
|
|
|
|
|
|
|
|
case item_range_hi:
|
|
|
|
return "range_hi"
|
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
case item_range_between:
|
|
|
|
return "range_between"
|
2015-12-25 21:08:54 +03:00
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
case item_terms_open:
|
|
|
|
return "terms_open"
|
|
|
|
|
|
|
|
case item_terms_close:
|
|
|
|
return "terms_close"
|
2015-12-25 21:08:54 +03:00
|
|
|
|
|
|
|
default:
|
|
|
|
return "undef"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-24 22:30:20 +03:00
|
|
|
type item struct {
|
|
|
|
t itemType
|
|
|
|
s string
|
|
|
|
}
|
|
|
|
|
2015-12-25 19:40:36 +03:00
|
|
|
func (i item) String() string {
|
2015-12-25 21:08:54 +03:00
|
|
|
return fmt.Sprintf("%v<%s>", i.t, i.s)
|
2015-12-25 19:40:36 +03:00
|
|
|
}
|
|
|
|
|
2015-12-24 22:30:20 +03:00
|
|
|
type lexer struct {
|
2016-01-08 20:14:31 +03:00
|
|
|
input string
|
|
|
|
start int
|
|
|
|
pos int
|
|
|
|
width int
|
|
|
|
runes int
|
|
|
|
termScopes []int
|
|
|
|
termPhrases map[int]int
|
|
|
|
state stateFn
|
|
|
|
items chan item
|
2015-12-24 22:30:20 +03:00
|
|
|
}
|
|
|
|
|
2015-12-25 19:40:36 +03:00
|
|
|
func newLexer(source string) *lexer {
|
|
|
|
l := &lexer{
|
2016-01-08 20:14:31 +03:00
|
|
|
input: source,
|
2016-05-12 00:17:33 +03:00
|
|
|
state: lexRaw,
|
2016-02-25 00:31:37 +03:00
|
|
|
items: make(chan item, len(source)),
|
2016-01-08 20:14:31 +03:00
|
|
|
termPhrases: make(map[int]int),
|
2015-12-25 19:40:36 +03:00
|
|
|
}
|
|
|
|
return l
|
|
|
|
}
|
|
|
|
|
2015-12-24 22:30:20 +03:00
|
|
|
func (l *lexer) run() {
|
2016-05-12 00:17:33 +03:00
|
|
|
for state := lexRaw; state != nil; {
|
2015-12-24 22:30:20 +03:00
|
|
|
state = state(l)
|
|
|
|
}
|
|
|
|
close(l.items)
|
|
|
|
}
|
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
func (l *lexer) nextItem() item {
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case item := <-l.items:
|
|
|
|
return item
|
|
|
|
default:
|
|
|
|
if l.state == nil {
|
|
|
|
return item{t: item_eof}
|
|
|
|
}
|
|
|
|
|
|
|
|
l.state = l.state(l)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
panic("something went wrong")
|
|
|
|
}
|
|
|
|
|
2015-12-25 19:40:36 +03:00
|
|
|
func (l *lexer) read() (r rune) {
|
2015-12-24 22:30:20 +03:00
|
|
|
if l.pos >= len(l.input) {
|
|
|
|
return eof
|
|
|
|
}
|
|
|
|
|
2015-12-25 19:40:36 +03:00
|
|
|
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
|
2015-12-24 22:30:20 +03:00
|
|
|
l.pos += l.width
|
|
|
|
l.runes++
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) unread() {
|
|
|
|
l.pos -= l.width
|
|
|
|
l.runes--
|
|
|
|
}
|
|
|
|
|
2015-12-26 12:14:30 +03:00
|
|
|
func (l *lexer) reset() {
|
|
|
|
l.pos = l.start
|
2015-12-25 19:40:36 +03:00
|
|
|
l.runes = 0
|
|
|
|
}
|
|
|
|
|
2015-12-24 22:30:20 +03:00
|
|
|
func (l *lexer) ignore() {
|
|
|
|
l.start = l.pos
|
|
|
|
l.runes = 0
|
|
|
|
}
|
|
|
|
|
2015-12-25 19:40:36 +03:00
|
|
|
func (l *lexer) lookahead() rune {
|
2015-12-24 22:30:20 +03:00
|
|
|
r := l.read()
|
2016-01-08 20:14:31 +03:00
|
|
|
if r != eof {
|
|
|
|
l.unread()
|
|
|
|
}
|
2015-12-24 22:30:20 +03:00
|
|
|
return r
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) accept(valid string) bool {
|
|
|
|
if strings.IndexRune(valid, l.read()) != -1 {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
l.unread()
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) acceptAll(valid string) {
|
|
|
|
for strings.IndexRune(valid, l.read()) != -1 {
|
|
|
|
}
|
|
|
|
l.unread()
|
|
|
|
}
|
|
|
|
|
2016-05-12 00:17:33 +03:00
|
|
|
func (l *lexer) emitCurrent(t itemType) {
|
|
|
|
l.emit(t, l.input[l.start:l.pos])
|
|
|
|
}
|
2016-01-08 20:14:31 +03:00
|
|
|
|
2016-05-12 00:17:33 +03:00
|
|
|
func (l *lexer) emit(t itemType, s string) {
|
|
|
|
l.items <- item{t, s}
|
2015-12-24 22:30:20 +03:00
|
|
|
l.start = l.pos
|
|
|
|
l.runes = 0
|
|
|
|
l.width = 0
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) errorf(format string, args ...interface{}) {
|
2015-12-25 19:40:36 +03:00
|
|
|
l.items <- item{item_error, fmt.Sprintf(format, args...)}
|
2015-12-24 22:30:20 +03:00
|
|
|
}
|
|
|
|
|
2016-05-12 00:17:33 +03:00
|
|
|
func (l *lexer) inTerms() bool {
|
|
|
|
return len(l.termScopes) > 0
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexRaw(l *lexer) stateFn {
|
2015-12-24 22:30:20 +03:00
|
|
|
for {
|
2015-12-25 19:40:36 +03:00
|
|
|
c := l.read()
|
|
|
|
if c == eof {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
switch c {
|
2016-01-08 20:14:31 +03:00
|
|
|
case char_single:
|
2015-12-25 19:40:36 +03:00
|
|
|
l.unread()
|
2015-12-24 22:30:20 +03:00
|
|
|
return lexSingle
|
2016-01-08 20:14:31 +03:00
|
|
|
|
|
|
|
case char_any:
|
|
|
|
var n stateFn
|
|
|
|
if l.lookahead() == char_any {
|
|
|
|
n = lexSuper
|
|
|
|
} else {
|
|
|
|
n = lexAny
|
|
|
|
}
|
|
|
|
|
2015-12-25 19:40:36 +03:00
|
|
|
l.unread()
|
2016-01-08 20:14:31 +03:00
|
|
|
return n
|
|
|
|
|
|
|
|
case char_range_open:
|
2015-12-25 19:40:36 +03:00
|
|
|
l.unread()
|
2015-12-24 22:30:20 +03:00
|
|
|
return lexRangeOpen
|
2016-01-08 20:14:31 +03:00
|
|
|
|
|
|
|
case char_terms_open:
|
|
|
|
l.unread()
|
|
|
|
return lexTermsOpen
|
|
|
|
|
|
|
|
case char_terms_close:
|
|
|
|
l.unread()
|
|
|
|
return lexTermsClose
|
|
|
|
|
2016-05-12 00:17:33 +03:00
|
|
|
case char_comma:
|
|
|
|
if l.inTerms() { // if we are not in terms
|
|
|
|
l.unread()
|
|
|
|
return lexSeparator
|
|
|
|
}
|
|
|
|
fallthrough
|
|
|
|
|
|
|
|
default:
|
2016-01-08 20:14:31 +03:00
|
|
|
l.unread()
|
2016-05-12 00:17:33 +03:00
|
|
|
return lexText
|
2015-12-24 22:30:20 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if l.pos > l.start {
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_text)
|
2015-12-24 22:30:20 +03:00
|
|
|
}
|
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
if len(l.termScopes) != 0 {
|
|
|
|
l.errorf("invalid pattern syntax: unclosed terms")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_eof)
|
2015-12-24 22:30:20 +03:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-05-12 00:17:33 +03:00
|
|
|
func lexText(l *lexer) stateFn {
|
|
|
|
var escaped bool
|
|
|
|
var data []rune
|
|
|
|
|
|
|
|
scan:
|
|
|
|
for c := l.read(); c != eof; c = l.read() {
|
|
|
|
switch {
|
|
|
|
case c == char_escape:
|
|
|
|
escaped = true
|
|
|
|
continue
|
|
|
|
|
|
|
|
case !escaped && c == char_comma && l.inTerms():
|
|
|
|
l.unread()
|
|
|
|
break scan
|
|
|
|
|
|
|
|
case !escaped && utf8.RuneLen(c) == 1 && special(byte(c)):
|
|
|
|
l.unread()
|
|
|
|
break scan
|
|
|
|
|
|
|
|
default:
|
|
|
|
data = append(data, c)
|
|
|
|
}
|
|
|
|
|
|
|
|
escaped = false
|
|
|
|
}
|
|
|
|
|
|
|
|
l.emit(item_text, string(data))
|
|
|
|
return lexRaw
|
|
|
|
}
|
|
|
|
|
2015-12-24 22:30:20 +03:00
|
|
|
func lexInsideRange(l *lexer) stateFn {
|
|
|
|
for {
|
2015-12-25 19:40:36 +03:00
|
|
|
c := l.read()
|
|
|
|
if c == eof {
|
|
|
|
l.errorf("unclosed range construction")
|
|
|
|
return nil
|
|
|
|
}
|
2015-12-24 22:30:20 +03:00
|
|
|
|
2015-12-25 19:40:36 +03:00
|
|
|
switch c {
|
2016-01-08 20:14:31 +03:00
|
|
|
case char_range_not:
|
2015-12-24 22:30:20 +03:00
|
|
|
// only first char makes sense
|
2015-12-26 12:14:30 +03:00
|
|
|
if l.pos-l.width == l.start {
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_not)
|
2015-12-24 22:30:20 +03:00
|
|
|
}
|
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
case char_range_between:
|
2015-12-26 12:14:30 +03:00
|
|
|
if l.runes != 2 {
|
2015-12-25 19:40:36 +03:00
|
|
|
l.errorf("unexpected length of lo char inside range")
|
2015-12-24 22:30:20 +03:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-12-26 12:14:30 +03:00
|
|
|
l.reset()
|
2015-12-25 19:40:36 +03:00
|
|
|
return lexRangeHiLo
|
2015-12-24 22:30:20 +03:00
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
case char_range_close:
|
2016-05-12 00:17:33 +03:00
|
|
|
if l.runes == 1 {
|
|
|
|
l.errorf("range should contain at least single char")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-12-25 19:40:36 +03:00
|
|
|
l.unread()
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_text)
|
2015-12-24 22:30:20 +03:00
|
|
|
return lexRangeClose
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func lexRangeHiLo(l *lexer) stateFn {
|
2015-12-25 21:08:54 +03:00
|
|
|
start := l.start
|
|
|
|
|
2015-12-25 19:40:36 +03:00
|
|
|
for {
|
|
|
|
c := l.read()
|
|
|
|
if c == eof {
|
|
|
|
l.errorf("unexpected end of input")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
switch c {
|
2016-01-08 20:14:31 +03:00
|
|
|
case char_range_between:
|
2015-12-26 12:14:30 +03:00
|
|
|
if l.runes != 1 {
|
2015-12-25 21:08:54 +03:00
|
|
|
l.errorf("unexpected length of range: single character expected before minus")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_range_between)
|
2015-12-24 22:30:20 +03:00
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
case char_range_close:
|
2015-12-25 19:40:36 +03:00
|
|
|
l.unread()
|
2015-12-25 21:08:54 +03:00
|
|
|
|
2015-12-26 12:14:30 +03:00
|
|
|
if l.runes != 1 {
|
2015-12-25 21:08:54 +03:00
|
|
|
l.errorf("unexpected length of range: single character expected before close")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_range_hi)
|
2015-12-25 19:40:36 +03:00
|
|
|
return lexRangeClose
|
|
|
|
|
|
|
|
default:
|
2015-12-25 21:08:54 +03:00
|
|
|
if start != l.start {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2015-12-26 12:14:30 +03:00
|
|
|
if l.runes != 1 {
|
2015-12-25 21:08:54 +03:00
|
|
|
l.errorf("unexpected length of range: single character expected at the begining")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_range_lo)
|
2015-12-25 19:40:36 +03:00
|
|
|
}
|
|
|
|
}
|
2015-12-24 22:30:20 +03:00
|
|
|
}
|
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
func lexAny(l *lexer) stateFn {
|
|
|
|
l.pos += 1
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_any)
|
|
|
|
return lexRaw
|
2016-01-08 20:14:31 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
func lexSuper(l *lexer) stateFn {
|
|
|
|
l.pos += 2
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_super)
|
|
|
|
return lexRaw
|
2016-01-08 20:14:31 +03:00
|
|
|
}
|
|
|
|
|
2015-12-24 22:30:20 +03:00
|
|
|
func lexSingle(l *lexer) stateFn {
|
|
|
|
l.pos += 1
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_single)
|
|
|
|
return lexRaw
|
2015-12-24 22:30:20 +03:00
|
|
|
}
|
|
|
|
|
2016-01-08 20:14:31 +03:00
|
|
|
func lexSeparator(l *lexer) stateFn {
|
|
|
|
posOpen := l.termScopes[len(l.termScopes)-1]
|
|
|
|
|
|
|
|
if l.pos-posOpen == 1 {
|
|
|
|
l.errorf("syntax error: empty term before separator")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
l.termPhrases[posOpen] += 1
|
|
|
|
l.pos += 1
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_separator)
|
|
|
|
return lexRaw
|
2016-01-08 20:14:31 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
func lexTermsOpen(l *lexer) stateFn {
|
|
|
|
l.termScopes = append(l.termScopes, l.pos)
|
|
|
|
l.pos += 1
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_terms_open)
|
2016-01-08 20:14:31 +03:00
|
|
|
|
2016-05-12 00:17:33 +03:00
|
|
|
return lexRaw
|
2016-01-08 20:14:31 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
func lexTermsClose(l *lexer) stateFn {
|
|
|
|
if len(l.termScopes) == 0 {
|
|
|
|
l.errorf("unexpected closing of terms: there is no opened terms")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
lastOpen := len(l.termScopes) - 1
|
|
|
|
posOpen := l.termScopes[lastOpen]
|
|
|
|
|
|
|
|
// if it is empty term
|
|
|
|
if posOpen == l.pos-1 {
|
|
|
|
l.errorf("term could not be empty")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if l.termPhrases[posOpen] == 0 {
|
|
|
|
l.errorf("term must contain >1 phrases")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// cleanup
|
|
|
|
l.termScopes = l.termScopes[:lastOpen]
|
|
|
|
delete(l.termPhrases, posOpen)
|
|
|
|
|
|
|
|
l.pos += 1
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_terms_close)
|
2016-01-08 20:14:31 +03:00
|
|
|
|
2016-05-12 00:17:33 +03:00
|
|
|
return lexRaw
|
2016-01-08 20:14:31 +03:00
|
|
|
}
|
|
|
|
|
2015-12-25 19:40:36 +03:00
|
|
|
func lexRangeOpen(l *lexer) stateFn {
|
|
|
|
l.pos += 1
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_range_open)
|
2015-12-25 19:40:36 +03:00
|
|
|
return lexInsideRange
|
|
|
|
}
|
|
|
|
|
2015-12-24 22:30:20 +03:00
|
|
|
func lexRangeClose(l *lexer) stateFn {
|
|
|
|
l.pos += 1
|
2016-05-12 00:17:33 +03:00
|
|
|
l.emitCurrent(item_range_close)
|
|
|
|
return lexRaw
|
2015-12-24 22:30:20 +03:00
|
|
|
}
|