mirror of https://github.com/tidwall/tile38.git
1142 lines
28 KiB
Go
1142 lines
28 KiB
Go
// Copyright 2013-2018 The NATS Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Customized heavily from
|
|
// https://github.com/BurntSushi/toml/blob/master/lex.go, which is based on
|
|
// Rob Pike's talk: http://cuddle.googlecode.com/hg/talk/lex.html
|
|
|
|
// The format supported is less restrictive than today's formats.
|
|
// Supports mixed Arrays [], nested Maps {}, multiple comment types (# and //)
|
|
// Also supports key value assignments using '=' or ':' or whiteSpace()
|
|
// e.g. foo = 2, foo : 2, foo 2
|
|
// maps can be assigned with no key separator as well
|
|
// semicolons as value terminators in key/value assignments are optional
|
|
//
|
|
// see lex_test.go for more examples.
|
|
|
|
package conf
|
|
|
|
import (
|
|
"encoding/hex"
|
|
"fmt"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// itemType identifies the category of a lexed token (item).
type itemType int
|
|
|
|
// Token types produced by the lexer and consumed by the parser.
const (
	itemError itemType = iota // lexing failed; the item's val holds the message
	itemNIL                   // used in the parser to indicate no type
	itemEOF
	itemKey
	itemText
	itemString
	itemBool
	itemInteger
	itemFloat
	itemDatetime
	itemArrayStart
	itemArrayEnd
	itemMapStart
	itemMapEnd
	itemCommentStart
	itemVariable
	itemInclude
)
|
|
|
|
// Special characters recognized by the lexer.
const (
	eof               = 0    // sentinel returned by next() at end of input
	mapStart          = '{'
	mapEnd            = '}'
	keySepEqual       = '='
	keySepColon       = ':'
	arrayStart        = '['
	arrayEnd          = ']'
	arrayValTerm      = ','
	mapValTerm        = ','
	commentHashStart  = '#'
	commentSlashStart = '/' // two of these ("//") begin a comment
	dqStringStart     = '"'
	dqStringEnd       = '"'
	sqStringStart     = '\''
	sqStringEnd       = '\''
	optValTerm        = ';' // optional terminator after a key/value pair
	topOptStart       = '{' // optional brace wrapping the entire top level
	topOptValTerm     = ','
	topOptTerm        = '}'
	blockStart        = '('
	blockEnd          = ')'
)
|
|
|
|
// stateFn is one step of the lexer state machine: it consumes input and
// returns the next step, or nil when lexing is finished (or has errored).
type stateFn func(lx *lexer) stateFn
|
|
|
|
// lexer holds the scanner state while tokenizing a configuration input.
type lexer struct {
	input string  // full text being lexed
	start int     // byte offset where the current token began
	pos   int     // byte offset of the next rune to read
	width int     // byte width of the most recently read rune (for backup)
	line  int     // current 1-based line number, for error reporting
	state stateFn // next state function to run
	items chan item // buffered channel of emitted tokens

	// A stack of state functions used to maintain context.
	// The idea is to reuse parts of the state machine in various places.
	// For example, values can appear at the top level or within arbitrarily
	// nested arrays. The last state on the stack is used after a value has
	// been lexed. Similarly for comments.
	stack []stateFn

	// Used for processing escapable substrings in double-quoted and raw strings
	stringParts   []string // literal pieces accumulated before each escape
	stringStateFn stateFn  // state to resume after an escape sequence
}
|
|
|
|
// item is a single lexed token delivered to the parser.
type item struct {
	typ  itemType // token category
	val  string   // token text
	line int      // line number where the token was produced
}
|
|
|
|
func (lx *lexer) nextItem() item {
|
|
for {
|
|
select {
|
|
case item := <-lx.items:
|
|
return item
|
|
default:
|
|
lx.state = lx.state(lx)
|
|
}
|
|
}
|
|
}
|
|
|
|
func lex(input string) *lexer {
|
|
lx := &lexer{
|
|
input: input,
|
|
state: lexTop,
|
|
line: 1,
|
|
items: make(chan item, 10),
|
|
stack: make([]stateFn, 0, 10),
|
|
stringParts: []string{},
|
|
}
|
|
return lx
|
|
}
|
|
|
|
// push saves state onto the context stack so pop can resume it after the
// current value or comment has been fully lexed.
func (lx *lexer) push(state stateFn) {
	lx.stack = append(lx.stack, state)
}
|
|
|
|
func (lx *lexer) pop() stateFn {
|
|
if len(lx.stack) == 0 {
|
|
return lx.errorf("BUG in lexer: no states to pop.")
|
|
}
|
|
li := len(lx.stack) - 1
|
|
last := lx.stack[li]
|
|
lx.stack = lx.stack[0:li]
|
|
return last
|
|
}
|
|
|
|
// emit sends a token of type typ to the consumer. The token text is any
// accumulated escaped string parts followed by the raw span [start, pos);
// start is then advanced past the consumed text.
// NOTE(review): unlike emitString, this does not reset lx.stringParts —
// presumably callers only reach here with no pending parts; verify.
func (lx *lexer) emit(typ itemType) {
	lx.items <- item{typ, strings.Join(lx.stringParts, "") + lx.input[lx.start:lx.pos], lx.line}
	lx.start = lx.pos
}
|
|
|
|
func (lx *lexer) emitString() {
|
|
var finalString string
|
|
if len(lx.stringParts) > 0 {
|
|
finalString = strings.Join(lx.stringParts, "") + lx.input[lx.start:lx.pos]
|
|
lx.stringParts = []string{}
|
|
} else {
|
|
finalString = lx.input[lx.start:lx.pos]
|
|
}
|
|
lx.items <- item{itemString, finalString, lx.line}
|
|
lx.start = lx.pos
|
|
}
|
|
|
|
// addCurrentStringPart stores the raw input consumed so far, minus the
// trailing offset bytes (e.g. the backslash that begins an escape), as a
// pending string part, then advances start to pos.
func (lx *lexer) addCurrentStringPart(offset int) {
	lx.stringParts = append(lx.stringParts, lx.input[lx.start:lx.pos-offset])
	lx.start = lx.pos
}
|
|
|
|
// addStringPart appends the already-decoded string s (the expansion of an
// escape sequence) to the pending parts and resumes the string state that
// was active before the escape.
func (lx *lexer) addStringPart(s string) stateFn {
	lx.stringParts = append(lx.stringParts, s)
	lx.start = lx.pos
	return lx.stringStateFn
}
|
|
|
|
func (lx *lexer) hasEscapedParts() bool {
|
|
return len(lx.stringParts) > 0
|
|
}
|
|
|
|
// next returns the next rune in the input and advances the position, or
// eof when the input is exhausted. Line counting happens here (before the
// rune is decoded) so errors report accurate line numbers.
func (lx *lexer) next() (r rune) {
	if lx.pos >= len(lx.input) {
		// width 0 makes a subsequent backup() a no-op at EOF.
		lx.width = 0
		return eof
	}

	if lx.input[lx.pos] == '\n' {
		lx.line++
	}
	r, lx.width = utf8.DecodeRuneInString(lx.input[lx.pos:])
	lx.pos += lx.width
	return r
}
|
|
|
|
// ignore skips over the pending input before this point, discarding it
// from the token under construction.
func (lx *lexer) ignore() {
	lx.start = lx.pos
}
|
|
|
|
// backup steps back one rune. Can be called only once per call of next.
func (lx *lexer) backup() {
	lx.pos -= lx.width
	// Undo the line increment performed by next() if we just un-read a
	// newline character.
	if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
		lx.line--
	}
}
|
|
|
|
// peek returns but does not consume the next rune in the input.
func (lx *lexer) peek() rune {
	r := lx.next()
	lx.backup()
	return r
}
|
|
|
|
// errorf stops all lexing by emitting an error and returning `nil`.
|
|
// Note that any value that is a character is escaped if it's a special
|
|
// character (new lines, tabs, etc.).
|
|
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
|
|
for i, value := range values {
|
|
if v, ok := value.(rune); ok {
|
|
values[i] = escapeSpecial(v)
|
|
}
|
|
}
|
|
lx.items <- item{
|
|
itemError,
|
|
fmt.Sprintf(format, values...),
|
|
lx.line,
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// lexTop consumes elements at the top level of data structure.
func lexTop(lx *lexer) stateFn {
	r := lx.next()
	if unicode.IsSpace(r) {
		// Whitespace between top-level elements is insignificant.
		return lexSkip(lx, lexTop)
	}

	switch r {
	case topOptStart:
		// An optional '{' may wrap the entire top level; just skip it.
		return lexSkip(lx, lexTop)
	case commentHashStart:
		lx.push(lexTop)
		return lexCommentStart
	case commentSlashStart:
		// Only "//" starts a comment. A lone '/' backs up the second rune
		// and falls through to the eof case, which reports the unexpected
		// unconsumed input as an error.
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexTop)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case eof:
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}

	// At this point, the only valid item can be a key, so we back up
	// and let the key lexer do the rest.
	lx.backup()
	lx.push(lexTopValueEnd)
	return lexKeyStart
}
|
|
|
|
// lexTopValueEnd is entered whenever a top-level value has been consumed.
// It must see only whitespace, and will turn back to lexTop upon a new line.
// If it sees EOF, it will quit the lexer successfully.
func lexTopValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == commentHashStart:
		// a comment will read to a new line for us.
		lx.push(lexTop)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexTop)
			return lexCommentStart
		}
		// A lone '/' backs up the second rune and falls through, so the
		// '/' itself is treated like whitespace and skipped.
		lx.backup()
		fallthrough
	case isWhitespace(r):
		return lexTopValueEnd
	case isNL(r) || r == eof || r == optValTerm || r == topOptValTerm || r == topOptTerm:
		lx.ignore()
		return lexTop
	}
	return lx.errorf("Expected a top-level value to end with a new line, "+
		"comment or EOF, but got '%v' instead.", r)
}
|
|
|
|
// lexKeyStart consumes a key name up until the first non-whitespace character.
// lexKeyStart will ignore whitespace. It will also eat enclosing quotes.
func lexKeyStart(lx *lexer) stateFn {
	r := lx.peek()
	switch {
	case isKeySeparator(r):
		// A separator with no key before it is malformed input.
		return lx.errorf("Unexpected key separator '%v'", r)
	case unicode.IsSpace(r):
		lx.next()
		return lexSkip(lx, lexKeyStart)
	case r == dqStringStart:
		// Consume and drop the opening quote; the key lexer stops at the
		// closing quote.
		lx.next()
		return lexSkip(lx, lexDubQuotedKey)
	case r == sqStringStart:
		lx.next()
		return lexSkip(lx, lexQuotedKey)
	}
	// Bare (unquoted) key: consume its first rune and continue in lexKey.
	lx.ignore()
	lx.next()
	return lexKey
}
|
|
|
|
// lexDubQuotedKey consumes the text of a key between quotes.
|
|
func lexDubQuotedKey(lx *lexer) stateFn {
|
|
r := lx.peek()
|
|
if r == dqStringEnd {
|
|
lx.emit(itemKey)
|
|
lx.next()
|
|
return lexSkip(lx, lexKeyEnd)
|
|
}
|
|
lx.next()
|
|
return lexDubQuotedKey
|
|
}
|
|
|
|
// lexQuotedKey consumes the text of a key between quotes.
|
|
func lexQuotedKey(lx *lexer) stateFn {
|
|
r := lx.peek()
|
|
if r == sqStringEnd {
|
|
lx.emit(itemKey)
|
|
lx.next()
|
|
return lexSkip(lx, lexKeyEnd)
|
|
}
|
|
lx.next()
|
|
return lexQuotedKey
|
|
}
|
|
|
|
// keyCheckKeyword will check for reserved keywords as the key value when the key is
// separated with a space.
func (lx *lexer) keyCheckKeyword(fallThrough, push stateFn) stateFn {
	key := strings.ToLower(lx.input[lx.start:lx.pos])
	switch key {
	case "include":
		// "include" is a directive, not a key: drop its text and lex the
		// include target instead. The optional push state resumes the
		// caller's context afterwards.
		lx.ignore()
		if push != nil {
			lx.push(push)
		}
		return lexIncludeStart
	}
	// Not a keyword; emit as a normal key.
	lx.emit(itemKey)
	return fallThrough
}
|
|
|
|
// lexIncludeStart will consume the whitespace til the start of the value.
|
|
func lexIncludeStart(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if isWhitespace(r) {
|
|
return lexSkip(lx, lexIncludeStart)
|
|
}
|
|
lx.backup()
|
|
return lexInclude
|
|
}
|
|
|
|
// lexIncludeQuotedString consumes the inner contents of a string. It assumes that the
|
|
// beginning '"' has already been consumed and ignored. It will not interpret any
|
|
// internal contents.
|
|
func lexIncludeQuotedString(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch {
|
|
case r == sqStringEnd:
|
|
lx.backup()
|
|
lx.emit(itemInclude)
|
|
lx.next()
|
|
lx.ignore()
|
|
return lx.pop()
|
|
}
|
|
return lexIncludeQuotedString
|
|
}
|
|
|
|
// lexIncludeDubQuotedString consumes the inner contents of a string. It assumes that the
|
|
// beginning '"' has already been consumed and ignored. It will not interpret any
|
|
// internal contents.
|
|
func lexIncludeDubQuotedString(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch {
|
|
case r == dqStringEnd:
|
|
lx.backup()
|
|
lx.emit(itemInclude)
|
|
lx.next()
|
|
lx.ignore()
|
|
return lx.pop()
|
|
}
|
|
return lexIncludeDubQuotedString
|
|
}
|
|
|
|
// lexIncludeString consumes the inner contents of a raw (unquoted) include
// path, terminated by whitespace, a newline, ';', '}' or EOF.
func lexIncludeString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isNL(r) || r == eof || r == optValTerm || r == mapEnd || isWhitespace(r):
		// Terminator stays unconsumed for the caller's state to handle.
		lx.backup()
		lx.emit(itemInclude)
		return lx.pop()
	case r == sqStringEnd:
		// A stray closing single quote also ends the path; it is consumed
		// and dropped.
		lx.backup()
		lx.emit(itemInclude)
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexIncludeString
}
|
|
|
|
// lexInclude will consume the include value: a quoted or raw file path.
// Anything that cannot be a path (arrays, maps, blocks, numbers, escapes,
// newlines) is rejected with an explicit error.
func lexInclude(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == sqStringStart:
		lx.ignore() // ignore the " or '
		return lexIncludeQuotedString
	case r == dqStringStart:
		lx.ignore() // ignore the " or '
		return lexIncludeDubQuotedString
	case r == arrayStart:
		return lx.errorf("Expected include value but found start of an array")
	case r == mapStart:
		return lx.errorf("Expected include value but found start of a map")
	case r == blockStart:
		return lx.errorf("Expected include value but found start of a block")
	case unicode.IsDigit(r), r == '-':
		return lx.errorf("Expected include value but found start of a number")
	case r == '\\':
		return lx.errorf("Expected include value but found escape sequence")
	case isNL(r):
		return lx.errorf("Expected include value but found new line")
	}
	// Otherwise treat the value as a bare path.
	lx.backup()
	return lexIncludeString
}
|
|
|
|
// lexKey consumes the text of a key. Assumes that the first character (which
// is not whitespace) has already been consumed.
func lexKey(lx *lexer) stateFn {
	r := lx.peek()
	if unicode.IsSpace(r) {
		// Spaces signal we could be looking at a keyword, e.g. include.
		// Keywords will eat the keyword and set the appropriate return stateFn.
		return lx.keyCheckKeyword(lexKeyEnd, nil)
	} else if isKeySeparator(r) || r == eof {
		lx.emit(itemKey)
		return lexKeyEnd
	}
	lx.next()
	return lexKey
}
|
|
|
|
// lexKeyEnd consumes the end of a key (up to the key separator).
// Assumes that the first whitespace character after a key (or the '=' or ':'
// separator) has NOT been consumed.
func lexKeyEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexKeyEnd)
	case isKeySeparator(r):
		return lexSkip(lx, lexValue)
	case r == eof:
		// A bare key at EOF is accepted with no value.
		lx.emit(itemEOF)
		return nil
	}
	// No separator seen: the value starts right here (whitespace-separated
	// "key value" form).
	lx.backup()
	return lexValue
}
|
|
|
|
// lexValue starts the consumption of a value anywhere a value is expected.
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the stack is popped and returned.
func lexValue(lx *lexer) stateFn {
	// We allow whitespace to precede a value, but NOT new lines.
	// In array syntax, the array states are responsible for ignoring new lines.
	r := lx.next()
	if isWhitespace(r) {
		return lexSkip(lx, lexValue)
	}

	switch {
	case r == arrayStart:
		lx.ignore()
		lx.emit(itemArrayStart)
		return lexArrayValue
	case r == mapStart:
		lx.ignore()
		lx.emit(itemMapStart)
		return lexMapKeyStart
	case r == sqStringStart:
		lx.ignore() // ignore the " or '
		return lexQuotedString
	case r == dqStringStart:
		lx.ignore() // ignore the " or '
		// Remember where to resume after escape sequences.
		lx.stringStateFn = lexDubQuotedString
		return lexDubQuotedString
	case r == '-':
		return lexNegNumberStart
	case r == blockStart:
		lx.ignore()
		return lexBlock
	case unicode.IsDigit(r):
		lx.backup() // avoid an extra state and use the same as above
		return lexNumberOrDateOrIPStart
	case r == '.': // special error case, be kind to users
		return lx.errorf("Floats must start with a digit")
	case isNL(r):
		return lx.errorf("Expected value but found new line")
	}
	// Anything else is a raw (unquoted) string value.
	lx.backup()
	lx.stringStateFn = lexString
	return lexString
}
|
|
|
|
// lexArrayValue consumes one value in an array. It assumes that '[' or ','
// have already been consumed. All whitespace and new lines are ignored.
func lexArrayValue(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexArrayValue)
	case r == commentHashStart:
		lx.push(lexArrayValue)
		return lexCommentStart
	case r == commentSlashStart:
		// Only "//" starts a comment; a lone '/' backs up the second rune
		// and falls through to the terminator-error case below.
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexArrayValue)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case r == arrayValTerm:
		// ",," or "[," — a terminator with no value before it.
		return lx.errorf("Unexpected array value terminator '%v'.", arrayValTerm)
	case r == arrayEnd:
		return lexArrayEnd
	}

	// Anything else begins a value; lexValue takes over and returns to
	// lexArrayValueEnd when done.
	lx.backup()
	lx.push(lexArrayValueEnd)
	return lexValue
}
|
|
|
|
// lexArrayValueEnd consumes the cruft between values of an array. Namely,
// it ignores whitespace and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexArrayValueEnd)
	case r == commentHashStart:
		lx.push(lexArrayValueEnd)
		return lexCommentStart
	case r == commentSlashStart:
		// Only "//" starts a comment; a lone '/' backs up the second rune
		// and falls through, where it is treated as a value terminator.
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexArrayValueEnd)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case r == arrayValTerm || isNL(r):
		// New lines also separate array values.
		return lexSkip(lx, lexArrayValue) // Move onto next
	case r == arrayEnd:
		return lexArrayEnd
	}
	return lx.errorf("Expected an array value terminator %q or an array "+
		"terminator %q, but got '%v' instead.", arrayValTerm, arrayEnd, r)
}
|
|
|
|
// lexArrayEnd finishes the lexing of an array. It assumes that a ']' has
// just been consumed.
func lexArrayEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemArrayEnd)
	return lx.pop()
}
|
|
|
|
// lexMapKeyStart consumes a key name up until the first non-whitespace
// character.
// lexMapKeyStart will ignore whitespace.
func lexMapKeyStart(lx *lexer) stateFn {
	r := lx.peek()
	switch {
	case isKeySeparator(r):
		return lx.errorf("Unexpected key separator '%v'.", r)
	case unicode.IsSpace(r):
		lx.next()
		return lexSkip(lx, lexMapKeyStart)
	case r == mapEnd:
		lx.next()
		return lexSkip(lx, lexMapEnd)
	case r == commentHashStart:
		lx.next()
		lx.push(lexMapKeyStart)
		return lexCommentStart
	case r == commentSlashStart:
		// Only "//" starts a comment here; a lone '/' is backed up and
		// treated as the start of a bare key below.
		lx.next()
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexMapKeyStart)
			return lexCommentStart
		}
		lx.backup()
	case r == sqStringStart:
		lx.next()
		return lexSkip(lx, lexMapQuotedKey)
	case r == dqStringStart:
		lx.next()
		return lexSkip(lx, lexMapDubQuotedKey)
	}
	// Bare (unquoted) key: consume its first rune and continue in lexMapKey.
	lx.ignore()
	lx.next()
	return lexMapKey
}
|
|
|
|
// lexMapQuotedKey consumes the text of a key between quotes.
|
|
func lexMapQuotedKey(lx *lexer) stateFn {
|
|
r := lx.peek()
|
|
if r == sqStringEnd {
|
|
lx.emit(itemKey)
|
|
lx.next()
|
|
return lexSkip(lx, lexMapKeyEnd)
|
|
}
|
|
lx.next()
|
|
return lexMapQuotedKey
|
|
}
|
|
|
|
// lexMapQuotedKey consumes the text of a key between quotes.
|
|
func lexMapDubQuotedKey(lx *lexer) stateFn {
|
|
r := lx.peek()
|
|
if r == dqStringEnd {
|
|
lx.emit(itemKey)
|
|
lx.next()
|
|
return lexSkip(lx, lexMapKeyEnd)
|
|
}
|
|
lx.next()
|
|
return lexMapDubQuotedKey
|
|
}
|
|
|
|
// lexMapKey consumes the text of a key. Assumes that the first character (which
// is not whitespace) has already been consumed.
func lexMapKey(lx *lexer) stateFn {
	r := lx.peek()
	if unicode.IsSpace(r) {
		// Spaces signal we could be looking at a keyword, e.g. include.
		// Keywords will eat the keyword and set the appropriate return stateFn.
		return lx.keyCheckKeyword(lexMapKeyEnd, lexMapValueEnd)
	} else if isKeySeparator(r) {
		lx.emit(itemKey)
		return lexMapKeyEnd
	}
	lx.next()
	return lexMapKey
}
|
|
|
|
// lexMapKeyEnd consumes the end of a key (up to the key separator).
// Assumes that the first whitespace character after a key (or the '='
// separator) has NOT been consumed.
func lexMapKeyEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexMapKeyEnd)
	case isKeySeparator(r):
		return lexSkip(lx, lexMapValue)
	}
	// No separator seen: the value starts right here (whitespace-separated
	// "key value" form).
	lx.backup()
	return lexMapValue
}
|
|
|
|
// lexMapValue consumes one value in a map. It assumes that '{' or ','
// have already been consumed. All whitespace and new lines are ignored.
// Map values can be separated by ',' or simple NLs.
func lexMapValue(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexMapValue)
	case r == mapValTerm:
		// A terminator with no value before it is malformed input.
		return lx.errorf("Unexpected map value terminator %q.", mapValTerm)
	case r == mapEnd:
		return lexSkip(lx, lexMapEnd)
	}
	// Anything else begins a value; lexValue takes over and returns to
	// lexMapValueEnd when done.
	lx.backup()
	lx.push(lexMapValueEnd)
	return lexValue
}
|
|
|
|
// lexMapValueEnd consumes the cruft between values of a map. Namely,
// it ignores whitespace and expects either a ',' or a '}'.
func lexMapValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexMapValueEnd)
	case r == commentHashStart:
		lx.push(lexMapValueEnd)
		return lexCommentStart
	case r == commentSlashStart:
		// Only "//" starts a comment; a lone '/' backs up the second rune
		// and falls through, where it is treated as a value terminator.
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexMapValueEnd)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case r == optValTerm || r == mapValTerm || isNL(r):
		// ';', ',' and new lines all separate map entries.
		return lexSkip(lx, lexMapKeyStart) // Move onto next
	case r == mapEnd:
		return lexSkip(lx, lexMapEnd)
	}
	return lx.errorf("Expected a map value terminator %q or a map "+
		"terminator %q, but got '%v' instead.", mapValTerm, mapEnd, r)
}
|
|
|
|
// lexMapEnd finishes the lexing of a map. It assumes that a '}' has
// just been consumed.
func lexMapEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemMapEnd)
	return lx.pop()
}
|
|
|
|
// Checks if the unquoted string was actually a boolean
|
|
func (lx *lexer) isBool() bool {
|
|
str := strings.ToLower(lx.input[lx.start:lx.pos])
|
|
return str == "true" || str == "false" ||
|
|
str == "on" || str == "off" ||
|
|
str == "yes" || str == "no"
|
|
}
|
|
|
|
// Check if the unquoted string is a variable reference, starting with $.
// NOTE: on success this advances lx.start past the '$' as a side effect,
// so the subsequent emit excludes the sigil from the variable name.
func (lx *lexer) isVariable() bool {
	if lx.input[lx.start] == '$' {
		lx.start += 1
		return true
	}
	return false
}
|
|
|
|
// lexQuotedString consumes the inner contents of a string. It assumes that the
|
|
// beginning '"' has already been consumed and ignored. It will not interpret any
|
|
// internal contents.
|
|
func lexQuotedString(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch {
|
|
case r == sqStringEnd:
|
|
lx.backup()
|
|
lx.emit(itemString)
|
|
lx.next()
|
|
lx.ignore()
|
|
return lx.pop()
|
|
}
|
|
return lexQuotedString
|
|
}
|
|
|
|
// lexDubQuotedString consumes the inner contents of a double-quoted string.
// It assumes that the beginning '"' has already been consumed and ignored.
// Backslash escapes are expanded via lexStringEscape.
func lexDubQuotedString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == '\\':
		// Stash the literal text read so far (excluding the backslash)
		// and process the escape.
		lx.addCurrentStringPart(1)
		return lexStringEscape
	case r == dqStringEnd:
		lx.backup()
		lx.emitString()
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexDubQuotedString
}
|
|
|
|
// lexString consumes the inner contents of a raw (unquoted) string, which
// may still contain backslash escapes. On termination it classifies the
// token as a bool, variable or plain string.
func lexString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == '\\':
		lx.addCurrentStringPart(1)
		return lexStringEscape
	// Termination of non-quoted strings
	case isNL(r) || r == eof || r == optValTerm ||
		r == arrayValTerm || r == arrayEnd || r == mapEnd ||
		isWhitespace(r):

		lx.backup()
		if lx.hasEscapedParts() {
			// Escapes present: always a string, never a bool/variable.
			lx.emitString()
		} else if lx.isBool() {
			lx.emit(itemBool)
		} else if lx.isVariable() {
			// isVariable has advanced start past the '$' sigil.
			lx.emit(itemVariable)
		} else {
			lx.emitString()
		}
		return lx.pop()
	case r == sqStringEnd:
		// A stray closing single quote also terminates the string; it is
		// consumed and dropped.
		lx.backup()
		lx.emitString()
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexString
}
|
|
|
|
// lexBlock consumes the inner contents as a string. It assumes that the
// beginning '(' has already been consumed and ignored. It will continue
// processing until it finds a ')' on a new line by itself.
func lexBlock(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == blockEnd:
		// Step back to the rune before the ')'.
		lx.backup()
		lx.backup()

		// Looking for a ')' character on a line by itself, if the previous
		// character isn't a new line, then break so we keep processing the block.
		if lx.next() != '\n' {
			lx.next()
			break
		}
		// Re-consume the ')'.
		lx.next()

		// Make sure the next character is a new line or an eof. We want a ')' on a
		// bare line by itself.
		switch lx.next() {
		case '\n', eof:
			// Emit everything before the ')' as the block string, then
			// consume and drop the ')'.
			lx.backup()
			lx.backup()
			lx.emit(itemString)
			lx.next()
			lx.ignore()
			return lx.pop()
		}
		lx.backup()
	}
	return lexBlock
}
|
|
|
|
// lexStringEscape consumes an escaped character. It assumes that the preceding
// '\\' has already been consumed. Recognized escapes are expanded and stored
// as string parts; anything else is an error.
func lexStringEscape(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	case 'x':
		// "\xNN": two hex digits follow.
		return lexStringBinary
	case 't':
		return lx.addStringPart("\t")
	case 'n':
		return lx.addStringPart("\n")
	case 'r':
		return lx.addStringPart("\r")
	case '"':
		return lx.addStringPart("\"")
	case '\\':
		return lx.addStringPart("\\")
	}
	return lx.errorf("Invalid escape character '%v'. Only the following "+
		"escape characters are allowed: \\xXX, \\t, \\n, \\r, \\\", \\\\.", r)
}
|
|
|
|
// lexStringBinary consumes two hexadecimal digits following '\x'. It assumes
|
|
// that the '\x' has already been consumed.
|
|
func lexStringBinary(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if isNL(r) {
|
|
return lx.errorf("Expected two hexadecimal digits after '\\x', but hit end of line")
|
|
}
|
|
r = lx.next()
|
|
if isNL(r) {
|
|
return lx.errorf("Expected two hexadecimal digits after '\\x', but hit end of line")
|
|
}
|
|
offset := lx.pos - 2
|
|
byteString, err := hex.DecodeString(lx.input[offset:lx.pos])
|
|
if err != nil {
|
|
return lx.errorf("Expected two hexadecimal digits after '\\x', but got '%s'", lx.input[offset:lx.pos])
|
|
}
|
|
lx.addStringPart(string(byteString))
|
|
return lx.stringStateFn
|
|
}
|
|
|
|
// lexNumberOrDateStart consumes either a (positive) integer, a float, a datetime, or IP.
|
|
// It assumes that NO negative sign has been consumed, that is triggered above.
|
|
func lexNumberOrDateOrIPStart(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if !unicode.IsDigit(r) {
|
|
if r == '.' {
|
|
return lx.errorf("Floats must start with a digit, not '.'.")
|
|
}
|
|
return lx.errorf("Expected a digit but got '%v'.", r)
|
|
}
|
|
return lexNumberOrDateOrIP
|
|
}
|
|
|
|
// lexNumberOrDateOrIP consumes either a (positive) integer, float, datetime or IP.
func lexNumberOrDateOrIP(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == '-':
		// A '-' after exactly four digits ("YYYY-") means a datetime.
		if lx.pos-lx.start != 5 {
			return lx.errorf("All ISO8601 dates must be in full Zulu form.")
		}
		return lexDateAfterYear
	case unicode.IsDigit(r):
		return lexNumberOrDateOrIP
	case r == '.':
		return lexFloatStart // Assume float at first, but could be IP
	case isNumberSuffix(r):
		// Size suffix like 1k or 1Mb.
		return lexConvenientNumber
	}

	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}
|
|
|
|
// lexConvenientNumber is when we have a suffix, e.g. 1k or 1Mb
|
|
func lexConvenientNumber(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch {
|
|
case r == 'b' || r == 'B':
|
|
return lexConvenientNumber
|
|
}
|
|
lx.backup()
|
|
lx.emit(itemInteger)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexDateAfterYear consumes a full Zulu Datetime in ISO8601 format.
// It assumes that "YYYY-" has already been consumed.
func lexDateAfterYear(lx *lexer) stateFn {
	formats := []rune{
		// digits are '0'.
		// everything else is direct equality.
		'0', '0', '-', '0', '0',
		'T',
		'0', '0', ':', '0', '0', ':', '0', '0',
		'Z',
	}
	for _, f := range formats {
		r := lx.next()
		if f == '0' {
			if !unicode.IsDigit(r) {
				return lx.errorf("Expected digit in ISO8601 datetime, "+
					"but found '%v' instead.", r)
			}
		} else if f != r {
			return lx.errorf("Expected '%v' in ISO8601 datetime, "+
				"but found '%v' instead.", f, r)
		}
	}
	lx.emit(itemDatetime)
	return lx.pop()
}
|
|
|
|
// lexNegNumberStart consumes either an integer or a float. It assumes that a
|
|
// negative sign has already been read, but that *no* digits have been consumed.
|
|
// lexNegNumberStart will move to the appropriate integer or float states.
|
|
func lexNegNumberStart(lx *lexer) stateFn {
|
|
// we MUST see a digit. Even floats have to start with a digit.
|
|
r := lx.next()
|
|
if !unicode.IsDigit(r) {
|
|
if r == '.' {
|
|
return lx.errorf("Floats must start with a digit, not '.'.")
|
|
}
|
|
return lx.errorf("Expected a digit but got '%v'.", r)
|
|
}
|
|
return lexNegNumber
|
|
}
|
|
|
|
// lexNegNumber consumes a negative integer or a float after seeing the first digit.
func lexNegNumber(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsDigit(r):
		return lexNegNumber
	case r == '.':
		return lexFloatStart
	case isNumberSuffix(r):
		// Size suffix like -1k or -1Mb.
		return lexConvenientNumber
	}
	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}
|
|
|
|
// lexFloatStart starts the consumption of digits of a float after a '.'.
|
|
// Namely, at least one digit is required.
|
|
func lexFloatStart(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if !unicode.IsDigit(r) {
|
|
return lx.errorf("Floats must have a digit after the '.', but got "+
|
|
"'%v' instead.", r)
|
|
}
|
|
return lexFloat
|
|
}
|
|
|
|
// lexFloat consumes the digits of a float after a '.'.
// Assumes that one digit has been consumed after a '.' already.
func lexFloat(lx *lexer) stateFn {
	r := lx.next()
	if unicode.IsDigit(r) {
		return lexFloat
	}

	// Not a digit, if its another '.', need to see if we falsely assumed a float.
	if r == '.' {
		return lexIPAddr
	}

	lx.backup()
	lx.emit(itemFloat)
	return lx.pop()
}
|
|
|
|
// lexIPAddr consumes IP addrs, like 127.0.0.1:4222
|
|
func lexIPAddr(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if unicode.IsDigit(r) || r == '.' || r == ':' || r == '-' {
|
|
return lexIPAddr
|
|
}
|
|
lx.backup()
|
|
lx.emit(itemString)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexCommentStart begins the lexing of a comment. It will emit
// itemCommentStart and consume no characters, passing control to lexComment.
func lexCommentStart(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemCommentStart)
	return lexComment
}
|
|
|
|
// lexComment lexes an entire comment. It assumes that '#' has been consumed.
|
|
// It will consume *up to* the first new line character, and pass control
|
|
// back to the last state on the stack.
|
|
func lexComment(lx *lexer) stateFn {
|
|
r := lx.peek()
|
|
if isNL(r) || r == eof {
|
|
lx.emit(itemText)
|
|
return lx.pop()
|
|
}
|
|
lx.next()
|
|
return lexComment
|
|
}
|
|
|
|
// lexSkip ignores all slurped input and moves on to the next state.
|
|
func lexSkip(lx *lexer, nextState stateFn) stateFn {
|
|
return func(lx *lexer) stateFn {
|
|
lx.ignore()
|
|
return nextState
|
|
}
|
|
}
|
|
|
|
// isNumberSuffix reports whether r is a size suffix (k/K, m/M, g/G)
// that may follow an integer literal.
func isNumberSuffix(r rune) bool {
	switch r {
	case 'k', 'K', 'm', 'M', 'g', 'G':
		return true
	}
	return false
}
|
|
|
|
// Tests for both key separators
|
|
func isKeySeparator(r rune) bool {
|
|
return r == keySepEqual || r == keySepColon
|
|
}
|
|
|
|
// isWhitespace returns true if `r` is a whitespace character according
// to the spec (space or tab only; new lines are handled separately).
func isWhitespace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
|
|
|
|
// isNL reports whether r is a new-line character (LF or CR).
func isNL(r rune) bool {
	switch r {
	case '\n', '\r':
		return true
	}
	return false
}
|
|
|
|
func (itype itemType) String() string {
|
|
switch itype {
|
|
case itemError:
|
|
return "Error"
|
|
case itemNIL:
|
|
return "NIL"
|
|
case itemEOF:
|
|
return "EOF"
|
|
case itemText:
|
|
return "Text"
|
|
case itemString:
|
|
return "String"
|
|
case itemBool:
|
|
return "Bool"
|
|
case itemInteger:
|
|
return "Integer"
|
|
case itemFloat:
|
|
return "Float"
|
|
case itemDatetime:
|
|
return "DateTime"
|
|
case itemKey:
|
|
return "Key"
|
|
case itemArrayStart:
|
|
return "ArrayStart"
|
|
case itemArrayEnd:
|
|
return "ArrayEnd"
|
|
case itemMapStart:
|
|
return "MapStart"
|
|
case itemMapEnd:
|
|
return "MapEnd"
|
|
case itemCommentStart:
|
|
return "CommentStart"
|
|
case itemVariable:
|
|
return "Variable"
|
|
case itemInclude:
|
|
return "Include"
|
|
}
|
|
panic(fmt.Sprintf("BUG: Unknown type '%s'.", itype.String()))
|
|
}
|
|
|
|
// String renders an item as "(Type, 'value', line)" for debugging.
func (item item) String() string {
	return fmt.Sprintf("(%s, '%s', %d)", item.typ.String(), item.val, item.line)
}
|
|
|
|
// escapeSpecial rewrites special characters into printable escape
// sequences for inclusion in error messages.
func escapeSpecial(c rune) string {
	if c == '\n' {
		return "\\n"
	}
	return string(c)
}
|