tile38/vendor/github.com/yuin/gopher-lua/parse/lexer.go

package parse

import (
	"bufio"
	"bytes"
	"fmt"
	"github.com/yuin/gopher-lua/ast"
	"io"
	"reflect"
	"strconv"
	"strings"
)
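
// EOF is the sentinel character and token type returned once the input is exhausted.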
const EOF = -1
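
// whitespace1 and whitespace2 are bit sets of byte values that skipWhiteSpace
// treats as skippable; whitespace2 additionally covers line breaks.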
const whitespace1 = 1<<'\t' | 1<<' '
const whitespace2 = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '
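
// Error describes a lexing or parsing failure, together with the position and
// the token text at which it occurred.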
type Error struct {
	Pos     ast.Position
	Message string
	Token   string
}

func (e *Error) Error() string {
	pos := e.Pos
	if pos.Line == EOF {
		return fmt.Sprintf("%v at EOF: %s\n", pos.Source, e.Message)
	}
	return fmt.Sprintf("%v line:%d(column:%d) near '%v': %s\n", pos.Source, pos.Line, pos.Column, e.Token, e.Message)
}

func writeChar(buf *bytes.Buffer, c int) { buf.WriteByte(byte(c)) }

func isDecimal(ch int) bool { return '0' <= ch && ch <= '9' }
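
// isIdent reports whether ch may appear in an identifier; digits are only
// allowed when pos > 0, i.e. not as the first character.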
func isIdent(ch int, pos int) bool {
	return ch == '_' || 'A' <= ch && ch <= 'Z' || 'a' <= ch && ch <= 'z' || isDecimal(ch) && pos > 0
}
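
// isDigit reports whether ch is a hexadecimal digit (0-9, a-f, A-F).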
func isDigit(ch int) bool {
	return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
}
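
// Scanner reads Lua source from a buffered reader and tracks the current position.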
type Scanner struct {
	Pos    ast.Position
	reader *bufio.Reader
}
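
// NewScanner creates a Scanner for reader; source is the name reported in
// positions and error messages.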
func NewScanner(reader io.Reader, source string) *Scanner {
	return &Scanner{
		Pos:    ast.Position{source, 1, 0},
		reader: bufio.NewReaderSize(reader, 4096),
	}
}
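
// Error creates an *Error at the scanner's current position.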
func (sc *Scanner) Error(tok string, msg string) *Error { return &Error{sc.Pos, msg, tok} }
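
// TokenError creates an *Error at the given token's position.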
func (sc *Scanner) TokenError(tok ast.Token, msg string) *Error { return &Error{tok.Pos, msg, tok.Str} }
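
// readNext returns the next raw byte, or EOF at the end of the input.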
func (sc *Scanner) readNext() int {
	ch, err := sc.reader.ReadByte()
	if err == io.EOF {
		return EOF
	}
	return int(ch)
}
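
// Newline records a line break: it advances Pos to the next line, resets the
// column, and folds a following '\r' (after '\n') or '\n' (after '\r') into
// the same break.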
func (sc *Scanner) Newline(ch int) {
	if ch < 0 {
		return
	}
	sc.Pos.Line += 1
	sc.Pos.Column = 0
	next := sc.Peek()
	if ch == '\n' && next == '\r' || ch == '\r' && next == '\n' {
		sc.reader.ReadByte()
	}
}
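
// Next returns the next character with line breaks normalized to '\n',
// updating the position as it goes; it returns EOF at the end of the input.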
func (sc *Scanner) Next() int {
	ch := sc.readNext()
	switch ch {
	case '\n', '\r':
		sc.Newline(ch)
		ch = int('\n')
	case EOF:
		sc.Pos.Line = EOF
		sc.Pos.Column = 0
	default:
		sc.Pos.Column++
	}
	return ch
}
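
// Peek returns the next byte without consuming it.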
func (sc *Scanner) Peek() int {
	ch := sc.readNext()
	if ch != EOF {
		sc.reader.UnreadByte()
	}
	return ch
}
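
// skipWhiteSpace consumes characters whose bits are set in whitespace and
// returns the first character that is not skipped.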
func (sc *Scanner) skipWhiteSpace(whitespace int64) int {
	ch := sc.Next()
	for ; whitespace&(1<<uint(ch)) != 0; ch = sc.Next() {
	}
	return ch
}
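
// skipComments consumes the remainder of a comment after the leading "--":
// long-bracket comments are handled by scanMultilineString, everything else
// is skipped up to the end of the line.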
func (sc *Scanner) skipComments(ch int) error {
	// multiline comment
	if sc.Peek() == '[' {
		ch = sc.Next()
		if sc.Peek() == '[' || sc.Peek() == '=' {
			var buf bytes.Buffer
			if err := sc.scanMultilineString(sc.Next(), &buf); err != nil {
				return sc.Error(buf.String(), "invalid multiline comment")
			}
			return nil
		}
	}
	for {
		if ch == '\n' || ch == '\r' || ch < 0 {
			break
		}
		ch = sc.Next()
	}
	return nil
}

func (sc *Scanner) scanIdent(ch int, buf *bytes.Buffer) error {
	writeChar(buf, ch)
	for isIdent(sc.Peek(), 1) {
		writeChar(buf, sc.Next())
	}
	return nil
}

func (sc *Scanner) scanDecimal(ch int, buf *bytes.Buffer) error {
	writeChar(buf, ch)
	for isDecimal(sc.Peek()) {
		writeChar(buf, sc.Next())
	}
	return nil
}
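
// scanNumber scans a numeric literal: a hexadecimal literal after a "0x"/"0X"
// prefix, or a decimal literal with an optional fraction and exponent.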
func (sc *Scanner) scanNumber(ch int, buf *bytes.Buffer) error {
	if ch == '0' { // leading zero: may start a hexadecimal literal
		if sc.Peek() == 'x' || sc.Peek() == 'X' {
			writeChar(buf, ch)
			writeChar(buf, sc.Next())
			hasvalue := false
			for isDigit(sc.Peek()) {
				writeChar(buf, sc.Next())
				hasvalue = true
			}
			if !hasvalue {
				return sc.Error(buf.String(), "illegal hexadecimal number")
			}
			return nil
		} else if sc.Peek() != '.' && isDecimal(sc.Peek()) {
			ch = sc.Next()
		}
	}
	sc.scanDecimal(ch, buf)
	if sc.Peek() == '.' {
		sc.scanDecimal(sc.Next(), buf)
	}
	if ch = sc.Peek(); ch == 'e' || ch == 'E' {
		writeChar(buf, sc.Next())
		if ch = sc.Peek(); ch == '-' || ch == '+' {
			writeChar(buf, sc.Next())
		}
		sc.scanDecimal(sc.Next(), buf)
	}
	return nil
}
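
// scanString scans a single- or double-quoted string, decoding escape
// sequences; the opening quote has already been consumed and quote is the
// closing delimiter.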
func (sc *Scanner) scanString(quote int, buf *bytes.Buffer) error {
	ch := sc.Next()
	for ch != quote {
		if ch == '\n' || ch == '\r' || ch < 0 {
			return sc.Error(buf.String(), "unterminated string")
		}
		if ch == '\\' {
			if err := sc.scanEscape(ch, buf); err != nil {
				return err
			}
		} else {
			writeChar(buf, ch)
		}
		ch = sc.Next()
	}
	return nil
}
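
// scanEscape decodes one backslash escape inside a quoted string, including
// decimal byte escapes of up to three digits (\ddd).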
func (sc *Scanner) scanEscape(ch int, buf *bytes.Buffer) error {
	ch = sc.Next()
	switch ch {
	case 'a':
		buf.WriteByte('\a')
	case 'b':
		buf.WriteByte('\b')
	case 'f':
		buf.WriteByte('\f')
	case 'n':
		buf.WriteByte('\n')
	case 'r':
		buf.WriteByte('\r')
	case 't':
		buf.WriteByte('\t')
	case 'v':
		buf.WriteByte('\v')
	case '\\':
		buf.WriteByte('\\')
	case '"':
		buf.WriteByte('"')
	case '\'':
		buf.WriteByte('\'')
	case '\n':
		buf.WriteByte('\n')
	case '\r':
		buf.WriteByte('\n')
		sc.Newline('\r')
	default:
		if '0' <= ch && ch <= '9' {
			bytes := []byte{byte(ch)}
			for i := 0; i < 2 && isDecimal(sc.Peek()); i++ {
				bytes = append(bytes, byte(sc.Next()))
			}
			val, _ := strconv.ParseInt(string(bytes), 10, 32)
			writeChar(buf, int(val))
		} else {
			buf.WriteByte('\\')
			writeChar(buf, ch)
			return sc.Error(buf.String(), "Invalid escape sequence")
		}
	}
	return nil
}
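
// countSep counts the '=' characters of a long-bracket level and returns the
// count together with the first non-'=' character.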
func (sc *Scanner) countSep(ch int) (int, int) {
	count := 0
	for ; ch == '='; count = count + 1 {
		ch = sc.Next()
	}
	return count, ch
}
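
// scanMultilineString scans a long-bracket string or comment body such as
// [[...]] or [=[...]=]; the closing bracket must use the same number of '='
// as the opening one, and a line break directly after the opening bracket is
// dropped, following Lua's long-string rules.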
func (sc *Scanner) scanMultilineString(ch int, buf *bytes.Buffer) error {
	var count1, count2 int
	count1, ch = sc.countSep(ch)
	if ch != '[' {
		return sc.Error(string(ch), "invalid multiline string")
	}
	ch = sc.Next()
	if ch == '\n' || ch == '\r' {
		ch = sc.Next()
	}
	for {
		if ch < 0 {
			return sc.Error(buf.String(), "unterminated multiline string")
		} else if ch == ']' {
			count2, ch = sc.countSep(sc.Next())
			if count1 == count2 && ch == ']' {
				goto finally
			}
			buf.WriteByte(']')
			buf.WriteString(strings.Repeat("=", count2))
			continue
		}
		writeChar(buf, ch)
		ch = sc.Next()
	}
finally:
	return nil
}
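
// reservedWords maps Lua keywords to their token types.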
var reservedWords = map[string]int{
	"and": TAnd, "break": TBreak, "do": TDo, "else": TElse, "elseif": TElseIf,
	"end": TEnd, "false": TFalse, "for": TFor, "function": TFunction,
	"if": TIf, "in": TIn, "local": TLocal, "nil": TNil, "not": TNot, "or": TOr,
	"return": TReturn, "repeat": TRepeat, "then": TThen, "true": TTrue,
	"until": TUntil, "while": TWhile}

// Scan returns the next token, skipping whitespace and comments. The returned
// token carries its position, type, name, and text; scanning problems are
// reported through the returned error.
func (sc *Scanner) Scan(lexer *Lexer) (ast.Token, error) {
redo:
	var err error
	tok := ast.Token{}
	newline := false
	ch := sc.skipWhiteSpace(whitespace1)
	if ch == '\n' || ch == '\r' {
		newline = true
		ch = sc.skipWhiteSpace(whitespace2)
	}
	if ch == '(' && lexer.PrevTokenType == ')' {
		lexer.PNewLine = newline
	}
	var _buf bytes.Buffer
	buf := &_buf
	tok.Pos = sc.Pos
	switch {
	case isIdent(ch, 0):
		tok.Type = TIdent
		err = sc.scanIdent(ch, buf)
		tok.Str = buf.String()
		if err != nil {
			goto finally
		}
		if typ, ok := reservedWords[tok.Str]; ok {
			tok.Type = typ
		}
	case isDecimal(ch):
		tok.Type = TNumber
		err = sc.scanNumber(ch, buf)
		tok.Str = buf.String()
	default:
		switch ch {
		case EOF:
			tok.Type = EOF
		case '-':
			if sc.Peek() == '-' {
				err = sc.skipComments(sc.Next())
				if err != nil {
					goto finally
				}
				goto redo
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '"', '\'':
			tok.Type = TString
			err = sc.scanString(ch, buf)
			tok.Str = buf.String()
		case '[':
			if c := sc.Peek(); c == '[' || c == '=' {
				tok.Type = TString
				err = sc.scanMultilineString(sc.Next(), buf)
				tok.Str = buf.String()
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '=':
			if sc.Peek() == '=' {
				tok.Type = TEqeq
				tok.Str = "=="
				sc.Next()
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '~':
			if sc.Peek() == '=' {
				tok.Type = TNeq
				tok.Str = "~="
				sc.Next()
			} else {
				err = sc.Error("~", "Invalid '~' token")
			}
		case '<':
			if sc.Peek() == '=' {
				tok.Type = TLte
				tok.Str = "<="
				sc.Next()
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '>':
			if sc.Peek() == '=' {
				tok.Type = TGte
				tok.Str = ">="
				sc.Next()
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '.':
			ch2 := sc.Peek()
			switch {
			case isDecimal(ch2):
				tok.Type = TNumber
				err = sc.scanNumber(ch, buf)
				tok.Str = buf.String()
			case ch2 == '.':
				writeChar(buf, ch)
				writeChar(buf, sc.Next())
				if sc.Peek() == '.' {
					writeChar(buf, sc.Next())
					tok.Type = T3Comma
				} else {
					tok.Type = T2Comma
				}
			default:
				tok.Type = '.'
			}
			tok.Str = buf.String()
		case '+', '*', '/', '%', '^', '#', '(', ')', '{', '}', ']', ';', ':', ',':
			tok.Type = ch
			tok.Str = string(ch)
		default:
			writeChar(buf, ch)
			err = sc.Error(buf.String(), "Invalid token")
			goto finally
		}
	}
finally:
	tok.Name = TokenName(int(tok.Type))
	return tok, err
}

// yacc interface {{{
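
// Lexer adapts Scanner to the interface expected by the goyacc-generated
// parser and holds the statements produced by a parse.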
type Lexer struct {
	scanner       *Scanner
	Stmts         []ast.Stmt
	PNewLine      bool
	Token         ast.Token
	PrevTokenType int
}
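
// Lex returns the type of the next token to the generated parser and stores
// the token itself in lval; it returns 0 at end of input and panics with an
// *Error if scanning fails.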
func (lx *Lexer) Lex(lval *yySymType) int {
	lx.PrevTokenType = lx.Token.Type
	tok, err := lx.scanner.Scan(lx)
	if err != nil {
		panic(err)
	}
	if tok.Type < 0 {
		return 0
	}
	lval.token = tok
	lx.Token = tok
	return int(tok.Type)
}
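
// Error is the parser's error callback; it panics with an *Error that Parse
// recovers.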
func (lx *Lexer) Error(message string) {
	panic(lx.scanner.Error(lx.Token.Str, message))
}

func (lx *Lexer) TokenError(tok ast.Token, message string) {
	panic(lx.scanner.TokenError(tok, message))
}
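
// Parse scans and parses a Lua chunk from reader, returning its statements;
// name is used as the source in positions and error messages. Panics raised
// by the lexer or parser are recovered and returned as err.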
func Parse(reader io.Reader, name string) (chunk []ast.Stmt, err error) {
	lexer := &Lexer{NewScanner(reader, name), nil, false, ast.Token{Str: ""}, TNil}
	chunk = nil
	defer func() {
		if e := recover(); e != nil {
			err, _ = e.(error)
		}
	}()
	yyParse(lexer)
	chunk = lexer.Stmts
	return
}

// }}}
// Dump {{{
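
// isInlineDumpNode reports whether a reflected value can be rendered on a
// single line in a dump.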
func isInlineDumpNode(rv reflect.Value) bool {
	switch rv.Kind() {
	case reflect.Struct, reflect.Slice, reflect.Interface, reflect.Ptr:
		return false
	default:
		return true
	}
}
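
// dump recursively renders an AST node (or a slice of nodes) as text,
// indenting each nesting level with s.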
func dump(node interface{}, level int, s string) string {
	rt := reflect.TypeOf(node)
	if fmt.Sprint(rt) == "<nil>" {
		return strings.Repeat(s, level) + "<nil>"
	}
	rv := reflect.ValueOf(node)
	buf := []string{}
	switch rt.Kind() {
	case reflect.Slice:
		if rv.Len() == 0 {
			return strings.Repeat(s, level) + "<empty>"
		}
		for i := 0; i < rv.Len(); i++ {
			buf = append(buf, dump(rv.Index(i).Interface(), level, s))
		}
	case reflect.Ptr:
		vt := rv.Elem()
		tt := rt.Elem()
		indices := []int{}
		for i := 0; i < tt.NumField(); i++ {
			if strings.Index(tt.Field(i).Name, "Base") > -1 {
				continue
			}
			indices = append(indices, i)
		}
		switch {
		case len(indices) == 0:
			return strings.Repeat(s, level) + "<empty>"
		case len(indices) == 1 && isInlineDumpNode(vt.Field(indices[0])):
			for _, i := range indices {
				buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()+": "+dump(vt.Field(i).Interface(), 0, s))
			}
		default:
			buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name())
			for _, i := range indices {
				if isInlineDumpNode(vt.Field(i)) {
					inf := dump(vt.Field(i).Interface(), 0, s)
					buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": "+inf)
				} else {
					buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": ")
					buf = append(buf, dump(vt.Field(i).Interface(), level+2, s))
				}
			}
		}
	default:
		buf = append(buf, strings.Repeat(s, level)+fmt.Sprint(node))
	}
	return strings.Join(buf, "\n")
}
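
// Dump returns a textual tree representation of a parsed chunk, mainly useful
// for debugging.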
func Dump(chunk []ast.Stmt) string {
	return dump(chunk, 0, " ")
}

// }}}