package parse

import (
	"bufio"
	"bytes"
	"fmt"
	"github.com/yuin/gopher-lua/ast"
	"io"
	"reflect"
	"strconv"
	"strings"
)

// EOF is the sentinel value returned when the end of the input is reached.
const EOF = -1

// Bitsets of whitespace characters: whitespace1 covers tab and space,
// whitespace2 additionally covers CR and LF.
const whitespace1 = 1<<'\t' | 1<<' '
const whitespace2 = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '

type Error struct {
	Pos     ast.Position
	Message string
	Token   string
}

func (e *Error) Error() string {
	pos := e.Pos
	if pos.Line == EOF {
		return fmt.Sprintf("%v at EOF: %s\n", pos.Source, e.Message)
	} else {
		return fmt.Sprintf("%v line:%d(column:%d) near '%v': %s\n", pos.Source, pos.Line, pos.Column, e.Token, e.Message)
	}
}

func writeChar(buf *bytes.Buffer, c int) { buf.WriteByte(byte(c)) }

func isDecimal(ch int) bool { return '0' <= ch && ch <= '9' }

func isIdent(ch int, pos int) bool {
	return ch == '_' || 'A' <= ch && ch <= 'Z' || 'a' <= ch && ch <= 'z' || isDecimal(ch) && pos > 0
}

func isDigit(ch int) bool {
	return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
}

// Scanner reads Lua source byte by byte while tracking the current position.
type Scanner struct {
	Pos    ast.Position
	reader *bufio.Reader
}

func NewScanner(reader io.Reader, source string) *Scanner {
	return &Scanner{
		Pos: ast.Position{
			Source: source,
			Line:   1,
			Column: 0,
		},
		reader: bufio.NewReaderSize(reader, 4096),
	}
}

func (sc *Scanner) Error(tok string, msg string) *Error { return &Error{sc.Pos, msg, tok} }

func (sc *Scanner) TokenError(tok ast.Token, msg string) *Error { return &Error{tok.Pos, msg, tok.Str} }

func (sc *Scanner) readNext() int {
	ch, err := sc.reader.ReadByte()
	if err == io.EOF {
		return EOF
	}
	return int(ch)
}

func (sc *Scanner) Newline(ch int) {
	if ch < 0 {
		return
	}
	sc.Pos.Line += 1
	sc.Pos.Column = 0
	next := sc.Peek()
	if ch == '\n' && next == '\r' || ch == '\r' && next == '\n' {
		sc.reader.ReadByte()
	}
}

func (sc *Scanner) Next() int {
	ch := sc.readNext()
	switch ch {
	case '\n', '\r':
		sc.Newline(ch)
		ch = int('\n')
	case EOF:
		sc.Pos.Line = EOF
		sc.Pos.Column = 0
	default:
		sc.Pos.Column++
	}
	return ch
}

func (sc *Scanner) Peek() int {
	ch := sc.readNext()
	if ch != EOF {
		sc.reader.UnreadByte()
	}
	return ch
}

func (sc *Scanner) skipWhiteSpace(whitespace int64) int {
	ch := sc.Next()
	for ; whitespace&(1<<uint(ch)) != 0; ch = sc.Next() {
	}
	return ch
}

func (sc *Scanner) skipComments(ch int) error {
	// multiline comment
	if sc.Peek() == '[' {
		ch = sc.Next()
		if sc.Peek() == '[' || sc.Peek() == '=' {
			var buf bytes.Buffer
			if err := sc.scanMultilineString(sc.Next(), &buf); err != nil {
				return sc.Error(buf.String(), "invalid multiline comment")
			}
			return nil
		}
	}
	for {
		if ch == '\n' || ch == '\r' || ch < 0 {
			break
		}
		ch = sc.Next()
	}
	return nil
}

func (sc *Scanner) scanIdent(ch int, buf *bytes.Buffer) error {
	writeChar(buf, ch)
	for isIdent(sc.Peek(), 1) {
		writeChar(buf, sc.Next())
	}
	return nil
}

func (sc *Scanner) scanDecimal(ch int, buf *bytes.Buffer) error {
	writeChar(buf, ch)
	for isDecimal(sc.Peek()) {
		writeChar(buf, sc.Next())
	}
	return nil
}

func (sc *Scanner) scanNumber(ch int, buf *bytes.Buffer) error {
	if ch == '0' { // hexadecimal or leading zero
		if sc.Peek() == 'x' || sc.Peek() == 'X' {
			writeChar(buf, ch)
			writeChar(buf, sc.Next())
			hasvalue := false
			for isDigit(sc.Peek()) {
				writeChar(buf, sc.Next())
				hasvalue = true
			}
			if !hasvalue {
				return sc.Error(buf.String(), "illegal hexadecimal number")
			}
			return nil
		} else if sc.Peek() != '.' && isDecimal(sc.Peek()) {
			ch = sc.Next()
		}
	}
	sc.scanDecimal(ch, buf)
	if sc.Peek() == '.' {
		sc.scanDecimal(sc.Next(), buf)
	}
	if ch = sc.Peek(); ch == 'e' || ch == 'E' {
		writeChar(buf, sc.Next())
		if ch = sc.Peek(); ch == '-' || ch == '+' {
			writeChar(buf, sc.Next())
		}
		sc.scanDecimal(sc.Next(), buf)
	}
	return nil
}

func (sc *Scanner) scanString(quote int, buf *bytes.Buffer) error {
	ch := sc.Next()
	for ch != quote {
		if ch == '\n' || ch == '\r' || ch < 0 {
			return sc.Error(buf.String(), "unterminated string")
		}
		if ch == '\\' {
			if err := sc.scanEscape(ch, buf); err != nil {
				return err
			}
		} else {
			writeChar(buf, ch)
		}
		ch = sc.Next()
	}
	return nil
}

func (sc *Scanner) scanEscape(ch int, buf *bytes.Buffer) error {
	ch = sc.Next()
	switch ch {
	case 'a':
		buf.WriteByte('\a')
	case 'b':
		buf.WriteByte('\b')
	case 'f':
		buf.WriteByte('\f')
	case 'n':
		buf.WriteByte('\n')
	case 'r':
		buf.WriteByte('\r')
	case 't':
		buf.WriteByte('\t')
	case 'v':
		buf.WriteByte('\v')
	case '\\':
		buf.WriteByte('\\')
	case '"':
		buf.WriteByte('"')
	case '\'':
		buf.WriteByte('\'')
	case '\n':
		buf.WriteByte('\n')
	case '\r':
		buf.WriteByte('\n')
		sc.Newline('\r')
	default:
		if '0' <= ch && ch <= '9' {
			bytes := []byte{byte(ch)}
			for i := 0; i < 2 && isDecimal(sc.Peek()); i++ {
				bytes = append(bytes, byte(sc.Next()))
			}
			val, _ := strconv.ParseInt(string(bytes), 10, 32)
			writeChar(buf, int(val))
		} else {
			writeChar(buf, ch)
		}
	}
	return nil
}

func (sc *Scanner) countSep(ch int) (int, int) {
	count := 0
	for ; ch == '='; count = count + 1 {
		ch = sc.Next()
	}
	return count, ch
}

func (sc *Scanner) scanMultilineString(ch int, buf *bytes.Buffer) error {
	var count1, count2 int
	count1, ch = sc.countSep(ch)
	if ch != '[' {
		return sc.Error(string(ch), "invalid multiline string")
	}
	ch = sc.Next()
	if ch == '\n' || ch == '\r' {
		ch = sc.Next()
	}
	for {
		if ch < 0 {
			return sc.Error(buf.String(), "unterminated multiline string")
		} else if ch == ']' {
			count2, ch = sc.countSep(sc.Next())
			if count1 == count2 && ch == ']' {
				goto finally
			}
			buf.WriteByte(']')
			buf.WriteString(strings.Repeat("=", count2))
			continue
		}
		writeChar(buf, ch)
		ch = sc.Next()
	}

finally:
	return nil
}

var reservedWords = map[string]int{
	"and": TAnd, "break": TBreak, "do": TDo, "else": TElse, "elseif": TElseIf,
	"end": TEnd, "false": TFalse, "for": TFor, "function": TFunction,
	"if": TIf, "in": TIn, "local": TLocal, "nil": TNil, "not": TNot, "or": TOr,
	"return": TReturn, "repeat": TRepeat, "then": TThen, "true": TTrue,
	"until": TUntil, "while": TWhile}

// Scan reads and returns the next token from the source.
func (sc *Scanner) Scan(lexer *Lexer) (ast.Token, error) {
redo:
	var err error
	tok := ast.Token{}
	newline := false

	ch := sc.skipWhiteSpace(whitespace1)
	if ch == '\n' || ch == '\r' {
		newline = true
		ch = sc.skipWhiteSpace(whitespace2)
	}

	if ch == '(' && lexer.PrevTokenType == ')' {
		lexer.PNewLine = newline
	} else {
		lexer.PNewLine = false
	}

	var _buf bytes.Buffer
	buf := &_buf
	tok.Pos = sc.Pos

	switch {
	case isIdent(ch, 0):
		tok.Type = TIdent
		err = sc.scanIdent(ch, buf)
		tok.Str = buf.String()
		if err != nil {
			goto finally
		}
		if typ, ok := reservedWords[tok.Str]; ok {
			tok.Type = typ
		}
	case isDecimal(ch):
		tok.Type = TNumber
		err = sc.scanNumber(ch, buf)
		tok.Str = buf.String()
	default:
		switch ch {
		case EOF:
			tok.Type = EOF
		case '-':
			if sc.Peek() == '-' {
				err = sc.skipComments(sc.Next())
				if err != nil {
					goto finally
				}
				goto redo
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '"', '\'':
			tok.Type = TString
			err = sc.scanString(ch, buf)
			tok.Str = buf.String()
		case '[':
			if c := sc.Peek(); c == '[' || c == '=' {
				tok.Type = TString
				err = sc.scanMultilineString(sc.Next(), buf)
				tok.Str = buf.String()
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '=':
			if sc.Peek() == '=' {
				tok.Type = TEqeq
				tok.Str = "=="
				sc.Next()
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '~':
			if sc.Peek() == '=' {
				tok.Type = TNeq
				tok.Str = "~="
				sc.Next()
			} else {
				err = sc.Error("~", "Invalid '~' token")
			}
		case '<':
			if sc.Peek() == '=' {
				tok.Type = TLte
				tok.Str = "<="
				sc.Next()
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '>':
			if sc.Peek() == '=' {
				tok.Type = TGte
				tok.Str = ">="
				sc.Next()
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '.':
			ch2 := sc.Peek()
			switch {
			case isDecimal(ch2):
				tok.Type = TNumber
				err = sc.scanNumber(ch, buf)
				tok.Str = buf.String()
			case ch2 == '.':
				writeChar(buf, ch)
				writeChar(buf, sc.Next())
				if sc.Peek() == '.' {
					writeChar(buf, sc.Next())
					tok.Type = T3Comma
				} else {
					tok.Type = T2Comma
				}
			default:
				tok.Type = '.'
			}
			tok.Str = buf.String()
		case '+', '*', '/', '%', '^', '#', '(', ')', '{', '}', ']', ';', ':', ',':
			tok.Type = ch
			tok.Str = string(ch)
		default:
			writeChar(buf, ch)
			err = sc.Error(buf.String(), "Invalid token")
			goto finally
		}
	}

finally:
	tok.Name = TokenName(int(tok.Type))
	return tok, err
}

// yacc interface {{{

type Lexer struct {
	scanner       *Scanner
	Stmts         []ast.Stmt
	PNewLine      bool
	Token         ast.Token
	PrevTokenType int
}

func (lx *Lexer) Lex(lval *yySymType) int {
	lx.PrevTokenType = lx.Token.Type
	tok, err := lx.scanner.Scan(lx)
	if err != nil {
		panic(err)
	}
	if tok.Type < 0 {
		return 0
	}
	lval.token = tok
	lx.Token = tok
	return int(tok.Type)
}

func (lx *Lexer) Error(message string) {
	panic(lx.scanner.Error(lx.Token.Str, message))
}

func (lx *Lexer) TokenError(tok ast.Token, message string) {
	panic(lx.scanner.TokenError(tok, message))
}

// Parse parses a Lua chunk read from reader and returns its statements.
// Errors raised while scanning or parsing are recovered and returned as err.
func Parse(reader io.Reader, name string) (chunk []ast.Stmt, err error) {
	lexer := &Lexer{NewScanner(reader, name), nil, false, ast.Token{Str: ""}, TNil}
	chunk = nil
	defer func() {
		if e := recover(); e != nil {
			err, _ = e.(error)
		}
	}()
	yyParse(lexer)
	chunk = lexer.Stmts
	return
}

// }}}

// Dump {{{

func isInlineDumpNode(rv reflect.Value) bool {
	switch rv.Kind() {
	case reflect.Struct, reflect.Slice, reflect.Interface, reflect.Ptr:
		return false
	default:
		return true
	}
}

func dump(node interface{}, level int, s string) string {
	rt := reflect.TypeOf(node)
	if fmt.Sprint(rt) == "<nil>" {
		return strings.Repeat(s, level) + "<nil>"
	}

	rv := reflect.ValueOf(node)
	buf := []string{}
	switch rt.Kind() {
	case reflect.Slice:
		if rv.Len() == 0 {
			return strings.Repeat(s, level) + "<empty>"
		}
		for i := 0; i < rv.Len(); i++ {
			buf = append(buf, dump(rv.Index(i).Interface(), level, s))
		}
	case reflect.Ptr:
		vt := rv.Elem()
		tt := rt.Elem()
		indicies := []int{}
		for i := 0; i < tt.NumField(); i++ {
			if strings.Index(tt.Field(i).Name, "Base") > -1 {
				continue
			}
			indicies = append(indicies, i)
		}
		switch {
		case len(indicies) == 0:
			return strings.Repeat(s, level) + "<empty>"
		case len(indicies) == 1 && isInlineDumpNode(vt.Field(indicies[0])):
			for _, i := range indicies {
				buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()+": "+dump(vt.Field(i).Interface(), 0, s))
			}
		default:
			buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name())
			for _, i := range indicies {
				if isInlineDumpNode(vt.Field(i)) {
					inf := dump(vt.Field(i).Interface(), 0, s)
					buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": "+inf)
				} else {
					buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": ")
					buf = append(buf, dump(vt.Field(i).Interface(), level+2, s))
				}
			}
		}
	default:
		buf = append(buf, strings.Repeat(s, level)+fmt.Sprint(node))
	}
	return strings.Join(buf, "\n")
}

// Dump returns an indented, human-readable representation of an AST chunk.
func Dump(chunk []ast.Stmt) string {
	return dump(chunk, 0, " ")
}

// }}}
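
// Usage sketch: parsing a Lua chunk with Parse and printing its AST with Dump.
// The import path alias, file name, and Lua source shown here are illustrative
// assumptions rather than anything defined in this file.
//
//	import (
//		"fmt"
//		"strings"
//
//		"github.com/yuin/gopher-lua/parse"
//	)
//
//	func example() {
//		// Parse a small chunk; name is used only for error positions.
//		chunk, err := parse.Parse(strings.NewReader(`print("hello")`), "example.lua")
//		if err != nil {
//			fmt.Println(err)
//			return
//		}
//		// Print the statement tree produced by the parser.
//		fmt.Println(parse.Dump(chunk))
//	}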