// Source: go-json/internal/decoder/stream.go (544 lines, 10 KiB, Go)

2021-06-03 12:49:01 +03:00
package decoder
2020-05-24 15:31:10 +03:00
import (
"bytes"
2021-06-03 12:49:01 +03:00
"encoding/json"
2020-05-24 15:31:10 +03:00
"io"
2021-06-03 12:49:01 +03:00
"strconv"
2021-02-15 20:13:13 +03:00
"unsafe"
2021-06-03 12:49:01 +03:00
"github.com/goccy/go-json/internal/errors"
2020-05-24 15:31:10 +03:00
)
// initBufSize is the size, in bytes, of the buffer a new Stream starts with
// and the value Reset restores bufSize to.
const initBufSize = 512
2021-06-03 12:49:01 +03:00
// Stream is an incrementally filled, nul-terminated view over an io.Reader
// that the streaming decoder walks with a cursor.
type Stream struct {
	// buf holds the bytes read so far; read() writes a nul sentinel into its
	// last slot before every fill.
	buf []byte
	// bufSize is the allocation size used when buf has to grow (doubled by
	// readBuf each time the previous fill used all the offered space).
	bufSize int64
	// length is recorded by read() as cursor + bytes read and is shifted by
	// reset().
	length int64
	// r is the underlying source of JSON bytes.
	r io.Reader
	// offset accumulates the bytes dropped from the front of buf by reset();
	// offset + cursor is the absolute position in the input.
	offset int64
	// cursor is the index in buf of the byte currently being examined.
	cursor int64
	// filledBuffer reports whether the last Read filled all the space it was
	// given, which makes the next readBuf grow the buffer.
	filledBuffer bool
	// allRead is set once r has returned io.EOF; read() is a no-op afterwards.
	allRead bool

	// UseNumber presumably selects json.Number over float64 for numbers, as in
	// encoding/json — not read by the code visible here; confirm at callers.
	UseNumber bool
	// DisallowUnknownFields presumably mirrors the encoding/json decoder option
	// of the same name — not read by the code visible here; confirm at callers.
	DisallowUnknownFields bool
}
2021-06-03 12:49:01 +03:00
func NewStream(r io.Reader) *Stream {
return &Stream{
2020-12-05 16:27:33 +03:00
r: r,
bufSize: initBufSize,
2021-06-04 06:41:18 +03:00
buf: make([]byte, initBufSize),
2020-12-05 16:27:33 +03:00
}
}
2021-06-03 12:49:01 +03:00
// TotalOffset returns the absolute position of the cursor in the input:
// the bytes already discarded by reset plus the cursor within the buffer.
func (s *Stream) TotalOffset() int64 {
	return s.offset + s.cursor
}
func (s *Stream) Buffered() io.Reader {
2020-12-24 14:08:27 +03:00
buflen := int64(len(s.buf))
for i := s.cursor; i < buflen; i++ {
if s.buf[i] == nul {
return bytes.NewReader(s.buf[s.cursor:i])
}
}
2020-05-24 15:31:10 +03:00
return bytes.NewReader(s.buf[s.cursor:])
}
2021-06-03 12:49:01 +03:00
// PrepareForDecode positions the cursor at the start of the next value.
// Whitespace is skipped, a single ',' or ':' is consumed, and any other
// byte is left in place. It returns io.EOF when the buffer is exhausted and
// no more input can be read.
func (s *Stream) PrepareForDecode() error {
	for {
		switch s.char() {
		case ',', ':':
			// Consume exactly one separator, then stop.
			s.cursor++
			return nil
		case ' ', '\t', '\r', '\n':
			s.cursor++
		case nul:
			if !s.read() {
				return io.EOF
			}
		default:
			return nil
		}
	}
}
func (s *Stream) totalOffset() int64 {
2020-05-24 15:31:10 +03:00
return s.offset + s.cursor
}
2021-06-03 12:49:01 +03:00
func (s *Stream) char() byte {
2020-05-24 15:31:10 +03:00
return s.buf[s.cursor]
}
2021-06-03 12:49:01 +03:00
// equalChar reports whether the byte under the cursor equals c. When the
// cursor sits on a nul it first attempts one refill and compares against
// whatever is at the cursor afterwards.
func (s *Stream) equalChar(c byte) bool {
	if s.buf[s.cursor] != nul {
		return s.buf[s.cursor] == c
	}
	// The result of read is deliberately unused: on failure the byte stays
	// nul and the comparison below decides the outcome.
	s.read()
	return s.buf[s.cursor] == c
}
2021-06-03 12:49:01 +03:00
func (s *Stream) stat() ([]byte, int64, unsafe.Pointer) {
2021-02-15 20:13:13 +03:00
return s.buf, s.cursor, (*sliceHeader)(unsafe.Pointer(&s.buf)).data
}
2021-06-03 12:49:01 +03:00
func (s *Stream) statForRetry() ([]byte, int64, unsafe.Pointer) {
2021-05-03 10:25:26 +03:00
s.cursor-- // for retry ( because caller progress cursor position in each loop )
return s.buf, s.cursor, (*sliceHeader)(unsafe.Pointer(&s.buf)).data
}
2021-06-03 12:49:01 +03:00
// Reset discards the bytes before the cursor (see reset) and re-bases the
// growth size for the next decode.
//
// bufSize is restored to initBufSize, but never to less than the buffer that
// is being kept: readBuf grows by allocating bufSize*2 bytes and copying the
// old buffer in, so a bufSize smaller than half the live buffer would
// truncate unread data and leave the cursor out of range.
func (s *Stream) Reset() {
	s.reset()
	s.bufSize = initBufSize
	if kept := int64(len(s.buf)); kept > s.bufSize {
		s.bufSize = kept
	}
}
// More reports whether another element is available before the end of the
// current array or object. Whitespace is skipped; it returns false on ']',
// '}' or when no more input can be read, and true on any other byte.
func (s *Stream) More() bool {
	for {
		switch s.char() {
		case '}', ']':
			return false
		case ' ', '\n', '\r', '\t':
			s.cursor++
		case nul:
			if !s.read() {
				return false
			}
		default:
			return true
		}
	}
}
// Token returns the next JSON token from the stream.
//
// Whitespace, ',' and ':' are skipped. The returned value is one of:
//   - json.Delim for '{', '[', ']' and '}'
//   - json.Number when UseNumber is set, otherwise float64, for numbers
//   - string for string literals
//   - bool for true / false
//   - nil for null
//
// It returns io.EOF when the input is exhausted, the error from the literal
// helpers or strconv.ParseFloat when a token is malformed, and an
// invalid-character error for any byte that cannot start a token.
func (s *Stream) Token() (interface{}, error) {
	for {
		c := s.char()
		switch c {
		case ' ', '\n', '\r', '\t':
			s.cursor++
		case '{', '[', ']', '}':
			s.cursor++
			return json.Delim(c), nil
		case ',', ':':
			s.cursor++
		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			raw := floatBytes(s)
			if s.UseNumber {
				// Copy the digits: raw may alias the stream buffer, and the
				// returned Number outlives this call.
				return json.Number(string(raw)), nil
			}
			// A zero-copy string view is fine here because it is only used
			// for the duration of ParseFloat.
			f64, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&raw)), 64)
			if err != nil {
				return nil, err
			}
			return f64, nil
		case '"':
			raw, err := stringBytes(s)
			if err != nil {
				return nil, err
			}
			return string(raw), nil
		case 't':
			if err := trueBytes(s); err != nil {
				return nil, err
			}
			return true, nil
		case 'f':
			if err := falseBytes(s); err != nil {
				return nil, err
			}
			return false, nil
		case 'n':
			if err := nullBytes(s); err != nil {
				return nil, err
			}
			return nil, nil
		case nul:
			// End of buffered data: refill and look again, or report EOF.
			if !s.read() {
				return nil, io.EOF
			}
		default:
			return nil, errors.ErrInvalidCharacter(s.char(), "token", s.totalOffset())
		}
	}
}
func (s *Stream) reset() {
2020-12-05 16:27:33 +03:00
s.offset += s.cursor
2020-07-30 16:41:53 +03:00
s.buf = s.buf[s.cursor:]
s.length -= s.cursor
2020-07-30 16:41:53 +03:00
s.cursor = 0
2020-12-05 16:27:33 +03:00
}
2021-06-03 12:49:01 +03:00
func (s *Stream) readBuf() []byte {
2021-05-03 10:25:26 +03:00
if s.filledBuffer {
s.bufSize *= 2
remainBuf := s.buf
s.buf = make([]byte, s.bufSize)
copy(s.buf, remainBuf)
}
2021-05-04 22:21:58 +03:00
remainLen := s.length - s.cursor
2021-05-07 19:56:58 +03:00
remainNotNulCharNum := int64(0)
for i := int64(0); i < remainLen; i++ {
if s.buf[s.cursor+i] == nul {
break
}
remainNotNulCharNum++
}
return s.buf[s.cursor+remainNotNulCharNum:]
2020-07-30 16:41:53 +03:00
}
2021-06-03 12:49:01 +03:00
func (s *Stream) read() bool {
2020-07-31 06:22:00 +03:00
if s.allRead {
return false
}
2020-12-05 16:27:33 +03:00
buf := s.readBuf()
last := len(buf) - 1
buf[last] = nul
n, err := s.r.Read(buf[:last])
s.length = s.cursor + int64(n)
2021-05-03 10:25:26 +03:00
if n == last {
s.filledBuffer = true
} else {
s.filledBuffer = false
}
if err == io.EOF {
2020-07-30 16:41:53 +03:00
s.allRead = true
2020-12-05 16:27:33 +03:00
} else if err != nil {
2020-07-30 16:41:53 +03:00
return false
}
2020-05-24 15:31:10 +03:00
return true
}
2020-07-30 16:41:53 +03:00
2021-06-03 12:49:01 +03:00
func (s *Stream) skipWhiteSpace() {
2020-07-30 16:41:53 +03:00
LOOP:
switch s.char() {
case ' ', '\n', '\t', '\r':
2020-07-31 11:10:03 +03:00
s.cursor++
2020-07-30 16:41:53 +03:00
goto LOOP
case nul:
2020-07-31 11:10:03 +03:00
if s.read() {
goto LOOP
}
2020-07-30 16:41:53 +03:00
}
}
2021-06-03 12:49:01 +03:00
func (s *Stream) skipObject(depth int64) error {
2021-02-15 20:13:13 +03:00
braceCount := 1
2021-02-15 20:24:27 +03:00
_, cursor, p := s.stat()
2020-07-30 16:41:53 +03:00
for {
2021-02-15 20:13:13 +03:00
switch char(p, cursor) {
2020-07-30 16:41:53 +03:00
case '{':
braceCount++
depth++
if depth > maxDecodeNestingDepth {
2021-06-03 12:49:01 +03:00
return errors.ErrExceededMaxDepth(s.char(), s.cursor)
}
2020-07-30 16:41:53 +03:00
case '}':
braceCount--
depth--
2021-02-15 20:13:13 +03:00
if braceCount == 0 {
s.cursor = cursor + 1
2020-07-30 16:41:53 +03:00
return nil
}
case '[':
depth++
if depth > maxDecodeNestingDepth {
2021-06-03 12:49:01 +03:00
return errors.ErrExceededMaxDepth(s.char(), s.cursor)
}
case ']':
depth--
2021-02-15 20:13:13 +03:00
case '"':
for {
cursor++
switch char(p, cursor) {
case '\\':
cursor++
if char(p, cursor) == nul {
s.cursor = cursor
if s.read() {
2021-05-03 10:25:26 +03:00
_, cursor, p = s.statForRetry()
continue
}
2021-06-03 12:49:01 +03:00
return errors.ErrUnexpectedEndOfJSON("string of object", cursor)
2021-02-15 20:13:13 +03:00
}
case '"':
2021-02-15 20:13:13 +03:00
goto SWITCH_OUT
case nul:
s.cursor = cursor
if s.read() {
2021-05-03 10:25:26 +03:00
_, cursor, p = s.statForRetry()
2021-02-15 20:13:13 +03:00
continue
}
2021-06-03 12:49:01 +03:00
return errors.ErrUnexpectedEndOfJSON("string of object", cursor)
2021-02-15 20:13:13 +03:00
}
}
case nul:
s.cursor = cursor
if s.read() {
2021-02-15 20:24:27 +03:00
_, cursor, p = s.stat()
2021-02-15 20:13:13 +03:00
continue
}
2021-06-03 12:49:01 +03:00
return errors.ErrUnexpectedEndOfJSON("object of object", cursor)
2021-02-15 20:13:13 +03:00
}
SWITCH_OUT:
cursor++
}
}
2021-06-03 12:49:01 +03:00
func (s *Stream) skipArray(depth int64) error {
2021-02-15 20:13:13 +03:00
bracketCount := 1
2021-02-15 20:24:27 +03:00
_, cursor, p := s.stat()
2021-02-15 20:13:13 +03:00
for {
switch char(p, cursor) {
case '[':
bracketCount++
depth++
if depth > maxDecodeNestingDepth {
2021-06-03 12:49:01 +03:00
return errors.ErrExceededMaxDepth(s.char(), s.cursor)
}
2020-07-30 16:41:53 +03:00
case ']':
bracketCount--
depth--
2021-02-15 20:13:13 +03:00
if bracketCount == 0 {
s.cursor = cursor + 1
2020-07-30 16:41:53 +03:00
return nil
}
case '{':
depth++
if depth > maxDecodeNestingDepth {
2021-06-03 12:49:01 +03:00
return errors.ErrExceededMaxDepth(s.char(), s.cursor)
}
case '}':
depth--
2020-07-30 16:41:53 +03:00
case '"':
2020-07-31 11:10:03 +03:00
for {
2021-02-15 20:13:13 +03:00
cursor++
switch char(p, cursor) {
case '\\':
cursor++
if char(p, cursor) == nul {
s.cursor = cursor
if s.read() {
2021-05-03 10:25:26 +03:00
_, cursor, p = s.statForRetry()
continue
}
2021-06-03 12:49:01 +03:00
return errors.ErrUnexpectedEndOfJSON("string of object", cursor)
2020-07-31 11:10:03 +03:00
}
case '"':
2021-02-15 20:13:13 +03:00
goto SWITCH_OUT
case nul:
s.cursor = cursor
if s.read() {
2021-05-03 10:25:26 +03:00
_, cursor, p = s.statForRetry()
2021-02-15 20:13:13 +03:00
continue
}
2021-06-03 12:49:01 +03:00
return errors.ErrUnexpectedEndOfJSON("string of object", cursor)
2020-07-30 16:41:53 +03:00
}
2021-02-15 20:13:13 +03:00
}
case nul:
s.cursor = cursor
if s.read() {
2021-02-15 20:24:27 +03:00
_, cursor, p = s.stat()
2021-02-15 20:13:13 +03:00
continue
}
2021-06-03 12:49:01 +03:00
return errors.ErrUnexpectedEndOfJSON("array of object", cursor)
2021-02-15 20:13:13 +03:00
}
SWITCH_OUT:
cursor++
}
}
2021-06-03 12:49:01 +03:00
func (s *Stream) skipValue(depth int64) error {
2021-02-15 20:24:27 +03:00
_, cursor, p := s.stat()
2021-02-15 20:13:13 +03:00
for {
switch char(p, cursor) {
case ' ', '\n', '\t', '\r':
cursor++
continue
case nul:
s.cursor = cursor
if s.read() {
2021-02-15 20:24:27 +03:00
_, cursor, p = s.stat()
2021-02-15 20:13:13 +03:00
continue
}
2021-06-03 12:49:01 +03:00
return errors.ErrUnexpectedEndOfJSON("value of object", s.totalOffset())
2021-02-15 20:13:13 +03:00
case '{':
s.cursor = cursor + 1
return s.skipObject(depth + 1)
2021-02-15 20:13:13 +03:00
case '[':
s.cursor = cursor + 1
return s.skipArray(depth + 1)
2021-02-15 20:13:13 +03:00
case '"':
for {
cursor++
2021-02-15 20:24:27 +03:00
switch char(p, cursor) {
case '\\':
cursor++
if char(p, cursor) == nul {
s.cursor = cursor
if s.read() {
2021-05-03 10:25:26 +03:00
_, cursor, p = s.statForRetry()
continue
}
2021-06-03 12:49:01 +03:00
return errors.ErrUnexpectedEndOfJSON("value of string", s.totalOffset())
2021-02-15 20:13:13 +03:00
}
case '"':
2021-02-15 20:13:13 +03:00
s.cursor = cursor + 1
2020-07-30 16:41:53 +03:00
return nil
2021-02-15 20:13:13 +03:00
case nul:
s.cursor = cursor
if s.read() {
2021-05-03 10:25:26 +03:00
_, cursor, p = s.statForRetry()
2021-02-15 20:13:13 +03:00
continue
}
2021-06-03 12:49:01 +03:00
return errors.ErrUnexpectedEndOfJSON("value of string", s.totalOffset())
2020-07-30 16:41:53 +03:00
}
}
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
2020-07-31 11:10:03 +03:00
for {
2021-02-15 20:13:13 +03:00
cursor++
c := char(p, cursor)
if floatTable[c] {
2020-07-30 16:41:53 +03:00
continue
} else if c == nul {
2020-07-31 11:10:03 +03:00
if s.read() {
s.cursor-- // for retry current character
2021-02-15 20:24:27 +03:00
_, cursor, p = s.stat()
2020-07-31 11:10:03 +03:00
continue
}
2020-07-30 16:41:53 +03:00
}
2021-02-15 20:13:13 +03:00
s.cursor = cursor
2020-07-30 16:41:53 +03:00
return nil
}
2020-07-31 14:24:39 +03:00
case 't':
2021-02-15 20:13:13 +03:00
s.cursor = cursor
2020-07-31 14:24:39 +03:00
if err := trueBytes(s); err != nil {
return err
}
2021-02-15 20:13:13 +03:00
return nil
2020-07-31 14:24:39 +03:00
case 'f':
2021-02-15 20:13:13 +03:00
s.cursor = cursor
2020-07-31 14:24:39 +03:00
if err := falseBytes(s); err != nil {
return err
}
2021-02-15 20:13:13 +03:00
return nil
2020-07-31 14:24:39 +03:00
case 'n':
2021-02-15 20:13:13 +03:00
s.cursor = cursor
2020-07-31 14:24:39 +03:00
if err := nullBytes(s); err != nil {
return err
}
2021-02-15 20:13:13 +03:00
return nil
2020-07-30 16:41:53 +03:00
}
2021-02-15 20:13:13 +03:00
cursor++
2020-07-30 16:41:53 +03:00
}
}
2021-06-03 12:49:01 +03:00
// nullBytes consumes the literal "null" and leaves the cursor just past it.
// The cursor must be on the leading 'n' when it is called.
//
// Each following character is checked against the literal. When the cursor
// hits the end of the buffered data, retryReadNull refills the buffer and
// the character is checked again, so a literal split across two reads is
// validated in full and truncated input is rejected. Any mismatch or
// end of input yields the error produced by retryReadNull.
func nullBytes(s *Stream) error {
	// current cursor's character is 'n'
	const rest = "ull"
	for i := 0; i < len(rest); i++ {
		s.cursor++
		// Loop instead of testing once: a successful refill only means new
		// bytes may be available, not that the byte at the cursor is the
		// expected one.
		for s.char() != rest[i] {
			if err := retryReadNull(s); err != nil {
				return err
			}
		}
	}
	s.cursor++
	return nil
}
2021-06-03 12:49:01 +03:00
func retryReadNull(s *Stream) error {
if s.char() == nul && s.read() {
return nil
}
2021-06-03 12:49:01 +03:00
return errors.ErrInvalidCharacter(s.char(), "null", s.totalOffset())
}
2021-06-03 12:49:01 +03:00
// trueBytes consumes the literal "true" and leaves the cursor just past it.
// The cursor must be on the leading 't' when it is called.
//
// Each following character is checked against the literal. When the cursor
// hits the end of the buffered data, retryReadTrue refills the buffer and
// the character is checked again, so a literal split across two reads is
// validated in full and truncated input is rejected. Any mismatch or
// end of input yields the error produced by retryReadTrue.
func trueBytes(s *Stream) error {
	// current cursor's character is 't'
	const rest = "rue"
	for i := 0; i < len(rest); i++ {
		s.cursor++
		// Loop instead of testing once: a successful refill only means new
		// bytes may be available, not that the byte at the cursor is the
		// expected one.
		for s.char() != rest[i] {
			if err := retryReadTrue(s); err != nil {
				return err
			}
		}
	}
	s.cursor++
	return nil
}
2021-06-03 12:49:01 +03:00
func retryReadTrue(s *Stream) error {
if s.char() == nul && s.read() {
return nil
}
2021-06-03 12:49:01 +03:00
return errors.ErrInvalidCharacter(s.char(), "bool(true)", s.totalOffset())
}
2021-06-03 12:49:01 +03:00
// falseBytes consumes the literal "false" and leaves the cursor just past
// it. The cursor must be on the leading 'f' when it is called.
//
// Each following character is checked against the literal. When the cursor
// hits the end of the buffered data, retryReadFalse refills the buffer and
// the character is checked again, so a literal split across two reads is
// validated in full and truncated input is rejected. Any mismatch or
// end of input yields the error produced by retryReadFalse.
func falseBytes(s *Stream) error {
	// current cursor's character is 'f'
	const rest = "alse"
	for i := 0; i < len(rest); i++ {
		s.cursor++
		// Loop instead of testing once: a successful refill only means new
		// bytes may be available, not that the byte at the cursor is the
		// expected one.
		for s.char() != rest[i] {
			if err := retryReadFalse(s); err != nil {
				return err
			}
		}
	}
	s.cursor++
	return nil
}
2021-06-03 12:49:01 +03:00
func retryReadFalse(s *Stream) error {
if s.char() == nul && s.read() {
return nil
}
2021-06-03 12:49:01 +03:00
return errors.ErrInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
}