go-json/decode_stream.go

426 lines
7.9 KiB
Go
Raw Normal View History

2020-05-24 15:31:10 +03:00
package json
import (
"bytes"
"io"
2021-02-15 20:13:13 +03:00
"unsafe"
2020-05-24 15:31:10 +03:00
)
// initBufSize is the value newStream assigns to a stream's bufSize field.
// readBuf doubles bufSize before allocating a replacement buffer, so the
// first buffer grown from a fresh stream holds 2*initBufSize bytes.
const initBufSize = 512
// stream is the buffered input state used while decoding from an io.Reader.
// Bytes are pulled from r into buf on demand and consumed through cursor.
type stream struct {
	// buf is the working buffer. read writes a nul byte into its last slot
	// before each Read call so scanners can detect the end of the buffer.
	buf []byte

	// bufSize is the size used for the next buffer allocation; readBuf
	// doubles it each time it grows buf.
	bufSize int64

	// length is set by read to cursor plus the number of bytes just read,
	// and is reduced by cursor in reset.
	length int64

	// r is the underlying source of bytes.
	r io.Reader

	// offset accumulates the cursor values dropped by reset, so that
	// offset+cursor is the position reported by totalOffset.
	offset int64

	// cursor is the index in buf of the byte currently being examined.
	cursor int64

	// filledBuffer records that the previous Read filled all the space it
	// was offered; readBuf grows buf when it is set.
	filledBuffer bool

	// allRead is set once r has returned io.EOF; read then returns false.
	allRead bool

	// useNumber and disallowUnknownFields are not referenced by the stream
	// methods in this part of the file.
	// NOTE(review): presumably decoder options consulted elsewhere - confirm.
	useNumber bool

	disallowUnknownFields bool
}
2020-12-05 16:27:33 +03:00
func newStream(r io.Reader) *stream {
return &stream{
r: r,
bufSize: initBufSize,
2020-12-07 05:40:03 +03:00
buf: []byte{nul},
2020-12-05 16:27:33 +03:00
}
}
2020-05-24 15:31:10 +03:00
func (s *stream) buffered() io.Reader {
2020-12-24 14:08:27 +03:00
buflen := int64(len(s.buf))
for i := s.cursor; i < buflen; i++ {
if s.buf[i] == nul {
return bytes.NewReader(s.buf[s.cursor:i])
}
}
2020-05-24 15:31:10 +03:00
return bytes.NewReader(s.buf[s.cursor:])
}
// totalOffset returns the current position as offset (the amount dropped by
// earlier reset calls) plus cursor (the index into the current buffer).
func (s *stream) totalOffset() int64 {
	pos := s.offset
	pos += s.cursor
	return pos
}
// char returns the byte of buf at the cursor position.
func (s *stream) char() byte {
	at := s.cursor
	return s.buf[at]
}
2021-02-15 20:13:13 +03:00
// stat returns the buffer, the cursor, and the data pointer taken from the
// buffer's slice header, for callers that scan with raw pointer offsets.
func (s *stream) stat() ([]byte, int64, unsafe.Pointer) {
	header := (*sliceHeader)(unsafe.Pointer(&s.buf))
	return s.buf, s.cursor, header.data
}
2021-05-03 10:25:26 +03:00
// statForRetry moves the cursor back by one and then returns the same triple
// as stat. Callers advance their cursor at the top of each loop iteration, so
// stepping back makes the next iteration look at the same position again.
func (s *stream) statForRetry() ([]byte, int64, unsafe.Pointer) {
	s.cursor -= 1
	base := (*sliceHeader)(unsafe.Pointer(&s.buf)).data
	return s.buf, s.cursor, base
}
2020-07-30 16:41:53 +03:00
func (s *stream) reset() {
2020-12-05 16:27:33 +03:00
s.offset += s.cursor
2020-07-30 16:41:53 +03:00
s.buf = s.buf[s.cursor:]
s.length -= s.cursor
2020-07-30 16:41:53 +03:00
s.cursor = 0
2020-12-05 16:27:33 +03:00
}
func (s *stream) readBuf() []byte {
2021-05-03 10:25:26 +03:00
if s.filledBuffer {
s.bufSize *= 2
remainBuf := s.buf
s.buf = make([]byte, s.bufSize)
copy(s.buf, remainBuf)
}
2021-05-04 22:21:58 +03:00
remainLen := s.length - s.cursor
if remainLen > 0 {
remainLen-- // last char is nul
}
return s.buf[s.cursor+remainLen:]
2020-07-30 16:41:53 +03:00
}
2020-05-24 15:31:10 +03:00
func (s *stream) read() bool {
2020-07-31 06:22:00 +03:00
if s.allRead {
return false
}
2020-12-05 16:27:33 +03:00
buf := s.readBuf()
last := len(buf) - 1
buf[last] = nul
n, err := s.r.Read(buf[:last])
s.length = s.cursor + int64(n)
2021-05-03 10:25:26 +03:00
if n == last {
s.filledBuffer = true
} else {
s.filledBuffer = false
}
if err == io.EOF {
2020-07-30 16:41:53 +03:00
s.allRead = true
2020-12-05 16:27:33 +03:00
} else if err != nil {
2020-07-30 16:41:53 +03:00
return false
}
2020-05-24 15:31:10 +03:00
return true
}
2020-07-30 16:41:53 +03:00
func (s *stream) skipWhiteSpace() {
LOOP:
switch s.char() {
case ' ', '\n', '\t', '\r':
2020-07-31 11:10:03 +03:00
s.cursor++
2020-07-30 16:41:53 +03:00
goto LOOP
case nul:
2020-07-31 11:10:03 +03:00
if s.read() {
goto LOOP
}
2020-07-30 16:41:53 +03:00
}
}
func (s *stream) skipObject(depth int64) error {
2021-02-15 20:13:13 +03:00
braceCount := 1
2021-02-15 20:24:27 +03:00
_, cursor, p := s.stat()
2020-07-30 16:41:53 +03:00
for {
2021-02-15 20:13:13 +03:00
switch char(p, cursor) {
2020-07-30 16:41:53 +03:00
case '{':
braceCount++
depth++
if depth > maxDecodeNestingDepth {
return errExceededMaxDepth(s.char(), s.cursor)
}
2020-07-30 16:41:53 +03:00
case '}':
braceCount--
depth--
2021-02-15 20:13:13 +03:00
if braceCount == 0 {
s.cursor = cursor + 1
2020-07-30 16:41:53 +03:00
return nil
}
case '[':
depth++
if depth > maxDecodeNestingDepth {
return errExceededMaxDepth(s.char(), s.cursor)
}
case ']':
depth--
2021-02-15 20:13:13 +03:00
case '"':
for {
cursor++
switch char(p, cursor) {
case '\\':
cursor++
if char(p, cursor) == nul {
s.cursor = cursor
if s.read() {
2021-05-03 10:25:26 +03:00
_, cursor, p = s.statForRetry()
continue
}
return errUnexpectedEndOfJSON("string of object", cursor)
2021-02-15 20:13:13 +03:00
}
case '"':
2021-02-15 20:13:13 +03:00
goto SWITCH_OUT
case nul:
s.cursor = cursor
if s.read() {
2021-05-03 10:25:26 +03:00
_, cursor, p = s.statForRetry()
2021-02-15 20:13:13 +03:00
continue
}
return errUnexpectedEndOfJSON("string of object", cursor)
}
}
case nul:
s.cursor = cursor
if s.read() {
2021-02-15 20:24:27 +03:00
_, cursor, p = s.stat()
2021-02-15 20:13:13 +03:00
continue
}
return errUnexpectedEndOfJSON("object of object", cursor)
}
SWITCH_OUT:
cursor++
}
}
func (s *stream) skipArray(depth int64) error {
2021-02-15 20:13:13 +03:00
bracketCount := 1
2021-02-15 20:24:27 +03:00
_, cursor, p := s.stat()
2021-02-15 20:13:13 +03:00
for {
switch char(p, cursor) {
case '[':
bracketCount++
depth++
if depth > maxDecodeNestingDepth {
return errExceededMaxDepth(s.char(), s.cursor)
}
2020-07-30 16:41:53 +03:00
case ']':
bracketCount--
depth--
2021-02-15 20:13:13 +03:00
if bracketCount == 0 {
s.cursor = cursor + 1
2020-07-30 16:41:53 +03:00
return nil
}
case '{':
depth++
if depth > maxDecodeNestingDepth {
return errExceededMaxDepth(s.char(), s.cursor)
}
case '}':
depth--
2020-07-30 16:41:53 +03:00
case '"':
2020-07-31 11:10:03 +03:00
for {
2021-02-15 20:13:13 +03:00
cursor++
switch char(p, cursor) {
case '\\':
cursor++
if char(p, cursor) == nul {
s.cursor = cursor
if s.read() {
2021-05-03 10:25:26 +03:00
_, cursor, p = s.statForRetry()
continue
}
return errUnexpectedEndOfJSON("string of object", cursor)
2020-07-31 11:10:03 +03:00
}
case '"':
2021-02-15 20:13:13 +03:00
goto SWITCH_OUT
case nul:
s.cursor = cursor
if s.read() {
2021-05-03 10:25:26 +03:00
_, cursor, p = s.statForRetry()
2021-02-15 20:13:13 +03:00
continue
}
return errUnexpectedEndOfJSON("string of object", cursor)
2020-07-30 16:41:53 +03:00
}
2021-02-15 20:13:13 +03:00
}
case nul:
s.cursor = cursor
if s.read() {
2021-02-15 20:24:27 +03:00
_, cursor, p = s.stat()
2021-02-15 20:13:13 +03:00
continue
}
return errUnexpectedEndOfJSON("array of object", cursor)
}
SWITCH_OUT:
cursor++
}
}
func (s *stream) skipValue(depth int64) error {
2021-02-15 20:24:27 +03:00
_, cursor, p := s.stat()
2021-02-15 20:13:13 +03:00
for {
switch char(p, cursor) {
case ' ', '\n', '\t', '\r':
cursor++
continue
case nul:
s.cursor = cursor
if s.read() {
2021-02-15 20:24:27 +03:00
_, cursor, p = s.stat()
2021-02-15 20:13:13 +03:00
continue
}
return errUnexpectedEndOfJSON("value of object", s.totalOffset())
case '{':
s.cursor = cursor + 1
return s.skipObject(depth + 1)
2021-02-15 20:13:13 +03:00
case '[':
s.cursor = cursor + 1
return s.skipArray(depth + 1)
2021-02-15 20:13:13 +03:00
case '"':
for {
cursor++
2021-02-15 20:24:27 +03:00
switch char(p, cursor) {
case '\\':
cursor++
if char(p, cursor) == nul {
s.cursor = cursor
if s.read() {
2021-05-03 10:25:26 +03:00
_, cursor, p = s.statForRetry()
continue
}
return errUnexpectedEndOfJSON("value of string", s.totalOffset())
2021-02-15 20:13:13 +03:00
}
case '"':
2021-02-15 20:13:13 +03:00
s.cursor = cursor + 1
2020-07-30 16:41:53 +03:00
return nil
2021-02-15 20:13:13 +03:00
case nul:
s.cursor = cursor
if s.read() {
2021-05-03 10:25:26 +03:00
_, cursor, p = s.statForRetry()
2021-02-15 20:13:13 +03:00
continue
}
return errUnexpectedEndOfJSON("value of string", s.totalOffset())
2020-07-30 16:41:53 +03:00
}
}
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
2020-07-31 11:10:03 +03:00
for {
2021-02-15 20:13:13 +03:00
cursor++
c := char(p, cursor)
if floatTable[c] {
2020-07-30 16:41:53 +03:00
continue
} else if c == nul {
2020-07-31 11:10:03 +03:00
if s.read() {
s.cursor-- // for retry current character
2021-02-15 20:24:27 +03:00
_, cursor, p = s.stat()
2020-07-31 11:10:03 +03:00
continue
}
2020-07-30 16:41:53 +03:00
}
2021-02-15 20:13:13 +03:00
s.cursor = cursor
2020-07-30 16:41:53 +03:00
return nil
}
2020-07-31 14:24:39 +03:00
case 't':
2021-02-15 20:13:13 +03:00
s.cursor = cursor
2020-07-31 14:24:39 +03:00
if err := trueBytes(s); err != nil {
return err
}
2021-02-15 20:13:13 +03:00
return nil
2020-07-31 14:24:39 +03:00
case 'f':
2021-02-15 20:13:13 +03:00
s.cursor = cursor
2020-07-31 14:24:39 +03:00
if err := falseBytes(s); err != nil {
return err
}
2021-02-15 20:13:13 +03:00
return nil
2020-07-31 14:24:39 +03:00
case 'n':
2021-02-15 20:13:13 +03:00
s.cursor = cursor
2020-07-31 14:24:39 +03:00
if err := nullBytes(s); err != nil {
return err
}
2021-02-15 20:13:13 +03:00
return nil
2020-07-30 16:41:53 +03:00
}
2021-02-15 20:13:13 +03:00
cursor++
2020-07-30 16:41:53 +03:00
}
}
// nullBytes consumes the literal "null". The cursor must be on the 'n'; on
// success it is left just past the final 'l'.
//
// Each remaining letter is compared against the buffer. On a mismatch
// retryReadNull is consulted: it refills the buffer when the mismatch is the
// nul sentinel and reports an invalid-character error otherwise. After a
// refill the same position is compared again, so input that arrives in
// several reads is held to the same check as input that is already buffered.
func nullBytes(s *stream) error {
	// current cursor's character is 'n'
	for _, want := range []byte("ull") {
		s.cursor++
		for s.char() != want {
			if err := retryReadNull(s); err != nil {
				return err
			}
		}
	}
	s.cursor++
	return nil
}
// retryReadNull is called when a byte of the literal "null" did not match.
// If the byte under the cursor is the nul sentinel and read succeeds, the
// mismatch only meant the buffer had run out, and nil is returned. Otherwise
// it returns an invalid-character error for the current byte and position.
func retryReadNull(s *stream) error {
	if s.char() != nul || !s.read() {
		return errInvalidCharacter(s.char(), "null", s.totalOffset())
	}
	return nil
}
// trueBytes consumes the literal "true". The cursor must be on the 't'; on
// success it is left just past the final 'e'.
//
// Each remaining letter is compared against the buffer. On a mismatch
// retryReadTrue is consulted: it refills the buffer when the mismatch is the
// nul sentinel and reports an invalid-character error otherwise. After a
// refill the same position is compared again, so input that arrives in
// several reads is held to the same check as input that is already buffered.
func trueBytes(s *stream) error {
	// current cursor's character is 't'
	for _, want := range []byte("rue") {
		s.cursor++
		for s.char() != want {
			if err := retryReadTrue(s); err != nil {
				return err
			}
		}
	}
	s.cursor++
	return nil
}
// retryReadTrue is called when a byte of the literal "true" did not match.
// If the byte under the cursor is the nul sentinel and read succeeds, the
// mismatch only meant the buffer had run out, and nil is returned. Otherwise
// it returns an invalid-character error for the current byte and position.
func retryReadTrue(s *stream) error {
	if s.char() != nul || !s.read() {
		return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset())
	}
	return nil
}
// falseBytes consumes the literal "false". The cursor must be on the 'f'; on
// success it is left just past the final 'e'.
//
// Each remaining letter is compared against the buffer. On a mismatch
// retryReadFalse is consulted: it refills the buffer when the mismatch is the
// nul sentinel and reports an invalid-character error otherwise. After a
// refill the same position is compared again, so input that arrives in
// several reads is held to the same check as input that is already buffered.
func falseBytes(s *stream) error {
	// current cursor's character is 'f'
	for _, want := range []byte("alse") {
		s.cursor++
		for s.char() != want {
			if err := retryReadFalse(s); err != nil {
				return err
			}
		}
	}
	s.cursor++
	return nil
}
// retryReadFalse is called when a byte of the literal "false" did not match.
// If the byte under the cursor is the nul sentinel and read succeeds, the
// mismatch only meant the buffer had run out, and nil is returned. Otherwise
// it returns an invalid-character error for the current byte and position.
func retryReadFalse(s *stream) error {
	if s.char() != nul || !s.read() {
		return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
	}
	return nil
}