Fix stream decoding

This commit is contained in:
Masaaki Goshima 2020-12-05 22:27:33 +09:00
parent 7f97aca5e6
commit 4332d1353e
13 changed files with 67 additions and 92 deletions

View File

@ -60,7 +60,7 @@ const (
// The decoder introduces its own buffering and may // The decoder introduces its own buffering and may
// read data from r beyond the JSON values requested. // read data from r beyond the JSON values requested.
func NewDecoder(r io.Reader) *Decoder { func NewDecoder(r io.Reader) *Decoder {
s := &stream{r: r} s := newStream(r)
s.read() s.read()
return &Decoder{ return &Decoder{
s: s, s: s,

View File

@ -19,12 +19,17 @@ func (d *bytesDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
if err != nil { if err != nil {
return err return err
} }
if bytes == nil {
s.reset()
return nil
}
decodedLen := base64.StdEncoding.DecodedLen(len(bytes)) decodedLen := base64.StdEncoding.DecodedLen(len(bytes))
buf := make([]byte, decodedLen) buf := make([]byte, decodedLen)
if _, err := base64.StdEncoding.Decode(buf, bytes); err != nil { if _, err := base64.StdEncoding.Decode(buf, bytes); err != nil {
return err return err
} }
*(*[]byte)(p) = buf *(*[]byte)(p) = buf
s.reset()
return nil return nil
} }
@ -51,7 +56,6 @@ func binaryBytes(s *stream) ([]byte, error) {
case '"': case '"':
literal := s.buf[start:s.cursor] literal := s.buf[start:s.cursor]
s.cursor++ s.cursor++
s.reset()
return literal, nil return literal, nil
case nul: case nul:
if s.read() { if s.read() {
@ -77,7 +81,7 @@ func (d *bytesDecoder) decodeStreamBinary(s *stream) ([]byte, error) {
if err := nullBytes(s); err != nil { if err := nullBytes(s); err != nil {
return nil, err return nil, err
} }
return []byte{}, nil return nil, nil
case nul: case nul:
if s.read() { if s.read() {
continue continue

View File

@ -1,6 +1,8 @@
package json package json
import "unsafe" import (
"unsafe"
)
type intDecoder struct { type intDecoder struct {
op func(unsafe.Pointer, int64) op func(unsafe.Pointer, int64)
@ -74,7 +76,6 @@ func (d *intDecoder) decodeStreamByte(s *stream) ([]byte, error) {
break break
} }
num := s.buf[start:s.cursor] num := s.buf[start:s.cursor]
s.reset()
if len(num) < 2 { if len(num) < 2 {
goto ERROR goto ERROR
} }
@ -94,7 +95,6 @@ func (d *intDecoder) decodeStreamByte(s *stream) ([]byte, error) {
break break
} }
num := s.buf[start:s.cursor] num := s.buf[start:s.cursor]
s.reset()
return num, nil return num, nil
case nul: case nul:
if s.read() { if s.read() {
@ -138,6 +138,7 @@ func (d *intDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
return err return err
} }
d.op(p, d.parseInt(bytes)) d.op(p, d.parseInt(bytes))
s.reset()
return nil return nil
} }

View File

@ -52,7 +52,7 @@ func (d *interfaceDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
).decodeStream(s, ptr); err != nil { ).decodeStream(s, ptr); err != nil {
return err return err
} }
**(**interface{})(unsafe.Pointer(&p)) = v *(*interface{})(p) = v
return nil return nil
case '[': case '[':
var v []interface{} var v []interface{}
@ -66,7 +66,7 @@ func (d *interfaceDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
).decodeStream(s, ptr); err != nil { ).decodeStream(s, ptr); err != nil {
return err return err
} }
**(**interface{})(unsafe.Pointer(&p)) = v *(*interface{})(p) = v
return nil return nil
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
return d.numDecoder(s).decodeStream(s, p) return d.numDecoder(s).decodeStream(s, p)
@ -82,7 +82,7 @@ func (d *interfaceDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
case '"': case '"':
literal := s.buf[start:s.cursor] literal := s.buf[start:s.cursor]
s.cursor++ s.cursor++
**(**interface{})(unsafe.Pointer(&p)) = *(*string)(unsafe.Pointer(&literal)) *(*interface{})(p) = string(literal)
return nil return nil
case nul: case nul:
if s.read() { if s.read() {
@ -109,7 +109,7 @@ func (d *interfaceDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
if err := nullBytes(s); err != nil { if err := nullBytes(s); err != nil {
return err return err
} }
**(**interface{})(unsafe.Pointer(&p)) = nil *(*interface{})(p) = nil
return nil return nil
case nul: case nul:
if s.read() { if s.read() {

View File

@ -64,8 +64,8 @@ func (d *mapDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
s.skipWhiteSpace() s.skipWhiteSpace()
mapValue := makemap(d.mapType, 0) mapValue := makemap(d.mapType, 0)
if s.buf[s.cursor+1] == '}' { if s.buf[s.cursor+1] == '}' {
**(**unsafe.Pointer)(unsafe.Pointer(&p)) = mapValue *(*unsafe.Pointer)(p) = mapValue
s.cursor++ s.cursor += 2
return nil return nil
} }
for { for {
@ -82,9 +82,6 @@ func (d *mapDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
return errExpected("colon after object key", s.totalOffset()) return errExpected("colon after object key", s.totalOffset())
} }
s.cursor++ s.cursor++
if s.end() {
return errUnexpectedEndOfJSON("map", s.totalOffset())
}
var value interface{} var value interface{}
if err := d.setValueStream(s, &value); err != nil { if err := d.setValueStream(s, &value); err != nil {
return err return err

View File

@ -25,8 +25,8 @@ func (d *numberDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
if err != nil { if err != nil {
return err return err
} }
str := *(*string)(unsafe.Pointer(&bytes)) d.op(p, Number(string(bytes)))
d.op(p, Number(str)) s.reset()
return nil return nil
} }

View File

@ -74,7 +74,7 @@ func (d *sliceDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
s.cursor++ s.cursor++
s.skipWhiteSpace() s.skipWhiteSpace()
if s.char() == ']' { if s.char() == ']' {
**(**sliceHeader)(unsafe.Pointer(&p)) = sliceHeader{ *(*sliceHeader)(p) = sliceHeader{
data: newArray(d.elemType, 0), data: newArray(d.elemType, 0),
len: 0, len: 0,
cap: 0, cap: 0,
@ -116,7 +116,7 @@ func (d *sliceDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
len: slice.len, len: slice.len,
cap: slice.cap, cap: slice.cap,
}) })
**(**sliceHeader)(unsafe.Pointer(&p)) = dst *(*sliceHeader)(p) = dst
d.releaseSlice(slice) d.releaseSlice(slice)
s.cursor++ s.cursor++
return nil return nil

View File

@ -6,20 +6,29 @@ import (
) )
const ( const (
readChunkSize = 512 initBufSize = 512
) )
type stream struct { type stream struct {
buf []byte buf []byte
bufSize int64
length int64 length int64
r io.Reader r io.Reader
offset int64 offset int64
cursor int64 cursor int64
readPos int64
allRead bool allRead bool
useNumber bool useNumber bool
disallowUnknownFields bool disallowUnknownFields bool
} }
func newStream(r io.Reader) *stream {
return &stream{
r: r,
bufSize: initBufSize,
}
}
func (s *stream) buffered() io.Reader { func (s *stream) buffered() io.Reader {
return bytes.NewReader(s.buf[s.cursor:]) return bytes.NewReader(s.buf[s.cursor:])
} }
@ -36,59 +45,33 @@ func (s *stream) char() byte {
return s.buf[s.cursor] return s.buf[s.cursor]
} }
func (s *stream) end() bool {
return s.allRead && s.length <= s.cursor
}
func (s *stream) progressN(n int64) bool {
if s.cursor+n < s.length-1 || s.read() {
s.cursor += n
return true
}
s.cursor = s.length
return false
}
func (s *stream) reset() { func (s *stream) reset() {
s.offset += s.cursor
s.buf = s.buf[s.cursor:] s.buf = s.buf[s.cursor:]
s.length -= s.cursor
s.cursor = 0 s.cursor = 0
s.length = int64(len(s.buf))
}
func (s *stream) readBuf() []byte {
s.bufSize *= 2
remainBuf := s.buf
s.buf = make([]byte, s.bufSize)
copy(s.buf, remainBuf)
return s.buf[s.cursor:]
} }
func (s *stream) read() bool { func (s *stream) read() bool {
if s.allRead { if s.allRead {
return false return false
} }
buf := make([]byte, readChunkSize) buf := s.readBuf()
n, err := s.r.Read(buf) last := len(buf) - 1
if err != nil && err != io.EOF { buf[last] = nul
return false n, err := s.r.Read(buf[:last])
} s.length = s.cursor + int64(n)
if n < readChunkSize || err == io.EOF { if n < last || err == io.EOF {
s.allRead = true s.allRead = true
} } else if err != nil {
// extend buffer (2) is protect ( s.cursor++ x2 )
// e.g.) decodeEscapeString
const extendBufLength = int64(2)
totalSize := s.length + int64(n) + extendBufLength
if totalSize > readChunkSize {
newBuf := make([]byte, totalSize)
copy(newBuf, s.buf)
copy(newBuf[s.length:], buf)
s.buf = newBuf
s.length = totalSize - extendBufLength
} else if s.length > 0 {
copy(buf[s.length:], buf)
copy(buf, s.buf[:s.length])
s.buf = buf
s.length = totalSize - extendBufLength
} else {
s.buf = buf
s.length = totalSize - extendBufLength
}
s.offset += s.cursor
if n == 0 {
return false return false
} }
return true return true

View File

@ -34,7 +34,8 @@ func (d *stringDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
if err != nil { if err != nil {
return err return err
} }
**(**string)(unsafe.Pointer(&p)) = *(*string)(unsafe.Pointer(&bytes)) *(*string)(p) = string(bytes)
s.reset()
return nil return nil
} }
@ -160,7 +161,6 @@ func stringBytes(s *stream) ([]byte, error) {
case '"': case '"':
literal := s.buf[start:s.cursor] literal := s.buf[start:s.cursor]
s.cursor++ s.cursor++
s.reset()
return literal, nil return literal, nil
case nul: case nul:
if s.read() { if s.read() {

View File

@ -53,11 +53,10 @@ func (d *structDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
} }
s.cursor++ s.cursor++
if s.char() == nul { if s.char() == nul {
s.read() if !s.read() {
}
if s.end() {
return errExpected("object value after colon", s.totalOffset()) return errExpected("object value after colon", s.totalOffset())
} }
}
k := *(*string)(unsafe.Pointer(&key)) k := *(*string)(unsafe.Pointer(&key))
field, exists := d.fieldMap[k] field, exists := d.fieldMap[k]
if exists { if exists {

View File

@ -36,13 +36,15 @@ func (d *unmarshalJSONDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
return err return err
} }
src := s.buf[start:s.cursor] src := s.buf[start:s.cursor]
dst := make([]byte, len(src))
copy(dst, src)
if d.isDoublePointer { if d.isDoublePointer {
newptr := unsafe_New(d.typ.Elem()) newptr := unsafe_New(d.typ.Elem())
v := *(*interface{})(unsafe.Pointer(&interfaceHeader{ v := *(*interface{})(unsafe.Pointer(&interfaceHeader{
typ: d.typ, typ: d.typ,
ptr: newptr, ptr: newptr,
})) }))
if err := v.(Unmarshaler).UnmarshalJSON(src); err != nil { if err := v.(Unmarshaler).UnmarshalJSON(dst); err != nil {
d.annotateError(s.cursor, err) d.annotateError(s.cursor, err)
return err return err
} }
@ -52,7 +54,7 @@ func (d *unmarshalJSONDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
typ: d.typ, typ: d.typ,
ptr: p, ptr: p,
})) }))
if err := v.(Unmarshaler).UnmarshalJSON(src); err != nil { if err := v.(Unmarshaler).UnmarshalJSON(dst); err != nil {
d.annotateError(s.cursor, err) d.annotateError(s.cursor, err)
return err return err
} }

View File

@ -39,15 +39,18 @@ func (d *unmarshalTextDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
return err return err
} }
src := s.buf[start:s.cursor] src := s.buf[start:s.cursor]
if s, ok := unquoteBytes(src); ok { dst := make([]byte, len(src))
src = s copy(dst, src)
if b, ok := unquoteBytes(dst); ok {
dst = b
} }
newptr := unsafe_New(d.typ.Elem()) newptr := unsafe_New(d.typ.Elem())
v := *(*interface{})(unsafe.Pointer(&interfaceHeader{ v := *(*interface{})(unsafe.Pointer(&interfaceHeader{
typ: d.typ, typ: d.typ,
ptr: newptr, ptr: newptr,
})) }))
if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil { if err := v.(encoding.TextUnmarshaler).UnmarshalText(dst); err != nil {
d.annotateError(s.cursor, err) d.annotateError(s.cursor, err)
return err return err
} }

View File

@ -23,25 +23,11 @@ func (d *wrappedStringDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
if err != nil { if err != nil {
return err return err
} }
b := make([]byte, len(bytes)+1)
// save current state copy(b, bytes)
buf := s.buf if _, err := d.dec.decode(b, 0, p); err != nil {
length := s.length return err
cursor := s.cursor
// set content in string to stream
bytes = append(bytes, nul)
s.buf = bytes
s.cursor = 0
s.length = int64(len(bytes))
if err := d.dec.decodeStream(s, p); err != nil {
return nil
} }
// restore state
s.buf = buf
s.length = length
s.cursor = cursor
return nil return nil
} }