Add stream decoder

This commit is contained in:
Masaaki Goshima 2020-05-24 21:31:10 +09:00
parent 4acc22e0fe
commit b4b79620aa
6 changed files with 448 additions and 263 deletions

334
decode.go
View File

@ -1,35 +1,26 @@
package json
import (
"bytes"
"encoding"
"fmt"
"io"
"reflect"
"strings"
"sync"
"unsafe"
)
// A Token holds a value of one of these types:
//
// Delim, for the four JSON delimiters [ ] { }
// bool, for JSON booleans
// float64, for JSON numbers
// Number, for JSON numbers
// string, for JSON string literals
// nil, for JSON null
//
type Token interface{}
type Delim rune
func (d Delim) String() string {
return string(d)
}
type decoder interface {
decode([]byte, int64, uintptr) (int64, error)
}
type Decoder struct {
r io.Reader
buffered func() io.Reader
s *stream
}
type decoderMap struct {
@ -62,13 +53,13 @@ func init() {
// The decoder introduces its own buffering and may
// read data from r beyond the JSON values requested.
func NewDecoder(r io.Reader) *Decoder {
return &Decoder{r: r}
return &Decoder{s: &stream{r: r}}
}
// Buffered returns a reader of the data remaining in the Decoder's
// buffer. The reader is valid until the next call to Decode.
func (d *Decoder) Buffered() io.Reader {
return d.buffered()
return d.s.buffered()
}
func (d *Decoder) validateType(typ *rtype, p uintptr) error {
@ -116,6 +107,21 @@ func (d *Decoder) decodeForUnmarshalNoEscape(src []byte, v interface{}) error {
return d.decode(src, header)
}
func (d *Decoder) prepareForDecode() error {
s := d.s
for ; s.cursor < s.length || s.read(); s.cursor++ {
switch s.char() {
case ' ', '\t', '\r', '\n':
continue
case ',', ':':
s.cursor++
return nil
}
break
}
return nil
}
// Decode reads the next JSON-encoded value from its
// input and stores it in the value pointed to by v.
//
@ -142,251 +148,61 @@ func (d *Decoder) Decode(v interface{}) error {
cachedDecoder.set(typeptr, compiledDec)
dec = compiledDec
}
for {
buf := make([]byte, 1024)
n, err := d.r.Read(buf)
if n == 0 || err == io.EOF {
return nil
}
if err != nil {
return err
}
cursor, err := dec.decode(buf[:n], 0, ptr)
if err != nil {
return err
}
d.buffered = func() io.Reader {
return bytes.NewReader(buf[cursor:])
}
if err := d.prepareForDecode(); err != nil {
return err
}
s := d.s
cursor, err := dec.decode(s.buf[s.cursor:], 0, ptr)
s.cursor += cursor
fmt.Println("cursor = ", cursor, "next buf = ", string(s.buf[s.cursor:]))
if err != nil {
return err
}
return nil
}
func (d *Decoder) compileHead(typ *rtype) (decoder, error) {
if typ.Implements(unmarshalJSONType) {
return newUnmarshalJSONDecoder(typ), nil
} else if typ.Implements(unmarshalTextType) {
return newUnmarshalTextDecoder(typ), nil
}
return d.compile(typ.Elem())
}
func (d *Decoder) compile(typ *rtype) (decoder, error) {
if typ.Implements(unmarshalJSONType) {
return newUnmarshalJSONDecoder(typ), nil
} else if typ.Implements(unmarshalTextType) {
return newUnmarshalTextDecoder(typ), nil
}
switch typ.Kind() {
case reflect.Ptr:
return d.compilePtr(typ)
case reflect.Struct:
return d.compileStruct(typ)
case reflect.Slice:
return d.compileSlice(typ)
case reflect.Array:
return d.compileArray(typ)
case reflect.Map:
return d.compileMap(typ)
case reflect.Interface:
return d.compileInterface(typ)
case reflect.Int:
return d.compileInt()
case reflect.Int8:
return d.compileInt8()
case reflect.Int16:
return d.compileInt16()
case reflect.Int32:
return d.compileInt32()
case reflect.Int64:
return d.compileInt64()
case reflect.Uint:
return d.compileUint()
case reflect.Uint8:
return d.compileUint8()
case reflect.Uint16:
return d.compileUint16()
case reflect.Uint32:
return d.compileUint32()
case reflect.Uint64:
return d.compileUint64()
case reflect.String:
return d.compileString()
case reflect.Bool:
return d.compileBool()
case reflect.Float32:
return d.compileFloat32()
case reflect.Float64:
return d.compileFloat64()
}
return nil, &UnsupportedTypeError{Type: rtype2type(typ)}
}
func (d *Decoder) compilePtr(typ *rtype) (decoder, error) {
dec, err := d.compile(typ.Elem())
if err != nil {
return nil, err
}
return newPtrDecoder(dec, typ.Elem()), nil
}
func (d *Decoder) compileInt() (decoder, error) {
return newIntDecoder(func(p uintptr, v int64) {
*(*int)(unsafe.Pointer(p)) = int(v)
}), nil
}
func (d *Decoder) compileInt8() (decoder, error) {
return newIntDecoder(func(p uintptr, v int64) {
*(*int8)(unsafe.Pointer(p)) = int8(v)
}), nil
}
func (d *Decoder) compileInt16() (decoder, error) {
return newIntDecoder(func(p uintptr, v int64) {
*(*int16)(unsafe.Pointer(p)) = int16(v)
}), nil
}
func (d *Decoder) compileInt32() (decoder, error) {
return newIntDecoder(func(p uintptr, v int64) {
*(*int32)(unsafe.Pointer(p)) = int32(v)
}), nil
}
func (d *Decoder) compileInt64() (decoder, error) {
return newIntDecoder(func(p uintptr, v int64) {
*(*int64)(unsafe.Pointer(p)) = v
}), nil
}
func (d *Decoder) compileUint() (decoder, error) {
return newUintDecoder(func(p uintptr, v uint64) {
*(*uint)(unsafe.Pointer(p)) = uint(v)
}), nil
}
func (d *Decoder) compileUint8() (decoder, error) {
return newUintDecoder(func(p uintptr, v uint64) {
*(*uint8)(unsafe.Pointer(p)) = uint8(v)
}), nil
}
func (d *Decoder) compileUint16() (decoder, error) {
return newUintDecoder(func(p uintptr, v uint64) {
*(*uint16)(unsafe.Pointer(p)) = uint16(v)
}), nil
}
func (d *Decoder) compileUint32() (decoder, error) {
return newUintDecoder(func(p uintptr, v uint64) {
*(*uint32)(unsafe.Pointer(p)) = uint32(v)
}), nil
}
func (d *Decoder) compileUint64() (decoder, error) {
return newUintDecoder(func(p uintptr, v uint64) {
*(*uint64)(unsafe.Pointer(p)) = v
}), nil
}
func (d *Decoder) compileFloat32() (decoder, error) {
return newFloatDecoder(func(p uintptr, v float64) {
*(*float32)(unsafe.Pointer(p)) = float32(v)
}), nil
}
func (d *Decoder) compileFloat64() (decoder, error) {
return newFloatDecoder(func(p uintptr, v float64) {
*(*float64)(unsafe.Pointer(p)) = v
}), nil
}
func (d *Decoder) compileString() (decoder, error) {
return newStringDecoder(), nil
}
func (d *Decoder) compileBool() (decoder, error) {
return newBoolDecoder(), nil
}
func (d *Decoder) compileSlice(typ *rtype) (decoder, error) {
elem := typ.Elem()
decoder, err := d.compile(elem)
if err != nil {
return nil, err
}
return newSliceDecoder(decoder, elem, elem.Size()), nil
}
func (d *Decoder) compileArray(typ *rtype) (decoder, error) {
elem := typ.Elem()
decoder, err := d.compile(elem)
if err != nil {
return nil, err
}
return newArrayDecoder(decoder, elem, typ.Len()), nil
}
func (d *Decoder) compileMap(typ *rtype) (decoder, error) {
keyDec, err := d.compile(typ.Key())
if err != nil {
return nil, err
}
valueDec, err := d.compile(typ.Elem())
if err != nil {
return nil, err
}
return newMapDecoder(typ, keyDec, valueDec), nil
}
func (d *Decoder) compileInterface(typ *rtype) (decoder, error) {
return newInterfaceDecoder(typ), nil
}
func (d *Decoder) getTag(field reflect.StructField) string {
return field.Tag.Get("json")
}
func (d *Decoder) isIgnoredStructField(field reflect.StructField) bool {
if field.PkgPath != "" && !field.Anonymous {
// private field
return true
}
tag := d.getTag(field)
if tag == "-" {
return true
}
return false
}
func (d *Decoder) compileStruct(typ *rtype) (decoder, error) {
fieldNum := typ.NumField()
fieldMap := map[string]*structFieldSet{}
for i := 0; i < fieldNum; i++ {
field := typ.Field(i)
if d.isIgnoredStructField(field) {
func (d *Decoder) More() bool {
s := d.s
for ; s.cursor < s.length || s.read(); s.cursor++ {
switch s.char() {
case ' ', '\n', '\r', '\t':
continue
case '}', ']':
return false
}
keyName := field.Name
tag := d.getTag(field)
opts := strings.Split(tag, ",")
if len(opts) > 0 {
if opts[0] != "" {
keyName = opts[0]
}
}
dec, err := d.compile(type2rtype(field.Type))
if err != nil {
return nil, err
}
fieldSet := &structFieldSet{dec: dec, offset: field.Offset}
fieldMap[field.Name] = fieldSet
fieldMap[keyName] = fieldSet
fieldMap[strings.ToLower(keyName)] = fieldSet
break
}
return newStructDecoder(fieldMap), nil
return true
}
func (d *Decoder) Token() (Token, error) {
s := d.s
for ; s.cursor < s.length || s.read(); s.cursor++ {
switch s.char() {
case ' ', '\n', '\r', '\t':
continue
case '{':
s.cursor++
return Delim('{'), nil
case '[':
s.cursor++
return Delim('['), nil
case '}':
s.cursor++
return Delim('}'), nil
case ']':
s.cursor++
return Delim(']'), nil
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
case '"':
case 't':
case 'f':
case 'n':
default:
return nil, errInvalidCharacter(s.char(), "token", s.totalOffset())
}
}
return nil, io.EOF
}
// DisallowUnknownFields causes the Decoder to return an error when the destination
@ -400,14 +216,6 @@ func (d *Decoder) InputOffset() int64 {
return 0
}
func (d *Decoder) More() bool {
return false
}
func (d *Decoder) Token() (Token, error) {
return nil, nil
}
// UseNumber causes the Decoder to unmarshal a number into an interface{} as a
// Number instead of as a float64.
func (d *Decoder) UseNumber() {

233
decode_compile.go Normal file
View File

@ -0,0 +1,233 @@
package json
import (
"reflect"
"strings"
"unsafe"
)
func (d *Decoder) compileHead(typ *rtype) (decoder, error) {
if typ.Implements(unmarshalJSONType) {
return newUnmarshalJSONDecoder(typ), nil
} else if typ.Implements(unmarshalTextType) {
return newUnmarshalTextDecoder(typ), nil
}
return d.compile(typ.Elem())
}
func (d *Decoder) compile(typ *rtype) (decoder, error) {
if typ.Implements(unmarshalJSONType) {
return newUnmarshalJSONDecoder(typ), nil
} else if typ.Implements(unmarshalTextType) {
return newUnmarshalTextDecoder(typ), nil
}
switch typ.Kind() {
case reflect.Ptr:
return d.compilePtr(typ)
case reflect.Struct:
return d.compileStruct(typ)
case reflect.Slice:
return d.compileSlice(typ)
case reflect.Array:
return d.compileArray(typ)
case reflect.Map:
return d.compileMap(typ)
case reflect.Interface:
return d.compileInterface(typ)
case reflect.Int:
return d.compileInt()
case reflect.Int8:
return d.compileInt8()
case reflect.Int16:
return d.compileInt16()
case reflect.Int32:
return d.compileInt32()
case reflect.Int64:
return d.compileInt64()
case reflect.Uint:
return d.compileUint()
case reflect.Uint8:
return d.compileUint8()
case reflect.Uint16:
return d.compileUint16()
case reflect.Uint32:
return d.compileUint32()
case reflect.Uint64:
return d.compileUint64()
case reflect.String:
return d.compileString()
case reflect.Bool:
return d.compileBool()
case reflect.Float32:
return d.compileFloat32()
case reflect.Float64:
return d.compileFloat64()
}
return nil, &UnsupportedTypeError{Type: rtype2type(typ)}
}
func (d *Decoder) compilePtr(typ *rtype) (decoder, error) {
dec, err := d.compile(typ.Elem())
if err != nil {
return nil, err
}
return newPtrDecoder(dec, typ.Elem()), nil
}
func (d *Decoder) compileInt() (decoder, error) {
return newIntDecoder(func(p uintptr, v int64) {
*(*int)(unsafe.Pointer(p)) = int(v)
}), nil
}
func (d *Decoder) compileInt8() (decoder, error) {
return newIntDecoder(func(p uintptr, v int64) {
*(*int8)(unsafe.Pointer(p)) = int8(v)
}), nil
}
func (d *Decoder) compileInt16() (decoder, error) {
return newIntDecoder(func(p uintptr, v int64) {
*(*int16)(unsafe.Pointer(p)) = int16(v)
}), nil
}
func (d *Decoder) compileInt32() (decoder, error) {
return newIntDecoder(func(p uintptr, v int64) {
*(*int32)(unsafe.Pointer(p)) = int32(v)
}), nil
}
func (d *Decoder) compileInt64() (decoder, error) {
return newIntDecoder(func(p uintptr, v int64) {
*(*int64)(unsafe.Pointer(p)) = v
}), nil
}
func (d *Decoder) compileUint() (decoder, error) {
return newUintDecoder(func(p uintptr, v uint64) {
*(*uint)(unsafe.Pointer(p)) = uint(v)
}), nil
}
func (d *Decoder) compileUint8() (decoder, error) {
return newUintDecoder(func(p uintptr, v uint64) {
*(*uint8)(unsafe.Pointer(p)) = uint8(v)
}), nil
}
func (d *Decoder) compileUint16() (decoder, error) {
return newUintDecoder(func(p uintptr, v uint64) {
*(*uint16)(unsafe.Pointer(p)) = uint16(v)
}), nil
}
func (d *Decoder) compileUint32() (decoder, error) {
return newUintDecoder(func(p uintptr, v uint64) {
*(*uint32)(unsafe.Pointer(p)) = uint32(v)
}), nil
}
func (d *Decoder) compileUint64() (decoder, error) {
return newUintDecoder(func(p uintptr, v uint64) {
*(*uint64)(unsafe.Pointer(p)) = v
}), nil
}
func (d *Decoder) compileFloat32() (decoder, error) {
return newFloatDecoder(func(p uintptr, v float64) {
*(*float32)(unsafe.Pointer(p)) = float32(v)
}), nil
}
func (d *Decoder) compileFloat64() (decoder, error) {
return newFloatDecoder(func(p uintptr, v float64) {
*(*float64)(unsafe.Pointer(p)) = v
}), nil
}
func (d *Decoder) compileString() (decoder, error) {
return newStringDecoder(), nil
}
func (d *Decoder) compileBool() (decoder, error) {
return newBoolDecoder(), nil
}
func (d *Decoder) compileSlice(typ *rtype) (decoder, error) {
elem := typ.Elem()
decoder, err := d.compile(elem)
if err != nil {
return nil, err
}
return newSliceDecoder(decoder, elem, elem.Size()), nil
}
func (d *Decoder) compileArray(typ *rtype) (decoder, error) {
elem := typ.Elem()
decoder, err := d.compile(elem)
if err != nil {
return nil, err
}
return newArrayDecoder(decoder, elem, typ.Len()), nil
}
func (d *Decoder) compileMap(typ *rtype) (decoder, error) {
keyDec, err := d.compile(typ.Key())
if err != nil {
return nil, err
}
valueDec, err := d.compile(typ.Elem())
if err != nil {
return nil, err
}
return newMapDecoder(typ, keyDec, valueDec), nil
}
func (d *Decoder) compileInterface(typ *rtype) (decoder, error) {
return newInterfaceDecoder(typ), nil
}
func (d *Decoder) getTag(field reflect.StructField) string {
return field.Tag.Get("json")
}
func (d *Decoder) isIgnoredStructField(field reflect.StructField) bool {
if field.PkgPath != "" && !field.Anonymous {
// private field
return true
}
tag := d.getTag(field)
if tag == "-" {
return true
}
return false
}
func (d *Decoder) compileStruct(typ *rtype) (decoder, error) {
fieldNum := typ.NumField()
fieldMap := map[string]*structFieldSet{}
for i := 0; i < fieldNum; i++ {
field := typ.Field(i)
if d.isIgnoredStructField(field) {
continue
}
keyName := field.Name
tag := d.getTag(field)
opts := strings.Split(tag, ",")
if len(opts) > 0 {
if opts[0] != "" {
keyName = opts[0]
}
}
dec, err := d.compile(type2rtype(field.Type))
if err != nil {
return nil, err
}
fieldSet := &structFieldSet{dec: dec, offset: field.Offset}
fieldMap[field.Name] = fieldSet
fieldMap[keyName] = fieldSet
fieldMap[strings.ToLower(keyName)] = fieldSet
}
return newStructDecoder(fieldMap), nil
}

View File

@ -49,6 +49,43 @@ var (
}
)
func (d *intDecoder) decodeByteStream(s *stream) ([]byte, error) {
for ; s.cursor < s.length || s.read(); s.cursor++ {
switch s.char() {
case ' ', '\n', '\t', '\r':
continue
case '-':
start := s.cursor
s.cursor++
for ; s.cursor < s.length || s.read(); s.cursor++ {
if numTable[s.char()] {
continue
}
break
}
num := s.buf[start:s.cursor]
if len(num) < 2 {
return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset())
}
return num, nil
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
start := s.cursor
s.cursor++
for ; s.cursor < s.length || s.read(); s.cursor++ {
if numTable[s.char()] {
continue
}
break
}
num := s.buf[start:s.cursor]
return num, nil
default:
return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset())
}
}
return nil, errUnexpectedEndOfJSON("number(integer)", s.totalOffset())
}
func (d *intDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) {
for {
switch buf[cursor] {
@ -72,6 +109,15 @@ func (d *intDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error)
return nil, 0, errUnexpectedEndOfJSON("number(integer)", cursor)
}
func (d *intDecoder) decodeStream(s *stream, p uintptr) error {
bytes, err := d.decodeByteStream(s)
if err != nil {
return err
}
d.op(p, d.parseInt(bytes))
return nil
}
func (d *intDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) {
bytes, c, err := d.decodeByte(buf, cursor)
if err != nil {

49
decode_stream.go Normal file
View File

@ -0,0 +1,49 @@
package json
import (
"bytes"
"io"
)
const (
readChunkSize = 1024
)
type stream struct {
buf []byte
length int64
r io.Reader
decodedPos int64
offset int64
cursor int64
}
func (s *stream) buffered() io.Reader {
return bytes.NewReader(s.buf[s.cursor:])
}
func (s *stream) totalOffset() int64 {
return s.offset + s.cursor
}
func (s *stream) char() byte {
return s.buf[s.cursor]
}
func (s *stream) read() bool {
buf := make([]byte, readChunkSize)
n, err := s.r.Read(buf)
if n == 0 || err == io.EOF {
return false
}
remain := s.length - s.decodedPos
newBuf := make([]byte, remain+int64(n))
copy(newBuf, s.buf[s.decodedPos:])
copy(newBuf[remain:], buf)
s.buf = newBuf
s.length = int64(len(newBuf))
s.offset += s.decodedPos
s.cursor = 0
s.decodedPos = 0
return true
}

View File

@ -3,6 +3,7 @@ package json_test
import (
"fmt"
"reflect"
"strings"
"testing"
"github.com/goccy/go-json"
@ -231,3 +232,40 @@ func Test_InvalidUnmarshalError(t *testing.T) {
assertEq(t, "invalid unmarshal error", "json: Unmarshal(non-pointer int)", err)
})
}
func Test_DecodeStream(t *testing.T) {
const stream = `
[
{"Name": "Ed", "Text": "Knock knock."},
{"Name": "Sam", "Text": "Who's there?"},
{"Name": "Ed", "Text": "Go fmt."},
{"Name": "Sam", "Text": "Go fmt who?"},
{"Name": "Ed", "Text": "Go fmt yourself!"}
]
`
type Message struct {
Name, Text string
}
dec := json.NewDecoder(strings.NewReader(stream))
tk, err := dec.Token()
assertErr(t, err)
assertEq(t, "[", fmt.Sprint(tk), "[")
elem := 0
// while the array contains values
for dec.More() {
var m Message
// decode an array value (Message)
assertErr(t, dec.Decode(&m))
if m.Name == "" || m.Text == "" {
t.Fatal("failed to assign value to struct field")
}
elem++
}
assertEq(t, "decode count", elem, 5)
tk, err = dec.Token()
assertErr(t, err)
assertEq(t, "]", fmt.Sprint(tk), "]")
}

11
json.go
View File

@ -264,3 +264,14 @@ func UnmarshalNoEscape(data []byte, v interface{}) error {
var dec Decoder
return dec.decodeForUnmarshalNoEscape(src, v)
}
// A Token holds a value of one of these types:
//
// Delim, for the four JSON delimiters [ ] { }
// bool, for JSON booleans
// float64, for JSON numbers
// Number, for JSON numbers
// string, for JSON string literals
// nil, for JSON null
//
type Token interface{}