From b4b79620aae945ba951ded9e969d90c1e53e93ec Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 24 May 2020 21:31:10 +0900 Subject: [PATCH 01/13] Add stream decoder --- decode.go | 334 ++++++++++------------------------------------ decode_compile.go | 233 ++++++++++++++++++++++++++++++++ decode_int.go | 46 +++++++ decode_stream.go | 49 +++++++ decode_test.go | 38 ++++++ json.go | 11 ++ 6 files changed, 448 insertions(+), 263 deletions(-) create mode 100644 decode_compile.go create mode 100644 decode_stream.go diff --git a/decode.go b/decode.go index 2dbb0ee..a9d261e 100644 --- a/decode.go +++ b/decode.go @@ -1,35 +1,26 @@ package json import ( - "bytes" "encoding" + "fmt" "io" "reflect" - "strings" "sync" "unsafe" ) -// A Token holds a value of one of these types: -// -// Delim, for the four JSON delimiters [ ] { } -// bool, for JSON booleans -// float64, for JSON numbers -// Number, for JSON numbers -// string, for JSON string literals -// nil, for JSON null -// -type Token interface{} - type Delim rune +func (d Delim) String() string { + return string(d) +} + type decoder interface { decode([]byte, int64, uintptr) (int64, error) } type Decoder struct { - r io.Reader - buffered func() io.Reader + s *stream } type decoderMap struct { @@ -62,13 +53,13 @@ func init() { // The decoder introduces its own buffering and may // read data from r beyond the JSON values requested. func NewDecoder(r io.Reader) *Decoder { - return &Decoder{r: r} + return &Decoder{s: &stream{r: r}} } // Buffered returns a reader of the data remaining in the Decoder's // buffer. The reader is valid until the next call to Decode. func (d *Decoder) Buffered() io.Reader { - return d.buffered() + return d.s.buffered() } func (d *Decoder) validateType(typ *rtype, p uintptr) error { @@ -116,6 +107,21 @@ func (d *Decoder) decodeForUnmarshalNoEscape(src []byte, v interface{}) error { return d.decode(src, header) } +func (d *Decoder) prepareForDecode() error { + s := d.s + for ; s.cursor < s.length || s.read(); s.cursor++ { + switch s.char() { + case ' ', '\t', '\r', '\n': + continue + case ',', ':': + s.cursor++ + return nil + } + break + } + return nil +} + // Decode reads the next JSON-encoded value from its // input and stores it in the value pointed to by v. // @@ -142,251 +148,61 @@ func (d *Decoder) Decode(v interface{}) error { cachedDecoder.set(typeptr, compiledDec) dec = compiledDec } - - for { - buf := make([]byte, 1024) - n, err := d.r.Read(buf) - if n == 0 || err == io.EOF { - return nil - } - if err != nil { - return err - } - cursor, err := dec.decode(buf[:n], 0, ptr) - if err != nil { - return err - } - d.buffered = func() io.Reader { - return bytes.NewReader(buf[cursor:]) - } + if err := d.prepareForDecode(); err != nil { + return err + } + s := d.s + cursor, err := dec.decode(s.buf[s.cursor:], 0, ptr) + s.cursor += cursor + fmt.Println("cursor = ", cursor, "next buf = ", string(s.buf[s.cursor:])) + if err != nil { + return err } return nil } -func (d *Decoder) compileHead(typ *rtype) (decoder, error) { - if typ.Implements(unmarshalJSONType) { - return newUnmarshalJSONDecoder(typ), nil - } else if typ.Implements(unmarshalTextType) { - return newUnmarshalTextDecoder(typ), nil - } - return d.compile(typ.Elem()) -} - -func (d *Decoder) compile(typ *rtype) (decoder, error) { - if typ.Implements(unmarshalJSONType) { - return newUnmarshalJSONDecoder(typ), nil - } else if typ.Implements(unmarshalTextType) { - return newUnmarshalTextDecoder(typ), nil - } - switch typ.Kind() { - case reflect.Ptr: - return d.compilePtr(typ) - case reflect.Struct: - return d.compileStruct(typ) - case reflect.Slice: - return d.compileSlice(typ) - case reflect.Array: - return d.compileArray(typ) - case reflect.Map: - return d.compileMap(typ) - case reflect.Interface: - return d.compileInterface(typ) - case reflect.Int: - return d.compileInt() - case reflect.Int8: - return d.compileInt8() - case reflect.Int16: - return d.compileInt16() - case reflect.Int32: - return d.compileInt32() - case reflect.Int64: - return d.compileInt64() - case reflect.Uint: - return d.compileUint() - case reflect.Uint8: - return d.compileUint8() - case reflect.Uint16: - return d.compileUint16() - case reflect.Uint32: - return d.compileUint32() - case reflect.Uint64: - return d.compileUint64() - case reflect.String: - return d.compileString() - case reflect.Bool: - return d.compileBool() - case reflect.Float32: - return d.compileFloat32() - case reflect.Float64: - return d.compileFloat64() - } - return nil, &UnsupportedTypeError{Type: rtype2type(typ)} -} - -func (d *Decoder) compilePtr(typ *rtype) (decoder, error) { - dec, err := d.compile(typ.Elem()) - if err != nil { - return nil, err - } - return newPtrDecoder(dec, typ.Elem()), nil -} - -func (d *Decoder) compileInt() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int)(unsafe.Pointer(p)) = int(v) - }), nil -} - -func (d *Decoder) compileInt8() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int8)(unsafe.Pointer(p)) = int8(v) - }), nil -} - -func (d *Decoder) compileInt16() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int16)(unsafe.Pointer(p)) = int16(v) - }), nil -} - -func (d *Decoder) compileInt32() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int32)(unsafe.Pointer(p)) = int32(v) - }), nil -} - -func (d *Decoder) compileInt64() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int64)(unsafe.Pointer(p)) = v - }), nil -} - -func (d *Decoder) compileUint() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint)(unsafe.Pointer(p)) = uint(v) - }), nil -} - -func (d *Decoder) compileUint8() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint8)(unsafe.Pointer(p)) = uint8(v) - }), nil -} - -func (d *Decoder) compileUint16() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint16)(unsafe.Pointer(p)) = uint16(v) - }), nil -} - -func (d *Decoder) compileUint32() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint32)(unsafe.Pointer(p)) = uint32(v) - }), nil -} - -func (d *Decoder) compileUint64() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint64)(unsafe.Pointer(p)) = v - }), nil -} - -func (d *Decoder) compileFloat32() (decoder, error) { - return newFloatDecoder(func(p uintptr, v float64) { - *(*float32)(unsafe.Pointer(p)) = float32(v) - }), nil -} - -func (d *Decoder) compileFloat64() (decoder, error) { - return newFloatDecoder(func(p uintptr, v float64) { - *(*float64)(unsafe.Pointer(p)) = v - }), nil -} - -func (d *Decoder) compileString() (decoder, error) { - return newStringDecoder(), nil -} - -func (d *Decoder) compileBool() (decoder, error) { - return newBoolDecoder(), nil -} - -func (d *Decoder) compileSlice(typ *rtype) (decoder, error) { - elem := typ.Elem() - decoder, err := d.compile(elem) - if err != nil { - return nil, err - } - return newSliceDecoder(decoder, elem, elem.Size()), nil -} - -func (d *Decoder) compileArray(typ *rtype) (decoder, error) { - elem := typ.Elem() - decoder, err := d.compile(elem) - if err != nil { - return nil, err - } - return newArrayDecoder(decoder, elem, typ.Len()), nil -} - -func (d *Decoder) compileMap(typ *rtype) (decoder, error) { - keyDec, err := d.compile(typ.Key()) - if err != nil { - return nil, err - } - valueDec, err := d.compile(typ.Elem()) - if err != nil { - return nil, err - } - return newMapDecoder(typ, keyDec, valueDec), nil -} - -func (d *Decoder) compileInterface(typ *rtype) (decoder, error) { - return newInterfaceDecoder(typ), nil -} - -func (d *Decoder) getTag(field reflect.StructField) string { - return field.Tag.Get("json") -} - -func (d *Decoder) isIgnoredStructField(field reflect.StructField) bool { - if field.PkgPath != "" && !field.Anonymous { - // private field - return true - } - tag := d.getTag(field) - if tag == "-" { - return true - } - return false -} - -func (d *Decoder) compileStruct(typ *rtype) (decoder, error) { - fieldNum := typ.NumField() - fieldMap := map[string]*structFieldSet{} - for i := 0; i < fieldNum; i++ { - field := typ.Field(i) - if d.isIgnoredStructField(field) { +func (d *Decoder) More() bool { + s := d.s + for ; s.cursor < s.length || s.read(); s.cursor++ { + switch s.char() { + case ' ', '\n', '\r', '\t': continue + case '}', ']': + return false } - keyName := field.Name - tag := d.getTag(field) - opts := strings.Split(tag, ",") - if len(opts) > 0 { - if opts[0] != "" { - keyName = opts[0] - } - } - dec, err := d.compile(type2rtype(field.Type)) - if err != nil { - return nil, err - } - fieldSet := &structFieldSet{dec: dec, offset: field.Offset} - fieldMap[field.Name] = fieldSet - fieldMap[keyName] = fieldSet - fieldMap[strings.ToLower(keyName)] = fieldSet + break } - return newStructDecoder(fieldMap), nil + return true +} + +func (d *Decoder) Token() (Token, error) { + s := d.s + for ; s.cursor < s.length || s.read(); s.cursor++ { + switch s.char() { + case ' ', '\n', '\r', '\t': + continue + case '{': + s.cursor++ + return Delim('{'), nil + case '[': + s.cursor++ + return Delim('['), nil + case '}': + s.cursor++ + return Delim('}'), nil + case ']': + s.cursor++ + return Delim(']'), nil + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + case '"': + case 't': + case 'f': + case 'n': + default: + return nil, errInvalidCharacter(s.char(), "token", s.totalOffset()) + } + } + return nil, io.EOF } // DisallowUnknownFields causes the Decoder to return an error when the destination @@ -400,14 +216,6 @@ func (d *Decoder) InputOffset() int64 { return 0 } -func (d *Decoder) More() bool { - return false -} - -func (d *Decoder) Token() (Token, error) { - return nil, nil -} - // UseNumber causes the Decoder to unmarshal a number into an interface{} as a // Number instead of as a float64. func (d *Decoder) UseNumber() { diff --git a/decode_compile.go b/decode_compile.go new file mode 100644 index 0000000..d34b485 --- /dev/null +++ b/decode_compile.go @@ -0,0 +1,233 @@ +package json + +import ( + "reflect" + "strings" + "unsafe" +) + +func (d *Decoder) compileHead(typ *rtype) (decoder, error) { + if typ.Implements(unmarshalJSONType) { + return newUnmarshalJSONDecoder(typ), nil + } else if typ.Implements(unmarshalTextType) { + return newUnmarshalTextDecoder(typ), nil + } + return d.compile(typ.Elem()) +} + +func (d *Decoder) compile(typ *rtype) (decoder, error) { + if typ.Implements(unmarshalJSONType) { + return newUnmarshalJSONDecoder(typ), nil + } else if typ.Implements(unmarshalTextType) { + return newUnmarshalTextDecoder(typ), nil + } + switch typ.Kind() { + case reflect.Ptr: + return d.compilePtr(typ) + case reflect.Struct: + return d.compileStruct(typ) + case reflect.Slice: + return d.compileSlice(typ) + case reflect.Array: + return d.compileArray(typ) + case reflect.Map: + return d.compileMap(typ) + case reflect.Interface: + return d.compileInterface(typ) + case reflect.Int: + return d.compileInt() + case reflect.Int8: + return d.compileInt8() + case reflect.Int16: + return d.compileInt16() + case reflect.Int32: + return d.compileInt32() + case reflect.Int64: + return d.compileInt64() + case reflect.Uint: + return d.compileUint() + case reflect.Uint8: + return d.compileUint8() + case reflect.Uint16: + return d.compileUint16() + case reflect.Uint32: + return d.compileUint32() + case reflect.Uint64: + return d.compileUint64() + case reflect.String: + return d.compileString() + case reflect.Bool: + return d.compileBool() + case reflect.Float32: + return d.compileFloat32() + case reflect.Float64: + return d.compileFloat64() + } + return nil, &UnsupportedTypeError{Type: rtype2type(typ)} +} + +func (d *Decoder) compilePtr(typ *rtype) (decoder, error) { + dec, err := d.compile(typ.Elem()) + if err != nil { + return nil, err + } + return newPtrDecoder(dec, typ.Elem()), nil +} + +func (d *Decoder) compileInt() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int)(unsafe.Pointer(p)) = int(v) + }), nil +} + +func (d *Decoder) compileInt8() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int8)(unsafe.Pointer(p)) = int8(v) + }), nil +} + +func (d *Decoder) compileInt16() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int16)(unsafe.Pointer(p)) = int16(v) + }), nil +} + +func (d *Decoder) compileInt32() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int32)(unsafe.Pointer(p)) = int32(v) + }), nil +} + +func (d *Decoder) compileInt64() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int64)(unsafe.Pointer(p)) = v + }), nil +} + +func (d *Decoder) compileUint() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint)(unsafe.Pointer(p)) = uint(v) + }), nil +} + +func (d *Decoder) compileUint8() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint8)(unsafe.Pointer(p)) = uint8(v) + }), nil +} + +func (d *Decoder) compileUint16() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint16)(unsafe.Pointer(p)) = uint16(v) + }), nil +} + +func (d *Decoder) compileUint32() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint32)(unsafe.Pointer(p)) = uint32(v) + }), nil +} + +func (d *Decoder) compileUint64() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint64)(unsafe.Pointer(p)) = v + }), nil +} + +func (d *Decoder) compileFloat32() (decoder, error) { + return newFloatDecoder(func(p uintptr, v float64) { + *(*float32)(unsafe.Pointer(p)) = float32(v) + }), nil +} + +func (d *Decoder) compileFloat64() (decoder, error) { + return newFloatDecoder(func(p uintptr, v float64) { + *(*float64)(unsafe.Pointer(p)) = v + }), nil +} + +func (d *Decoder) compileString() (decoder, error) { + return newStringDecoder(), nil +} + +func (d *Decoder) compileBool() (decoder, error) { + return newBoolDecoder(), nil +} + +func (d *Decoder) compileSlice(typ *rtype) (decoder, error) { + elem := typ.Elem() + decoder, err := d.compile(elem) + if err != nil { + return nil, err + } + return newSliceDecoder(decoder, elem, elem.Size()), nil +} + +func (d *Decoder) compileArray(typ *rtype) (decoder, error) { + elem := typ.Elem() + decoder, err := d.compile(elem) + if err != nil { + return nil, err + } + return newArrayDecoder(decoder, elem, typ.Len()), nil +} + +func (d *Decoder) compileMap(typ *rtype) (decoder, error) { + keyDec, err := d.compile(typ.Key()) + if err != nil { + return nil, err + } + valueDec, err := d.compile(typ.Elem()) + if err != nil { + return nil, err + } + return newMapDecoder(typ, keyDec, valueDec), nil +} + +func (d *Decoder) compileInterface(typ *rtype) (decoder, error) { + return newInterfaceDecoder(typ), nil +} + +func (d *Decoder) getTag(field reflect.StructField) string { + return field.Tag.Get("json") +} + +func (d *Decoder) isIgnoredStructField(field reflect.StructField) bool { + if field.PkgPath != "" && !field.Anonymous { + // private field + return true + } + tag := d.getTag(field) + if tag == "-" { + return true + } + return false +} + +func (d *Decoder) compileStruct(typ *rtype) (decoder, error) { + fieldNum := typ.NumField() + fieldMap := map[string]*structFieldSet{} + for i := 0; i < fieldNum; i++ { + field := typ.Field(i) + if d.isIgnoredStructField(field) { + continue + } + keyName := field.Name + tag := d.getTag(field) + opts := strings.Split(tag, ",") + if len(opts) > 0 { + if opts[0] != "" { + keyName = opts[0] + } + } + dec, err := d.compile(type2rtype(field.Type)) + if err != nil { + return nil, err + } + fieldSet := &structFieldSet{dec: dec, offset: field.Offset} + fieldMap[field.Name] = fieldSet + fieldMap[keyName] = fieldSet + fieldMap[strings.ToLower(keyName)] = fieldSet + } + return newStructDecoder(fieldMap), nil +} diff --git a/decode_int.go b/decode_int.go index af96458..d512cb8 100644 --- a/decode_int.go +++ b/decode_int.go @@ -49,6 +49,43 @@ var ( } ) +func (d *intDecoder) decodeByteStream(s *stream) ([]byte, error) { + for ; s.cursor < s.length || s.read(); s.cursor++ { + switch s.char() { + case ' ', '\n', '\t', '\r': + continue + case '-': + start := s.cursor + s.cursor++ + for ; s.cursor < s.length || s.read(); s.cursor++ { + if numTable[s.char()] { + continue + } + break + } + num := s.buf[start:s.cursor] + if len(num) < 2 { + return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset()) + } + return num, nil + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + start := s.cursor + s.cursor++ + for ; s.cursor < s.length || s.read(); s.cursor++ { + if numTable[s.char()] { + continue + } + break + } + num := s.buf[start:s.cursor] + return num, nil + default: + return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset()) + } + } + return nil, errUnexpectedEndOfJSON("number(integer)", s.totalOffset()) +} + func (d *intDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { for { switch buf[cursor] { @@ -72,6 +109,15 @@ func (d *intDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) return nil, 0, errUnexpectedEndOfJSON("number(integer)", cursor) } +func (d *intDecoder) decodeStream(s *stream, p uintptr) error { + bytes, err := d.decodeByteStream(s) + if err != nil { + return err + } + d.op(p, d.parseInt(bytes)) + return nil +} + func (d *intDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { bytes, c, err := d.decodeByte(buf, cursor) if err != nil { diff --git a/decode_stream.go b/decode_stream.go new file mode 100644 index 0000000..37e48e5 --- /dev/null +++ b/decode_stream.go @@ -0,0 +1,49 @@ +package json + +import ( + "bytes" + "io" +) + +const ( + readChunkSize = 1024 +) + +type stream struct { + buf []byte + length int64 + r io.Reader + decodedPos int64 + offset int64 + cursor int64 +} + +func (s *stream) buffered() io.Reader { + return bytes.NewReader(s.buf[s.cursor:]) +} + +func (s *stream) totalOffset() int64 { + return s.offset + s.cursor +} + +func (s *stream) char() byte { + return s.buf[s.cursor] +} + +func (s *stream) read() bool { + buf := make([]byte, readChunkSize) + n, err := s.r.Read(buf) + if n == 0 || err == io.EOF { + return false + } + remain := s.length - s.decodedPos + newBuf := make([]byte, remain+int64(n)) + copy(newBuf, s.buf[s.decodedPos:]) + copy(newBuf[remain:], buf) + s.buf = newBuf + s.length = int64(len(newBuf)) + s.offset += s.decodedPos + s.cursor = 0 + s.decodedPos = 0 + return true +} diff --git a/decode_test.go b/decode_test.go index c53569c..58b4cc9 100644 --- a/decode_test.go +++ b/decode_test.go @@ -3,6 +3,7 @@ package json_test import ( "fmt" "reflect" + "strings" "testing" "github.com/goccy/go-json" @@ -231,3 +232,40 @@ func Test_InvalidUnmarshalError(t *testing.T) { assertEq(t, "invalid unmarshal error", "json: Unmarshal(non-pointer int)", err) }) } + +func Test_DecodeStream(t *testing.T) { + const stream = ` + [ + {"Name": "Ed", "Text": "Knock knock."}, + {"Name": "Sam", "Text": "Who's there?"}, + {"Name": "Ed", "Text": "Go fmt."}, + {"Name": "Sam", "Text": "Go fmt who?"}, + {"Name": "Ed", "Text": "Go fmt yourself!"} + ] +` + type Message struct { + Name, Text string + } + dec := json.NewDecoder(strings.NewReader(stream)) + + tk, err := dec.Token() + assertErr(t, err) + assertEq(t, "[", fmt.Sprint(tk), "[") + + elem := 0 + // while the array contains values + for dec.More() { + var m Message + // decode an array value (Message) + assertErr(t, dec.Decode(&m)) + if m.Name == "" || m.Text == "" { + t.Fatal("failed to assign value to struct field") + } + elem++ + } + assertEq(t, "decode count", elem, 5) + + tk, err = dec.Token() + assertErr(t, err) + assertEq(t, "]", fmt.Sprint(tk), "]") +} diff --git a/json.go b/json.go index fea6348..a29a061 100644 --- a/json.go +++ b/json.go @@ -264,3 +264,14 @@ func UnmarshalNoEscape(data []byte, v interface{}) error { var dec Decoder return dec.decodeForUnmarshalNoEscape(src, v) } + +// A Token holds a value of one of these types: +// +// Delim, for the four JSON delimiters [ ] { } +// bool, for JSON booleans +// float64, for JSON numbers +// Number, for JSON numbers +// string, for JSON string literals +// nil, for JSON null +// +type Token interface{} From 20b67ad48d16faf3f6bcd1ab43d0e32f5019d694 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Thu, 30 Jul 2020 22:41:53 +0900 Subject: [PATCH 02/13] Support Decoder.Token --- decode.go | 76 +++++++++++++++-------- decode_array.go | 30 +++++++++ decode_bool.go | 61 ++++++++++++++++++- decode_float.go | 58 ++++++++++++++++++ decode_int.go | 13 ++-- decode_interface.go | 101 +++++++++++++++++++++++++++++++ decode_map.go | 46 ++++++++++++++ decode_ptr.go | 9 +++ decode_slice.go | 54 +++++++++++++++++ decode_stream.go | 127 ++++++++++++++++++++++++++++++++++----- decode_string.go | 67 +++++++++++++++++++++ decode_struct.go | 45 ++++++++++++++ decode_test.go | 14 +++++ decode_uint.go | 33 ++++++++++ decode_unmarshal_json.go | 17 ++++++ decode_unmarshal_text.go | 17 ++++++ 16 files changed, 721 insertions(+), 47 deletions(-) diff --git a/decode.go b/decode.go index a9d261e..6975ad5 100644 --- a/decode.go +++ b/decode.go @@ -5,6 +5,7 @@ import ( "fmt" "io" "reflect" + "strconv" "sync" "unsafe" ) @@ -17,6 +18,7 @@ func (d Delim) String() string { type decoder interface { decode([]byte, int64, uintptr) (int64, error) + decodeStream(*stream, uintptr) error } type Decoder struct { @@ -53,7 +55,9 @@ func init() { // The decoder introduces its own buffering and may // read data from r beyond the JSON values requested. func NewDecoder(r io.Reader) *Decoder { - return &Decoder{s: &stream{r: r}} + s := &stream{r: r} + s.read() + return &Decoder{s: s} } // Buffered returns a reader of the data remaining in the Decoder's @@ -109,12 +113,13 @@ func (d *Decoder) decodeForUnmarshalNoEscape(src []byte, v interface{}) error { func (d *Decoder) prepareForDecode() error { s := d.s - for ; s.cursor < s.length || s.read(); s.cursor++ { + for { switch s.char() { case ' ', '\t', '\r', '\n': + s.progress() continue case ',', ':': - s.cursor++ + s.progress() return nil } break @@ -152,10 +157,7 @@ func (d *Decoder) Decode(v interface{}) error { return err } s := d.s - cursor, err := dec.decode(s.buf[s.cursor:], 0, ptr) - s.cursor += cursor - fmt.Println("cursor = ", cursor, "next buf = ", string(s.buf[s.cursor:])) - if err != nil { + if err := dec.decodeStream(s, ptr); err != nil { return err } return nil @@ -163,10 +165,12 @@ func (d *Decoder) Decode(v interface{}) error { func (d *Decoder) More() bool { s := d.s - for ; s.cursor < s.length || s.read(); s.cursor++ { + for { switch s.char() { case ' ', '\n', '\r', '\t': - continue + if s.progress() { + continue + } case '}', ']': return false } @@ -177,27 +181,51 @@ func (d *Decoder) More() bool { func (d *Decoder) Token() (Token, error) { s := d.s - for ; s.cursor < s.length || s.read(); s.cursor++ { - switch s.char() { + for { + c := s.char() + switch c { case ' ', '\n', '\r', '\t': - continue - case '{': - s.cursor++ - return Delim('{'), nil - case '[': - s.cursor++ - return Delim('['), nil - case '}': - s.cursor++ - return Delim('}'), nil - case ']': - s.cursor++ - return Delim(']'), nil + if s.progress() { + continue + } + case '{', '[', ']', '}': + s.progress() + return Delim(c), nil + case ',', ':': + if s.progress() { + continue + } case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + bytes := floatBytes(s) + s := *(*string)(unsafe.Pointer(&bytes)) + f64, err := strconv.ParseFloat(s, 64) + if err != nil { + return nil, err + } + return f64, nil case '"': + bytes, err := stringBytes(s) + if err != nil { + return nil, err + } + return string(bytes), nil case 't': + if err := trueBytes(s); err != nil { + return nil, err + } + return true, nil case 'f': + if err := falseBytes(s); err != nil { + return nil, err + } + return false, nil case 'n': + if err := nullBytes(s); err != nil { + return nil, err + } + return nil, nil + case '\000': + return nil, io.EOF default: return nil, errInvalidCharacter(s.char(), "token", s.totalOffset()) } diff --git a/decode_array.go b/decode_array.go index f4e3555..a1b89b8 100644 --- a/decode_array.go +++ b/decode_array.go @@ -16,6 +16,36 @@ func newArrayDecoder(dec decoder, elemType *rtype, alen int) *arrayDecoder { } } +func (d *arrayDecoder) decodeStream(s *stream, p uintptr) error { + for { + switch s.char() { + case ' ', '\n', '\t', '\r': + case '[': + idx := 0 + for { + s.progress() + if err := d.valueDecoder.decodeStream(s, p+uintptr(idx)*d.size); err != nil { + return err + } + s.skipWhiteSpace() + switch s.char() { + case ']': + s.progress() + return nil + case ',': + idx++ + default: + return errInvalidCharacter(s.char(), "array", s.offset) + } + } + default: + return errUnexpectedEndOfJSON("array", s.offset) + } + s.progress() + } + return errUnexpectedEndOfJSON("array", s.offset) +} + func (d *arrayDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { buflen := int64(len(buf)) for ; cursor < buflen; cursor++ { diff --git a/decode_bool.go b/decode_bool.go index 583bad5..a5dd822 100644 --- a/decode_bool.go +++ b/decode_bool.go @@ -10,6 +10,63 @@ func newBoolDecoder() *boolDecoder { return &boolDecoder{} } +func trueBytes(s *stream) error { + s.progress() + if s.char() != 'r' { + return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) + } + s.progress() + if s.char() != 'u' { + return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) + } + s.progress() + if s.char() != 'e' { + return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) + } + s.progress() + return nil +} + +func falseBytes(s *stream) error { + s.progress() + if s.char() != 'a' { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + s.progress() + if s.char() != 'l' { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + s.progress() + if s.char() != 's' { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + s.progress() + if s.char() != 'e' { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + s.progress() + return nil +} + +func (d *boolDecoder) decodeStream(s *stream, p uintptr) error { + s.skipWhiteSpace() + switch s.char() { + case 't': + if err := trueBytes(s); err != nil { + return err + } + *(*bool)(unsafe.Pointer(p)) = true + return nil + case 'f': + if err := falseBytes(s); err != nil { + return err + } + *(*bool)(unsafe.Pointer(p)) = false + return nil + } + return errUnexpectedEndOfJSON("bool", s.totalOffset()) +} + func (d *boolDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { buflen := int64(len(buf)) cursor = skipWhiteSpace(buf, cursor) @@ -29,6 +86,7 @@ func (d *boolDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) } cursor += 4 *(*bool)(unsafe.Pointer(p)) = true + return cursor, nil case 'f': if cursor+4 >= buflen { return 0, errUnexpectedEndOfJSON("bool(false)", cursor) @@ -47,6 +105,7 @@ func (d *boolDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) } cursor += 5 *(*bool)(unsafe.Pointer(p)) = false + return cursor, nil } - return cursor, nil + return 0, errUnexpectedEndOfJSON("bool", cursor) } diff --git a/decode_float.go b/decode_float.go index 98b3088..6c7d12b 100644 --- a/decode_float.go +++ b/decode_float.go @@ -13,6 +13,48 @@ func newFloatDecoder(op func(uintptr, float64)) *floatDecoder { return &floatDecoder{op: op} } +var floatTable = [256]bool{ + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + '.': true, + 'e': true, + 'E': true, +} + +func floatBytes(s *stream) []byte { + start := s.cursor + for s.progress() { + if floatTable[s.char()] { + continue + } + break + } + return s.buf[start:s.cursor] +} + +func (d *floatDecoder) decodeStreamByte(s *stream) ([]byte, error) { + for { + switch s.char() { + case ' ', '\n', '\t', '\r': + s.progress() + continue + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return floatBytes(s), nil + default: + return nil, errUnexpectedEndOfJSON("float", s.offset) + } + } + return nil, errUnexpectedEndOfJSON("float", s.offset) +} + func (d *floatDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { buflen := int64(len(buf)) for ; cursor < buflen; cursor++ { @@ -31,11 +73,27 @@ func (d *floatDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, erro } num := buf[start:cursor] return num, cursor, nil + default: + return nil, 0, errUnexpectedEndOfJSON("float", cursor) } } return nil, 0, errUnexpectedEndOfJSON("float", cursor) } +func (d *floatDecoder) decodeStream(s *stream, p uintptr) error { + bytes, err := d.decodeStreamByte(s) + if err != nil { + return err + } + str := *(*string)(unsafe.Pointer(&bytes)) + f64, err := strconv.ParseFloat(str, 64) + if err != nil { + return err + } + d.op(p, f64) + return nil +} + func (d *floatDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { bytes, c, err := d.decodeByte(buf, cursor) if err != nil { diff --git a/decode_int.go b/decode_int.go index d512cb8..5e9a3d6 100644 --- a/decode_int.go +++ b/decode_int.go @@ -49,15 +49,15 @@ var ( } ) -func (d *intDecoder) decodeByteStream(s *stream) ([]byte, error) { - for ; s.cursor < s.length || s.read(); s.cursor++ { +func (d *intDecoder) decodeStreamByte(s *stream) ([]byte, error) { + for { switch s.char() { case ' ', '\n', '\t', '\r': + s.progress() continue case '-': start := s.cursor - s.cursor++ - for ; s.cursor < s.length || s.read(); s.cursor++ { + for s.progress() { if numTable[s.char()] { continue } @@ -70,8 +70,7 @@ func (d *intDecoder) decodeByteStream(s *stream) ([]byte, error) { return num, nil case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': start := s.cursor - s.cursor++ - for ; s.cursor < s.length || s.read(); s.cursor++ { + for s.progress() { if numTable[s.char()] { continue } @@ -110,7 +109,7 @@ func (d *intDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) } func (d *intDecoder) decodeStream(s *stream, p uintptr) error { - bytes, err := d.decodeByteStream(s) + bytes, err := d.decodeStreamByte(s) if err != nil { return err } diff --git a/decode_interface.go b/decode_interface.go index a0d94b3..eda9606 100644 --- a/decode_interface.go +++ b/decode_interface.go @@ -20,6 +20,107 @@ var ( ) ) +func (d *interfaceDecoder) decodeStream(s *stream, p uintptr) error { + s.skipWhiteSpace() + switch s.char() { + case '{': + var v map[interface{}]interface{} + ptr := unsafe.Pointer(&v) + d.dummy = ptr + dec := newMapDecoder(interfaceMapType, newInterfaceDecoder(d.typ), newInterfaceDecoder(d.typ)) + if err := dec.decodeStream(s, uintptr(ptr)); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = v + return nil + case '[': + var v []interface{} + ptr := unsafe.Pointer(&v) + d.dummy = ptr // escape ptr + dec := newSliceDecoder(newInterfaceDecoder(d.typ), d.typ, d.typ.Size()) + if err := dec.decodeStream(s, uintptr(ptr)); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = v + return nil + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return newFloatDecoder(func(p uintptr, v float64) { + *(*interface{})(unsafe.Pointer(p)) = v + }).decodeStream(s, p) + case '"': + s.progress() + start := s.cursor + for { + switch s.char() { + case '\\': + s.progress() + case '"': + literal := s.buf[start:s.cursor] + s.progress() + *(*interface{})(unsafe.Pointer(p)) = *(*string)(unsafe.Pointer(&literal)) + return nil + case '\000': + return errUnexpectedEndOfJSON("string", s.totalOffset()) + } + s.progress() + } + return errUnexpectedEndOfJSON("string", s.totalOffset()) + case 't': + s.progress() + if s.char() != 'r' { + return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) + } + s.progress() + if s.char() != 'u' { + return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) + } + s.progress() + if s.char() != 'e' { + return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) + } + s.progress() + *(*interface{})(unsafe.Pointer(p)) = true + return nil + case 'f': + s.progress() + if s.char() != 'a' { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + s.progress() + if s.char() != 'l' { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + s.progress() + if s.char() != 's' { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + s.progress() + if s.char() != 'e' { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + s.progress() + *(*interface{})(unsafe.Pointer(p)) = false + return nil + case 'n': + s.progress() + if s.char() != 'u' { + return errInvalidCharacter(s.char(), "null", s.totalOffset()) + } + s.progress() + if s.char() != 'l' { + return errInvalidCharacter(s.char(), "null", s.totalOffset()) + } + s.progress() + if s.char() != 'l' { + return errInvalidCharacter(s.char(), "null", s.totalOffset()) + } + s.progress() + *(*interface{})(unsafe.Pointer(p)) = nil + return nil + } + return errNotAtBeginningOfValue(s.totalOffset()) +} + func (d *interfaceDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { cursor = skipWhiteSpace(buf, cursor) switch buf[cursor] { diff --git a/decode_map.go b/decode_map.go index 18b5119..239d9fa 100644 --- a/decode_map.go +++ b/decode_map.go @@ -35,6 +35,52 @@ func (d *mapDecoder) setValue(buf []byte, cursor int64, key interface{}) (int64, return d.valueDecoder.decode(buf, cursor, uintptr(header.ptr)) } +func (d *mapDecoder) setKeyStream(s *stream, key interface{}) error { + header := (*interfaceHeader)(unsafe.Pointer(&key)) + return d.keyDecoder.decodeStream(s, uintptr(header.ptr)) +} + +func (d *mapDecoder) setValueStream(s *stream, key interface{}) error { + header := (*interfaceHeader)(unsafe.Pointer(&key)) + return d.valueDecoder.decodeStream(s, uintptr(header.ptr)) +} + +func (d *mapDecoder) decodeStream(s *stream, p uintptr) error { + s.skipWhiteSpace() + if s.char() != '{' { + return errExpected("{ character for map value", s.totalOffset()) + } + mapValue := makemap(d.mapType, 0) + for s.progress() { + var key interface{} + if err := d.setKeyStream(s, &key); err != nil { + return err + } + s.skipWhiteSpace() + if s.char() != ':' { + return errExpected("colon after object key", s.totalOffset()) + } + s.progress() + if s.end() { + return errUnexpectedEndOfJSON("map", s.totalOffset()) + } + var value interface{} + if err := d.setValueStream(s, &value); err != nil { + return err + } + mapassign(d.mapType, mapValue, unsafe.Pointer(&key), unsafe.Pointer(&value)) + s.skipWhiteSpace() + if s.char() == '}' { + *(*unsafe.Pointer)(unsafe.Pointer(p)) = mapValue + return nil + } + if s.char() != ',' { + return errExpected("semicolon after object value", s.totalOffset()) + } + } + return nil +} + func (d *mapDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { cursor = skipWhiteSpace(buf, cursor) buflen := int64(len(buf)) diff --git a/decode_ptr.go b/decode_ptr.go index 6135872..4ce625d 100644 --- a/decode_ptr.go +++ b/decode_ptr.go @@ -16,6 +16,15 @@ func newPtrDecoder(dec decoder, typ *rtype) *ptrDecoder { //go:linkname unsafe_New reflect.unsafe_New func unsafe_New(*rtype) uintptr +func (d *ptrDecoder) decodeStream(s *stream, p uintptr) error { + newptr := unsafe_New(d.typ) + if err := d.dec.decodeStream(s, newptr); err != nil { + return err + } + *(*uintptr)(unsafe.Pointer(p)) = newptr + return nil +} + func (d *ptrDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { newptr := unsafe_New(d.typ) c, err := d.dec.decode(buf, cursor, newptr) diff --git a/decode_slice.go b/decode_slice.go index a8f6381..a76c3ae 100644 --- a/decode_slice.go +++ b/decode_slice.go @@ -47,6 +47,60 @@ func copySlice(elemType *rtype, dst, src reflect.SliceHeader) int //go:linkname newArray reflect.unsafe_NewArray func newArray(*rtype, int) unsafe.Pointer +func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error { + for { + switch s.char() { + case ' ', '\n', '\t', '\r': + s.progress() + continue + case '[': + idx := 0 + slice := d.newSlice() + cap := slice.Cap + data := slice.Data + for s.progress() { + if cap <= idx { + src := reflect.SliceHeader{Data: data, Len: idx, Cap: cap} + cap *= 2 + data = uintptr(newArray(d.elemType, cap)) + dst := reflect.SliceHeader{Data: data, Len: idx, Cap: cap} + copySlice(d.elemType, dst, src) + } + if err := d.valueDecoder.decodeStream(s, data+uintptr(idx)*d.size); err != nil { + return err + } + s.skipWhiteSpace() + switch s.char() { + case ']': + slice.Cap = cap + slice.Len = idx + 1 + slice.Data = data + dstCap := idx + 1 + dst := reflect.SliceHeader{ + Data: uintptr(newArray(d.elemType, dstCap)), + Len: idx + 1, + Cap: dstCap, + } + copySlice(d.elemType, dst, *slice) + *(*reflect.SliceHeader)(unsafe.Pointer(p)) = dst + d.releaseSlice(slice) + s.progress() + return nil + case ',': + idx++ + continue + default: + slice.Cap = cap + slice.Data = data + d.releaseSlice(slice) + return errInvalidCharacter(s.char(), "slice", s.totalOffset()) + } + } + } + } + return errUnexpectedEndOfJSON("slice", s.totalOffset()) +} + func (d *sliceDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { buflen := int64(len(buf)) for ; cursor < buflen; cursor++ { diff --git a/decode_stream.go b/decode_stream.go index 37e48e5..817e612 100644 --- a/decode_stream.go +++ b/decode_stream.go @@ -6,16 +6,16 @@ import ( ) const ( - readChunkSize = 1024 + readChunkSize = 2 ) type stream struct { - buf []byte - length int64 - r io.Reader - decodedPos int64 - offset int64 - cursor int64 + buf []byte + length int64 + r io.Reader + offset int64 + cursor int64 + allRead bool } func (s *stream) buffered() io.Reader { @@ -26,24 +26,121 @@ func (s *stream) totalOffset() int64 { return s.offset + s.cursor } +func (s *stream) prevChar() byte { + return s.buf[s.cursor-1] +} + func (s *stream) char() byte { return s.buf[s.cursor] } +func (s *stream) end() bool { + return s.allRead && s.length <= s.cursor +} + +func (s *stream) progress() bool { + if s.cursor < s.length-1 || s.read() { + s.cursor++ + return true + } + s.cursor = s.length + return false +} + +func (s *stream) progressN(n int64) bool { + if s.cursor+n < s.length-1 || s.read() { + s.cursor += n + return true + } + s.cursor = s.length + return false +} + +func (s *stream) reset() { + s.buf = s.buf[s.cursor:] + s.length -= s.cursor + s.cursor = 0 +} + func (s *stream) read() bool { buf := make([]byte, readChunkSize) n, err := s.r.Read(buf) - if n == 0 || err == io.EOF { + if err != nil && err != io.EOF { return false } - remain := s.length - s.decodedPos - newBuf := make([]byte, remain+int64(n)) - copy(newBuf, s.buf[s.decodedPos:]) + remain := s.length + newBuf := make([]byte, remain+int64(n)+1) + copy(newBuf, s.buf) copy(newBuf[remain:], buf) s.buf = newBuf - s.length = int64(len(newBuf)) - s.offset += s.decodedPos - s.cursor = 0 - s.decodedPos = 0 + s.length = int64(len(newBuf)) - 1 + s.offset += s.cursor + if n == 0 || err == io.EOF { + s.allRead = true + return false + } return true } + +func (s *stream) skipWhiteSpace() { +LOOP: + if isWhiteSpace[s.char()] { + s.progress() + goto LOOP + } +} + +func (s *stream) skipValue() error { + s.skipWhiteSpace() + braceCount := 0 + bracketCount := 0 + for { + switch s.char() { + case '\000': + return errUnexpectedEndOfJSON("value of object", s.offset) + case '{': + braceCount++ + case '[': + bracketCount++ + case '}': + braceCount-- + if braceCount == -1 && bracketCount == 0 { + return nil + } + case ']': + bracketCount-- + case ',': + if bracketCount == 0 && braceCount == 0 { + return nil + } + case '"': + for s.progress() { + if s.char() != '"' { + continue + } + if s.prevChar() == '\\' { + continue + } + if bracketCount == 0 && braceCount == 0 { + s.progress() + return nil + } + break + } + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + for s.progress() { + tk := int(s.char()) + if (int('0') <= tk && tk <= int('9')) || tk == '.' || tk == 'e' || tk == 'E' { + continue + } + break + } + if bracketCount == 0 && braceCount == 0 { + return nil + } + continue + } + s.progress() + } + return errUnexpectedEndOfJSON("value of object", s.offset) +} diff --git a/decode_string.go b/decode_string.go index 7b0aec0..0d92acd 100644 --- a/decode_string.go +++ b/decode_string.go @@ -11,6 +11,15 @@ func newStringDecoder() *stringDecoder { return &stringDecoder{} } +func (d *stringDecoder) decodeStream(s *stream, p uintptr) error { + bytes, err := d.decodeStreamByte(s) + if err != nil { + return err + } + *(*string)(unsafe.Pointer(p)) = *(*string)(unsafe.Pointer(&bytes)) + return nil +} + func (d *stringDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { bytes, c, err := d.decodeByte(buf, cursor) if err != nil { @@ -21,6 +30,64 @@ func (d *stringDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, erro return cursor, nil } +func stringBytes(s *stream) ([]byte, error) { + s.progress() + start := s.cursor + for { + switch s.char() { + case '\\': + s.progress() + case '"': + literal := s.buf[start:s.cursor] + s.progress() + s.reset() + return literal, nil + case '\000': + goto ERROR + } + s.progress() + } +ERROR: + return nil, errUnexpectedEndOfJSON("string", s.totalOffset()) +} + +func nullBytes(s *stream) error { + s.progress() + if s.char() != 'u' { + return errInvalidCharacter(s.char(), "null", s.totalOffset()) + } + s.progress() + if s.char() != 'l' { + return errInvalidCharacter(s.char(), "null", s.totalOffset()) + } + s.progress() + if s.char() != 'l' { + return errInvalidCharacter(s.char(), "null", s.totalOffset()) + } + s.progress() + return nil +} + +func (d *stringDecoder) decodeStreamByte(s *stream) ([]byte, error) { + for { + switch s.char() { + case ' ', '\n', '\t', '\r': + s.progress() + case '"': + return stringBytes(s) + case 'n': + if err := nullBytes(s); err != nil { + return nil, err + } + return []byte{'n', 'u', 'l', 'l'}, nil + default: + goto ERROR + } + } +ERROR: + return nil, errNotAtBeginningOfValue(s.totalOffset()) +} + func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { for { switch buf[cursor] { diff --git a/decode_struct.go b/decode_struct.go index c065211..c0e7802 100644 --- a/decode_struct.go +++ b/decode_struct.go @@ -21,6 +21,51 @@ func newStructDecoder(fieldMap map[string]*structFieldSet) *structDecoder { } } +func (d *structDecoder) decodeStream(s *stream, p uintptr) error { + s.skipWhiteSpace() + if s.char() != '{' { + return errNotAtBeginningOfValue(s.totalOffset()) + } + s.progress() + for { + s.reset() + key, err := d.keyDecoder.decodeStreamByte(s) + if err != nil { + return err + } + s.skipWhiteSpace() + if s.char() != ':' { + return errExpected("colon after object key", s.totalOffset()) + } + s.progress() + if s.end() { + return errExpected("object value after colon", s.totalOffset()) + } + k := *(*string)(unsafe.Pointer(&key)) + field, exists := d.fieldMap[k] + if exists { + if err := field.dec.decodeStream(s, p+field.offset); err != nil { + return err + } + } else { + if err := s.skipValue(); err != nil { + return err + } + } + s.skipWhiteSpace() + c := s.char() + if c == '}' { + s.progress() + return nil + } + if c != ',' { + return errExpected("comma after object element", s.totalOffset()) + } + s.progress() + } + return nil +} + func (d *structDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { buflen := int64(len(buf)) cursor = skipWhiteSpace(buf, cursor) diff --git a/decode_test.go b/decode_test.go index 58b4cc9..f4f7bfb 100644 --- a/decode_test.go +++ b/decode_test.go @@ -233,6 +233,20 @@ func Test_InvalidUnmarshalError(t *testing.T) { }) } +func Test_Token(t *testing.T) { + dec := json.NewDecoder(strings.NewReader(`{"a": 1, "b": true, "c": [1, "two", null]}`)) + cnt := 0 + for { + if _, err := dec.Token(); err != nil { + break + } + cnt++ + } + if cnt != 12 { + t.Fatal("failed to parse token") + } +} + func Test_DecodeStream(t *testing.T) { const stream = ` [ diff --git a/decode_uint.go b/decode_uint.go index 1ba442d..49418f7 100644 --- a/decode_uint.go +++ b/decode_uint.go @@ -24,6 +24,30 @@ func (d *uintDecoder) parseUint(b []byte) uint64 { return sum } +func (d *uintDecoder) decodeStreamByte(s *stream) ([]byte, error) { + for { + switch s.char() { + case ' ', '\n', '\t', '\r': + s.progress() + continue + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + start := s.cursor + for s.progress() { + tk := int(s.char()) + if int('0') <= tk && tk <= int('9') { + continue + } + break + } + num := s.buf[start:s.cursor] + return num, nil + default: + return nil, errInvalidCharacter(s.char(), "number(unsigned integer)", s.totalOffset()) + } + } + return nil, errUnexpectedEndOfJSON("number(unsigned integer)", s.totalOffset()) +} + func (d *uintDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { buflen := int64(len(buf)) for ; cursor < buflen; cursor++ { @@ -49,6 +73,15 @@ func (d *uintDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error return nil, 0, errUnexpectedEndOfJSON("number(unsigned integer)", cursor) } +func (d *uintDecoder) decodeStream(s *stream, p uintptr) error { + bytes, err := d.decodeStreamByte(s) + if err != nil { + return err + } + d.op(p, d.parseUint(bytes)) + return nil +} + func (d *uintDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { bytes, c, err := d.decodeByte(buf, cursor) if err != nil { diff --git a/decode_unmarshal_json.go b/decode_unmarshal_json.go index dbd796f..f84f850 100644 --- a/decode_unmarshal_json.go +++ b/decode_unmarshal_json.go @@ -12,6 +12,23 @@ func newUnmarshalJSONDecoder(typ *rtype) *unmarshalJSONDecoder { return &unmarshalJSONDecoder{typ: typ} } +func (d *unmarshalJSONDecoder) decodeStream(s *stream, p uintptr) error { + s.skipWhiteSpace() + start := s.cursor + if err := s.skipValue(); err != nil { + return err + } + src := s.buf[start:s.cursor] + v := *(*interface{})(unsafe.Pointer(&interfaceHeader{ + typ: d.typ, + ptr: unsafe.Pointer(p), + })) + if err := v.(Unmarshaler).UnmarshalJSON(src); err != nil { + return err + } + return nil +} + func (d *unmarshalJSONDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { cursor = skipWhiteSpace(buf, cursor) start := cursor diff --git a/decode_unmarshal_text.go b/decode_unmarshal_text.go index dfcec9a..73ea335 100644 --- a/decode_unmarshal_text.go +++ b/decode_unmarshal_text.go @@ -13,6 +13,23 @@ func newUnmarshalTextDecoder(typ *rtype) *unmarshalTextDecoder { return &unmarshalTextDecoder{typ: typ} } +func (d *unmarshalTextDecoder) decodeStream(s *stream, p uintptr) error { + s.skipWhiteSpace() + start := s.cursor + if err := s.skipValue(); err != nil { + return err + } + src := s.buf[start:s.cursor] + v := *(*interface{})(unsafe.Pointer(&interfaceHeader{ + typ: d.typ, + ptr: unsafe.Pointer(p), + })) + if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil { + return err + } + return nil +} + func (d *unmarshalTextDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { cursor = skipWhiteSpace(buf, cursor) start := cursor From 961b6a202e5f105aba63a094ab7c52fdd7c1f906 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 31 Jul 2020 12:22:00 +0900 Subject: [PATCH 03/13] Add benchmark for stream decoding --- benchmarks/decode_test.go | 12 ++++++++++++ decode.go | 1 - decode_int.go | 2 ++ decode_stream.go | 25 +++++++++++++++++-------- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/benchmarks/decode_test.go b/benchmarks/decode_test.go index aad5683..97c9a56 100644 --- a/benchmarks/decode_test.go +++ b/benchmarks/decode_test.go @@ -1,6 +1,7 @@ package benchmark import ( + "bytes" "encoding/json" "testing" @@ -49,6 +50,17 @@ func Benchmark_Decode_SmallStruct_GoJayUnsafe(b *testing.B) { } } +func Benchmark_Decode_SmallStruct_GoJsonDecode(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + result := SmallPayload{} + buf := bytes.NewBuffer(SmallFixture) + if err := gojson.NewDecoder(buf).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + func Benchmark_Decode_SmallStruct_GoJson(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { diff --git a/decode.go b/decode.go index 6975ad5..2d9ff8e 100644 --- a/decode.go +++ b/decode.go @@ -2,7 +2,6 @@ package json import ( "encoding" - "fmt" "io" "reflect" "strconv" diff --git a/decode_int.go b/decode_int.go index 5e9a3d6..0c445f8 100644 --- a/decode_int.go +++ b/decode_int.go @@ -64,6 +64,7 @@ func (d *intDecoder) decodeStreamByte(s *stream) ([]byte, error) { break } num := s.buf[start:s.cursor] + s.reset() if len(num) < 2 { return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset()) } @@ -77,6 +78,7 @@ func (d *intDecoder) decodeStreamByte(s *stream) ([]byte, error) { break } num := s.buf[start:s.cursor] + s.reset() return num, nil default: return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset()) diff --git a/decode_stream.go b/decode_stream.go index 817e612..522bfa7 100644 --- a/decode_stream.go +++ b/decode_stream.go @@ -63,20 +63,29 @@ func (s *stream) reset() { } func (s *stream) read() bool { + if s.allRead { + return false + } buf := make([]byte, readChunkSize) n, err := s.r.Read(buf) if err != nil && err != io.EOF { return false } - remain := s.length - newBuf := make([]byte, remain+int64(n)+1) - copy(newBuf, s.buf) - copy(newBuf[remain:], buf) - s.buf = newBuf - s.length = int64(len(newBuf)) - 1 - s.offset += s.cursor - if n == 0 || err == io.EOF { + if n < readChunkSize || err == io.EOF { s.allRead = true + } + totalSize := s.length + int64(n) + 1 + if totalSize > readChunkSize { + newBuf := make([]byte, totalSize) + copy(newBuf, s.buf) + copy(newBuf[s.length:], buf) + s.buf = newBuf + } else { + s.buf = buf + } + s.length = int64(len(s.buf)) - 1 + s.offset += s.cursor + if n == 0 { return false } return true From 80acd42b8049af861ea287246386a73cfcb62699 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 31 Jul 2020 17:10:03 +0900 Subject: [PATCH 04/13] Optimize streaming decoder --- benchmarks/decode_test.go | 5 +- decode.go | 36 ++++++--- decode_array.go | 23 ++++-- decode_bool.go | 61 +++++++++----- decode_float.go | 19 ++++- decode_int.go | 26 ++++-- decode_interface.go | 162 ++++++++++++++++---------------------- decode_map.go | 11 ++- decode_slice.go | 24 +++++- decode_stream.go | 44 ++++++----- decode_string.go | 37 ++++++--- decode_struct.go | 20 ++++- decode_uint.go | 19 +++-- 13 files changed, 297 insertions(+), 190 deletions(-) diff --git a/benchmarks/decode_test.go b/benchmarks/decode_test.go index 97c9a56..47d01e0 100644 --- a/benchmarks/decode_test.go +++ b/benchmarks/decode_test.go @@ -52,10 +52,11 @@ func Benchmark_Decode_SmallStruct_GoJayUnsafe(b *testing.B) { func Benchmark_Decode_SmallStruct_GoJsonDecode(b *testing.B) { b.ReportAllocs() + reader := bytes.NewReader(SmallFixture) for i := 0; i < b.N; i++ { result := SmallPayload{} - buf := bytes.NewBuffer(SmallFixture) - if err := gojson.NewDecoder(buf).Decode(&result); err != nil { + reader.Reset(SmallFixture) + if err := gojson.NewDecoder(reader).Decode(&result); err != nil { b.Fatal(err) } } diff --git a/decode.go b/decode.go index 2d9ff8e..057533e 100644 --- a/decode.go +++ b/decode.go @@ -49,6 +49,10 @@ func init() { cachedDecoder = decoderMap{} } +const ( + nul = '\000' +) + // NewDecoder returns a new decoder that reads from r. // // The decoder introduces its own buffering and may @@ -115,11 +119,16 @@ func (d *Decoder) prepareForDecode() error { for { switch s.char() { case ' ', '\t', '\r', '\n': - s.progress() + s.cursor++ continue case ',', ':': - s.progress() + s.cursor++ return nil + case nul: + if s.read() { + continue + } + return io.EOF } break } @@ -167,10 +176,14 @@ func (d *Decoder) More() bool { for { switch s.char() { case ' ', '\n', '\r', '\t': - if s.progress() { + s.cursor++ + continue + case '}', ']': + return false + case nul: + if s.read() { continue } - case '}', ']': return false } break @@ -184,16 +197,12 @@ func (d *Decoder) Token() (Token, error) { c := s.char() switch c { case ' ', '\n', '\r', '\t': - if s.progress() { - continue - } + s.cursor++ case '{', '[', ']', '}': - s.progress() + s.cursor++ return Delim(c), nil case ',', ':': - if s.progress() { - continue - } + s.cursor++ case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': bytes := floatBytes(s) s := *(*string)(unsafe.Pointer(&bytes)) @@ -223,7 +232,10 @@ func (d *Decoder) Token() (Token, error) { return nil, err } return nil, nil - case '\000': + case nul: + if s.read() { + continue + } return nil, io.EOF default: return nil, errInvalidCharacter(s.char(), "token", s.totalOffset()) diff --git a/decode_array.go b/decode_array.go index a1b89b8..7e34d85 100644 --- a/decode_array.go +++ b/decode_array.go @@ -23,27 +23,38 @@ func (d *arrayDecoder) decodeStream(s *stream, p uintptr) error { case '[': idx := 0 for { - s.progress() + s.cursor++ if err := d.valueDecoder.decodeStream(s, p+uintptr(idx)*d.size); err != nil { return err } s.skipWhiteSpace() switch s.char() { case ']': - s.progress() + s.cursor++ return nil case ',': idx++ + case nul: + if s.read() { + continue + } + goto ERROR default: - return errInvalidCharacter(s.char(), "array", s.offset) + goto ERROR } } + case nul: + if s.read() { + continue + } + goto ERROR default: - return errUnexpectedEndOfJSON("array", s.offset) + goto ERROR } - s.progress() + s.cursor++ } - return errUnexpectedEndOfJSON("array", s.offset) +ERROR: + return errUnexpectedEndOfJSON("array", s.totalOffset()) } func (d *arrayDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { diff --git a/decode_bool.go b/decode_bool.go index a5dd822..de1edc5 100644 --- a/decode_bool.go +++ b/decode_bool.go @@ -11,59 +11,78 @@ func newBoolDecoder() *boolDecoder { } func trueBytes(s *stream) error { - s.progress() + if s.cursor+3 >= s.length { + if !s.read() { + return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) + } + } + s.cursor++ if s.char() != 'r' { return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 'u' { return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 'e' { return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) } - s.progress() + s.cursor++ return nil } func falseBytes(s *stream) error { - s.progress() + if s.cursor+4 >= s.length { + if s.read() { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + } + s.cursor++ if s.char() != 'a' { return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 'l' { return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 's' { return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 'e' { return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) } - s.progress() + s.cursor++ return nil } func (d *boolDecoder) decodeStream(s *stream, p uintptr) error { s.skipWhiteSpace() - switch s.char() { - case 't': - if err := trueBytes(s); err != nil { - return err + for { + switch s.char() { + case 't': + if err := trueBytes(s); err != nil { + return err + } + *(*bool)(unsafe.Pointer(p)) = true + return nil + case 'f': + if err := falseBytes(s); err != nil { + return err + } + *(*bool)(unsafe.Pointer(p)) = false + return nil + case nul: + if s.read() { + continue + } + goto ERROR } - *(*bool)(unsafe.Pointer(p)) = true - return nil - case 'f': - if err := falseBytes(s); err != nil { - return err - } - *(*bool)(unsafe.Pointer(p)) = false - return nil + break } +ERROR: return errUnexpectedEndOfJSON("bool", s.totalOffset()) } diff --git a/decode_float.go b/decode_float.go index 6c7d12b..2cdabe2 100644 --- a/decode_float.go +++ b/decode_float.go @@ -31,9 +31,14 @@ var floatTable = [256]bool{ func floatBytes(s *stream) []byte { start := s.cursor - for s.progress() { + for { + s.cursor++ if floatTable[s.char()] { continue + } else if s.char() == nul { + if s.read() { + continue + } } break } @@ -44,15 +49,21 @@ func (d *floatDecoder) decodeStreamByte(s *stream) ([]byte, error) { for { switch s.char() { case ' ', '\n', '\t', '\r': - s.progress() + s.cursor++ continue case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return floatBytes(s), nil + case nul: + if s.read() { + continue + } + goto ERROR default: - return nil, errUnexpectedEndOfJSON("float", s.offset) + goto ERROR } } - return nil, errUnexpectedEndOfJSON("float", s.offset) +ERROR: + return nil, errUnexpectedEndOfJSON("float", s.totalOffset()) } func (d *floatDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { diff --git a/decode_int.go b/decode_int.go index 0c445f8..676490d 100644 --- a/decode_int.go +++ b/decode_int.go @@ -53,37 +53,53 @@ func (d *intDecoder) decodeStreamByte(s *stream) ([]byte, error) { for { switch s.char() { case ' ', '\n', '\t', '\r': - s.progress() + s.cursor++ continue case '-': start := s.cursor - for s.progress() { + for { + s.cursor++ if numTable[s.char()] { continue + } else if s.char() == nul { + if s.read() { + continue + } } break } num := s.buf[start:s.cursor] s.reset() if len(num) < 2 { - return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset()) + goto ERROR } return num, nil case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': start := s.cursor - for s.progress() { + for { + s.cursor++ if numTable[s.char()] { continue + } else if s.char() == nul { + if s.read() { + continue + } } break } num := s.buf[start:s.cursor] s.reset() return num, nil + case nul: + if s.read() { + continue + } + goto ERROR default: - return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset()) + goto ERROR } } +ERROR: return nil, errUnexpectedEndOfJSON("number(integer)", s.totalOffset()) } diff --git a/decode_interface.go b/decode_interface.go index eda9606..e6af61c 100644 --- a/decode_interface.go +++ b/decode_interface.go @@ -22,101 +22,77 @@ var ( func (d *interfaceDecoder) decodeStream(s *stream, p uintptr) error { s.skipWhiteSpace() - switch s.char() { - case '{': - var v map[interface{}]interface{} - ptr := unsafe.Pointer(&v) - d.dummy = ptr - dec := newMapDecoder(interfaceMapType, newInterfaceDecoder(d.typ), newInterfaceDecoder(d.typ)) - if err := dec.decodeStream(s, uintptr(ptr)); err != nil { - return err - } - *(*interface{})(unsafe.Pointer(p)) = v - return nil - case '[': - var v []interface{} - ptr := unsafe.Pointer(&v) - d.dummy = ptr // escape ptr - dec := newSliceDecoder(newInterfaceDecoder(d.typ), d.typ, d.typ.Size()) - if err := dec.decodeStream(s, uintptr(ptr)); err != nil { - return err - } - *(*interface{})(unsafe.Pointer(p)) = v - return nil - case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return newFloatDecoder(func(p uintptr, v float64) { - *(*interface{})(unsafe.Pointer(p)) = v - }).decodeStream(s, p) - case '"': - s.progress() - start := s.cursor - for { - switch s.char() { - case '\\': - s.progress() - case '"': - literal := s.buf[start:s.cursor] - s.progress() - *(*interface{})(unsafe.Pointer(p)) = *(*string)(unsafe.Pointer(&literal)) - return nil - case '\000': - return errUnexpectedEndOfJSON("string", s.totalOffset()) + for { + switch s.char() { + case '{': + var v map[interface{}]interface{} + ptr := unsafe.Pointer(&v) + d.dummy = ptr + dec := newMapDecoder(interfaceMapType, newInterfaceDecoder(d.typ), newInterfaceDecoder(d.typ)) + if err := dec.decodeStream(s, uintptr(ptr)); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = v + return nil + case '[': + var v []interface{} + ptr := unsafe.Pointer(&v) + d.dummy = ptr // escape ptr + dec := newSliceDecoder(newInterfaceDecoder(d.typ), d.typ, d.typ.Size()) + if err := dec.decodeStream(s, uintptr(ptr)); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = v + return nil + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return newFloatDecoder(func(p uintptr, v float64) { + *(*interface{})(unsafe.Pointer(p)) = v + }).decodeStream(s, p) + case '"': + s.cursor++ + start := s.cursor + for { + switch s.char() { + case '\\': + s.cursor++ + case '"': + literal := s.buf[start:s.cursor] + s.cursor++ + *(*interface{})(unsafe.Pointer(p)) = *(*string)(unsafe.Pointer(&literal)) + return nil + case nul: + if s.read() { + continue + } + return errUnexpectedEndOfJSON("string", s.totalOffset()) + } + s.cursor++ + } + return errUnexpectedEndOfJSON("string", s.totalOffset()) + case 't': + if err := trueBytes(s); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = true + return nil + case 'f': + if err := falseBytes(s); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = false + return nil + case 'n': + if err := nullBytes(s); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = nil + return nil + case nul: + if s.read() { + continue } - s.progress() } - return errUnexpectedEndOfJSON("string", s.totalOffset()) - case 't': - s.progress() - if s.char() != 'r' { - return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) - } - s.progress() - if s.char() != 'u' { - return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) - } - s.progress() - if s.char() != 'e' { - return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) - } - s.progress() - *(*interface{})(unsafe.Pointer(p)) = true - return nil - case 'f': - s.progress() - if s.char() != 'a' { - return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) - } - s.progress() - if s.char() != 'l' { - return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) - } - s.progress() - if s.char() != 's' { - return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) - } - s.progress() - if s.char() != 'e' { - return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) - } - s.progress() - *(*interface{})(unsafe.Pointer(p)) = false - return nil - case 'n': - s.progress() - if s.char() != 'u' { - return errInvalidCharacter(s.char(), "null", s.totalOffset()) - } - s.progress() - if s.char() != 'l' { - return errInvalidCharacter(s.char(), "null", s.totalOffset()) - } - s.progress() - if s.char() != 'l' { - return errInvalidCharacter(s.char(), "null", s.totalOffset()) - } - s.progress() - *(*interface{})(unsafe.Pointer(p)) = nil - return nil + break } return errNotAtBeginningOfValue(s.totalOffset()) } diff --git a/decode_map.go b/decode_map.go index 239d9fa..2ae99fb 100644 --- a/decode_map.go +++ b/decode_map.go @@ -51,16 +51,20 @@ func (d *mapDecoder) decodeStream(s *stream, p uintptr) error { return errExpected("{ character for map value", s.totalOffset()) } mapValue := makemap(d.mapType, 0) - for s.progress() { + for { + s.cursor++ var key interface{} if err := d.setKeyStream(s, &key); err != nil { return err } s.skipWhiteSpace() + if s.char() == nul { + s.read() + } if s.char() != ':' { return errExpected("colon after object key", s.totalOffset()) } - s.progress() + s.cursor++ if s.end() { return errUnexpectedEndOfJSON("map", s.totalOffset()) } @@ -70,6 +74,9 @@ func (d *mapDecoder) decodeStream(s *stream, p uintptr) error { } mapassign(d.mapType, mapValue, unsafe.Pointer(&key), unsafe.Pointer(&value)) s.skipWhiteSpace() + if s.char() == nul { + s.read() + } if s.char() == '}' { *(*unsafe.Pointer)(unsafe.Pointer(p)) = mapValue return nil diff --git a/decode_slice.go b/decode_slice.go index a76c3ae..8c7f2e6 100644 --- a/decode_slice.go +++ b/decode_slice.go @@ -51,14 +51,15 @@ func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error { for { switch s.char() { case ' ', '\n', '\t', '\r': - s.progress() + s.cursor++ continue case '[': idx := 0 slice := d.newSlice() cap := slice.Cap data := slice.Data - for s.progress() { + for { + s.cursor++ if cap <= idx { src := reflect.SliceHeader{Data: data, Len: idx, Cap: cap} cap *= 2 @@ -70,6 +71,7 @@ func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error { return err } s.skipWhiteSpace() + RETRY: switch s.char() { case ']': slice.Cap = cap @@ -84,20 +86,34 @@ func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error { copySlice(d.elemType, dst, *slice) *(*reflect.SliceHeader)(unsafe.Pointer(p)) = dst d.releaseSlice(slice) - s.progress() + s.cursor++ return nil case ',': idx++ continue + case nul: + if s.read() { + goto RETRY + } + slice.Cap = cap + slice.Data = data + d.releaseSlice(slice) + goto ERROR default: slice.Cap = cap slice.Data = data d.releaseSlice(slice) - return errInvalidCharacter(s.char(), "slice", s.totalOffset()) + goto ERROR } } + case nul: + if s.read() { + continue + } + goto ERROR } } +ERROR: return errUnexpectedEndOfJSON("slice", s.totalOffset()) } diff --git a/decode_stream.go b/decode_stream.go index 522bfa7..fd4ba46 100644 --- a/decode_stream.go +++ b/decode_stream.go @@ -38,15 +38,6 @@ func (s *stream) end() bool { return s.allRead && s.length <= s.cursor } -func (s *stream) progress() bool { - if s.cursor < s.length-1 || s.read() { - s.cursor++ - return true - } - s.cursor = s.length - return false -} - func (s *stream) progressN(n int64) bool { if s.cursor+n < s.length-1 || s.read() { s.cursor += n @@ -94,8 +85,12 @@ func (s *stream) read() bool { func (s *stream) skipWhiteSpace() { LOOP: if isWhiteSpace[s.char()] { - s.progress() + s.cursor++ goto LOOP + } else if s.char() == nul { + if s.read() { + goto LOOP + } } } @@ -105,8 +100,11 @@ func (s *stream) skipValue() error { bracketCount := 0 for { switch s.char() { - case '\000': - return errUnexpectedEndOfJSON("value of object", s.offset) + case nul: + if s.read() { + continue + } + return errUnexpectedEndOfJSON("value of object", s.totalOffset()) case '{': braceCount++ case '[': @@ -123,7 +121,13 @@ func (s *stream) skipValue() error { return nil } case '"': - for s.progress() { + for { + s.cursor++ + if s.char() == nul { + if !s.read() { + return errUnexpectedEndOfJSON("value of string", s.totalOffset()) + } + } if s.char() != '"' { continue } @@ -131,16 +135,20 @@ func (s *stream) skipValue() error { continue } if bracketCount == 0 && braceCount == 0 { - s.progress() + s.cursor++ return nil } break } case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - for s.progress() { - tk := int(s.char()) - if (int('0') <= tk && tk <= int('9')) || tk == '.' || tk == 'e' || tk == 'E' { + for { + s.cursor++ + if floatTable[s.char()] { continue + } else if s.char() == nul { + if s.read() { + continue + } } break } @@ -149,7 +157,7 @@ func (s *stream) skipValue() error { } continue } - s.progress() + s.cursor++ } return errUnexpectedEndOfJSON("value of object", s.offset) } diff --git a/decode_string.go b/decode_string.go index 0d92acd..4eeb633 100644 --- a/decode_string.go +++ b/decode_string.go @@ -31,40 +31,48 @@ func (d *stringDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, erro } func stringBytes(s *stream) ([]byte, error) { - s.progress() + s.cursor++ start := s.cursor for { switch s.char() { case '\\': - s.progress() + s.cursor++ case '"': literal := s.buf[start:s.cursor] - s.progress() + s.cursor++ s.reset() return literal, nil - case '\000': + case nul: + if s.read() { + continue + } goto ERROR } - s.progress() + s.cursor++ } ERROR: return nil, errUnexpectedEndOfJSON("string", s.totalOffset()) } func nullBytes(s *stream) error { - s.progress() + if s.cursor+3 >= s.length { + if !s.read() { + return errInvalidCharacter(s.char(), "null", s.totalOffset()) + } + } + s.cursor++ if s.char() != 'u' { return errInvalidCharacter(s.char(), "null", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 'l' { return errInvalidCharacter(s.char(), "null", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 'l' { return errInvalidCharacter(s.char(), "null", s.totalOffset()) } - s.progress() + s.cursor++ return nil } @@ -72,7 +80,8 @@ func (d *stringDecoder) decodeStreamByte(s *stream) ([]byte, error) { for { switch s.char() { case ' ', '\n', '\t', '\r': - s.progress() + s.cursor++ + continue case '"': return stringBytes(s) case 'n': @@ -80,11 +89,13 @@ func (d *stringDecoder) decodeStreamByte(s *stream) ([]byte, error) { return nil, err } return []byte{'n', 'u', 'l', 'l'}, nil - default: - goto ERROR + case nul: + if s.read() { + continue + } } + break } -ERROR: return nil, errNotAtBeginningOfValue(s.totalOffset()) } diff --git a/decode_struct.go b/decode_struct.go index c0e7802..decbc52 100644 --- a/decode_struct.go +++ b/decode_struct.go @@ -23,10 +23,13 @@ func newStructDecoder(fieldMap map[string]*structFieldSet) *structDecoder { func (d *structDecoder) decodeStream(s *stream, p uintptr) error { s.skipWhiteSpace() + if s.char() == nul { + s.read() + } if s.char() != '{' { return errNotAtBeginningOfValue(s.totalOffset()) } - s.progress() + s.cursor++ for { s.reset() key, err := d.keyDecoder.decodeStreamByte(s) @@ -34,10 +37,16 @@ func (d *structDecoder) decodeStream(s *stream, p uintptr) error { return err } s.skipWhiteSpace() + if s.char() == nul { + s.read() + } if s.char() != ':' { return errExpected("colon after object key", s.totalOffset()) } - s.progress() + s.cursor++ + if s.char() == nul { + s.read() + } if s.end() { return errExpected("object value after colon", s.totalOffset()) } @@ -53,15 +62,18 @@ func (d *structDecoder) decodeStream(s *stream, p uintptr) error { } } s.skipWhiteSpace() + if s.char() == nul { + s.read() + } c := s.char() if c == '}' { - s.progress() + s.cursor++ return nil } if c != ',' { return errExpected("comma after object element", s.totalOffset()) } - s.progress() + s.cursor++ } return nil } diff --git a/decode_uint.go b/decode_uint.go index 49418f7..0048e60 100644 --- a/decode_uint.go +++ b/decode_uint.go @@ -28,22 +28,29 @@ func (d *uintDecoder) decodeStreamByte(s *stream) ([]byte, error) { for { switch s.char() { case ' ', '\n', '\t', '\r': - s.progress() + s.cursor++ continue case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': start := s.cursor - for s.progress() { - tk := int(s.char()) - if int('0') <= tk && tk <= int('9') { + for { + s.cursor++ + if numTable[s.char()] { continue + } else if s.char() == nul { + if s.read() { + continue + } } break } num := s.buf[start:s.cursor] return num, nil - default: - return nil, errInvalidCharacter(s.char(), "number(unsigned integer)", s.totalOffset()) + case nul: + if s.read() { + continue + } } + break } return nil, errUnexpectedEndOfJSON("number(unsigned integer)", s.totalOffset()) } From 53792a1f58a712dd715c75a34dd31120a369a63c Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 31 Jul 2020 18:07:11 +0900 Subject: [PATCH 05/13] Fix stream decoder bug --- decode_float.go | 1 + decode_int.go | 2 ++ decode_uint.go | 1 + 3 files changed, 4 insertions(+) diff --git a/decode_float.go b/decode_float.go index 2cdabe2..3393a6c 100644 --- a/decode_float.go +++ b/decode_float.go @@ -37,6 +37,7 @@ func floatBytes(s *stream) []byte { continue } else if s.char() == nul { if s.read() { + s.cursor-- // for retry current character continue } } diff --git a/decode_int.go b/decode_int.go index 676490d..3db1f45 100644 --- a/decode_int.go +++ b/decode_int.go @@ -63,6 +63,7 @@ func (d *intDecoder) decodeStreamByte(s *stream) ([]byte, error) { continue } else if s.char() == nul { if s.read() { + s.cursor-- // for retry current character continue } } @@ -82,6 +83,7 @@ func (d *intDecoder) decodeStreamByte(s *stream) ([]byte, error) { continue } else if s.char() == nul { if s.read() { + s.cursor-- // for retry current character continue } } diff --git a/decode_uint.go b/decode_uint.go index 0048e60..5ab2277 100644 --- a/decode_uint.go +++ b/decode_uint.go @@ -38,6 +38,7 @@ func (d *uintDecoder) decodeStreamByte(s *stream) ([]byte, error) { continue } else if s.char() == nul { if s.read() { + s.cursor-- // for retry current character continue } } From 2240ebcb1a96547f4556892124e2a4249f501148 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 31 Jul 2020 18:07:23 +0900 Subject: [PATCH 06/13] Refactor nul character --- decode_string.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decode_string.go b/decode_string.go index 4eeb633..b521d11 100644 --- a/decode_string.go +++ b/decode_string.go @@ -115,7 +115,7 @@ func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, err literal := buf[start:cursor] cursor++ return literal, cursor, nil - case '\000': + case nul: return nil, 0, errUnexpectedEndOfJSON("string", cursor) } cursor++ From 38ac0e025d3718fbc54b732acfab58718cfee4a7 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 31 Jul 2020 18:07:40 +0900 Subject: [PATCH 07/13] Add benchmark for Decode of GoJay --- benchmarks/decode_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/benchmarks/decode_test.go b/benchmarks/decode_test.go index 47d01e0..edcac10 100644 --- a/benchmarks/decode_test.go +++ b/benchmarks/decode_test.go @@ -30,6 +30,18 @@ func Benchmark_Decode_SmallStruct_JsonIter(b *testing.B) { } } +func Benchmark_Decode_SmallStruct_GoJayDecode(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(SmallFixture) + for n := 0; n < b.N; n++ { + reader.Reset(SmallFixture) + result := SmallPayload{} + if err := gojay.NewDecoder(reader).DecodeObject(&result); err != nil { + b.Fatal(err) + } + } +} + func Benchmark_Decode_SmallStruct_GoJay(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { From 016803b843ff284e207539dd3101fc258374be00 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 31 Jul 2020 18:08:21 +0900 Subject: [PATCH 08/13] Fix chunkBufferSize --- decode_stream.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decode_stream.go b/decode_stream.go index fd4ba46..acf8b49 100644 --- a/decode_stream.go +++ b/decode_stream.go @@ -6,7 +6,7 @@ import ( ) const ( - readChunkSize = 2 + readChunkSize = 512 ) type stream struct { From 9eaa46775c5f838ebb51dfa2c829a2e80e4bdff5 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 31 Jul 2020 18:24:33 +0900 Subject: [PATCH 09/13] Fix benchmark source --- benchmarks/decode_test.go | 112 +++++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 44 deletions(-) diff --git a/benchmarks/decode_test.go b/benchmarks/decode_test.go index edcac10..237e020 100644 --- a/benchmarks/decode_test.go +++ b/benchmarks/decode_test.go @@ -10,7 +10,7 @@ import ( jsoniter "github.com/json-iterator/go" ) -func Benchmark_Decode_SmallStruct_EncodingJson(b *testing.B) { +func Benchmark_Decode_SmallStruct_Unmarshal_EncodingJson(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := SmallPayload{} @@ -20,7 +20,7 @@ func Benchmark_Decode_SmallStruct_EncodingJson(b *testing.B) { } } -func Benchmark_Decode_SmallStruct_JsonIter(b *testing.B) { +func Benchmark_Decode_SmallStruct_Unmarshal_JsonIter(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := SmallPayload{} @@ -30,7 +30,71 @@ func Benchmark_Decode_SmallStruct_JsonIter(b *testing.B) { } } -func Benchmark_Decode_SmallStruct_GoJayDecode(b *testing.B) { +func Benchmark_Decode_SmallStruct_Unmarshal_GoJay(b *testing.B) { + b.ReportAllocs() + for n := 0; n < b.N; n++ { + result := SmallPayload{} + if err := gojay.UnmarshalJSONObject(SmallFixture, &result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_SmallStruct_Unmarshal_GoJayUnsafe(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + result := SmallPayload{} + if err := gojay.Unsafe.UnmarshalJSONObject(SmallFixture, &result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_SmallStruct_Unmarshal_GoJson(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + result := SmallPayload{} + if err := gojson.Unmarshal(SmallFixture, &result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_SmallStruct_Unmarshal_GoJsonNoEscape(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + result := SmallPayload{} + if err := gojson.UnmarshalNoEscape(SmallFixture, &result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_SmallStruct_Stream_EncodingJson(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(SmallFixture) + for i := 0; i < b.N; i++ { + result := SmallPayload{} + reader.Reset(SmallFixture) + if err := json.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_SmallStruct_Stream_JsonIter(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(SmallFixture) + for i := 0; i < b.N; i++ { + result := SmallPayload{} + reader.Reset(SmallFixture) + if err := jsoniter.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_SmallStruct_Stream_GoJay(b *testing.B) { b.ReportAllocs() reader := bytes.NewReader(SmallFixture) for n := 0; n < b.N; n++ { @@ -42,27 +106,7 @@ func Benchmark_Decode_SmallStruct_GoJayDecode(b *testing.B) { } } -func Benchmark_Decode_SmallStruct_GoJay(b *testing.B) { - b.ReportAllocs() - for n := 0; n < b.N; n++ { - result := SmallPayload{} - if err := gojay.UnmarshalJSONObject(SmallFixture, &result); err != nil { - b.Fatal(err) - } - } -} - -func Benchmark_Decode_SmallStruct_GoJayUnsafe(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - result := SmallPayload{} - if err := gojay.Unsafe.UnmarshalJSONObject(SmallFixture, &result); err != nil { - b.Fatal(err) - } - } -} - -func Benchmark_Decode_SmallStruct_GoJsonDecode(b *testing.B) { +func Benchmark_Decode_SmallStruct_Stream_GoJson(b *testing.B) { b.ReportAllocs() reader := bytes.NewReader(SmallFixture) for i := 0; i < b.N; i++ { @@ -74,26 +118,6 @@ func Benchmark_Decode_SmallStruct_GoJsonDecode(b *testing.B) { } } -func Benchmark_Decode_SmallStruct_GoJson(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - result := SmallPayload{} - if err := gojson.Unmarshal(SmallFixture, &result); err != nil { - b.Fatal(err) - } - } -} - -func Benchmark_Decode_SmallStruct_GoJsonNoEscape(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - result := SmallPayload{} - if err := gojson.UnmarshalNoEscape(SmallFixture, &result); err != nil { - b.Fatal(err) - } - } -} - func Benchmark_Decode_MediumStruct_EncodingJson(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { From c14253089e1a14d46ead571329093fa26a7df97e Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 31 Jul 2020 18:52:22 +0900 Subject: [PATCH 10/13] Fix stream decoder for slice --- decode_slice.go | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/decode_slice.go b/decode_slice.go index d82c0b7..34ffad8 100644 --- a/decode_slice.go +++ b/decode_slice.go @@ -65,34 +65,38 @@ func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error { case '[': idx := 0 slice := d.newSlice() - cap := slice.Cap - data := slice.Data + cap := slice.cap + data := slice.data for { s.cursor++ if cap <= idx { - src := reflect.SliceHeader{Data: data, Len: idx, Cap: cap} + src := reflect.SliceHeader{Data: uintptr(data), Len: idx, Cap: cap} cap *= 2 - data = uintptr(newArray(d.elemType, cap)) - dst := reflect.SliceHeader{Data: data, Len: idx, Cap: cap} + data = newArray(d.elemType, cap) + dst := reflect.SliceHeader{Data: uintptr(data), Len: idx, Cap: cap} copySlice(d.elemType, dst, src) } - if err := d.valueDecoder.decodeStream(s, data+uintptr(idx)*d.size); err != nil { + if err := d.valueDecoder.decodeStream(s, uintptr(data)+uintptr(idx)*d.size); err != nil { return err } s.skipWhiteSpace() RETRY: switch s.char() { case ']': - slice.Cap = cap - slice.Len = idx + 1 - slice.Data = data + slice.cap = cap + slice.len = idx + 1 + slice.data = data dstCap := idx + 1 dst := reflect.SliceHeader{ Data: uintptr(newArray(d.elemType, dstCap)), Len: idx + 1, Cap: dstCap, } - copySlice(d.elemType, dst, *slice) + copySlice(d.elemType, dst, reflect.SliceHeader{ + Data: uintptr(slice.data), + Len: slice.len, + Cap: slice.cap, + }) *(*reflect.SliceHeader)(unsafe.Pointer(p)) = dst d.releaseSlice(slice) s.cursor++ @@ -104,13 +108,13 @@ func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error { if s.read() { goto RETRY } - slice.Cap = cap - slice.Data = data + slice.cap = cap + slice.data = data d.releaseSlice(slice) goto ERROR default: - slice.Cap = cap - slice.Data = data + slice.cap = cap + slice.data = data d.releaseSlice(slice) goto ERROR } From c4c6b6a99da2342c7189e2ed87a1ee9be3e95886 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 31 Jul 2020 20:24:39 +0900 Subject: [PATCH 11/13] Add benchmark for medium struct --- benchmarks/decode_test.go | 72 ++++++++++++++++++++++++++++++++------- decode_context.go | 57 +++++++++++++++++++++++++++++++ decode_stream.go | 32 ++++++++++++++++- decode_string.go | 4 +-- 4 files changed, 150 insertions(+), 15 deletions(-) diff --git a/benchmarks/decode_test.go b/benchmarks/decode_test.go index 237e020..c274a31 100644 --- a/benchmarks/decode_test.go +++ b/benchmarks/decode_test.go @@ -118,7 +118,7 @@ func Benchmark_Decode_SmallStruct_Stream_GoJson(b *testing.B) { } } -func Benchmark_Decode_MediumStruct_EncodingJson(b *testing.B) { +func Benchmark_Decode_MediumStruct_Unmarshal_EncodingJson(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := MediumPayload{} @@ -128,7 +128,7 @@ func Benchmark_Decode_MediumStruct_EncodingJson(b *testing.B) { } } -func Benchmark_Decode_MediumStruct_JsonIter(b *testing.B) { +func Benchmark_Decode_MediumStruct_Unmarshal_JsonIter(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := MediumPayload{} @@ -138,7 +138,7 @@ func Benchmark_Decode_MediumStruct_JsonIter(b *testing.B) { } } -func Benchmark_Decode_MediumStruct_GoJay(b *testing.B) { +func Benchmark_Decode_MediumStruct_Unmarshal_GoJay(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := MediumPayload{} @@ -148,7 +148,7 @@ func Benchmark_Decode_MediumStruct_GoJay(b *testing.B) { } } -func Benchmark_Decode_MediumStruct_GoJayUnsafe(b *testing.B) { +func Benchmark_Decode_MediumStruct_Unmarshal_GoJayUnsafe(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := MediumPayload{} @@ -158,7 +158,7 @@ func Benchmark_Decode_MediumStruct_GoJayUnsafe(b *testing.B) { } } -func Benchmark_Decode_MediumStruct_GoJson(b *testing.B) { +func Benchmark_Decode_MediumStruct_Unmarshal_GoJson(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := MediumPayload{} @@ -168,7 +168,7 @@ func Benchmark_Decode_MediumStruct_GoJson(b *testing.B) { } } -func Benchmark_Decode_MediumStruct_GoJsonNoEscape(b *testing.B) { +func Benchmark_Decode_MediumStruct_Unmarshal_GoJsonNoEscape(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := MediumPayload{} @@ -178,7 +178,55 @@ func Benchmark_Decode_MediumStruct_GoJsonNoEscape(b *testing.B) { } } -func Benchmark_Decode_LargeStruct_EncodingJson(b *testing.B) { +func Benchmark_Decode_MediumStruct_Stream_EncodingJson(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(MediumFixture) + for i := 0; i < b.N; i++ { + result := MediumPayload{} + reader.Reset(MediumFixture) + if err := json.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_MediumStruct_Stream_JsonIter(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(MediumFixture) + for i := 0; i < b.N; i++ { + result := MediumPayload{} + reader.Reset(MediumFixture) + if err := jsoniter.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_MediumStruct_Stream_GoJay(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(MediumFixture) + for n := 0; n < b.N; n++ { + reader.Reset(MediumFixture) + result := MediumPayload{} + if err := gojay.NewDecoder(reader).DecodeObject(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_MediumStruct_Stream_GoJson(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(MediumFixture) + for i := 0; i < b.N; i++ { + result := MediumPayload{} + reader.Reset(MediumFixture) + if err := gojson.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_LargeStruct_Unmarshal_EncodingJson(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := LargePayload{} @@ -188,7 +236,7 @@ func Benchmark_Decode_LargeStruct_EncodingJson(b *testing.B) { } } -func Benchmark_Decode_LargeStruct_JsonIter(b *testing.B) { +func Benchmark_Decode_LargeStruct_Unmarshal_JsonIter(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := LargePayload{} @@ -198,7 +246,7 @@ func Benchmark_Decode_LargeStruct_JsonIter(b *testing.B) { } } -func Benchmark_Decode_LargeStruct_GoJay(b *testing.B) { +func Benchmark_Decode_LargeStruct_Unmarshal_GoJay(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := LargePayload{} @@ -208,7 +256,7 @@ func Benchmark_Decode_LargeStruct_GoJay(b *testing.B) { } } -func Benchmark_Decode_LargeStruct_GoJayUnsafe(b *testing.B) { +func Benchmark_Decode_LargeStruct_Unmarshal_GoJayUnsafe(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := LargePayload{} @@ -218,7 +266,7 @@ func Benchmark_Decode_LargeStruct_GoJayUnsafe(b *testing.B) { } } -func Benchmark_Decode_LargeStruct_GoJson(b *testing.B) { +func Benchmark_Decode_LargeStruct_Unmarshal_GoJson(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := LargePayload{} @@ -228,7 +276,7 @@ func Benchmark_Decode_LargeStruct_GoJson(b *testing.B) { } } -func Benchmark_Decode_LargeStruct_GoJsonNoEscape(b *testing.B) { +func Benchmark_Decode_LargeStruct_Unmarshal_GoJsonNoEscape(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := LargePayload{} diff --git a/decode_context.go b/decode_context.go index 90371c4..dd68530 100644 --- a/decode_context.go +++ b/decode_context.go @@ -72,6 +72,63 @@ func skipValue(buf []byte, cursor int64) (int64, error) { return cursor, nil } continue + case 't': + if cursor+3 >= buflen { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+1] != 'r' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+2] != 'u' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+3] != 'e' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + cursor += 4 + if bracketCount == 0 && braceCount == 0 { + return cursor, nil + } + continue + case 'f': + if cursor+4 >= buflen { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+1] != 'a' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+2] != 'l' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+3] != 's' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+4] != 'e' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + cursor += 5 + if bracketCount == 0 && braceCount == 0 { + return cursor, nil + } + continue + case 'n': + if cursor+3 >= buflen { + return 0, errUnexpectedEndOfJSON("null", cursor) + } + if buf[cursor+1] != 'u' { + return 0, errUnexpectedEndOfJSON("null", cursor) + } + if buf[cursor+2] != 'l' { + return 0, errUnexpectedEndOfJSON("null", cursor) + } + if buf[cursor+3] != 'l' { + return 0, errUnexpectedEndOfJSON("null", cursor) + } + cursor += 4 + if bracketCount == 0 && braceCount == 0 { + return cursor, nil + } + continue } cursor++ } diff --git a/decode_stream.go b/decode_stream.go index acf8b49..e84b0b8 100644 --- a/decode_stream.go +++ b/decode_stream.go @@ -71,10 +71,16 @@ func (s *stream) read() bool { copy(newBuf, s.buf) copy(newBuf[s.length:], buf) s.buf = newBuf + s.length = totalSize - 1 + } else if s.length > 0 { + copy(buf[s.length:], buf) + copy(buf, s.buf[:s.length]) + s.buf = buf + s.length = totalSize - 1 } else { s.buf = buf + s.length = totalSize - 1 } - s.length = int64(len(s.buf)) - 1 s.offset += s.cursor if n == 0 { return false @@ -156,6 +162,30 @@ func (s *stream) skipValue() error { return nil } continue + case 't': + if err := trueBytes(s); err != nil { + return err + } + if bracketCount == 0 && braceCount == 0 { + return nil + } + continue + case 'f': + if err := falseBytes(s); err != nil { + return err + } + if bracketCount == 0 && braceCount == 0 { + return nil + } + continue + case 'n': + if err := nullBytes(s); err != nil { + return err + } + if bracketCount == 0 && braceCount == 0 { + return nil + } + continue } s.cursor++ } diff --git a/decode_string.go b/decode_string.go index b521d11..2c08153 100644 --- a/decode_string.go +++ b/decode_string.go @@ -88,7 +88,7 @@ func (d *stringDecoder) decodeStreamByte(s *stream) ([]byte, error) { if err := nullBytes(s); err != nil { return nil, err } - return []byte{'n', 'u', 'l', 'l'}, nil + return []byte{}, nil case nul: if s.read() { continue @@ -136,7 +136,7 @@ func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, err return nil, 0, errInvalidCharacter(buf[cursor+3], "null", cursor) } cursor += 5 - return []byte{'n', 'u', 'l', 'l'}, cursor, nil + return []byte{}, cursor, nil default: goto ERROR } From 6583ab9b3138eaed90b0ec9cf49788c3ee355390 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 31 Jul 2020 20:28:04 +0900 Subject: [PATCH 12/13] Add benchmark for large struct --- benchmarks/decode_test.go | 48 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/benchmarks/decode_test.go b/benchmarks/decode_test.go index c274a31..a644050 100644 --- a/benchmarks/decode_test.go +++ b/benchmarks/decode_test.go @@ -285,3 +285,51 @@ func Benchmark_Decode_LargeStruct_Unmarshal_GoJsonNoEscape(b *testing.B) { } } } + +func Benchmark_Decode_LargeStruct_Stream_EncodingJson(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(LargeFixture) + for i := 0; i < b.N; i++ { + result := LargePayload{} + reader.Reset(LargeFixture) + if err := json.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_LargeStruct_Stream_JsonIter(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(LargeFixture) + for i := 0; i < b.N; i++ { + result := LargePayload{} + reader.Reset(LargeFixture) + if err := jsoniter.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_LargeStruct_Stream_GoJay(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(LargeFixture) + for n := 0; n < b.N; n++ { + reader.Reset(LargeFixture) + result := LargePayload{} + if err := gojay.NewDecoder(reader).DecodeObject(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_LargeStruct_Stream_GoJson(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(LargeFixture) + for i := 0; i < b.N; i++ { + result := LargePayload{} + reader.Reset(LargeFixture) + if err := gojson.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} From 66b0490583c1dd615f72b81c3592cbb32326616c Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 31 Jul 2020 20:31:14 +0900 Subject: [PATCH 13/13] Support InputOffset --- decode.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decode.go b/decode.go index 057533e..e9b1c31 100644 --- a/decode.go +++ b/decode.go @@ -252,7 +252,7 @@ func (d *Decoder) DisallowUnknownFields() { } func (d *Decoder) InputOffset() int64 { - return 0 + return d.s.totalOffset() } // UseNumber causes the Decoder to unmarshal a number into an interface{} as a