diff --git a/benchmarks/decode_test.go b/benchmarks/decode_test.go index aad5683..a644050 100644 --- a/benchmarks/decode_test.go +++ b/benchmarks/decode_test.go @@ -1,6 +1,7 @@ package benchmark import ( + "bytes" "encoding/json" "testing" @@ -9,7 +10,7 @@ import ( jsoniter "github.com/json-iterator/go" ) -func Benchmark_Decode_SmallStruct_EncodingJson(b *testing.B) { +func Benchmark_Decode_SmallStruct_Unmarshal_EncodingJson(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := SmallPayload{} @@ -19,7 +20,7 @@ func Benchmark_Decode_SmallStruct_EncodingJson(b *testing.B) { } } -func Benchmark_Decode_SmallStruct_JsonIter(b *testing.B) { +func Benchmark_Decode_SmallStruct_Unmarshal_JsonIter(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := SmallPayload{} @@ -29,7 +30,7 @@ func Benchmark_Decode_SmallStruct_JsonIter(b *testing.B) { } } -func Benchmark_Decode_SmallStruct_GoJay(b *testing.B) { +func Benchmark_Decode_SmallStruct_Unmarshal_GoJay(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := SmallPayload{} @@ -39,7 +40,7 @@ func Benchmark_Decode_SmallStruct_GoJay(b *testing.B) { } } -func Benchmark_Decode_SmallStruct_GoJayUnsafe(b *testing.B) { +func Benchmark_Decode_SmallStruct_Unmarshal_GoJayUnsafe(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := SmallPayload{} @@ -49,7 +50,7 @@ func Benchmark_Decode_SmallStruct_GoJayUnsafe(b *testing.B) { } } -func Benchmark_Decode_SmallStruct_GoJson(b *testing.B) { +func Benchmark_Decode_SmallStruct_Unmarshal_GoJson(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := SmallPayload{} @@ -59,7 +60,7 @@ func Benchmark_Decode_SmallStruct_GoJson(b *testing.B) { } } -func Benchmark_Decode_SmallStruct_GoJsonNoEscape(b *testing.B) { +func Benchmark_Decode_SmallStruct_Unmarshal_GoJsonNoEscape(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := SmallPayload{} @@ -69,7 +70,55 @@ func Benchmark_Decode_SmallStruct_GoJsonNoEscape(b 
*testing.B) { } } -func Benchmark_Decode_MediumStruct_EncodingJson(b *testing.B) { +func Benchmark_Decode_SmallStruct_Stream_EncodingJson(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(SmallFixture) + for i := 0; i < b.N; i++ { + result := SmallPayload{} + reader.Reset(SmallFixture) + if err := json.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_SmallStruct_Stream_JsonIter(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(SmallFixture) + for i := 0; i < b.N; i++ { + result := SmallPayload{} + reader.Reset(SmallFixture) + if err := jsoniter.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_SmallStruct_Stream_GoJay(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(SmallFixture) + for n := 0; n < b.N; n++ { + reader.Reset(SmallFixture) + result := SmallPayload{} + if err := gojay.NewDecoder(reader).DecodeObject(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_SmallStruct_Stream_GoJson(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(SmallFixture) + for i := 0; i < b.N; i++ { + result := SmallPayload{} + reader.Reset(SmallFixture) + if err := gojson.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_MediumStruct_Unmarshal_EncodingJson(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := MediumPayload{} @@ -79,7 +128,7 @@ func Benchmark_Decode_MediumStruct_EncodingJson(b *testing.B) { } } -func Benchmark_Decode_MediumStruct_JsonIter(b *testing.B) { +func Benchmark_Decode_MediumStruct_Unmarshal_JsonIter(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := MediumPayload{} @@ -89,7 +138,7 @@ func Benchmark_Decode_MediumStruct_JsonIter(b *testing.B) { } } -func Benchmark_Decode_MediumStruct_GoJay(b *testing.B) { +func Benchmark_Decode_MediumStruct_Unmarshal_GoJay(b *testing.B) { b.ReportAllocs() for n := 0; n < 
b.N; n++ { result := MediumPayload{} @@ -99,7 +148,7 @@ func Benchmark_Decode_MediumStruct_GoJay(b *testing.B) { } } -func Benchmark_Decode_MediumStruct_GoJayUnsafe(b *testing.B) { +func Benchmark_Decode_MediumStruct_Unmarshal_GoJayUnsafe(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := MediumPayload{} @@ -109,7 +158,7 @@ func Benchmark_Decode_MediumStruct_GoJayUnsafe(b *testing.B) { } } -func Benchmark_Decode_MediumStruct_GoJson(b *testing.B) { +func Benchmark_Decode_MediumStruct_Unmarshal_GoJson(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := MediumPayload{} @@ -119,7 +168,7 @@ func Benchmark_Decode_MediumStruct_GoJson(b *testing.B) { } } -func Benchmark_Decode_MediumStruct_GoJsonNoEscape(b *testing.B) { +func Benchmark_Decode_MediumStruct_Unmarshal_GoJsonNoEscape(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := MediumPayload{} @@ -129,7 +178,55 @@ func Benchmark_Decode_MediumStruct_GoJsonNoEscape(b *testing.B) { } } -func Benchmark_Decode_LargeStruct_EncodingJson(b *testing.B) { +func Benchmark_Decode_MediumStruct_Stream_EncodingJson(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(MediumFixture) + for i := 0; i < b.N; i++ { + result := MediumPayload{} + reader.Reset(MediumFixture) + if err := json.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_MediumStruct_Stream_JsonIter(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(MediumFixture) + for i := 0; i < b.N; i++ { + result := MediumPayload{} + reader.Reset(MediumFixture) + if err := jsoniter.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_MediumStruct_Stream_GoJay(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(MediumFixture) + for n := 0; n < b.N; n++ { + reader.Reset(MediumFixture) + result := MediumPayload{} + if err := gojay.NewDecoder(reader).DecodeObject(&result); err != nil { + b.Fatal(err) + } + 
} +} + +func Benchmark_Decode_MediumStruct_Stream_GoJson(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(MediumFixture) + for i := 0; i < b.N; i++ { + result := MediumPayload{} + reader.Reset(MediumFixture) + if err := gojson.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_LargeStruct_Unmarshal_EncodingJson(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := LargePayload{} @@ -139,7 +236,7 @@ func Benchmark_Decode_LargeStruct_EncodingJson(b *testing.B) { } } -func Benchmark_Decode_LargeStruct_JsonIter(b *testing.B) { +func Benchmark_Decode_LargeStruct_Unmarshal_JsonIter(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := LargePayload{} @@ -149,7 +246,7 @@ func Benchmark_Decode_LargeStruct_JsonIter(b *testing.B) { } } -func Benchmark_Decode_LargeStruct_GoJay(b *testing.B) { +func Benchmark_Decode_LargeStruct_Unmarshal_GoJay(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { result := LargePayload{} @@ -159,7 +256,7 @@ func Benchmark_Decode_LargeStruct_GoJay(b *testing.B) { } } -func Benchmark_Decode_LargeStruct_GoJayUnsafe(b *testing.B) { +func Benchmark_Decode_LargeStruct_Unmarshal_GoJayUnsafe(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := LargePayload{} @@ -169,7 +266,7 @@ func Benchmark_Decode_LargeStruct_GoJayUnsafe(b *testing.B) { } } -func Benchmark_Decode_LargeStruct_GoJson(b *testing.B) { +func Benchmark_Decode_LargeStruct_Unmarshal_GoJson(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := LargePayload{} @@ -179,7 +276,7 @@ func Benchmark_Decode_LargeStruct_GoJson(b *testing.B) { } } -func Benchmark_Decode_LargeStruct_GoJsonNoEscape(b *testing.B) { +func Benchmark_Decode_LargeStruct_Unmarshal_GoJsonNoEscape(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { result := LargePayload{} @@ -188,3 +285,51 @@ func Benchmark_Decode_LargeStruct_GoJsonNoEscape(b *testing.B) { } } } + +func 
Benchmark_Decode_LargeStruct_Stream_EncodingJson(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(LargeFixture) + for i := 0; i < b.N; i++ { + result := LargePayload{} + reader.Reset(LargeFixture) + if err := json.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_LargeStruct_Stream_JsonIter(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(LargeFixture) + for i := 0; i < b.N; i++ { + result := LargePayload{} + reader.Reset(LargeFixture) + if err := jsoniter.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_LargeStruct_Stream_GoJay(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(LargeFixture) + for n := 0; n < b.N; n++ { + reader.Reset(LargeFixture) + result := LargePayload{} + if err := gojay.NewDecoder(reader).DecodeObject(&result); err != nil { + b.Fatal(err) + } + } +} + +func Benchmark_Decode_LargeStruct_Stream_GoJson(b *testing.B) { + b.ReportAllocs() + reader := bytes.NewReader(LargeFixture) + for i := 0; i < b.N; i++ { + result := LargePayload{} + reader.Reset(LargeFixture) + if err := gojson.NewDecoder(reader).Decode(&result); err != nil { + b.Fatal(err) + } + } +} diff --git a/decode.go b/decode.go index 2dbb0ee..e9b1c31 100644 --- a/decode.go +++ b/decode.go @@ -1,35 +1,27 @@ package json import ( - "bytes" "encoding" "io" "reflect" - "strings" + "strconv" "sync" "unsafe" ) -// A Token holds a value of one of these types: -// -// Delim, for the four JSON delimiters [ ] { } -// bool, for JSON booleans -// float64, for JSON numbers -// Number, for JSON numbers -// string, for JSON string literals -// nil, for JSON null -// -type Token interface{} - type Delim rune +func (d Delim) String() string { + return string(d) +} + type decoder interface { decode([]byte, int64, uintptr) (int64, error) + decodeStream(*stream, uintptr) error } type Decoder struct { - r io.Reader - buffered func() io.Reader + s *stream } type 
decoderMap struct { @@ -57,18 +49,24 @@ func init() { cachedDecoder = decoderMap{} } +const ( + nul = '\000' +) + // NewDecoder returns a new decoder that reads from r. // // The decoder introduces its own buffering and may // read data from r beyond the JSON values requested. func NewDecoder(r io.Reader) *Decoder { - return &Decoder{r: r} + s := &stream{r: r} + s.read() + return &Decoder{s: s} } // Buffered returns a reader of the data remaining in the Decoder's // buffer. The reader is valid until the next call to Decode. func (d *Decoder) Buffered() io.Reader { - return d.buffered() + return d.s.buffered() } func (d *Decoder) validateType(typ *rtype, p uintptr) error { @@ -116,6 +114,27 @@ func (d *Decoder) decodeForUnmarshalNoEscape(src []byte, v interface{}) error { return d.decode(src, header) } +func (d *Decoder) prepareForDecode() error { + s := d.s + for { + switch s.char() { + case ' ', '\t', '\r', '\n': + s.cursor++ + continue + case ',', ':': + s.cursor++ + return nil + case nul: + if s.read() { + continue + } + return io.EOF + } + break + } + return nil +} + // Decode reads the next JSON-encoded value from its // input and stores it in the value pointed to by v. 
// @@ -142,251 +161,87 @@ func (d *Decoder) Decode(v interface{}) error { cachedDecoder.set(typeptr, compiledDec) dec = compiledDec } - - for { - buf := make([]byte, 1024) - n, err := d.r.Read(buf) - if n == 0 || err == io.EOF { - return nil - } - if err != nil { - return err - } - cursor, err := dec.decode(buf[:n], 0, ptr) - if err != nil { - return err - } - d.buffered = func() io.Reader { - return bytes.NewReader(buf[cursor:]) - } + if err := d.prepareForDecode(); err != nil { + return err + } + s := d.s + if err := dec.decodeStream(s, ptr); err != nil { + return err } return nil } -func (d *Decoder) compileHead(typ *rtype) (decoder, error) { - if typ.Implements(unmarshalJSONType) { - return newUnmarshalJSONDecoder(typ), nil - } else if typ.Implements(unmarshalTextType) { - return newUnmarshalTextDecoder(typ), nil - } - return d.compile(typ.Elem()) -} - -func (d *Decoder) compile(typ *rtype) (decoder, error) { - if typ.Implements(unmarshalJSONType) { - return newUnmarshalJSONDecoder(typ), nil - } else if typ.Implements(unmarshalTextType) { - return newUnmarshalTextDecoder(typ), nil - } - switch typ.Kind() { - case reflect.Ptr: - return d.compilePtr(typ) - case reflect.Struct: - return d.compileStruct(typ) - case reflect.Slice: - return d.compileSlice(typ) - case reflect.Array: - return d.compileArray(typ) - case reflect.Map: - return d.compileMap(typ) - case reflect.Interface: - return d.compileInterface(typ) - case reflect.Int: - return d.compileInt() - case reflect.Int8: - return d.compileInt8() - case reflect.Int16: - return d.compileInt16() - case reflect.Int32: - return d.compileInt32() - case reflect.Int64: - return d.compileInt64() - case reflect.Uint: - return d.compileUint() - case reflect.Uint8: - return d.compileUint8() - case reflect.Uint16: - return d.compileUint16() - case reflect.Uint32: - return d.compileUint32() - case reflect.Uint64: - return d.compileUint64() - case reflect.String: - return d.compileString() - case reflect.Bool: - return 
d.compileBool() - case reflect.Float32: - return d.compileFloat32() - case reflect.Float64: - return d.compileFloat64() - } - return nil, &UnsupportedTypeError{Type: rtype2type(typ)} -} - -func (d *Decoder) compilePtr(typ *rtype) (decoder, error) { - dec, err := d.compile(typ.Elem()) - if err != nil { - return nil, err - } - return newPtrDecoder(dec, typ.Elem()), nil -} - -func (d *Decoder) compileInt() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int)(unsafe.Pointer(p)) = int(v) - }), nil -} - -func (d *Decoder) compileInt8() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int8)(unsafe.Pointer(p)) = int8(v) - }), nil -} - -func (d *Decoder) compileInt16() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int16)(unsafe.Pointer(p)) = int16(v) - }), nil -} - -func (d *Decoder) compileInt32() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int32)(unsafe.Pointer(p)) = int32(v) - }), nil -} - -func (d *Decoder) compileInt64() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int64)(unsafe.Pointer(p)) = v - }), nil -} - -func (d *Decoder) compileUint() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint)(unsafe.Pointer(p)) = uint(v) - }), nil -} - -func (d *Decoder) compileUint8() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint8)(unsafe.Pointer(p)) = uint8(v) - }), nil -} - -func (d *Decoder) compileUint16() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint16)(unsafe.Pointer(p)) = uint16(v) - }), nil -} - -func (d *Decoder) compileUint32() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint32)(unsafe.Pointer(p)) = uint32(v) - }), nil -} - -func (d *Decoder) compileUint64() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint64)(unsafe.Pointer(p)) = v - }), nil -} - -func (d *Decoder) 
compileFloat32() (decoder, error) { - return newFloatDecoder(func(p uintptr, v float64) { - *(*float32)(unsafe.Pointer(p)) = float32(v) - }), nil -} - -func (d *Decoder) compileFloat64() (decoder, error) { - return newFloatDecoder(func(p uintptr, v float64) { - *(*float64)(unsafe.Pointer(p)) = v - }), nil -} - -func (d *Decoder) compileString() (decoder, error) { - return newStringDecoder(), nil -} - -func (d *Decoder) compileBool() (decoder, error) { - return newBoolDecoder(), nil -} - -func (d *Decoder) compileSlice(typ *rtype) (decoder, error) { - elem := typ.Elem() - decoder, err := d.compile(elem) - if err != nil { - return nil, err - } - return newSliceDecoder(decoder, elem, elem.Size()), nil -} - -func (d *Decoder) compileArray(typ *rtype) (decoder, error) { - elem := typ.Elem() - decoder, err := d.compile(elem) - if err != nil { - return nil, err - } - return newArrayDecoder(decoder, elem, typ.Len()), nil -} - -func (d *Decoder) compileMap(typ *rtype) (decoder, error) { - keyDec, err := d.compile(typ.Key()) - if err != nil { - return nil, err - } - valueDec, err := d.compile(typ.Elem()) - if err != nil { - return nil, err - } - return newMapDecoder(typ, keyDec, valueDec), nil -} - -func (d *Decoder) compileInterface(typ *rtype) (decoder, error) { - return newInterfaceDecoder(typ), nil -} - -func (d *Decoder) getTag(field reflect.StructField) string { - return field.Tag.Get("json") -} - -func (d *Decoder) isIgnoredStructField(field reflect.StructField) bool { - if field.PkgPath != "" && !field.Anonymous { - // private field - return true - } - tag := d.getTag(field) - if tag == "-" { - return true - } - return false -} - -func (d *Decoder) compileStruct(typ *rtype) (decoder, error) { - fieldNum := typ.NumField() - fieldMap := map[string]*structFieldSet{} - for i := 0; i < fieldNum; i++ { - field := typ.Field(i) - if d.isIgnoredStructField(field) { +func (d *Decoder) More() bool { + s := d.s + for { + switch s.char() { + case ' ', '\n', '\r', '\t': + 
s.cursor++ continue - } - keyName := field.Name - tag := d.getTag(field) - opts := strings.Split(tag, ",") - if len(opts) > 0 { - if opts[0] != "" { - keyName = opts[0] + case '}', ']': + return false + case nul: + if s.read() { + continue } + return false } - dec, err := d.compile(type2rtype(field.Type)) - if err != nil { - return nil, err - } - fieldSet := &structFieldSet{dec: dec, offset: field.Offset} - fieldMap[field.Name] = fieldSet - fieldMap[keyName] = fieldSet - fieldMap[strings.ToLower(keyName)] = fieldSet + break } - return newStructDecoder(fieldMap), nil + return true +} + +func (d *Decoder) Token() (Token, error) { + s := d.s + for { + c := s.char() + switch c { + case ' ', '\n', '\r', '\t': + s.cursor++ + case '{', '[', ']', '}': + s.cursor++ + return Delim(c), nil + case ',', ':': + s.cursor++ + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + bytes := floatBytes(s) + s := *(*string)(unsafe.Pointer(&bytes)) + f64, err := strconv.ParseFloat(s, 64) + if err != nil { + return nil, err + } + return f64, nil + case '"': + bytes, err := stringBytes(s) + if err != nil { + return nil, err + } + return string(bytes), nil + case 't': + if err := trueBytes(s); err != nil { + return nil, err + } + return true, nil + case 'f': + if err := falseBytes(s); err != nil { + return nil, err + } + return false, nil + case 'n': + if err := nullBytes(s); err != nil { + return nil, err + } + return nil, nil + case nul: + if s.read() { + continue + } + return nil, io.EOF + default: + return nil, errInvalidCharacter(s.char(), "token", s.totalOffset()) + } + } + return nil, io.EOF } // DisallowUnknownFields causes the Decoder to return an error when the destination @@ -397,15 +252,7 @@ func (d *Decoder) DisallowUnknownFields() { } func (d *Decoder) InputOffset() int64 { - return 0 -} - -func (d *Decoder) More() bool { - return false -} - -func (d *Decoder) Token() (Token, error) { - return nil, nil + return d.s.totalOffset() } // UseNumber causes the Decoder to 
unmarshal a number into an interface{} as a diff --git a/decode_array.go b/decode_array.go index f4e3555..7e34d85 100644 --- a/decode_array.go +++ b/decode_array.go @@ -16,6 +16,47 @@ func newArrayDecoder(dec decoder, elemType *rtype, alen int) *arrayDecoder { } } +func (d *arrayDecoder) decodeStream(s *stream, p uintptr) error { + for { + switch s.char() { + case ' ', '\n', '\t', '\r': + case '[': + idx := 0 + for { + s.cursor++ + if err := d.valueDecoder.decodeStream(s, p+uintptr(idx)*d.size); err != nil { + return err + } + s.skipWhiteSpace() + switch s.char() { + case ']': + s.cursor++ + return nil + case ',': + idx++ + case nul: + if s.read() { + continue + } + goto ERROR + default: + goto ERROR + } + } + case nul: + if s.read() { + continue + } + goto ERROR + default: + goto ERROR + } + s.cursor++ + } +ERROR: + return errUnexpectedEndOfJSON("array", s.totalOffset()) +} + func (d *arrayDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { buflen := int64(len(buf)) for ; cursor < buflen; cursor++ { diff --git a/decode_bool.go b/decode_bool.go index 583bad5..de1edc5 100644 --- a/decode_bool.go +++ b/decode_bool.go @@ -10,6 +10,82 @@ func newBoolDecoder() *boolDecoder { return &boolDecoder{} } +func trueBytes(s *stream) error { + if s.cursor+3 >= s.length { + if !s.read() { + return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) + } + } + s.cursor++ + if s.char() != 'r' { + return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) + } + s.cursor++ + if s.char() != 'u' { + return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) + } + s.cursor++ + if s.char() != 'e' { + return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) + } + s.cursor++ + return nil +} + +func falseBytes(s *stream) error { + if s.cursor+4 >= s.length { + if !s.read() { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + } + s.cursor++ + if s.char() != 'a' { + return errInvalidCharacter(s.char(), 
"bool(false)", s.totalOffset()) + } + s.cursor++ + if s.char() != 'l' { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + s.cursor++ + if s.char() != 's' { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + s.cursor++ + if s.char() != 'e' { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + s.cursor++ + return nil +} + +func (d *boolDecoder) decodeStream(s *stream, p uintptr) error { + s.skipWhiteSpace() + for { + switch s.char() { + case 't': + if err := trueBytes(s); err != nil { + return err + } + *(*bool)(unsafe.Pointer(p)) = true + return nil + case 'f': + if err := falseBytes(s); err != nil { + return err + } + *(*bool)(unsafe.Pointer(p)) = false + return nil + case nul: + if s.read() { + continue + } + goto ERROR + } + break + } +ERROR: + return errUnexpectedEndOfJSON("bool", s.totalOffset()) +} + func (d *boolDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { buflen := int64(len(buf)) cursor = skipWhiteSpace(buf, cursor) @@ -29,6 +105,7 @@ func (d *boolDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) } cursor += 4 *(*bool)(unsafe.Pointer(p)) = true + return cursor, nil case 'f': if cursor+4 >= buflen { return 0, errUnexpectedEndOfJSON("bool(false)", cursor) @@ -47,6 +124,7 @@ func (d *boolDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) } cursor += 5 *(*bool)(unsafe.Pointer(p)) = false + return cursor, nil } - return cursor, nil + return 0, errUnexpectedEndOfJSON("bool", cursor) } diff --git a/decode_compile.go b/decode_compile.go new file mode 100644 index 0000000..d34b485 --- /dev/null +++ b/decode_compile.go @@ -0,0 +1,233 @@ +package json + +import ( + "reflect" + "strings" + "unsafe" +) + +func (d *Decoder) compileHead(typ *rtype) (decoder, error) { + if typ.Implements(unmarshalJSONType) { + return newUnmarshalJSONDecoder(typ), nil + } else if typ.Implements(unmarshalTextType) { + return newUnmarshalTextDecoder(typ), 
nil + } + return d.compile(typ.Elem()) +} + +func (d *Decoder) compile(typ *rtype) (decoder, error) { + if typ.Implements(unmarshalJSONType) { + return newUnmarshalJSONDecoder(typ), nil + } else if typ.Implements(unmarshalTextType) { + return newUnmarshalTextDecoder(typ), nil + } + switch typ.Kind() { + case reflect.Ptr: + return d.compilePtr(typ) + case reflect.Struct: + return d.compileStruct(typ) + case reflect.Slice: + return d.compileSlice(typ) + case reflect.Array: + return d.compileArray(typ) + case reflect.Map: + return d.compileMap(typ) + case reflect.Interface: + return d.compileInterface(typ) + case reflect.Int: + return d.compileInt() + case reflect.Int8: + return d.compileInt8() + case reflect.Int16: + return d.compileInt16() + case reflect.Int32: + return d.compileInt32() + case reflect.Int64: + return d.compileInt64() + case reflect.Uint: + return d.compileUint() + case reflect.Uint8: + return d.compileUint8() + case reflect.Uint16: + return d.compileUint16() + case reflect.Uint32: + return d.compileUint32() + case reflect.Uint64: + return d.compileUint64() + case reflect.String: + return d.compileString() + case reflect.Bool: + return d.compileBool() + case reflect.Float32: + return d.compileFloat32() + case reflect.Float64: + return d.compileFloat64() + } + return nil, &UnsupportedTypeError{Type: rtype2type(typ)} +} + +func (d *Decoder) compilePtr(typ *rtype) (decoder, error) { + dec, err := d.compile(typ.Elem()) + if err != nil { + return nil, err + } + return newPtrDecoder(dec, typ.Elem()), nil +} + +func (d *Decoder) compileInt() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int)(unsafe.Pointer(p)) = int(v) + }), nil +} + +func (d *Decoder) compileInt8() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int8)(unsafe.Pointer(p)) = int8(v) + }), nil +} + +func (d *Decoder) compileInt16() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int16)(unsafe.Pointer(p)) = int16(v) 
+ }), nil +} + +func (d *Decoder) compileInt32() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int32)(unsafe.Pointer(p)) = int32(v) + }), nil +} + +func (d *Decoder) compileInt64() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int64)(unsafe.Pointer(p)) = v + }), nil +} + +func (d *Decoder) compileUint() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint)(unsafe.Pointer(p)) = uint(v) + }), nil +} + +func (d *Decoder) compileUint8() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint8)(unsafe.Pointer(p)) = uint8(v) + }), nil +} + +func (d *Decoder) compileUint16() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint16)(unsafe.Pointer(p)) = uint16(v) + }), nil +} + +func (d *Decoder) compileUint32() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint32)(unsafe.Pointer(p)) = uint32(v) + }), nil +} + +func (d *Decoder) compileUint64() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint64)(unsafe.Pointer(p)) = v + }), nil +} + +func (d *Decoder) compileFloat32() (decoder, error) { + return newFloatDecoder(func(p uintptr, v float64) { + *(*float32)(unsafe.Pointer(p)) = float32(v) + }), nil +} + +func (d *Decoder) compileFloat64() (decoder, error) { + return newFloatDecoder(func(p uintptr, v float64) { + *(*float64)(unsafe.Pointer(p)) = v + }), nil +} + +func (d *Decoder) compileString() (decoder, error) { + return newStringDecoder(), nil +} + +func (d *Decoder) compileBool() (decoder, error) { + return newBoolDecoder(), nil +} + +func (d *Decoder) compileSlice(typ *rtype) (decoder, error) { + elem := typ.Elem() + decoder, err := d.compile(elem) + if err != nil { + return nil, err + } + return newSliceDecoder(decoder, elem, elem.Size()), nil +} + +func (d *Decoder) compileArray(typ *rtype) (decoder, error) { + elem := typ.Elem() + decoder, err := d.compile(elem) + if err != 
nil { + return nil, err + } + return newArrayDecoder(decoder, elem, typ.Len()), nil +} + +func (d *Decoder) compileMap(typ *rtype) (decoder, error) { + keyDec, err := d.compile(typ.Key()) + if err != nil { + return nil, err + } + valueDec, err := d.compile(typ.Elem()) + if err != nil { + return nil, err + } + return newMapDecoder(typ, keyDec, valueDec), nil +} + +func (d *Decoder) compileInterface(typ *rtype) (decoder, error) { + return newInterfaceDecoder(typ), nil +} + +func (d *Decoder) getTag(field reflect.StructField) string { + return field.Tag.Get("json") +} + +func (d *Decoder) isIgnoredStructField(field reflect.StructField) bool { + if field.PkgPath != "" && !field.Anonymous { + // private field + return true + } + tag := d.getTag(field) + if tag == "-" { + return true + } + return false +} + +func (d *Decoder) compileStruct(typ *rtype) (decoder, error) { + fieldNum := typ.NumField() + fieldMap := map[string]*structFieldSet{} + for i := 0; i < fieldNum; i++ { + field := typ.Field(i) + if d.isIgnoredStructField(field) { + continue + } + keyName := field.Name + tag := d.getTag(field) + opts := strings.Split(tag, ",") + if len(opts) > 0 { + if opts[0] != "" { + keyName = opts[0] + } + } + dec, err := d.compile(type2rtype(field.Type)) + if err != nil { + return nil, err + } + fieldSet := &structFieldSet{dec: dec, offset: field.Offset} + fieldMap[field.Name] = fieldSet + fieldMap[keyName] = fieldSet + fieldMap[strings.ToLower(keyName)] = fieldSet + } + return newStructDecoder(fieldMap), nil +} diff --git a/decode_context.go b/decode_context.go index 90371c4..dd68530 100644 --- a/decode_context.go +++ b/decode_context.go @@ -72,6 +72,63 @@ func skipValue(buf []byte, cursor int64) (int64, error) { return cursor, nil } continue + case 't': + if cursor+3 >= buflen { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+1] != 'r' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+2] != 'u' { + return 0, 
errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+3] != 'e' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + cursor += 4 + if bracketCount == 0 && braceCount == 0 { + return cursor, nil + } + continue + case 'f': + if cursor+4 >= buflen { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+1] != 'a' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+2] != 'l' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+3] != 's' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + if buf[cursor+4] != 'e' { + return 0, errUnexpectedEndOfJSON("bool of object", cursor) + } + cursor += 5 + if bracketCount == 0 && braceCount == 0 { + return cursor, nil + } + continue + case 'n': + if cursor+3 >= buflen { + return 0, errUnexpectedEndOfJSON("null", cursor) + } + if buf[cursor+1] != 'u' { + return 0, errUnexpectedEndOfJSON("null", cursor) + } + if buf[cursor+2] != 'l' { + return 0, errUnexpectedEndOfJSON("null", cursor) + } + if buf[cursor+3] != 'l' { + return 0, errUnexpectedEndOfJSON("null", cursor) + } + cursor += 4 + if bracketCount == 0 && braceCount == 0 { + return cursor, nil + } + continue } cursor++ } diff --git a/decode_float.go b/decode_float.go index 98b3088..3393a6c 100644 --- a/decode_float.go +++ b/decode_float.go @@ -13,6 +13,62 @@ func newFloatDecoder(op func(uintptr, float64)) *floatDecoder { return &floatDecoder{op: op} } +var floatTable = [256]bool{ + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + '.': true, + 'e': true, + 'E': true, + '+': true, + '-': true, +} + +func floatBytes(s *stream) []byte { + start := s.cursor + for { + s.cursor++ + if floatTable[s.char()] { + continue + } else if s.char() == nul { + if s.read() { + s.cursor-- // for retry current character + continue + } + } + break + } + return s.buf[start:s.cursor] +} + +func (d *floatDecoder) 
decodeStreamByte(s *stream) ([]byte, error) { + for { + switch s.char() { + case ' ', '\n', '\t', '\r': + s.cursor++ + continue + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return floatBytes(s), nil + case nul: + if s.read() { + continue + } + goto ERROR + default: + goto ERROR + } + } +ERROR: + return nil, errUnexpectedEndOfJSON("float", s.totalOffset()) +} + func (d *floatDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { buflen := int64(len(buf)) for ; cursor < buflen; cursor++ { @@ -31,11 +85,27 @@ func (d *floatDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, erro } num := buf[start:cursor] return num, cursor, nil + default: + return nil, 0, errUnexpectedEndOfJSON("float", cursor) } } return nil, 0, errUnexpectedEndOfJSON("float", cursor) } +func (d *floatDecoder) decodeStream(s *stream, p uintptr) error { + bytes, err := d.decodeStreamByte(s) + if err != nil { + return err + } + str := *(*string)(unsafe.Pointer(&bytes)) + f64, err := strconv.ParseFloat(str, 64) + if err != nil { + return err + } + d.op(p, f64) + return nil +} + func (d *floatDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { bytes, c, err := d.decodeByte(buf, cursor) if err != nil { diff --git a/decode_int.go b/decode_int.go index af96458..3db1f45 100644 --- a/decode_int.go +++ b/decode_int.go @@ -49,6 +49,62 @@ var ( } ) +func (d *intDecoder) decodeStreamByte(s *stream) ([]byte, error) { + for { + switch s.char() { + case ' ', '\n', '\t', '\r': + s.cursor++ + continue + case '-': + start := s.cursor + for { + s.cursor++ + if numTable[s.char()] { + continue + } else if s.char() == nul { + if s.read() { + s.cursor-- // for retry current character + continue + } + } + break + } + num := s.buf[start:s.cursor] + s.reset() + if len(num) < 2 { + goto ERROR + } + return num, nil + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + start := s.cursor + for { + s.cursor++ + if numTable[s.char()] { + continue + } else if 
s.char() == nul { + if s.read() { + s.cursor-- // for retry current character + continue + } + } + break + } + num := s.buf[start:s.cursor] + s.reset() + return num, nil + case nul: + if s.read() { + continue + } + goto ERROR + default: + goto ERROR + } + } +ERROR: + return nil, errUnexpectedEndOfJSON("number(integer)", s.totalOffset()) +} + func (d *intDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { for { switch buf[cursor] { @@ -72,6 +128,15 @@ func (d *intDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) return nil, 0, errUnexpectedEndOfJSON("number(integer)", cursor) } +func (d *intDecoder) decodeStream(s *stream, p uintptr) error { + bytes, err := d.decodeStreamByte(s) + if err != nil { + return err + } + d.op(p, d.parseInt(bytes)) + return nil +} + func (d *intDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { bytes, c, err := d.decodeByte(buf, cursor) if err != nil { diff --git a/decode_interface.go b/decode_interface.go index a0d94b3..e6af61c 100644 --- a/decode_interface.go +++ b/decode_interface.go @@ -20,6 +20,83 @@ var ( ) ) +func (d *interfaceDecoder) decodeStream(s *stream, p uintptr) error { + s.skipWhiteSpace() + for { + switch s.char() { + case '{': + var v map[interface{}]interface{} + ptr := unsafe.Pointer(&v) + d.dummy = ptr + dec := newMapDecoder(interfaceMapType, newInterfaceDecoder(d.typ), newInterfaceDecoder(d.typ)) + if err := dec.decodeStream(s, uintptr(ptr)); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = v + return nil + case '[': + var v []interface{} + ptr := unsafe.Pointer(&v) + d.dummy = ptr // escape ptr + dec := newSliceDecoder(newInterfaceDecoder(d.typ), d.typ, d.typ.Size()) + if err := dec.decodeStream(s, uintptr(ptr)); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = v + return nil + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return newFloatDecoder(func(p uintptr, v float64) { + 
*(*interface{})(unsafe.Pointer(p)) = v + }).decodeStream(s, p) + case '"': + s.cursor++ + start := s.cursor + for { + switch s.char() { + case '\\': + s.cursor++ + case '"': + literal := s.buf[start:s.cursor] + s.cursor++ + *(*interface{})(unsafe.Pointer(p)) = *(*string)(unsafe.Pointer(&literal)) + return nil + case nul: + if s.read() { + continue + } + return errUnexpectedEndOfJSON("string", s.totalOffset()) + } + s.cursor++ + } + return errUnexpectedEndOfJSON("string", s.totalOffset()) + case 't': + if err := trueBytes(s); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = true + return nil + case 'f': + if err := falseBytes(s); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = false + return nil + case 'n': + if err := nullBytes(s); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = nil + return nil + case nul: + if s.read() { + continue + } + } + break + } + return errNotAtBeginningOfValue(s.totalOffset()) +} + func (d *interfaceDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { cursor = skipWhiteSpace(buf, cursor) switch buf[cursor] { diff --git a/decode_map.go b/decode_map.go index 18b5119..2ae99fb 100644 --- a/decode_map.go +++ b/decode_map.go @@ -35,6 +35,59 @@ func (d *mapDecoder) setValue(buf []byte, cursor int64, key interface{}) (int64, return d.valueDecoder.decode(buf, cursor, uintptr(header.ptr)) } +func (d *mapDecoder) setKeyStream(s *stream, key interface{}) error { + header := (*interfaceHeader)(unsafe.Pointer(&key)) + return d.keyDecoder.decodeStream(s, uintptr(header.ptr)) +} + +func (d *mapDecoder) setValueStream(s *stream, key interface{}) error { + header := (*interfaceHeader)(unsafe.Pointer(&key)) + return d.valueDecoder.decodeStream(s, uintptr(header.ptr)) +} + +func (d *mapDecoder) decodeStream(s *stream, p uintptr) error { + s.skipWhiteSpace() + if s.char() != '{' { + return errExpected("{ character for map value", s.totalOffset()) + } + mapValue := 
makemap(d.mapType, 0) + for { + s.cursor++ + var key interface{} + if err := d.setKeyStream(s, &key); err != nil { + return err + } + s.skipWhiteSpace() + if s.char() == nul { + s.read() + } + if s.char() != ':' { + return errExpected("colon after object key", s.totalOffset()) + } + s.cursor++ + if s.end() { + return errUnexpectedEndOfJSON("map", s.totalOffset()) + } + var value interface{} + if err := d.setValueStream(s, &value); err != nil { + return err + } + mapassign(d.mapType, mapValue, unsafe.Pointer(&key), unsafe.Pointer(&value)) + s.skipWhiteSpace() + if s.char() == nul { + s.read() + } + if s.char() == '}' { + *(*unsafe.Pointer)(unsafe.Pointer(p)) = mapValue + return nil + } + if s.char() != ',' { + return errExpected("semicolon after object value", s.totalOffset()) + } + } + return nil +} + func (d *mapDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { cursor = skipWhiteSpace(buf, cursor) buflen := int64(len(buf)) diff --git a/decode_ptr.go b/decode_ptr.go index 6135872..4ce625d 100644 --- a/decode_ptr.go +++ b/decode_ptr.go @@ -16,6 +16,15 @@ func newPtrDecoder(dec decoder, typ *rtype) *ptrDecoder { //go:linkname unsafe_New reflect.unsafe_New func unsafe_New(*rtype) uintptr +func (d *ptrDecoder) decodeStream(s *stream, p uintptr) error { + newptr := unsafe_New(d.typ) + if err := d.dec.decodeStream(s, newptr); err != nil { + return err + } + *(*uintptr)(unsafe.Pointer(p)) = newptr + return nil +} + func (d *ptrDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { newptr := unsafe_New(d.typ) c, err := d.dec.decode(buf, cursor, newptr) diff --git a/decode_slice.go b/decode_slice.go index ce14341..34ffad8 100644 --- a/decode_slice.go +++ b/decode_slice.go @@ -56,6 +56,80 @@ func copySlice(elemType *rtype, dst, src reflect.SliceHeader) int //go:linkname newArray reflect.unsafe_NewArray func newArray(*rtype, int) unsafe.Pointer +func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error { + for { + switch s.char() { + 
case ' ', '\n', '\t', '\r': + s.cursor++ + continue + case '[': + idx := 0 + slice := d.newSlice() + cap := slice.cap + data := slice.data + for { + s.cursor++ + if cap <= idx { + src := reflect.SliceHeader{Data: uintptr(data), Len: idx, Cap: cap} + cap *= 2 + data = newArray(d.elemType, cap) + dst := reflect.SliceHeader{Data: uintptr(data), Len: idx, Cap: cap} + copySlice(d.elemType, dst, src) + } + if err := d.valueDecoder.decodeStream(s, uintptr(data)+uintptr(idx)*d.size); err != nil { + return err + } + s.skipWhiteSpace() + RETRY: + switch s.char() { + case ']': + slice.cap = cap + slice.len = idx + 1 + slice.data = data + dstCap := idx + 1 + dst := reflect.SliceHeader{ + Data: uintptr(newArray(d.elemType, dstCap)), + Len: idx + 1, + Cap: dstCap, + } + copySlice(d.elemType, dst, reflect.SliceHeader{ + Data: uintptr(slice.data), + Len: slice.len, + Cap: slice.cap, + }) + *(*reflect.SliceHeader)(unsafe.Pointer(p)) = dst + d.releaseSlice(slice) + s.cursor++ + return nil + case ',': + idx++ + continue + case nul: + if s.read() { + goto RETRY + } + slice.cap = cap + slice.data = data + d.releaseSlice(slice) + goto ERROR + default: + slice.cap = cap + slice.data = data + d.releaseSlice(slice) + goto ERROR + } + } + case nul: + if s.read() { + continue + } + goto ERROR + } + } +ERROR: + return errUnexpectedEndOfJSON("slice", s.totalOffset()) +} + func (d *sliceDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { buflen := int64(len(buf)) for ; cursor < buflen; cursor++ { diff --git a/decode_stream.go b/decode_stream.go new file mode 100644 index 0000000..e84b0b8 --- /dev/null +++ b/decode_stream.go @@ -0,0 +1,193 @@ +package json + +import ( + "bytes" + "io" +) + +const ( + readChunkSize = 512 +) + +type stream struct { + buf []byte + length int64 + r io.Reader + offset int64 + cursor int64 + allRead bool +} + +func (s *stream) buffered() io.Reader { + return bytes.NewReader(s.buf[s.cursor:]) +} + +func (s *stream) totalOffset() int64 { + return 
s.offset + s.cursor +} + +func (s *stream) prevChar() byte { + return s.buf[s.cursor-1] +} + +func (s *stream) char() byte { + return s.buf[s.cursor] +} + +func (s *stream) end() bool { + return s.allRead && s.length <= s.cursor +} + +func (s *stream) progressN(n int64) bool { + if s.cursor+n < s.length-1 || s.read() { + s.cursor += n + return true + } + s.cursor = s.length + return false +} + +func (s *stream) reset() { + s.buf = s.buf[s.cursor:] + s.length -= s.cursor + s.cursor = 0 +} + +func (s *stream) read() bool { + if s.allRead { + return false + } + buf := make([]byte, readChunkSize) + n, err := s.r.Read(buf) + if err != nil && err != io.EOF { + return false + } + if n < readChunkSize || err == io.EOF { + s.allRead = true + } + totalSize := s.length + int64(n) + 1 + if totalSize > readChunkSize { + newBuf := make([]byte, totalSize) + copy(newBuf, s.buf) + copy(newBuf[s.length:], buf) + s.buf = newBuf + s.length = totalSize - 1 + } else if s.length > 0 { + copy(buf[s.length:], buf) + copy(buf, s.buf[:s.length]) + s.buf = buf + s.length = totalSize - 1 + } else { + s.buf = buf + s.length = totalSize - 1 + } + s.offset += s.cursor + if n == 0 { + return false + } + return true +} + +func (s *stream) skipWhiteSpace() { +LOOP: + if isWhiteSpace[s.char()] { + s.cursor++ + goto LOOP + } else if s.char() == nul { + if s.read() { + goto LOOP + } + } +} + +func (s *stream) skipValue() error { + s.skipWhiteSpace() + braceCount := 0 + bracketCount := 0 + for { + switch s.char() { + case nul: + if s.read() { + continue + } + return errUnexpectedEndOfJSON("value of object", s.totalOffset()) + case '{': + braceCount++ + case '[': + bracketCount++ + case '}': + braceCount-- + if braceCount == -1 && bracketCount == 0 { + return nil + } + case ']': + bracketCount-- + case ',': + if bracketCount == 0 && braceCount == 0 { + return nil + } + case '"': + for { + s.cursor++ + if s.char() == nul { + if !s.read() { + return errUnexpectedEndOfJSON("value of string", 
s.totalOffset()) + } + } + if s.char() != '"' { + continue + } + if s.prevChar() == '\\' { + continue + } + if bracketCount == 0 && braceCount == 0 { + s.cursor++ + return nil + } + break + } + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + for { + s.cursor++ + if floatTable[s.char()] { + continue + } else if s.char() == nul { + if s.read() { + continue + } + } + break + } + if bracketCount == 0 && braceCount == 0 { + return nil + } + continue + case 't': + if err := trueBytes(s); err != nil { + return err + } + if bracketCount == 0 && braceCount == 0 { + return nil + } + continue + case 'f': + if err := falseBytes(s); err != nil { + return err + } + if bracketCount == 0 && braceCount == 0 { + return nil + } + continue + case 'n': + if err := nullBytes(s); err != nil { + return err + } + if bracketCount == 0 && braceCount == 0 { + return nil + } + continue + } + s.cursor++ + } + return errUnexpectedEndOfJSON("value of object", s.offset) +} diff --git a/decode_string.go b/decode_string.go index 7b0aec0..2c08153 100644 --- a/decode_string.go +++ b/decode_string.go @@ -11,6 +11,15 @@ func newStringDecoder() *stringDecoder { return &stringDecoder{} } +func (d *stringDecoder) decodeStream(s *stream, p uintptr) error { + bytes, err := d.decodeStreamByte(s) + if err != nil { + return err + } + *(*string)(unsafe.Pointer(p)) = *(*string)(unsafe.Pointer(&bytes)) + return nil +} + func (d *stringDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { bytes, c, err := d.decodeByte(buf, cursor) if err != nil { @@ -21,6 +30,75 @@ func (d *stringDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, erro return cursor, nil } +func stringBytes(s *stream) ([]byte, error) { + s.cursor++ + start := s.cursor + for { + switch s.char() { + case '\\': + s.cursor++ + case '"': + literal := s.buf[start:s.cursor] + s.cursor++ + s.reset() + return literal, nil + case nul: + if s.read() { + continue + } + goto ERROR + } + s.cursor++ + } +ERROR: + return nil, 
errUnexpectedEndOfJSON("string", s.totalOffset()) +} + +func nullBytes(s *stream) error { + if s.cursor+3 >= s.length { + if !s.read() { + return errInvalidCharacter(s.char(), "null", s.totalOffset()) + } + } + s.cursor++ + if s.char() != 'u' { + return errInvalidCharacter(s.char(), "null", s.totalOffset()) + } + s.cursor++ + if s.char() != 'l' { + return errInvalidCharacter(s.char(), "null", s.totalOffset()) + } + s.cursor++ + if s.char() != 'l' { + return errInvalidCharacter(s.char(), "null", s.totalOffset()) + } + s.cursor++ + return nil +} + +func (d *stringDecoder) decodeStreamByte(s *stream) ([]byte, error) { + for { + switch s.char() { + case ' ', '\n', '\t', '\r': + s.cursor++ + continue + case '"': + return stringBytes(s) + case 'n': + if err := nullBytes(s); err != nil { + return nil, err + } + return []byte{}, nil + case nul: + if s.read() { + continue + } + } + break + } + return nil, errNotAtBeginningOfValue(s.totalOffset()) +} + func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { for { switch buf[cursor] { @@ -37,7 +115,7 @@ func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, err literal := buf[start:cursor] cursor++ return literal, cursor, nil - case '\000': + case nul: return nil, 0, errUnexpectedEndOfJSON("string", cursor) } cursor++ @@ -58,7 +136,7 @@ func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, err return nil, 0, errInvalidCharacter(buf[cursor+3], "null", cursor) } cursor += 5 - return []byte{'n', 'u', 'l', 'l'}, cursor, nil + return []byte{}, cursor, nil default: goto ERROR } diff --git a/decode_struct.go b/decode_struct.go index c065211..decbc52 100644 --- a/decode_struct.go +++ b/decode_struct.go @@ -21,6 +21,63 @@ func newStructDecoder(fieldMap map[string]*structFieldSet) *structDecoder { } } +func (d *structDecoder) decodeStream(s *stream, p uintptr) error { + s.skipWhiteSpace() + if s.char() == nul { + s.read() + } + if s.char() != '{' { + return 
errNotAtBeginningOfValue(s.totalOffset()) + } + s.cursor++ + for { + s.reset() + key, err := d.keyDecoder.decodeStreamByte(s) + if err != nil { + return err + } + s.skipWhiteSpace() + if s.char() == nul { + s.read() + } + if s.char() != ':' { + return errExpected("colon after object key", s.totalOffset()) + } + s.cursor++ + if s.char() == nul { + s.read() + } + if s.end() { + return errExpected("object value after colon", s.totalOffset()) + } + k := *(*string)(unsafe.Pointer(&key)) + field, exists := d.fieldMap[k] + if exists { + if err := field.dec.decodeStream(s, p+field.offset); err != nil { + return err + } + } else { + if err := s.skipValue(); err != nil { + return err + } + } + s.skipWhiteSpace() + if s.char() == nul { + s.read() + } + c := s.char() + if c == '}' { + s.cursor++ + return nil + } + if c != ',' { + return errExpected("comma after object element", s.totalOffset()) + } + s.cursor++ + } + return nil +} + func (d *structDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { buflen := int64(len(buf)) cursor = skipWhiteSpace(buf, cursor) diff --git a/decode_test.go b/decode_test.go index c53569c..f4f7bfb 100644 --- a/decode_test.go +++ b/decode_test.go @@ -3,6 +3,7 @@ package json_test import ( "fmt" "reflect" + "strings" "testing" "github.com/goccy/go-json" @@ -231,3 +232,54 @@ func Test_InvalidUnmarshalError(t *testing.T) { assertEq(t, "invalid unmarshal error", "json: Unmarshal(non-pointer int)", err) }) } + +func Test_Token(t *testing.T) { + dec := json.NewDecoder(strings.NewReader(`{"a": 1, "b": true, "c": [1, "two", null]}`)) + cnt := 0 + for { + if _, err := dec.Token(); err != nil { + break + } + cnt++ + } + if cnt != 12 { + t.Fatal("failed to parse token") + } +} + +func Test_DecodeStream(t *testing.T) { + const stream = ` + [ + {"Name": "Ed", "Text": "Knock knock."}, + {"Name": "Sam", "Text": "Who's there?"}, + {"Name": "Ed", "Text": "Go fmt."}, + {"Name": "Sam", "Text": "Go fmt who?"}, + {"Name": "Ed", "Text": "Go fmt 
yourself!"} + ] +` + type Message struct { + Name, Text string + } + dec := json.NewDecoder(strings.NewReader(stream)) + + tk, err := dec.Token() + assertErr(t, err) + assertEq(t, "[", fmt.Sprint(tk), "[") + + elem := 0 + // while the array contains values + for dec.More() { + var m Message + // decode an array value (Message) + assertErr(t, dec.Decode(&m)) + if m.Name == "" || m.Text == "" { + t.Fatal("failed to assign value to struct field") + } + elem++ + } + assertEq(t, "decode count", elem, 5) + + tk, err = dec.Token() + assertErr(t, err) + assertEq(t, "]", fmt.Sprint(tk), "]") +} diff --git a/decode_uint.go b/decode_uint.go index 1ba442d..5ab2277 100644 --- a/decode_uint.go +++ b/decode_uint.go @@ -24,6 +24,38 @@ func (d *uintDecoder) parseUint(b []byte) uint64 { return sum } +func (d *uintDecoder) decodeStreamByte(s *stream) ([]byte, error) { + for { + switch s.char() { + case ' ', '\n', '\t', '\r': + s.cursor++ + continue + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + start := s.cursor + for { + s.cursor++ + if numTable[s.char()] { + continue + } else if s.char() == nul { + if s.read() { + s.cursor-- // for retry current character + continue + } + } + break + } + num := s.buf[start:s.cursor] + return num, nil + case nul: + if s.read() { + continue + } + } + break + } + return nil, errUnexpectedEndOfJSON("number(unsigned integer)", s.totalOffset()) +} + func (d *uintDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { buflen := int64(len(buf)) for ; cursor < buflen; cursor++ { @@ -49,6 +81,15 @@ func (d *uintDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error return nil, 0, errUnexpectedEndOfJSON("number(unsigned integer)", cursor) } +func (d *uintDecoder) decodeStream(s *stream, p uintptr) error { + bytes, err := d.decodeStreamByte(s) + if err != nil { + return err + } + d.op(p, d.parseUint(bytes)) + return nil +} + func (d *uintDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { bytes, c, err 
:= d.decodeByte(buf, cursor) if err != nil { diff --git a/decode_unmarshal_json.go b/decode_unmarshal_json.go index dbd796f..f84f850 100644 --- a/decode_unmarshal_json.go +++ b/decode_unmarshal_json.go @@ -12,6 +12,23 @@ func newUnmarshalJSONDecoder(typ *rtype) *unmarshalJSONDecoder { return &unmarshalJSONDecoder{typ: typ} } +func (d *unmarshalJSONDecoder) decodeStream(s *stream, p uintptr) error { + s.skipWhiteSpace() + start := s.cursor + if err := s.skipValue(); err != nil { + return err + } + src := s.buf[start:s.cursor] + v := *(*interface{})(unsafe.Pointer(&interfaceHeader{ + typ: d.typ, + ptr: unsafe.Pointer(p), + })) + if err := v.(Unmarshaler).UnmarshalJSON(src); err != nil { + return err + } + return nil +} + func (d *unmarshalJSONDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { cursor = skipWhiteSpace(buf, cursor) start := cursor diff --git a/decode_unmarshal_text.go b/decode_unmarshal_text.go index dfcec9a..73ea335 100644 --- a/decode_unmarshal_text.go +++ b/decode_unmarshal_text.go @@ -13,6 +13,23 @@ func newUnmarshalTextDecoder(typ *rtype) *unmarshalTextDecoder { return &unmarshalTextDecoder{typ: typ} } +func (d *unmarshalTextDecoder) decodeStream(s *stream, p uintptr) error { + s.skipWhiteSpace() + start := s.cursor + if err := s.skipValue(); err != nil { + return err + } + src := s.buf[start:s.cursor] + v := *(*interface{})(unsafe.Pointer(&interfaceHeader{ + typ: d.typ, + ptr: unsafe.Pointer(p), + })) + if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil { + return err + } + return nil +} + func (d *unmarshalTextDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { cursor = skipWhiteSpace(buf, cursor) start := cursor diff --git a/json.go b/json.go index fea6348..a29a061 100644 --- a/json.go +++ b/json.go @@ -264,3 +264,14 @@ func UnmarshalNoEscape(data []byte, v interface{}) error { var dec Decoder return dec.decodeForUnmarshalNoEscape(src, v) } + +// A Token holds a value of one of these types: 
+// +// Delim, for the four JSON delimiters [ ] { } +// bool, for JSON booleans +// float64, for JSON numbers +// Number, for JSON numbers +// string, for JSON string literals +// nil, for JSON null +// +type Token interface{}