diff --git a/decode.go b/decode.go index 2dbb0ee..a9d261e 100644 --- a/decode.go +++ b/decode.go @@ -1,35 +1,26 @@ package json import ( - "bytes" "encoding" + "fmt" "io" "reflect" - "strings" "sync" "unsafe" ) -// A Token holds a value of one of these types: -// -// Delim, for the four JSON delimiters [ ] { } -// bool, for JSON booleans -// float64, for JSON numbers -// Number, for JSON numbers -// string, for JSON string literals -// nil, for JSON null -// -type Token interface{} - type Delim rune +func (d Delim) String() string { + return string(d) +} + type decoder interface { decode([]byte, int64, uintptr) (int64, error) } type Decoder struct { - r io.Reader - buffered func() io.Reader + s *stream } type decoderMap struct { @@ -62,13 +53,13 @@ func init() { // The decoder introduces its own buffering and may // read data from r beyond the JSON values requested. func NewDecoder(r io.Reader) *Decoder { - return &Decoder{r: r} + return &Decoder{s: &stream{r: r}} } // Buffered returns a reader of the data remaining in the Decoder's // buffer. The reader is valid until the next call to Decode. func (d *Decoder) Buffered() io.Reader { - return d.buffered() + return d.s.buffered() } func (d *Decoder) validateType(typ *rtype, p uintptr) error { @@ -116,6 +107,21 @@ func (d *Decoder) decodeForUnmarshalNoEscape(src []byte, v interface{}) error { return d.decode(src, header) } +func (d *Decoder) prepareForDecode() error { + s := d.s + for ; s.cursor < s.length || s.read(); s.cursor++ { + switch s.char() { + case ' ', '\t', '\r', '\n': + continue + case ',', ':': + s.cursor++ + return nil + } + break + } + return nil +} + // Decode reads the next JSON-encoded value from its // input and stores it in the value pointed to by v. // @@ -142,251 +148,61 @@ func (d *Decoder) Decode(v interface{}) error { cachedDecoder.set(typeptr, compiledDec) dec = compiledDec } - - for { - buf := make([]byte, 1024) - n, err := d.r.Read(buf) - if n == 0 || err == io.EOF { - return nil - } - if err != nil { - return err - } - cursor, err := dec.decode(buf[:n], 0, ptr) - if err != nil { - return err - } - d.buffered = func() io.Reader { - return bytes.NewReader(buf[cursor:]) - } + if err := d.prepareForDecode(); err != nil { + return err + } + s := d.s + cursor, err := dec.decode(s.buf[s.cursor:], 0, ptr) + s.cursor += cursor + fmt.Println("cursor = ", cursor, "next buf = ", string(s.buf[s.cursor:])) + if err != nil { + return err } return nil } -func (d *Decoder) compileHead(typ *rtype) (decoder, error) { - if typ.Implements(unmarshalJSONType) { - return newUnmarshalJSONDecoder(typ), nil - } else if typ.Implements(unmarshalTextType) { - return newUnmarshalTextDecoder(typ), nil - } - return d.compile(typ.Elem()) -} - -func (d *Decoder) compile(typ *rtype) (decoder, error) { - if typ.Implements(unmarshalJSONType) { - return newUnmarshalJSONDecoder(typ), nil - } else if typ.Implements(unmarshalTextType) { - return newUnmarshalTextDecoder(typ), nil - } - switch typ.Kind() { - case reflect.Ptr: - return d.compilePtr(typ) - case reflect.Struct: - return d.compileStruct(typ) - case reflect.Slice: - return d.compileSlice(typ) - case reflect.Array: - return d.compileArray(typ) - case reflect.Map: - return d.compileMap(typ) - case reflect.Interface: - return d.compileInterface(typ) - case reflect.Int: - return d.compileInt() - case reflect.Int8: - return d.compileInt8() - case reflect.Int16: - return d.compileInt16() - case reflect.Int32: - return d.compileInt32() - case reflect.Int64: - return d.compileInt64() - case reflect.Uint: - return d.compileUint() - case reflect.Uint8: - return d.compileUint8() - case reflect.Uint16: - return d.compileUint16() - case reflect.Uint32: - return d.compileUint32() - case reflect.Uint64: - return d.compileUint64() - case reflect.String: - return d.compileString() - case reflect.Bool: - return d.compileBool() - case reflect.Float32: - return d.compileFloat32() - case reflect.Float64: - return d.compileFloat64() - } - return nil, &UnsupportedTypeError{Type: rtype2type(typ)} -} - -func (d *Decoder) compilePtr(typ *rtype) (decoder, error) { - dec, err := d.compile(typ.Elem()) - if err != nil { - return nil, err - } - return newPtrDecoder(dec, typ.Elem()), nil -} - -func (d *Decoder) compileInt() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int)(unsafe.Pointer(p)) = int(v) - }), nil -} - -func (d *Decoder) compileInt8() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int8)(unsafe.Pointer(p)) = int8(v) - }), nil -} - -func (d *Decoder) compileInt16() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int16)(unsafe.Pointer(p)) = int16(v) - }), nil -} - -func (d *Decoder) compileInt32() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int32)(unsafe.Pointer(p)) = int32(v) - }), nil -} - -func (d *Decoder) compileInt64() (decoder, error) { - return newIntDecoder(func(p uintptr, v int64) { - *(*int64)(unsafe.Pointer(p)) = v - }), nil -} - -func (d *Decoder) compileUint() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint)(unsafe.Pointer(p)) = uint(v) - }), nil -} - -func (d *Decoder) compileUint8() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint8)(unsafe.Pointer(p)) = uint8(v) - }), nil -} - -func (d *Decoder) compileUint16() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint16)(unsafe.Pointer(p)) = uint16(v) - }), nil -} - -func (d *Decoder) compileUint32() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint32)(unsafe.Pointer(p)) = uint32(v) - }), nil -} - -func (d *Decoder) compileUint64() (decoder, error) { - return newUintDecoder(func(p uintptr, v uint64) { - *(*uint64)(unsafe.Pointer(p)) = v - }), nil -} - -func (d *Decoder) compileFloat32() (decoder, error) { - return newFloatDecoder(func(p uintptr, v float64) { - *(*float32)(unsafe.Pointer(p)) = float32(v) - }), nil -} - -func (d *Decoder) compileFloat64() (decoder, error) { - return newFloatDecoder(func(p uintptr, v float64) { - *(*float64)(unsafe.Pointer(p)) = v - }), nil -} - -func (d *Decoder) compileString() (decoder, error) { - return newStringDecoder(), nil -} - -func (d *Decoder) compileBool() (decoder, error) { - return newBoolDecoder(), nil -} - -func (d *Decoder) compileSlice(typ *rtype) (decoder, error) { - elem := typ.Elem() - decoder, err := d.compile(elem) - if err != nil { - return nil, err - } - return newSliceDecoder(decoder, elem, elem.Size()), nil -} - -func (d *Decoder) compileArray(typ *rtype) (decoder, error) { - elem := typ.Elem() - decoder, err := d.compile(elem) - if err != nil { - return nil, err - } - return newArrayDecoder(decoder, elem, typ.Len()), nil -} - -func (d *Decoder) compileMap(typ *rtype) (decoder, error) { - keyDec, err := d.compile(typ.Key()) - if err != nil { - return nil, err - } - valueDec, err := d.compile(typ.Elem()) - if err != nil { - return nil, err - } - return newMapDecoder(typ, keyDec, valueDec), nil -} - -func (d *Decoder) compileInterface(typ *rtype) (decoder, error) { - return newInterfaceDecoder(typ), nil -} - -func (d *Decoder) getTag(field reflect.StructField) string { - return field.Tag.Get("json") -} - -func (d *Decoder) isIgnoredStructField(field reflect.StructField) bool { - if field.PkgPath != "" && !field.Anonymous { - // private field - return true - } - tag := d.getTag(field) - if tag == "-" { - return true - } - return false -} - -func (d *Decoder) compileStruct(typ *rtype) (decoder, error) { - fieldNum := typ.NumField() - fieldMap := map[string]*structFieldSet{} - for i := 0; i < fieldNum; i++ { - field := typ.Field(i) - if d.isIgnoredStructField(field) { +func (d *Decoder) More() bool { + s := d.s + for ; s.cursor < s.length || s.read(); s.cursor++ { + switch s.char() { + case ' ', '\n', '\r', '\t': continue + case '}', ']': + return false } - keyName := field.Name - tag := d.getTag(field) - opts := strings.Split(tag, ",") - if len(opts) > 0 { - if opts[0] != "" { - keyName = opts[0] - } - } - dec, err := d.compile(type2rtype(field.Type)) - if err != nil { - return nil, err - } - fieldSet := &structFieldSet{dec: dec, offset: field.Offset} - fieldMap[field.Name] = fieldSet - fieldMap[keyName] = fieldSet - fieldMap[strings.ToLower(keyName)] = fieldSet + break } - return newStructDecoder(fieldMap), nil + return true +} + +func (d *Decoder) Token() (Token, error) { + s := d.s + for ; s.cursor < s.length || s.read(); s.cursor++ { + switch s.char() { + case ' ', '\n', '\r', '\t': + continue + case '{': + s.cursor++ + return Delim('{'), nil + case '[': + s.cursor++ + return Delim('['), nil + case '}': + s.cursor++ + return Delim('}'), nil + case ']': + s.cursor++ + return Delim(']'), nil + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + case '"': + case 't': + case 'f': + case 'n': + default: + return nil, errInvalidCharacter(s.char(), "token", s.totalOffset()) + } + } + return nil, io.EOF } // DisallowUnknownFields causes the Decoder to return an error when the destination @@ -400,14 +216,6 @@ func (d *Decoder) InputOffset() int64 { return 0 } -func (d *Decoder) More() bool { - return false -} - -func (d *Decoder) Token() (Token, error) { - return nil, nil -} - // UseNumber causes the Decoder to unmarshal a number into an interface{} as a // Number instead of as a float64. func (d *Decoder) UseNumber() { diff --git a/decode_compile.go b/decode_compile.go new file mode 100644 index 0000000..d34b485 --- /dev/null +++ b/decode_compile.go @@ -0,0 +1,233 @@ +package json + +import ( + "reflect" + "strings" + "unsafe" +) + +func (d *Decoder) compileHead(typ *rtype) (decoder, error) { + if typ.Implements(unmarshalJSONType) { + return newUnmarshalJSONDecoder(typ), nil + } else if typ.Implements(unmarshalTextType) { + return newUnmarshalTextDecoder(typ), nil + } + return d.compile(typ.Elem()) +} + +func (d *Decoder) compile(typ *rtype) (decoder, error) { + if typ.Implements(unmarshalJSONType) { + return newUnmarshalJSONDecoder(typ), nil + } else if typ.Implements(unmarshalTextType) { + return newUnmarshalTextDecoder(typ), nil + } + switch typ.Kind() { + case reflect.Ptr: + return d.compilePtr(typ) + case reflect.Struct: + return d.compileStruct(typ) + case reflect.Slice: + return d.compileSlice(typ) + case reflect.Array: + return d.compileArray(typ) + case reflect.Map: + return d.compileMap(typ) + case reflect.Interface: + return d.compileInterface(typ) + case reflect.Int: + return d.compileInt() + case reflect.Int8: + return d.compileInt8() + case reflect.Int16: + return d.compileInt16() + case reflect.Int32: + return d.compileInt32() + case reflect.Int64: + return d.compileInt64() + case reflect.Uint: + return d.compileUint() + case reflect.Uint8: + return d.compileUint8() + case reflect.Uint16: + return d.compileUint16() + case reflect.Uint32: + return d.compileUint32() + case reflect.Uint64: + return d.compileUint64() + case reflect.String: + return d.compileString() + case reflect.Bool: + return d.compileBool() + case reflect.Float32: + return d.compileFloat32() + case reflect.Float64: + return d.compileFloat64() + } + return nil, &UnsupportedTypeError{Type: rtype2type(typ)} +} + +func (d *Decoder) compilePtr(typ *rtype) (decoder, error) { + dec, err := d.compile(typ.Elem()) + if err != nil { + return nil, err + } + return newPtrDecoder(dec, typ.Elem()), nil +} + +func (d *Decoder) compileInt() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int)(unsafe.Pointer(p)) = int(v) + }), nil +} + +func (d *Decoder) compileInt8() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int8)(unsafe.Pointer(p)) = int8(v) + }), nil +} + +func (d *Decoder) compileInt16() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int16)(unsafe.Pointer(p)) = int16(v) + }), nil +} + +func (d *Decoder) compileInt32() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int32)(unsafe.Pointer(p)) = int32(v) + }), nil +} + +func (d *Decoder) compileInt64() (decoder, error) { + return newIntDecoder(func(p uintptr, v int64) { + *(*int64)(unsafe.Pointer(p)) = v + }), nil +} + +func (d *Decoder) compileUint() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint)(unsafe.Pointer(p)) = uint(v) + }), nil +} + +func (d *Decoder) compileUint8() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint8)(unsafe.Pointer(p)) = uint8(v) + }), nil +} + +func (d *Decoder) compileUint16() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint16)(unsafe.Pointer(p)) = uint16(v) + }), nil +} + +func (d *Decoder) compileUint32() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint32)(unsafe.Pointer(p)) = uint32(v) + }), nil +} + +func (d *Decoder) compileUint64() (decoder, error) { + return newUintDecoder(func(p uintptr, v uint64) { + *(*uint64)(unsafe.Pointer(p)) = v + }), nil +} + +func (d *Decoder) compileFloat32() (decoder, error) { + return newFloatDecoder(func(p uintptr, v float64) { + *(*float32)(unsafe.Pointer(p)) = float32(v) + }), nil +} + +func (d *Decoder) compileFloat64() (decoder, error) { + return newFloatDecoder(func(p uintptr, v float64) { + *(*float64)(unsafe.Pointer(p)) = v + }), nil +} + +func (d *Decoder) compileString() (decoder, error) { + return newStringDecoder(), nil +} + +func (d *Decoder) compileBool() (decoder, error) { + return newBoolDecoder(), nil +} + +func (d *Decoder) compileSlice(typ *rtype) (decoder, error) { + elem := typ.Elem() + decoder, err := d.compile(elem) + if err != nil { + return nil, err + } + return newSliceDecoder(decoder, elem, elem.Size()), nil +} + +func (d *Decoder) compileArray(typ *rtype) (decoder, error) { + elem := typ.Elem() + decoder, err := d.compile(elem) + if err != nil { + return nil, err + } + return newArrayDecoder(decoder, elem, typ.Len()), nil +} + +func (d *Decoder) compileMap(typ *rtype) (decoder, error) { + keyDec, err := d.compile(typ.Key()) + if err != nil { + return nil, err + } + valueDec, err := d.compile(typ.Elem()) + if err != nil { + return nil, err + } + return newMapDecoder(typ, keyDec, valueDec), nil +} + +func (d *Decoder) compileInterface(typ *rtype) (decoder, error) { + return newInterfaceDecoder(typ), nil +} + +func (d *Decoder) getTag(field reflect.StructField) string { + return field.Tag.Get("json") +} + +func (d *Decoder) isIgnoredStructField(field reflect.StructField) bool { + if field.PkgPath != "" && !field.Anonymous { + // private field + return true + } + tag := d.getTag(field) + if tag == "-" { + return true + } + return false +} + +func (d *Decoder) compileStruct(typ *rtype) (decoder, error) { + fieldNum := typ.NumField() + fieldMap := map[string]*structFieldSet{} + for i := 0; i < fieldNum; i++ { + field := typ.Field(i) + if d.isIgnoredStructField(field) { + continue + } + keyName := field.Name + tag := d.getTag(field) + opts := strings.Split(tag, ",") + if len(opts) > 0 { + if opts[0] != "" { + keyName = opts[0] + } + } + dec, err := d.compile(type2rtype(field.Type)) + if err != nil { + return nil, err + } + fieldSet := &structFieldSet{dec: dec, offset: field.Offset} + fieldMap[field.Name] = fieldSet + fieldMap[keyName] = fieldSet + fieldMap[strings.ToLower(keyName)] = fieldSet + } + return newStructDecoder(fieldMap), nil +} diff --git a/decode_int.go b/decode_int.go index af96458..d512cb8 100644 --- a/decode_int.go +++ b/decode_int.go @@ -49,6 +49,43 @@ var ( } ) +func (d *intDecoder) decodeByteStream(s *stream) ([]byte, error) { + for ; s.cursor < s.length || s.read(); s.cursor++ { + switch s.char() { + case ' ', '\n', '\t', '\r': + continue + case '-': + start := s.cursor + s.cursor++ + for ; s.cursor < s.length || s.read(); s.cursor++ { + if numTable[s.char()] { + continue + } + break + } + num := s.buf[start:s.cursor] + if len(num) < 2 { + return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset()) + } + return num, nil + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + start := s.cursor + s.cursor++ + for ; s.cursor < s.length || s.read(); s.cursor++ { + if numTable[s.char()] { + continue + } + break + } + num := s.buf[start:s.cursor] + return num, nil + default: + return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset()) + } + } + return nil, errUnexpectedEndOfJSON("number(integer)", s.totalOffset()) +} + func (d *intDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { for { switch buf[cursor] { @@ -72,6 +109,15 @@ func (d *intDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) return nil, 0, errUnexpectedEndOfJSON("number(integer)", cursor) } +func (d *intDecoder) decodeStream(s *stream, p uintptr) error { + bytes, err := d.decodeByteStream(s) + if err != nil { + return err + } + d.op(p, d.parseInt(bytes)) + return nil +} + func (d *intDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { bytes, c, err := d.decodeByte(buf, cursor) if err != nil { diff --git a/decode_stream.go b/decode_stream.go new file mode 100644 index 0000000..37e48e5 --- /dev/null +++ b/decode_stream.go @@ -0,0 +1,49 @@ +package json + +import ( + "bytes" + "io" +) + +const ( + readChunkSize = 1024 +) + +type stream struct { + buf []byte + length int64 + r io.Reader + decodedPos int64 + offset int64 + cursor int64 +} + +func (s *stream) buffered() io.Reader { + return bytes.NewReader(s.buf[s.cursor:]) +} + +func (s *stream) totalOffset() int64 { + return s.offset + s.cursor +} + +func (s *stream) char() byte { + return s.buf[s.cursor] +} + +func (s *stream) read() bool { + buf := make([]byte, readChunkSize) + n, err := s.r.Read(buf) + if n == 0 || err == io.EOF { + return false + } + remain := s.length - s.decodedPos + newBuf := make([]byte, remain+int64(n)) + copy(newBuf, s.buf[s.decodedPos:]) + copy(newBuf[remain:], buf) + s.buf = newBuf + s.length = int64(len(newBuf)) + s.offset += s.decodedPos + s.cursor = 0 + s.decodedPos = 0 + return true +} diff --git a/decode_test.go b/decode_test.go index c53569c..58b4cc9 100644 --- a/decode_test.go +++ b/decode_test.go @@ -3,6 +3,7 @@ package json_test import ( "fmt" "reflect" + "strings" "testing" "github.com/goccy/go-json" @@ -231,3 +232,40 @@ func Test_InvalidUnmarshalError(t *testing.T) { assertEq(t, "invalid unmarshal error", "json: Unmarshal(non-pointer int)", err) }) } + +func Test_DecodeStream(t *testing.T) { + const stream = ` + [ + {"Name": "Ed", "Text": "Knock knock."}, + {"Name": "Sam", "Text": "Who's there?"}, + {"Name": "Ed", "Text": "Go fmt."}, + {"Name": "Sam", "Text": "Go fmt who?"}, + {"Name": "Ed", "Text": "Go fmt yourself!"} + ] +` + type Message struct { + Name, Text string + } + dec := json.NewDecoder(strings.NewReader(stream)) + + tk, err := dec.Token() + assertErr(t, err) + assertEq(t, "[", fmt.Sprint(tk), "[") + + elem := 0 + // while the array contains values + for dec.More() { + var m Message + // decode an array value (Message) + assertErr(t, dec.Decode(&m)) + if m.Name == "" || m.Text == "" { + t.Fatal("failed to assign value to struct field") + } + elem++ + } + assertEq(t, "decode count", elem, 5) + + tk, err = dec.Token() + assertErr(t, err) + assertEq(t, "]", fmt.Sprint(tk), "]") +} diff --git a/json.go b/json.go index fea6348..a29a061 100644 --- a/json.go +++ b/json.go @@ -264,3 +264,14 @@ func UnmarshalNoEscape(data []byte, v interface{}) error { var dec Decoder return dec.decodeForUnmarshalNoEscape(src, v) } + +// A Token holds a value of one of these types: +// +// Delim, for the four JSON delimiters [ ] { } +// bool, for JSON booleans +// float64, for JSON numbers +// Number, for JSON numbers +// string, for JSON string literals +// nil, for JSON null +// +type Token interface{}