From 80acd42b8049af861ea287246386a73cfcb62699 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 31 Jul 2020 17:10:03 +0900 Subject: [PATCH] Optimize streaming decoder --- benchmarks/decode_test.go | 5 +- decode.go | 36 ++++++--- decode_array.go | 23 ++++-- decode_bool.go | 61 +++++++++----- decode_float.go | 19 ++++- decode_int.go | 26 ++++-- decode_interface.go | 162 ++++++++++++++++---------------------- decode_map.go | 11 ++- decode_slice.go | 24 +++++- decode_stream.go | 44 ++++++----- decode_string.go | 37 ++++++--- decode_struct.go | 20 ++++- decode_uint.go | 19 +++-- 13 files changed, 297 insertions(+), 190 deletions(-) diff --git a/benchmarks/decode_test.go b/benchmarks/decode_test.go index 97c9a56..47d01e0 100644 --- a/benchmarks/decode_test.go +++ b/benchmarks/decode_test.go @@ -52,10 +52,11 @@ func Benchmark_Decode_SmallStruct_GoJayUnsafe(b *testing.B) { func Benchmark_Decode_SmallStruct_GoJsonDecode(b *testing.B) { b.ReportAllocs() + reader := bytes.NewReader(SmallFixture) for i := 0; i < b.N; i++ { result := SmallPayload{} - buf := bytes.NewBuffer(SmallFixture) - if err := gojson.NewDecoder(buf).Decode(&result); err != nil { + reader.Reset(SmallFixture) + if err := gojson.NewDecoder(reader).Decode(&result); err != nil { b.Fatal(err) } } diff --git a/decode.go b/decode.go index 2d9ff8e..057533e 100644 --- a/decode.go +++ b/decode.go @@ -49,6 +49,10 @@ func init() { cachedDecoder = decoderMap{} } +const ( + nul = '\000' +) + // NewDecoder returns a new decoder that reads from r. 
// // The decoder introduces its own buffering and may @@ -115,11 +119,16 @@ func (d *Decoder) prepareForDecode() error { for { switch s.char() { case ' ', '\t', '\r', '\n': - s.progress() + s.cursor++ continue case ',', ':': - s.progress() + s.cursor++ return nil + case nul: + if s.read() { + continue + } + return io.EOF } break } @@ -167,10 +176,14 @@ func (d *Decoder) More() bool { for { switch s.char() { case ' ', '\n', '\r', '\t': - if s.progress() { + s.cursor++ + continue + case '}', ']': + return false + case nul: + if s.read() { continue } - case '}', ']': return false } break @@ -184,16 +197,12 @@ func (d *Decoder) Token() (Token, error) { c := s.char() switch c { case ' ', '\n', '\r', '\t': - if s.progress() { - continue - } + s.cursor++ case '{', '[', ']', '}': - s.progress() + s.cursor++ return Delim(c), nil case ',', ':': - if s.progress() { - continue - } + s.cursor++ case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': bytes := floatBytes(s) s := *(*string)(unsafe.Pointer(&bytes)) @@ -223,7 +232,10 @@ func (d *Decoder) Token() (Token, error) { return nil, err } return nil, nil - case '\000': + case nul: + if s.read() { + continue + } return nil, io.EOF default: return nil, errInvalidCharacter(s.char(), "token", s.totalOffset()) diff --git a/decode_array.go b/decode_array.go index a1b89b8..7e34d85 100644 --- a/decode_array.go +++ b/decode_array.go @@ -23,27 +23,38 @@ func (d *arrayDecoder) decodeStream(s *stream, p uintptr) error { case '[': idx := 0 for { - s.progress() + s.cursor++ if err := d.valueDecoder.decodeStream(s, p+uintptr(idx)*d.size); err != nil { return err } s.skipWhiteSpace() switch s.char() { case ']': - s.progress() + s.cursor++ return nil case ',': idx++ + case nul: + if s.read() { + continue + } + goto ERROR default: - return errInvalidCharacter(s.char(), "array", s.offset) + goto ERROR } } + case nul: + if s.read() { + continue + } + goto ERROR default: - return errUnexpectedEndOfJSON("array", s.offset) + goto ERROR } - 
s.progress() + s.cursor++ } - return errUnexpectedEndOfJSON("array", s.offset) +ERROR: + return errUnexpectedEndOfJSON("array", s.totalOffset()) } func (d *arrayDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) { diff --git a/decode_bool.go b/decode_bool.go index a5dd822..de1edc5 100644 --- a/decode_bool.go +++ b/decode_bool.go @@ -11,59 +11,78 @@ func newBoolDecoder() *boolDecoder { } func trueBytes(s *stream) error { - s.progress() + if s.cursor+3 >= s.length { + if !s.read() { + return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) + } + } + s.cursor++ if s.char() != 'r' { return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 'u' { return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 'e' { return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) } - s.progress() + s.cursor++ return nil } func falseBytes(s *stream) error { - s.progress() + if s.cursor+4 >= s.length { + if !s.read() { + return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) + } + } + s.cursor++ if s.char() != 'a' { return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 'l' { return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 's' { return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 'e' { return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) } - s.progress() + s.cursor++ return nil } func (d *boolDecoder) decodeStream(s *stream, p uintptr) error { s.skipWhiteSpace() - switch s.char() { - case 't': - if err := trueBytes(s); err != nil { - return err + for { + switch s.char() { + case 't': + if err := trueBytes(s); err != nil { + return err + } + *(*bool)(unsafe.Pointer(p)) = true + return nil + case 'f': + if err := falseBytes(s);
err != nil { + return err + } + *(*bool)(unsafe.Pointer(p)) = false + return nil + case nul: + if s.read() { + continue + } + goto ERROR } - *(*bool)(unsafe.Pointer(p)) = true - return nil - case 'f': - if err := falseBytes(s); err != nil { - return err - } - *(*bool)(unsafe.Pointer(p)) = false - return nil + break } +ERROR: return errUnexpectedEndOfJSON("bool", s.totalOffset()) } diff --git a/decode_float.go b/decode_float.go index 6c7d12b..2cdabe2 100644 --- a/decode_float.go +++ b/decode_float.go @@ -31,9 +31,14 @@ var floatTable = [256]bool{ func floatBytes(s *stream) []byte { start := s.cursor - for s.progress() { + for { + s.cursor++ if floatTable[s.char()] { continue + } else if s.char() == nul { + if s.read() { + continue + } } break } @@ -44,15 +49,21 @@ func (d *floatDecoder) decodeStreamByte(s *stream) ([]byte, error) { for { switch s.char() { case ' ', '\n', '\t', '\r': - s.progress() + s.cursor++ continue case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return floatBytes(s), nil + case nul: + if s.read() { + continue + } + goto ERROR default: - return nil, errUnexpectedEndOfJSON("float", s.offset) + goto ERROR } } - return nil, errUnexpectedEndOfJSON("float", s.offset) +ERROR: + return nil, errUnexpectedEndOfJSON("float", s.totalOffset()) } func (d *floatDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { diff --git a/decode_int.go b/decode_int.go index 0c445f8..676490d 100644 --- a/decode_int.go +++ b/decode_int.go @@ -53,37 +53,53 @@ func (d *intDecoder) decodeStreamByte(s *stream) ([]byte, error) { for { switch s.char() { case ' ', '\n', '\t', '\r': - s.progress() + s.cursor++ continue case '-': start := s.cursor - for s.progress() { + for { + s.cursor++ if numTable[s.char()] { continue + } else if s.char() == nul { + if s.read() { + continue + } } break } num := s.buf[start:s.cursor] s.reset() if len(num) < 2 { - return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset()) + goto ERROR } return num, 
nil case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': start := s.cursor - for s.progress() { + for { + s.cursor++ if numTable[s.char()] { continue + } else if s.char() == nul { + if s.read() { + continue + } } break } num := s.buf[start:s.cursor] s.reset() return num, nil + case nul: + if s.read() { + continue + } + goto ERROR default: - return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset()) + goto ERROR } } +ERROR: return nil, errUnexpectedEndOfJSON("number(integer)", s.totalOffset()) } diff --git a/decode_interface.go b/decode_interface.go index eda9606..e6af61c 100644 --- a/decode_interface.go +++ b/decode_interface.go @@ -22,101 +22,77 @@ var ( func (d *interfaceDecoder) decodeStream(s *stream, p uintptr) error { s.skipWhiteSpace() - switch s.char() { - case '{': - var v map[interface{}]interface{} - ptr := unsafe.Pointer(&v) - d.dummy = ptr - dec := newMapDecoder(interfaceMapType, newInterfaceDecoder(d.typ), newInterfaceDecoder(d.typ)) - if err := dec.decodeStream(s, uintptr(ptr)); err != nil { - return err - } - *(*interface{})(unsafe.Pointer(p)) = v - return nil - case '[': - var v []interface{} - ptr := unsafe.Pointer(&v) - d.dummy = ptr // escape ptr - dec := newSliceDecoder(newInterfaceDecoder(d.typ), d.typ, d.typ.Size()) - if err := dec.decodeStream(s, uintptr(ptr)); err != nil { - return err - } - *(*interface{})(unsafe.Pointer(p)) = v - return nil - case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return newFloatDecoder(func(p uintptr, v float64) { - *(*interface{})(unsafe.Pointer(p)) = v - }).decodeStream(s, p) - case '"': - s.progress() - start := s.cursor - for { - switch s.char() { - case '\\': - s.progress() - case '"': - literal := s.buf[start:s.cursor] - s.progress() - *(*interface{})(unsafe.Pointer(p)) = *(*string)(unsafe.Pointer(&literal)) - return nil - case '\000': - return errUnexpectedEndOfJSON("string", s.totalOffset()) + for { + switch s.char() { + case '{': + var v map[interface{}]interface{} + 
ptr := unsafe.Pointer(&v) + d.dummy = ptr + dec := newMapDecoder(interfaceMapType, newInterfaceDecoder(d.typ), newInterfaceDecoder(d.typ)) + if err := dec.decodeStream(s, uintptr(ptr)); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = v + return nil + case '[': + var v []interface{} + ptr := unsafe.Pointer(&v) + d.dummy = ptr // escape ptr + dec := newSliceDecoder(newInterfaceDecoder(d.typ), d.typ, d.typ.Size()) + if err := dec.decodeStream(s, uintptr(ptr)); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = v + return nil + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return newFloatDecoder(func(p uintptr, v float64) { + *(*interface{})(unsafe.Pointer(p)) = v + }).decodeStream(s, p) + case '"': + s.cursor++ + start := s.cursor + for { + switch s.char() { + case '\\': + s.cursor++ + case '"': + literal := s.buf[start:s.cursor] + s.cursor++ + *(*interface{})(unsafe.Pointer(p)) = *(*string)(unsafe.Pointer(&literal)) + return nil + case nul: + if s.read() { + continue + } + return errUnexpectedEndOfJSON("string", s.totalOffset()) + } + s.cursor++ + } + return errUnexpectedEndOfJSON("string", s.totalOffset()) + case 't': + if err := trueBytes(s); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = true + return nil + case 'f': + if err := falseBytes(s); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = false + return nil + case 'n': + if err := nullBytes(s); err != nil { + return err + } + *(*interface{})(unsafe.Pointer(p)) = nil + return nil + case nul: + if s.read() { + continue } - s.progress() } - return errUnexpectedEndOfJSON("string", s.totalOffset()) - case 't': - s.progress() - if s.char() != 'r' { - return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) - } - s.progress() - if s.char() != 'u' { - return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset()) - } - s.progress() - if s.char() != 'e' { - return errInvalidCharacter(s.char(), "bool(true)", 
s.totalOffset()) - } - s.progress() - *(*interface{})(unsafe.Pointer(p)) = true - return nil - case 'f': - s.progress() - if s.char() != 'a' { - return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) - } - s.progress() - if s.char() != 'l' { - return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) - } - s.progress() - if s.char() != 's' { - return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) - } - s.progress() - if s.char() != 'e' { - return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) - } - s.progress() - *(*interface{})(unsafe.Pointer(p)) = false - return nil - case 'n': - s.progress() - if s.char() != 'u' { - return errInvalidCharacter(s.char(), "null", s.totalOffset()) - } - s.progress() - if s.char() != 'l' { - return errInvalidCharacter(s.char(), "null", s.totalOffset()) - } - s.progress() - if s.char() != 'l' { - return errInvalidCharacter(s.char(), "null", s.totalOffset()) - } - s.progress() - *(*interface{})(unsafe.Pointer(p)) = nil - return nil + break } return errNotAtBeginningOfValue(s.totalOffset()) } diff --git a/decode_map.go b/decode_map.go index 239d9fa..2ae99fb 100644 --- a/decode_map.go +++ b/decode_map.go @@ -51,16 +51,20 @@ func (d *mapDecoder) decodeStream(s *stream, p uintptr) error { return errExpected("{ character for map value", s.totalOffset()) } mapValue := makemap(d.mapType, 0) - for s.progress() { + for { + s.cursor++ var key interface{} if err := d.setKeyStream(s, &key); err != nil { return err } s.skipWhiteSpace() + if s.char() == nul { + s.read() + } if s.char() != ':' { return errExpected("colon after object key", s.totalOffset()) } - s.progress() + s.cursor++ if s.end() { return errUnexpectedEndOfJSON("map", s.totalOffset()) } @@ -70,6 +74,9 @@ func (d *mapDecoder) decodeStream(s *stream, p uintptr) error { } mapassign(d.mapType, mapValue, unsafe.Pointer(&key), unsafe.Pointer(&value)) s.skipWhiteSpace() + if s.char() == nul { + s.read() + } if s.char() == '}' { 
*(*unsafe.Pointer)(unsafe.Pointer(p)) = mapValue return nil diff --git a/decode_slice.go b/decode_slice.go index a76c3ae..8c7f2e6 100644 --- a/decode_slice.go +++ b/decode_slice.go @@ -51,14 +51,15 @@ func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error { for { switch s.char() { case ' ', '\n', '\t', '\r': - s.progress() + s.cursor++ continue case '[': idx := 0 slice := d.newSlice() cap := slice.Cap data := slice.Data - for s.progress() { + for { + s.cursor++ if cap <= idx { src := reflect.SliceHeader{Data: data, Len: idx, Cap: cap} cap *= 2 @@ -70,6 +71,7 @@ func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error { return err } s.skipWhiteSpace() + RETRY: switch s.char() { case ']': slice.Cap = cap @@ -84,20 +86,34 @@ func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error { copySlice(d.elemType, dst, *slice) *(*reflect.SliceHeader)(unsafe.Pointer(p)) = dst d.releaseSlice(slice) - s.progress() + s.cursor++ return nil case ',': idx++ continue + case nul: + if s.read() { + goto RETRY + } + slice.Cap = cap + slice.Data = data + d.releaseSlice(slice) + goto ERROR default: slice.Cap = cap slice.Data = data d.releaseSlice(slice) - return errInvalidCharacter(s.char(), "slice", s.totalOffset()) + goto ERROR } } + case nul: + if s.read() { + continue + } + goto ERROR } } +ERROR: return errUnexpectedEndOfJSON("slice", s.totalOffset()) } diff --git a/decode_stream.go b/decode_stream.go index 522bfa7..fd4ba46 100644 --- a/decode_stream.go +++ b/decode_stream.go @@ -38,15 +38,6 @@ func (s *stream) end() bool { return s.allRead && s.length <= s.cursor } -func (s *stream) progress() bool { - if s.cursor < s.length-1 || s.read() { - s.cursor++ - return true - } - s.cursor = s.length - return false -} - func (s *stream) progressN(n int64) bool { if s.cursor+n < s.length-1 || s.read() { s.cursor += n @@ -94,8 +85,12 @@ func (s *stream) read() bool { func (s *stream) skipWhiteSpace() { LOOP: if isWhiteSpace[s.char()] { - s.progress() + s.cursor++ goto LOOP 
+ } else if s.char() == nul { + if s.read() { + goto LOOP + } } } @@ -105,8 +100,11 @@ func (s *stream) skipValue() error { bracketCount := 0 for { switch s.char() { - case '\000': - return errUnexpectedEndOfJSON("value of object", s.offset) + case nul: + if s.read() { + continue + } + return errUnexpectedEndOfJSON("value of object", s.totalOffset()) case '{': braceCount++ case '[': @@ -123,7 +121,13 @@ func (s *stream) skipValue() error { return nil } case '"': - for s.progress() { + for { + s.cursor++ + if s.char() == nul { + if !s.read() { + return errUnexpectedEndOfJSON("value of string", s.totalOffset()) + } + } if s.char() != '"' { continue } @@ -131,16 +135,20 @@ func (s *stream) skipValue() error { continue } if bracketCount == 0 && braceCount == 0 { - s.progress() + s.cursor++ return nil } break } case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - for s.progress() { - tk := int(s.char()) - if (int('0') <= tk && tk <= int('9')) || tk == '.' || tk == 'e' || tk == 'E' { + for { + s.cursor++ + if floatTable[s.char()] { continue + } else if s.char() == nul { + if s.read() { + continue + } } break } @@ -149,7 +157,7 @@ func (s *stream) skipValue() error { } continue } - s.progress() + s.cursor++ } return errUnexpectedEndOfJSON("value of object", s.offset) } diff --git a/decode_string.go b/decode_string.go index 0d92acd..4eeb633 100644 --- a/decode_string.go +++ b/decode_string.go @@ -31,40 +31,48 @@ func (d *stringDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, erro } func stringBytes(s *stream) ([]byte, error) { - s.progress() + s.cursor++ start := s.cursor for { switch s.char() { case '\\': - s.progress() + s.cursor++ case '"': literal := s.buf[start:s.cursor] - s.progress() + s.cursor++ s.reset() return literal, nil - case '\000': + case nul: + if s.read() { + continue + } goto ERROR } - s.progress() + s.cursor++ } ERROR: return nil, errUnexpectedEndOfJSON("string", s.totalOffset()) } func nullBytes(s *stream) error { - s.progress() + 
if s.cursor+3 >= s.length { + if !s.read() { + return errInvalidCharacter(s.char(), "null", s.totalOffset()) + } + } + s.cursor++ if s.char() != 'u' { return errInvalidCharacter(s.char(), "null", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 'l' { return errInvalidCharacter(s.char(), "null", s.totalOffset()) } - s.progress() + s.cursor++ if s.char() != 'l' { return errInvalidCharacter(s.char(), "null", s.totalOffset()) } - s.progress() + s.cursor++ return nil } @@ -72,7 +80,8 @@ func (d *stringDecoder) decodeStreamByte(s *stream) ([]byte, error) { for { switch s.char() { case ' ', '\n', '\t', '\r': - s.progress() + s.cursor++ + continue case '"': return stringBytes(s) case 'n': @@ -80,11 +89,13 @@ func (d *stringDecoder) decodeStreamByte(s *stream) ([]byte, error) { return nil, err } return []byte{'n', 'u', 'l', 'l'}, nil - default: - goto ERROR + case nul: + if s.read() { + continue + } } + break } -ERROR: return nil, errNotAtBeginningOfValue(s.totalOffset()) } diff --git a/decode_struct.go b/decode_struct.go index c0e7802..decbc52 100644 --- a/decode_struct.go +++ b/decode_struct.go @@ -23,10 +23,13 @@ func newStructDecoder(fieldMap map[string]*structFieldSet) *structDecoder { func (d *structDecoder) decodeStream(s *stream, p uintptr) error { s.skipWhiteSpace() + if s.char() == nul { + s.read() + } if s.char() != '{' { return errNotAtBeginningOfValue(s.totalOffset()) } - s.progress() + s.cursor++ for { s.reset() key, err := d.keyDecoder.decodeStreamByte(s) @@ -34,10 +37,16 @@ func (d *structDecoder) decodeStream(s *stream, p uintptr) error { return err } s.skipWhiteSpace() + if s.char() == nul { + s.read() + } if s.char() != ':' { return errExpected("colon after object key", s.totalOffset()) } - s.progress() + s.cursor++ + if s.char() == nul { + s.read() + } if s.end() { return errExpected("object value after colon", s.totalOffset()) } @@ -53,15 +62,18 @@ func (d *structDecoder) decodeStream(s *stream, p uintptr) error { } } s.skipWhiteSpace() + 
if s.char() == nul { + s.read() + } c := s.char() if c == '}' { - s.progress() + s.cursor++ return nil } if c != ',' { return errExpected("comma after object element", s.totalOffset()) } - s.progress() + s.cursor++ } return nil } diff --git a/decode_uint.go b/decode_uint.go index 49418f7..0048e60 100644 --- a/decode_uint.go +++ b/decode_uint.go @@ -28,22 +28,29 @@ func (d *uintDecoder) decodeStreamByte(s *stream) ([]byte, error) { for { switch s.char() { case ' ', '\n', '\t', '\r': - s.progress() + s.cursor++ continue case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': start := s.cursor - for s.progress() { - tk := int(s.char()) - if int('0') <= tk && tk <= int('9') { + for { + s.cursor++ + if numTable[s.char()] { continue + } else if s.char() == nul { + if s.read() { + continue + } } break } num := s.buf[start:s.cursor] return num, nil - default: - return nil, errInvalidCharacter(s.char(), "number(unsigned integer)", s.totalOffset()) + case nul: + if s.read() { + continue + } } + break } return nil, errUnexpectedEndOfJSON("number(unsigned integer)", s.totalOffset()) }