Optimize streaming decoder

This commit is contained in:
Masaaki Goshima 2020-07-31 17:10:03 +09:00
parent 961b6a202e
commit 80acd42b80
13 changed files with 297 additions and 190 deletions

View File

@ -52,10 +52,11 @@ func Benchmark_Decode_SmallStruct_GoJayUnsafe(b *testing.B) {
func Benchmark_Decode_SmallStruct_GoJsonDecode(b *testing.B) {
b.ReportAllocs()
reader := bytes.NewReader(SmallFixture)
for i := 0; i < b.N; i++ {
result := SmallPayload{}
buf := bytes.NewBuffer(SmallFixture)
if err := gojson.NewDecoder(buf).Decode(&result); err != nil {
reader.Reset(SmallFixture)
if err := gojson.NewDecoder(reader).Decode(&result); err != nil {
b.Fatal(err)
}
}

View File

@ -49,6 +49,10 @@ func init() {
cachedDecoder = decoderMap{}
}
// nul is the NUL byte. The streaming decoders in this listing compare
// s.char() against it and, on a match, call s.read() to pull in more input
// before treating the position as the end of the input.
const (
nul = '\000'
)
// NewDecoder returns a new decoder that reads from r.
//
// The decoder introduces its own buffering and may
@ -115,11 +119,16 @@ func (d *Decoder) prepareForDecode() error {
for {
switch s.char() {
case ' ', '\t', '\r', '\n':
s.progress()
s.cursor++
continue
case ',', ':':
s.progress()
s.cursor++
return nil
case nul:
if s.read() {
continue
}
return io.EOF
}
break
}
@ -167,10 +176,14 @@ func (d *Decoder) More() bool {
for {
switch s.char() {
case ' ', '\n', '\r', '\t':
if s.progress() {
s.cursor++
continue
case '}', ']':
return false
case nul:
if s.read() {
continue
}
case '}', ']':
return false
}
break
@ -184,16 +197,12 @@ func (d *Decoder) Token() (Token, error) {
c := s.char()
switch c {
case ' ', '\n', '\r', '\t':
if s.progress() {
continue
}
s.cursor++
case '{', '[', ']', '}':
s.progress()
s.cursor++
return Delim(c), nil
case ',', ':':
if s.progress() {
continue
}
s.cursor++
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
bytes := floatBytes(s)
s := *(*string)(unsafe.Pointer(&bytes))
@ -223,7 +232,10 @@ func (d *Decoder) Token() (Token, error) {
return nil, err
}
return nil, nil
case '\000':
case nul:
if s.read() {
continue
}
return nil, io.EOF
default:
return nil, errInvalidCharacter(s.char(), "token", s.totalOffset())

View File

@ -23,27 +23,38 @@ func (d *arrayDecoder) decodeStream(s *stream, p uintptr) error {
case '[':
idx := 0
for {
s.progress()
s.cursor++
if err := d.valueDecoder.decodeStream(s, p+uintptr(idx)*d.size); err != nil {
return err
}
s.skipWhiteSpace()
switch s.char() {
case ']':
s.progress()
s.cursor++
return nil
case ',':
idx++
case nul:
if s.read() {
continue
}
goto ERROR
default:
return errInvalidCharacter(s.char(), "array", s.offset)
goto ERROR
}
}
case nul:
if s.read() {
continue
}
goto ERROR
default:
return errUnexpectedEndOfJSON("array", s.offset)
goto ERROR
}
s.progress()
s.cursor++
}
return errUnexpectedEndOfJSON("array", s.offset)
ERROR:
return errUnexpectedEndOfJSON("array", s.totalOffset())
}
func (d *arrayDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error) {

View File

@ -11,59 +11,78 @@ func newBoolDecoder() *boolDecoder {
}
// trueBytes checks that the bytes following the cursor spell "rue" and moves
// the cursor past them. On the first byte that does not match it returns
// errInvalidCharacter tagged "bool(true)" with s.totalOffset(); on success it
// returns nil.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers,
// so each s.progress() call is presumably the pre-commit line that the
// following s.cursor++ (or the length check) replaces — confirm against the
// repository before treating both as live code.
func trueBytes(s *stream) error {
s.progress()
// cursor+3 is at or past s.length: ask the stream for more input and give
// up if read reports that none is available.
if s.cursor+3 >= s.length {
if !s.read() {
return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset())
}
}
s.cursor++
if s.char() != 'r' {
return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset())
}
s.progress()
s.cursor++
if s.char() != 'u' {
return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset())
}
s.progress()
s.cursor++
if s.char() != 'e' {
return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset())
}
s.progress()
s.cursor++
return nil
}
// falseBytes checks that the bytes following the cursor spell "alse" and
// moves the cursor past them. On the first byte that does not match it
// returns errInvalidCharacter tagged "bool(false)" with s.totalOffset(); on
// success it returns nil.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers,
// so each s.progress() call is presumably the pre-commit line that the
// following s.cursor++ (or the length check) replaces — confirm against the
// repository before treating both as live code.
func falseBytes(s *stream) error {
s.progress()
// cursor+4 is at or past s.length: ask the stream for more input and give
// up only if read reports that none is available. The condition must be
// negated, as in trueBytes and nullBytes; without the negation a
// successful refill is reported as an error and a failed one is ignored.
if s.cursor+4 >= s.length {
if !s.read() {
return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
}
}
s.cursor++
if s.char() != 'a' {
return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
}
s.progress()
s.cursor++
if s.char() != 'l' {
return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
}
s.progress()
s.cursor++
if s.char() != 's' {
return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
}
s.progress()
s.cursor++
if s.char() != 'e' {
return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
}
s.progress()
s.cursor++
return nil
}
// decodeStream skips leading whitespace and decodes a JSON boolean from s,
// writing the result through p as a bool.
//
// A leading 't' is validated by trueBytes and stores true; a leading 'f' is
// validated by falseBytes and stores false; errors from either helper are
// returned unchanged. On nul the stream is asked for more input via s.read()
// and the switch is retried; when read returns false control jumps to ERROR,
// which returns errUnexpectedEndOfJSON("bool", s.totalOffset()).
//
// NOTE(review): this listing looks like a diff rendered without +/- markers,
// so the outer `switch` with its 't'/'f' arms and the `for { switch ... }`
// form are presumably the pre- and post-commit versions of the same logic
// shown together — confirm against the repository.
func (d *boolDecoder) decodeStream(s *stream, p uintptr) error {
s.skipWhiteSpace()
switch s.char() {
case 't':
if err := trueBytes(s); err != nil {
return err
for {
switch s.char() {
case 't':
if err := trueBytes(s); err != nil {
return err
}
*(*bool)(unsafe.Pointer(p)) = true
return nil
case 'f':
if err := falseBytes(s); err != nil {
return err
}
*(*bool)(unsafe.Pointer(p)) = false
return nil
case nul:
// The buffered input is used up: refill and look at the same position again.
if s.read() {
continue
}
goto ERROR
}
*(*bool)(unsafe.Pointer(p)) = true
return nil
case 'f':
if err := falseBytes(s); err != nil {
return err
}
*(*bool)(unsafe.Pointer(p)) = false
return nil
break
}
ERROR:
return errUnexpectedEndOfJSON("bool", s.totalOffset())
}

View File

@ -31,9 +31,14 @@ var floatTable = [256]bool{
func floatBytes(s *stream) []byte {
start := s.cursor
for s.progress() {
for {
s.cursor++
if floatTable[s.char()] {
continue
} else if s.char() == nul {
if s.read() {
continue
}
}
break
}
@ -44,15 +49,21 @@ func (d *floatDecoder) decodeStreamByte(s *stream) ([]byte, error) {
for {
switch s.char() {
case ' ', '\n', '\t', '\r':
s.progress()
s.cursor++
continue
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
return floatBytes(s), nil
case nul:
if s.read() {
continue
}
goto ERROR
default:
return nil, errUnexpectedEndOfJSON("float", s.offset)
goto ERROR
}
}
return nil, errUnexpectedEndOfJSON("float", s.offset)
ERROR:
return nil, errUnexpectedEndOfJSON("float", s.totalOffset())
}
func (d *floatDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) {

View File

@ -53,37 +53,53 @@ func (d *intDecoder) decodeStreamByte(s *stream) ([]byte, error) {
for {
switch s.char() {
case ' ', '\n', '\t', '\r':
s.progress()
s.cursor++
continue
case '-':
start := s.cursor
for s.progress() {
for {
s.cursor++
if numTable[s.char()] {
continue
} else if s.char() == nul {
if s.read() {
continue
}
}
break
}
num := s.buf[start:s.cursor]
s.reset()
if len(num) < 2 {
return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset())
goto ERROR
}
return num, nil
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
start := s.cursor
for s.progress() {
for {
s.cursor++
if numTable[s.char()] {
continue
} else if s.char() == nul {
if s.read() {
continue
}
}
break
}
num := s.buf[start:s.cursor]
s.reset()
return num, nil
case nul:
if s.read() {
continue
}
goto ERROR
default:
return nil, errInvalidCharacter(s.char(), "number(integer)", s.totalOffset())
goto ERROR
}
}
ERROR:
return nil, errUnexpectedEndOfJSON("number(integer)", s.totalOffset())
}

View File

@ -22,101 +22,77 @@ var (
// decodeStream decodes the next JSON value from s and stores it in the
// interface{} located at p. After skipping whitespace it dispatches on
// s.char():
//
//   - '{' is decoded through a map decoder into a map[interface{}]interface{}.
//   - '[' is decoded through a slice decoder into a []interface{}.
//   - '-' or a digit is decoded through a float decoder and stored as float64.
//   - '"' is scanned up to the closing quote; the bytes between the quotes are
//     reinterpreted as a string without copying. A backslash causes the byte
//     after it to be skipped.
//   - 't', 'f' and 'n' store true, false and nil respectively.
//
// Any other byte yields errNotAtBeginningOfValue(s.totalOffset()). Running
// out of input inside a string yields errUnexpectedEndOfJSON("string", ...).
//
// NOTE(review): this listing looks like a diff rendered without +/- markers.
// The first `switch` (using s.progress() and inline literal checks) and the
// later `for { switch ... }` (using s.cursor++, s.read() and the
// trueBytes/falseBytes/nullBytes helpers) are presumably the pre- and
// post-commit versions of the same function shown together — confirm against
// the repository.
func (d *interfaceDecoder) decodeStream(s *stream, p uintptr) error {
s.skipWhiteSpace()
switch s.char() {
case '{':
var v map[interface{}]interface{}
ptr := unsafe.Pointer(&v)
d.dummy = ptr
dec := newMapDecoder(interfaceMapType, newInterfaceDecoder(d.typ), newInterfaceDecoder(d.typ))
if err := dec.decodeStream(s, uintptr(ptr)); err != nil {
return err
}
*(*interface{})(unsafe.Pointer(p)) = v
return nil
case '[':
var v []interface{}
ptr := unsafe.Pointer(&v)
d.dummy = ptr // escape ptr
dec := newSliceDecoder(newInterfaceDecoder(d.typ), d.typ, d.typ.Size())
if err := dec.decodeStream(s, uintptr(ptr)); err != nil {
return err
}
*(*interface{})(unsafe.Pointer(p)) = v
return nil
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
return newFloatDecoder(func(p uintptr, v float64) {
*(*interface{})(unsafe.Pointer(p)) = v
}).decodeStream(s, p)
case '"':
s.progress()
start := s.cursor
for {
switch s.char() {
case '\\':
s.progress()
case '"':
literal := s.buf[start:s.cursor]
s.progress()
*(*interface{})(unsafe.Pointer(p)) = *(*string)(unsafe.Pointer(&literal))
return nil
case '\000':
return errUnexpectedEndOfJSON("string", s.totalOffset())
for {
switch s.char() {
case '{':
var v map[interface{}]interface{}
ptr := unsafe.Pointer(&v)
d.dummy = ptr
dec := newMapDecoder(interfaceMapType, newInterfaceDecoder(d.typ), newInterfaceDecoder(d.typ))
if err := dec.decodeStream(s, uintptr(ptr)); err != nil {
return err
}
*(*interface{})(unsafe.Pointer(p)) = v
return nil
case '[':
var v []interface{}
ptr := unsafe.Pointer(&v)
d.dummy = ptr // escape ptr
dec := newSliceDecoder(newInterfaceDecoder(d.typ), d.typ, d.typ.Size())
if err := dec.decodeStream(s, uintptr(ptr)); err != nil {
return err
}
*(*interface{})(unsafe.Pointer(p)) = v
return nil
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
return newFloatDecoder(func(p uintptr, v float64) {
*(*interface{})(unsafe.Pointer(p)) = v
}).decodeStream(s, p)
case '"':
s.cursor++
start := s.cursor
for {
switch s.char() {
case '\\':
s.cursor++
case '"':
literal := s.buf[start:s.cursor]
s.cursor++
*(*interface{})(unsafe.Pointer(p)) = *(*string)(unsafe.Pointer(&literal))
return nil
case nul:
if s.read() {
continue
}
return errUnexpectedEndOfJSON("string", s.totalOffset())
}
s.cursor++
}
return errUnexpectedEndOfJSON("string", s.totalOffset())
case 't':
if err := trueBytes(s); err != nil {
return err
}
*(*interface{})(unsafe.Pointer(p)) = true
return nil
case 'f':
if err := falseBytes(s); err != nil {
return err
}
*(*interface{})(unsafe.Pointer(p)) = false
return nil
case 'n':
if err := nullBytes(s); err != nil {
return err
}
*(*interface{})(unsafe.Pointer(p)) = nil
return nil
case nul:
if s.read() {
continue
}
s.progress()
}
return errUnexpectedEndOfJSON("string", s.totalOffset())
case 't':
s.progress()
if s.char() != 'r' {
return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset())
}
s.progress()
if s.char() != 'u' {
return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset())
}
s.progress()
if s.char() != 'e' {
return errInvalidCharacter(s.char(), "bool(true)", s.totalOffset())
}
s.progress()
*(*interface{})(unsafe.Pointer(p)) = true
return nil
case 'f':
s.progress()
if s.char() != 'a' {
return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
}
s.progress()
if s.char() != 'l' {
return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
}
s.progress()
if s.char() != 's' {
return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
}
s.progress()
if s.char() != 'e' {
return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
}
s.progress()
*(*interface{})(unsafe.Pointer(p)) = false
return nil
case 'n':
s.progress()
if s.char() != 'u' {
return errInvalidCharacter(s.char(), "null", s.totalOffset())
}
s.progress()
if s.char() != 'l' {
return errInvalidCharacter(s.char(), "null", s.totalOffset())
}
s.progress()
if s.char() != 'l' {
return errInvalidCharacter(s.char(), "null", s.totalOffset())
}
s.progress()
*(*interface{})(unsafe.Pointer(p)) = nil
return nil
break
}
return errNotAtBeginningOfValue(s.totalOffset())
}

View File

@ -51,16 +51,20 @@ func (d *mapDecoder) decodeStream(s *stream, p uintptr) error {
return errExpected("{ character for map value", s.totalOffset())
}
mapValue := makemap(d.mapType, 0)
for s.progress() {
for {
s.cursor++
var key interface{}
if err := d.setKeyStream(s, &key); err != nil {
return err
}
s.skipWhiteSpace()
if s.char() == nul {
s.read()
}
if s.char() != ':' {
return errExpected("colon after object key", s.totalOffset())
}
s.progress()
s.cursor++
if s.end() {
return errUnexpectedEndOfJSON("map", s.totalOffset())
}
@ -70,6 +74,9 @@ func (d *mapDecoder) decodeStream(s *stream, p uintptr) error {
}
mapassign(d.mapType, mapValue, unsafe.Pointer(&key), unsafe.Pointer(&value))
s.skipWhiteSpace()
if s.char() == nul {
s.read()
}
if s.char() == '}' {
*(*unsafe.Pointer)(unsafe.Pointer(p)) = mapValue
return nil

View File

@ -51,14 +51,15 @@ func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error {
for {
switch s.char() {
case ' ', '\n', '\t', '\r':
s.progress()
s.cursor++
continue
case '[':
idx := 0
slice := d.newSlice()
cap := slice.Cap
data := slice.Data
for s.progress() {
for {
s.cursor++
if cap <= idx {
src := reflect.SliceHeader{Data: data, Len: idx, Cap: cap}
cap *= 2
@ -70,6 +71,7 @@ func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error {
return err
}
s.skipWhiteSpace()
RETRY:
switch s.char() {
case ']':
slice.Cap = cap
@ -84,20 +86,34 @@ func (d *sliceDecoder) decodeStream(s *stream, p uintptr) error {
copySlice(d.elemType, dst, *slice)
*(*reflect.SliceHeader)(unsafe.Pointer(p)) = dst
d.releaseSlice(slice)
s.progress()
s.cursor++
return nil
case ',':
idx++
continue
case nul:
if s.read() {
goto RETRY
}
slice.Cap = cap
slice.Data = data
d.releaseSlice(slice)
goto ERROR
default:
slice.Cap = cap
slice.Data = data
d.releaseSlice(slice)
return errInvalidCharacter(s.char(), "slice", s.totalOffset())
goto ERROR
}
}
case nul:
if s.read() {
continue
}
goto ERROR
}
}
ERROR:
return errUnexpectedEndOfJSON("slice", s.totalOffset())
}

View File

@ -38,15 +38,6 @@ func (s *stream) end() bool {
return s.allRead && s.length <= s.cursor
}
// progress moves the cursor forward by one byte and reports whether it could.
// It advances when the cursor is before the last buffered index
// (s.length-1) or, failing that, when s.read() returns true. Otherwise it
// sets the cursor to s.length and returns false.
//
// NOTE(review): the hunk header above this function (-38,15 +38,6) suggests
// the commit deletes it in favour of direct s.cursor++ at call sites —
// confirm against the repository.
func (s *stream) progress() bool {
if s.cursor < s.length-1 || s.read() {
s.cursor++
return true
}
s.cursor = s.length
return false
}
func (s *stream) progressN(n int64) bool {
if s.cursor+n < s.length-1 || s.read() {
s.cursor += n
@ -94,8 +85,12 @@ func (s *stream) read() bool {
// skipWhiteSpace advances the cursor past every byte that isWhiteSpace marks
// as whitespace. When it reaches nul it calls s.read() and, if that returns
// true, resumes skipping; it stops at the first byte that is neither
// whitespace nor a refillable nul.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers,
// so s.progress() is presumably the pre-commit line that s.cursor++ replaces
// — confirm against the repository.
func (s *stream) skipWhiteSpace() {
LOOP:
if isWhiteSpace[s.char()] {
s.progress()
s.cursor++
goto LOOP
} else if s.char() == nul {
if s.read() {
goto LOOP
}
}
}
@ -105,8 +100,11 @@ func (s *stream) skipValue() error {
bracketCount := 0
for {
switch s.char() {
case '\000':
return errUnexpectedEndOfJSON("value of object", s.offset)
case nul:
if s.read() {
continue
}
return errUnexpectedEndOfJSON("value of object", s.totalOffset())
case '{':
braceCount++
case '[':
@ -123,7 +121,13 @@ func (s *stream) skipValue() error {
return nil
}
case '"':
for s.progress() {
for {
s.cursor++
if s.char() == nul {
if !s.read() {
return errUnexpectedEndOfJSON("value of string", s.totalOffset())
}
}
if s.char() != '"' {
continue
}
@ -131,16 +135,20 @@ func (s *stream) skipValue() error {
continue
}
if bracketCount == 0 && braceCount == 0 {
s.progress()
s.cursor++
return nil
}
break
}
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
for s.progress() {
tk := int(s.char())
if (int('0') <= tk && tk <= int('9')) || tk == '.' || tk == 'e' || tk == 'E' {
for {
s.cursor++
if floatTable[s.char()] {
continue
} else if s.char() == nul {
if s.read() {
continue
}
}
break
}
@ -149,7 +157,7 @@ func (s *stream) skipValue() error {
}
continue
}
s.progress()
s.cursor++
}
return errUnexpectedEndOfJSON("value of object", s.offset)
}

View File

@ -31,40 +31,48 @@ func (d *stringDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, erro
}
// stringBytes scans a JSON string whose opening quote is at the cursor and
// returns the bytes between the quotes as a sub-slice of s.buf.
//
// A backslash causes the byte after it to be skipped. On the closing quote
// the cursor is moved past it, s.reset() is called and the literal is
// returned. On nul the stream is asked for more input via s.read(); when
// that returns false the function returns
// errUnexpectedEndOfJSON("string", s.totalOffset()).
//
// NOTE(review): this listing looks like a diff rendered without +/- markers,
// so each s.progress() is presumably the pre-commit line that the following
// s.cursor++ replaces, and `case '\000':` the pre-commit form of
// `case nul:` — confirm against the repository.
func stringBytes(s *stream) ([]byte, error) {
s.progress()
s.cursor++
start := s.cursor
for {
switch s.char() {
case '\\':
s.progress()
s.cursor++
case '"':
literal := s.buf[start:s.cursor]
s.progress()
s.cursor++
s.reset()
return literal, nil
case '\000':
case nul:
if s.read() {
continue
}
goto ERROR
}
s.progress()
s.cursor++
}
ERROR:
return nil, errUnexpectedEndOfJSON("string", s.totalOffset())
}
// nullBytes checks that the bytes following the cursor spell "ull" and moves
// the cursor past them. On the first byte that does not match it returns
// errInvalidCharacter tagged "null" with s.totalOffset(); on success it
// returns nil.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers,
// so each s.progress() call is presumably the pre-commit line that the
// following s.cursor++ (or the length check) replaces — confirm against the
// repository before treating both as live code.
func nullBytes(s *stream) error {
s.progress()
// cursor+3 is at or past s.length: ask the stream for more input and give
// up if read reports that none is available.
if s.cursor+3 >= s.length {
if !s.read() {
return errInvalidCharacter(s.char(), "null", s.totalOffset())
}
}
s.cursor++
if s.char() != 'u' {
return errInvalidCharacter(s.char(), "null", s.totalOffset())
}
s.progress()
s.cursor++
if s.char() != 'l' {
return errInvalidCharacter(s.char(), "null", s.totalOffset())
}
s.progress()
s.cursor++
if s.char() != 'l' {
return errInvalidCharacter(s.char(), "null", s.totalOffset())
}
s.progress()
s.cursor++
return nil
}
@ -72,7 +80,8 @@ func (d *stringDecoder) decodeStreamByte(s *stream) ([]byte, error) {
for {
switch s.char() {
case ' ', '\n', '\t', '\r':
s.progress()
s.cursor++
continue
case '"':
return stringBytes(s)
case 'n':
@ -80,11 +89,13 @@ func (d *stringDecoder) decodeStreamByte(s *stream) ([]byte, error) {
return nil, err
}
return []byte{'n', 'u', 'l', 'l'}, nil
default:
goto ERROR
case nul:
if s.read() {
continue
}
}
break
}
ERROR:
return nil, errNotAtBeginningOfValue(s.totalOffset())
}

View File

@ -23,10 +23,13 @@ func newStructDecoder(fieldMap map[string]*structFieldSet) *structDecoder {
func (d *structDecoder) decodeStream(s *stream, p uintptr) error {
s.skipWhiteSpace()
if s.char() == nul {
s.read()
}
if s.char() != '{' {
return errNotAtBeginningOfValue(s.totalOffset())
}
s.progress()
s.cursor++
for {
s.reset()
key, err := d.keyDecoder.decodeStreamByte(s)
@ -34,10 +37,16 @@ func (d *structDecoder) decodeStream(s *stream, p uintptr) error {
return err
}
s.skipWhiteSpace()
if s.char() == nul {
s.read()
}
if s.char() != ':' {
return errExpected("colon after object key", s.totalOffset())
}
s.progress()
s.cursor++
if s.char() == nul {
s.read()
}
if s.end() {
return errExpected("object value after colon", s.totalOffset())
}
@ -53,15 +62,18 @@ func (d *structDecoder) decodeStream(s *stream, p uintptr) error {
}
}
s.skipWhiteSpace()
if s.char() == nul {
s.read()
}
c := s.char()
if c == '}' {
s.progress()
s.cursor++
return nil
}
if c != ',' {
return errExpected("comma after object element", s.totalOffset())
}
s.progress()
s.cursor++
}
return nil
}

View File

@ -28,22 +28,29 @@ func (d *uintDecoder) decodeStreamByte(s *stream) ([]byte, error) {
for {
switch s.char() {
case ' ', '\n', '\t', '\r':
s.progress()
s.cursor++
continue
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
start := s.cursor
for s.progress() {
tk := int(s.char())
if int('0') <= tk && tk <= int('9') {
for {
s.cursor++
if numTable[s.char()] {
continue
} else if s.char() == nul {
if s.read() {
continue
}
}
break
}
num := s.buf[start:s.cursor]
return num, nil
default:
return nil, errInvalidCharacter(s.char(), "number(unsigned integer)", s.totalOffset())
case nul:
if s.read() {
continue
}
}
break
}
return nil, errUnexpectedEndOfJSON("number(unsigned integer)", s.totalOffset())
}