From 64e29e00d60c079c7f60172d1690d7a181c9c410 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Tue, 16 Feb 2021 02:13:13 +0900 Subject: [PATCH] Optimize streaming decoder by BCE --- decode_stream.go | 193 ++++++++++++++++++++++++++++++++--------------- decode_string.go | 19 +++-- decode_struct.go | 92 ++++++++++++++-------- 3 files changed, 204 insertions(+), 100 deletions(-) diff --git a/decode_stream.go b/decode_stream.go index f54400d..d97fc79 100644 --- a/decode_stream.go +++ b/decode_stream.go @@ -3,6 +3,7 @@ package json import ( "bytes" "io" + "unsafe" ) const ( @@ -51,6 +52,10 @@ func (s *stream) char() byte { return s.buf[s.cursor] } +func (s *stream) stat() ([]byte, int64, unsafe.Pointer) { + return s.buf, s.cursor, (*sliceHeader)(unsafe.Pointer(&s.buf)).data +} + func (s *stream) reset() { s.offset += s.cursor s.buf = s.buf[s.cursor:] @@ -96,107 +101,171 @@ LOOP: } } -func (s *stream) skipValue() error { - s.skipWhiteSpace() - braceCount := 0 - bracketCount := 0 - start := s.cursor +func (s *stream) skipObject() error { + braceCount := 1 + buf, cursor, p := s.stat() for { - switch s.char() { - case nul: - if s.read() { - continue - } - if start == s.cursor { - return errUnexpectedEndOfJSON("value of object", s.totalOffset()) - } - if braceCount == 0 && bracketCount == 0 { - return nil - } - return errUnexpectedEndOfJSON("value of object", s.totalOffset()) + switch char(p, cursor) { case '{': braceCount++ - case '[': - bracketCount++ case '}': braceCount-- - if braceCount == -1 && bracketCount == 0 { - return nil - } - case ']': - bracketCount-- - if braceCount == 0 && bracketCount == -1 { - return nil - } - case ',': - if bracketCount == 0 && braceCount == 0 { + if braceCount == 0 { + s.cursor = cursor + 1 return nil } case '"': for { - s.cursor++ - c := s.char() - if c == nul { - if !s.read() { - return errUnexpectedEndOfJSON("value of string", s.totalOffset()) + cursor++ + switch char(p, cursor) { + case '"': + if buf[cursor-1] == '\\' { + continue } - c = s.char() + goto SWITCH_OUT + case nul: + s.cursor = cursor + if s.read() { + s.cursor-- // for retry current character + buf, cursor, p = s.stat() + continue + } + return errUnexpectedEndOfJSON("string of object", cursor) } - if c != '"' { - continue + } + case nul: + s.cursor = cursor + if s.read() { + buf, cursor, p = s.stat() + continue + } + return errUnexpectedEndOfJSON("object of object", cursor) + } + SWITCH_OUT: + cursor++ + } +} + +func (s *stream) skipArray() error { + bracketCount := 1 + buf, cursor, p := s.stat() + for { + switch char(p, cursor) { + case '[': + bracketCount++ + case ']': + bracketCount-- + if bracketCount == 0 { + s.cursor = cursor + 1 + return nil + } + case '"': + for { + cursor++ + switch char(p, cursor) { + case '"': + if buf[cursor-1] == '\\' { + continue + } + goto SWITCH_OUT + case nul: + s.cursor = cursor + if s.read() { + s.cursor-- // for retry current character + buf, cursor, p = s.stat() + continue + } + return errUnexpectedEndOfJSON("string of object", cursor) } - if s.prevChar() == '\\' { - continue - } - if bracketCount == 0 && braceCount == 0 { - s.cursor++ + } + case nul: + s.cursor = cursor + if s.read() { + buf, cursor, p = s.stat() + continue + } + return errUnexpectedEndOfJSON("array of object", cursor) + } + SWITCH_OUT: + cursor++ + } +} + +func (s *stream) skipValue() error { + buf, cursor, p := s.stat() + for { + switch char(p, cursor) { + case ' ', '\n', '\t', '\r': + cursor++ + continue + case nul: + s.cursor = cursor + if s.read() { + buf, cursor, p = s.stat() + continue + } + return errUnexpectedEndOfJSON("value of object", s.totalOffset()) + case '{': + s.cursor = cursor + 1 + return s.skipObject() + case '[': + s.cursor = cursor + 1 + return s.skipArray() + case '"': + for { + cursor++ + switch buf[cursor] { + case '"': + if buf[cursor-1] == '\\' { + continue + } + s.cursor = cursor + 1 return nil + case nul: + s.cursor = cursor + if s.read() { + s.cursor-- // for retry current character + buf, cursor, p = s.stat() + continue + } + return errUnexpectedEndOfJSON("value of string", s.totalOffset()) } - break } case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': for { - s.cursor++ - c := s.char() + cursor++ + c := char(p, cursor) if floatTable[c] { continue } else if c == nul { if s.read() { s.cursor-- // for retry current character + buf, cursor, p = s.stat() continue } } - break - } - if bracketCount == 0 && braceCount == 0 { + s.cursor = cursor return nil } - continue case 't': + s.cursor = cursor if err := trueBytes(s); err != nil { return err } - if bracketCount == 0 && braceCount == 0 { - return nil - } - continue + return nil case 'f': + s.cursor = cursor if err := falseBytes(s); err != nil { return err } - if bracketCount == 0 && braceCount == 0 { - return nil - } - continue + return nil case 'n': + s.cursor = cursor if err := nullBytes(s); err != nil { return err } - if bracketCount == 0 && braceCount == 0 { - return nil - } - continue + return nil } - s.cursor++ + cursor++ } } diff --git a/decode_string.go b/decode_string.go index d3eb17d..2dcd2e5 100644 --- a/decode_string.go +++ b/decode_string.go @@ -162,27 +162,34 @@ func appendCoerceInvalidUTF8(b []byte, s []byte) []byte { } func stringBytes(s *stream) ([]byte, error) { - s.cursor++ - start := s.cursor + buf, cursor, p := s.stat() + + cursor++ // skip double quote char + start := cursor for { - switch s.char() { + switch char(p, cursor) { case '\\': + s.cursor = cursor if err := decodeEscapeString(s); err != nil { return nil, err } + buf, cursor, p = s.stat() case '"': - literal := s.buf[start:s.cursor] + literal := buf[start:cursor] // TODO: this flow is so slow sequence. // literal = appendCoerceInvalidUTF8(make([]byte, 0, len(literal)), literal) - s.cursor++ + cursor++ + s.cursor = cursor return literal, nil case nul: + s.cursor = cursor if s.read() { + buf, cursor, p = s.stat() continue } goto ERROR } - s.cursor++ + cursor++ } ERROR: return nil, errUnexpectedEndOfJSON("string", s.totalOffset()) diff --git a/decode_struct.go b/decode_struct.go index be0700c..f1e347a 100644 --- a/decode_struct.go +++ b/decode_struct.go @@ -288,25 +288,31 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet, field *structFieldSet curBit uint8 = math.MaxUint8 ) + buf, cursor, p := s.stat() for { - switch s.char() { + switch char(p, cursor) { case ' ', '\n', '\t', '\r': - s.cursor++ + cursor++ case nul: + s.cursor = cursor if s.read() { + buf, cursor, p = s.stat() continue } return nil, "", errNotAtBeginningOfValue(s.totalOffset()) case '"': - s.cursor++ + cursor++ FIRST_CHAR: - start := s.cursor - switch s.char() { + start := cursor + switch char(p, cursor) { case '"': - s.cursor++ + cursor++ + s.cursor = cursor return field, "", nil case nul: + s.cursor = cursor if s.read() { + buf, cursor, p = s.stat() goto FIRST_CHAR } return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) @@ -314,20 +320,23 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet, keyIdx := 0 bitmap := d.keyBitmapUint8 for { - c := s.char() + c := char(p, cursor) switch c { case '"': fieldSetIndex := bits.TrailingZeros8(curBit) field = d.sortedFieldSets[fieldSetIndex] - keyLen := s.cursor - start - s.cursor++ + keyLen := cursor - start + cursor++ + s.cursor = cursor if keyLen < field.keyLen { // early match return nil, field.key, nil } return field, field.key, nil case nul: + s.cursor = cursor if s.read() { + buf, cursor, p = s.stat() continue } return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) @@ -335,30 +344,35 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet, curBit &= bitmap[keyIdx][largeToSmallTable[c]] if curBit == 0 { for { - s.cursor++ - switch s.char() { + cursor++ + switch char(p, cursor) { case '"': - b := s.buf[start:s.cursor] + b := buf[start:cursor] key := *(*string)(unsafe.Pointer(&b)) - s.cursor++ + cursor++ + s.cursor = cursor return field, key, nil case '\\': - s.cursor++ - if s.char() == nul { + cursor++ + if char(p, cursor) == nul { + s.cursor = cursor if !s.read() { return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) } + buf, cursor, p = s.stat() } case nul: + s.cursor = cursor if !s.read() { return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) } + buf, cursor, p = s.stat() } } } keyIdx++ } - s.cursor++ + cursor++ } default: return nil, "", errNotAtBeginningOfValue(s.totalOffset()) @@ -371,25 +385,31 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet field *structFieldSet curBit uint16 = math.MaxUint16 ) + buf, cursor, p := s.stat() for { - switch s.char() { + switch char(p, cursor) { case ' ', '\n', '\t', '\r': - s.cursor++ + cursor++ case nul: + s.cursor = cursor if s.read() { + buf, cursor, p = s.stat() continue } return nil, "", errNotAtBeginningOfValue(s.totalOffset()) case '"': - s.cursor++ + cursor++ FIRST_CHAR: - start := s.cursor - switch s.char() { + start := cursor + switch char(p, cursor) { case '"': - s.cursor++ + cursor++ + s.cursor = cursor return field, "", nil case nul: + s.cursor = cursor if s.read() { + buf, cursor, p = s.stat() goto FIRST_CHAR } return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) @@ -397,20 +417,23 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet keyIdx := 0 bitmap := d.keyBitmapUint16 for { - c := s.char() + c := char(p, cursor) switch c { case '"': fieldSetIndex := bits.TrailingZeros16(curBit) field = d.sortedFieldSets[fieldSetIndex] - keyLen := s.cursor - start - s.cursor++ + keyLen := cursor - start + cursor++ + s.cursor = cursor if keyLen < field.keyLen { // early match return nil, field.key, nil } return field, field.key, nil case nul: + s.cursor = cursor if s.read() { + buf, cursor, p = s.stat() continue } return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) @@ -418,30 +441,35 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet curBit &= bitmap[keyIdx][largeToSmallTable[c]] if curBit == 0 { for { - s.cursor++ - switch s.char() { + cursor++ + switch char(p, cursor) { case '"': - b := s.buf[start:s.cursor] + b := buf[start:cursor] key := *(*string)(unsafe.Pointer(&b)) - s.cursor++ + cursor++ + s.cursor = cursor return field, key, nil case '\\': - s.cursor++ - if s.char() == nul { + cursor++ + if char(p, cursor) == nul { + s.cursor = cursor if !s.read() { return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) } + buf, cursor, p = s.stat() } case nul: + s.cursor = cursor if !s.read() { return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) } + buf, cursor, p = s.stat() } } } keyIdx++ } - s.cursor++ + cursor++ } default: return nil, "", errNotAtBeginningOfValue(s.totalOffset())