Merge pull request #135 from goccy/feature/merge-stream

Optimize the streaming decoder through bounds-check elimination (BCE)
This commit is contained in:
Masaaki Goshima 2021-02-16 02:30:41 +09:00 committed by GitHub
commit c14650d39d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 204 additions and 104 deletions

View File

@ -3,6 +3,7 @@ package json
import (
"bytes"
"io"
"unsafe"
)
const (
@ -43,14 +44,14 @@ func (s *stream) totalOffset() int64 {
return s.offset + s.cursor
}
// prevChar returns the byte located one position before the current cursor
// in the stream buffer. It performs no range check of its own: the index
// s.cursor-1 is used directly on s.buf.
func (s *stream) prevChar() byte {
	prev := s.cursor - 1
	return s.buf[prev]
}
// char returns the byte of the stream buffer at the current cursor position.
// The cursor itself is left untouched.
func (s *stream) char() byte {
	pos := s.cursor
	return s.buf[pos]
}
// stat returns a snapshot of the stream's scanning state: the buffer slice,
// the current cursor, and the data field read from the buffer's slice header
// (s.buf reinterpreted as a sliceHeader).
//
// The values are copies of the stream's fields taken at call time; callers in
// this file call stat again after s.read() to pick up the new state.
func (s *stream) stat() ([]byte, int64, unsafe.Pointer) {
	hdr := (*sliceHeader)(unsafe.Pointer(&s.buf))
	return s.buf, s.cursor, hdr.data
}
func (s *stream) reset() {
s.offset += s.cursor
s.buf = s.buf[s.cursor:]
@ -96,107 +97,171 @@ LOOP:
}
}
func (s *stream) skipValue() error {
s.skipWhiteSpace()
braceCount := 0
bracketCount := 0
start := s.cursor
func (s *stream) skipObject() error {
braceCount := 1
_, cursor, p := s.stat()
for {
switch s.char() {
case nul:
if s.read() {
continue
}
if start == s.cursor {
return errUnexpectedEndOfJSON("value of object", s.totalOffset())
}
if braceCount == 0 && bracketCount == 0 {
return nil
}
return errUnexpectedEndOfJSON("value of object", s.totalOffset())
switch char(p, cursor) {
case '{':
braceCount++
case '[':
bracketCount++
case '}':
braceCount--
if braceCount == -1 && bracketCount == 0 {
return nil
}
case ']':
bracketCount--
if braceCount == 0 && bracketCount == -1 {
return nil
}
case ',':
if bracketCount == 0 && braceCount == 0 {
if braceCount == 0 {
s.cursor = cursor + 1
return nil
}
case '"':
for {
s.cursor++
c := s.char()
if c == nul {
if !s.read() {
return errUnexpectedEndOfJSON("value of string", s.totalOffset())
cursor++
switch char(p, cursor) {
case '"':
if char(p, cursor-1) == '\\' {
continue
}
c = s.char()
goto SWITCH_OUT
case nul:
s.cursor = cursor
if s.read() {
s.cursor-- // for retry current character
_, cursor, p = s.stat()
continue
}
return errUnexpectedEndOfJSON("string of object", cursor)
}
if c != '"' {
continue
}
case nul:
s.cursor = cursor
if s.read() {
_, cursor, p = s.stat()
continue
}
return errUnexpectedEndOfJSON("object of object", cursor)
}
SWITCH_OUT:
cursor++
}
}
func (s *stream) skipArray() error {
bracketCount := 1
_, cursor, p := s.stat()
for {
switch char(p, cursor) {
case '[':
bracketCount++
case ']':
bracketCount--
if bracketCount == 0 {
s.cursor = cursor + 1
return nil
}
case '"':
for {
cursor++
switch char(p, cursor) {
case '"':
if char(p, cursor-1) == '\\' {
continue
}
goto SWITCH_OUT
case nul:
s.cursor = cursor
if s.read() {
s.cursor-- // for retry current character
_, cursor, p = s.stat()
continue
}
return errUnexpectedEndOfJSON("string of object", cursor)
}
if s.prevChar() == '\\' {
continue
}
if bracketCount == 0 && braceCount == 0 {
s.cursor++
}
case nul:
s.cursor = cursor
if s.read() {
_, cursor, p = s.stat()
continue
}
return errUnexpectedEndOfJSON("array of object", cursor)
}
SWITCH_OUT:
cursor++
}
}
func (s *stream) skipValue() error {
_, cursor, p := s.stat()
for {
switch char(p, cursor) {
case ' ', '\n', '\t', '\r':
cursor++
continue
case nul:
s.cursor = cursor
if s.read() {
_, cursor, p = s.stat()
continue
}
return errUnexpectedEndOfJSON("value of object", s.totalOffset())
case '{':
s.cursor = cursor + 1
return s.skipObject()
case '[':
s.cursor = cursor + 1
return s.skipArray()
case '"':
for {
cursor++
switch char(p, cursor) {
case '"':
if char(p, cursor-1) == '\\' {
continue
}
s.cursor = cursor + 1
return nil
case nul:
s.cursor = cursor
if s.read() {
s.cursor-- // for retry current character
_, cursor, p = s.stat()
continue
}
return errUnexpectedEndOfJSON("value of string", s.totalOffset())
}
break
}
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
for {
s.cursor++
c := s.char()
cursor++
c := char(p, cursor)
if floatTable[c] {
continue
} else if c == nul {
if s.read() {
s.cursor-- // for retry current character
_, cursor, p = s.stat()
continue
}
}
break
}
if bracketCount == 0 && braceCount == 0 {
s.cursor = cursor
return nil
}
continue
case 't':
s.cursor = cursor
if err := trueBytes(s); err != nil {
return err
}
if bracketCount == 0 && braceCount == 0 {
return nil
}
continue
return nil
case 'f':
s.cursor = cursor
if err := falseBytes(s); err != nil {
return err
}
if bracketCount == 0 && braceCount == 0 {
return nil
}
continue
return nil
case 'n':
s.cursor = cursor
if err := nullBytes(s); err != nil {
return err
}
if bracketCount == 0 && braceCount == 0 {
return nil
}
continue
return nil
}
s.cursor++
cursor++
}
}

View File

@ -162,27 +162,34 @@ func appendCoerceInvalidUTF8(b []byte, s []byte) []byte {
}
func stringBytes(s *stream) ([]byte, error) {
s.cursor++
start := s.cursor
buf, cursor, p := s.stat()
cursor++ // skip double quote char
start := cursor
for {
switch s.char() {
switch char(p, cursor) {
case '\\':
s.cursor = cursor
if err := decodeEscapeString(s); err != nil {
return nil, err
}
buf, cursor, p = s.stat()
case '"':
literal := s.buf[start:s.cursor]
literal := buf[start:cursor]
// TODO: this flow is so slow sequence.
// literal = appendCoerceInvalidUTF8(make([]byte, 0, len(literal)), literal)
s.cursor++
cursor++
s.cursor = cursor
return literal, nil
case nul:
s.cursor = cursor
if s.read() {
buf, cursor, p = s.stat()
continue
}
goto ERROR
}
s.cursor++
cursor++
}
ERROR:
return nil, errUnexpectedEndOfJSON("string", s.totalOffset())

View File

@ -288,25 +288,31 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet,
field *structFieldSet
curBit uint8 = math.MaxUint8
)
buf, cursor, p := s.stat()
for {
switch s.char() {
switch char(p, cursor) {
case ' ', '\n', '\t', '\r':
s.cursor++
cursor++
case nul:
s.cursor = cursor
if s.read() {
buf, cursor, p = s.stat()
continue
}
return nil, "", errNotAtBeginningOfValue(s.totalOffset())
case '"':
s.cursor++
cursor++
FIRST_CHAR:
start := s.cursor
switch s.char() {
start := cursor
switch char(p, cursor) {
case '"':
s.cursor++
cursor++
s.cursor = cursor
return field, "", nil
case nul:
s.cursor = cursor
if s.read() {
buf, cursor, p = s.stat()
goto FIRST_CHAR
}
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
@ -314,20 +320,23 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet,
keyIdx := 0
bitmap := d.keyBitmapUint8
for {
c := s.char()
c := char(p, cursor)
switch c {
case '"':
fieldSetIndex := bits.TrailingZeros8(curBit)
field = d.sortedFieldSets[fieldSetIndex]
keyLen := s.cursor - start
s.cursor++
keyLen := cursor - start
cursor++
s.cursor = cursor
if keyLen < field.keyLen {
// early match
return nil, field.key, nil
}
return field, field.key, nil
case nul:
s.cursor = cursor
if s.read() {
buf, cursor, p = s.stat()
continue
}
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
@ -335,30 +344,35 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet,
curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 {
for {
s.cursor++
switch s.char() {
cursor++
switch char(p, cursor) {
case '"':
b := s.buf[start:s.cursor]
b := buf[start:cursor]
key := *(*string)(unsafe.Pointer(&b))
s.cursor++
cursor++
s.cursor = cursor
return field, key, nil
case '\\':
s.cursor++
if s.char() == nul {
cursor++
if char(p, cursor) == nul {
s.cursor = cursor
if !s.read() {
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
}
buf, cursor, p = s.stat()
}
case nul:
s.cursor = cursor
if !s.read() {
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
}
buf, cursor, p = s.stat()
}
}
}
keyIdx++
}
s.cursor++
cursor++
}
default:
return nil, "", errNotAtBeginningOfValue(s.totalOffset())
@ -371,25 +385,31 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet
field *structFieldSet
curBit uint16 = math.MaxUint16
)
buf, cursor, p := s.stat()
for {
switch s.char() {
switch char(p, cursor) {
case ' ', '\n', '\t', '\r':
s.cursor++
cursor++
case nul:
s.cursor = cursor
if s.read() {
buf, cursor, p = s.stat()
continue
}
return nil, "", errNotAtBeginningOfValue(s.totalOffset())
case '"':
s.cursor++
cursor++
FIRST_CHAR:
start := s.cursor
switch s.char() {
start := cursor
switch char(p, cursor) {
case '"':
s.cursor++
cursor++
s.cursor = cursor
return field, "", nil
case nul:
s.cursor = cursor
if s.read() {
buf, cursor, p = s.stat()
goto FIRST_CHAR
}
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
@ -397,20 +417,23 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet
keyIdx := 0
bitmap := d.keyBitmapUint16
for {
c := s.char()
c := char(p, cursor)
switch c {
case '"':
fieldSetIndex := bits.TrailingZeros16(curBit)
field = d.sortedFieldSets[fieldSetIndex]
keyLen := s.cursor - start
s.cursor++
keyLen := cursor - start
cursor++
s.cursor = cursor
if keyLen < field.keyLen {
// early match
return nil, field.key, nil
}
return field, field.key, nil
case nul:
s.cursor = cursor
if s.read() {
buf, cursor, p = s.stat()
continue
}
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
@ -418,30 +441,35 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet
curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 {
for {
s.cursor++
switch s.char() {
cursor++
switch char(p, cursor) {
case '"':
b := s.buf[start:s.cursor]
b := buf[start:cursor]
key := *(*string)(unsafe.Pointer(&b))
s.cursor++
cursor++
s.cursor = cursor
return field, key, nil
case '\\':
s.cursor++
if s.char() == nul {
cursor++
if char(p, cursor) == nul {
s.cursor = cursor
if !s.read() {
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
}
buf, cursor, p = s.stat()
}
case nul:
s.cursor = cursor
if !s.read() {
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
}
buf, cursor, p = s.stat()
}
}
}
keyIdx++
}
s.cursor++
cursor++
}
default:
return nil, "", errNotAtBeginningOfValue(s.totalOffset())