Merge pull request #135 from goccy/feature/merge-stream

Optimize streaming decoder via bounds-check elimination (BCE)
This commit is contained in:
Masaaki Goshima 2021-02-16 02:30:41 +09:00 committed by GitHub
commit c14650d39d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 204 additions and 104 deletions

View File

@ -3,6 +3,7 @@ package json
import ( import (
"bytes" "bytes"
"io" "io"
"unsafe"
) )
const ( const (
@ -43,14 +44,14 @@ func (s *stream) totalOffset() int64 {
return s.offset + s.cursor return s.offset + s.cursor
} }
// prevChar returns the byte stored in the buffer immediately before the
// current cursor position.
func (s *stream) prevChar() byte {
	prev := s.cursor - 1
	return s.buf[prev]
}
func (s *stream) char() byte { func (s *stream) char() byte {
return s.buf[s.cursor] return s.buf[s.cursor]
} }
// stat returns a snapshot of the stream's scanning state: the current buffer,
// the current cursor, and the data field read from the buffer's slice header
// (obtained by reinterpreting &s.buf as a *sliceHeader).
//
// Callers in this file re-invoke stat after every s.read() to refresh their
// local copies of these values.
func (s *stream) stat() ([]byte, int64, unsafe.Pointer) {
	header := (*sliceHeader)(unsafe.Pointer(&s.buf))
	return s.buf, s.cursor, header.data
}
func (s *stream) reset() { func (s *stream) reset() {
s.offset += s.cursor s.offset += s.cursor
s.buf = s.buf[s.cursor:] s.buf = s.buf[s.cursor:]
@ -96,107 +97,171 @@ LOOP:
} }
} }
func (s *stream) skipValue() error { func (s *stream) skipObject() error {
s.skipWhiteSpace() braceCount := 1
braceCount := 0 _, cursor, p := s.stat()
bracketCount := 0
start := s.cursor
for { for {
switch s.char() { switch char(p, cursor) {
case nul:
if s.read() {
continue
}
if start == s.cursor {
return errUnexpectedEndOfJSON("value of object", s.totalOffset())
}
if braceCount == 0 && bracketCount == 0 {
return nil
}
return errUnexpectedEndOfJSON("value of object", s.totalOffset())
case '{': case '{':
braceCount++ braceCount++
case '[':
bracketCount++
case '}': case '}':
braceCount-- braceCount--
if braceCount == -1 && bracketCount == 0 { if braceCount == 0 {
return nil s.cursor = cursor + 1
}
case ']':
bracketCount--
if braceCount == 0 && bracketCount == -1 {
return nil
}
case ',':
if bracketCount == 0 && braceCount == 0 {
return nil return nil
} }
case '"': case '"':
for { for {
s.cursor++ cursor++
c := s.char() switch char(p, cursor) {
if c == nul { case '"':
if !s.read() { if char(p, cursor-1) == '\\' {
return errUnexpectedEndOfJSON("value of string", s.totalOffset())
}
c = s.char()
}
if c != '"' {
continue continue
} }
if s.prevChar() == '\\' { goto SWITCH_OUT
case nul:
s.cursor = cursor
if s.read() {
s.cursor-- // for retry current character
_, cursor, p = s.stat()
continue continue
} }
if bracketCount == 0 && braceCount == 0 { return errUnexpectedEndOfJSON("string of object", cursor)
s.cursor++ }
}
case nul:
s.cursor = cursor
if s.read() {
_, cursor, p = s.stat()
continue
}
return errUnexpectedEndOfJSON("object of object", cursor)
}
SWITCH_OUT:
cursor++
}
}
func (s *stream) skipArray() error {
bracketCount := 1
_, cursor, p := s.stat()
for {
switch char(p, cursor) {
case '[':
bracketCount++
case ']':
bracketCount--
if bracketCount == 0 {
s.cursor = cursor + 1
return nil return nil
} }
break case '"':
for {
cursor++
switch char(p, cursor) {
case '"':
if char(p, cursor-1) == '\\' {
continue
}
goto SWITCH_OUT
case nul:
s.cursor = cursor
if s.read() {
s.cursor-- // for retry current character
_, cursor, p = s.stat()
continue
}
return errUnexpectedEndOfJSON("string of object", cursor)
}
}
case nul:
s.cursor = cursor
if s.read() {
_, cursor, p = s.stat()
continue
}
return errUnexpectedEndOfJSON("array of object", cursor)
}
SWITCH_OUT:
cursor++
}
}
func (s *stream) skipValue() error {
_, cursor, p := s.stat()
for {
switch char(p, cursor) {
case ' ', '\n', '\t', '\r':
cursor++
continue
case nul:
s.cursor = cursor
if s.read() {
_, cursor, p = s.stat()
continue
}
return errUnexpectedEndOfJSON("value of object", s.totalOffset())
case '{':
s.cursor = cursor + 1
return s.skipObject()
case '[':
s.cursor = cursor + 1
return s.skipArray()
case '"':
for {
cursor++
switch char(p, cursor) {
case '"':
if char(p, cursor-1) == '\\' {
continue
}
s.cursor = cursor + 1
return nil
case nul:
s.cursor = cursor
if s.read() {
s.cursor-- // for retry current character
_, cursor, p = s.stat()
continue
}
return errUnexpectedEndOfJSON("value of string", s.totalOffset())
}
} }
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
for { for {
s.cursor++ cursor++
c := s.char() c := char(p, cursor)
if floatTable[c] { if floatTable[c] {
continue continue
} else if c == nul { } else if c == nul {
if s.read() { if s.read() {
s.cursor-- // for retry current character s.cursor-- // for retry current character
_, cursor, p = s.stat()
continue continue
} }
} }
break s.cursor = cursor
}
if bracketCount == 0 && braceCount == 0 {
return nil return nil
} }
continue
case 't': case 't':
s.cursor = cursor
if err := trueBytes(s); err != nil { if err := trueBytes(s); err != nil {
return err return err
} }
if bracketCount == 0 && braceCount == 0 {
return nil return nil
}
continue
case 'f': case 'f':
s.cursor = cursor
if err := falseBytes(s); err != nil { if err := falseBytes(s); err != nil {
return err return err
} }
if bracketCount == 0 && braceCount == 0 {
return nil return nil
}
continue
case 'n': case 'n':
s.cursor = cursor
if err := nullBytes(s); err != nil { if err := nullBytes(s); err != nil {
return err return err
} }
if bracketCount == 0 && braceCount == 0 {
return nil return nil
} }
continue cursor++
}
s.cursor++
} }
} }

View File

@ -162,27 +162,34 @@ func appendCoerceInvalidUTF8(b []byte, s []byte) []byte {
} }
func stringBytes(s *stream) ([]byte, error) { func stringBytes(s *stream) ([]byte, error) {
s.cursor++ buf, cursor, p := s.stat()
start := s.cursor
cursor++ // skip double quote char
start := cursor
for { for {
switch s.char() { switch char(p, cursor) {
case '\\': case '\\':
s.cursor = cursor
if err := decodeEscapeString(s); err != nil { if err := decodeEscapeString(s); err != nil {
return nil, err return nil, err
} }
buf, cursor, p = s.stat()
case '"': case '"':
literal := s.buf[start:s.cursor] literal := buf[start:cursor]
// TODO: this flow is so slow sequence. // TODO: this flow is so slow sequence.
// literal = appendCoerceInvalidUTF8(make([]byte, 0, len(literal)), literal) // literal = appendCoerceInvalidUTF8(make([]byte, 0, len(literal)), literal)
s.cursor++ cursor++
s.cursor = cursor
return literal, nil return literal, nil
case nul: case nul:
s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat()
continue continue
} }
goto ERROR goto ERROR
} }
s.cursor++ cursor++
} }
ERROR: ERROR:
return nil, errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, errUnexpectedEndOfJSON("string", s.totalOffset())

View File

@ -288,25 +288,31 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet,
field *structFieldSet field *structFieldSet
curBit uint8 = math.MaxUint8 curBit uint8 = math.MaxUint8
) )
buf, cursor, p := s.stat()
for { for {
switch s.char() { switch char(p, cursor) {
case ' ', '\n', '\t', '\r': case ' ', '\n', '\t', '\r':
s.cursor++ cursor++
case nul: case nul:
s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat()
continue continue
} }
return nil, "", errNotAtBeginningOfValue(s.totalOffset()) return nil, "", errNotAtBeginningOfValue(s.totalOffset())
case '"': case '"':
s.cursor++ cursor++
FIRST_CHAR: FIRST_CHAR:
start := s.cursor start := cursor
switch s.char() { switch char(p, cursor) {
case '"': case '"':
s.cursor++ cursor++
s.cursor = cursor
return field, "", nil return field, "", nil
case nul: case nul:
s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat()
goto FIRST_CHAR goto FIRST_CHAR
} }
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
@ -314,20 +320,23 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet,
keyIdx := 0 keyIdx := 0
bitmap := d.keyBitmapUint8 bitmap := d.keyBitmapUint8
for { for {
c := s.char() c := char(p, cursor)
switch c { switch c {
case '"': case '"':
fieldSetIndex := bits.TrailingZeros8(curBit) fieldSetIndex := bits.TrailingZeros8(curBit)
field = d.sortedFieldSets[fieldSetIndex] field = d.sortedFieldSets[fieldSetIndex]
keyLen := s.cursor - start keyLen := cursor - start
s.cursor++ cursor++
s.cursor = cursor
if keyLen < field.keyLen { if keyLen < field.keyLen {
// early match // early match
return nil, field.key, nil return nil, field.key, nil
} }
return field, field.key, nil return field, field.key, nil
case nul: case nul:
s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat()
continue continue
} }
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
@ -335,30 +344,35 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet,
curBit &= bitmap[keyIdx][largeToSmallTable[c]] curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 { if curBit == 0 {
for { for {
s.cursor++ cursor++
switch s.char() { switch char(p, cursor) {
case '"': case '"':
b := s.buf[start:s.cursor] b := buf[start:cursor]
key := *(*string)(unsafe.Pointer(&b)) key := *(*string)(unsafe.Pointer(&b))
s.cursor++ cursor++
s.cursor = cursor
return field, key, nil return field, key, nil
case '\\': case '\\':
s.cursor++ cursor++
if s.char() == nul { if char(p, cursor) == nul {
s.cursor = cursor
if !s.read() { if !s.read() {
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
} }
buf, cursor, p = s.stat()
} }
case nul: case nul:
s.cursor = cursor
if !s.read() { if !s.read() {
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
} }
buf, cursor, p = s.stat()
} }
} }
} }
keyIdx++ keyIdx++
} }
s.cursor++ cursor++
} }
default: default:
return nil, "", errNotAtBeginningOfValue(s.totalOffset()) return nil, "", errNotAtBeginningOfValue(s.totalOffset())
@ -371,25 +385,31 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet
field *structFieldSet field *structFieldSet
curBit uint16 = math.MaxUint16 curBit uint16 = math.MaxUint16
) )
buf, cursor, p := s.stat()
for { for {
switch s.char() { switch char(p, cursor) {
case ' ', '\n', '\t', '\r': case ' ', '\n', '\t', '\r':
s.cursor++ cursor++
case nul: case nul:
s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat()
continue continue
} }
return nil, "", errNotAtBeginningOfValue(s.totalOffset()) return nil, "", errNotAtBeginningOfValue(s.totalOffset())
case '"': case '"':
s.cursor++ cursor++
FIRST_CHAR: FIRST_CHAR:
start := s.cursor start := cursor
switch s.char() { switch char(p, cursor) {
case '"': case '"':
s.cursor++ cursor++
s.cursor = cursor
return field, "", nil return field, "", nil
case nul: case nul:
s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat()
goto FIRST_CHAR goto FIRST_CHAR
} }
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
@ -397,20 +417,23 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet
keyIdx := 0 keyIdx := 0
bitmap := d.keyBitmapUint16 bitmap := d.keyBitmapUint16
for { for {
c := s.char() c := char(p, cursor)
switch c { switch c {
case '"': case '"':
fieldSetIndex := bits.TrailingZeros16(curBit) fieldSetIndex := bits.TrailingZeros16(curBit)
field = d.sortedFieldSets[fieldSetIndex] field = d.sortedFieldSets[fieldSetIndex]
keyLen := s.cursor - start keyLen := cursor - start
s.cursor++ cursor++
s.cursor = cursor
if keyLen < field.keyLen { if keyLen < field.keyLen {
// early match // early match
return nil, field.key, nil return nil, field.key, nil
} }
return field, field.key, nil return field, field.key, nil
case nul: case nul:
s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat()
continue continue
} }
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
@ -418,30 +441,35 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet
curBit &= bitmap[keyIdx][largeToSmallTable[c]] curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 { if curBit == 0 {
for { for {
s.cursor++ cursor++
switch s.char() { switch char(p, cursor) {
case '"': case '"':
b := s.buf[start:s.cursor] b := buf[start:cursor]
key := *(*string)(unsafe.Pointer(&b)) key := *(*string)(unsafe.Pointer(&b))
s.cursor++ cursor++
s.cursor = cursor
return field, key, nil return field, key, nil
case '\\': case '\\':
s.cursor++ cursor++
if s.char() == nul { if char(p, cursor) == nul {
s.cursor = cursor
if !s.read() { if !s.read() {
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
} }
buf, cursor, p = s.stat()
} }
case nul: case nul:
s.cursor = cursor
if !s.read() { if !s.read() {
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
} }
buf, cursor, p = s.stat()
} }
} }
} }
keyIdx++ keyIdx++
} }
s.cursor++ cursor++
} }
default: default:
return nil, "", errNotAtBeginningOfValue(s.totalOffset()) return nil, "", errNotAtBeginningOfValue(s.totalOffset())