Fix decoding fields containing escaped characters for stream decoder

This commit is contained in:
Masaaki Goshima 2021-06-03 15:00:31 +09:00
parent 14c828aad7
commit 917f71bbcf
1 changed file with 166 additions and 94 deletions

View File

@ -186,7 +186,6 @@ func decodeKeyCharByEscapedChar(buf []byte, cursor int64) ([]byte, int64) {
func decodeKeyByBitmapUint8(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldSet, error) { func decodeKeyByBitmapUint8(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldSet, error) {
var ( var (
field *structFieldSet
curBit uint8 = math.MaxUint8 curBit uint8 = math.MaxUint8
) )
b := (*sliceHeader)(unsafe.Pointer(&buf)).data b := (*sliceHeader)(unsafe.Pointer(&buf)).data
@ -200,7 +199,7 @@ func decodeKeyByBitmapUint8(d *structDecoder, buf []byte, cursor int64) (int64,
switch c { switch c {
case '"': case '"':
cursor++ cursor++
return cursor, field, nil return cursor, nil, nil
case nul: case nul:
return 0, nil, errUnexpectedEndOfJSON("string", cursor) return 0, nil, errUnexpectedEndOfJSON("string", cursor)
} }
@ -212,7 +211,7 @@ func decodeKeyByBitmapUint8(d *structDecoder, buf []byte, cursor int64) (int64,
switch c { switch c {
case '"': case '"':
fieldSetIndex := bits.TrailingZeros8(curBit) fieldSetIndex := bits.TrailingZeros8(curBit)
field = d.sortedFieldSets[fieldSetIndex] field := d.sortedFieldSets[fieldSetIndex]
keyLen := cursor - start keyLen := cursor - start
cursor++ cursor++
if keyLen < field.keyLen { if keyLen < field.keyLen {
@ -228,7 +227,7 @@ func decodeKeyByBitmapUint8(d *structDecoder, buf []byte, cursor int64) (int64,
for _, c := range chars { for _, c := range chars {
curBit &= bitmap[keyIdx][largeToSmallTable[c]] curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 { if curBit == 0 {
return decodeKeyNotFound(b, cursor, field) return decodeKeyNotFound(b, cursor)
} }
keyIdx++ keyIdx++
} }
@ -236,7 +235,7 @@ func decodeKeyByBitmapUint8(d *structDecoder, buf []byte, cursor int64) (int64,
default: default:
curBit &= bitmap[keyIdx][largeToSmallTable[c]] curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 { if curBit == 0 {
return decodeKeyNotFound(b, cursor, field) return decodeKeyNotFound(b, cursor)
} }
keyIdx++ keyIdx++
} }
@ -248,27 +247,8 @@ func decodeKeyByBitmapUint8(d *structDecoder, buf []byte, cursor int64) (int64,
} }
} }
// decodeKeyNotFound skips the remainder of a key string once it is known
// not to match. Scanning starts at the byte after cursor and stops at the
// closing double quote; the byte following a backslash is never treated as
// a terminator. On success it returns the position just past the closing
// quote together with the field value that was passed in. Reaching a nul
// byte first yields an unexpected-end-of-JSON error.
func decodeKeyNotFound(b unsafe.Pointer, cursor int64, field *structFieldSet) (int64, *structFieldSet, error) {
	for cursor++; ; cursor++ {
		switch char(b, cursor) {
		case nul:
			return 0, nil, errUnexpectedEndOfJSON("string", cursor)
		case '"':
			return cursor + 1, field, nil
		case '\\':
			// Step over the escaped byte so an escaped quote does not end the key.
			cursor++
			if char(b, cursor) == nul {
				return 0, nil, errUnexpectedEndOfJSON("string", cursor)
			}
		}
	}
}
func decodeKeyByBitmapUint16(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldSet, error) { func decodeKeyByBitmapUint16(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldSet, error) {
var ( var (
field *structFieldSet
curBit uint16 = math.MaxUint16 curBit uint16 = math.MaxUint16
) )
b := (*sliceHeader)(unsafe.Pointer(&buf)).data b := (*sliceHeader)(unsafe.Pointer(&buf)).data
@ -282,7 +262,7 @@ func decodeKeyByBitmapUint16(d *structDecoder, buf []byte, cursor int64) (int64,
switch c { switch c {
case '"': case '"':
cursor++ cursor++
return cursor, field, nil return cursor, nil, nil
case nul: case nul:
return 0, nil, errUnexpectedEndOfJSON("string", cursor) return 0, nil, errUnexpectedEndOfJSON("string", cursor)
} }
@ -294,7 +274,7 @@ func decodeKeyByBitmapUint16(d *structDecoder, buf []byte, cursor int64) (int64,
switch c { switch c {
case '"': case '"':
fieldSetIndex := bits.TrailingZeros16(curBit) fieldSetIndex := bits.TrailingZeros16(curBit)
field = d.sortedFieldSets[fieldSetIndex] field := d.sortedFieldSets[fieldSetIndex]
keyLen := cursor - start keyLen := cursor - start
cursor++ cursor++
if keyLen < field.keyLen { if keyLen < field.keyLen {
@ -310,7 +290,7 @@ func decodeKeyByBitmapUint16(d *structDecoder, buf []byte, cursor int64) (int64,
for _, c := range chars { for _, c := range chars {
curBit &= bitmap[keyIdx][largeToSmallTable[c]] curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 { if curBit == 0 {
return decodeKeyNotFound(b, cursor, field) return decodeKeyNotFound(b, cursor)
} }
keyIdx++ keyIdx++
} }
@ -318,7 +298,7 @@ func decodeKeyByBitmapUint16(d *structDecoder, buf []byte, cursor int64) (int64,
default: default:
curBit &= bitmap[keyIdx][largeToSmallTable[c]] curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 { if curBit == 0 {
return decodeKeyNotFound(b, cursor, field) return decodeKeyNotFound(b, cursor)
} }
keyIdx++ keyIdx++
} }
@ -330,6 +310,24 @@ func decodeKeyByBitmapUint16(d *structDecoder, buf []byte, cursor int64) (int64,
} }
} }
// decodeKeyNotFound skips the remainder of a key string once it is known
// not to match any field. Scanning starts at the byte after cursor and
// stops at the closing double quote; the byte following a backslash is
// never treated as a terminator. On success it returns the position just
// past the closing quote and a nil field set. Reaching a nul byte first
// yields an unexpected-end-of-JSON error.
func decodeKeyNotFound(b unsafe.Pointer, cursor int64) (int64, *structFieldSet, error) {
	for {
		cursor++
		c := char(b, cursor)
		if c == '"' {
			return cursor + 1, nil, nil
		}
		if c == nul {
			return 0, nil, errUnexpectedEndOfJSON("string", cursor)
		}
		if c != '\\' {
			continue
		}
		// Step over the escaped byte so an escaped quote does not end the key.
		cursor++
		if char(b, cursor) == nul {
			return 0, nil, errUnexpectedEndOfJSON("string", cursor)
		}
	}
}
func decodeKey(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldSet, error) { func decodeKey(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldSet, error) {
key, c, err := d.stringDecoder.decodeByte(buf, cursor) key, c, err := d.stringDecoder.decodeByte(buf, cursor)
if err != nil { if err != nil {
@ -346,10 +344,9 @@ func decodeKey(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldS
func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet, string, error) { func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet, string, error) {
var ( var (
field *structFieldSet
curBit uint8 = math.MaxUint8 curBit uint8 = math.MaxUint8
) )
buf, cursor, p := s.stat() _, cursor, p := s.stat()
for { for {
switch char(p, cursor) { switch char(p, cursor) {
case ' ', '\n', '\t', '\r': case ' ', '\n', '\t', '\r':
@ -357,7 +354,7 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet,
case nul: case nul:
s.cursor = cursor s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat() _, cursor, p = s.stat()
continue continue
} }
return nil, "", errNotAtBeginningOfValue(s.totalOffset()) return nil, "", errNotAtBeginningOfValue(s.totalOffset())
@ -369,11 +366,11 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet,
case '"': case '"':
cursor++ cursor++
s.cursor = cursor s.cursor = cursor
return field, "", nil return nil, "", nil
case nul: case nul:
s.cursor = cursor s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat() _, cursor, p = s.stat()
goto FIRST_CHAR goto FIRST_CHAR
} }
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
@ -385,7 +382,7 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet,
switch c { switch c {
case '"': case '"':
fieldSetIndex := bits.TrailingZeros8(curBit) fieldSetIndex := bits.TrailingZeros8(curBit)
field = d.sortedFieldSets[fieldSetIndex] field := d.sortedFieldSets[fieldSetIndex]
keyLen := cursor - start keyLen := cursor - start
cursor++ cursor++
s.cursor = cursor s.cursor = cursor
@ -397,39 +394,30 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet,
case nul: case nul:
s.cursor = cursor s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat() _, cursor, p = s.stat()
continue continue
} }
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
case '\\':
s.cursor = cursor + 1 // skip '\' char
chars, err := decodeKeyCharByEscapeCharStream(s)
if err != nil {
return nil, "", err
}
cursor = s.cursor
for _, c := range chars {
curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 {
s.cursor = cursor
return decodeKeyNotFoundStream(s, start)
}
keyIdx++
}
default: default:
curBit &= bitmap[keyIdx][largeToSmallTable[c]] curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 { if curBit == 0 {
for {
cursor++
switch char(p, cursor) {
case '"':
b := buf[start:cursor]
key := *(*string)(unsafe.Pointer(&b))
cursor++
s.cursor = cursor s.cursor = cursor
return field, key, nil return decodeKeyNotFoundStream(s, start)
case '\\':
cursor++
if char(p, cursor) == nul {
s.cursor = cursor
if !s.read() {
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
}
buf, cursor, p = s.statForRetry()
}
case nul:
s.cursor = cursor
if !s.read() {
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
}
buf, cursor, p = s.statForRetry()
}
}
} }
keyIdx++ keyIdx++
} }
@ -443,10 +431,9 @@ func decodeKeyByBitmapUint8Stream(d *structDecoder, s *stream) (*structFieldSet,
func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet, string, error) { func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet, string, error) {
var ( var (
field *structFieldSet
curBit uint16 = math.MaxUint16 curBit uint16 = math.MaxUint16
) )
buf, cursor, p := s.stat() _, cursor, p := s.stat()
for { for {
switch char(p, cursor) { switch char(p, cursor) {
case ' ', '\n', '\t', '\r': case ' ', '\n', '\t', '\r':
@ -454,7 +441,7 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet
case nul: case nul:
s.cursor = cursor s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat() _, cursor, p = s.stat()
continue continue
} }
return nil, "", errNotAtBeginningOfValue(s.totalOffset()) return nil, "", errNotAtBeginningOfValue(s.totalOffset())
@ -466,11 +453,11 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet
case '"': case '"':
cursor++ cursor++
s.cursor = cursor s.cursor = cursor
return field, "", nil return nil, "", nil
case nul: case nul:
s.cursor = cursor s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat() _, cursor, p = s.stat()
goto FIRST_CHAR goto FIRST_CHAR
} }
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
@ -482,7 +469,7 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet
switch c { switch c {
case '"': case '"':
fieldSetIndex := bits.TrailingZeros16(curBit) fieldSetIndex := bits.TrailingZeros16(curBit)
field = d.sortedFieldSets[fieldSetIndex] field := d.sortedFieldSets[fieldSetIndex]
keyLen := cursor - start keyLen := cursor - start
cursor++ cursor++
s.cursor = cursor s.cursor = cursor
@ -494,13 +481,107 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet
case nul: case nul:
s.cursor = cursor s.cursor = cursor
if s.read() { if s.read() {
buf, cursor, p = s.stat() _, cursor, p = s.stat()
continue continue
} }
return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset()) return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
case '\\':
s.cursor = cursor + 1 // skip '\' char
chars, err := decodeKeyCharByEscapeCharStream(s)
if err != nil {
return nil, "", err
}
cursor = s.cursor
for _, c := range chars {
curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 {
s.cursor = cursor
return decodeKeyNotFoundStream(s, start)
}
keyIdx++
}
default: default:
curBit &= bitmap[keyIdx][largeToSmallTable[c]] curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 { if curBit == 0 {
s.cursor = cursor
return decodeKeyNotFoundStream(s, start)
}
keyIdx++
}
cursor++
}
default:
return nil, "", errNotAtBeginningOfValue(s.totalOffset())
}
}
}
// decodeKeyCharByUnicodeRuneStream decodes a '\uXXXX' escape (optionally
// followed by a second '\uXXXX' forming a UTF-16 surrogate pair) into its
// UTF-8 bytes.
//
// On entry s.cursor must be on the first hex digit of the escape. On return
// s.cursor is on the LAST byte that was consumed, so that the caller's
// trailing cursor++ lands on the first unread byte.
//
// A surrogate that is not followed by a matching low surrogate decodes to
// U+FFFD and consumes only the first escape; whatever follows is left for
// the caller to process. An error is returned only when the stream cannot
// supply the four hex digits of the first escape.
func decodeKeyCharByUnicodeRuneStream(s *stream) ([]byte, error) {
	const defaultOffset = 4   // hex digits in one escape
	const surrogateOffset = 6 // bytes in a complete `\uXXXX`

	if s.cursor+defaultOffset >= s.length {
		if !s.read() {
			return nil, errInvalidCharacter(s.char(), "escaped unicode char", s.totalOffset())
		}
	}

	r := unicodeToRune(s.buf[s.cursor : s.cursor+defaultOffset])
	if !utf16.IsSurrogate(r) {
		s.cursor += defaultOffset - 1
		return []byte(string(r)), nil
	}

	// Move to the byte right after the first escape and look ahead for the
	// second half of the pair. The refill is best effort: if it fails, the
	// length check below treats the surrogate as unpaired.
	s.cursor += defaultOffset
	if s.cursor+surrogateOffset >= s.length {
		s.read()
	}
	if s.cursor+surrogateOffset >= s.length || s.buf[s.cursor] != '\\' || s.buf[s.cursor+1] != 'u' {
		// Nothing after the first escape was consumed; step back onto its
		// last hex digit instead of skipping unread input.
		s.cursor--
		return []byte(string(unicode.ReplacementChar)), nil
	}

	// The low surrogate's hex digits sit right after the `\u` prefix.
	r2 := unicodeToRune(s.buf[s.cursor+2 : s.cursor+surrogateOffset])
	if paired := utf16.DecodeRune(r, r2); paired != unicode.ReplacementChar {
		s.cursor += surrogateOffset - 1 // both escapes consumed
		return []byte(string(paired)), nil
	}

	// The second escape is not a valid partner: report the first one as
	// U+FFFD and leave the second escape untouched for the caller.
	s.cursor--
	return []byte(string(unicode.ReplacementChar)), nil
}
// decodeKeyCharByEscapeCharStream decodes the escape sequence whose leading
// backslash has already been skipped: on entry s.cursor is on the byte that
// follows the '\'. It returns the decoded bytes of the escaped character.
//
// On successful return s.cursor is on the LAST byte that was consumed (the
// escape letter itself, or the final hex digit of a '\u' escape), so that
// the caller's trailing cursor++ lands on the first unread byte.
//
// If the buffer ends right after the backslash, the stream is refilled and
// the escape byte is read again from the new data. An error is returned
// when no more data is available or when the byte is not a valid escape
// letter.
func decodeKeyCharByEscapeCharStream(s *stream) ([]byte, error) {
	for {
		switch s.buf[s.cursor] {
		case '"':
			return []byte{'"'}, nil
		case '\\':
			return []byte{'\\'}, nil
		case '/':
			return []byte{'/'}, nil
		case 'b':
			return []byte{'\b'}, nil
		case 'f':
			return []byte{'\f'}, nil
		case 'n':
			return []byte{'\n'}, nil
		case 'r':
			return []byte{'\r'}, nil
		case 't':
			return []byte{'\t'}, nil
		case 'u':
			s.cursor++ // step onto the first hex digit
			return decodeKeyCharByUnicodeRuneStream(s)
		case nul:
			// Keep the cursor on the nul while refilling, then loop so the
			// escape byte is re-read from the freshly loaded data.
			if !s.read() {
				return nil, errInvalidCharacter(s.char(), "escaped char", s.totalOffset())
			}
		default:
			// Report the offset just past the offending byte, as before.
			s.cursor++
			return nil, errUnexpectedEndOfJSON("struct field", s.totalOffset())
		}
	}
}
func decodeKeyNotFoundStream(s *stream, start int64) (*structFieldSet, string, error) {
buf, cursor, p := s.stat()
for { for {
cursor++ cursor++
switch char(p, cursor) { switch char(p, cursor) {
@ -509,7 +590,7 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet
key := *(*string)(unsafe.Pointer(&b)) key := *(*string)(unsafe.Pointer(&b))
cursor++ cursor++
s.cursor = cursor s.cursor = cursor
return field, key, nil return nil, key, nil
case '\\': case '\\':
cursor++ cursor++
if char(p, cursor) == nul { if char(p, cursor) == nul {
@ -527,15 +608,6 @@ func decodeKeyByBitmapUint16Stream(d *structDecoder, s *stream) (*structFieldSet
buf, cursor, p = s.statForRetry() buf, cursor, p = s.statForRetry()
} }
} }
}
keyIdx++
}
cursor++
}
default:
return nil, "", errNotAtBeginningOfValue(s.totalOffset())
}
}
} }
func decodeKeyStream(d *structDecoder, s *stream) (*structFieldSet, string, error) { func decodeKeyStream(d *structDecoder, s *stream) (*structFieldSet, string, error) {