go-json/internal/decoder/struct.go

795 lines
18 KiB
Go

package decoder
import (
"fmt"
"math"
"math/bits"
"sort"
"strings"
"unicode"
"unicode/utf16"
"unsafe"
"github.com/goccy/go-json/internal/errors"
)
type structFieldSet struct {
dec Decoder
offset uintptr
isTaggedKey bool
fieldIdx int
key string
keyLen int64
err error
}
type structDecoder struct {
fieldMap map[string]*structFieldSet
fieldUniqueNameNum int
stringDecoder *stringDecoder
structName string
fieldName string
isTriedOptimize bool
keyBitmapUint8 [][256]uint8
keyBitmapUint16 [][256]uint16
sortedFieldSets []*structFieldSet
keyDecoder func(*structDecoder, []byte, int64) (int64, *structFieldSet, error)
keyStreamDecoder func(*structDecoder, *Stream) (*structFieldSet, string, error)
}
var (
largeToSmallTable [256]byte
)
func init() {
for i := 0; i < 256; i++ {
c := i
if 'A' <= c && c <= 'Z' {
c += 'a' - 'A'
}
largeToSmallTable[i] = byte(c)
}
}
func newStructDecoder(structName, fieldName string, fieldMap map[string]*structFieldSet) *structDecoder {
return &structDecoder{
fieldMap: fieldMap,
stringDecoder: newStringDecoder(structName, fieldName),
structName: structName,
fieldName: fieldName,
keyDecoder: decodeKey,
keyStreamDecoder: decodeKeyStream,
}
}
const (
allowOptimizeMaxKeyLen = 64
allowOptimizeMaxFieldLen = 16
)
func (d *structDecoder) tryOptimize() {
fieldUniqueNameMap := map[string]int{}
fieldIdx := -1
for k, v := range d.fieldMap {
lower := strings.ToLower(k)
idx, exists := fieldUniqueNameMap[lower]
if exists {
v.fieldIdx = idx
} else {
fieldIdx++
v.fieldIdx = fieldIdx
}
fieldUniqueNameMap[lower] = fieldIdx
}
d.fieldUniqueNameNum = len(fieldUniqueNameMap)
if d.isTriedOptimize {
return
}
fieldMap := map[string]*structFieldSet{}
conflicted := map[string]struct{}{}
for k, v := range d.fieldMap {
key := strings.ToLower(k)
if key != k {
// already exists same key (e.g. Hello and HELLO has same lower case key
if _, exists := conflicted[key]; exists {
d.isTriedOptimize = true
return
}
conflicted[key] = struct{}{}
}
if field, exists := fieldMap[key]; exists {
if field != v {
d.isTriedOptimize = true
return
}
}
fieldMap[key] = v
}
if len(fieldMap) > allowOptimizeMaxFieldLen {
d.isTriedOptimize = true
return
}
var maxKeyLen int
sortedKeys := []string{}
for key := range fieldMap {
keyLen := len(key)
if keyLen > allowOptimizeMaxKeyLen {
d.isTriedOptimize = true
return
}
if maxKeyLen < keyLen {
maxKeyLen = keyLen
}
sortedKeys = append(sortedKeys, key)
}
sort.Strings(sortedKeys)
// By allocating one extra capacity than `maxKeyLen`,
// it is possible to avoid the process of comparing the index of the key with the length of the bitmap each time.
bitmapLen := maxKeyLen + 1
if len(sortedKeys) <= 8 {
keyBitmap := make([][256]uint8, bitmapLen)
for i, key := range sortedKeys {
for j := 0; j < len(key); j++ {
c := key[j]
keyBitmap[j][c] |= (1 << uint(i))
}
d.sortedFieldSets = append(d.sortedFieldSets, fieldMap[key])
}
d.keyBitmapUint8 = keyBitmap
d.keyDecoder = decodeKeyByBitmapUint8
d.keyStreamDecoder = decodeKeyByBitmapUint8Stream
} else {
keyBitmap := make([][256]uint16, bitmapLen)
for i, key := range sortedKeys {
for j := 0; j < len(key); j++ {
c := key[j]
keyBitmap[j][c] |= (1 << uint(i))
}
d.sortedFieldSets = append(d.sortedFieldSets, fieldMap[key])
}
d.keyBitmapUint16 = keyBitmap
d.keyDecoder = decodeKeyByBitmapUint16
d.keyStreamDecoder = decodeKeyByBitmapUint16Stream
}
}
// decode from '\uXXXX'
func decodeKeyCharByUnicodeRune(buf []byte, cursor int64) ([]byte, int64) {
const defaultOffset = 4
const surrogateOffset = 6
r := unicodeToRune(buf[cursor : cursor+defaultOffset])
if utf16.IsSurrogate(r) {
cursor += defaultOffset
if cursor+surrogateOffset >= int64(len(buf)) || buf[cursor] != '\\' || buf[cursor+1] != 'u' {
return []byte(string(unicode.ReplacementChar)), cursor + defaultOffset - 1
}
cursor += 2
r2 := unicodeToRune(buf[cursor : cursor+defaultOffset])
if r := utf16.DecodeRune(r, r2); r != unicode.ReplacementChar {
return []byte(string(r)), cursor + defaultOffset - 1
}
}
return []byte(string(r)), cursor + defaultOffset - 1
}
func decodeKeyCharByEscapedChar(buf []byte, cursor int64) ([]byte, int64) {
c := buf[cursor]
cursor++
switch c {
case '"':
return []byte{'"'}, cursor
case '\\':
return []byte{'\\'}, cursor
case '/':
return []byte{'/'}, cursor
case 'b':
return []byte{'\b'}, cursor
case 'f':
return []byte{'\f'}, cursor
case 'n':
return []byte{'\n'}, cursor
case 'r':
return []byte{'\r'}, cursor
case 't':
return []byte{'\t'}, cursor
case 'u':
return decodeKeyCharByUnicodeRune(buf, cursor)
}
return nil, cursor
}
func decodeKeyByBitmapUint8(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldSet, error) {
var (
curBit uint8 = math.MaxUint8
)
b := (*sliceHeader)(unsafe.Pointer(&buf)).data
for {
switch char(b, cursor) {
case ' ', '\n', '\t', '\r':
cursor++
case '"':
cursor++
c := char(b, cursor)
switch c {
case '"':
cursor++
return cursor, nil, nil
case nul:
return 0, nil, errors.ErrUnexpectedEndOfJSON("string", cursor)
}
keyIdx := 0
bitmap := d.keyBitmapUint8
start := cursor
for {
c := char(b, cursor)
switch c {
case '"':
fieldSetIndex := bits.TrailingZeros8(curBit)
field := d.sortedFieldSets[fieldSetIndex]
keyLen := cursor - start
cursor++
if keyLen < field.keyLen {
// early match
return cursor, nil, nil
}
return cursor, field, nil
case nul:
return 0, nil, errors.ErrUnexpectedEndOfJSON("string", cursor)
case '\\':
cursor++
chars, nextCursor := decodeKeyCharByEscapedChar(buf, cursor)
for _, c := range chars {
curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 {
return decodeKeyNotFound(b, cursor)
}
keyIdx++
}
cursor = nextCursor
default:
curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 {
return decodeKeyNotFound(b, cursor)
}
keyIdx++
}
cursor++
}
default:
return cursor, nil, errors.ErrNotAtBeginningOfValue(cursor)
}
}
}
func decodeKeyByBitmapUint16(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldSet, error) {
var (
curBit uint16 = math.MaxUint16
)
b := (*sliceHeader)(unsafe.Pointer(&buf)).data
for {
switch char(b, cursor) {
case ' ', '\n', '\t', '\r':
cursor++
case '"':
cursor++
c := char(b, cursor)
switch c {
case '"':
cursor++
return cursor, nil, nil
case nul:
return 0, nil, errors.ErrUnexpectedEndOfJSON("string", cursor)
}
keyIdx := 0
bitmap := d.keyBitmapUint16
start := cursor
for {
c := char(b, cursor)
switch c {
case '"':
fieldSetIndex := bits.TrailingZeros16(curBit)
field := d.sortedFieldSets[fieldSetIndex]
keyLen := cursor - start
cursor++
if keyLen < field.keyLen {
// early match
return cursor, nil, nil
}
return cursor, field, nil
case nul:
return 0, nil, errors.ErrUnexpectedEndOfJSON("string", cursor)
case '\\':
cursor++
chars, nextCursor := decodeKeyCharByEscapedChar(buf, cursor)
for _, c := range chars {
curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 {
return decodeKeyNotFound(b, cursor)
}
keyIdx++
}
cursor = nextCursor
default:
curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 {
return decodeKeyNotFound(b, cursor)
}
keyIdx++
}
cursor++
}
default:
return cursor, nil, errors.ErrNotAtBeginningOfValue(cursor)
}
}
}
func decodeKeyNotFound(b unsafe.Pointer, cursor int64) (int64, *structFieldSet, error) {
for {
cursor++
switch char(b, cursor) {
case '"':
cursor++
return cursor, nil, nil
case '\\':
cursor++
if char(b, cursor) == nul {
return 0, nil, errors.ErrUnexpectedEndOfJSON("string", cursor)
}
case nul:
return 0, nil, errors.ErrUnexpectedEndOfJSON("string", cursor)
}
}
}
func decodeKey(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldSet, error) {
key, c, err := d.stringDecoder.decodeByte(buf, cursor)
if err != nil {
return 0, nil, err
}
cursor = c
k := *(*string)(unsafe.Pointer(&key))
field, exists := d.fieldMap[k]
if !exists {
return cursor, nil, nil
}
return cursor, field, nil
}
func decodeKeyByBitmapUint8Stream(d *structDecoder, s *Stream) (*structFieldSet, string, error) {
var (
curBit uint8 = math.MaxUint8
)
_, cursor, p := s.stat()
for {
switch char(p, cursor) {
case ' ', '\n', '\t', '\r':
cursor++
case nul:
s.cursor = cursor
if s.read() {
_, cursor, p = s.stat()
continue
}
return nil, "", errors.ErrNotAtBeginningOfValue(s.totalOffset())
case '"':
cursor++
FIRST_CHAR:
start := cursor
switch char(p, cursor) {
case '"':
cursor++
s.cursor = cursor
return nil, "", nil
case nul:
s.cursor = cursor
if s.read() {
_, cursor, p = s.stat()
goto FIRST_CHAR
}
return nil, "", errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
}
keyIdx := 0
bitmap := d.keyBitmapUint8
for {
c := char(p, cursor)
switch c {
case '"':
fieldSetIndex := bits.TrailingZeros8(curBit)
field := d.sortedFieldSets[fieldSetIndex]
keyLen := cursor - start
cursor++
s.cursor = cursor
if keyLen < field.keyLen {
// early match
return nil, field.key, nil
}
return field, field.key, nil
case nul:
s.cursor = cursor
if s.read() {
_, cursor, p = s.stat()
continue
}
return nil, "", errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
case '\\':
s.cursor = cursor + 1 // skip '\' char
chars, err := decodeKeyCharByEscapeCharStream(s)
if err != nil {
return nil, "", err
}
cursor = s.cursor
for _, c := range chars {
curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 {
s.cursor = cursor
return decodeKeyNotFoundStream(s, start)
}
keyIdx++
}
default:
curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 {
s.cursor = cursor
return decodeKeyNotFoundStream(s, start)
}
keyIdx++
}
cursor++
}
default:
return nil, "", errors.ErrNotAtBeginningOfValue(s.totalOffset())
}
}
}
func decodeKeyByBitmapUint16Stream(d *structDecoder, s *Stream) (*structFieldSet, string, error) {
var (
curBit uint16 = math.MaxUint16
)
_, cursor, p := s.stat()
for {
switch char(p, cursor) {
case ' ', '\n', '\t', '\r':
cursor++
case nul:
s.cursor = cursor
if s.read() {
_, cursor, p = s.stat()
continue
}
return nil, "", errors.ErrNotAtBeginningOfValue(s.totalOffset())
case '"':
cursor++
FIRST_CHAR:
start := cursor
switch char(p, cursor) {
case '"':
cursor++
s.cursor = cursor
return nil, "", nil
case nul:
s.cursor = cursor
if s.read() {
_, cursor, p = s.stat()
goto FIRST_CHAR
}
return nil, "", errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
}
keyIdx := 0
bitmap := d.keyBitmapUint16
for {
c := char(p, cursor)
switch c {
case '"':
fieldSetIndex := bits.TrailingZeros16(curBit)
field := d.sortedFieldSets[fieldSetIndex]
keyLen := cursor - start
cursor++
s.cursor = cursor
if keyLen < field.keyLen {
// early match
return nil, field.key, nil
}
return field, field.key, nil
case nul:
s.cursor = cursor
if s.read() {
_, cursor, p = s.stat()
continue
}
return nil, "", errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
case '\\':
s.cursor = cursor + 1 // skip '\' char
chars, err := decodeKeyCharByEscapeCharStream(s)
if err != nil {
return nil, "", err
}
cursor = s.cursor
for _, c := range chars {
curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 {
s.cursor = cursor
return decodeKeyNotFoundStream(s, start)
}
keyIdx++
}
default:
curBit &= bitmap[keyIdx][largeToSmallTable[c]]
if curBit == 0 {
s.cursor = cursor
return decodeKeyNotFoundStream(s, start)
}
keyIdx++
}
cursor++
}
default:
return nil, "", errors.ErrNotAtBeginningOfValue(s.totalOffset())
}
}
}
// decode from '\uXXXX'
func decodeKeyCharByUnicodeRuneStream(s *Stream) ([]byte, error) {
const defaultOffset = 4
const surrogateOffset = 6
if s.cursor+defaultOffset >= s.length {
if !s.read() {
return nil, errors.ErrInvalidCharacter(s.char(), "escaped unicode char", s.totalOffset())
}
}
r := unicodeToRune(s.buf[s.cursor : s.cursor+defaultOffset])
if utf16.IsSurrogate(r) {
s.cursor += defaultOffset
if s.cursor+surrogateOffset >= s.length {
s.read()
}
if s.cursor+surrogateOffset >= s.length || s.buf[s.cursor] != '\\' || s.buf[s.cursor+1] != 'u' {
s.cursor += defaultOffset - 1
return []byte(string(unicode.ReplacementChar)), nil
}
r2 := unicodeToRune(s.buf[s.cursor+defaultOffset+2 : s.cursor+surrogateOffset])
if r := utf16.DecodeRune(r, r2); r != unicode.ReplacementChar {
s.cursor += defaultOffset - 1
return []byte(string(r)), nil
}
}
s.cursor += defaultOffset - 1
return []byte(string(r)), nil
}
func decodeKeyCharByEscapeCharStream(s *Stream) ([]byte, error) {
c := s.buf[s.cursor]
s.cursor++
RETRY:
switch c {
case '"':
return []byte{'"'}, nil
case '\\':
return []byte{'\\'}, nil
case '/':
return []byte{'/'}, nil
case 'b':
return []byte{'\b'}, nil
case 'f':
return []byte{'\f'}, nil
case 'n':
return []byte{'\n'}, nil
case 'r':
return []byte{'\r'}, nil
case 't':
return []byte{'\t'}, nil
case 'u':
return decodeKeyCharByUnicodeRuneStream(s)
case nul:
if !s.read() {
return nil, errors.ErrInvalidCharacter(s.char(), "escaped char", s.totalOffset())
}
goto RETRY
default:
return nil, errors.ErrUnexpectedEndOfJSON("struct field", s.totalOffset())
}
}
func decodeKeyNotFoundStream(s *Stream, start int64) (*structFieldSet, string, error) {
buf, cursor, p := s.stat()
for {
cursor++
switch char(p, cursor) {
case '"':
b := buf[start:cursor]
key := *(*string)(unsafe.Pointer(&b))
cursor++
s.cursor = cursor
return nil, key, nil
case '\\':
cursor++
if char(p, cursor) == nul {
s.cursor = cursor
if !s.read() {
return nil, "", errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
}
buf, cursor, p = s.statForRetry()
}
case nul:
s.cursor = cursor
if !s.read() {
return nil, "", errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
}
buf, cursor, p = s.statForRetry()
}
}
}
func decodeKeyStream(d *structDecoder, s *Stream) (*structFieldSet, string, error) {
key, err := d.stringDecoder.decodeStreamByte(s)
if err != nil {
return nil, "", err
}
k := *(*string)(unsafe.Pointer(&key))
return d.fieldMap[k], k, nil
}
func (d *structDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
depth++
if depth > maxDecodeNestingDepth {
return errors.ErrExceededMaxDepth(s.char(), s.cursor)
}
c := s.skipWhiteSpace()
switch c {
case 'n':
if err := nullBytes(s); err != nil {
return err
}
return nil
default:
if s.char() != '{' {
return errors.ErrNotAtBeginningOfValue(s.totalOffset())
}
}
s.cursor++
if s.skipWhiteSpace() == '}' {
s.cursor++
return nil
}
for {
s.reset()
field, key, err := d.keyStreamDecoder(d, s)
if err != nil {
return err
}
if s.skipWhiteSpace() != ':' {
return errors.ErrExpected("colon after object key", s.totalOffset())
}
s.cursor++
if field != nil {
if field.err != nil {
return field.err
}
if err := field.dec.DecodeStream(s, depth, unsafe.Pointer(uintptr(p)+field.offset)); err != nil {
return err
}
} else if s.DisallowUnknownFields {
return fmt.Errorf("json: unknown field %q", key)
} else {
if err := s.skipValue(depth); err != nil {
return err
}
}
c := s.skipWhiteSpace()
if c == '}' {
s.cursor++
return nil
}
if c != ',' {
return errors.ErrExpected("comma after object element", s.totalOffset())
}
s.cursor++
}
}
func (d *structDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
buf := ctx.Buf
depth++
if depth > maxDecodeNestingDepth {
return 0, errors.ErrExceededMaxDepth(buf[cursor], cursor)
}
buflen := int64(len(buf))
cursor = skipWhiteSpace(buf, cursor)
b := (*sliceHeader)(unsafe.Pointer(&buf)).data
switch char(b, cursor) {
case 'n':
if err := validateNull(buf, cursor); err != nil {
return 0, err
}
cursor += 4
return cursor, nil
case '{':
default:
return 0, errors.ErrNotAtBeginningOfValue(cursor)
}
cursor++
cursor = skipWhiteSpace(buf, cursor)
if buf[cursor] == '}' {
cursor++
return cursor, nil
}
var (
seenFields map[int]struct{}
seenFieldNum int
)
firstWin := ctx.Option.FirstWin
if firstWin {
seenFields = make(map[int]struct{}, d.fieldUniqueNameNum)
}
for {
c, field, err := d.keyDecoder(d, buf, cursor)
if err != nil {
return 0, err
}
cursor = skipWhiteSpace(buf, c)
if char(b, cursor) != ':' {
return 0, errors.ErrExpected("colon after object key", cursor)
}
cursor++
if cursor >= buflen {
return 0, errors.ErrExpected("object value after colon", cursor)
}
if field != nil {
if field.err != nil {
return 0, field.err
}
if firstWin {
if _, exists := seenFields[field.fieldIdx]; exists {
c, err := skipValue(buf, cursor, depth)
if err != nil {
return 0, err
}
cursor = c
} else {
c, err := field.dec.Decode(ctx, cursor, depth, unsafe.Pointer(uintptr(p)+field.offset))
if err != nil {
return 0, err
}
cursor = c
seenFieldNum++
if d.fieldUniqueNameNum <= seenFieldNum {
return skipObject(buf, cursor, depth)
}
seenFields[field.fieldIdx] = struct{}{}
}
} else {
c, err := field.dec.Decode(ctx, cursor, depth, unsafe.Pointer(uintptr(p)+field.offset))
if err != nil {
return 0, err
}
cursor = c
}
} else {
c, err := skipValue(buf, cursor, depth)
if err != nil {
return 0, err
}
cursor = c
}
cursor = skipWhiteSpace(buf, cursor)
if char(b, cursor) == '}' {
cursor++
return cursor, nil
}
if char(b, cursor) != ',' {
return 0, errors.ErrExpected("comma after object element", cursor)
}
cursor++
}
}