2020-04-23 19:39:20 +03:00
|
|
|
package json
|
|
|
|
|
|
|
|
import (
|
2020-11-23 11:16:31 +03:00
|
|
|
"reflect"
|
2020-11-22 18:10:42 +03:00
|
|
|
"unicode"
|
|
|
|
"unicode/utf16"
|
2020-12-24 08:26:18 +03:00
|
|
|
"unicode/utf8"
|
2020-04-23 19:39:20 +03:00
|
|
|
"unsafe"
|
|
|
|
)
|
|
|
|
|
|
|
|
// stringDecoder decodes a JSON string value into a Go string.
//
// structName and fieldName identify the destination being decoded into; they
// are only used to populate the Struct and Field members of the
// UnmarshalTypeError produced by errUnmarshalType.
type stringDecoder struct {
	structName, fieldName string
}
|
|
|
|
|
2020-11-23 11:16:31 +03:00
|
|
|
func newStringDecoder(structName, fieldName string) *stringDecoder {
|
|
|
|
return &stringDecoder{
|
|
|
|
structName: structName,
|
|
|
|
fieldName: fieldName,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (d *stringDecoder) errUnmarshalType(typeName string, offset int64) *UnmarshalTypeError {
|
|
|
|
return &UnmarshalTypeError{
|
|
|
|
Value: typeName,
|
|
|
|
Type: reflect.TypeOf(""),
|
|
|
|
Offset: offset,
|
|
|
|
Struct: d.structName,
|
|
|
|
Field: d.fieldName,
|
|
|
|
}
|
2020-04-23 19:39:20 +03:00
|
|
|
}
|
|
|
|
|
2020-11-19 06:47:42 +03:00
|
|
|
func (d *stringDecoder) decodeStream(s *stream, p unsafe.Pointer) error {
|
2020-07-30 16:41:53 +03:00
|
|
|
bytes, err := d.decodeStreamByte(s)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-12-05 16:27:33 +03:00
|
|
|
*(*string)(p) = string(bytes)
|
|
|
|
s.reset()
|
2020-07-30 16:41:53 +03:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-02-05 18:54:10 +03:00
|
|
|
func (d *stringDecoder) decode(buf *sliceHeader, cursor int64, p unsafe.Pointer) (int64, error) {
|
2020-05-06 20:37:29 +03:00
|
|
|
bytes, c, err := d.decodeByte(buf, cursor)
|
2020-04-23 19:39:20 +03:00
|
|
|
if err != nil {
|
2020-05-06 20:37:29 +03:00
|
|
|
return 0, err
|
2020-04-23 19:39:20 +03:00
|
|
|
}
|
2020-05-06 20:37:29 +03:00
|
|
|
cursor = c
|
2020-11-14 23:27:15 +03:00
|
|
|
**(**string)(unsafe.Pointer(&p)) = *(*string)(unsafe.Pointer(&bytes))
|
2020-05-06 20:37:29 +03:00
|
|
|
return cursor, nil
|
2020-04-23 19:39:20 +03:00
|
|
|
}
|
|
|
|
|
2020-08-20 06:38:50 +03:00
|
|
|
var (
	// hexToInt maps an ASCII hexadecimal digit (either case) to its numeric
	// value. Every other byte maps to 0, so the table performs no validation.
	hexToInt = [256]int{
		'0': 0, '1': 1, '2': 2, '3': 3, '4': 4,
		'5': 5, '6': 6, '7': 7, '8': 8, '9': 9,
		'A': 10, 'B': 11, 'C': 12, 'D': 13, 'E': 14, 'F': 15,
		'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14, 'f': 15,
	}
)
|
|
|
|
|
|
|
|
func unicodeToRune(code []byte) rune {
|
2020-11-22 18:10:42 +03:00
|
|
|
var r rune
|
2020-08-20 06:38:50 +03:00
|
|
|
for i := 0; i < len(code); i++ {
|
2020-11-22 18:10:42 +03:00
|
|
|
r = r*16 + rune(hexToInt[code[i]])
|
2020-08-20 06:38:50 +03:00
|
|
|
}
|
2020-11-22 18:10:42 +03:00
|
|
|
return r
|
2020-08-20 06:38:50 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
func decodeEscapeString(s *stream) error {
|
|
|
|
s.cursor++
|
|
|
|
RETRY:
|
|
|
|
switch s.buf[s.cursor] {
|
|
|
|
case '"':
|
|
|
|
s.buf[s.cursor] = '"'
|
|
|
|
case '\\':
|
|
|
|
s.buf[s.cursor] = '\\'
|
|
|
|
case '/':
|
|
|
|
s.buf[s.cursor] = '/'
|
|
|
|
case 'b':
|
|
|
|
s.buf[s.cursor] = '\b'
|
|
|
|
case 'f':
|
|
|
|
s.buf[s.cursor] = '\f'
|
|
|
|
case 'n':
|
|
|
|
s.buf[s.cursor] = '\n'
|
|
|
|
case 'r':
|
|
|
|
s.buf[s.cursor] = '\r'
|
|
|
|
case 't':
|
|
|
|
s.buf[s.cursor] = '\t'
|
|
|
|
case 'u':
|
|
|
|
if s.cursor+5 >= s.length {
|
|
|
|
if !s.read() {
|
|
|
|
return errInvalidCharacter(s.char(), "escaped string", s.totalOffset())
|
|
|
|
}
|
|
|
|
}
|
2020-11-22 18:10:42 +03:00
|
|
|
r := unicodeToRune(s.buf[s.cursor+1 : s.cursor+5])
|
|
|
|
if utf16.IsSurrogate(r) {
|
|
|
|
if s.cursor+11 >= s.length || s.buf[s.cursor+5] != '\\' || s.buf[s.cursor+6] != 'u' {
|
|
|
|
r = unicode.ReplacementChar
|
|
|
|
unicode := []byte(string(r))
|
|
|
|
s.buf = append(append(s.buf[:s.cursor-1], unicode...), s.buf[s.cursor+5:]...)
|
|
|
|
s.cursor = s.cursor - 2 + int64(len(unicode))
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
r2 := unicodeToRune(s.buf[s.cursor+7 : s.cursor+11])
|
|
|
|
if r := utf16.DecodeRune(r, r2); r != unicode.ReplacementChar {
|
|
|
|
// valid surrogate pair
|
|
|
|
unicode := []byte(string(r))
|
|
|
|
s.buf = append(append(s.buf[:s.cursor-1], unicode...), s.buf[s.cursor+11:]...)
|
|
|
|
s.cursor = s.cursor - 2 + int64(len(unicode))
|
|
|
|
} else {
|
|
|
|
unicode := []byte(string(r))
|
|
|
|
s.buf = append(append(s.buf[:s.cursor-1], unicode...), s.buf[s.cursor+5:]...)
|
|
|
|
s.cursor = s.cursor - 2 + int64(len(unicode))
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
unicode := []byte(string(r))
|
|
|
|
s.buf = append(append(s.buf[:s.cursor-1], unicode...), s.buf[s.cursor+5:]...)
|
|
|
|
s.cursor = s.cursor - 2 + int64(len(unicode))
|
|
|
|
}
|
2020-08-20 06:38:50 +03:00
|
|
|
return nil
|
|
|
|
case nul:
|
|
|
|
if !s.read() {
|
|
|
|
return errInvalidCharacter(s.char(), "escaped string", s.totalOffset())
|
|
|
|
}
|
|
|
|
goto RETRY
|
|
|
|
default:
|
|
|
|
return errUnexpectedEndOfJSON("string", s.totalOffset())
|
|
|
|
}
|
|
|
|
s.buf = append(s.buf[:s.cursor-1], s.buf[s.cursor:]...)
|
|
|
|
s.cursor--
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-02-01 16:31:39 +03:00
|
|
|
// appendCoerceInvalidUTF8 appends s to b, replacing every byte of s that is
// not part of a valid UTF-8 sequence with the encoding of U+FFFD, and returns
// the extended slice.
//
//nolint:deadcode,unused
func appendCoerceInvalidUTF8(b []byte, s []byte) []byte {
	var scratch [utf8.UTFMax]byte
	for len(s) > 0 {
		// DecodeRune yields (RuneError, 1) for an invalid byte, which
		// EncodeRune then writes out as U+FFFD.
		r, size := utf8.DecodeRune(s)
		s = s[size:]
		n := utf8.EncodeRune(scratch[:], r)
		b = append(b, scratch[:n]...)
	}
	return b
}
|
|
|
|
|
2020-07-30 16:41:53 +03:00
|
|
|
func stringBytes(s *stream) ([]byte, error) {
|
2020-07-31 11:10:03 +03:00
|
|
|
s.cursor++
|
2020-07-30 16:41:53 +03:00
|
|
|
start := s.cursor
|
|
|
|
for {
|
|
|
|
switch s.char() {
|
|
|
|
case '\\':
|
2020-11-21 20:47:18 +03:00
|
|
|
if err := decodeEscapeString(s); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-07-30 16:41:53 +03:00
|
|
|
case '"':
|
|
|
|
literal := s.buf[start:s.cursor]
|
2020-12-24 08:26:18 +03:00
|
|
|
// TODO: this flow is so slow sequence.
|
|
|
|
// literal = appendCoerceInvalidUTF8(make([]byte, 0, len(literal)), literal)
|
2020-07-31 11:10:03 +03:00
|
|
|
s.cursor++
|
2020-07-30 16:41:53 +03:00
|
|
|
return literal, nil
|
2020-07-31 11:10:03 +03:00
|
|
|
case nul:
|
|
|
|
if s.read() {
|
|
|
|
continue
|
|
|
|
}
|
2020-07-30 16:41:53 +03:00
|
|
|
goto ERROR
|
|
|
|
}
|
2020-07-31 11:10:03 +03:00
|
|
|
s.cursor++
|
2020-07-30 16:41:53 +03:00
|
|
|
}
|
|
|
|
ERROR:
|
|
|
|
return nil, errUnexpectedEndOfJSON("string", s.totalOffset())
|
|
|
|
}
|
|
|
|
|
|
|
|
func nullBytes(s *stream) error {
|
2020-07-31 11:10:03 +03:00
|
|
|
if s.cursor+3 >= s.length {
|
|
|
|
if !s.read() {
|
|
|
|
return errInvalidCharacter(s.char(), "null", s.totalOffset())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s.cursor++
|
2020-07-30 16:41:53 +03:00
|
|
|
if s.char() != 'u' {
|
|
|
|
return errInvalidCharacter(s.char(), "null", s.totalOffset())
|
|
|
|
}
|
2020-07-31 11:10:03 +03:00
|
|
|
s.cursor++
|
2020-07-30 16:41:53 +03:00
|
|
|
if s.char() != 'l' {
|
|
|
|
return errInvalidCharacter(s.char(), "null", s.totalOffset())
|
|
|
|
}
|
2020-07-31 11:10:03 +03:00
|
|
|
s.cursor++
|
2020-07-30 16:41:53 +03:00
|
|
|
if s.char() != 'l' {
|
|
|
|
return errInvalidCharacter(s.char(), "null", s.totalOffset())
|
|
|
|
}
|
2020-07-31 11:10:03 +03:00
|
|
|
s.cursor++
|
2020-07-30 16:41:53 +03:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (d *stringDecoder) decodeStreamByte(s *stream) ([]byte, error) {
|
|
|
|
for {
|
|
|
|
switch s.char() {
|
|
|
|
case ' ', '\n', '\t', '\r':
|
2020-07-31 11:10:03 +03:00
|
|
|
s.cursor++
|
|
|
|
continue
|
2020-11-23 11:16:31 +03:00
|
|
|
case '[':
|
|
|
|
return nil, d.errUnmarshalType("array", s.totalOffset())
|
|
|
|
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
|
|
|
return nil, d.errUnmarshalType("number", s.totalOffset())
|
2020-07-30 16:41:53 +03:00
|
|
|
case '"':
|
|
|
|
return stringBytes(s)
|
|
|
|
case 'n':
|
|
|
|
if err := nullBytes(s); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-07-31 14:24:39 +03:00
|
|
|
return []byte{}, nil
|
2020-07-31 11:10:03 +03:00
|
|
|
case nul:
|
|
|
|
if s.read() {
|
|
|
|
continue
|
|
|
|
}
|
2020-07-30 16:41:53 +03:00
|
|
|
}
|
2020-07-31 11:10:03 +03:00
|
|
|
break
|
2020-07-30 16:41:53 +03:00
|
|
|
}
|
|
|
|
return nil, errNotAtBeginningOfValue(s.totalOffset())
|
|
|
|
}
|
|
|
|
|
2021-02-05 18:54:10 +03:00
|
|
|
func (d *stringDecoder) decodeByte(buf *sliceHeader, cursor int64) ([]byte, int64, error) {
|
2020-05-07 07:51:17 +03:00
|
|
|
for {
|
2021-02-05 18:54:10 +03:00
|
|
|
switch char(buf.data, cursor) {
|
2020-05-07 07:51:17 +03:00
|
|
|
case ' ', '\n', '\t', '\r':
|
|
|
|
cursor++
|
2020-11-23 11:16:31 +03:00
|
|
|
case '[':
|
|
|
|
return nil, 0, d.errUnmarshalType("array", cursor)
|
|
|
|
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
|
|
|
return nil, 0, d.errUnmarshalType("number", cursor)
|
2020-05-07 07:51:17 +03:00
|
|
|
case '"':
|
|
|
|
cursor++
|
|
|
|
start := cursor
|
|
|
|
for {
|
2021-02-05 18:54:10 +03:00
|
|
|
switch char(buf.data, cursor) {
|
2020-05-07 07:51:17 +03:00
|
|
|
case '\\':
|
|
|
|
cursor++
|
2021-02-05 18:54:10 +03:00
|
|
|
b := (*(*[]byte)(unsafe.Pointer(buf)))
|
|
|
|
switch char(buf.data, cursor) {
|
2020-08-20 06:38:50 +03:00
|
|
|
case '"':
|
2021-02-05 18:54:10 +03:00
|
|
|
b[cursor] = '"'
|
|
|
|
b = append(b[:cursor-1], b[cursor:]...)
|
2020-08-20 06:38:50 +03:00
|
|
|
case '\\':
|
2021-02-05 18:54:10 +03:00
|
|
|
b[cursor] = '\\'
|
|
|
|
b = append(b[:cursor-1], b[cursor:]...)
|
2020-08-20 06:38:50 +03:00
|
|
|
case '/':
|
2021-02-05 18:54:10 +03:00
|
|
|
b[cursor] = '/'
|
|
|
|
b = append(b[:cursor-1], b[cursor:]...)
|
2020-08-20 06:38:50 +03:00
|
|
|
case 'b':
|
2021-02-05 18:54:10 +03:00
|
|
|
b[cursor] = '\b'
|
|
|
|
b = append(b[:cursor-1], b[cursor:]...)
|
2020-08-20 06:38:50 +03:00
|
|
|
case 'f':
|
2021-02-05 18:54:10 +03:00
|
|
|
b[cursor] = '\f'
|
|
|
|
b = append(b[:cursor-1], b[cursor:]...)
|
2020-08-20 06:38:50 +03:00
|
|
|
case 'n':
|
2021-02-05 18:54:10 +03:00
|
|
|
b[cursor] = '\n'
|
|
|
|
b = append(b[:cursor-1], b[cursor:]...)
|
2020-08-20 06:38:50 +03:00
|
|
|
case 'r':
|
2021-02-05 18:54:10 +03:00
|
|
|
b[cursor] = '\r'
|
|
|
|
b = append(b[:cursor-1], b[cursor:]...)
|
2020-08-20 06:38:50 +03:00
|
|
|
case 't':
|
2021-02-05 18:54:10 +03:00
|
|
|
b[cursor] = '\t'
|
|
|
|
b = append(b[:cursor-1], b[cursor:]...)
|
2020-08-20 06:38:50 +03:00
|
|
|
case 'u':
|
2021-02-05 18:54:10 +03:00
|
|
|
buflen := int64(buf.len)
|
2020-08-20 06:38:50 +03:00
|
|
|
if cursor+5 >= buflen {
|
|
|
|
return nil, 0, errUnexpectedEndOfJSON("escaped string", cursor)
|
|
|
|
}
|
2021-02-05 18:54:10 +03:00
|
|
|
code := unicodeToRune(b[cursor+1 : cursor+5])
|
2020-08-20 06:38:50 +03:00
|
|
|
unicode := []byte(string(code))
|
2021-02-05 18:54:10 +03:00
|
|
|
b = append(append(b[:cursor-1], unicode...), b[cursor+5:]...)
|
2020-08-20 06:38:50 +03:00
|
|
|
default:
|
|
|
|
return nil, 0, errUnexpectedEndOfJSON("escaped string", cursor)
|
|
|
|
}
|
2021-02-05 18:54:10 +03:00
|
|
|
buf = (*sliceHeader)(unsafe.Pointer(&b))
|
2020-08-20 06:38:50 +03:00
|
|
|
continue
|
2020-05-07 07:51:17 +03:00
|
|
|
case '"':
|
2021-02-05 18:54:10 +03:00
|
|
|
literal := (*(*[]byte)(unsafe.Pointer(buf)))[start:cursor]
|
2020-05-07 07:51:17 +03:00
|
|
|
cursor++
|
|
|
|
return literal, cursor, nil
|
2020-07-31 12:07:23 +03:00
|
|
|
case nul:
|
2020-05-23 06:51:09 +03:00
|
|
|
return nil, 0, errUnexpectedEndOfJSON("string", cursor)
|
2020-05-07 07:51:17 +03:00
|
|
|
}
|
2020-05-07 07:46:32 +03:00
|
|
|
cursor++
|
2020-04-26 08:59:45 +03:00
|
|
|
}
|
2020-05-07 07:51:17 +03:00
|
|
|
case 'n':
|
2021-02-05 18:54:10 +03:00
|
|
|
buflen := int64(buf.len)
|
2020-05-07 07:51:17 +03:00
|
|
|
if cursor+3 >= buflen {
|
2020-05-23 06:51:09 +03:00
|
|
|
return nil, 0, errUnexpectedEndOfJSON("null", cursor)
|
2020-05-07 07:51:17 +03:00
|
|
|
}
|
2021-02-05 18:54:10 +03:00
|
|
|
if char(buf.data, cursor+1) != 'u' {
|
|
|
|
return nil, 0, errInvalidCharacter(char(buf.data, cursor+1), "null", cursor)
|
2020-05-07 07:51:17 +03:00
|
|
|
}
|
2021-02-05 18:54:10 +03:00
|
|
|
if char(buf.data, cursor+2) != 'l' {
|
|
|
|
return nil, 0, errInvalidCharacter(char(buf.data, cursor+2), "null", cursor)
|
2020-05-07 07:51:17 +03:00
|
|
|
}
|
2021-02-05 18:54:10 +03:00
|
|
|
if char(buf.data, cursor+3) != 'l' {
|
|
|
|
return nil, 0, errInvalidCharacter(char(buf.data, cursor+3), "null", cursor)
|
2020-05-07 07:51:17 +03:00
|
|
|
}
|
2020-08-08 06:21:25 +03:00
|
|
|
cursor += 4
|
2020-07-31 14:24:39 +03:00
|
|
|
return []byte{}, cursor, nil
|
2020-05-07 08:21:29 +03:00
|
|
|
default:
|
|
|
|
goto ERROR
|
2020-04-23 19:39:20 +03:00
|
|
|
}
|
|
|
|
}
|
2020-05-07 08:21:29 +03:00
|
|
|
ERROR:
|
2020-05-23 06:51:09 +03:00
|
|
|
return nil, 0, errNotAtBeginningOfValue(cursor)
|
2020-04-23 19:39:20 +03:00
|
|
|
}
|