Fix decoder

This commit is contained in:
Masaaki Goshima 2021-06-06 11:00:22 +09:00
parent 3c0e1b5e8c
commit a69176cd30
2 changed files with 20 additions and 16 deletions

View File

@ -203,7 +203,7 @@ func (d *interfaceDecoder) decodeStreamEmptyInterface(s *Stream, depth int64, p
for { for {
switch s.char() { switch s.char() {
case '\\': case '\\':
if err := decodeEscapeString(s); err != nil { if _, err := decodeEscapeString(s, nil); err != nil {
return err return err
} }
case '"': case '"':

View File

@ -93,38 +93,40 @@ func unicodeToRune(code []byte) rune {
return r return r
} }
func decodeUnicodeRune(s *Stream) (rune, int64, error) { func decodeUnicodeRune(s *Stream, p unsafe.Pointer) (rune, int64, unsafe.Pointer, error) {
const defaultOffset = 5 const defaultOffset = 5
const surrogateOffset = 11 const surrogateOffset = 11
if s.cursor+defaultOffset >= s.length { if s.cursor+defaultOffset >= s.length {
if !s.read() { if !s.read() {
return rune(0), 0, errors.ErrInvalidCharacter(s.char(), "escaped string", s.totalOffset()) return rune(0), 0, nil, errors.ErrInvalidCharacter(s.char(), "escaped string", s.totalOffset())
} }
p = s.bufptr()
} }
r := unicodeToRune(s.buf[s.cursor+1 : s.cursor+defaultOffset]) r := unicodeToRune(s.buf[s.cursor+1 : s.cursor+defaultOffset])
if utf16.IsSurrogate(r) { if utf16.IsSurrogate(r) {
if s.cursor+surrogateOffset >= s.length { if s.cursor+surrogateOffset >= s.length {
s.read() s.read()
p = s.bufptr()
} }
if s.cursor+surrogateOffset >= s.length || s.buf[s.cursor+defaultOffset] != '\\' || s.buf[s.cursor+defaultOffset+1] != 'u' { if s.cursor+surrogateOffset >= s.length || s.buf[s.cursor+defaultOffset] != '\\' || s.buf[s.cursor+defaultOffset+1] != 'u' {
return unicode.ReplacementChar, defaultOffset, nil return unicode.ReplacementChar, defaultOffset, p, nil
} }
r2 := unicodeToRune(s.buf[s.cursor+defaultOffset+2 : s.cursor+surrogateOffset]) r2 := unicodeToRune(s.buf[s.cursor+defaultOffset+2 : s.cursor+surrogateOffset])
if r := utf16.DecodeRune(r, r2); r != unicode.ReplacementChar { if r := utf16.DecodeRune(r, r2); r != unicode.ReplacementChar {
return r, surrogateOffset, nil return r, surrogateOffset, p, nil
} }
} }
return r, defaultOffset, nil return r, defaultOffset, p, nil
} }
func decodeUnicode(s *Stream) error { func decodeUnicode(s *Stream, p unsafe.Pointer) (unsafe.Pointer, error) {
const backSlashAndULen = 2 // length of \u const backSlashAndULen = 2 // length of \u
r, offset, err := decodeUnicodeRune(s) r, offset, pp, err := decodeUnicodeRune(s, p)
if err != nil { if err != nil {
return err return nil, err
} }
unicode := []byte(string(r)) unicode := []byte(string(r))
unicodeLen := int64(len(unicode)) unicodeLen := int64(len(unicode))
@ -132,10 +134,10 @@ func decodeUnicode(s *Stream) error {
unicodeOrgLen := offset - 1 unicodeOrgLen := offset - 1
s.length = s.length - (backSlashAndULen + (unicodeOrgLen - unicodeLen)) s.length = s.length - (backSlashAndULen + (unicodeOrgLen - unicodeLen))
s.cursor = s.cursor - backSlashAndULen + unicodeLen s.cursor = s.cursor - backSlashAndULen + unicodeLen
return nil return pp, nil
} }
func decodeEscapeString(s *Stream) error { func decodeEscapeString(s *Stream, p unsafe.Pointer) (unsafe.Pointer, error) {
s.cursor++ s.cursor++
RETRY: RETRY:
switch s.buf[s.cursor] { switch s.buf[s.cursor] {
@ -156,19 +158,19 @@ RETRY:
case 't': case 't':
s.buf[s.cursor] = '\t' s.buf[s.cursor] = '\t'
case 'u': case 'u':
return decodeUnicode(s) return decodeUnicode(s, p)
case nul: case nul:
if !s.read() { if !s.read() {
return errors.ErrInvalidCharacter(s.char(), "escaped string", s.totalOffset()) return nil, errors.ErrInvalidCharacter(s.char(), "escaped string", s.totalOffset())
} }
goto RETRY goto RETRY
default: default:
return errors.ErrUnexpectedEndOfJSON("string", s.totalOffset()) return nil, errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
} }
s.buf = append(s.buf[:s.cursor-1], s.buf[s.cursor:]...) s.buf = append(s.buf[:s.cursor-1], s.buf[s.cursor:]...)
s.length-- s.length--
s.cursor-- s.cursor--
return nil return p, nil
} }
var ( var (
@ -184,9 +186,11 @@ func stringBytes(s *Stream) ([]byte, error) {
switch char(p, cursor) { switch char(p, cursor) {
case '\\': case '\\':
s.cursor = cursor s.cursor = cursor
if err := decodeEscapeString(s); err != nil { pp, err := decodeEscapeString(s, p)
if err != nil {
return nil, err return nil, err
} }
p = pp
cursor = s.cursor cursor = s.cursor
case '"': case '"':
literal := s.buf[start:cursor] literal := s.buf[start:cursor]