Fix stream decoder for unicode char

2021-05-05 04:21:58 +09:00 · 2021-05-05 04:21:58 +09:00 · d4a64f8cde
parent 31951a151c
commit d4a64f8cde
2 changed files with 47 additions and 32 deletions
--- a/decode_stream.go
+++ b/decode_stream.go
@ -72,7 +72,11 @@ func (s *stream) readBuf() []byte {
 		s.buf = make([]byte, s.bufSize)
 		copy(s.buf, remainBuf)
 	}
-	return s.buf[s.cursor:]
+	remainLen := s.length - s.cursor
 	if remainLen > 0 {
 		remainLen-- // last char is nul
 	}
 	return s.buf[s.cursor+remainLen:]
 }
 func (s *stream) read() bool {
--- a/decode_string.go
+++ b/decode_string.go
@ -91,6 +91,47 @@ func unicodeToRune(code []byte) rune {
 	return r
 }
 func decodeUnicodeRune(s *stream) (rune, int64, error) {
 	const defaultOffset = 5
 	const surrogateOffset = 11
 	if s.cursor+defaultOffset >= s.length {
 		if !s.read() {
 			return rune(0), 0, errInvalidCharacter(s.char(), "escaped string", s.totalOffset())
 		}
 	}
 	r := unicodeToRune(s.buf[s.cursor+1 : s.cursor+defaultOffset])
 	if utf16.IsSurrogate(r) {
 		if s.cursor+surrogateOffset >= s.length {
 			s.read()
 		}
 		if s.cursor+surrogateOffset >= s.length || s.buf[s.cursor+defaultOffset] != '\\' || s.buf[s.cursor+defaultOffset+1] != 'u' {
 			return unicode.ReplacementChar, defaultOffset, nil
 		}
 		r2 := unicodeToRune(s.buf[s.cursor+defaultOffset+2 : s.cursor+surrogateOffset])
 		if r := utf16.DecodeRune(r, r2); r != unicode.ReplacementChar {
 			return r, surrogateOffset, nil
 		}
 	}
 	return r, defaultOffset, nil
 }
 func decodeUnicode(s *stream) error {
 	const backSlashAndULen = 2 // length of \u
 	r, offset, err := decodeUnicodeRune(s)
 	if err != nil {
 		return err
 	}
 	unicode := []byte(string(r))
 	unicodeLen := int64(len(unicode))
 	s.buf = append(append(s.buf[:s.cursor-1], unicode...), s.buf[s.cursor+offset:]...)
 	s.length = int64(len(s.buf))
 	s.cursor = s.cursor - backSlashAndULen + unicodeLen
 	return nil
 }
 func decodeEscapeString(s *stream) error {
 	s.cursor++
 RETRY:
@ -112,37 +153,7 @@ RETRY:
 	case 't':
 		s.buf[s.cursor] = '\t'
 	case 'u':
-		if s.cursor+5 >= s.length {
+		return decodeUnicode(s)
 			if !s.read() {
 				return errInvalidCharacter(s.char(), "escaped string", s.totalOffset())
 			}
 		}
 		r := unicodeToRune(s.buf[s.cursor+1 : s.cursor+5])
 		if utf16.IsSurrogate(r) {
 			if s.cursor+11 >= s.length || s.buf[s.cursor+5] != '\\' || s.buf[s.cursor+6] != 'u' {
 				r = unicode.ReplacementChar
 				unicode := []byte(string(r))
 				s.buf = append(append(s.buf[:s.cursor-1], unicode...), s.buf[s.cursor+5:]...)
 				s.cursor = s.cursor - 2 + int64(len(unicode))
 				return nil
 			}
 			r2 := unicodeToRune(s.buf[s.cursor+7 : s.cursor+11])
 			if r := utf16.DecodeRune(r, r2); r != unicode.ReplacementChar {
 				// valid surrogate pair
 				unicode := []byte(string(r))
 				s.buf = append(append(s.buf[:s.cursor-1], unicode...), s.buf[s.cursor+11:]...)
 				s.cursor = s.cursor - 2 + int64(len(unicode))
 			} else {
 				unicode := []byte(string(r))
 				s.buf = append(append(s.buf[:s.cursor-1], unicode...), s.buf[s.cursor+5:]...)
 				s.cursor = s.cursor - 2 + int64(len(unicode))
 			}
 		} else {
 			unicode := []byte(string(r))
 			s.buf = append(append(s.buf[:s.cursor-1], unicode...), s.buf[s.cursor+5:]...)
 			s.cursor = s.cursor - 2 + int64(len(unicode))
 		}
 		return nil
 	case nul:
 		if !s.read() {
 			return errInvalidCharacter(s.char(), "escaped string", s.totalOffset())