Merge pull request #380 from orisano/fix/#374

Fix unicode decoding when the expected buffer state is not met after reading
This commit is contained in:
Masaaki Goshima 2022-07-10 02:02:16 +09:00 committed by GitHub
commit a812201b02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 30 additions and 9 deletions

View File

@ -7,12 +7,14 @@ import (
stdjson "encoding/json" stdjson "encoding/json"
"errors" "errors"
"fmt" "fmt"
"io"
"log" "log"
"math" "math"
"math/big" "math/big"
"reflect" "reflect"
"regexp" "regexp"
"strconv" "strconv"
"strings"
"testing" "testing"
"time" "time"
@ -2453,3 +2455,16 @@ func TestIssue370(t *testing.T) {
t.Errorf("unexpected result: %v != %v", got, expected) t.Errorf("unexpected result: %v != %v", got, expected)
} }
} }
// TestIssue374 is a regression test for decoding a unicode escape that is
// split across two reads of the underlying stream.
//
// The input is assembled from two readers: the first yields 505 spaces
// followed by the opening quote and the first two bytes of the escape, the
// second yields the four hex digits and the closing quote. The decoded value
// must be a string holding the single NUL character.
//
// NOTE(review): the padding of 505 bytes presumably places the escape at the
// edge of the decoder's initial buffer — confirm against the stream
// implementation before changing it.
func TestIssue374(t *testing.T) {
	r := io.MultiReader(
		strings.NewReader(strings.Repeat(" ", 505)+`"\u`),
		strings.NewReader(`0000"`),
	)
	var v interface{}
	if err := json.NewDecoder(r).Decode(&v); err != nil {
		t.Fatal(err)
	}
	// Use the comma-ok form so that an unexpected dynamic type is reported as
	// a regular test failure instead of a panic.
	got, ok := v.(string)
	if !ok {
		t.Fatalf("unexpected type: %T, want string", v)
	}
	expected := "\u0000"
	if got != expected {
		t.Errorf("unexpected result: %q != %q", got, expected)
	}
}

View File

@ -95,24 +95,30 @@ func unicodeToRune(code []byte) rune {
return r return r
} }
// readAtLeast keeps calling s.read until s.cursor+n is strictly less than
// s.length. After every successful read, *p is overwritten with s.bufptr().
//
// It returns true once the condition holds (without reading at all if it
// already holds on entry) and false as soon as s.read reports false; in that
// case *p keeps the value from the last successful read, if any.
func readAtLeast(s *Stream, n int64, p *unsafe.Pointer) bool {
	for {
		if s.cursor+n < s.length {
			return true
		}
		if !s.read() {
			return false
		}
		// Refresh the caller's pointer after each successful read
		// (presumably read may move or grow the buffer — confirm
		// against Stream.read).
		*p = s.bufptr()
	}
}
func decodeUnicodeRune(s *Stream, p unsafe.Pointer) (rune, int64, unsafe.Pointer, error) { func decodeUnicodeRune(s *Stream, p unsafe.Pointer) (rune, int64, unsafe.Pointer, error) {
const defaultOffset = 5 const defaultOffset = 5
const surrogateOffset = 11 const surrogateOffset = 11
if s.cursor+defaultOffset >= s.length { if !readAtLeast(s, defaultOffset, &p) {
if !s.read() {
return rune(0), 0, nil, errors.ErrInvalidCharacter(s.char(), "escaped string", s.totalOffset()) return rune(0), 0, nil, errors.ErrInvalidCharacter(s.char(), "escaped string", s.totalOffset())
} }
p = s.bufptr()
}
r := unicodeToRune(s.buf[s.cursor+1 : s.cursor+defaultOffset]) r := unicodeToRune(s.buf[s.cursor+1 : s.cursor+defaultOffset])
if utf16.IsSurrogate(r) { if utf16.IsSurrogate(r) {
if s.cursor+surrogateOffset >= s.length { if !readAtLeast(s, surrogateOffset, &p) {
s.read() return unicode.ReplacementChar, defaultOffset, p, nil
p = s.bufptr()
} }
if s.cursor+surrogateOffset >= s.length || s.buf[s.cursor+defaultOffset] != '\\' || s.buf[s.cursor+defaultOffset+1] != 'u' { if s.buf[s.cursor+defaultOffset] != '\\' || s.buf[s.cursor+defaultOffset+1] != 'u' {
return unicode.ReplacementChar, defaultOffset, p, nil return unicode.ReplacementChar, defaultOffset, p, nil
} }
r2 := unicodeToRune(s.buf[s.cursor+defaultOffset+2 : s.cursor+surrogateOffset]) r2 := unicodeToRune(s.buf[s.cursor+defaultOffset+2 : s.cursor+surrogateOffset])