Merge pull request #345 from orisano/fix/#306

feat: improves escapeString's performance
This commit is contained in:
Masaaki Goshima 2022-03-19 21:26:32 +09:00 committed by GitHub
commit a3b70288fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 15 deletions

View File

@ -3859,3 +3859,22 @@ func TestIssue337(t *testing.T) {
t.Fatal("unexpected result", m)
}
}
func Benchmark306(b *testing.B) {
type T0 struct {
Str string
}
in := []byte(`{"Str":"` + strings.Repeat(`abcd\"`, 10000) + `"}`)
b.Run("stdjson", func(b *testing.B) {
var x T0
for i := 0; i < b.N; i++ {
stdjson.Unmarshal(in, &x)
}
})
b.Run("go-json", func(b *testing.B) {
var x T0
for i := 0; i < b.N; i++ {
json.Unmarshal(in, &x)
}
})
}

View File

@ -331,7 +331,7 @@ func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, err
case '"':
literal := buf[start:cursor]
if escaped > 0 {
literal = literal[:unescapeString(literal, escaped)]
literal = literal[:unescapeString(literal)]
}
cursor++
return literal, cursor, nil
@ -363,21 +363,52 @@ var unescapeMap = [256]byte{
't': '\t',
}
func unescapeString(buf []byte, escaped int) int {
cursor := 0
for i := 0; i < escaped; i++ {
cursor += bytes.IndexByte(buf[cursor:], '\\')
c := buf[cursor+1]
if c == 'u' {
code := unicodeToRune(buf[cursor+2 : cursor+6])
unicode := []byte(string(code))
buf = append(append(buf[:cursor], unicode...), buf[cursor+6:]...)
cursor += len(unicode)
func unsafeAdd(ptr unsafe.Pointer, offset int) unsafe.Pointer {
return unsafe.Pointer(uintptr(ptr) + uintptr(offset))
}
func unescapeString(buf []byte) int {
p := (*sliceHeader)(unsafe.Pointer(&buf)).data
end := unsafeAdd(p, len(buf))
src := unsafeAdd(p, bytes.IndexByte(buf, '\\'))
dst := src
for src != end {
c := char(src, 0)
if c == '\\' {
escapeChar := char(src, 1)
if escapeChar != 'u' {
*(*byte)(dst) = unescapeMap[escapeChar]
src = unsafeAdd(src, 2)
dst = unsafeAdd(dst, 1)
} else {
v1 := hexToInt[char(src, 2)]
v2 := hexToInt[char(src, 3)]
v3 := hexToInt[char(src, 4)]
v4 := hexToInt[char(src, 5)]
code := rune((v1 << 12) | (v2 << 8) | (v3 << 4) | v4)
var b [utf8.UTFMax]byte
n := utf8.EncodeRune(b[:], code)
switch n {
case 4:
*(*byte)(unsafeAdd(dst, 3)) = b[3]
fallthrough
case 3:
*(*byte)(unsafeAdd(dst, 2)) = b[2]
fallthrough
case 2:
*(*byte)(unsafeAdd(dst, 1)) = b[1]
fallthrough
case 1:
*(*byte)(unsafeAdd(dst, 0)) = b[0]
}
src = unsafeAdd(src, 6)
dst = unsafeAdd(dst, n)
}
} else {
buf[cursor+1] = unescapeMap[c]
buf = append(buf[:cursor], buf[cursor+1:]...)
cursor++
*(*byte)(dst) = c
src = unsafeAdd(src, 1)
dst = unsafeAdd(dst, 1)
}
}
return len(buf)
return int(uintptr(dst) - uintptr(p))
}