Merge pull request #345 from orisano/fix/#306

feat: improves escapeString's performance
This commit is contained in:
Masaaki Goshima 2022-03-19 21:26:32 +09:00 committed by GitHub
commit a3b70288fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 15 deletions

View File

@@ -3859,3 +3859,22 @@ func TestIssue337(t *testing.T) {
 		t.Fatal("unexpected result", m)
 	}
 }
// Benchmark306 measures decoding of one large JSON string value built from
// 10000 repetitions of `abcd\"`, i.e. a payload in which an escape sequence
// occurs every six input bytes. The same input is decoded by encoding/json
// (sub-benchmark "stdjson") and by the json package imported by this file
// (sub-benchmark "go-json") so the two results can be compared directly.
//
// Each sub-benchmark fails on a decode error and verifies the decoded string
// once after the timed loop, so a broken decoder cannot produce a
// plausible-looking timing.
func Benchmark306(b *testing.B) {
	type T0 struct {
		Str string
	}
	const repeat = 10000
	in := []byte(`{"Str":"` + strings.Repeat(`abcd\"`, repeat) + `"}`)
	// Decoded form of the payload: every `\"` escape collapses to a bare quote.
	want := strings.Repeat(`abcd"`, repeat)

	b.Run("stdjson", func(b *testing.B) {
		var x T0
		b.SetBytes(int64(len(in)))
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			if err := stdjson.Unmarshal(in, &x); err != nil {
				b.Fatal(err)
			}
		}
		// Keep the verification out of the measured time.
		b.StopTimer()
		if x.Str != want {
			b.Fatalf("unexpected result: decoded string (%d bytes) does not match expected (%d bytes)", len(x.Str), len(want))
		}
	})
	b.Run("go-json", func(b *testing.B) {
		var x T0
		b.SetBytes(int64(len(in)))
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			if err := json.Unmarshal(in, &x); err != nil {
				b.Fatal(err)
			}
		}
		// Keep the verification out of the measured time.
		b.StopTimer()
		if x.Str != want {
			b.Fatalf("unexpected result: decoded string (%d bytes) does not match expected (%d bytes)", len(x.Str), len(want))
		}
	})
}

View File

@@ -331,7 +331,7 @@ func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, err
 		case '"':
 			literal := buf[start:cursor]
 			if escaped > 0 {
-				literal = literal[:unescapeString(literal, escaped)]
+				literal = literal[:unescapeString(literal)]
 			}
 			cursor++
 			return literal, cursor, nil
@@ -363,21 +363,52 @@ var unescapeMap = [256]byte{
 	't': '\t',
 }
 
-func unescapeString(buf []byte, escaped int) int {
-	cursor := 0
-	for i := 0; i < escaped; i++ {
-		cursor += bytes.IndexByte(buf[cursor:], '\\')
-		c := buf[cursor+1]
-		if c == 'u' {
-			code := unicodeToRune(buf[cursor+2 : cursor+6])
-			unicode := []byte(string(code))
-			buf = append(append(buf[:cursor], unicode...), buf[cursor+6:]...)
-			cursor += len(unicode)
-		} else {
-			buf[cursor+1] = unescapeMap[c]
-			buf = append(buf[:cursor], buf[cursor+1:]...)
-			cursor++
-		}
-	}
-	return len(buf)
+func unsafeAdd(ptr unsafe.Pointer, offset int) unsafe.Pointer {
+	return unsafe.Pointer(uintptr(ptr) + uintptr(offset))
+}
+
+func unescapeString(buf []byte) int {
+	p := (*sliceHeader)(unsafe.Pointer(&buf)).data
+	end := unsafeAdd(p, len(buf))
+	src := unsafeAdd(p, bytes.IndexByte(buf, '\\'))
+	dst := src
+	for src != end {
+		c := char(src, 0)
+		if c == '\\' {
+			escapeChar := char(src, 1)
+			if escapeChar != 'u' {
+				*(*byte)(dst) = unescapeMap[escapeChar]
+				src = unsafeAdd(src, 2)
+				dst = unsafeAdd(dst, 1)
+			} else {
+				v1 := hexToInt[char(src, 2)]
+				v2 := hexToInt[char(src, 3)]
+				v3 := hexToInt[char(src, 4)]
+				v4 := hexToInt[char(src, 5)]
+				code := rune((v1 << 12) | (v2 << 8) | (v3 << 4) | v4)
+				var b [utf8.UTFMax]byte
+				n := utf8.EncodeRune(b[:], code)
+				switch n {
+				case 4:
+					*(*byte)(unsafeAdd(dst, 3)) = b[3]
+					fallthrough
+				case 3:
+					*(*byte)(unsafeAdd(dst, 2)) = b[2]
+					fallthrough
+				case 2:
+					*(*byte)(unsafeAdd(dst, 1)) = b[1]
+					fallthrough
+				case 1:
+					*(*byte)(unsafeAdd(dst, 0)) = b[0]
+				}
+				src = unsafeAdd(src, 6)
+				dst = unsafeAdd(dst, n)
+			}
+		} else {
+			*(*byte)(dst) = c
+			src = unsafeAdd(src, 1)
+			dst = unsafeAdd(dst, 1)
+		}
+	}
+	return int(uintptr(dst) - uintptr(p))
 }