Merge pull request #334 from orisano/feat-improve-performance-escaped

feat: improve performance when a payload contains escape sequences
This commit is contained in:
Masaaki Goshima 2022-03-04 20:53:41 +09:00 committed by GitHub
commit f352b8732a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 52 additions and 27 deletions

View File

@ -477,3 +477,13 @@ func Benchmark_Decode_LargeStruct_Stream_GoJsonFirstWinMode(b *testing.B) {
}
}
}
// Benchmark_Decode_LargeSlice_EscapedString_GoJson measures gojson.Unmarshal
// on the LargeSliceEscapedString payload, decoding into a fresh []string on
// every iteration and reporting allocations.
func Benchmark_Decode_LargeSlice_EscapedString_GoJson(b *testing.B) {
	b.ReportAllocs()
	for n := 0; n < b.N; n++ {
		var decoded []string
		err := gojson.Unmarshal(LargeSliceEscapedString, &decoded)
		if err != nil {
			b.Fatal(err)
		}
	}
}

View File

@ -2,6 +2,7 @@ package benchmark
import (
"strconv"
"strings"
"github.com/francoispqt/gojay"
)
@ -208,3 +209,5 @@ func NewLargePayloadEasyJson() *LargePayloadEasyJson {
},
}
}
// LargeSliceEscapedString is a JSON array holding 10000 copies of the string
// "simple plain text\r\n", where the trailing CR LF is spelled with the JSON
// escape sequences \r and \n. The elements are generated with a leading comma
// each, and the comma in front of the first element is trimmed off.
var LargeSliceEscapedString = []byte("[" + strings.TrimPrefix(strings.Repeat(",\"simple plain text\\r\\n\"", 10000), ",") + "]")

View File

@ -1,6 +1,7 @@
package decoder
import (
"bytes"
"reflect"
"unicode"
"unicode/utf16"
@ -308,49 +309,30 @@ func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, err
cursor++
start := cursor
b := (*sliceHeader)(unsafe.Pointer(&buf)).data
escaped := 0
for {
switch char(b, cursor) {
case '\\':
escaped++
cursor++
switch char(b, cursor) {
case '"':
buf[cursor] = '"'
buf = append(buf[:cursor-1], buf[cursor:]...)
case '\\':
buf[cursor] = '\\'
buf = append(buf[:cursor-1], buf[cursor:]...)
case '/':
buf[cursor] = '/'
buf = append(buf[:cursor-1], buf[cursor:]...)
case 'b':
buf[cursor] = '\b'
buf = append(buf[:cursor-1], buf[cursor:]...)
case 'f':
buf[cursor] = '\f'
buf = append(buf[:cursor-1], buf[cursor:]...)
case 'n':
buf[cursor] = '\n'
buf = append(buf[:cursor-1], buf[cursor:]...)
case 'r':
buf[cursor] = '\r'
buf = append(buf[:cursor-1], buf[cursor:]...)
case 't':
buf[cursor] = '\t'
buf = append(buf[:cursor-1], buf[cursor:]...)
case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
cursor++
case 'u':
buflen := int64(len(buf))
if cursor+5 >= buflen {
return nil, 0, errors.ErrUnexpectedEndOfJSON("escaped string", cursor)
}
code := unicodeToRune(buf[cursor+1 : cursor+5])
unicode := []byte(string(code))
buf = append(append(buf[:cursor-1], unicode...), buf[cursor+5:]...)
cursor += 5
default:
return nil, 0, errors.ErrUnexpectedEndOfJSON("escaped string", cursor)
}
continue
case '"':
literal := buf[start:cursor]
if escaped > 0 {
literal = literal[:unescapeString(literal, escaped)]
}
cursor++
return literal, cursor, nil
case nul:
@ -369,3 +351,33 @@ func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, err
}
}
}
// unescapeMap translates the byte that follows a backslash in a JSON string
// into the byte it stands for. It is indexed by that escape byte; bytes that
// are not listed here map to 0. The \u escape is not part of this table.
var unescapeMap = [256]byte{
	'b':  0x08, // backspace
	't':  0x09, // horizontal tab
	'n':  0x0a, // line feed
	'f':  0x0c, // form feed
	'r':  0x0d, // carriage return
	'"':  0x22, // quotation mark
	'/':  0x2f, // solidus
	'\\': 0x5c, // reverse solidus
}
func unescapeString(buf []byte, escaped int) int {
cursor := 0
for i := 0; i < escaped; i++ {
cursor += bytes.IndexByte(buf[cursor:], '\\')
c := buf[cursor+1]
if c == 'u' {
code := unicodeToRune(buf[cursor+2 : cursor+6])
unicode := []byte(string(code))
buf = append(append(buf[:cursor], unicode...), buf[cursor+6:]...)
cursor += len(unicode)
} else {
buf[cursor+1] = unescapeMap[c]
buf = append(buf[:cursor], buf[cursor+1:]...)
cursor++
}
}
return len(buf)
}