From 6f811065b623972feb6e599d7ae9c33306e26ff5 Mon Sep 17 00:00:00 2001 From: Nao Yonashiro Date: Sun, 13 Mar 2022 08:33:37 +0900 Subject: [PATCH] feat: improves escapeString's performance --- internal/decoder/string.go | 61 ++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/internal/decoder/string.go b/internal/decoder/string.go index dea8647..a55ac55 100644 --- a/internal/decoder/string.go +++ b/internal/decoder/string.go @@ -331,7 +331,7 @@ func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, err case '"': literal := buf[start:cursor] if escaped > 0 { - literal = literal[:unescapeString(literal, escaped)] + literal = literal[:unescapeString(literal)] } cursor++ return literal, cursor, nil @@ -363,21 +363,52 @@ var unescapeMap = [256]byte{ 't': '\t', } -func unescapeString(buf []byte, escaped int) int { - cursor := 0 - for i := 0; i < escaped; i++ { - cursor += bytes.IndexByte(buf[cursor:], '\\') - c := buf[cursor+1] - if c == 'u' { - code := unicodeToRune(buf[cursor+2 : cursor+6]) - unicode := []byte(string(code)) - buf = append(append(buf[:cursor], unicode...), buf[cursor+6:]...) - cursor += len(unicode) +func unsafeAdd(ptr unsafe.Pointer, offset int) unsafe.Pointer { + return unsafe.Pointer(uintptr(ptr) + uintptr(offset)) +} + +func unescapeString(buf []byte) int { + p := (*sliceHeader)(unsafe.Pointer(&buf)).data + end := unsafeAdd(p, len(buf)) + src := unsafeAdd(p, bytes.IndexByte(buf, '\\')) + dst := src + for src != end { + c := char(src, 0) + if c == '\\' { + escapeChar := char(src, 1) + if escapeChar != 'u' { + *(*byte)(dst) = unescapeMap[escapeChar] + src = unsafeAdd(src, 2) + dst = unsafeAdd(dst, 1) + } else { + v1 := hexToInt[char(src, 2)] + v2 := hexToInt[char(src, 3)] + v3 := hexToInt[char(src, 4)] + v4 := hexToInt[char(src, 5)] + code := rune((v1 << 12) | (v2 << 8) | (v3 << 4) | v4) + var b [utf8.UTFMax]byte + n := utf8.EncodeRune(b[:], code) + switch n { + case 4: + *(*byte)(unsafeAdd(dst, 3)) = b[3] + fallthrough + case 3: + *(*byte)(unsafeAdd(dst, 2)) = b[2] + fallthrough + case 2: + *(*byte)(unsafeAdd(dst, 1)) = b[1] + fallthrough + case 1: + *(*byte)(unsafeAdd(dst, 0)) = b[0] + } + src = unsafeAdd(src, 6) + dst = unsafeAdd(dst, n) + } } else { - buf[cursor+1] = unescapeMap[c] - buf = append(buf[:cursor], buf[cursor+1:]...) - cursor++ + *(*byte)(dst) = c + src = unsafeAdd(src, 1) + dst = unsafeAdd(dst, 1) } } - return len(buf) + return int(uintptr(dst) - uintptr(p)) }