diff --git a/internal/encoder/decode_rune.go b/internal/encoder/decode_rune.go index 5c8de16..1fa1074 100644 --- a/internal/encoder/decode_rune.go +++ b/internal/encoder/decode_rune.go @@ -2,22 +2,7 @@ package encoder import "unicode/utf8" -// Code points in the surrogate range are not valid for UTF-8. const ( - surrogateMin = 0xD800 - surrogateMax = 0xDFFF -) - -const ( - maskx = 63 //0b00111111 - mask2 = 31 //0b00011111 - mask3 = 15 //0b00001111 - mask4 = 7 //0b00000111 - - rune1Max = 1<<7 - 1 - rune2Max = 1<<11 - 1 - rune3Max = 1<<16 - 1 - // The default lowest and highest continuation byte. locb = 128 //0b10000000 hicb = 191 //0b10111111 diff --git a/internal/encoder/string.go b/internal/encoder/string.go index 2d7e363..236e2e9 100644 --- a/internal/encoder/string.go +++ b/internal/encoder/string.go @@ -348,53 +348,6 @@ var needEscape = [256]bool{ var hex = "0123456789abcdef" -// escapeIndex finds the index of the first char in `s` that requires escaping. -// A char requires escaping if it's outside of the range of [0x20, 0x7F] or if -// it includes a double quote or backslash. -// If no chars in `s` require escaping, the return value is -1. -func escapeIndex(s string) int { - chunks := stringToUint64Slice(s) - for _, n := range chunks { - // combine masks before checking for the MSB of each byte. We include - // `n` in the mask to check whether any of the *input* byte MSBs were - // set (i.e. the byte was outside the ASCII range). - mask := n | below(n, 0x20) | contains(n, '"') | contains(n, '\\') - if (mask & msb) != 0 { - return bits.TrailingZeros64(mask&msb) / 8 - } - } - - valLen := len(s) - for i := len(chunks) * 8; i < valLen; i++ { - if needEscape[s[i]] { - return i - } - } - - return -1 -} - -// below return a mask that can be used to determine if any of the bytes -// in `n` are below `b`. If a byte's MSB is set in the mask then that byte was -// below `b`. The result is only valid if `b`, and each byte in `n`, is below -// 0x80. -func below(n uint64, b byte) uint64 { - return n - expand(b) -} - -// contains returns a mask that can be used to determine if any of the -// bytes in `n` are equal to `b`. If a byte's MSB is set in the mask then -// that byte is equal to `b`. The result is only valid if `b`, and each -// byte in `n`, is below 0x80. -func contains(n uint64, b byte) uint64 { - return (n ^ expand(b)) - lsb -} - -// expand puts the specified byte into each of the 8 bytes of a uint64. -func expand(b byte) uint64 { - return lsb * uint64(b) -} - //nolint:govet func stringToUint64Slice(s string) []uint64 { return *(*[]uint64)(unsafe.Pointer(&reflect.SliceHeader{ @@ -539,19 +492,37 @@ func appendString(buf []byte, s string) []byte { return append(buf, `""`...) } buf = append(buf, '"') - var escapeIdx int + var ( + i, j int + ) if valLen >= 8 { - if escapeIdx = escapeIndex(s); escapeIdx < 0 { - return append(append(buf, s...), '"') + chunks := stringToUint64Slice(s) + for _, n := range chunks { + // combine masks before checking for the MSB of each byte. We include + // `n` in the mask to check whether any of the *input* byte MSBs were + // set (i.e. the byte was outside the ASCII range). + mask := n | (n - (lsb * 0x20)) | + ((n ^ (lsb * '"')) - lsb) | + ((n ^ (lsb * '\\')) - lsb) + if (mask & msb) != 0 { + j = bits.TrailingZeros64(mask&msb) / 8 + goto ESCAPE_END + } } + valLen := len(s) + for i := len(chunks) * 8; i < valLen; i++ { + if needEscape[s[i]] { + j = i + goto ESCAPE_END + } + } + return append(append(buf, s...), '"') } - - i := 0 - j := escapeIdx +ESCAPE_END: for j < valLen { c := s[j] - if c >= 0x20 && c <= 0x7f && c != '\\' && c != '"' { + if !needEscape[c] { // fast path: most of the time, printable ascii characters are used j++ continue