Refactor encoder buffering to improve performance

This commit is contained in:
Masaaki Goshima 2020-12-19 22:40:03 +09:00
parent 920c79e0b7
commit 01b439e41e
5 changed files with 2543 additions and 2055 deletions

158
encode.go
View File

@ -100,7 +100,8 @@ func (e *Encoder) EncodeWithOption(v interface{}, opts ...EncodeOption) error {
return err return err
} }
} }
if err := e.encode(v); err != nil { var err error
if e.buf, err = e.encode(v); err != nil {
return err return err
} }
if e.enabledIndent { if e.enabledIndent {
@ -149,7 +150,8 @@ func (e *Encoder) reset() {
} }
func (e *Encoder) encodeForMarshal(v interface{}) ([]byte, error) { func (e *Encoder) encodeForMarshal(v interface{}) ([]byte, error) {
if err := e.encode(v); err != nil { var err error
if e.buf, err = e.encode(v); err != nil {
return nil, err return nil, err
} }
if e.enabledIndent { if e.enabledIndent {
@ -162,15 +164,16 @@ func (e *Encoder) encodeForMarshal(v interface{}) ([]byte, error) {
return copied, nil return copied, nil
} }
func (e *Encoder) encode(v interface{}) error { func (e *Encoder) encode(v interface{}) ([]byte, error) {
b := e.buf
if v == nil { if v == nil {
e.encodeNull() b = encodeNull(b)
if e.enabledIndent { if e.enabledIndent {
e.encodeBytes([]byte{',', '\n'}) b = encodeIndentComma(b)
} else { } else {
e.encodeByte(',') b = encodeComma(b)
} }
return nil return b, nil
} }
header := (*interfaceHeader)(unsafe.Pointer(&v)) header := (*interfaceHeader)(unsafe.Pointer(&v))
typ := header.typ typ := header.typ
@ -187,9 +190,9 @@ func (e *Encoder) encode(v interface{}) error {
ctx := codeSet.ctx.Get().(*encodeRuntimeContext) ctx := codeSet.ctx.Get().(*encodeRuntimeContext)
p := uintptr(header.ptr) p := uintptr(header.ptr)
ctx.init(p) ctx.init(p)
err := e.run(ctx, code) b, err := e.run(ctx, b, code)
codeSet.ctx.Put(ctx) codeSet.ctx.Put(ctx)
return err return b, err
} }
// noescape trick for header.typ ( reflect.*rtype ) // noescape trick for header.typ ( reflect.*rtype )
@ -201,7 +204,7 @@ func (e *Encoder) encode(v interface{}) error {
withIndent: true, withIndent: true,
}) })
if err != nil { if err != nil {
return err return nil, err
} }
code, err := e.compileHead(&encodeCompileContext{ code, err := e.compileHead(&encodeCompileContext{
typ: copiedType, typ: copiedType,
@ -209,7 +212,7 @@ func (e *Encoder) encode(v interface{}) error {
withIndent: false, withIndent: false,
}) })
if err != nil { if err != nil {
return err return nil, err
} }
codeIndent = copyOpcode(codeIndent) codeIndent = copyOpcode(codeIndent)
code = copyOpcode(code) code = copyOpcode(code)
@ -239,55 +242,56 @@ func (e *Encoder) encode(v interface{}) error {
c = code c = code
} }
if err := e.run(ctx, c); err != nil { b, err = e.run(ctx, b, c)
if err != nil {
codeSet.ctx.Put(ctx) codeSet.ctx.Put(ctx)
return err return nil, err
} }
codeSet.ctx.Put(ctx) codeSet.ctx.Put(ctx)
return nil return b, nil
} }
func (e *Encoder) encodeInt(v int) { func encodeInt(b []byte, v int) []byte {
e.encodeInt64(int64(v)) return strconv.AppendInt(b, int64(v), 10)
} }
func (e *Encoder) encodeInt8(v int8) { func encodeInt8(b []byte, v int8) []byte {
e.encodeInt64(int64(v)) return strconv.AppendInt(b, int64(v), 10)
} }
func (e *Encoder) encodeInt16(v int16) { func encodeInt16(b []byte, v int16) []byte {
e.encodeInt64(int64(v)) return strconv.AppendInt(b, int64(v), 10)
} }
func (e *Encoder) encodeInt32(v int32) { func encodeInt32(b []byte, v int32) []byte {
e.encodeInt64(int64(v)) return strconv.AppendInt(b, int64(v), 10)
} }
func (e *Encoder) encodeInt64(v int64) { func encodeInt64(b []byte, v int64) []byte {
e.buf = strconv.AppendInt(e.buf, v, 10) return strconv.AppendInt(b, v, 10)
} }
func (e *Encoder) encodeUint(v uint) { func encodeUint(b []byte, v uint) []byte {
e.encodeUint64(uint64(v)) return strconv.AppendUint(b, uint64(v), 10)
} }
func (e *Encoder) encodeUint8(v uint8) { func encodeUint8(b []byte, v uint8) []byte {
e.encodeUint64(uint64(v)) return strconv.AppendUint(b, uint64(v), 10)
} }
func (e *Encoder) encodeUint16(v uint16) { func encodeUint16(b []byte, v uint16) []byte {
e.encodeUint64(uint64(v)) return strconv.AppendUint(b, uint64(v), 10)
} }
func (e *Encoder) encodeUint32(v uint32) { func encodeUint32(b []byte, v uint32) []byte {
e.encodeUint64(uint64(v)) return strconv.AppendUint(b, uint64(v), 10)
} }
func (e *Encoder) encodeUint64(v uint64) { func encodeUint64(b []byte, v uint64) []byte {
e.buf = strconv.AppendUint(e.buf, v, 10) return strconv.AppendUint(b, v, 10)
} }
func (e *Encoder) encodeFloat32(v float32) { func encodeFloat32(b []byte, v float32) []byte {
f64 := float64(v) f64 := float64(v)
abs := math.Abs(f64) abs := math.Abs(f64)
fmt := byte('f') fmt := byte('f')
@ -298,10 +302,10 @@ func (e *Encoder) encodeFloat32(v float32) {
fmt = 'e' fmt = 'e'
} }
} }
e.buf = strconv.AppendFloat(e.buf, f64, fmt, -1, 32) return strconv.AppendFloat(b, f64, fmt, -1, 32)
} }
func (e *Encoder) encodeFloat64(v float64) { func encodeFloat64(b []byte, v float64) []byte {
abs := math.Abs(v) abs := math.Abs(v)
fmt := byte('f') fmt := byte('f')
// Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right. // Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right.
@ -310,58 +314,62 @@ func (e *Encoder) encodeFloat64(v float64) {
fmt = 'e' fmt = 'e'
} }
} }
e.buf = strconv.AppendFloat(e.buf, v, fmt, -1, 64) return strconv.AppendFloat(b, v, fmt, -1, 64)
} }
func (e *Encoder) encodeBool(v bool) { func encodeBool(b []byte, v bool) []byte {
e.buf = strconv.AppendBool(e.buf, v) if v {
} return append(b, "true"...)
func (e *Encoder) encodeBytes(b []byte) {
e.buf = append(e.buf, b...)
}
func (e *Encoder) encodeNull() {
e.buf = append(e.buf, 'n', 'u', 'l', 'l')
}
func (e *Encoder) encodeKey(code *opcode) {
if e.enabledHTMLEscape {
e.encodeBytes(code.escapedKey)
} else {
e.encodeBytes(code.key)
} }
return append(b, "false"...)
} }
func (e *Encoder) encodeString(s string) { func encodeBytes(dst []byte, src []byte) []byte {
return append(dst, src...)
}
// encodeNull appends the JSON literal null to b and returns the extended
// slice.
func encodeNull(b []byte) []byte {
	b = append(b, 'n', 'u', 'l', 'l')
	return b
}
// encodeComma appends a single ',' separator to b and returns the extended
// slice.
func encodeComma(b []byte) []byte {
	b = append(b, ","...)
	return b
}
// encodeIndentComma appends the separator used in indented output, a ','
// followed by a newline, to b and returns the extended slice.
func encodeIndentComma(b []byte) []byte {
	b = append(b, ",\n"...)
	return b
}
func (e *Encoder) encodeKey(b []byte, code *opcode) []byte {
if e.enabledHTMLEscape { if e.enabledHTMLEscape {
e.encodeEscapedString(s) return append(b, code.escapedKey...)
} else {
e.encodeNoEscapedString(s)
} }
return append(b, code.key...)
} }
func (e *Encoder) encodeByteSlice(b []byte) { func (e *Encoder) encodeString(b []byte, s string) []byte {
encodedLen := base64.StdEncoding.EncodedLen(len(b)) if e.enabledHTMLEscape {
e.encodeByte('"') return encodeEscapedString(b, s)
pos := len(e.buf) }
remainLen := cap(e.buf[pos:]) return encodeNoEscapedString(b, s)
}
func encodeByteSlice(b []byte, src []byte) []byte {
encodedLen := base64.StdEncoding.EncodedLen(len(src))
b = append(b, '"')
pos := len(b)
remainLen := cap(b[pos:])
var buf []byte var buf []byte
if remainLen > encodedLen { if remainLen > encodedLen {
buf = e.buf[pos : pos+encodedLen] buf = b[pos : pos+encodedLen]
} else { } else {
buf = make([]byte, encodedLen) buf = make([]byte, encodedLen)
} }
base64.StdEncoding.Encode(buf, b) base64.StdEncoding.Encode(buf, src)
e.encodeBytes(buf) return append(append(b, buf...), '"')
e.encodeByte('"')
} }
func (e *Encoder) encodeByte(b byte) { func (e *Encoder) encodeIndent(b []byte, indent int) []byte {
e.buf = append(e.buf, b) b = append(b, e.prefix...)
} return append(b, bytes.Repeat(e.indentStr, indent)...)
func (e *Encoder) encodeIndent(indent int) {
e.buf = append(e.buf, e.prefix...)
e.buf = append(e.buf, bytes.Repeat(e.indentStr, indent)...)
} }

View File

@ -1007,7 +1007,7 @@ func (e *Encoder) compileStruct(ctx *encodeCompileContext, isPtr bool) (*opcode,
var buf bytes.Buffer var buf bytes.Buffer
enc := NewEncoder(&buf) enc := NewEncoder(&buf)
enc.encodeEscapedString(tag.key) enc.buf = encodeEscapedString(enc.buf, tag.key)
escapedKey := fmt.Sprintf(`%s:`, string(enc.buf)) escapedKey := fmt.Sprintf(`%s:`, string(enc.buf))
enc.release() enc.release()
fieldCode := &opcode{ fieldCode := &opcode{

View File

@ -1,9 +1,352 @@
package json package json
import ( import (
"math/bits"
"reflect"
"unicode/utf8" "unicode/utf8"
"unsafe"
) )
// Bit patterns for processing the eight bytes of a uint64 at once.
const (
// lsb has the least significant bit of every byte set (0x01 repeated
// eight times).
lsb = 0x0101010101010101
// msb has the most significant bit of every byte set (0x80 repeated
// eight times).
msb = 0x8080808080808080
)
// needEscapeWithHTML is indexed by byte value and holds true for every byte
// that cannot be copied verbatim into a JSON string when HTML escaping is
// enabled: the control bytes 0x00-0x1f, every byte from 0x80 upward, the
// JSON specials '"' and '\\', and the HTML-sensitive '&', '<' and '>'.
// Bytes 0x20-0x7f other than those five stay false.
var needEscapeWithHTML = func() [256]bool {
	var table [256]bool
	// Control characters.
	for c := 0x00; c < 0x20; c++ {
		table[c] = true
	}
	// Everything outside the ASCII range.
	for c := 0x80; c <= 0xff; c++ {
		table[c] = true
	}
	// Individual printable characters that still need escaping.
	for _, c := range []byte{'"', '&', '<', '>', '\\'} {
		table[c] = true
	}
	return table
}()
// needEscape is indexed by byte value and holds true for every byte that
// cannot be copied verbatim into a JSON string: the control bytes 0x00-0x1f,
// every byte from 0x80 upward, and the JSON specials '"' and '\\'. All other
// bytes in 0x20-0x7f stay false.
var needEscape = func() [256]bool {
	var table [256]bool
	// Control characters.
	for c := 0x00; c < 0x20; c++ {
		table[c] = true
	}
	// Everything outside the ASCII range.
	for c := 0x80; c <= 0xff; c++ {
		table[c] = true
	}
	// Individual printable characters that still need escaping.
	table['"'] = true
	table['\\'] = true
	return table
}()
// htmlSafeSet holds the value true if the ASCII character with the given // htmlSafeSet holds the value true if the ASCII character with the given
// array position can be safely represented inside a JSON string, embedded // array position can be safely represented inside a JSON string, embedded
// inside of HTML <script> tags, without any additional escaping. // inside of HTML <script> tags, without any additional escaping.
@ -345,69 +688,171 @@ var safeSet = [utf8.RuneSelf]bool{
var hex = "0123456789abcdef" var hex = "0123456789abcdef"
func (e *Encoder) encodeEscapedString(s string) { // escapeIndex finds the index of the first char in `s` that requires escaping.
// A char requires escaping if it's outside of the range of [0x20, 0x7F] or if
// it includes a double quote or backslash.
// If no chars in `s` require escaping, the return value is -1.
func escapeIndex(s string) int {
chunks := stringToUint64Slice(s)
for _, n := range chunks {
// combine masks before checking for the MSB of each byte. We include
// `n` in the mask to check whether any of the *input* byte MSBs were
// set (i.e. the byte was outside the ASCII range).
mask := n | below(n, 0x20) | contains(n, '"') | contains(n, '\\')
if (mask & msb) != 0 {
return bits.TrailingZeros64(mask&msb) / 8
}
}
valLen := len(s) valLen := len(s)
// write string, the fast path, without utf8 and escape support for i := len(chunks) * 8; i < valLen; i++ {
i := 0 if needEscape[s[i]] {
for ; i < valLen; i++ { return i
if !htmlSafeSet[s[i]] {
break
} }
} }
e.buf = append(e.buf, '"')
if i == valLen { return -1
e.buf = append(e.buf, s...)
e.buf = append(e.buf, '"')
return
}
e.buf = append(e.buf, s[:i]...)
e.writeStringSlowPathWithHTMLEscaped(i, s, valLen)
} }
func (e *Encoder) writeStringSlowPathWithHTMLEscaped(i int, s string, valLen int) { // escapeIndex finds the index of the first char in `s` that requires escaping.
start := i // A char requires escaping if it's outside of the range of [0x20, 0x7F] or if
// for the remaining parts, we process them char by char // it includes a double quote or backslash.
for i < valLen { // Also, the chars <, > and & require escaping.
if b := s[i]; b < utf8.RuneSelf { // If no chars in `s` require escaping, the return value is -1.
if htmlSafeSet[b] { func escapeIndexWithHTMLEscape(s string) int {
i++ chunks := stringToUint64Slice(s)
for _, n := range chunks {
// combine masks before checking for the MSB of each byte. We include
// `n` in the mask to check whether any of the *input* byte MSBs were
// set (i.e. the byte was outside the ASCII range).
mask := n | below(n, 0x20) | contains(n, '"') | contains(n, '\\') | contains(n, '<') | contains(n, '>') | contains(n, '&')
if (mask & msb) != 0 {
return bits.TrailingZeros64(mask&msb) / 8
}
}
valLen := len(s)
for i := len(chunks) * 8; i < valLen; i++ {
if needEscapeWithHTML[s[i]] {
return i
}
}
return -1
}
// below returns a mask for locating bytes of `n` that are smaller than `b`:
// a byte whose MSB is set in the mask was below `b`. The mask is only
// meaningful when `b`, and each byte in `n`, is below 0x80. Because the
// subtraction spans the whole word, a borrow can also flag bytes above a
// genuine match, so only the lowest flagged byte should be trusted.
func below(n uint64, b byte) uint64 {
	threshold := expand(b)
	return n - threshold
}
// contains returns a mask for locating bytes of `n` that equal `b`: a byte
// whose MSB is set in the mask was equal to `b`. The mask is only meaningful
// when `b`, and each byte in `n`, is below 0x80.
func contains(n uint64, b byte) uint64 {
	// XOR zeroes every byte that matches b; subtracting one from each byte
	// then wraps those zero bytes around so their MSB becomes set.
	diff := n ^ expand(b)
	return diff - lsb
}
// expand replicates b into every one of the 8 bytes of a uint64.
func expand(b byte) uint64 {
	word := uint64(b)
	return word * lsb
}
// stringToUint64Slice reinterprets the bytes of s as a slice of uint64 words
// without copying. The result has len(s)/8 elements, so up to 7 trailing
// bytes of s are not covered and must be handled separately by the caller.
// Each word holds its 8 bytes in the machine's native byte order.
//
// The returned slice aliases the memory of s and must be treated as
// read-only.
//
// The header of a real slice variable is filled in, rather than building a
// reflect.SliceHeader value and casting it. The unsafe package forbids
// declaring or allocating SliceHeader values, because their Data field is a
// plain uintptr that does not keep the string's memory alive for the garbage
// collector.
//
// NOTE(review): the string data is not guaranteed to be 8-byte aligned; this
// relies on the target architecture tolerating unaligned 64-bit loads.
func stringToUint64Slice(s string) []uint64 {
	var chunks []uint64
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&chunks))
	hdr.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
	hdr.Len = len(s) / 8
	hdr.Cap = hdr.Len
	return chunks
}
func encodeEscapedString(buf []byte, s string) []byte {
valLen := len(s)
if valLen == 0 {
return append(buf, `""`...)
}
buf = append(buf, '"')
var escapeIdx int
if valLen >= 8 {
if escapeIdx = escapeIndexWithHTMLEscape(s); escapeIdx < 0 {
return append(append(buf, s...), '"')
}
}
i := 0
j := escapeIdx
for j < valLen {
c := s[j]
if c >= 0x20 && c <= 0x7f && c != '\\' && c != '"' && (c != '<' && c != '>' && c != '&') {
// fast path: most of the time, printable ascii characters are used
j++
continue continue
} }
if start < i {
e.buf = append(e.buf, s[start:i]...) switch c {
}
switch b {
case '\\', '"': case '\\', '"':
e.buf = append(e.buf, '\\', b) buf = append(buf, s[i:j]...)
buf = append(buf, '\\', c)
i = j + 1
j = j + 1
continue
case '\n': case '\n':
e.buf = append(e.buf, '\\', 'n') buf = append(buf, s[i:j]...)
buf = append(buf, '\\', 'n')
i = j + 1
j = j + 1
continue
case '\r': case '\r':
e.buf = append(e.buf, '\\', 'r') buf = append(buf, s[i:j]...)
buf = append(buf, '\\', 'r')
i = j + 1
j = j + 1
continue
case '\t': case '\t':
e.buf = append(e.buf, '\\', 't') buf = append(buf, s[i:j]...)
default: buf = append(buf, '\\', 't')
i = j + 1
j = j + 1
continue
case '<', '>', '&':
buf = append(buf, s[i:j]...)
buf = append(buf, `\u00`...)
buf = append(buf, hex[c>>4], hex[c&0xF])
i = j + 1
j = j + 1
continue
}
// This encodes bytes < 0x20 except for \t, \n and \r. // This encodes bytes < 0x20 except for \t, \n and \r.
// If escapeHTML is set, it also escapes <, >, and & if c < 0x20 {
// because they can lead to security holes when buf = append(buf, s[i:j]...)
// user-controlled strings are rendered into JSON buf = append(buf, `\u00`...)
// and served to some browsers. buf = append(buf, hex[c>>4], hex[c&0xF])
e.buf = append(e.buf, `\u00`...) i = j + 1
e.buf = append(e.buf, hex[b>>4], hex[b&0xF]) j = j + 1
}
i++
start = i
continue continue
} }
c, size := utf8.DecodeRuneInString(s[i:])
if c == utf8.RuneError && size == 1 { r, size := utf8.DecodeRuneInString(s[j:])
if start < i {
e.buf = append(e.buf, s[start:i]...) if r == utf8.RuneError && size == 1 {
} buf = append(buf, s[i:j]...)
e.buf = append(e.buf, `\ufffd`...) buf = append(buf, `\ufffd`...)
i++ i = j + size
start = i j = j + size
continue continue
} }
switch r {
case '\u2028', '\u2029':
// U+2028 is LINE SEPARATOR. // U+2028 is LINE SEPARATOR.
// U+2029 is PARAGRAPH SEPARATOR. // U+2029 is PARAGRAPH SEPARATOR.
// They are both technically valid characters in JSON strings, // They are both technically valid characters in JSON strings,
@ -415,84 +860,121 @@ func (e *Encoder) writeStringSlowPathWithHTMLEscaped(i int, s string, valLen int
// and can lead to security holes there. It is valid JSON to // and can lead to security holes there. It is valid JSON to
// escape them, so we do so unconditionally. // escape them, so we do so unconditionally.
// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
if c == '\u2028' || c == '\u2029' { buf = append(buf, s[i:j]...)
if start < i { buf = append(buf, `\u202`...)
e.buf = append(e.buf, s[start:i]...) buf = append(buf, hex[r&0xF])
} i = j + size
e.buf = append(e.buf, `\u202`...) j = j + size
e.buf = append(e.buf, hex[c&0xF])
i += size
start = i
continue continue
} }
i += size
j += size
} }
if start < len(s) {
e.buf = append(e.buf, s[start:]...) return append(append(buf, s[i:]...), '"')
}
e.buf = append(e.buf, '"')
} }
func (e *Encoder) encodeNoEscapedString(s string) { func encodeNoEscapedString(buf []byte, s string) []byte {
valLen := len(s) valLen := len(s)
if valLen == 0 {
return append(buf, `""`...)
}
buf = append(buf, '"')
var escapeIdx int
if valLen >= 8 {
if escapeIdx = escapeIndex(s); escapeIdx < 0 {
return append(append(buf, s...), '"')
}
}
// write string, the fast path, without utf8 and escape support
i := 0 i := 0
for ; i < valLen; i++ { j := escapeIdx
c := s[i] for j < valLen {
if c <= 31 || c == '"' || c == '\\' { c := s[j]
break
}
}
e.buf = append(e.buf, '"')
if i == valLen {
e.buf = append(e.buf, s...)
e.buf = append(e.buf, '"')
return
}
e.buf = append(e.buf, s[:i]...)
e.writeStringSlowPath(i, s, valLen)
}
func (e *Encoder) writeStringSlowPath(i int, s string, valLen int) { if c >= 0x20 && c <= 0x7f && c != '\\' && c != '"' {
start := i // fast path: most of the time, printable ascii characters are used
// for the remaining parts, we process them char by char j++
for i < valLen {
if b := s[i]; b < utf8.RuneSelf {
if safeSet[b] {
i++
continue continue
} }
if start < i {
e.buf = append(e.buf, s[start:i]...) switch c {
}
switch b {
case '\\', '"': case '\\', '"':
e.buf = append(e.buf, '\\', b) buf = append(buf, s[i:j]...)
buf = append(buf, '\\', c)
i = j + 1
j = j + 1
continue
case '\n': case '\n':
e.buf = append(e.buf, '\\', 'n') buf = append(buf, s[i:j]...)
buf = append(buf, '\\', 'n')
i = j + 1
j = j + 1
continue
case '\r': case '\r':
e.buf = append(e.buf, '\\', 'r') buf = append(buf, s[i:j]...)
buf = append(buf, '\\', 'r')
i = j + 1
j = j + 1
continue
case '\t': case '\t':
e.buf = append(e.buf, '\\', 't') buf = append(buf, s[i:j]...)
default: buf = append(buf, '\\', 't')
i = j + 1
j = j + 1
continue
case '<', '>', '&':
buf = append(buf, s[i:j]...)
buf = append(buf, `\u00`...)
buf = append(buf, hex[c>>4], hex[c&0xF])
i = j + 1
j = j + 1
continue
}
// This encodes bytes < 0x20 except for \t, \n and \r. // This encodes bytes < 0x20 except for \t, \n and \r.
// If escapeHTML is set, it also escapes <, >, and & if c < 0x20 {
// because they can lead to security holes when buf = append(buf, s[i:j]...)
// user-controlled strings are rendered into JSON buf = append(buf, `\u00`...)
// and served to some browsers. buf = append(buf, hex[c>>4], hex[c&0xF])
e.buf = append(e.buf, []byte(`\u00`)...) i = j + 1
e.buf = append(e.buf, hex[b>>4], hex[b&0xF]) j = j + 1
}
i++
start = i
continue continue
} }
i++
r, size := utf8.DecodeRuneInString(s[j:])
if r == utf8.RuneError && size == 1 {
buf = append(buf, s[i:j]...)
buf = append(buf, `\ufffd`...)
i = j + size
j = j + size
continue continue
} }
if start < len(s) {
e.buf = append(e.buf, s[start:]...) switch r {
case '\u2028', '\u2029':
// U+2028 is LINE SEPARATOR.
// U+2029 is PARAGRAPH SEPARATOR.
// They are both technically valid characters in JSON strings,
// but don't work in JSONP, which has to be evaluated as JavaScript,
// and can lead to security holes there. It is valid JSON to
// escape them, so we do so unconditionally.
// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
buf = append(buf, s[i:j]...)
buf = append(buf, `\u202`...)
buf = append(buf, hex[r&0xF])
i = j + size
j = j + size
continue
} }
e.buf = append(e.buf, '"')
j += size
}
return append(append(buf, s[i:]...), '"')
} }

File diff suppressed because it is too large Load Diff

View File

@ -393,7 +393,7 @@ func HTMLEscape(dst *bytes.Buffer, src []byte) {
} }
enc := NewEncoder(dst) enc := NewEncoder(dst)
enc.SetEscapeHTML(true) enc.SetEscapeHTML(true)
enc.encode(v) enc.buf, _ = enc.encode(v)
dst.Write(enc.buf[:len(enc.buf)-1]) // remove last ',' character dst.Write(enc.buf[:len(enc.buf)-1]) // remove last ',' character
} }