From 01b439e41ec479517fce8f49bec86c4a710f7e69 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sat, 19 Dec 2020 22:40:03 +0900 Subject: [PATCH 1/6] Refactor buffering of encoder for improvement performance --- encode.go | 158 +- encode_compile.go | 2 +- encode_string.go | 750 +++++++-- encode_vm.go | 3686 ++++++++++++++++++++++----------------------- json.go | 2 +- 5 files changed, 2543 insertions(+), 2055 deletions(-) diff --git a/encode.go b/encode.go index 7b29967..2e94831 100644 --- a/encode.go +++ b/encode.go @@ -100,7 +100,8 @@ func (e *Encoder) EncodeWithOption(v interface{}, opts ...EncodeOption) error { return err } } - if err := e.encode(v); err != nil { + var err error + if e.buf, err = e.encode(v); err != nil { return err } if e.enabledIndent { @@ -149,7 +150,8 @@ func (e *Encoder) reset() { } func (e *Encoder) encodeForMarshal(v interface{}) ([]byte, error) { - if err := e.encode(v); err != nil { + var err error + if e.buf, err = e.encode(v); err != nil { return nil, err } if e.enabledIndent { @@ -162,15 +164,16 @@ func (e *Encoder) encodeForMarshal(v interface{}) ([]byte, error) { return copied, nil } -func (e *Encoder) encode(v interface{}) error { +func (e *Encoder) encode(v interface{}) ([]byte, error) { + b := e.buf if v == nil { - e.encodeNull() + b = encodeNull(b) if e.enabledIndent { - e.encodeBytes([]byte{',', '\n'}) + b = encodeIndentComma(b) } else { - e.encodeByte(',') + b = encodeComma(b) } - return nil + return b, nil } header := (*interfaceHeader)(unsafe.Pointer(&v)) typ := header.typ @@ -187,9 +190,9 @@ func (e *Encoder) encode(v interface{}) error { ctx := codeSet.ctx.Get().(*encodeRuntimeContext) p := uintptr(header.ptr) ctx.init(p) - err := e.run(ctx, code) + b, err := e.run(ctx, b, code) codeSet.ctx.Put(ctx) - return err + return b, err } // noescape trick for header.typ ( reflect.*rtype ) @@ -201,7 +204,7 @@ func (e *Encoder) encode(v interface{}) error { withIndent: true, }) if err != nil { - return err + return nil, err } code, err := e.compileHead(&encodeCompileContext{ typ: copiedType, @@ -209,7 +212,7 @@ func (e *Encoder) encode(v interface{}) error { withIndent: false, }) if err != nil { - return err + return nil, err } codeIndent = copyOpcode(codeIndent) code = copyOpcode(code) @@ -239,55 +242,56 @@ func (e *Encoder) encode(v interface{}) error { c = code } - if err := e.run(ctx, c); err != nil { + b, err = e.run(ctx, b, c) + if err != nil { codeSet.ctx.Put(ctx) - return err + return nil, err } codeSet.ctx.Put(ctx) - return nil + return b, nil } -func (e *Encoder) encodeInt(v int) { - e.encodeInt64(int64(v)) +func encodeInt(b []byte, v int) []byte { + return strconv.AppendInt(b, int64(v), 10) } -func (e *Encoder) encodeInt8(v int8) { - e.encodeInt64(int64(v)) +func encodeInt8(b []byte, v int8) []byte { + return strconv.AppendInt(b, int64(v), 10) } -func (e *Encoder) encodeInt16(v int16) { - e.encodeInt64(int64(v)) +func encodeInt16(b []byte, v int16) []byte { + return strconv.AppendInt(b, int64(v), 10) } -func (e *Encoder) encodeInt32(v int32) { - e.encodeInt64(int64(v)) +func encodeInt32(b []byte, v int32) []byte { + return strconv.AppendInt(b, int64(v), 10) } -func (e *Encoder) encodeInt64(v int64) { - e.buf = strconv.AppendInt(e.buf, v, 10) +func encodeInt64(b []byte, v int64) []byte { + return strconv.AppendInt(b, v, 10) } -func (e *Encoder) encodeUint(v uint) { - e.encodeUint64(uint64(v)) +func encodeUint(b []byte, v uint) []byte { + return strconv.AppendUint(b, uint64(v), 10) } -func (e *Encoder) encodeUint8(v uint8) { - e.encodeUint64(uint64(v)) +func encodeUint8(b []byte, v uint8) []byte { + return strconv.AppendUint(b, uint64(v), 10) } -func (e *Encoder) encodeUint16(v uint16) { - e.encodeUint64(uint64(v)) +func encodeUint16(b []byte, v uint16) []byte { + return strconv.AppendUint(b, uint64(v), 10) } -func (e *Encoder) encodeUint32(v uint32) { - e.encodeUint64(uint64(v)) +func encodeUint32(b []byte, v uint32) []byte { + return strconv.AppendUint(b, uint64(v), 10) } -func (e *Encoder) encodeUint64(v uint64) { - e.buf = strconv.AppendUint(e.buf, v, 10) +func encodeUint64(b []byte, v uint64) []byte { + return strconv.AppendUint(b, v, 10) } -func (e *Encoder) encodeFloat32(v float32) { +func encodeFloat32(b []byte, v float32) []byte { f64 := float64(v) abs := math.Abs(f64) fmt := byte('f') @@ -298,10 +302,10 @@ func (e *Encoder) encodeFloat32(v float32) { fmt = 'e' } } - e.buf = strconv.AppendFloat(e.buf, f64, fmt, -1, 32) + return strconv.AppendFloat(b, f64, fmt, -1, 32) } -func (e *Encoder) encodeFloat64(v float64) { +func encodeFloat64(b []byte, v float64) []byte { abs := math.Abs(v) fmt := byte('f') // Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right. @@ -310,58 +314,62 @@ func (e *Encoder) encodeFloat64(v float64) { fmt = 'e' } } - e.buf = strconv.AppendFloat(e.buf, v, fmt, -1, 64) + return strconv.AppendFloat(b, v, fmt, -1, 64) } -func (e *Encoder) encodeBool(v bool) { - e.buf = strconv.AppendBool(e.buf, v) -} - -func (e *Encoder) encodeBytes(b []byte) { - e.buf = append(e.buf, b...) -} - -func (e *Encoder) encodeNull() { - e.buf = append(e.buf, 'n', 'u', 'l', 'l') -} - -func (e *Encoder) encodeKey(code *opcode) { - if e.enabledHTMLEscape { - e.encodeBytes(code.escapedKey) - } else { - e.encodeBytes(code.key) +func encodeBool(b []byte, v bool) []byte { + if v { + return append(b, "true"...) } + return append(b, "false"...) } -func (e *Encoder) encodeString(s string) { +func encodeBytes(dst []byte, src []byte) []byte { + return append(dst, src...) +} + +func encodeNull(b []byte) []byte { + return append(b, "null"...) +} + +func encodeComma(b []byte) []byte { + return append(b, ',') +} + +func encodeIndentComma(b []byte) []byte { + return append(b, ',', '\n') +} + +func (e *Encoder) encodeKey(b []byte, code *opcode) []byte { if e.enabledHTMLEscape { - e.encodeEscapedString(s) - } else { - e.encodeNoEscapedString(s) + return append(b, code.escapedKey...) } + return append(b, code.key...) } -func (e *Encoder) encodeByteSlice(b []byte) { - encodedLen := base64.StdEncoding.EncodedLen(len(b)) - e.encodeByte('"') - pos := len(e.buf) - remainLen := cap(e.buf[pos:]) +func (e *Encoder) encodeString(b []byte, s string) []byte { + if e.enabledHTMLEscape { + return encodeEscapedString(b, s) + } + return encodeNoEscapedString(b, s) +} + +func encodeByteSlice(b []byte, src []byte) []byte { + encodedLen := base64.StdEncoding.EncodedLen(len(src)) + b = append(b, '"') + pos := len(b) + remainLen := cap(b[pos:]) var buf []byte if remainLen > encodedLen { - buf = e.buf[pos : pos+encodedLen] + buf = b[pos : pos+encodedLen] } else { buf = make([]byte, encodedLen) } - base64.StdEncoding.Encode(buf, b) - e.encodeBytes(buf) - e.encodeByte('"') + base64.StdEncoding.Encode(buf, src) + return append(append(b, buf...), '"') } -func (e *Encoder) encodeByte(b byte) { - e.buf = append(e.buf, b) -} - -func (e *Encoder) encodeIndent(indent int) { - e.buf = append(e.buf, e.prefix...) - e.buf = append(e.buf, bytes.Repeat(e.indentStr, indent)...) +func (e *Encoder) encodeIndent(b []byte, indent int) []byte { + b = append(b, e.prefix...) + return append(b, bytes.Repeat(e.indentStr, indent)...) } diff --git a/encode_compile.go b/encode_compile.go index dd41b56..0090afa 100644 --- a/encode_compile.go +++ b/encode_compile.go @@ -1007,7 +1007,7 @@ func (e *Encoder) compileStruct(ctx *encodeCompileContext, isPtr bool) (*opcode, var buf bytes.Buffer enc := NewEncoder(&buf) - enc.encodeEscapedString(tag.key) + enc.buf = encodeEscapedString(enc.buf, tag.key) escapedKey := fmt.Sprintf(`%s:`, string(enc.buf)) enc.release() fieldCode := &opcode{ diff --git a/encode_string.go b/encode_string.go index bd3b140..5aca3a5 100644 --- a/encode_string.go +++ b/encode_string.go @@ -1,9 +1,352 @@ package json import ( + "math/bits" + "reflect" "unicode/utf8" + "unsafe" ) +const ( + lsb = 0x0101010101010101 + msb = 0x8080808080808080 +) + +var needEscapeWithHTML = [256]bool{ + '"': true, + '&': true, + '<': true, + '>': true, + '\\': true, + 0x00: true, + 0x01: true, + 0x02: true, + 0x03: true, + 0x04: true, + 0x05: true, + 0x06: true, + 0x07: true, + 0x08: true, + 0x09: true, + 0x0a: true, + 0x0b: true, + 0x0c: true, + 0x0d: true, + 0x0e: true, + 0x0f: true, + 0x10: true, + 0x11: true, + 0x12: true, + 0x13: true, + 0x14: true, + 0x15: true, + 0x16: true, + 0x17: true, + 0x18: true, + 0x19: true, + 0x1a: true, + 0x1b: true, + 0x1c: true, + 0x1d: true, + 0x1e: true, + 0x1f: true, + /* 0x20 - 0x7f */ + 0x80: true, + 0x81: true, + 0x82: true, + 0x83: true, + 0x84: true, + 0x85: true, + 0x86: true, + 0x87: true, + 0x88: true, + 0x89: true, + 0x8a: true, + 0x8b: true, + 0x8c: true, + 0x8d: true, + 0x8e: true, + 0x8f: true, + 0x90: true, + 0x91: true, + 0x92: true, + 0x93: true, + 0x94: true, + 0x95: true, + 0x96: true, + 0x97: true, + 0x98: true, + 0x99: true, + 0x9a: true, + 0x9b: true, + 0x9c: true, + 0x9d: true, + 0x9e: true, + 0x9f: true, + 0xa0: true, + 0xa1: true, + 0xa2: true, + 0xa3: true, + 0xa4: true, + 0xa5: true, + 0xa6: true, + 0xa7: true, + 0xa8: true, + 0xa9: true, + 0xaa: true, + 0xab: true, + 0xac: true, + 0xad: true, + 0xae: true, + 0xaf: true, + 0xb0: true, + 0xb1: true, + 0xb2: true, + 0xb3: true, + 0xb4: true, + 0xb5: true, + 0xb6: true, + 0xb7: true, + 0xb8: true, + 0xb9: true, + 0xba: true, + 0xbb: true, + 0xbc: true, + 0xbd: true, + 0xbe: true, + 0xbf: true, + 0xc0: true, + 0xc1: true, + 0xc2: true, + 0xc3: true, + 0xc4: true, + 0xc5: true, + 0xc6: true, + 0xc7: true, + 0xc8: true, + 0xc9: true, + 0xca: true, + 0xcb: true, + 0xcc: true, + 0xcd: true, + 0xce: true, + 0xcf: true, + 0xd0: true, + 0xd1: true, + 0xd2: true, + 0xd3: true, + 0xd4: true, + 0xd5: true, + 0xd6: true, + 0xd7: true, + 0xd8: true, + 0xd9: true, + 0xda: true, + 0xdb: true, + 0xdc: true, + 0xdd: true, + 0xde: true, + 0xdf: true, + 0xe0: true, + 0xe1: true, + 0xe2: true, + 0xe3: true, + 0xe4: true, + 0xe5: true, + 0xe6: true, + 0xe7: true, + 0xe8: true, + 0xe9: true, + 0xea: true, + 0xeb: true, + 0xec: true, + 0xed: true, + 0xee: true, + 0xef: true, + 0xf0: true, + 0xf1: true, + 0xf2: true, + 0xf3: true, + 0xf4: true, + 0xf5: true, + 0xf6: true, + 0xf7: true, + 0xf8: true, + 0xf9: true, + 0xfa: true, + 0xfb: true, + 0xfc: true, + 0xfd: true, + 0xfe: true, + 0xff: true, +} + +var needEscape = [256]bool{ + '"': true, + '\\': true, + 0x00: true, + 0x01: true, + 0x02: true, + 0x03: true, + 0x04: true, + 0x05: true, + 0x06: true, + 0x07: true, + 0x08: true, + 0x09: true, + 0x0a: true, + 0x0b: true, + 0x0c: true, + 0x0d: true, + 0x0e: true, + 0x0f: true, + 0x10: true, + 0x11: true, + 0x12: true, + 0x13: true, + 0x14: true, + 0x15: true, + 0x16: true, + 0x17: true, + 0x18: true, + 0x19: true, + 0x1a: true, + 0x1b: true, + 0x1c: true, + 0x1d: true, + 0x1e: true, + 0x1f: true, + /* 0x20 - 0x7f */ + 0x80: true, + 0x81: true, + 0x82: true, + 0x83: true, + 0x84: true, + 0x85: true, + 0x86: true, + 0x87: true, + 0x88: true, + 0x89: true, + 0x8a: true, + 0x8b: true, + 0x8c: true, + 0x8d: true, + 0x8e: true, + 0x8f: true, + 0x90: true, + 0x91: true, + 0x92: true, + 0x93: true, + 0x94: true, + 0x95: true, + 0x96: true, + 0x97: true, + 0x98: true, + 0x99: true, + 0x9a: true, + 0x9b: true, + 0x9c: true, + 0x9d: true, + 0x9e: true, + 0x9f: true, + 0xa0: true, + 0xa1: true, + 0xa2: true, + 0xa3: true, + 0xa4: true, + 0xa5: true, + 0xa6: true, + 0xa7: true, + 0xa8: true, + 0xa9: true, + 0xaa: true, + 0xab: true, + 0xac: true, + 0xad: true, + 0xae: true, + 0xaf: true, + 0xb0: true, + 0xb1: true, + 0xb2: true, + 0xb3: true, + 0xb4: true, + 0xb5: true, + 0xb6: true, + 0xb7: true, + 0xb8: true, + 0xb9: true, + 0xba: true, + 0xbb: true, + 0xbc: true, + 0xbd: true, + 0xbe: true, + 0xbf: true, + 0xc0: true, + 0xc1: true, + 0xc2: true, + 0xc3: true, + 0xc4: true, + 0xc5: true, + 0xc6: true, + 0xc7: true, + 0xc8: true, + 0xc9: true, + 0xca: true, + 0xcb: true, + 0xcc: true, + 0xcd: true, + 0xce: true, + 0xcf: true, + 0xd0: true, + 0xd1: true, + 0xd2: true, + 0xd3: true, + 0xd4: true, + 0xd5: true, + 0xd6: true, + 0xd7: true, + 0xd8: true, + 0xd9: true, + 0xda: true, + 0xdb: true, + 0xdc: true, + 0xdd: true, + 0xde: true, + 0xdf: true, + 0xe0: true, + 0xe1: true, + 0xe2: true, + 0xe3: true, + 0xe4: true, + 0xe5: true, + 0xe6: true, + 0xe7: true, + 0xe8: true, + 0xe9: true, + 0xea: true, + 0xeb: true, + 0xec: true, + 0xed: true, + 0xee: true, + 0xef: true, + 0xf0: true, + 0xf1: true, + 0xf2: true, + 0xf3: true, + 0xf4: true, + 0xf5: true, + 0xf6: true, + 0xf7: true, + 0xf8: true, + 0xf9: true, + 0xfa: true, + 0xfb: true, + 0xfc: true, + 0xfd: true, + 0xfe: true, + 0xff: true, +} + // htmlSafeSet holds the value true if the ASCII character with the given // array position can be safely represented inside a JSON string, embedded // inside of HTML