diff --git a/encode.go b/encode.go index 7b29967..55fdf41 100644 --- a/encode.go +++ b/encode.go @@ -16,6 +16,7 @@ import ( // An Encoder writes JSON values to an output stream. type Encoder struct { w io.Writer + ctx *encodeRuntimeContext buf []byte enabledIndent bool enabledHTMLEscape bool @@ -37,7 +38,7 @@ const ( type opcodeSet struct { codeIndent *opcode code *opcode - ctx sync.Pool + codeLength int } func loadOpcodeMap() map[uintptr]*opcodeSet { @@ -68,6 +69,10 @@ func init() { encPool = sync.Pool{ New: func() interface{} { return &Encoder{ + ctx: &encodeRuntimeContext{ + ptrs: make([]uintptr, 128), + keepRefs: make([]unsafe.Pointer, 0, 8), + }, buf: make([]byte, 0, bufSize), structTypeToCompiledCode: map[uintptr]*compiledCode{}, structTypeToCompiledIndentCode: map[uintptr]*compiledCode{}, @@ -100,7 +105,8 @@ func (e *Encoder) EncodeWithOption(v interface{}, opts ...EncodeOption) error { return err } } - if err := e.encode(v); err != nil { + var err error + if e.buf, err = e.encode(v); err != nil { return err } if e.enabledIndent { @@ -149,7 +155,8 @@ func (e *Encoder) reset() { } func (e *Encoder) encodeForMarshal(v interface{}) ([]byte, error) { - if err := e.encode(v); err != nil { + var err error + if e.buf, err = e.encode(v); err != nil { return nil, err } if e.enabledIndent { @@ -162,15 +169,16 @@ func (e *Encoder) encodeForMarshal(v interface{}) ([]byte, error) { return copied, nil } -func (e *Encoder) encode(v interface{}) error { +func (e *Encoder) encode(v interface{}) ([]byte, error) { + b := e.buf if v == nil { - e.encodeNull() + b = encodeNull(b) if e.enabledIndent { - e.encodeBytes([]byte{',', '\n'}) + b = encodeIndentComma(b) } else { - e.encodeByte(',') + b = encodeComma(b) } - return nil + return b, nil } header := (*interfaceHeader)(unsafe.Pointer(&v)) typ := header.typ @@ -184,12 +192,10 @@ func (e *Encoder) encode(v interface{}) error { } else { code = codeSet.code } - ctx := codeSet.ctx.Get().(*encodeRuntimeContext) + ctx := e.ctx p := uintptr(header.ptr) - ctx.init(p) - err := e.run(ctx, code) - codeSet.ctx.Put(ctx) - return err + ctx.init(p, codeSet.codeLength) + return e.run(ctx, b, code) } // noescape trick for header.typ ( reflect.*rtype ) @@ -201,7 +207,7 @@ func (e *Encoder) encode(v interface{}) error { withIndent: true, }) if err != nil { - return err + return nil, err } code, err := e.compileHead(&encodeCompileContext{ typ: copiedType, @@ -209,7 +215,7 @@ func (e *Encoder) encode(v interface{}) error { withIndent: false, }) if err != nil { - return err + return nil, err } codeIndent = copyOpcode(codeIndent) code = copyOpcode(code) @@ -217,20 +223,13 @@ func (e *Encoder) encode(v interface{}) error { codeSet := &opcodeSet{ codeIndent: codeIndent, code: code, - ctx: sync.Pool{ - New: func() interface{} { - return &encodeRuntimeContext{ - ptrs: make([]uintptr, codeLength), - keepRefs: make([]unsafe.Pointer, 8), - } - }, - }, + codeLength: codeLength, } storeOpcodeSet(typeptr, codeSet, opcodeMap) p := uintptr(header.ptr) - ctx := codeSet.ctx.Get().(*encodeRuntimeContext) - ctx.init(p) + ctx := e.ctx + ctx.init(p, codeLength) var c *opcode if e.enabledIndent { @@ -239,55 +238,14 @@ func (e *Encoder) encode(v interface{}) error { c = code } - if err := e.run(ctx, c); err != nil { - codeSet.ctx.Put(ctx) - return err + b, err = e.run(ctx, b, c) + if err != nil { + return nil, err } - codeSet.ctx.Put(ctx) - return nil + return b, nil } -func (e *Encoder) encodeInt(v int) { - e.encodeInt64(int64(v)) -} - -func (e *Encoder) encodeInt8(v int8) { - e.encodeInt64(int64(v)) -} - -func (e *Encoder) encodeInt16(v int16) { - e.encodeInt64(int64(v)) -} - -func (e *Encoder) encodeInt32(v int32) { - e.encodeInt64(int64(v)) -} - -func (e *Encoder) encodeInt64(v int64) { - e.buf = strconv.AppendInt(e.buf, v, 10) -} - -func (e *Encoder) encodeUint(v uint) { - e.encodeUint64(uint64(v)) -} - -func (e *Encoder) encodeUint8(v uint8) { - e.encodeUint64(uint64(v)) -} - -func (e *Encoder) encodeUint16(v uint16) { - e.encodeUint64(uint64(v)) -} - -func (e *Encoder) encodeUint32(v uint32) { - e.encodeUint64(uint64(v)) -} - -func (e *Encoder) encodeUint64(v uint64) { - e.buf = strconv.AppendUint(e.buf, v, 10) -} - -func (e *Encoder) encodeFloat32(v float32) { +func encodeFloat32(b []byte, v float32) []byte { f64 := float64(v) abs := math.Abs(f64) fmt := byte('f') @@ -298,10 +256,10 @@ func (e *Encoder) encodeFloat32(v float32) { fmt = 'e' } } - e.buf = strconv.AppendFloat(e.buf, f64, fmt, -1, 32) + return strconv.AppendFloat(b, f64, fmt, -1, 32) } -func (e *Encoder) encodeFloat64(v float64) { +func encodeFloat64(b []byte, v float64) []byte { abs := math.Abs(v) fmt := byte('f') // Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right. @@ -310,58 +268,62 @@ func (e *Encoder) encodeFloat64(v float64) { fmt = 'e' } } - e.buf = strconv.AppendFloat(e.buf, v, fmt, -1, 64) + return strconv.AppendFloat(b, v, fmt, -1, 64) } -func (e *Encoder) encodeBool(v bool) { - e.buf = strconv.AppendBool(e.buf, v) -} - -func (e *Encoder) encodeBytes(b []byte) { - e.buf = append(e.buf, b...) -} - -func (e *Encoder) encodeNull() { - e.buf = append(e.buf, 'n', 'u', 'l', 'l') -} - -func (e *Encoder) encodeKey(code *opcode) { - if e.enabledHTMLEscape { - e.encodeBytes(code.escapedKey) - } else { - e.encodeBytes(code.key) +func encodeBool(b []byte, v bool) []byte { + if v { + return append(b, "true"...) } + return append(b, "false"...) } -func (e *Encoder) encodeString(s string) { +func encodeBytes(dst []byte, src []byte) []byte { + return append(dst, src...) +} + +func encodeNull(b []byte) []byte { + return append(b, "null"...) +} + +func encodeComma(b []byte) []byte { + return append(b, ',') +} + +func encodeIndentComma(b []byte) []byte { + return append(b, ',', '\n') +} + +func (e *Encoder) encodeKey(b []byte, code *opcode) []byte { if e.enabledHTMLEscape { - e.encodeEscapedString(s) - } else { - e.encodeNoEscapedString(s) + return append(b, code.escapedKey...) } + return append(b, code.key...) } -func (e *Encoder) encodeByteSlice(b []byte) { - encodedLen := base64.StdEncoding.EncodedLen(len(b)) - e.encodeByte('"') - pos := len(e.buf) - remainLen := cap(e.buf[pos:]) +func (e *Encoder) encodeString(b []byte, s string) []byte { + if e.enabledHTMLEscape { + return encodeEscapedString(b, s) + } + return encodeNoEscapedString(b, s) +} + +func encodeByteSlice(b []byte, src []byte) []byte { + encodedLen := base64.StdEncoding.EncodedLen(len(src)) + b = append(b, '"') + pos := len(b) + remainLen := cap(b[pos:]) var buf []byte if remainLen > encodedLen { - buf = e.buf[pos : pos+encodedLen] + buf = b[pos : pos+encodedLen] } else { buf = make([]byte, encodedLen) } - base64.StdEncoding.Encode(buf, b) - e.encodeBytes(buf) - e.encodeByte('"') + base64.StdEncoding.Encode(buf, src) + return append(append(b, buf...), '"') } -func (e *Encoder) encodeByte(b byte) { - e.buf = append(e.buf, b) -} - -func (e *Encoder) encodeIndent(indent int) { - e.buf = append(e.buf, e.prefix...) - e.buf = append(e.buf, bytes.Repeat(e.indentStr, indent)...) +func (e *Encoder) encodeIndent(b []byte, indent int) []byte { + b = append(b, e.prefix...) + return append(b, bytes.Repeat(e.indentStr, indent)...) } diff --git a/encode_compile.go b/encode_compile.go index dd41b56..0090afa 100644 --- a/encode_compile.go +++ b/encode_compile.go @@ -1007,7 +1007,7 @@ func (e *Encoder) compileStruct(ctx *encodeCompileContext, isPtr bool) (*opcode, var buf bytes.Buffer enc := NewEncoder(&buf) - enc.encodeEscapedString(tag.key) + enc.buf = encodeEscapedString(enc.buf, tag.key) escapedKey := fmt.Sprintf(`%s:`, string(enc.buf)) enc.release() fieldCode := &opcode{ diff --git a/encode_context.go b/encode_context.go index 7a241ff..cdeefb4 100644 --- a/encode_context.go +++ b/encode_context.go @@ -88,7 +88,10 @@ type encodeRuntimeContext struct { keepRefs []unsafe.Pointer } -func (c *encodeRuntimeContext) init(p uintptr) { +func (c *encodeRuntimeContext) init(p uintptr, codelen int) { + if len(c.ptrs) < codelen { + c.ptrs = make([]uintptr, codelen) + } c.ptrs[0] = p c.keepRefs = c.keepRefs[:0] } diff --git a/encode_int.go b/encode_int.go new file mode 100644 index 0000000..b53149c --- /dev/null +++ b/encode_int.go @@ -0,0 +1,98 @@ +package json + +import ( + "unsafe" +) + +var endianness int + +func init() { + var b [2]byte + *(*uint16)(unsafe.Pointer(&b)) = uint16(0xABCD) + + switch b[0] { + case 0xCD: + endianness = 0 // LE + case 0xAB: + endianness = 1 // BE + default: + panic("could not determine endianness") + } +} + +// "00010203...96979899" cast to []uint16 +var intLELookup = [100]uint16{ + 0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930, + 0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931, + 0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932, + 0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933, + 0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934, + 0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935, + 0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936, + 0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937, + 0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938, + 0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939, +} + +var intBELookup = [100]uint16{ + 0x3030, 0x3031, 0x3032, 0x3033, 0x3034, 0x3035, 0x3036, 0x3037, 0x3038, 0x3039, + 0x3130, 0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3138, 0x3139, + 0x3230, 0x3231, 0x3232, 0x3233, 0x3234, 0x3235, 0x3236, 0x3237, 0x3238, 0x3239, + 0x3330, 0x3331, 0x3332, 0x3333, 0x3334, 0x3335, 0x3336, 0x3337, 0x3338, 0x3339, + 0x3430, 0x3431, 0x3432, 0x3433, 0x3434, 0x3435, 0x3436, 0x3437, 0x3438, 0x3439, + 0x3530, 0x3531, 0x3532, 0x3533, 0x3534, 0x3535, 0x3536, 0x3537, 0x3538, 0x3539, + 0x3630, 0x3631, 0x3632, 0x3633, 0x3634, 0x3635, 0x3636, 0x3637, 0x3638, 0x3639, + 0x3730, 0x3731, 0x3732, 0x3733, 0x3734, 0x3735, 0x3736, 0x3737, 0x3738, 0x3739, + 0x3830, 0x3831, 0x3832, 0x3833, 0x3834, 0x3835, 0x3836, 0x3837, 0x3838, 0x3839, + 0x3930, 0x3931, 0x3932, 0x3933, 0x3934, 0x3935, 0x3936, 0x3937, 0x3938, 0x3939, +} + +var intLookup = [2]*[100]uint16{&intLELookup, &intBELookup} + +func appendInt(b []byte, n int64) []byte { + return formatInteger(b, uint64(n), n < 0) +} + +func appendUint(b []byte, n uint64) []byte { + return formatInteger(b, n, false) +} + +func formatInteger(out []byte, n uint64, negative bool) []byte { + if !negative { + if n < 10 { + return append(out, byte(n+'0')) + } else if n < 100 { + u := intLELookup[n] + return append(out, byte(u), byte(u>>8)) + } + } else { + n = -n + } + + lookup := intLookup[endianness] + + var b [22]byte + u := (*[11]uint16)(unsafe.Pointer(&b)) + i := 11 + + for n >= 100 { + j := n % 100 + n /= 100 + i-- + u[i] = lookup[j] + } + + i-- + u[i] = lookup[n] + + i *= 2 // convert to byte index + if n < 10 { + i++ // remove leading zero + } + if negative { + i-- + b[i] = '-' + } + + return append(out, b[i:]...) +} diff --git a/encode_string.go b/encode_string.go index bd3b140..9dca7a5 100644 --- a/encode_string.go +++ b/encode_string.go @@ -1,9 +1,352 @@ package json import ( + "math/bits" + "reflect" "unicode/utf8" + "unsafe" ) +const ( + lsb = 0x0101010101010101 + msb = 0x8080808080808080 +) + +var needEscapeWithHTML = [256]bool{ + '"': true, + '&': true, + '<': true, + '>': true, + '\\': true, + 0x00: true, + 0x01: true, + 0x02: true, + 0x03: true, + 0x04: true, + 0x05: true, + 0x06: true, + 0x07: true, + 0x08: true, + 0x09: true, + 0x0a: true, + 0x0b: true, + 0x0c: true, + 0x0d: true, + 0x0e: true, + 0x0f: true, + 0x10: true, + 0x11: true, + 0x12: true, + 0x13: true, + 0x14: true, + 0x15: true, + 0x16: true, + 0x17: true, + 0x18: true, + 0x19: true, + 0x1a: true, + 0x1b: true, + 0x1c: true, + 0x1d: true, + 0x1e: true, + 0x1f: true, + /* 0x20 - 0x7f */ + 0x80: true, + 0x81: true, + 0x82: true, + 0x83: true, + 0x84: true, + 0x85: true, + 0x86: true, + 0x87: true, + 0x88: true, + 0x89: true, + 0x8a: true, + 0x8b: true, + 0x8c: true, + 0x8d: true, + 0x8e: true, + 0x8f: true, + 0x90: true, + 0x91: true, + 0x92: true, + 0x93: true, + 0x94: true, + 0x95: true, + 0x96: true, + 0x97: true, + 0x98: true, + 0x99: true, + 0x9a: true, + 0x9b: true, + 0x9c: true, + 0x9d: true, + 0x9e: true, + 0x9f: true, + 0xa0: true, + 0xa1: true, + 0xa2: true, + 0xa3: true, + 0xa4: true, + 0xa5: true, + 0xa6: true, + 0xa7: true, + 0xa8: true, + 0xa9: true, + 0xaa: true, + 0xab: true, + 0xac: true, + 0xad: true, + 0xae: true, + 0xaf: true, + 0xb0: true, + 0xb1: true, + 0xb2: true, + 0xb3: true, + 0xb4: true, + 0xb5: true, + 0xb6: true, + 0xb7: true, + 0xb8: true, + 0xb9: true, + 0xba: true, + 0xbb: true, + 0xbc: true, + 0xbd: true, + 0xbe: true, + 0xbf: true, + 0xc0: true, + 0xc1: true, + 0xc2: true, + 0xc3: true, + 0xc4: true, + 0xc5: true, + 0xc6: true, + 0xc7: true, + 0xc8: true, + 0xc9: true, + 0xca: true, + 0xcb: true, + 0xcc: true, + 0xcd: true, + 0xce: true, + 0xcf: true, + 0xd0: true, + 0xd1: true, + 0xd2: true, + 0xd3: true, + 0xd4: true, + 0xd5: true, + 0xd6: true, + 0xd7: true, + 0xd8: true, + 0xd9: true, + 0xda: true, + 0xdb: true, + 0xdc: true, + 0xdd: true, + 0xde: true, + 0xdf: true, + 0xe0: true, + 0xe1: true, + 0xe2: true, + 0xe3: true, + 0xe4: true, + 0xe5: true, + 0xe6: true, + 0xe7: true, + 0xe8: true, + 0xe9: true, + 0xea: true, + 0xeb: true, + 0xec: true, + 0xed: true, + 0xee: true, + 0xef: true, + 0xf0: true, + 0xf1: true, + 0xf2: true, + 0xf3: true, + 0xf4: true, + 0xf5: true, + 0xf6: true, + 0xf7: true, + 0xf8: true, + 0xf9: true, + 0xfa: true, + 0xfb: true, + 0xfc: true, + 0xfd: true, + 0xfe: true, + 0xff: true, +} + +var needEscape = [256]bool{ + '"': true, + '\\': true, + 0x00: true, + 0x01: true, + 0x02: true, + 0x03: true, + 0x04: true, + 0x05: true, + 0x06: true, + 0x07: true, + 0x08: true, + 0x09: true, + 0x0a: true, + 0x0b: true, + 0x0c: true, + 0x0d: true, + 0x0e: true, + 0x0f: true, + 0x10: true, + 0x11: true, + 0x12: true, + 0x13: true, + 0x14: true, + 0x15: true, + 0x16: true, + 0x17: true, + 0x18: true, + 0x19: true, + 0x1a: true, + 0x1b: true, + 0x1c: true, + 0x1d: true, + 0x1e: true, + 0x1f: true, + /* 0x20 - 0x7f */ + 0x80: true, + 0x81: true, + 0x82: true, + 0x83: true, + 0x84: true, + 0x85: true, + 0x86: true, + 0x87: true, + 0x88: true, + 0x89: true, + 0x8a: true, + 0x8b: true, + 0x8c: true, + 0x8d: true, + 0x8e: true, + 0x8f: true, + 0x90: true, + 0x91: true, + 0x92: true, + 0x93: true, + 0x94: true, + 0x95: true, + 0x96: true, + 0x97: true, + 0x98: true, + 0x99: true, + 0x9a: true, + 0x9b: true, + 0x9c: true, + 0x9d: true, + 0x9e: true, + 0x9f: true, + 0xa0: true, + 0xa1: true, + 0xa2: true, + 0xa3: true, + 0xa4: true, + 0xa5: true, + 0xa6: true, + 0xa7: true, + 0xa8: true, + 0xa9: true, + 0xaa: true, + 0xab: true, + 0xac: true, + 0xad: true, + 0xae: true, + 0xaf: true, + 0xb0: true, + 0xb1: true, + 0xb2: true, + 0xb3: true, + 0xb4: true, + 0xb5: true, + 0xb6: true, + 0xb7: true, + 0xb8: true, + 0xb9: true, + 0xba: true, + 0xbb: true, + 0xbc: true, + 0xbd: true, + 0xbe: true, + 0xbf: true, + 0xc0: true, + 0xc1: true, + 0xc2: true, + 0xc3: true, + 0xc4: true, + 0xc5: true, + 0xc6: true, + 0xc7: true, + 0xc8: true, + 0xc9: true, + 0xca: true, + 0xcb: true, + 0xcc: true, + 0xcd: true, + 0xce: true, + 0xcf: true, + 0xd0: true, + 0xd1: true, + 0xd2: true, + 0xd3: true, + 0xd4: true, + 0xd5: true, + 0xd6: true, + 0xd7: true, + 0xd8: true, + 0xd9: true, + 0xda: true, + 0xdb: true, + 0xdc: true, + 0xdd: true, + 0xde: true, + 0xdf: true, + 0xe0: true, + 0xe1: true, + 0xe2: true, + 0xe3: true, + 0xe4: true, + 0xe5: true, + 0xe6: true, + 0xe7: true, + 0xe8: true, + 0xe9: true, + 0xea: true, + 0xeb: true, + 0xec: true, + 0xed: true, + 0xee: true, + 0xef: true, + 0xf0: true, + 0xf1: true, + 0xf2: true, + 0xf3: true, + 0xf4: true, + 0xf5: true, + 0xf6: true, + 0xf7: true, + 0xf8: true, + 0xf9: true, + 0xfa: true, + 0xfb: true, + 0xfc: true, + 0xfd: true, + 0xfe: true, + 0xff: true, +} + // htmlSafeSet holds the value true if the ASCII character with the given // array position can be safely represented inside a JSON string, embedded // inside of HTML