Add encoder

This commit is contained in:
Masaaki Goshima 2020-04-19 19:51:22 +09:00
parent 9bcae1f0c8
commit 1d576e23f8
7 changed files with 654 additions and 0 deletions

39
benchmark_test.go Normal file
View File

@ -0,0 +1,39 @@
package json_test
import (
"testing"
gojson "github.com/goccy/go-json"
jsoniter "github.com/json-iterator/go"
)
// T is the fixture payload encoded by the benchmarks: one integer, one
// floating-point and one string field.
type T struct {
	A int
	B float64
	C string
}

// newT returns a freshly allocated fixture. The string field contains a
// double quote, so encoding it requires escaping.
func newT() *T {
	fixture := new(T)
	fixture.A = 1
	fixture.B = 3.14
	fixture.C = `hello"world`
	return fixture
}
// Benchmark_jsoniter measures json-iterator (in its standard-library
// compatible configuration) marshalling the shared fixture, reporting
// allocations per operation.
func Benchmark_jsoniter(b *testing.B) {
	payload := newT()
	api := jsoniter.ConfigCompatibleWithStandardLibrary
	b.ReportAllocs()
	for n := 0; n < b.N; n++ {
		_, err := api.Marshal(payload)
		if err != nil {
			b.Fatal(err)
		}
	}
}
// Benchmark_gojson measures this package's Marshal on the shared fixture,
// reporting allocations per operation.
func Benchmark_gojson(b *testing.B) {
	payload := newT()
	b.ReportAllocs()
	for n := 0; n < b.N; n++ {
		_, err := gojson.Marshal(payload)
		if err != nil {
			b.Fatal(err)
		}
	}
}

271
encode.go Normal file
View File

@ -0,0 +1,271 @@
package json
import (
	"fmt"
	"reflect"
	"runtime"
	"strconv"
	"sync"
	"unsafe"

	"golang.org/x/xerrors"
)
// Encoder accumulates JSON output in a reusable byte buffer. Obtain one with
// NewEncoder and hand it back with Release once its output has been copied
// out.
type Encoder struct {
	buf []byte
}

// EncodeOp is a compiled encoding step: it appends to the given Encoder the
// JSON form of the value stored at the given address.
type EncodeOp func(*Encoder, uintptr)

const (
	// bufSize is the initial capacity, in bytes, of a new Encoder's buffer.
	bufSize = 1024
)

var (
	// encPool recycles Encoders (and their buffers) between calls.
	//
	// Encoders deliberately hold no copy of the pool: a sync.Pool must not
	// be copied after first use, and an Encoder released into a private
	// copy would never be handed out again by NewEncoder.
	encPool = sync.Pool{
		New: func() interface{} {
			return &Encoder{buf: make([]byte, 0, bufSize)}
		},
	}

	// cachedEncodeOp holds compiled ops keyed by type name.
	cachedEncodeOp = map[string]EncodeOp{}
)

// NewEncoder returns an Encoder with an empty buffer, reusing a previously
// released one when the pool has one available.
func NewEncoder() *Encoder {
	enc := encPool.Get().(*Encoder)
	enc.Reset()
	return enc
}

// Release returns e to the shared pool. e must not be used afterwards.
func (e *Encoder) Release() {
	encPool.Put(e)
}

// Reset discards the buffered output while keeping the buffer's capacity.
func (e *Encoder) Reset() {
	e.buf = e.buf[:0]
}
// EncodeInt appends v to the buffer as a base-10 integer.
func (e *Encoder) EncodeInt(v int) {
	e.buf = strconv.AppendInt(e.buf, int64(v), 10)
}

// EncodeInt8 appends v to the buffer as a base-10 integer.
func (e *Encoder) EncodeInt8(v int8) {
	e.buf = strconv.AppendInt(e.buf, int64(v), 10)
}

// EncodeInt16 appends v to the buffer as a base-10 integer.
func (e *Encoder) EncodeInt16(v int16) {
	e.buf = strconv.AppendInt(e.buf, int64(v), 10)
}

// EncodeInt32 appends v to the buffer as a base-10 integer.
func (e *Encoder) EncodeInt32(v int32) {
	e.buf = strconv.AppendInt(e.buf, int64(v), 10)
}

// EncodeInt64 appends v to the buffer as a base-10 integer.
func (e *Encoder) EncodeInt64(v int64) {
	const base = 10
	e.buf = strconv.AppendInt(e.buf, v, base)
}
// EncodeUint appends v to the buffer as a base-10 unsigned integer.
func (e *Encoder) EncodeUint(v uint) {
	e.buf = strconv.AppendUint(e.buf, uint64(v), 10)
}

// EncodeUint8 appends v to the buffer as a base-10 unsigned integer.
func (e *Encoder) EncodeUint8(v uint8) {
	e.buf = strconv.AppendUint(e.buf, uint64(v), 10)
}

// EncodeUint16 appends v to the buffer as a base-10 unsigned integer.
func (e *Encoder) EncodeUint16(v uint16) {
	e.buf = strconv.AppendUint(e.buf, uint64(v), 10)
}

// EncodeUint32 appends v to the buffer as a base-10 unsigned integer.
func (e *Encoder) EncodeUint32(v uint32) {
	e.buf = strconv.AppendUint(e.buf, uint64(v), 10)
}

// EncodeUint64 appends v to the buffer as a base-10 unsigned integer.
func (e *Encoder) EncodeUint64(v uint64) {
	const base = 10
	e.buf = strconv.AppendUint(e.buf, v, base)
}
// EncodeFloat32 appends v in plain decimal notation (no exponent), using the
// fewest digits that round-trip as a 32-bit float.
func (e *Encoder) EncodeFloat32(v float32) {
	const bitSize = 32
	widened := float64(v)
	e.buf = strconv.AppendFloat(e.buf, widened, 'f', -1, bitSize)
}

// EncodeFloat64 appends v in plain decimal notation (no exponent), using the
// fewest digits that round-trip as a 64-bit float.
func (e *Encoder) EncodeFloat64(v float64) {
	const bitSize = 64
	e.buf = strconv.AppendFloat(e.buf, v, 'f', -1, bitSize)
}

// EncodeBool appends the literal true or false.
func (e *Encoder) EncodeBool(v bool) {
	out := strconv.AppendBool(e.buf, v)
	e.buf = out
}
// EncodeString appends the bytes of s to the buffer verbatim: no quotes are
// added and nothing is escaped. Use EncodeEscapedString for JSON string
// values.
func (e *Encoder) EncodeString(s string) {
	// append accepts a string source directly and copies its bytes without
	// an intermediate allocation, so there is no need to reinterpret the
	// string header as a (larger) slice header through unsafe.
	e.buf = append(e.buf, s...)
}

// EncodeByte appends the single byte b to the buffer.
func (e *Encoder) EncodeByte(b byte) {
	e.buf = append(e.buf, b)
}
// Encode serializes v and returns the encoder's buffered output as a newly
// allocated slice.
//
// v may be a pointer or a plain value. An untyped nil and a nil pointer both
// encode as the JSON literal null. An error is returned when v's type cannot
// be compiled.
func (e *Encoder) Encode(v interface{}) ([]byte, error) {
	rv := reflect.ValueOf(v)
	if !rv.IsValid() {
		// Untyped nil: there is no type to compile.
		return []byte("null"), nil
	}
	if rv.Kind() == reflect.Ptr {
		if rv.IsNil() {
			return []byte("null"), nil
		}
	} else {
		// A Value obtained from reflect.ValueOf is never addressable, so
		// Addr() would panic. Copy the value into fresh storage and encode
		// through a pointer to that copy instead.
		boxed := reflect.New(rv.Type())
		boxed.Elem().Set(rv)
		rv = boxed
	}
	out, err := e.encode(rv)
	// The compiled ops reach the value through a bare uintptr, which the
	// garbage collector does not trace; keep the object reachable until
	// encoding has finished.
	runtime.KeepAlive(rv)
	return out, err
}
// encodeOpCache memoizes compiled ops per reflect.Type. It is a sync.Map
// because encoders running on different goroutines share it.
var encodeOpCache sync.Map // reflect.Type -> EncodeOp

// encode runs the compiled op for v's type against the memory v points to
// and returns a copy of the encoder's buffer. v must be a pointer value.
//
// Ops are cached by reflect.Type. The type's Name() cannot serve as the key:
// it is the empty string for pointer types (and every other unnamed type),
// so all of them would share a single cache entry and be encoded with
// whichever op happened to be compiled first.
func (e *Encoder) encode(v reflect.Value) ([]byte, error) {
	typ := v.Type()

	var op EncodeOp
	if cached, ok := encodeOpCache.Load(typ); ok {
		op = cached.(EncodeOp)
	} else {
		compiled, err := e.compile(v)
		if err != nil {
			return nil, err
		}
		// Two goroutines may compile the same type at once; the ops are
		// equivalent, so whichever Store lands last is fine.
		encodeOpCache.Store(typ, compiled)
		op = compiled
	}

	op(e, v.Pointer())

	// Hand out a copy: e.buf is reused once the encoder is reset.
	copied := make([]byte, len(e.buf))
	copy(copied, e.buf)
	return copied, nil
}
// compile returns the op that encodes a value of v's type, or an error
// wrapping ErrUnknownType when the kind is not supported.
//
// For a pointer the result is the pointee's op, so it must be invoked with
// the pointee's address (encode does this by passing v.Pointer()).
// NOTE(review): a nil pointer is not handled here; v.Elem() is then the zero
// Value and the recursive v.Type() call panics.
func (e *Encoder) compile(v reflect.Value) (EncodeOp, error) {
	switch v.Type().Kind() {
	case reflect.Ptr:
		return e.compile(v.Elem())
	case reflect.Struct:
		return e.compileStruct(v)
	case reflect.Int:
		return e.compileInt()
	case reflect.Int8:
		return e.compileInt8()
	case reflect.Int16:
		return e.compileInt16()
	case reflect.Int32:
		return e.compileInt32()
	case reflect.Int64:
		return e.compileInt64()
	// The unsigned kinds have compilers of their own; dispatch to them
	// instead of letting them fall through to ErrUnknownType.
	case reflect.Uint:
		return e.compileUint()
	case reflect.Uint8:
		return e.compileUint8()
	case reflect.Uint16:
		return e.compileUint16()
	case reflect.Uint32:
		return e.compileUint32()
	case reflect.Uint64:
		return e.compileUint64()
	case reflect.Float32:
		return e.compileFloat32()
	case reflect.Float64:
		return e.compileFloat64()
	case reflect.String:
		return e.compileString()
	case reflect.Bool:
		return e.compileBool()
	}
	return nil, xerrors.Errorf("failed to compile %s: %w", v.Type(), ErrUnknownType)
}
// The signed-integer compilers each return an op that reads a value of the
// matching width from the given address and appends it in base 10.
//
// The ops call the pointer helpers through the encoder they are invoked
// with rather than through e: compiled ops are cached globally, and
// capturing e would keep the encoder that happened to compile them (and its
// buffer) reachable for as long as the cache entry lives.

// compileInt returns the op for int values.
func (e *Encoder) compileInt() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeInt(enc.ptrToInt(p))
	}, nil
}

// compileInt8 returns the op for int8 values.
func (e *Encoder) compileInt8() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeInt8(enc.ptrToInt8(p))
	}, nil
}

// compileInt16 returns the op for int16 values.
func (e *Encoder) compileInt16() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeInt16(enc.ptrToInt16(p))
	}, nil
}

// compileInt32 returns the op for int32 values.
func (e *Encoder) compileInt32() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeInt32(enc.ptrToInt32(p))
	}, nil
}

// compileInt64 returns the op for int64 values.
func (e *Encoder) compileInt64() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeInt64(enc.ptrToInt64(p))
	}, nil
}
// The unsigned-integer compilers each return an op that reads a value of
// the matching width from the given address and appends it in base 10.
//
// The ops call the pointer helpers through the encoder they are invoked
// with rather than through e: compiled ops are cached globally, and
// capturing e would keep the encoder that happened to compile them (and its
// buffer) reachable for as long as the cache entry lives.

// compileUint returns the op for uint values.
func (e *Encoder) compileUint() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeUint(enc.ptrToUint(p))
	}, nil
}

// compileUint8 returns the op for uint8 values.
func (e *Encoder) compileUint8() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeUint8(enc.ptrToUint8(p))
	}, nil
}

// compileUint16 returns the op for uint16 values.
func (e *Encoder) compileUint16() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeUint16(enc.ptrToUint16(p))
	}, nil
}

// compileUint32 returns the op for uint32 values.
func (e *Encoder) compileUint32() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeUint32(enc.ptrToUint32(p))
	}, nil
}

// compileUint64 returns the op for uint64 values.
func (e *Encoder) compileUint64() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeUint64(enc.ptrToUint64(p))
	}, nil
}
// The float, string and bool compilers each return an op that reads a value
// of the matching type from the given address and appends its JSON form.
//
// The ops call the pointer helpers through the encoder they are invoked
// with rather than through e: compiled ops are cached globally, and
// capturing e would keep the encoder that happened to compile them (and its
// buffer) reachable for as long as the cache entry lives.

// compileFloat32 returns the op for float32 values.
func (e *Encoder) compileFloat32() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeFloat32(enc.ptrToFloat32(p))
	}, nil
}

// compileFloat64 returns the op for float64 values.
func (e *Encoder) compileFloat64() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeFloat64(enc.ptrToFloat64(p))
	}, nil
}

// compileString returns the op for string values; the string is written
// quoted and escaped.
func (e *Encoder) compileString() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeEscapedString(enc.ptrToString(p))
	}, nil
}

// compileBool returns the op for bool values.
func (e *Encoder) compileBool() (EncodeOp, error) {
	return func(enc *Encoder, p uintptr) {
		enc.EncodeBool(enc.ptrToBool(p))
	}, nil
}
// compileStruct returns the op that encodes a struct of v's type as a JSON
// object. Fields are written in declaration order under their Go field
// names, separated by commas; a struct without fields encodes as {}.
//
// It returns an error when any field's type cannot be compiled.
// Pointer-typed fields are rejected with ErrUnknownType: compile yields the
// pointee's op, and applying that op to the field's own address would
// reinterpret the stored pointer bits as the value.
func (e *Encoder) compileStruct(v reflect.Value) (EncodeOp, error) {
	typ := v.Type()
	fieldNum := v.NumField()
	opQueue := make([]EncodeOp, 0, fieldNum)
	for i := 0; i < fieldNum; i++ {
		field := typ.Field(i)
		if field.Type.Kind() == reflect.Ptr {
			return nil, xerrors.Errorf("failed to compile %s: %w", field.Type, ErrUnknownType)
		}
		op, err := e.compile(v.Field(i))
		if err != nil {
			return nil, err
		}
		key := fmt.Sprintf(`"%s":`, field.Name)
		// Capture only what the op needs instead of the whole StructField.
		offset := field.Offset
		opQueue = append(opQueue, func(enc *Encoder, base uintptr) {
			enc.EncodeString(key)
			op(enc, base+offset)
		})
	}
	queueNum := len(opQueue)
	return func(enc *Encoder, base uintptr) {
		enc.EncodeByte('{')
		for i := 0; i < queueNum; i++ {
			opQueue[i](enc, base)
			// No separator after the last member.
			if i != queueNum-1 {
				enc.EncodeByte(',')
			}
		}
		enc.EncodeByte('}')
	}, nil
}
// ptrToInt loads the int stored at address p.
func (e *Encoder) ptrToInt(p uintptr) int {
	src := (*int)(unsafe.Pointer(p))
	return *src
}

// ptrToInt8 loads the int8 stored at address p.
func (e *Encoder) ptrToInt8(p uintptr) int8 {
	src := (*int8)(unsafe.Pointer(p))
	return *src
}

// ptrToInt16 loads the int16 stored at address p.
func (e *Encoder) ptrToInt16(p uintptr) int16 {
	src := (*int16)(unsafe.Pointer(p))
	return *src
}

// ptrToInt32 loads the int32 stored at address p.
func (e *Encoder) ptrToInt32(p uintptr) int32 {
	src := (*int32)(unsafe.Pointer(p))
	return *src
}

// ptrToInt64 loads the int64 stored at address p.
func (e *Encoder) ptrToInt64(p uintptr) int64 {
	src := (*int64)(unsafe.Pointer(p))
	return *src
}
// ptrToUint loads the uint stored at address p.
func (e *Encoder) ptrToUint(p uintptr) uint {
	src := (*uint)(unsafe.Pointer(p))
	return *src
}

// ptrToUint8 loads the uint8 stored at address p.
func (e *Encoder) ptrToUint8(p uintptr) uint8 {
	src := (*uint8)(unsafe.Pointer(p))
	return *src
}

// ptrToUint16 loads the uint16 stored at address p.
func (e *Encoder) ptrToUint16(p uintptr) uint16 {
	src := (*uint16)(unsafe.Pointer(p))
	return *src
}

// ptrToUint32 loads the uint32 stored at address p.
func (e *Encoder) ptrToUint32(p uintptr) uint32 {
	src := (*uint32)(unsafe.Pointer(p))
	return *src
}

// ptrToUint64 loads the uint64 stored at address p.
func (e *Encoder) ptrToUint64(p uintptr) uint64 {
	src := (*uint64)(unsafe.Pointer(p))
	return *src
}
// ptrToFloat32 loads the float32 stored at address p.
func (e *Encoder) ptrToFloat32(p uintptr) float32 {
	src := (*float32)(unsafe.Pointer(p))
	return *src
}

// ptrToFloat64 loads the float64 stored at address p.
func (e *Encoder) ptrToFloat64(p uintptr) float64 {
	src := (*float64)(unsafe.Pointer(p))
	return *src
}

// ptrToBool loads the bool stored at address p.
func (e *Encoder) ptrToBool(p uintptr) bool {
	src := (*bool)(unsafe.Pointer(p))
	return *src
}

// ptrToByte loads the byte stored at address p.
func (e *Encoder) ptrToByte(p uintptr) byte {
	src := (*byte)(unsafe.Pointer(p))
	return *src
}

// ptrToBytes loads the byte-slice header stored at address p.
func (e *Encoder) ptrToBytes(p uintptr) []byte {
	src := (*[]byte)(unsafe.Pointer(p))
	return *src
}

// ptrToString loads the string header stored at address p.
func (e *Encoder) ptrToString(p uintptr) string {
	src := (*string)(unsafe.Pointer(p))
	return *src
}

305
encode_string.go Normal file
View File

@ -0,0 +1,305 @@
package json
import "unicode/utf8"
// htmlSafeSet reports, for each ASCII code, whether that character may be
// written as-is inside a JSON string that is embedded in an HTML <script>
// element.
//
// Every entry is true except the control characters (0-31), the double
// quote ("), the backslash ("\"), the HTML tag delimiters ("<" and ">") and
// the ampersand ("&").
var htmlSafeSet = func() [utf8.RuneSelf]bool {
	var set [utf8.RuneSelf]bool
	// Entries below the space character keep their zero value (false).
	for c := ' '; c < utf8.RuneSelf; c++ {
		switch c {
		case '"', '&', '<', '>', '\\':
			// always escaped
		default:
			set[c] = true
		}
	}
	return set
}()
// safeSet reports, for each ASCII code, whether that character may be
// written as-is inside a JSON string.
//
// Every entry is true except the control characters (0-31), the double
// quote (") and the backslash ("\").
var safeSet = func() [utf8.RuneSelf]bool {
	var set [utf8.RuneSelf]bool
	// Entries below the space character keep their zero value (false).
	for c := ' '; c < utf8.RuneSelf; c++ {
		if c == '"' || c == '\\' {
			continue
		}
		set[c] = true
	}
	return set
}()
// hex is the lowercase hexadecimal digit alphabet; the string escaper
// indexes it by nibble value when writing \u escapes.
var hex = "0123456789abcdef"
// EncodeEscapedString appends s as a double-quoted JSON string, escaping
// everything htmlSafeSet marks as unsafe.
//
// The leading run of safe ASCII bytes is copied straight through; the first
// byte that is non-ASCII or needs escaping hands the rest of the string to
// writeStringSlowPathWithHTMLEscaped, which also writes the closing quote.
func (e *Encoder) EncodeEscapedString(s string) {
	valLen := len(s)
	e.buf = append(e.buf, '"')

	// Measure the safe prefix.
	safe := 0
	for safe < valLen {
		c := s[safe]
		if c >= utf8.RuneSelf || !htmlSafeSet[c] {
			break
		}
		safe++
	}
	e.buf = append(e.buf, s[:safe]...)

	if safe < valLen {
		e.writeStringSlowPathWithHTMLEscaped(safe, s, valLen)
		return
	}
	e.buf = append(e.buf, '"')
}
// writeStringSlowPathWithHTMLEscaped finishes a JSON string whose first i
// bytes have already been written: it appends s[i:] with escaping applied
// and then the closing quote. valLen is the length of s.
//
// Escaped are: ASCII bytes not marked in htmlSafeSet, invalid UTF-8 (each
// offending byte becomes \ufffd) and the line/paragraph separators U+2028
// and U+2029.
func (e *Encoder) writeStringSlowPathWithHTMLEscaped(i int, s string, valLen int) {
	// pending is the start of the run that has been scanned but not yet
	// copied to the buffer.
	pending := i
	for i < valLen {
		b := s[i]

		if b >= utf8.RuneSelf {
			r, width := utf8.DecodeRuneInString(s[i:])
			switch {
			case r == utf8.RuneError && width == 1:
				// Invalid UTF-8: replace the single offending byte.
				if pending < i {
					e.buf = append(e.buf, s[pending:i]...)
				}
				e.buf = append(e.buf, `\ufffd`...)
				i++
				pending = i
			case r == '\u2028' || r == '\u2029':
				// LINE SEPARATOR and PARAGRAPH SEPARATOR are legal in
				// JSON strings but break JSONP, which is evaluated as
				// JavaScript. Escaping them is valid JSON, so it is done
				// unconditionally.
				// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
				if pending < i {
					e.buf = append(e.buf, s[pending:i]...)
				}
				e.buf = append(e.buf, `\u202`...)
				e.buf = append(e.buf, hex[r&0xF])
				i += width
				pending = i
			default:
				// Any other multi-byte rune stays in the pending run.
				i += width
			}
			continue
		}

		if htmlSafeSet[b] {
			i++
			continue
		}

		// b is an ASCII byte that must be escaped: flush the run before it.
		if pending < i {
			e.buf = append(e.buf, s[pending:i]...)
		}
		switch b {
		case '\\', '"':
			e.buf = append(e.buf, '\\', b)
		case '\n':
			e.buf = append(e.buf, '\\', 'n')
		case '\r':
			e.buf = append(e.buf, '\\', 'r')
		case '\t':
			e.buf = append(e.buf, '\\', 't')
		default:
			// Remaining control bytes below 0x20, plus <, > and &, which
			// can lead to security holes when user-controlled strings are
			// rendered into JSON and served to some browsers.
			e.buf = append(e.buf, `\u00`...)
			e.buf = append(e.buf, hex[b>>4], hex[b&0xF])
		}
		i++
		pending = i
	}

	// Copy whatever is left of the last run, then close the string.
	if pending < len(s) {
		e.buf = append(e.buf, s[pending:]...)
	}
	e.buf = append(e.buf, '"')
}

7
error.go Normal file
View File

@ -0,0 +1,7 @@
package json
import "errors"
var (
// ErrUnknownType is the sentinel wrapped into the error the encoder
// returns when asked to compile a value whose kind it has no case for.
ErrUnknownType = errors.New("unknown type name")
)

8
go.mod Normal file
View File

@ -0,0 +1,8 @@
module github.com/goccy/go-json
go 1.12
require (
github.com/json-iterator/go v1.1.9
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543
)

17
go.sum Normal file
View File

@ -0,0 +1,17 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/json-iterator/go v1.1.9 h1:9yzud/Ht36ygwatGx56VwCZtlI/2AD15T1X2sjSuGns=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

7
json.go Normal file
View File

@ -0,0 +1,7 @@
package json
// Marshal returns the JSON encoding of v. It borrows an Encoder for the
// duration of the call and releases it again before returning.
func Marshal(v interface{}) ([]byte, error) {
	encoder := NewEncoder()
	defer encoder.Release()

	encoded, err := encoder.Encode(v)
	return encoded, err
}