From 62b7d3ba0a98e3d11e7520bf802a9df647f78ac0 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Tue, 16 Mar 2021 19:44:32 +0900 Subject: [PATCH] Move compiler for encoder to internal package --- internal/encoder/compiler.go | 1210 +++++++++++++++++++++++++ internal/encoder/compiler/compiler.go | 1 - internal/encoder/compiler/norace.go | 11 - internal/encoder/compiler_norace.go | 38 + internal/encoder/compiler_race.go | 46 + internal/encoder/context.go | 82 ++ internal/encoder/encoder.go | 135 ++- internal/encoder/opcode.go | 647 +++++++++++++ internal/encoder/vm/vm.go | 5 +- internal/runtime/struct_field.go | 81 ++ internal/runtime/type.go | 80 +- 11 files changed, 2286 insertions(+), 50 deletions(-) create mode 100644 internal/encoder/compiler.go delete mode 100644 internal/encoder/compiler/compiler.go delete mode 100644 internal/encoder/compiler/norace.go create mode 100644 internal/encoder/compiler_norace.go create mode 100644 internal/encoder/compiler_race.go create mode 100644 internal/encoder/context.go create mode 100644 internal/encoder/opcode.go create mode 100644 internal/runtime/struct_field.go diff --git a/internal/encoder/compiler.go b/internal/encoder/compiler.go new file mode 100644 index 0000000..aa8d066 --- /dev/null +++ b/internal/encoder/compiler.go @@ -0,0 +1,1210 @@ +package encoder + +import ( + "encoding" + "encoding/json" + "fmt" + "reflect" + "strings" + "sync/atomic" + "unsafe" + + "github.com/goccy/go-json/internal/errors" + "github.com/goccy/go-json/internal/runtime" +) + +var ( + marshalJSONType = reflect.TypeOf((*json.Marshaler)(nil)).Elem() + marshalTextType = reflect.TypeOf((*encoding.TextMarshaler)(nil)).Elem() + jsonNumberType = reflect.TypeOf(json.Number("")) + cachedOpcodeSets []*OpcodeSet + cachedOpcodeMap unsafe.Pointer // map[uintptr]*OpcodeSet + typeAddr = &runtime.TypeAddr{} +) + +func init() { + typeAddr = runtime.AnalyzeTypeAddr() + cachedOpcodeSets = make([]*OpcodeSet, typeAddr.AddrRange) +} + +func loadOpcodeMap() 
map[uintptr]*OpcodeSet { + p := atomic.LoadPointer(&cachedOpcodeMap) + return *(*map[uintptr]*OpcodeSet)(unsafe.Pointer(&p)) +} + +func storeOpcodeSet(typ uintptr, set *OpcodeSet, m map[uintptr]*OpcodeSet) { + newOpcodeMap := make(map[uintptr]*OpcodeSet, len(m)+1) + newOpcodeMap[typ] = set + + for k, v := range m { + newOpcodeMap[k] = v + } + + atomic.StorePointer(&cachedOpcodeMap, *(*unsafe.Pointer)(unsafe.Pointer(&newOpcodeMap))) +} + +func compileToGetCodeSetSlowPath(typeptr uintptr) (*OpcodeSet, error) { + opcodeMap := loadOpcodeMap() + if codeSet, exists := opcodeMap[typeptr]; exists { + return codeSet, nil + } + + // noescape trick for header.typ ( reflect.*rtype ) + copiedType := *(**runtime.Type)(unsafe.Pointer(&typeptr)) + + code, err := compileHead(&compileContext{ + typ: copiedType, + structTypeToCompiledCode: map[uintptr]*CompiledCode{}, + }) + if err != nil { + return nil, err + } + code = copyOpcode(code) + codeLength := code.TotalLength() + codeSet := &OpcodeSet{ + Code: code, + CodeLength: codeLength, + } + storeOpcodeSet(typeptr, codeSet, opcodeMap) + return codeSet, nil +} + +func compileHead(ctx *compileContext) (*Opcode, error) { + typ := ctx.typ + switch { + case implementsMarshalJSON(typ): + return compileMarshalJSON(ctx) + case implementsMarshalText(typ): + return compileMarshalText(ctx) + } + + isPtr := false + orgType := typ + if typ.Kind() == reflect.Ptr { + typ = typ.Elem() + isPtr = true + } + switch { + case implementsMarshalJSON(typ): + return compileMarshalJSON(ctx) + case implementsMarshalText(typ): + return compileMarshalText(ctx) + } + if typ.Kind() == reflect.Map { + if isPtr { + return compilePtr(ctx.withType(runtime.PtrTo(typ))) + } + return compileMap(ctx.withType(typ)) + } else if typ.Kind() == reflect.Struct { + code, err := compileStruct(ctx.withType(typ), isPtr) + if err != nil { + return nil, err + } + optimizeStructEnd(code) + linkRecursiveCode(code) + return code, nil + } else if isPtr && typ.Implements(marshalTextType) 
{ + typ = orgType + } + code, err := compile(ctx.withType(typ), isPtr) + if err != nil { + return nil, err + } + optimizeStructEnd(code) + linkRecursiveCode(code) + return code, nil +} + +func linkRecursiveCode(c *Opcode) { + for code := c; code.Op != OpEnd && code.Op != OpStructFieldRecursiveEnd; { + switch code.Op { + case OpStructFieldRecursive, OpStructFieldRecursivePtr: + if code.Jmp.Linked { + code = code.Next + continue + } + code.Jmp.Code = copyOpcode(code.Jmp.Code) + c := code.Jmp.Code + c.End.Next = newEndOp(&compileContext{}) + c.Op = c.Op.PtrHeadToHead() + + beforeLastCode := c.End + lastCode := beforeLastCode.Next + + lastCode.Idx = beforeLastCode.Idx + uintptrSize + lastCode.ElemIdx = lastCode.Idx + uintptrSize + + // extend length to alloc slot for elemIdx + totalLength := uintptr(code.TotalLength() + 1) + nextTotalLength := uintptr(c.TotalLength() + 1) + + c.End.Next.Op = OpStructFieldRecursiveEnd + + code.Jmp.CurLen = totalLength + code.Jmp.NextLen = nextTotalLength + code.Jmp.Linked = true + + linkRecursiveCode(code.Jmp.Code) + code = code.Next + continue + } + switch code.Op.CodeType() { + case CodeArrayElem, CodeSliceElem, CodeMapKey: + code = code.End + default: + code = code.Next + } + } +} + +func optimizeStructEnd(c *Opcode) { + for code := c; code.Op != OpEnd; { + if code.Op == OpStructFieldRecursive || code.Op == OpStructFieldRecursivePtr { + // ignore if exists recursive operation + return + } + switch code.Op.CodeType() { + case CodeArrayElem, CodeSliceElem, CodeMapKey: + code = code.End + default: + code = code.Next + } + } + + for code := c; code.Op != OpEnd; { + switch code.Op.CodeType() { + case CodeArrayElem, CodeSliceElem, CodeMapKey: + code = code.End + case CodeStructEnd: + switch code.Op { + case OpStructEnd: + prev := code.PrevField + prevOp := prev.Op.String() + if strings.Contains(prevOp, "Head") || + strings.Contains(prevOp, "Slice") || + strings.Contains(prevOp, "Array") || + strings.Contains(prevOp, "Map") || + 
strings.Contains(prevOp, "MarshalJSON") || + strings.Contains(prevOp, "MarshalText") { + // not exists field + code = code.Next + break + } + if prev.Op != prev.Op.FieldToEnd() { + prev.Op = prev.Op.FieldToEnd() + prev.Next = code.Next + } + code = code.Next + default: + code = code.Next + } + default: + code = code.Next + } + } +} + +func implementsMarshalJSON(typ *runtime.Type) bool { + if !typ.Implements(marshalJSONType) { + return false + } + if typ.Kind() != reflect.Ptr { + return true + } + // type kind is reflect.Ptr + if !typ.Elem().Implements(marshalJSONType) { + return true + } + // needs to dereference + return false +} + +func implementsMarshalText(typ *runtime.Type) bool { + if !typ.Implements(marshalTextType) { + return false + } + if typ.Kind() != reflect.Ptr { + return true + } + // type kind is reflect.Ptr + if !typ.Elem().Implements(marshalTextType) { + return true + } + // needs to dereference + return false +} + +func compile(ctx *compileContext, isPtr bool) (*Opcode, error) { + typ := ctx.typ + switch { + case implementsMarshalJSON(typ): + return compileMarshalJSON(ctx) + case implementsMarshalText(typ): + return compileMarshalText(ctx) + } + switch typ.Kind() { + case reflect.Ptr: + return compilePtr(ctx) + case reflect.Slice: + elem := typ.Elem() + if elem.Kind() == reflect.Uint8 { + p := runtime.PtrTo(elem) + if !p.Implements(marshalJSONType) && !p.Implements(marshalTextType) { + return compileBytes(ctx) + } + } + return compileSlice(ctx) + case reflect.Array: + return compileArray(ctx) + case reflect.Map: + return compileMap(ctx) + case reflect.Struct: + return compileStruct(ctx, isPtr) + case reflect.Interface: + return compileInterface(ctx) + case reflect.Int: + return compileInt(ctx) + case reflect.Int8: + return compileInt8(ctx) + case reflect.Int16: + return compileInt16(ctx) + case reflect.Int32: + return compileInt32(ctx) + case reflect.Int64: + return compileInt64(ctx) + case reflect.Uint: + return compileUint(ctx) + case 
reflect.Uint8: + return compileUint8(ctx) + case reflect.Uint16: + return compileUint16(ctx) + case reflect.Uint32: + return compileUint32(ctx) + case reflect.Uint64: + return compileUint64(ctx) + case reflect.Uintptr: + return compileUint(ctx) + case reflect.Float32: + return compileFloat32(ctx) + case reflect.Float64: + return compileFloat64(ctx) + case reflect.String: + return compileString(ctx) + case reflect.Bool: + return compileBool(ctx) + } + return nil, &errors.UnsupportedTypeError{Type: runtime.RType2Type(typ)} +} + +func convertPtrOp(code *Opcode) OpType { + ptrHeadOp := code.Op.HeadToPtrHead() + if code.Op != ptrHeadOp { + return ptrHeadOp + } + switch code.Op { + case OpInt: + return OpIntPtr + case OpUint: + return OpUintPtr + case OpFloat32: + return OpFloat32Ptr + case OpFloat64: + return OpFloat64Ptr + case OpString: + return OpStringPtr + case OpBool: + return OpBoolPtr + case OpBytes: + return OpBytesPtr + case OpArray: + return OpArrayPtr + case OpSlice: + return OpSlicePtr + case OpMap: + return OpMapPtr + case OpMarshalJSON: + return OpMarshalJSONPtr + case OpMarshalText: + return OpMarshalTextPtr + case OpInterface: + return OpInterfacePtr + case OpStructFieldRecursive: + return OpStructFieldRecursivePtr + } + return code.Op +} + +func compileKey(ctx *compileContext) (*Opcode, error) { + typ := ctx.typ + switch { + case implementsMarshalJSON(typ): + return compileMarshalJSON(ctx) + case implementsMarshalText(typ): + return compileMarshalText(ctx) + } + switch typ.Kind() { + case reflect.Ptr: + return compilePtr(ctx) + case reflect.Interface: + return compileInterface(ctx) + case reflect.String: + return compileString(ctx) + case reflect.Int: + return compileIntString(ctx) + case reflect.Int8: + return compileInt8String(ctx) + case reflect.Int16: + return compileInt16String(ctx) + case reflect.Int32: + return compileInt32String(ctx) + case reflect.Int64: + return compileInt64String(ctx) + case reflect.Uint: + return compileUintString(ctx) + 
case reflect.Uint8: + return compileUint8String(ctx) + case reflect.Uint16: + return compileUint16String(ctx) + case reflect.Uint32: + return compileUint32String(ctx) + case reflect.Uint64: + return compileUint64String(ctx) + case reflect.Uintptr: + return compileUintString(ctx) + } + return nil, &errors.UnsupportedTypeError{Type: runtime.RType2Type(typ)} +} + +func compilePtr(ctx *compileContext) (*Opcode, error) { + code, err := compile(ctx.withType(ctx.typ.Elem()), true) + if err != nil { + return nil, err + } + code.Op = convertPtrOp(code) + code.PtrNum++ + return code, nil +} + +func compileMarshalJSON(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpMarshalJSON) + typ := ctx.typ + if !typ.Implements(marshalJSONType) && runtime.PtrTo(typ).Implements(marshalJSONType) { + code.AddrForMarshaler = true + } + ctx.incIndex() + return code, nil +} + +func compileMarshalText(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpMarshalText) + typ := ctx.typ + if !typ.Implements(marshalTextType) && runtime.PtrTo(typ).Implements(marshalTextType) { + code.AddrForMarshaler = true + } + ctx.incIndex() + return code, nil +} + +const intSize = 32 << (^uint(0) >> 63) + +func compileInt(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpInt) + code.setMaskAndRshiftNum(intSize) + ctx.incIndex() + return code, nil +} + +func compileInt8(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpInt) + code.setMaskAndRshiftNum(8) + ctx.incIndex() + return code, nil +} + +func compileInt16(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpInt) + code.setMaskAndRshiftNum(16) + ctx.incIndex() + return code, nil +} + +func compileInt32(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpInt) + code.setMaskAndRshiftNum(32) + ctx.incIndex() + return code, nil +} + +func compileInt64(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpInt) + code.setMaskAndRshiftNum(64) + ctx.incIndex() 
+ return code, nil +} + +func compileUint(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpUint) + code.setMaskAndRshiftNum(intSize) + ctx.incIndex() + return code, nil +} + +func compileUint8(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpUint) + code.setMaskAndRshiftNum(8) + ctx.incIndex() + return code, nil +} + +func compileUint16(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpUint) + code.setMaskAndRshiftNum(16) + ctx.incIndex() + return code, nil +} + +func compileUint32(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpUint) + code.setMaskAndRshiftNum(32) + ctx.incIndex() + return code, nil +} + +func compileUint64(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpUint) + code.setMaskAndRshiftNum(64) + ctx.incIndex() + return code, nil +} + +func compileIntString(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpIntString) + code.setMaskAndRshiftNum(intSize) + ctx.incIndex() + return code, nil +} + +func compileInt8String(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpIntString) + code.setMaskAndRshiftNum(8) + ctx.incIndex() + return code, nil +} + +func compileInt16String(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpIntString) + code.setMaskAndRshiftNum(16) + ctx.incIndex() + return code, nil +} + +func compileInt32String(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpIntString) + code.setMaskAndRshiftNum(32) + ctx.incIndex() + return code, nil +} + +func compileInt64String(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpIntString) + code.setMaskAndRshiftNum(64) + ctx.incIndex() + return code, nil +} + +func compileUintString(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpUintString) + code.setMaskAndRshiftNum(intSize) + ctx.incIndex() + return code, nil +} + +func compileUint8String(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, 
OpUintString) + code.setMaskAndRshiftNum(8) + ctx.incIndex() + return code, nil +} + +func compileUint16String(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpUintString) + code.setMaskAndRshiftNum(16) + ctx.incIndex() + return code, nil +} + +func compileUint32String(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpUintString) + code.setMaskAndRshiftNum(32) + ctx.incIndex() + return code, nil +} + +func compileUint64String(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpUintString) + code.setMaskAndRshiftNum(64) + ctx.incIndex() + return code, nil +} + +func compileFloat32(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpFloat32) + ctx.incIndex() + return code, nil +} + +func compileFloat64(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpFloat64) + ctx.incIndex() + return code, nil +} + +func compileString(ctx *compileContext) (*Opcode, error) { + var op OpType + if ctx.typ == runtime.Type2RType(jsonNumberType) { + op = OpNumber + } else { + op = OpString + } + code := newOpCode(ctx, op) + ctx.incIndex() + return code, nil +} + +func compileBool(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpBool) + ctx.incIndex() + return code, nil +} + +func compileBytes(ctx *compileContext) (*Opcode, error) { + code := newOpCode(ctx, OpBytes) + ctx.incIndex() + return code, nil +} + +func compileInterface(ctx *compileContext) (*Opcode, error) { + code := newInterfaceCode(ctx) + ctx.incIndex() + return code, nil +} + +func compileSlice(ctx *compileContext) (*Opcode, error) { + elem := ctx.typ.Elem() + size := elem.Size() + + header := newSliceHeaderCode(ctx) + ctx.incIndex() + + code, err := compileSliceElem(ctx.withType(elem).incIndent()) + if err != nil { + return nil, err + } + + // header => opcode => elem => end + // ^ | + // |________| + + elemCode := newSliceElemCode(ctx, header, size) + ctx.incIndex() + + end := newOpCode(ctx, OpSliceEnd) + ctx.incIndex() + + 
header.Elem = elemCode + header.End = end + header.Next = code + code.BeforeLastCode().Next = (*Opcode)(unsafe.Pointer(elemCode)) + elemCode.Next = code + elemCode.End = end + return (*Opcode)(unsafe.Pointer(header)), nil +} + +func compileSliceElem(ctx *compileContext) (*Opcode, error) { + typ := ctx.typ + switch { + case !typ.Implements(marshalJSONType) && runtime.PtrTo(typ).Implements(marshalJSONType): + return compileMarshalJSON(ctx) + case !typ.Implements(marshalTextType) && runtime.PtrTo(typ).Implements(marshalTextType): + return compileMarshalText(ctx) + default: + return compile(ctx, false) + } +} + +func compileArray(ctx *compileContext) (*Opcode, error) { + typ := ctx.typ + elem := typ.Elem() + alen := typ.Len() + size := elem.Size() + + header := newArrayHeaderCode(ctx, alen) + ctx.incIndex() + + code, err := compile(ctx.withType(elem).incIndent(), false) + if err != nil { + return nil, err + } + // header => opcode => elem => end + // ^ | + // |________| + + elemCode := newArrayElemCode(ctx, header, alen, size) + ctx.incIndex() + + end := newOpCode(ctx, OpArrayEnd) + ctx.incIndex() + + header.Elem = elemCode + header.End = end + header.Next = code + code.BeforeLastCode().Next = (*Opcode)(unsafe.Pointer(elemCode)) + elemCode.Next = code + elemCode.End = end + return (*Opcode)(unsafe.Pointer(header)), nil +} + +func compileMap(ctx *compileContext) (*Opcode, error) { + // header => code => value => code => key => code => value => code => end + // ^ | + // |_______________________| + ctx = ctx.incIndent() + header := newMapHeaderCode(ctx) + ctx.incIndex() + + typ := ctx.typ + keyType := ctx.typ.Key() + keyCode, err := compileKey(ctx.withType(keyType)) + if err != nil { + return nil, err + } + + value := newMapValueCode(ctx, header) + ctx.incIndex() + + valueCode, err := compileMapValue(ctx.withType(typ.Elem())) + if err != nil { + return nil, err + } + + key := newMapKeyCode(ctx, header) + ctx.incIndex() + + ctx = ctx.decIndent() + + header.MapKey = key + 
header.MapValue = value + + end := newMapEndCode(ctx, header) + ctx.incIndex() + + header.Next = keyCode + keyCode.BeforeLastCode().Next = (*Opcode)(unsafe.Pointer(value)) + value.Next = valueCode + valueCode.BeforeLastCode().Next = (*Opcode)(unsafe.Pointer(key)) + key.Next = keyCode + + header.End = end + key.End = end + value.End = end + + return (*Opcode)(unsafe.Pointer(header)), nil +} + +func compileMapValue(ctx *compileContext) (*Opcode, error) { + switch ctx.typ.Kind() { + case reflect.Map: + return compilePtr(ctx.withType(runtime.PtrTo(ctx.typ))) + default: + return compile(ctx, false) + } +} + +func optimizeStructHeader(code *Opcode, tag *runtime.StructTag) OpType { + headType := code.ToHeaderType() + switch { + case tag.IsOmitEmpty: + headType = headType.HeadToOmitEmptyHead() + case tag.IsString: + headType = headType.HeadToStringTagHead() + } + return headType +} + +func optimizeStructField(code *Opcode, tag *runtime.StructTag) OpType { + fieldType := code.ToFieldType() + switch { + case tag.IsOmitEmpty: + fieldType = fieldType.FieldToOmitEmptyField() + case tag.IsString: + fieldType = fieldType.FieldToStringTagField() + } + return fieldType +} + +func recursiveCode(ctx *compileContext, jmp *CompiledCode) *Opcode { + code := newRecursiveCode(ctx, jmp) + ctx.incIndex() + return code +} + +func compiledCode(ctx *compileContext) *Opcode { + typ := ctx.typ + typeptr := uintptr(unsafe.Pointer(typ)) + if cc, exists := ctx.structTypeToCompiledCode[typeptr]; exists { + return recursiveCode(ctx, cc) + } + return nil +} + +func structHeader(ctx *compileContext, fieldCode *Opcode, valueCode *Opcode, tag *runtime.StructTag) *Opcode { + fieldCode.Indent-- + op := optimizeStructHeader(valueCode, tag) + fieldCode.Op = op + fieldCode.Mask = valueCode.Mask + fieldCode.RshiftNum = valueCode.RshiftNum + fieldCode.PtrNum = valueCode.PtrNum + if op.IsMultipleOpHead() { + return valueCode.BeforeLastCode() + } + ctx.decOpcodeIndex() + return 
(*Opcode)(unsafe.Pointer(fieldCode)) +} + +func structField(ctx *compileContext, fieldCode *Opcode, valueCode *Opcode, tag *runtime.StructTag) *Opcode { + code := (*Opcode)(unsafe.Pointer(fieldCode)) + op := optimizeStructField(valueCode, tag) + fieldCode.Op = op + fieldCode.PtrNum = valueCode.PtrNum + fieldCode.Mask = valueCode.Mask + fieldCode.RshiftNum = valueCode.RshiftNum + fieldCode.Jmp = valueCode.Jmp + if op.IsMultipleOpField() { + return valueCode.BeforeLastCode() + } + ctx.decIndex() + return code +} + +func isNotExistsField(head *Opcode) bool { + if head == nil { + return false + } + if head.Op != OpStructHead { + return false + } + if !head.AnonymousHead { + return false + } + if head.Next == nil { + return false + } + if head.NextField == nil { + return false + } + if head.NextField.Op != OpStructAnonymousEnd { + return false + } + if head.Next.Op == OpStructAnonymousEnd { + return true + } + if head.Next.Op.CodeType() != CodeStructField { + return false + } + return isNotExistsField(head.Next) +} + +func optimizeAnonymousFields(head *Opcode) { + code := head + var prev *Opcode + removedFields := map[*Opcode]struct{}{} + for { + if code.Op == OpStructEnd { + break + } + if code.Op == OpStructField { + codeType := code.Next.Op.CodeType() + if codeType == CodeStructField { + if isNotExistsField(code.Next) { + code.Next = code.NextField + diff := code.Next.DisplayIdx - code.DisplayIdx + for i := 0; i < diff; i++ { + code.Next.decOpcodeIndex() + } + linkPrevToNextField(code, removedFields) + code = prev + } + } + } + prev = code + code = code.NextField + } +} + +type structFieldPair struct { + prevField *Opcode + curField *Opcode + isTaggedKey bool + linked bool +} + +func anonymousStructFieldPairMap(tags runtime.StructTags, named string, valueCode *Opcode) map[string][]structFieldPair { + anonymousFields := map[string][]structFieldPair{} + f := valueCode + var prevAnonymousField *Opcode + removedFields := map[*Opcode]struct{}{} + for { + existsKey := 
tags.ExistsKey(f.DisplayKey) + isHeadOp := strings.Contains(f.Op.String(), "Head") + if existsKey && strings.Contains(f.Op.String(), "Recursive") { + // through + } else if isHeadOp && !f.AnonymousHead { + if existsKey { + // TODO: need to remove this head + f.Op = OpStructHead + f.AnonymousKey = true + f.AnonymousHead = true + } else if named == "" { + f.AnonymousHead = true + } + } else if named == "" && f.Op == OpStructEnd { + f.Op = OpStructAnonymousEnd + } else if existsKey { + diff := f.NextField.DisplayIdx - f.DisplayIdx + for i := 0; i < diff; i++ { + f.NextField.decOpcodeIndex() + } + linkPrevToNextField(f, removedFields) + } + + if f.DisplayKey == "" { + if f.NextField == nil { + break + } + prevAnonymousField = f + f = f.NextField + continue + } + + key := fmt.Sprintf("%s.%s", named, f.DisplayKey) + anonymousFields[key] = append(anonymousFields[key], structFieldPair{ + prevField: prevAnonymousField, + curField: f, + isTaggedKey: f.IsTaggedKey, + }) + if f.Next != nil && f.NextField != f.Next && f.Next.Op.CodeType() == CodeStructField { + for k, v := range anonymousFieldPairRecursively(named, f.Next) { + anonymousFields[k] = append(anonymousFields[k], v...) + } + } + if f.NextField == nil { + break + } + prevAnonymousField = f + f = f.NextField + } + return anonymousFields +} + +func anonymousFieldPairRecursively(named string, valueCode *Opcode) map[string][]structFieldPair { + anonymousFields := map[string][]structFieldPair{} + f := valueCode + var prevAnonymousField *Opcode + for { + if f.DisplayKey != "" && f.AnonymousHead { + key := fmt.Sprintf("%s.%s", named, f.DisplayKey) + anonymousFields[key] = append(anonymousFields[key], structFieldPair{ + prevField: prevAnonymousField, + curField: f, + isTaggedKey: f.IsTaggedKey, + }) + if f.Next != nil && f.NextField != f.Next && f.Next.Op.CodeType() == CodeStructField { + for k, v := range anonymousFieldPairRecursively(named, f.Next) { + anonymousFields[k] = append(anonymousFields[k], v...) 
+ } + } + } + if f.NextField == nil { + break + } + prevAnonymousField = f + f = f.NextField + } + return anonymousFields +} + +func optimizeConflictAnonymousFields(anonymousFields map[string][]structFieldPair) { + removedFields := map[*Opcode]struct{}{} + for _, fieldPairs := range anonymousFields { + if len(fieldPairs) == 1 { + continue + } + // conflict anonymous fields + taggedPairs := []structFieldPair{} + for _, fieldPair := range fieldPairs { + if fieldPair.isTaggedKey { + taggedPairs = append(taggedPairs, fieldPair) + } else { + if !fieldPair.linked { + if fieldPair.prevField == nil { + // head operation + fieldPair.curField.Op = OpStructHead + fieldPair.curField.AnonymousHead = true + fieldPair.curField.AnonymousKey = true + } else { + diff := fieldPair.curField.NextField.DisplayIdx - fieldPair.curField.DisplayIdx + for i := 0; i < diff; i++ { + fieldPair.curField.NextField.decOpcodeIndex() + } + removedFields[fieldPair.curField] = struct{}{} + linkPrevToNextField(fieldPair.curField, removedFields) + } + fieldPair.linked = true + } + } + } + if len(taggedPairs) > 1 { + for _, fieldPair := range taggedPairs { + if !fieldPair.linked { + if fieldPair.prevField == nil { + // head operation + fieldPair.curField.Op = OpStructHead + fieldPair.curField.AnonymousHead = true + fieldPair.curField.AnonymousKey = true + } else { + diff := fieldPair.curField.NextField.DisplayIdx - fieldPair.curField.DisplayIdx + removedFields[fieldPair.curField] = struct{}{} + for i := 0; i < diff; i++ { + fieldPair.curField.NextField.decOpcodeIndex() + } + linkPrevToNextField(fieldPair.curField, removedFields) + } + fieldPair.linked = true + } + } + } else { + for _, fieldPair := range taggedPairs { + fieldPair.curField.IsTaggedKey = false + } + } + } +} + +func isNilableType(typ *runtime.Type) bool { + switch typ.Kind() { + case reflect.Ptr: + return true + case reflect.Interface: + return true + case reflect.Slice: + return true + case reflect.Map: + return true + default: + return 
false + } +} + +func compileStruct(ctx *compileContext, isPtr bool) (*Opcode, error) { + if code := compiledCode(ctx); code != nil { + return code, nil + } + typ := ctx.typ + typeptr := uintptr(unsafe.Pointer(typ)) + compiled := &CompiledCode{} + ctx.structTypeToCompiledCode[typeptr] = compiled + // header => code => structField => code => end + // ^ | + // |__________| + fieldNum := typ.NumField() + indirect := runtime.IfaceIndir(typ) + fieldIdx := 0 + disableIndirectConversion := false + var ( + head *Opcode + code *Opcode + prevField *Opcode + ) + ctx = ctx.incIndent() + tags := runtime.StructTags{} + anonymousFields := map[string][]structFieldPair{} + for i := 0; i < fieldNum; i++ { + field := typ.Field(i) + if runtime.IsIgnoredStructField(field) { + continue + } + tags = append(tags, runtime.StructTagFromField(field)) + } + for i, tag := range tags { + field := tag.Field + fieldType := runtime.Type2RType(field.Type) + fieldOpcodeIndex := ctx.opcodeIndex + fieldPtrIndex := ctx.ptrIndex + ctx.incIndex() + + nilcheck := true + addrForMarshaler := false + isIndirectSpecialCase := isPtr && i == 0 && fieldNum == 1 + isNilableType := isNilableType(fieldType) + + var valueCode *Opcode + switch { + case isIndirectSpecialCase && !isNilableType && isPtrMarshalJSONType(fieldType): + // *struct{ field T } => struct { field *T } + // func (*T) MarshalJSON() ([]byte, error) + // move pointer position from head to first field + code, err := compileMarshalJSON(ctx.withType(runtime.PtrTo(fieldType))) + if err != nil { + return nil, err + } + valueCode = code + nilcheck = false + indirect = false + disableIndirectConversion = true + case isIndirectSpecialCase && !isNilableType && isPtrMarshalTextType(fieldType): + // *struct{ field T } => struct { field *T } + // func (*T) MarshalText() ([]byte, error) + // move pointer position from head to first field + code, err := compileMarshalText(ctx.withType(runtime.PtrTo(fieldType))) + if err != nil { + return nil, err + } + valueCode = 
code + nilcheck = false + indirect = false + disableIndirectConversion = true + case isPtr && isPtrMarshalJSONType(fieldType): + // *struct{ field T } + // func (*T) MarshalJSON() ([]byte, error) + code, err := compileMarshalJSON(ctx.withType(fieldType)) + if err != nil { + return nil, err + } + addrForMarshaler = true + nilcheck = false + valueCode = code + case isPtr && isPtrMarshalTextType(fieldType): + // *struct{ field T } + // func (*T) MarshalText() ([]byte, error) + code, err := compileMarshalText(ctx.withType(fieldType)) + if err != nil { + return nil, err + } + addrForMarshaler = true + nilcheck = false + valueCode = code + default: + code, err := compile(ctx.withType(fieldType), isPtr) + if err != nil { + return nil, err + } + valueCode = code + } + + if field.Anonymous { + tagKey := "" + if tag.IsTaggedKey { + tagKey = tag.Key + } + for k, v := range anonymousStructFieldPairMap(tags, tagKey, valueCode) { + anonymousFields[k] = append(anonymousFields[k], v...) + } + valueCode.decIndent() + + // fix issue144 + if !(isPtr && strings.Contains(valueCode.Op.String(), "Marshal")) { + valueCode.Indirect = indirect + } + } else { + valueCode.Indirect = indirect + } + key := fmt.Sprintf(`"%s":`, tag.Key) + escapedKey := fmt.Sprintf(`%s:`, string(AppendEscapedString([]byte{}, tag.Key))) + fieldCode := &Opcode{ + Type: valueCode.Type, + DisplayIdx: fieldOpcodeIndex, + Idx: opcodeOffset(fieldPtrIndex), + Next: valueCode, + Indent: ctx.indent, + AnonymousKey: field.Anonymous, + Key: []byte(key), + EscapedKey: []byte(escapedKey), + IsTaggedKey: tag.IsTaggedKey, + DisplayKey: tag.Key, + Offset: field.Offset, + Indirect: indirect, + Nilcheck: nilcheck, + AddrForMarshaler: addrForMarshaler, + } + if fieldIdx == 0 { + fieldCode.HeadIdx = fieldCode.Idx + code = structHeader(ctx, fieldCode, valueCode, tag) + head = fieldCode + prevField = fieldCode + } else { + fieldCode.HeadIdx = head.HeadIdx + code.Next = fieldCode + code = structField(ctx, fieldCode, valueCode, tag) + 
prevField.NextField = fieldCode + fieldCode.PrevField = prevField + prevField = fieldCode + } + fieldIdx++ + } + ctx = ctx.decIndent() + + structEndCode := &Opcode{ + Op: OpStructEnd, + Type: nil, + Indent: ctx.indent, + Next: newEndOp(ctx), + } + + // no struct field + if head == nil { + head = &Opcode{ + Op: OpStructHead, + Type: typ, + DisplayIdx: ctx.opcodeIndex, + Idx: opcodeOffset(ctx.ptrIndex), + HeadIdx: opcodeOffset(ctx.ptrIndex), + Indent: ctx.indent, + NextField: structEndCode, + } + structEndCode.PrevField = head + ctx.incIndex() + code = head + } + + structEndCode.DisplayIdx = ctx.opcodeIndex + structEndCode.Idx = opcodeOffset(ctx.ptrIndex) + ctx.incIndex() + + if prevField != nil && prevField.NextField == nil { + prevField.NextField = structEndCode + structEndCode.PrevField = prevField + } + + head.End = structEndCode + code.Next = structEndCode + optimizeConflictAnonymousFields(anonymousFields) + optimizeAnonymousFields(head) + ret := (*Opcode)(unsafe.Pointer(head)) + compiled.Code = ret + + delete(ctx.structTypeToCompiledCode, typeptr) + + if !disableIndirectConversion && !head.Indirect && isPtr { + head.Indirect = true + } + + return ret, nil +} + +func isPtrMarshalJSONType(typ *runtime.Type) bool { + return !typ.Implements(marshalJSONType) && runtime.PtrTo(typ).Implements(marshalJSONType) +} + +func isPtrMarshalTextType(typ *runtime.Type) bool { + return !typ.Implements(marshalTextType) && runtime.PtrTo(typ).Implements(marshalTextType) +} diff --git a/internal/encoder/compiler/compiler.go b/internal/encoder/compiler/compiler.go deleted file mode 100644 index a20d4fe..0000000 --- a/internal/encoder/compiler/compiler.go +++ /dev/null @@ -1 +0,0 @@ -package compiler diff --git a/internal/encoder/compiler/norace.go b/internal/encoder/compiler/norace.go deleted file mode 100644 index 8299204..0000000 --- a/internal/encoder/compiler/norace.go +++ /dev/null @@ -1,11 +0,0 @@ -// +build !race - -package compiler - -import ( - 
"github.com/goccy/go-json/internal/encoder" -) - -func CompileToGetCodeSet(typeptr uintptr) (*encoder.OpcodeSet, error) { - return nil, nil -} diff --git a/internal/encoder/compiler_norace.go b/internal/encoder/compiler_norace.go new file mode 100644 index 0000000..f500779 --- /dev/null +++ b/internal/encoder/compiler_norace.go @@ -0,0 +1,38 @@ +// +build !race + +package encoder + +import ( + "unsafe" + + "github.com/goccy/go-json/internal/runtime" +) + +func CompileToGetCodeSet(typeptr uintptr) (*OpcodeSet, error) { + if typeptr > typeAddr.MaxTypeAddr || typeptr < typeAddr.BaseTypeAddr { // address is outside the window indexed by cachedOpcodeSets (the subtraction below would wrap or overrun): use the map-backed slow path + return compileToGetCodeSetSlowPath(typeptr) + } + index := typeptr - typeAddr.BaseTypeAddr + if codeSet := cachedOpcodeSets[index]; codeSet != nil { + return codeSet, nil + } + + // noescape trick for header.typ ( reflect.*rtype ) + copiedType := *(**runtime.Type)(unsafe.Pointer(&typeptr)) + + code, err := compileHead(&compileContext{ + typ: copiedType, + structTypeToCompiledCode: map[uintptr]*CompiledCode{}, + }) + if err != nil { + return nil, err + } + code = copyOpcode(code) + codeLength := code.TotalLength() + codeSet := &OpcodeSet{ + Code: code, + CodeLength: codeLength, + } + cachedOpcodeSets[index] = codeSet + return codeSet, nil +} diff --git a/internal/encoder/compiler_race.go b/internal/encoder/compiler_race.go new file mode 100644 index 0000000..285b0ba --- /dev/null +++ b/internal/encoder/compiler_race.go @@ -0,0 +1,46 @@ +// +build race + +package encoder + +import ( + "sync" + "unsafe" + + "github.com/goccy/go-json/internal/runtime" +) + +var setsMu sync.RWMutex + +func CompileToGetCodeSet(typeptr uintptr) (*OpcodeSet, error) { + if typeptr > typeAddr.MaxTypeAddr || typeptr < typeAddr.BaseTypeAddr { // address is outside the window indexed by cachedOpcodeSets (the subtraction below would wrap or overrun): use the map-backed slow path + return compileToGetCodeSetSlowPath(typeptr) + } + index := typeptr - typeAddr.BaseTypeAddr + setsMu.RLock() + if codeSet := cachedOpcodeSets[index]; codeSet != nil { + setsMu.RUnlock() + return codeSet, nil + } + setsMu.RUnlock() + + // noescape trick for header.typ ( reflect.*rtype ) + copiedType := *(**runtime.Type)(unsafe.Pointer(&typeptr)) + + 
code, err := compileHead(&compileContext{ + typ: copiedType, + structTypeToCompiledCode: map[uintptr]*CompiledCode{}, + }) + if err != nil { + return nil, err + } + code = copyOpcode(code) + codeLength := code.TotalLength() + codeSet := &OpcodeSet{ + Code: code, + CodeLength: codeLength, + } + setsMu.Lock() + cachedOpcodeSets[index] = codeSet + setsMu.Unlock() + return codeSet, nil +} diff --git a/internal/encoder/context.go b/internal/encoder/context.go new file mode 100644 index 0000000..d21c008 --- /dev/null +++ b/internal/encoder/context.go @@ -0,0 +1,82 @@ +package encoder + +import ( + "github.com/goccy/go-json/internal/runtime" +) + +type compileContext struct { + typ *runtime.Type + opcodeIndex int + ptrIndex int + indent int + structTypeToCompiledCode map[uintptr]*CompiledCode + + parent *compileContext +} + +func (c *compileContext) context() *compileContext { + return &compileContext{ + typ: c.typ, + opcodeIndex: c.opcodeIndex, + ptrIndex: c.ptrIndex, + indent: c.indent, + structTypeToCompiledCode: c.structTypeToCompiledCode, + parent: c, + } +} + +func (c *compileContext) withType(typ *runtime.Type) *compileContext { + ctx := c.context() + ctx.typ = typ + return ctx +} + +func (c *compileContext) incIndent() *compileContext { + ctx := c.context() + ctx.indent++ + return ctx +} + +func (c *compileContext) decIndent() *compileContext { + ctx := c.context() + ctx.indent-- + return ctx +} + +func (c *compileContext) incIndex() { + c.incOpcodeIndex() + c.incPtrIndex() +} + +func (c *compileContext) decIndex() { + c.decOpcodeIndex() + c.decPtrIndex() +} + +func (c *compileContext) incOpcodeIndex() { + c.opcodeIndex++ + if c.parent != nil { + c.parent.incOpcodeIndex() + } +} + +func (c *compileContext) decOpcodeIndex() { + c.opcodeIndex-- + if c.parent != nil { + c.parent.decOpcodeIndex() + } +} + +func (c *compileContext) incPtrIndex() { + c.ptrIndex++ + if c.parent != nil { + c.parent.incPtrIndex() + } +} + +func (c *compileContext) decPtrIndex() { + 
c.ptrIndex-- + if c.parent != nil { + c.parent.decPtrIndex() + } +} diff --git a/internal/encoder/encoder.go b/internal/encoder/encoder.go index 26725fc..89410bf 100644 --- a/internal/encoder/encoder.go +++ b/internal/encoder/encoder.go @@ -24,41 +24,108 @@ const ( UnorderedMapOption ) -type Opcode struct { - Op OpType // operation type - Type *runtime.Type // go type - DisplayIdx int // opcode index - Key []byte // struct field key - EscapedKey []byte // struct field key ( HTML escaped ) - PtrNum int // pointer number: e.g. double pointer is 2. - DisplayKey string // key text to display - IsTaggedKey bool // whether tagged key - AnonymousKey bool // whether anonymous key - AnonymousHead bool // whether anonymous head or not - Indirect bool // whether indirect or not - Nilcheck bool // whether needs to nilcheck or not - AddrForMarshaler bool // whether needs to addr for marshaler or not - RshiftNum uint8 // use to take bit for judging whether negative integer or not - Mask uint64 // mask for number - Indent int // indent number +func (t OpType) IsMultipleOpHead() bool { + switch t { + case OpStructHead: + return true + case OpStructHeadSlice: + return true + case OpStructHeadArray: + return true + case OpStructHeadMap: + return true + case OpStructHeadStruct: + return true + case OpStructHeadOmitEmpty: + return true + case OpStructHeadOmitEmptySlice: + return true + case OpStructHeadStringTagSlice: + return true + case OpStructHeadOmitEmptyArray: + return true + case OpStructHeadStringTagArray: + return true + case OpStructHeadOmitEmptyMap: + return true + case OpStructHeadStringTagMap: + return true + case OpStructHeadOmitEmptyStruct: + return true + case OpStructHeadStringTag: + return true + case OpStructHeadSlicePtr: + return true + case OpStructHeadOmitEmptySlicePtr: + return true + case OpStructHeadStringTagSlicePtr: + return true + case OpStructHeadArrayPtr: + return true + case OpStructHeadOmitEmptyArrayPtr: + return true + case 
OpStructHeadStringTagArrayPtr: + return true + case OpStructHeadMapPtr: + return true + case OpStructHeadOmitEmptyMapPtr: + return true + case OpStructHeadStringTagMapPtr: + return true + } + return false +} - Idx uintptr // offset to access ptr - HeadIdx uintptr // offset to access slice/struct head - ElemIdx uintptr // offset to access array/slice/map elem - Length uintptr // offset to access slice/map length or array length - MapIter uintptr // offset to access map iterator - MapPos uintptr // offset to access position list for sorted map - Offset uintptr // offset size from struct header - Size uintptr // array/slice elem size - - MapKey *Opcode // map key - MapValue *Opcode // map value - Elem *Opcode // array/slice elem - End *Opcode // array/slice/struct/map end - PrevField *Opcode // prev struct field - NextField *Opcode // next struct field - Next *Opcode // next opcode - Jmp *CompiledCode // for recursive call +func (t OpType) IsMultipleOpField() bool { + switch t { + case OpStructField: + return true + case OpStructFieldSlice: + return true + case OpStructFieldArray: + return true + case OpStructFieldMap: + return true + case OpStructFieldStruct: + return true + case OpStructFieldOmitEmpty: + return true + case OpStructFieldOmitEmptySlice: + return true + case OpStructFieldStringTagSlice: + return true + case OpStructFieldOmitEmptyArray: + return true + case OpStructFieldStringTagArray: + return true + case OpStructFieldOmitEmptyMap: + return true + case OpStructFieldStringTagMap: + return true + case OpStructFieldOmitEmptyStruct: + return true + case OpStructFieldStringTag: + return true + case OpStructFieldSlicePtr: + return true + case OpStructFieldOmitEmptySlicePtr: + return true + case OpStructFieldStringTagSlicePtr: + return true + case OpStructFieldArrayPtr: + return true + case OpStructFieldOmitEmptyArrayPtr: + return true + case OpStructFieldStringTagArrayPtr: + return true + case OpStructFieldMapPtr: + return true + case 
OpStructFieldOmitEmptyMapPtr: + return true + case OpStructFieldStringTagMapPtr: + return true + } + return false } type OpcodeSet struct { diff --git a/internal/encoder/opcode.go b/internal/encoder/opcode.go new file mode 100644 index 0000000..674a71b --- /dev/null +++ b/internal/encoder/opcode.go @@ -0,0 +1,647 @@ +package encoder + +import ( + "fmt" + "math" + "strings" + "unsafe" + + "github.com/goccy/go-json/internal/runtime" +) + +const uintptrSize = 4 << (^uintptr(0) >> 63) + +type Opcode struct { + Op OpType // operation type + Type *runtime.Type // go type + DisplayIdx int // opcode index + Key []byte // struct field key + EscapedKey []byte // struct field key ( HTML escaped ) + PtrNum int // pointer number: e.g. double pointer is 2. + DisplayKey string // key text to display + IsTaggedKey bool // whether tagged key + AnonymousKey bool // whether anonymous key + AnonymousHead bool // whether anonymous head or not + Indirect bool // whether indirect or not + Nilcheck bool // whether needs to nilcheck or not + AddrForMarshaler bool // whether needs to addr for marshaler or not + RshiftNum uint8 // use to take bit for judging whether negative integer or not + Mask uint64 // mask for number + Indent int // indent number + + Idx uintptr // offset to access ptr + HeadIdx uintptr // offset to access slice/struct head + ElemIdx uintptr // offset to access array/slice/map elem + Length uintptr // offset to access slice/map length or array length + MapIter uintptr // offset to access map iterator + MapPos uintptr // offset to access position list for sorted map + Offset uintptr // offset size from struct header + Size uintptr // array/slice elem size + + MapKey *Opcode // map key + MapValue *Opcode // map value + Elem *Opcode // array/slice elem + End *Opcode // array/slice/struct/map end + PrevField *Opcode // prev struct field + NextField *Opcode // next struct field + Next *Opcode // next opcode + Jmp *CompiledCode // for recursive call +} + +func 
rshitNum(bitSize uint8) uint8 { + return bitSize - 1 +} + +func (c *Opcode) setMaskAndRshiftNum(bitSize uint8) { + switch bitSize { + case 8: + c.Mask = math.MaxUint8 + case 16: + c.Mask = math.MaxUint16 + case 32: + c.Mask = math.MaxUint32 + case 64: + c.Mask = math.MaxUint64 + } + c.RshiftNum = rshitNum(bitSize) +} + +func (c *Opcode) ToHeaderType() OpType { + switch c.Op { + case OpInt: + return OpStructHeadInt + case OpIntPtr: + return OpStructHeadIntPtr + case OpUint: + return OpStructHeadUint + case OpUintPtr: + return OpStructHeadUintPtr + case OpFloat32: + return OpStructHeadFloat32 + case OpFloat32Ptr: + return OpStructHeadFloat32Ptr + case OpFloat64: + return OpStructHeadFloat64 + case OpFloat64Ptr: + return OpStructHeadFloat64Ptr + case OpString: + return OpStructHeadString + case OpStringPtr: + return OpStructHeadStringPtr + case OpNumber: + return OpStructHeadNumber + case OpNumberPtr: + return OpStructHeadNumberPtr + case OpBool: + return OpStructHeadBool + case OpBoolPtr: + return OpStructHeadBoolPtr + case OpMap: + return OpStructHeadMap + case OpMapPtr: + c.Op = OpMap + return OpStructHeadMapPtr + case OpArray: + return OpStructHeadArray + case OpArrayPtr: + c.Op = OpArray + return OpStructHeadArrayPtr + case OpSlice: + return OpStructHeadSlice + case OpSlicePtr: + c.Op = OpSlice + return OpStructHeadSlicePtr + case OpMarshalJSON: + return OpStructHeadMarshalJSON + case OpMarshalJSONPtr: + return OpStructHeadMarshalJSONPtr + case OpMarshalText: + return OpStructHeadMarshalText + case OpMarshalTextPtr: + return OpStructHeadMarshalTextPtr + } + return OpStructHead +} + +func (c *Opcode) ToFieldType() OpType { + switch c.Op { + case OpInt: + return OpStructFieldInt + case OpIntPtr: + return OpStructFieldIntPtr + case OpUint: + return OpStructFieldUint + case OpUintPtr: + return OpStructFieldUintPtr + case OpFloat32: + return OpStructFieldFloat32 + case OpFloat32Ptr: + return OpStructFieldFloat32Ptr + case OpFloat64: + return OpStructFieldFloat64 + 
case OpFloat64Ptr: + return OpStructFieldFloat64Ptr + case OpString: + return OpStructFieldString + case OpStringPtr: + return OpStructFieldStringPtr + case OpNumber: + return OpStructFieldNumber + case OpNumberPtr: + return OpStructFieldNumberPtr + case OpBool: + return OpStructFieldBool + case OpBoolPtr: + return OpStructFieldBoolPtr + case OpMap: + return OpStructFieldMap + case OpMapPtr: + c.Op = OpMap + return OpStructFieldMapPtr + case OpArray: + return OpStructFieldArray + case OpArrayPtr: + c.Op = OpArray + return OpStructFieldArrayPtr + case OpSlice: + return OpStructFieldSlice + case OpSlicePtr: + c.Op = OpSlice + return OpStructFieldSlicePtr + case OpMarshalJSON: + return OpStructFieldMarshalJSON + case OpMarshalJSONPtr: + return OpStructFieldMarshalJSONPtr + case OpMarshalText: + return OpStructFieldMarshalText + case OpMarshalTextPtr: + return OpStructFieldMarshalTextPtr + } + return OpStructField +} + +func newOpCode(ctx *compileContext, op OpType) *Opcode { + return newOpCodeWithNext(ctx, op, newEndOp(ctx)) +} + +func opcodeOffset(idx int) uintptr { + return uintptr(idx) * uintptrSize +} + +func copyOpcode(code *Opcode) *Opcode { + codeMap := map[uintptr]*Opcode{} + return code.copy(codeMap) +} + +func newOpCodeWithNext(ctx *compileContext, op OpType, next *Opcode) *Opcode { + return &Opcode{ + Op: op, + Type: ctx.typ, + DisplayIdx: ctx.opcodeIndex, + Indent: ctx.indent, + Idx: opcodeOffset(ctx.ptrIndex), + Next: next, + } +} + +func newEndOp(ctx *compileContext) *Opcode { + return newOpCodeWithNext(ctx, OpEnd, nil) +} + +func (c *Opcode) copy(codeMap map[uintptr]*Opcode) *Opcode { + if c == nil { + return nil + } + addr := uintptr(unsafe.Pointer(c)) + if code, exists := codeMap[addr]; exists { + return code + } + copied := &Opcode{ + Op: c.Op, + Type: c.Type, + DisplayIdx: c.DisplayIdx, + Key: c.Key, + EscapedKey: c.EscapedKey, + DisplayKey: c.DisplayKey, + PtrNum: c.PtrNum, + Mask: c.Mask, + RshiftNum: c.RshiftNum, + IsTaggedKey: c.IsTaggedKey, + 
AnonymousKey: c.AnonymousKey, + AnonymousHead: c.AnonymousHead, + Indirect: c.Indirect, + Nilcheck: c.Nilcheck, + AddrForMarshaler: c.AddrForMarshaler, + Indent: c.Indent, + Idx: c.Idx, + HeadIdx: c.HeadIdx, + ElemIdx: c.ElemIdx, + Length: c.Length, + MapIter: c.MapIter, + MapPos: c.MapPos, + Offset: c.Offset, + Size: c.Size, + } + codeMap[addr] = copied + copied.MapKey = c.MapKey.copy(codeMap) + copied.MapValue = c.MapValue.copy(codeMap) + copied.Elem = c.Elem.copy(codeMap) + copied.End = c.End.copy(codeMap) + copied.PrevField = c.PrevField.copy(codeMap) + copied.NextField = c.NextField.copy(codeMap) + copied.Next = c.Next.copy(codeMap) + copied.Jmp = c.Jmp + return copied +} + +func (c *Opcode) BeforeLastCode() *Opcode { + code := c + for { + var nextCode *Opcode + switch code.Op.CodeType() { + case CodeArrayElem, CodeSliceElem, CodeMapKey: + nextCode = code.End + default: + nextCode = code.Next + } + if nextCode.Op == OpEnd { + return code + } + code = nextCode + } +} + +func (c *Opcode) TotalLength() int { + var idx int + for code := c; code.Op != OpEnd; { + idx = int(code.Idx / uintptrSize) + if code.Op == OpStructFieldRecursiveEnd { + break + } + switch code.Op.CodeType() { + case CodeArrayElem, CodeSliceElem, CodeMapKey: + code = code.End + default: + code = code.Next + } + } + return idx + 2 // opEnd + 1 +} + +func (c *Opcode) decOpcodeIndex() { + for code := c; code.Op != OpEnd; { + code.DisplayIdx-- + code.Idx -= uintptrSize + if code.HeadIdx > 0 { + code.HeadIdx -= uintptrSize + } + if code.ElemIdx > 0 { + code.ElemIdx -= uintptrSize + } + if code.MapIter > 0 { + code.MapIter -= uintptrSize + } + if code.Length > 0 && code.Op.CodeType() != CodeArrayHead && code.Op.CodeType() != CodeArrayElem { + code.Length -= uintptrSize + } + switch code.Op.CodeType() { + case CodeArrayElem, CodeSliceElem, CodeMapKey: + code = code.End + default: + code = code.Next + } + } +} + +func (c *Opcode) decIndent() { + for code := c; code.Op != OpEnd; { + code.Indent-- + 
switch code.Op.CodeType() { + case CodeArrayElem, CodeSliceElem, CodeMapKey: + code = code.End + default: + code = code.Next + } + } +} + +func (c *Opcode) dumpHead(code *Opcode) string { + var length uintptr + if code.Op.CodeType() == CodeArrayHead { + length = code.Length + } else { + length = code.Length / uintptrSize + } + return fmt.Sprintf( + `[%d]%s%s ([idx:%d][headIdx:%d][elemIdx:%d][length:%d])`, + code.DisplayIdx, + strings.Repeat("-", code.Indent), + code.Op, + code.Idx/uintptrSize, + code.HeadIdx/uintptrSize, + code.ElemIdx/uintptrSize, + length, + ) +} + +func (c *Opcode) dumpMapHead(code *Opcode) string { + return fmt.Sprintf( + `[%d]%s%s ([idx:%d][headIdx:%d][elemIdx:%d][length:%d][mapIter:%d])`, + code.DisplayIdx, + strings.Repeat("-", code.Indent), + code.Op, + code.Idx/uintptrSize, + code.HeadIdx/uintptrSize, + code.ElemIdx/uintptrSize, + code.Length/uintptrSize, + code.MapIter/uintptrSize, + ) +} + +func (c *Opcode) dumpMapEnd(code *Opcode) string { + return fmt.Sprintf( + `[%d]%s%s ([idx:%d][mapPos:%d][length:%d])`, + code.DisplayIdx, + strings.Repeat("-", code.Indent), + code.Op, + code.Idx/uintptrSize, + code.MapPos/uintptrSize, + code.Length/uintptrSize, + ) +} + +func (c *Opcode) dumpElem(code *Opcode) string { + var length uintptr + if code.Op.CodeType() == CodeArrayElem { + length = code.Length + } else { + length = code.Length / uintptrSize + } + return fmt.Sprintf( + `[%d]%s%s ([idx:%d][headIdx:%d][elemIdx:%d][length:%d][size:%d])`, + code.DisplayIdx, + strings.Repeat("-", code.Indent), + code.Op, + code.Idx/uintptrSize, + code.HeadIdx/uintptrSize, + code.ElemIdx/uintptrSize, + length, + code.Size, + ) +} + +func (c *Opcode) dumpField(code *Opcode) string { + return fmt.Sprintf( + `[%d]%s%s ([idx:%d][key:%s][offset:%d][headIdx:%d])`, + code.DisplayIdx, + strings.Repeat("-", code.Indent), + code.Op, + code.Idx/uintptrSize, + code.DisplayKey, + code.Offset, + code.HeadIdx/uintptrSize, + ) +} + +func (c *Opcode) dumpKey(code *Opcode) string 
{ + return fmt.Sprintf( + `[%d]%s%s ([idx:%d][elemIdx:%d][length:%d][mapIter:%d])`, + code.DisplayIdx, + strings.Repeat("-", code.Indent), + code.Op, + code.Idx/uintptrSize, + code.ElemIdx/uintptrSize, + code.Length/uintptrSize, + code.MapIter/uintptrSize, + ) +} + +func (c *Opcode) dumpValue(code *Opcode) string { + return fmt.Sprintf( + `[%d]%s%s ([idx:%d][mapIter:%d])`, + code.DisplayIdx, + strings.Repeat("-", code.Indent), + code.Op, + code.Idx/uintptrSize, + code.MapIter/uintptrSize, + ) +} + +func (c *Opcode) dump() string { + codes := []string{} + for code := c; code.Op != OpEnd; { + switch code.Op.CodeType() { + case CodeSliceHead: + codes = append(codes, c.dumpHead(code)) + code = code.Next + case CodeMapHead: + codes = append(codes, c.dumpMapHead(code)) + code = code.Next + case CodeArrayElem, CodeSliceElem: + codes = append(codes, c.dumpElem(code)) + code = code.End + case CodeMapKey: + codes = append(codes, c.dumpKey(code)) + code = code.End + case CodeMapValue: + codes = append(codes, c.dumpValue(code)) + code = code.Next + case CodeMapEnd: + codes = append(codes, c.dumpMapEnd(code)) + code = code.Next + case CodeStructField: + codes = append(codes, c.dumpField(code)) + code = code.Next + case CodeStructEnd: + codes = append(codes, c.dumpField(code)) + code = code.Next + default: + codes = append(codes, fmt.Sprintf( + "[%d]%s%s ([idx:%d])", + code.DisplayIdx, + strings.Repeat("-", code.Indent), + code.Op, + code.Idx/uintptrSize, + )) + code = code.Next + } + } + return strings.Join(codes, "\n") +} + +func prevField(code *Opcode, removedFields map[*Opcode]struct{}) *Opcode { + if _, exists := removedFields[code]; exists { + return prevField(code.PrevField, removedFields) + } + return code +} + +func nextField(code *Opcode, removedFields map[*Opcode]struct{}) *Opcode { + if _, exists := removedFields[code]; exists { + return nextField(code.NextField, removedFields) + } + return code +} + +func linkPrevToNextField(cur *Opcode, removedFields 
map[*Opcode]struct{}) { + prev := prevField(cur.PrevField, removedFields) + prev.NextField = nextField(cur.NextField, removedFields) + code := prev + fcode := cur + for { + var nextCode *Opcode + switch code.Op.CodeType() { + case CodeArrayElem, CodeSliceElem, CodeMapKey: + nextCode = code.End + default: + nextCode = code.Next + } + if nextCode == fcode { + code.Next = fcode.Next + break + } else if nextCode.Op == OpEnd { + break + } + code = nextCode + } +} + +func newSliceHeaderCode(ctx *compileContext) *Opcode { + idx := opcodeOffset(ctx.ptrIndex) + ctx.incPtrIndex() + elemIdx := opcodeOffset(ctx.ptrIndex) + ctx.incPtrIndex() + length := opcodeOffset(ctx.ptrIndex) + return &Opcode{ + Op: OpSlice, + DisplayIdx: ctx.opcodeIndex, + Idx: idx, + HeadIdx: idx, + ElemIdx: elemIdx, + Length: length, + Indent: ctx.indent, + } +} + +func newSliceElemCode(ctx *compileContext, head *Opcode, size uintptr) *Opcode { + return &Opcode{ + Op: OpSliceElem, + DisplayIdx: ctx.opcodeIndex, + Idx: opcodeOffset(ctx.ptrIndex), + HeadIdx: head.Idx, + ElemIdx: head.ElemIdx, + Length: head.Length, + Indent: ctx.indent, + Size: size, + } +} + +func newArrayHeaderCode(ctx *compileContext, alen int) *Opcode { + idx := opcodeOffset(ctx.ptrIndex) + ctx.incPtrIndex() + elemIdx := opcodeOffset(ctx.ptrIndex) + return &Opcode{ + Op: OpArray, + DisplayIdx: ctx.opcodeIndex, + Idx: idx, + HeadIdx: idx, + ElemIdx: elemIdx, + Indent: ctx.indent, + Length: uintptr(alen), + } +} + +func newArrayElemCode(ctx *compileContext, head *Opcode, length int, size uintptr) *Opcode { + return &Opcode{ + Op: OpArrayElem, + DisplayIdx: ctx.opcodeIndex, + Idx: opcodeOffset(ctx.ptrIndex), + ElemIdx: head.ElemIdx, + HeadIdx: head.HeadIdx, + Length: uintptr(length), + Indent: ctx.indent, + Size: size, + } +} + +func newMapHeaderCode(ctx *compileContext) *Opcode { + idx := opcodeOffset(ctx.ptrIndex) + ctx.incPtrIndex() + elemIdx := opcodeOffset(ctx.ptrIndex) + ctx.incPtrIndex() + length := opcodeOffset(ctx.ptrIndex) + 
ctx.incPtrIndex() + mapIter := opcodeOffset(ctx.ptrIndex) + return &Opcode{ + Op: OpMap, + Type: ctx.typ, + DisplayIdx: ctx.opcodeIndex, + Idx: idx, + ElemIdx: elemIdx, + Length: length, + MapIter: mapIter, + Indent: ctx.indent, + } +} + +func newMapKeyCode(ctx *compileContext, head *Opcode) *Opcode { + return &Opcode{ + Op: OpMapKey, + DisplayIdx: ctx.opcodeIndex, + Idx: opcodeOffset(ctx.ptrIndex), + ElemIdx: head.ElemIdx, + Length: head.Length, + MapIter: head.MapIter, + Indent: ctx.indent, + } +} + +func newMapValueCode(ctx *compileContext, head *Opcode) *Opcode { + return &Opcode{ + Op: OpMapValue, + DisplayIdx: ctx.opcodeIndex, + Idx: opcodeOffset(ctx.ptrIndex), + ElemIdx: head.ElemIdx, + Length: head.Length, + MapIter: head.MapIter, + Indent: ctx.indent, + } +} + +func newMapEndCode(ctx *compileContext, head *Opcode) *Opcode { + mapPos := opcodeOffset(ctx.ptrIndex) + ctx.incPtrIndex() + idx := opcodeOffset(ctx.ptrIndex) + return &Opcode{ + Op: OpMapEnd, + DisplayIdx: ctx.opcodeIndex, + Idx: idx, + Length: head.Length, + MapPos: mapPos, + Indent: ctx.indent, + Next: newEndOp(ctx), + } +} + +func newInterfaceCode(ctx *compileContext) *Opcode { + return &Opcode{ + Op: OpInterface, + Type: ctx.typ, + DisplayIdx: ctx.opcodeIndex, + Idx: opcodeOffset(ctx.ptrIndex), + Indent: ctx.indent, + Next: newEndOp(ctx), + } +} + +func newRecursiveCode(ctx *compileContext, jmp *CompiledCode) *Opcode { + return &Opcode{ + Op: OpStructFieldRecursive, + Type: ctx.typ, + DisplayIdx: ctx.opcodeIndex, + Idx: opcodeOffset(ctx.ptrIndex), + Indent: ctx.indent, + Next: newEndOp(ctx), + Jmp: jmp, + } +} diff --git a/internal/encoder/vm/vm.go b/internal/encoder/vm/vm.go index e7fddd4..ee8de9e 100644 --- a/internal/encoder/vm/vm.go +++ b/internal/encoder/vm/vm.go @@ -9,11 +9,10 @@ import ( "unsafe" "github.com/goccy/go-json/internal/encoder" - "github.com/goccy/go-json/internal/encoder/compiler" "github.com/goccy/go-json/internal/runtime" ) -const uintptrSize = 4 << (^uintptr(0) >> 63) // 
// getTag returns the raw value of the field's `json` struct tag.
func getTag(field reflect.StructField) string {
	return field.Tag.Get("json")
}

// IsIgnoredStructField reports whether field must be skipped.
//
// A field is ignored when its `json` tag is exactly "-", or when it has a
// non-empty PkgPath and is either not anonymous or is anonymous with a type
// that is neither a struct nor a pointer to a struct.
func IsIgnoredStructField(field reflect.StructField) bool {
	if field.PkgPath != "" {
		if !field.Anonymous {
			// private field
			return true
		}
		typ := field.Type
		if typ.Kind() == reflect.Ptr {
			typ = typ.Elem()
		}
		if typ.Kind() != reflect.Struct {
			return true
		}
	}
	return getTag(field) == "-"
}

// StructTag is the parsed form of a struct field's `json` tag.
type StructTag struct {
	Key         string              // tag name if valid, otherwise the Go field name
	IsTaggedKey bool                // whether Key came from the tag
	IsOmitEmpty bool                // whether the "omitempty" option is present
	IsString    bool                // whether the "string" option is present
	Field       reflect.StructField // the field the tag was read from
}

// StructTags is a list of parsed struct tags.
type StructTags []*StructTag

// ExistsKey reports whether any tag in t has the given key.
func (t StructTags) ExistsKey(key string) bool {
	for _, tt := range t {
		if tt.Key == key {
			return true
		}
	}
	return false
}

// isValidTag reports whether s may be used as a key name: it must be
// non-empty and consist only of letters, digits and the punctuation
// characters listed below.
func isValidTag(s string) bool {
	if s == "" {
		return false
	}
	for _, c := range s {
		switch {
		case strings.ContainsRune("!#$%&()*+-./:<=>?@[]^_{|}~ ", c):
			// Backslash and quote chars are reserved, but
			// otherwise any punctuation chars are allowed
			// in a tag name.
		case !unicode.IsLetter(c) && !unicode.IsDigit(c):
			return false
		}
	}
	return true
}

// StructTagFromField parses the `json` tag of field.
//
// The first comma-separated element is used as the key when it is a valid tag
// name; otherwise the Go field name is used. Every following element is
// inspected, so "omitempty" and "string" are recognised in any position and
// in any combination; unknown options are ignored.
func StructTagFromField(field reflect.StructField) *StructTag {
	st := &StructTag{Field: field, Key: field.Name}

	// strings.Split with a non-empty separator always returns at least one
	// element, so opts[0] and opts[1:] are always valid.
	opts := strings.Split(getTag(field), ",")
	if name := opts[0]; name != "" && isValidTag(name) {
		st.Key = name
		st.IsTaggedKey = true
	}
	for _, opt := range opts[1:] {
		switch opt {
		case "omitempty":
			st.IsOmitEmpty = true
		case "string":
			st.IsString = true
		}
	}
	return st
}
{ + return nil + } + if addrRange > maxAcceptableTypeAddrRange { + return nil + } + typeAddr = &TypeAddr{ + BaseTypeAddr: min, + MaxTypeAddr: max, + AddrRange: addrRange, + } + return typeAddr +}