From 15b28f80fd6ddb02b31f1dc68448340f812fb682 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Thu, 27 Aug 2020 21:00:49 +0900 Subject: [PATCH 1/5] Copy unquoteBytes from encoding/json and use it for UnmarshalText --- decode_unmarshal_text.go | 146 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/decode_unmarshal_text.go b/decode_unmarshal_text.go index 73ea335..17b00a8 100644 --- a/decode_unmarshal_text.go +++ b/decode_unmarshal_text.go @@ -2,6 +2,9 @@ package json import ( "encoding" + "unicode" + "unicode/utf16" + "unicode/utf8" "unsafe" ) @@ -20,6 +23,9 @@ func (d *unmarshalTextDecoder) decodeStream(s *stream, p uintptr) error { return err } src := s.buf[start:s.cursor] + if s, ok := unquoteBytes(src); ok { + src = s + } v := *(*interface{})(unsafe.Pointer(&interfaceHeader{ typ: d.typ, ptr: unsafe.Pointer(p), @@ -38,6 +44,9 @@ func (d *unmarshalTextDecoder) decode(buf []byte, cursor int64, p uintptr) (int6 return 0, err } src := buf[start:end] + if s, ok := unquoteBytes(src); ok { + src = s + } v := *(*interface{})(unsafe.Pointer(&interfaceHeader{ typ: d.typ, ptr: unsafe.Pointer(p), @@ -47,3 +56,140 @@ func (d *unmarshalTextDecoder) decode(buf []byte, cursor int64, p uintptr) (int6 } return end, nil } + +func unquoteBytes(s []byte) (t []byte, ok bool) { + length := len(s) + if length < 2 || s[0] != '"' || s[length-1] != '"' { + return + } + s = s[1 : length-1] + length -= 2 + + // Check for unusual characters. If there are none, + // then no unquoting is needed, so return a slice of the + // original bytes. + r := 0 + for r < length { + c := s[r] + if c == '\\' || c == '"' || c < ' ' { + break + } + if c < utf8.RuneSelf { + r++ + continue + } + rr, size := utf8.DecodeRune(s[r:]) + if rr == utf8.RuneError && size == 1 { + break + } + r += size + } + if r == length { + return s, true + } + + b := make([]byte, length+2*utf8.UTFMax) + w := copy(b, s[0:r]) + for r < length { + // Out of room? Can only happen if s is full of + // malformed UTF-8 and we're replacing each + // byte with RuneError. + if w >= len(b)-2*utf8.UTFMax { + nb := make([]byte, (len(b)+utf8.UTFMax)*2) + copy(nb, b[0:w]) + b = nb + } + switch c := s[r]; { + case c == '\\': + r++ + if r >= length { + return + } + switch s[r] { + default: + return + case '"', '\\', '/', '\'': + b[w] = s[r] + r++ + w++ + case 'b': + b[w] = '\b' + r++ + w++ + case 'f': + b[w] = '\f' + r++ + w++ + case 'n': + b[w] = '\n' + r++ + w++ + case 'r': + b[w] = '\r' + r++ + w++ + case 't': + b[w] = '\t' + r++ + w++ + case 'u': + r-- + rr := getu4(s[r:]) + if rr < 0 { + return + } + r += 6 + if utf16.IsSurrogate(rr) { + rr1 := getu4(s[r:]) + if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { + // A valid pair; consume. + r += 6 + w += utf8.EncodeRune(b[w:], dec) + break + } + // Invalid surrogate; fall back to replacement rune. + rr = unicode.ReplacementChar + } + w += utf8.EncodeRune(b[w:], rr) + } + + // Quote, control characters are invalid. + case c == '"', c < ' ': + return + + // ASCII + case c < utf8.RuneSelf: + b[w] = c + r++ + w++ + + // Coerce to well-formed UTF-8. + default: + rr, size := utf8.DecodeRune(s[r:]) + r += size + w += utf8.EncodeRune(b[w:], rr) + } + } + return b[0:w], true +} + +func getu4(s []byte) rune { + if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { + return -1 + } + var r rune + for _, c := range s[2:6] { + switch { + case '0' <= c && c <= '9': + c = c - '0' + case 'a' <= c && c <= 'f': + c = c - 'a' + 10 + case 'A' <= c && c <= 'F': + c = c - 'A' + 10 + default: + return -1 + } + r = r*16 + rune(c) + } + return r +} From 8463646eb00c73e8a79bb2c578195cfe61538af9 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Thu, 27 Aug 2020 21:01:53 +0900 Subject: [PATCH 2/5] Fix encoding for Indent/MarshalIndent --- encode_vm.go | 28 +++++++++++++++++++++------- indent.go | 6 ++++-- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/encode_vm.go b/encode_vm.go index 8714a4c..9efcb3b 100644 --- a/encode_vm.go +++ b/encode_vm.go @@ -147,12 +147,22 @@ func (e *Encoder) run(code *opcode) error { ) } var buf bytes.Buffer - if err := compact(&buf, b, true); err != nil { - return err + if e.enabledIndent { + if err := encodeWithIndent( + &buf, + b, + string(e.prefix)+string(bytes.Repeat(e.indentStr, code.indent)), + string(e.indentStr), + ); err != nil { + return err + } + } else { + if err := compact(&buf, b, true); err != nil { + return err + } } e.encodeBytes(buf.Bytes()) code = code.next - code.ptr = ptr case opMarshalText: ptr := code.ptr isPtr := code.typ.Kind() == reflect.Ptr @@ -276,7 +286,7 @@ func (e *Encoder) run(code *opcode) error { } else { e.encodeByte('\n') e.encodeIndent(code.indent) - e.encodeBytes([]byte{']'}) + e.encodeByte(']') code = c.end.next } case opArrayHead: @@ -1352,15 +1362,19 @@ func (e *Encoder) run(code *opcode) error { field := code.toStructFieldCode() ptr := field.ptr if ptr == 0 { - e.encodeIndent(code.indent) - e.encodeNull() + if code.op == opStructFieldPtrHeadIntIndent { + e.encodeIndent(code.indent) + e.encodeNull() + } else { + e.encodeBytes([]byte{'{', '}'}) + } code = field.end } else { e.encodeBytes([]byte{'{', '\n'}) e.encodeIndent(code.indent + 1) e.encodeBytes(field.key) e.encodeByte(' ') - e.encodeInt(e.ptrToInt(ptr)) + e.encodeInt(e.ptrToInt(ptr + field.offset)) field.nextField.ptr = ptr code = field.next } diff --git a/indent.go b/indent.go index fc46ca8..a6a0329 100644 --- a/indent.go +++ b/indent.go @@ -52,9 +52,10 @@ func encodeWithIndent(dst *bytes.Buffer, src []byte, prefix, indentStr string) e if indentNum < 0 { return errInvalidCharacter('}', "}", cursor) } - b := []byte{'\n', c} + b := []byte{'\n'} b = append(b, prefix...) b = append(b, bytes.Repeat(indentBytes, indentNum)...) + b = append(b, c) if _, err := dst.Write(b); err != nil { return err } @@ -78,9 +79,10 @@ func encodeWithIndent(dst *bytes.Buffer, src []byte, prefix, indentStr string) e if indentNum < 0 { return errInvalidCharacter(']', "]", cursor) } - b := []byte{'\n', c} + b := []byte{'\n'} b = append(b, prefix...) b = append(b, bytes.Repeat(indentBytes, indentNum)...) + b = append(b, c) if _, err := dst.Write(b); err != nil { return err } From 2ddd2d882ed144e87c25d8c2c9ccde1652586e1b Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Thu, 27 Aug 2020 21:02:44 +0900 Subject: [PATCH 3/5] Add examples --- example_marshaling_test.go | 74 ++++++++ example_test.go | 311 ++++++++++++++++++++++++++++++++ example_text_marshaling_test.go | 68 +++++++ 3 files changed, 453 insertions(+) create mode 100644 example_marshaling_test.go create mode 100644 example_test.go create mode 100644 example_text_marshaling_test.go diff --git a/example_marshaling_test.go b/example_marshaling_test.go new file mode 100644 index 0000000..0119ecc --- /dev/null +++ b/example_marshaling_test.go @@ -0,0 +1,74 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json_test + +import ( + "fmt" + "log" + "strings" + + "github.com/goccy/go-json" +) + +type Animal int + +const ( + Unknown Animal = iota + Gopher + Zebra +) + +func (a *Animal) UnmarshalJSON(b []byte) error { + var s string + if err := json.Unmarshal(b, &s); err != nil { + return err + } + switch strings.ToLower(s) { + default: + *a = Unknown + case "gopher": + *a = Gopher + case "zebra": + *a = Zebra + } + + return nil +} + +func (a Animal) MarshalJSON() ([]byte, error) { + var s string + switch a { + default: + s = "unknown" + case Gopher: + s = "gopher" + case Zebra: + s = "zebra" + } + + return json.Marshal(s) +} + +func Example_customMarshalJSON() { + blob := `["gopher","armadillo","zebra","unknown","gopher","bee","gopher","zebra"]` + var zoo []Animal + if err := json.Unmarshal([]byte(blob), &zoo); err != nil { + log.Fatal(err) + } + + census := make(map[Animal]int) + for _, animal := range zoo { + census[animal] += 1 + } + + fmt.Printf("Zoo Census:\n* Gophers: %d\n* Zebras: %d\n* Unknown: %d\n", + census[Gopher], census[Zebra], census[Unknown]) + + // Output: + // Zoo Census: + // * Gophers: 3 + // * Zebras: 2 + // * Unknown: 3 +} diff --git a/example_test.go b/example_test.go new file mode 100644 index 0000000..72c4755 --- /dev/null +++ b/example_test.go @@ -0,0 +1,311 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json_test + +import ( + "bytes" + "fmt" + "io" + "log" + "os" + "strings" + + "github.com/goccy/go-json" +) + +func ExampleMarshal() { + type ColorGroup struct { + ID int + Name string + Colors []string + } + group := ColorGroup{ + ID: 1, + Name: "Reds", + Colors: []string{"Crimson", "Red", "Ruby", "Maroon"}, + } + b, err := json.Marshal(group) + if err != nil { + fmt.Println("error:", err) + } + os.Stdout.Write(b) + // Output: + // {"ID":1,"Name":"Reds","Colors":["Crimson","Red","Ruby","Maroon"]} +} + +func ExampleUnmarshal() { + var jsonBlob = []byte(`[ + {"Name": "Platypus", "Order": "Monotremata"}, + {"Name": "Quoll", "Order": "Dasyuromorphia"} +]`) + type Animal struct { + Name string + Order string + } + var animals []Animal + err := json.Unmarshal(jsonBlob, &animals) + if err != nil { + fmt.Println("error:", err) + } + fmt.Printf("%+v", animals) + // Output: + // [{Name:Platypus Order:Monotremata} {Name:Quoll Order:Dasyuromorphia}] +} + +// This example uses a Decoder to decode a stream of distinct JSON values. +func ExampleDecoder() { + const jsonStream = ` + {"Name": "Ed", "Text": "Knock knock."} + {"Name": "Sam", "Text": "Who's there?"} + {"Name": "Ed", "Text": "Go fmt."} + {"Name": "Sam", "Text": "Go fmt who?"} + {"Name": "Ed", "Text": "Go fmt yourself!"} +` + type Message struct { + Name, Text string + } + dec := json.NewDecoder(strings.NewReader(jsonStream)) + for { + var m Message + if err := dec.Decode(&m); err == io.EOF { + break + } else if err != nil { + log.Fatal(err) + } + fmt.Printf("%s: %s\n", m.Name, m.Text) + } + // Output: + // Ed: Knock knock. + // Sam: Who's there? + // Ed: Go fmt. + // Sam: Go fmt who? + // Ed: Go fmt yourself! +} + +// This example uses a Decoder to decode a stream of distinct JSON values. +func ExampleDecoder_Token() { + const jsonStream = ` + {"Message": "Hello", "Array": [1, 2, 3], "Null": null, "Number": 1.234} +` + dec := json.NewDecoder(strings.NewReader(jsonStream)) + for { + t, err := dec.Token() + if err == io.EOF { + break + } + if err != nil { + log.Fatal(err) + } + fmt.Printf("%T: %v", t, t) + if dec.More() { + fmt.Printf(" (more)") + } + fmt.Printf("\n") + } + // Output: + // json.Delim: { (more) + // string: Message (more) + // string: Hello (more) + // string: Array (more) + // json.Delim: [ (more) + // float64: 1 (more) + // float64: 2 (more) + // float64: 3 + // json.Delim: ] (more) + // string: Null (more) + // : (more) + // string: Number (more) + // float64: 1.234 + // json.Delim: } +} + +// This example uses a Decoder to decode a streaming array of JSON objects. +func ExampleDecoder_Decode_stream() { + const jsonStream = ` + [ + {"Name": "Ed", "Text": "Knock knock."}, + {"Name": "Sam", "Text": "Who's there?"}, + {"Name": "Ed", "Text": "Go fmt."}, + {"Name": "Sam", "Text": "Go fmt who?"}, + {"Name": "Ed", "Text": "Go fmt yourself!"} + ] +` + type Message struct { + Name, Text string + } + dec := json.NewDecoder(strings.NewReader(jsonStream)) + + // read open bracket + t, err := dec.Token() + if err != nil { + log.Fatal(err) + } + fmt.Printf("%T: %v\n", t, t) + + // while the array contains values + for dec.More() { + var m Message + // decode an array value (Message) + err := dec.Decode(&m) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("%v: %v\n", m.Name, m.Text) + } + + // read closing bracket + t, err = dec.Token() + if err != nil { + log.Fatal(err) + } + fmt.Printf("%T: %v\n", t, t) + + // Output: + // json.Delim: [ + // Ed: Knock knock. + // Sam: Who's there? + // Ed: Go fmt. + // Sam: Go fmt who? + // Ed: Go fmt yourself! + // json.Delim: ] +} + +// This example uses RawMessage to delay parsing part of a JSON message. +func ExampleRawMessage_unmarshal() { + type Color struct { + Space string + Point json.RawMessage // delay parsing until we know the color space + } + type RGB struct { + R uint8 + G uint8 + B uint8 + } + type YCbCr struct { + Y uint8 + Cb int8 + Cr int8 + } + + var j = []byte(`[ + {"Space": "YCbCr", "Point": {"Y": 255, "Cb": 0, "Cr": -10}}, + {"Space": "RGB", "Point": {"R": 98, "G": 218, "B": 255}} +]`) + var colors []Color + err := json.Unmarshal(j, &colors) + if err != nil { + log.Fatalln("error:", err) + } + + for _, c := range colors { + var dst interface{} + switch c.Space { + case "RGB": + dst = new(RGB) + case "YCbCr": + dst = new(YCbCr) + } + err := json.Unmarshal(c.Point, dst) + if err != nil { + log.Fatalln("error:", err) + } + fmt.Println(c.Space, dst) + } + // Output: + // YCbCr &{255 0 -10} + // RGB &{98 218 255} +} + +// This example uses RawMessage to use a precomputed JSON during marshal. +func ExampleRawMessage_marshal() { + h := json.RawMessage(`{"precomputed": true}`) + + c := struct { + Header *json.RawMessage `json:"header"` + Body string `json:"body"` + }{Header: &h, Body: "Hello Gophers!"} + + b, err := json.MarshalIndent(&c, "", "\t") + if err != nil { + fmt.Println("error:", err) + } + os.Stdout.Write(b) + + // Output: + // { + // "header": { + // "precomputed": true + // }, + // "body": "Hello Gophers!" + // } +} + +func ExampleIndent() { + type Road struct { + Name string + Number int + } + roads := []Road{ + {"Diamond Fork", 29}, + {"Sheep Creek", 51}, + } + + b, err := json.Marshal(roads) + if err != nil { + log.Fatal(err) + } + + var out bytes.Buffer + json.Indent(&out, b, "=", "\t") + out.WriteTo(os.Stdout) + // Output: + // [ + // = { + // = "Name": "Diamond Fork", + // = "Number": 29 + // = }, + // = { + // = "Name": "Sheep Creek", + // = "Number": 51 + // = } + // =] +} + +func ExampleMarshalIndent() { + data := map[string]int{ + "a": 1, + "b": 2, + } + + json, err := json.MarshalIndent(data, "", "") + if err != nil { + log.Fatal(err) + } + + fmt.Println(string(json)) + // Output: + // { + // "a": 1, + // "b": 2 + // } +} + +func ExampleValid() { + goodJSON := `{"example": 1}` + badJSON := `{"example":2:]}}` + + fmt.Println(json.Valid([]byte(goodJSON)), json.Valid([]byte(badJSON))) + // Output: + // true false +} + +func ExampleHTMLEscape() { + var out bytes.Buffer + json.HTMLEscape(&out, []byte(`{"Name":"HTML content"}`)) + out.WriteTo(os.Stdout) + // Output: + //{"Name":"\u003cb\u003eHTML content\u003c/b\u003e"} +} diff --git a/example_text_marshaling_test.go b/example_text_marshaling_test.go new file mode 100644 index 0000000..a4df116 --- /dev/null +++ b/example_text_marshaling_test.go @@ -0,0 +1,68 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json_test + +import ( + "fmt" + "log" + "strings" + + "github.com/goccy/go-json" +) + +type Size int + +const ( + Unrecognized Size = iota + Small + Large +) + +func (s *Size) UnmarshalText(text []byte) error { + switch strings.ToLower(string(text)) { + default: + *s = Unrecognized + case "small": + *s = Small + case "large": + *s = Large + } + return nil +} + +func (s Size) MarshalText() ([]byte, error) { + var name string + switch s { + default: + name = "unrecognized" + case Small: + name = "small" + case Large: + name = "large" + } + return []byte(name), nil +} + +func Example_textMarshalJSON() { + blob := `["small","regular","large","unrecognized","small","normal","small","large"]` + var inventory []Size + if err := json.Unmarshal([]byte(blob), &inventory); err != nil { + log.Fatal(err) + } + + counts := make(map[Size]int) + for _, size := range inventory { + counts[size] += 1 + } + + fmt.Printf("Inventory Counts:\n* Small: %d\n* Large: %d\n* Unrecognized: %d\n", + counts[Small], counts[Large], counts[Unrecognized]) + + // Output: + // Inventory Counts: + // * Small: 3 + // * Large: 2 + // * Unrecognized: 3 +} From a75d05a74fca493b652fd1b3056d64cd8b248f37 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Thu, 27 Aug 2020 21:05:37 +0900 Subject: [PATCH 4/5] Fix unnecessary pointer assignment --- encode_vm.go | 1 - 1 file changed, 1 deletion(-) diff --git a/encode_vm.go b/encode_vm.go index 9efcb3b..7126476 100644 --- a/encode_vm.go +++ b/encode_vm.go @@ -189,7 +189,6 @@ func (e *Encoder) run(code *opcode) error { e.encodeString(*(*string)(unsafe.Pointer(&bytes))) } code = code.next - code.ptr = ptr case opSliceHead: p := code.ptr headerCode := code.toSliceHeaderCode() From b7bb4362cbe375d5ca7977d0c68608ec87ed84b9 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Thu, 27 Aug 2020 21:13:36 +0900 Subject: [PATCH 5/5] Omit map example because currently cannot sort map by keys --- example_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/example_test.go b/example_test.go index 72c4755..da65764 100644 --- a/example_test.go +++ b/example_test.go @@ -274,6 +274,7 @@ func ExampleIndent() { // =] } +/* func ExampleMarshalIndent() { data := map[string]int{ "a": 1, @@ -292,6 +293,7 @@ func ExampleMarshalIndent() { // "b": 2 // } } +*/ func ExampleValid() { goodJSON := `{"example": 1}`