Merge pull request #24 from goccy/feature/fix-compact

Fix Compact/Indent
This commit is contained in:
Masaaki Goshima 2020-08-14 19:00:51 +09:00 committed by GitHub
commit aa8099d09e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 449 additions and 30 deletions

43
compact.go Normal file
View File

@ -0,0 +1,43 @@
package json
import (
"bytes"
)
func compact(dst *bytes.Buffer, src []byte) error {
length := len(src)
for cursor := 0; cursor < length; cursor++ {
c := src[cursor]
switch c {
case ' ', '\t', '\n', '\r':
continue
case '"':
if err := dst.WriteByte(c); err != nil {
return err
}
for {
cursor++
if err := dst.WriteByte(src[cursor]); err != nil {
return err
}
switch src[cursor] {
case '\\':
cursor++
if err := dst.WriteByte(src[cursor]); err != nil {
return err
}
case '"':
goto LOOP_END
case nul:
return errUnexpectedEndOfJSON("string", int64(length))
}
}
default:
if err := dst.WriteByte(c); err != nil {
return err
}
}
LOOP_END:
}
return nil
}

View File

@ -34,7 +34,7 @@ func trueBytes(s *stream) error {
func falseBytes(s *stream) error { func falseBytes(s *stream) error {
if s.cursor+4 >= s.length { if s.cursor+4 >= s.length {
if s.read() { if !s.read() {
return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset()) return errInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
} }
} }

View File

@ -33,6 +33,85 @@ var (
) )
) )
// hexToInt is a lookup table indexed by byte value. The ASCII hexadecimal
// digits '0'-'9', 'A'-'F' and 'a'-'f' map to their numeric value; every other
// byte maps to zero.
var hexToInt = [256]int{
	'0': 0, '1': 1, '2': 2, '3': 3, '4': 4,
	'5': 5, '6': 6, '7': 7, '8': 8, '9': 9,

	'A': 10, 'a': 10,
	'B': 11, 'b': 11,
	'C': 12, 'c': 12,
	'D': 13, 'd': 13,
	'E': 14, 'e': 14,
	'F': 15, 'f': 15,
}
// unicodeToRune interprets code as a big-endian sequence of hexadecimal
// digits and returns the resulting value as a rune. Digits are looked up in
// hexToInt, so a byte that is not a hexadecimal digit contributes zero.
func unicodeToRune(code []byte) rune {
	var acc int
	for _, digit := range code {
		// Each digit occupies the next four bits of the accumulated value.
		acc = acc<<4 | hexToInt[digit]
	}
	return rune(acc)
}
// decodeEscapeString decodes, in place, the escape sequence found one byte
// after the current stream cursor and removes the escape syntax from s.buf.
//
// The cursor is advanced by one before the escape character is inspected
// (the visible call site invokes this function when the current byte is a
// backslash). A single-character escape is overwritten with the byte it
// denotes and the byte preceding it is spliced out of the buffer. A \uXXXX
// escape is replaced by the UTF-8 encoding of the rune returned by
// unicodeToRune. On success the cursor is left one position before where the
// escape character was.
//
// It returns the error built by errInvalidCharacter when more input is needed
// but s.read() reports false, and the error built by errUnexpectedEndOfJSON
// when the escape character is not one of the recognised ones.
//
// NOTE(review): each \uXXXX escape is converted on its own; nothing here
// combines a UTF-16 surrogate pair into a single rune — confirm whether that
// is handled elsewhere.
// NOTE(review): s.buf is shortened by the splices below but s.length is not
// adjusted in this function — confirm the stream keeps the two consistent.
func decodeEscapeString(s *stream) error {
s.cursor++
RETRY:
switch s.buf[s.cursor] {
// Single-character escapes: overwrite the escape character with the byte it
// stands for; the preceding byte is removed after the switch.
case '"':
s.buf[s.cursor] = '"'
case '\\':
s.buf[s.cursor] = '\\'
case '/':
s.buf[s.cursor] = '/'
case 'b':
s.buf[s.cursor] = '\b'
case 'f':
s.buf[s.cursor] = '\f'
case 'n':
s.buf[s.cursor] = '\n'
case 'r':
s.buf[s.cursor] = '\r'
case 't':
s.buf[s.cursor] = '\t'
case 'u':
// The four hex digits occupy cursor+1 .. cursor+4; ask for more input when
// they may not be buffered yet.
// NOTE(review): after a successful read the available length is not checked
// again before slicing — confirm a single read is always sufficient.
if s.cursor+5 >= s.length {
if !s.read() {
return errInvalidCharacter(s.char(), "escaped string", s.totalOffset())
}
}
code := unicodeToRune(s.buf[s.cursor+1 : s.cursor+5])
// string(rune) yields the UTF-8 encoding of the code point.
unicode := []byte(string(code))
// Replace the six bytes s.buf[cursor-1 : cursor+5] with the encoded rune and
// keep everything from cursor+5 onwards.
s.buf = append(append(s.buf[:s.cursor-1], unicode...), s.buf[s.cursor+5:]...)
s.cursor--
return nil
case nul:
// A nul byte triggers a read and a retry on the same position; presumably
// nul marks the end of the currently buffered data — confirm against stream.
if !s.read() {
return errInvalidCharacter(s.char(), "escaped string", s.totalOffset())
}
goto RETRY
default:
// Any other byte after the backslash is rejected.
return errUnexpectedEndOfJSON("string", s.totalOffset())
}
// Drop the byte at cursor-1 so only the decoded byte remains, then step the
// cursor back onto it.
s.buf = append(s.buf[:s.cursor-1], s.buf[s.cursor:]...)
s.cursor--
return nil
}
func (d *interfaceDecoder) decodeStream(s *stream, p uintptr) error { func (d *interfaceDecoder) decodeStream(s *stream, p uintptr) error {
s.skipWhiteSpace() s.skipWhiteSpace()
for { for {
@ -71,7 +150,9 @@ func (d *interfaceDecoder) decodeStream(s *stream, p uintptr) error {
for { for {
switch s.char() { switch s.char() {
case '\\': case '\\':
s.cursor++ if err := decodeEscapeString(s); err != nil {
return err
}
case '"': case '"':
literal := s.buf[start:s.cursor] literal := s.buf[start:s.cursor]
s.cursor++ s.cursor++

View File

@ -99,7 +99,7 @@ func (d *mapDecoder) decodeStream(s *stream, p uintptr) error {
return nil return nil
} }
if s.char() != ',' { if s.char() != ',' {
return errExpected("semicolon after object value", s.totalOffset()) return errExpected("comma after object value", s.totalOffset())
} }
} }
return nil return nil
@ -168,7 +168,7 @@ func (d *mapDecoder) decode(buf []byte, cursor int64, p uintptr) (int64, error)
return cursor, nil return cursor, nil
} }
if buf[cursor] != ',' { if buf[cursor] != ',' {
return 0, errExpected("semicolon after object value", cursor) return 0, errExpected("comma after object value", cursor)
} }
} }
return cursor, nil return cursor, nil

View File

@ -67,21 +67,25 @@ func (s *stream) read() bool {
if n < readChunkSize || err == io.EOF { if n < readChunkSize || err == io.EOF {
s.allRead = true s.allRead = true
} }
totalSize := s.length + int64(n) + 1 // Reserve 2 extra bytes past the data so that two consecutive s.cursor++ steps
// stay inside the buffer (e.g. in decodeEscapeString).
const extendBufLength = int64(2)
totalSize := s.length + int64(n) + extendBufLength
if totalSize > readChunkSize { if totalSize > readChunkSize {
newBuf := make([]byte, totalSize) newBuf := make([]byte, totalSize)
copy(newBuf, s.buf) copy(newBuf, s.buf)
copy(newBuf[s.length:], buf) copy(newBuf[s.length:], buf)
s.buf = newBuf s.buf = newBuf
s.length = totalSize - 1 s.length = totalSize - extendBufLength
} else if s.length > 0 { } else if s.length > 0 {
copy(buf[s.length:], buf) copy(buf[s.length:], buf)
copy(buf, s.buf[:s.length]) copy(buf, s.buf[:s.length])
s.buf = buf s.buf = buf
s.length = totalSize - 1 s.length = totalSize - extendBufLength
} else { } else {
s.buf = buf s.buf = buf
s.length = totalSize - 1 s.length = totalSize - extendBufLength
} }
s.offset += s.cursor s.offset += s.cursor
if n == 0 { if n == 0 {

View File

@ -84,10 +84,20 @@ func (e *Encoder) run(code *opcode) error {
typ = typ.Elem() typ = typ.Elem()
} }
e.indent = ifaceCode.indent e.indent = ifaceCode.indent
c, err := e.compile(typ, ifaceCode.root, e.enabledIndent) var c *opcode
if typ.Kind() == reflect.Map {
code, err := e.compileMap(typ, false, ifaceCode.root, e.enabledIndent)
if err != nil { if err != nil {
return err return err
} }
c = code
} else {
code, err := e.compile(typ, ifaceCode.root, e.enabledIndent)
if err != nil {
return err
}
c = code
}
c.ptr = uintptr(header.ptr) c.ptr = uintptr(header.ptr)
c.beforeLastCode().next = code.next c.beforeLastCode().next = code.next
code = c code = c

8
export_test.go Normal file
View File

@ -0,0 +1,8 @@
package json
// NewSyntaxError builds a SyntaxError carrying the given message and offset.
// It sets the unexported msg field, which code outside this package cannot
// assign directly.
func NewSyntaxError(msg string, offset int64) *SyntaxError {
	e := new(SyntaxError)
	e.msg = msg
	e.Offset = offset
	return e
}

106
indent.go Normal file
View File

@ -0,0 +1,106 @@
package json
import "bytes"
func encodeWithIndent(dst *bytes.Buffer, src []byte, prefix, indentStr string) error {
length := int64(len(src))
indentNum := 0
indentBytes := []byte(indentStr)
for cursor := int64(0); cursor < length; cursor++ {
c := src[cursor]
switch c {
case ' ', '\t', '\n', '\r':
continue
case '"':
if err := dst.WriteByte(c); err != nil {
return err
}
for {
cursor++
if err := dst.WriteByte(src[cursor]); err != nil {
return err
}
switch src[cursor] {
case '\\':
cursor++
if err := dst.WriteByte(src[cursor]); err != nil {
return err
}
case '"':
goto LOOP_END
case nul:
return errUnexpectedEndOfJSON("string", int64(length))
}
}
case '{':
if cursor+1 < length && src[cursor+1] == '}' {
if _, err := dst.Write([]byte{'{', '}'}); err != nil {
return err
}
cursor++
} else {
indentNum++
b := []byte{c, '\n'}
b = append(b, prefix...)
b = append(b, bytes.Repeat(indentBytes, indentNum)...)
if _, err := dst.Write(b); err != nil {
return err
}
}
case '}':
indentNum--
if indentNum < 0 {
return errInvalidCharacter('}', "}", cursor)
}
b := []byte{'\n', c}
b = append(b, prefix...)
b = append(b, bytes.Repeat(indentBytes, indentNum)...)
if _, err := dst.Write(b); err != nil {
return err
}
case '[':
if cursor+1 < length && src[cursor+1] == ']' {
if _, err := dst.Write([]byte{'[', ']'}); err != nil {
return err
}
cursor++
} else {
indentNum++
b := []byte{c, '\n'}
b = append(b, prefix...)
b = append(b, bytes.Repeat(indentBytes, indentNum)...)
if _, err := dst.Write(b); err != nil {
return err
}
}
case ']':
indentNum--
if indentNum < 0 {
return errInvalidCharacter(']', "]", cursor)
}
b := []byte{'\n', c}
b = append(b, prefix...)
b = append(b, bytes.Repeat(indentBytes, indentNum)...)
if _, err := dst.Write(b); err != nil {
return err
}
case ':':
if _, err := dst.Write([]byte{':', ' '}); err != nil {
return err
}
case ',':
b := []byte{',', '\n'}
b = append(b, prefix...)
b = append(b, bytes.Repeat(indentBytes, indentNum)...)
if _, err := dst.Write(b); err != nil {
return err
}
default:
if err := dst.WriteByte(c); err != nil {
return err
}
}
LOOP_END:
}
return nil
}

21
json.go
View File

@ -325,15 +325,7 @@ func (m *RawMessage) UnmarshalJSON(data []byte) error {
// Compact appends to dst the JSON-encoded src with // Compact appends to dst the JSON-encoded src with
// insignificant space characters elided. // insignificant space characters elided.
func Compact(dst *bytes.Buffer, src []byte) error { func Compact(dst *bytes.Buffer, src []byte) error {
var v interface{} return compact(dst, src)
dec := NewDecoder(bytes.NewBuffer(src))
dec.UseNumber()
if err := dec.Decode(&v); err != nil {
return err
}
enc := NewEncoder(dst)
enc.SetEscapeHTML(false)
return enc.Encode(v)
} }
// Indent appends to dst an indented form of the JSON-encoded src. // Indent appends to dst an indented form of the JSON-encoded src.
@ -348,16 +340,7 @@ func Compact(dst *bytes.Buffer, src []byte) error {
// For example, if src has no trailing spaces, neither will dst; // For example, if src has no trailing spaces, neither will dst;
// if src ends in a trailing newline, so will dst. // if src ends in a trailing newline, so will dst.
func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
var v interface{} return encodeWithIndent(dst, src, prefix, indent)
dec := NewDecoder(bytes.NewBuffer(src))
dec.UseNumber()
if err := dec.Decode(&v); err != nil {
return err
}
enc := NewEncoder(dst)
enc.SetEscapeHTML(false)
enc.SetIndent(prefix, indent)
return enc.Encode(v)
} }
// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029

View File

@ -2,6 +2,9 @@ package json_test
import ( import (
"bytes" "bytes"
"math"
"math/rand"
"reflect"
"testing" "testing"
"github.com/goccy/go-json" "github.com/goccy/go-json"
@ -116,3 +119,184 @@ func TestIndent(t *testing.T) {
} }
} }
} }
// TestCompactBig runs Compact over the large random document held in jsonBig
// and expects the output to be byte-for-byte equal to the input.
func TestCompactBig(t *testing.T) {
	initBig()

	out := new(bytes.Buffer)
	err := json.Compact(out, jsonBig)
	if err != nil {
		t.Fatalf("Compact: %v", err)
	}

	if got := out.Bytes(); !bytes.Equal(got, jsonBig) {
		t.Error("Compact(jsonBig) != jsonBig")
		diff(t, got, jsonBig)
	}
}
// TestIndentBig checks three properties of Indent on the large random
// document held in jsonBig: the indented form differs in size from the input,
// indenting the indented form again changes nothing, and compacting the
// indented form gives back the input.
func TestIndentBig(t *testing.T) {
	t.Parallel()
	initBig()

	indented := new(bytes.Buffer)
	if err := json.Indent(indented, jsonBig, "", "\t"); err != nil {
		t.Fatalf("Indent1: %v", err)
	}
	once := indented.Bytes()
	// jsonBig carries no optional whitespace, so indentation has to add bytes.
	if len(once) == len(jsonBig) {
		t.Fatalf("Indent(jsonBig) did not get bigger")
	}

	// Indenting a second time must be a no-op.
	scratch := new(bytes.Buffer)
	if err := json.Indent(scratch, once, "", "\t"); err != nil {
		t.Fatalf("Indent2: %v", err)
	}
	if twice := scratch.Bytes(); !bytes.Equal(twice, once) {
		t.Error("Indent(Indent(jsonBig)) != Indent(jsonBig)")
		diff(t, twice, once)
		return
	}

	// Compacting the indented document must restore the original bytes.
	scratch.Reset()
	if err := json.Compact(scratch, once); err != nil {
		t.Fatalf("Compact: %v", err)
	}
	if restored := scratch.Bytes(); !bytes.Equal(restored, jsonBig) {
		t.Error("Compact(Indent(jsonBig)) != jsonBig")
		diff(t, restored, jsonBig)
	}
}
// indentErrorTest pairs a JSON input with an error value to compare against
// the error returned for that input.
type indentErrorTest struct {
	in  string
	err error
}

// indentErrorTests lists malformed documents together with the syntax error
// recorded for each of them.
var indentErrorTests = []indentErrorTest{
	{
		in:  `{"X": "foo", "Y"}`,
		err: json.NewSyntaxError("invalid character '}' after object key", 17),
	},
	{
		in:  `{"X": "foo" "Y": "bar"}`,
		err: json.NewSyntaxError("invalid character '\"' after object key:value pair", 13),
	},
}
// TestIndentErrors runs Indent over every entry of indentErrorTests and, when
// Indent returns an error, requires it to be deeply equal to the entry's err.
//
// NOTE: an entry for which Indent returns nil is accepted without complaint.
func TestIndentErrors(t *testing.T) {
	for i := range indentErrorTests {
		tc := indentErrorTests[i]
		out := bytes.NewBuffer(make([]uint8, 0))
		err := json.Indent(out, []uint8(tc.in), "", "")
		if err == nil {
			continue
		}
		if !reflect.DeepEqual(err, tc.err) {
			t.Errorf("#%d: Indent: %#v", i, err)
		}
	}
}
// diff reports, through t.Errorf, the first index at which a and b stop
// agreeing (or at which the shorter one ends), together with a short excerpt
// of both slices starting up to ten bytes before that index.
func diff(t *testing.T, a, b []byte) {
	at := 0
	for at < len(a) && at < len(b) && a[at] == b[at] {
		at++
	}

	from := at - 10
	if from < 0 {
		from = 0
	}
	t.Errorf("diverge at %d: «%s» vs «%s»", at, trim(a[from:]), trim(b[from:]))
}
// trim returns b unchanged when it holds at most 20 bytes, and its first 20
// bytes otherwise.
func trim(b []byte) []byte {
	const limit = 20
	if len(b) <= limit {
		return b
	}
	return b[:limit]
}
// Generate a random JSON object.
var jsonBig []byte
func initBig() {
if len(jsonBig) > 0 {
return
}
n := 10000
if testing.Short() {
n = 100
}
v := genValue(n)
b, err := json.Marshal(v)
if err != nil {
panic(err)
}
jsonBig = b
}
// genValue returns a random value. For n greater than one it is an array or
// a map built with size n, chosen with equal probability; otherwise it is a
// random bool, a normally distributed float64, or a random string.
func genValue(n int) interface{} {
	if n > 1 {
		if rand.Intn(2) == 0 {
			return genArray(n)
		}
		return genMap(n)
	}

	switch kind := rand.Intn(3); kind {
	case 0:
		coin := rand.Intn(2)
		return coin == 0
	case 1:
		return rand.NormFloat64()
	case 2:
		return genString(30)
	default:
		panic("unreachable")
	}
}
// genString returns a random string. Its length in runes is drawn from a
// normal distribution scaled by stddev and shifted by stddev/2 (absolute
// value, truncated); each rune is drawn from a normal distribution with
// mean 32 and standard deviation 64 (absolute value), capped at 0x10ffff.
func genString(stddev float64) string {
	length := int(math.Abs(rand.NormFloat64()*stddev + stddev/2))
	runes := make([]rune, 0, length)
	for len(runes) < length {
		point := math.Min(math.Abs(rand.NormFloat64()*64+32), 0x10ffff)
		runes = append(runes, rune(point))
	}
	return string(runes)
}
// genArray returns a slice of random values. The element count is random,
// limited to n and never below one; the size budget n is divided among the
// elements, each of which is produced by genValue with its share.
func genArray(n int) []interface{} {
	count := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2)))
	if count > n {
		count = n
	}
	if count < 1 {
		count = 1
	}

	items := make([]interface{}, 0, count)
	for i := 0; i < count; i++ {
		lo, hi := (i*n)/count, ((i+1)*n)/count
		items = append(items, genValue(hi-lo))
	}
	return items
}
func genMap(n int) map[string]interface{} {
f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2)))
if f > n {
f = n
}
if n > 0 && f == 0 {
f = 1
}
x := make(map[string]interface{})
for i := 0; i < f; i++ {
x[genString(10)] = genValue(((i+1)*n)/f - (i*n)/f)
}
return x
}