From 2a0ee24e6ec4b14a1eeefe7c9cb85d97b70ad668 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Thu, 30 Dec 2021 11:54:29 +0900 Subject: [PATCH] Add encoding option for performance --- encode.go | 9 +- internal/encoder/option.go | 1 + internal/encoder/string.go | 603 ++++++++++++------------------- internal/encoder/string_table.go | 415 +++++++++++++++++++++ option.go | 17 + 5 files changed, 664 insertions(+), 381 deletions(-) create mode 100644 internal/encoder/string_table.go diff --git a/encode.go b/encode.go index 7f198bd..b55ec04 100644 --- a/encode.go +++ b/encode.go @@ -61,6 +61,7 @@ func (e *Encoder) encodeWithOption(ctx *encoder.RuntimeContext, v interface{}, o if e.enabledHTMLEscape { ctx.Option.Flag |= encoder.HTMLEscapeOption } + ctx.Option.Flag |= encoder.NormalizeUTF8Option for _, optFunc := range optFuncs { optFunc(ctx.Option) } @@ -111,7 +112,7 @@ func (e *Encoder) SetIndent(prefix, indent string) { func marshalContext(ctx context.Context, v interface{}, optFuncs ...EncodeOptionFunc) ([]byte, error) { rctx := encoder.TakeRuntimeContext() rctx.Option.Flag = 0 - rctx.Option.Flag = encoder.HTMLEscapeOption | encoder.ContextOption + rctx.Option.Flag = encoder.HTMLEscapeOption | encoder.NormalizeUTF8Option | encoder.ContextOption rctx.Option.Context = ctx for _, optFunc := range optFuncs { optFunc(rctx.Option) @@ -139,7 +140,7 @@ func marshal(v interface{}, optFuncs ...EncodeOptionFunc) ([]byte, error) { ctx := encoder.TakeRuntimeContext() ctx.Option.Flag = 0 - ctx.Option.Flag |= encoder.HTMLEscapeOption + ctx.Option.Flag |= (encoder.HTMLEscapeOption | encoder.NormalizeUTF8Option) for _, optFunc := range optFuncs { optFunc(ctx.Option) } @@ -166,7 +167,7 @@ func marshalNoEscape(v interface{}) ([]byte, error) { ctx := encoder.TakeRuntimeContext() ctx.Option.Flag = 0 - ctx.Option.Flag |= encoder.HTMLEscapeOption + ctx.Option.Flag |= (encoder.HTMLEscapeOption | encoder.NormalizeUTF8Option) buf, err := encodeNoEscape(ctx, v) if err != nil { @@ -190,7 +191,7 @@ func marshalIndent(v interface{}, prefix, indent string, optFuncs ...EncodeOptio ctx := encoder.TakeRuntimeContext() ctx.Option.Flag = 0 - ctx.Option.Flag |= (encoder.HTMLEscapeOption | encoder.IndentOption) + ctx.Option.Flag |= (encoder.HTMLEscapeOption | encoder.NormalizeUTF8Option | encoder.IndentOption) for _, optFunc := range optFuncs { optFunc(ctx.Option) } diff --git a/internal/encoder/option.go b/internal/encoder/option.go index f5f1f04..b30964c 100644 --- a/internal/encoder/option.go +++ b/internal/encoder/option.go @@ -11,6 +11,7 @@ const ( DebugOption ColorizeOption ContextOption + NormalizeUTF8Option ) type Option struct { diff --git a/internal/encoder/string.go b/internal/encoder/string.go index 236e2e9..e4152b2 100644 --- a/internal/encoder/string.go +++ b/internal/encoder/string.go @@ -11,341 +11,6 @@ const ( msb = 0x8080808080808080 ) -var needEscapeWithHTML = [256]bool{ - '"': true, - '&': true, - '<': true, - '>': true, - '\\': true, - 0x00: true, - 0x01: true, - 0x02: true, - 0x03: true, - 0x04: true, - 0x05: true, - 0x06: true, - 0x07: true, - 0x08: true, - 0x09: true, - 0x0a: true, - 0x0b: true, - 0x0c: true, - 0x0d: true, - 0x0e: true, - 0x0f: true, - 0x10: true, - 0x11: true, - 0x12: true, - 0x13: true, - 0x14: true, - 0x15: true, - 0x16: true, - 0x17: true, - 0x18: true, - 0x19: true, - 0x1a: true, - 0x1b: true, - 0x1c: true, - 0x1d: true, - 0x1e: true, - 0x1f: true, - /* 0x20 - 0x7f */ - 0x80: true, - 0x81: true, - 0x82: true, - 0x83: true, - 0x84: true, - 0x85: true, - 0x86: true, - 0x87: true, - 0x88: true, - 0x89: true, - 0x8a: true, - 0x8b: true, - 0x8c: true, - 0x8d: true, - 0x8e: true, - 0x8f: true, - 0x90: true, - 0x91: true, - 0x92: true, - 0x93: true, - 0x94: true, - 0x95: true, - 0x96: true, - 0x97: true, - 0x98: true, - 0x99: true, - 0x9a: true, - 0x9b: true, - 0x9c: true, - 0x9d: true, - 0x9e: true, - 0x9f: true, - 0xa0: true, - 0xa1: true, - 0xa2: true, - 0xa3: true, - 0xa4: true, - 0xa5: true, - 0xa6: true, - 0xa7: true, - 0xa8: true, - 0xa9: true, - 0xaa: true, - 0xab: true, - 0xac: true, - 0xad: true, - 0xae: true, - 0xaf: true, - 0xb0: true, - 0xb1: true, - 0xb2: true, - 0xb3: true, - 0xb4: true, - 0xb5: true, - 0xb6: true, - 0xb7: true, - 0xb8: true, - 0xb9: true, - 0xba: true, - 0xbb: true, - 0xbc: true, - 0xbd: true, - 0xbe: true, - 0xbf: true, - 0xc0: true, - 0xc1: true, - 0xc2: true, - 0xc3: true, - 0xc4: true, - 0xc5: true, - 0xc6: true, - 0xc7: true, - 0xc8: true, - 0xc9: true, - 0xca: true, - 0xcb: true, - 0xcc: true, - 0xcd: true, - 0xce: true, - 0xcf: true, - 0xd0: true, - 0xd1: true, - 0xd2: true, - 0xd3: true, - 0xd4: true, - 0xd5: true, - 0xd6: true, - 0xd7: true, - 0xd8: true, - 0xd9: true, - 0xda: true, - 0xdb: true, - 0xdc: true, - 0xdd: true, - 0xde: true, - 0xdf: true, - 0xe0: true, - 0xe1: true, - 0xe2: true, - 0xe3: true, - 0xe4: true, - 0xe5: true, - 0xe6: true, - 0xe7: true, - 0xe8: true, - 0xe9: true, - 0xea: true, - 0xeb: true, - 0xec: true, - 0xed: true, - 0xee: true, - 0xef: true, - 0xf0: true, - 0xf1: true, - 0xf2: true, - 0xf3: true, - 0xf4: true, - 0xf5: true, - 0xf6: true, - 0xf7: true, - 0xf8: true, - 0xf9: true, - 0xfa: true, - 0xfb: true, - 0xfc: true, - 0xfd: true, - 0xfe: true, - 0xff: true, -} - -var needEscape = [256]bool{ - '"': true, - '\\': true, - 0x00: true, - 0x01: true, - 0x02: true, - 0x03: true, - 0x04: true, - 0x05: true, - 0x06: true, - 0x07: true, - 0x08: true, - 0x09: true, - 0x0a: true, - 0x0b: true, - 0x0c: true, - 0x0d: true, - 0x0e: true, - 0x0f: true, - 0x10: true, - 0x11: true, - 0x12: true, - 0x13: true, - 0x14: true, - 0x15: true, - 0x16: true, - 0x17: true, - 0x18: true, - 0x19: true, - 0x1a: true, - 0x1b: true, - 0x1c: true, - 0x1d: true, - 0x1e: true, - 0x1f: true, - /* 0x20 - 0x7f */ - 0x80: true, - 0x81: true, - 0x82: true, - 0x83: true, - 0x84: true, - 0x85: true, - 0x86: true, - 0x87: true, - 0x88: true, - 0x89: true, - 0x8a: true, - 0x8b: true, - 0x8c: true, - 0x8d: true, - 0x8e: true, - 0x8f: true, - 0x90: true, - 0x91: true, - 0x92: true, - 0x93: true, - 0x94: true, - 0x95: true, - 0x96: true, - 0x97: true, - 0x98: true, - 0x99: true, - 0x9a: true, - 0x9b: true, - 0x9c: true, - 0x9d: true, - 0x9e: true, - 0x9f: true, - 0xa0: true, - 0xa1: true, - 0xa2: true, - 0xa3: true, - 0xa4: true, - 0xa5: true, - 0xa6: true, - 0xa7: true, - 0xa8: true, - 0xa9: true, - 0xaa: true, - 0xab: true, - 0xac: true, - 0xad: true, - 0xae: true, - 0xaf: true, - 0xb0: true, - 0xb1: true, - 0xb2: true, - 0xb3: true, - 0xb4: true, - 0xb5: true, - 0xb6: true, - 0xb7: true, - 0xb8: true, - 0xb9: true, - 0xba: true, - 0xbb: true, - 0xbc: true, - 0xbd: true, - 0xbe: true, - 0xbf: true, - 0xc0: true, - 0xc1: true, - 0xc2: true, - 0xc3: true, - 0xc4: true, - 0xc5: true, - 0xc6: true, - 0xc7: true, - 0xc8: true, - 0xc9: true, - 0xca: true, - 0xcb: true, - 0xcc: true, - 0xcd: true, - 0xce: true, - 0xcf: true, - 0xd0: true, - 0xd1: true, - 0xd2: true, - 0xd3: true, - 0xd4: true, - 0xd5: true, - 0xd6: true, - 0xd7: true, - 0xd8: true, - 0xd9: true, - 0xda: true, - 0xdb: true, - 0xdc: true, - 0xdd: true, - 0xde: true, - 0xdf: true, - 0xe0: true, - 0xe1: true, - 0xe2: true, - 0xe3: true, - 0xe4: true, - 0xe5: true, - 0xe6: true, - 0xe7: true, - 0xe8: true, - 0xe9: true, - 0xea: true, - 0xeb: true, - 0xec: true, - 0xed: true, - 0xee: true, - 0xef: true, - 0xf0: true, - 0xf1: true, - 0xf2: true, - 0xf3: true, - 0xf4: true, - 0xf5: true, - 0xf6: true, - 0xf7: true, - 0xf8: true, - 0xf9: true, - 0xfa: true, - 0xfb: true, - 0xfc: true, - 0xfd: true, - 0xfe: true, - 0xff: true, -} - var hex = "0123456789abcdef" //nolint:govet @@ -358,9 +23,19 @@ func stringToUint64Slice(s string) []uint64 { } func AppendString(ctx *RuntimeContext, buf []byte, s string) []byte { - if ctx.Option.Flag&HTMLEscapeOption == 0 { - return appendString(buf, s) + if ctx.Option.Flag&HTMLEscapeOption != 0 { + if ctx.Option.Flag&NormalizeUTF8Option != 0 { + return appendNormalizedHTMLString(buf, s) + } + return appendHTMLString(buf, s) } + if ctx.Option.Flag&NormalizeUTF8Option != 0 { + return appendNormalizedString(buf, s) + } + return appendString(buf, s) +} + +func appendNormalizedHTMLString(buf []byte, s string) []byte { valLen := len(s) if valLen == 0 { return append(buf, `""`...) @@ -387,7 +62,7 @@ func AppendString(ctx *RuntimeContext, buf []byte, s string) []byte { } } for i := len(chunks) * 8; i < valLen; i++ { - if needEscapeWithHTML[s[i]] { + if needEscapeHTMLNormalizeUTF8[s[i]] { j = i goto ESCAPE_END } @@ -399,7 +74,7 @@ ESCAPE_END: for j < valLen { c := s[j] - if !needEscapeWithHTML[c] { + if !needEscapeHTMLNormalizeUTF8[c] { // fast path: most of the time, printable ascii characters are used j++ continue @@ -442,6 +117,217 @@ ESCAPE_END: j = j + 1 continue + case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F + buf = append(buf, s[i:j]...) + buf = append(buf, `\u00`...) + buf = append(buf, hex[c>>4], hex[c&0xF]) + i = j + 1 + j = j + 1 + continue + } + state, size := decodeRuneInString(s[j:]) + switch state { + case runeErrorState: + buf = append(buf, s[i:j]...) + buf = append(buf, `\ufffd`...) + i = j + 1 + j = j + 1 + continue + // U+2028 is LINE SEPARATOR. + // U+2029 is PARAGRAPH SEPARATOR. + // They are both technically valid characters in JSON strings, + // but don't work in JSONP, which has to be evaluated as JavaScript, + // and can lead to security holes there. It is valid JSON to + // escape them, so we do so unconditionally. + // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. + case lineSepState: + buf = append(buf, s[i:j]...) + buf = append(buf, `\u2028`...) + i = j + 3 + j = j + 3 + continue + case paragraphSepState: + buf = append(buf, s[i:j]...) + buf = append(buf, `\u2029`...) + i = j + 3 + j = j + 3 + continue + } + j += size + } + + return append(append(buf, s[i:]...), '"') +} + +func appendHTMLString(buf []byte, s string) []byte { + valLen := len(s) + if valLen == 0 { + return append(buf, `""`...) + } + buf = append(buf, '"') + var ( + i, j int + ) + if valLen >= 8 { + chunks := stringToUint64Slice(s) + for _, n := range chunks { + // combine masks before checking for the MSB of each byte. We include + // `n` in the mask to check whether any of the *input* byte MSBs were + // set (i.e. the byte was outside the ASCII range). + mask := n | (n - (lsb * 0x20)) | + ((n ^ (lsb * '"')) - lsb) | + ((n ^ (lsb * '\\')) - lsb) | + ((n ^ (lsb * '<')) - lsb) | + ((n ^ (lsb * '>')) - lsb) | + ((n ^ (lsb * '&')) - lsb) + if (mask & msb) != 0 { + j = bits.TrailingZeros64(mask&msb) / 8 + goto ESCAPE_END + } + } + for i := len(chunks) * 8; i < valLen; i++ { + if needEscapeHTML[s[i]] { + j = i + goto ESCAPE_END + } + } + // no found any escape characters. + return append(append(buf, s...), '"') + } +ESCAPE_END: + for j < valLen { + c := s[j] + + if !needEscapeHTML[c] { + // fast path: most of the time, printable ascii characters are used + j++ + continue + } + + switch c { + case '\\', '"': + buf = append(buf, s[i:j]...) + buf = append(buf, '\\', c) + i = j + 1 + j = j + 1 + continue + + case '\n': + buf = append(buf, s[i:j]...) + buf = append(buf, '\\', 'n') + i = j + 1 + j = j + 1 + continue + + case '\r': + buf = append(buf, s[i:j]...) + buf = append(buf, '\\', 'r') + i = j + 1 + j = j + 1 + continue + + case '\t': + buf = append(buf, s[i:j]...) + buf = append(buf, '\\', 't') + i = j + 1 + j = j + 1 + continue + + case '<', '>', '&': + buf = append(buf, s[i:j]...) + buf = append(buf, `\u00`...) + buf = append(buf, hex[c>>4], hex[c&0xF]) + i = j + 1 + j = j + 1 + continue + + case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F + buf = append(buf, s[i:j]...) + buf = append(buf, `\u00`...) + buf = append(buf, hex[c>>4], hex[c&0xF]) + i = j + 1 + j = j + 1 + continue + } + j++ + } + + return append(append(buf, s[i:]...), '"') +} + +func appendNormalizedString(buf []byte, s string) []byte { + valLen := len(s) + if valLen == 0 { + return append(buf, `""`...) + } + buf = append(buf, '"') + var ( + i, j int + ) + if valLen >= 8 { + chunks := stringToUint64Slice(s) + for _, n := range chunks { + // combine masks before checking for the MSB of each byte. We include + // `n` in the mask to check whether any of the *input* byte MSBs were + // set (i.e. the byte was outside the ASCII range). + mask := n | (n - (lsb * 0x20)) | + ((n ^ (lsb * '"')) - lsb) | + ((n ^ (lsb * '\\')) - lsb) + if (mask & msb) != 0 { + j = bits.TrailingZeros64(mask&msb) / 8 + goto ESCAPE_END + } + } + valLen := len(s) + for i := len(chunks) * 8; i < valLen; i++ { + if needEscapeNormalizeUTF8[s[i]] { + j = i + goto ESCAPE_END + } + } + return append(append(buf, s...), '"') + } +ESCAPE_END: + for j < valLen { + c := s[j] + + if !needEscapeNormalizeUTF8[c] { + // fast path: most of the time, printable ascii characters are used + j++ + continue + } + + switch c { + case '\\', '"': + buf = append(buf, s[i:j]...) + buf = append(buf, '\\', c) + i = j + 1 + j = j + 1 + continue + + case '\n': + buf = append(buf, s[i:j]...) + buf = append(buf, '\\', 'n') + i = j + 1 + j = j + 1 + continue + + case '\r': + buf = append(buf, s[i:j]...) + buf = append(buf, '\\', 'r') + i = j + 1 + j = j + 1 + continue + + case '\t': + buf = append(buf, s[i:j]...) + buf = append(buf, '\\', 't') + i = j + 1 + j = j + 1 + continue + case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F buf = append(buf, s[i:j]...) @@ -557,14 +443,6 @@ ESCAPE_END: j = j + 1 continue - case '<', '>', '&': - buf = append(buf, s[i:j]...) - buf = append(buf, `\u00`...) - buf = append(buf, hex[c>>4], hex[c&0xF]) - i = j + 1 - j = j + 1 - continue - case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F buf = append(buf, s[i:j]...) @@ -574,36 +452,7 @@ ESCAPE_END: j = j + 1 continue } - - state, size := decodeRuneInString(s[j:]) - switch state { - case runeErrorState: - buf = append(buf, s[i:j]...) - buf = append(buf, `\ufffd`...) - i = j + 1 - j = j + 1 - continue - // U+2028 is LINE SEPARATOR. - // U+2029 is PARAGRAPH SEPARATOR. - // They are both technically valid characters in JSON strings, - // but don't work in JSONP, which has to be evaluated as JavaScript, - // and can lead to security holes there. It is valid JSON to - // escape them, so we do so unconditionally. - // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. - case lineSepState: - buf = append(buf, s[i:j]...) - buf = append(buf, `\u2028`...) - i = j + 3 - j = j + 3 - continue - case paragraphSepState: - buf = append(buf, s[i:j]...) - buf = append(buf, `\u2029`...) - i = j + 3 - j = j + 3 - continue - } - j += size + j++ } return append(append(buf, s[i:]...), '"') diff --git a/internal/encoder/string_table.go b/internal/encoder/string_table.go new file mode 100644 index 0000000..ebe42c9 --- /dev/null +++ b/internal/encoder/string_table.go @@ -0,0 +1,415 @@ +package encoder + +var needEscapeHTMLNormalizeUTF8 = [256]bool{ + '"': true, + '&': true, + '<': true, + '>': true, + '\\': true, + 0x00: true, + 0x01: true, + 0x02: true, + 0x03: true, + 0x04: true, + 0x05: true, + 0x06: true, + 0x07: true, + 0x08: true, + 0x09: true, + 0x0a: true, + 0x0b: true, + 0x0c: true, + 0x0d: true, + 0x0e: true, + 0x0f: true, + 0x10: true, + 0x11: true, + 0x12: true, + 0x13: true, + 0x14: true, + 0x15: true, + 0x16: true, + 0x17: true, + 0x18: true, + 0x19: true, + 0x1a: true, + 0x1b: true, + 0x1c: true, + 0x1d: true, + 0x1e: true, + 0x1f: true, + /* 0x20 - 0x7f */ + 0x80: true, + 0x81: true, + 0x82: true, + 0x83: true, + 0x84: true, + 0x85: true, + 0x86: true, + 0x87: true, + 0x88: true, + 0x89: true, + 0x8a: true, + 0x8b: true, + 0x8c: true, + 0x8d: true, + 0x8e: true, + 0x8f: true, + 0x90: true, + 0x91: true, + 0x92: true, + 0x93: true, + 0x94: true, + 0x95: true, + 0x96: true, + 0x97: true, + 0x98: true, + 0x99: true, + 0x9a: true, + 0x9b: true, + 0x9c: true, + 0x9d: true, + 0x9e: true, + 0x9f: true, + 0xa0: true, + 0xa1: true, + 0xa2: true, + 0xa3: true, + 0xa4: true, + 0xa5: true, + 0xa6: true, + 0xa7: true, + 0xa8: true, + 0xa9: true, + 0xaa: true, + 0xab: true, + 0xac: true, + 0xad: true, + 0xae: true, + 0xaf: true, + 0xb0: true, + 0xb1: true, + 0xb2: true, + 0xb3: true, + 0xb4: true, + 0xb5: true, + 0xb6: true, + 0xb7: true, + 0xb8: true, + 0xb9: true, + 0xba: true, + 0xbb: true, + 0xbc: true, + 0xbd: true, + 0xbe: true, + 0xbf: true, + 0xc0: true, + 0xc1: true, + 0xc2: true, + 0xc3: true, + 0xc4: true, + 0xc5: true, + 0xc6: true, + 0xc7: true, + 0xc8: true, + 0xc9: true, + 0xca: true, + 0xcb: true, + 0xcc: true, + 0xcd: true, + 0xce: true, + 0xcf: true, + 0xd0: true, + 0xd1: true, + 0xd2: true, + 0xd3: true, + 0xd4: true, + 0xd5: true, + 0xd6: true, + 0xd7: true, + 0xd8: true, + 0xd9: true, + 0xda: true, + 0xdb: true, + 0xdc: true, + 0xdd: true, + 0xde: true, + 0xdf: true, + 0xe0: true, + 0xe1: true, + 0xe2: true, + 0xe3: true, + 0xe4: true, + 0xe5: true, + 0xe6: true, + 0xe7: true, + 0xe8: true, + 0xe9: true, + 0xea: true, + 0xeb: true, + 0xec: true, + 0xed: true, + 0xee: true, + 0xef: true, + 0xf0: true, + 0xf1: true, + 0xf2: true, + 0xf3: true, + 0xf4: true, + 0xf5: true, + 0xf6: true, + 0xf7: true, + 0xf8: true, + 0xf9: true, + 0xfa: true, + 0xfb: true, + 0xfc: true, + 0xfd: true, + 0xfe: true, + 0xff: true, +} + +var needEscapeNormalizeUTF8 = [256]bool{ + '"': true, + '\\': true, + 0x00: true, + 0x01: true, + 0x02: true, + 0x03: true, + 0x04: true, + 0x05: true, + 0x06: true, + 0x07: true, + 0x08: true, + 0x09: true, + 0x0a: true, + 0x0b: true, + 0x0c: true, + 0x0d: true, + 0x0e: true, + 0x0f: true, + 0x10: true, + 0x11: true, + 0x12: true, + 0x13: true, + 0x14: true, + 0x15: true, + 0x16: true, + 0x17: true, + 0x18: true, + 0x19: true, + 0x1a: true, + 0x1b: true, + 0x1c: true, + 0x1d: true, + 0x1e: true, + 0x1f: true, + /* 0x20 - 0x7f */ + 0x80: true, + 0x81: true, + 0x82: true, + 0x83: true, + 0x84: true, + 0x85: true, + 0x86: true, + 0x87: true, + 0x88: true, + 0x89: true, + 0x8a: true, + 0x8b: true, + 0x8c: true, + 0x8d: true, + 0x8e: true, + 0x8f: true, + 0x90: true, + 0x91: true, + 0x92: true, + 0x93: true, + 0x94: true, + 0x95: true, + 0x96: true, + 0x97: true, + 0x98: true, + 0x99: true, + 0x9a: true, + 0x9b: true, + 0x9c: true, + 0x9d: true, + 0x9e: true, + 0x9f: true, + 0xa0: true, + 0xa1: true, + 0xa2: true, + 0xa3: true, + 0xa4: true, + 0xa5: true, + 0xa6: true, + 0xa7: true, + 0xa8: true, + 0xa9: true, + 0xaa: true, + 0xab: true, + 0xac: true, + 0xad: true, + 0xae: true, + 0xaf: true, + 0xb0: true, + 0xb1: true, + 0xb2: true, + 0xb3: true, + 0xb4: true, + 0xb5: true, + 0xb6: true, + 0xb7: true, + 0xb8: true, + 0xb9: true, + 0xba: true, + 0xbb: true, + 0xbc: true, + 0xbd: true, + 0xbe: true, + 0xbf: true, + 0xc0: true, + 0xc1: true, + 0xc2: true, + 0xc3: true, + 0xc4: true, + 0xc5: true, + 0xc6: true, + 0xc7: true, + 0xc8: true, + 0xc9: true, + 0xca: true, + 0xcb: true, + 0xcc: true, + 0xcd: true, + 0xce: true, + 0xcf: true, + 0xd0: true, + 0xd1: true, + 0xd2: true, + 0xd3: true, + 0xd4: true, + 0xd5: true, + 0xd6: true, + 0xd7: true, + 0xd8: true, + 0xd9: true, + 0xda: true, + 0xdb: true, + 0xdc: true, + 0xdd: true, + 0xde: true, + 0xdf: true, + 0xe0: true, + 0xe1: true, + 0xe2: true, + 0xe3: true, + 0xe4: true, + 0xe5: true, + 0xe6: true, + 0xe7: true, + 0xe8: true, + 0xe9: true, + 0xea: true, + 0xeb: true, + 0xec: true, + 0xed: true, + 0xee: true, + 0xef: true, + 0xf0: true, + 0xf1: true, + 0xf2: true, + 0xf3: true, + 0xf4: true, + 0xf5: true, + 0xf6: true, + 0xf7: true, + 0xf8: true, + 0xf9: true, + 0xfa: true, + 0xfb: true, + 0xfc: true, + 0xfd: true, + 0xfe: true, + 0xff: true, +} + +var needEscapeHTML = [256]bool{ + '"': true, + '&': true, + '<': true, + '>': true, + '\\': true, + 0x00: true, + 0x01: true, + 0x02: true, + 0x03: true, + 0x04: true, + 0x05: true, + 0x06: true, + 0x07: true, + 0x08: true, + 0x09: true, + 0x0a: true, + 0x0b: true, + 0x0c: true, + 0x0d: true, + 0x0e: true, + 0x0f: true, + 0x10: true, + 0x11: true, + 0x12: true, + 0x13: true, + 0x14: true, + 0x15: true, + 0x16: true, + 0x17: true, + 0x18: true, + 0x19: true, + 0x1a: true, + 0x1b: true, + 0x1c: true, + 0x1d: true, + 0x1e: true, + 0x1f: true, + /* 0x20 - 0xff */ +} + +var needEscape = [256]bool{ + '"': true, + '\\': true, + 0x00: true, + 0x01: true, + 0x02: true, + 0x03: true, + 0x04: true, + 0x05: true, + 0x06: true, + 0x07: true, + 0x08: true, + 0x09: true, + 0x0a: true, + 0x0b: true, + 0x0c: true, + 0x0d: true, + 0x0e: true, + 0x0f: true, + 0x10: true, + 0x11: true, + 0x12: true, + 0x13: true, + 0x14: true, + 0x15: true, + 0x16: true, + 0x17: true, + 0x18: true, + 0x19: true, + 0x1a: true, + 0x1b: true, + 0x1c: true, + 0x1d: true, + 0x1e: true, + 0x1f: true, + /* 0x20 - 0xff */ +} diff --git a/option.go b/option.go index ad65091..0138d77 100644 --- a/option.go +++ b/option.go @@ -15,6 +15,23 @@ func UnorderedMap() EncodeOptionFunc { } } +// DisableHTMLEscape disables escaping of HTML characters ( '&', '<', '>' ) when encoding string. +func DisableHTMLEscape() EncodeOptionFunc { + return func(opt *EncodeOption) { + opt.Flag &= ^encoder.HTMLEscapeOption + } +} + +// DisableNormalizeUTF8 +// By default, when encoding string, UTF8 characters in the range of 0x80 - 0xFF are processed by applying \ufffd for invalid code and escaping for \u2028 and \u2029. +// This option disables this behaviour. You can expect faster speeds by applying this option, but be careful. +// encoding/json implements here: https://github.com/golang/go/blob/6178d25fc0b28724b1b5aec2b1b74fc06d9294c7/src/encoding/json/encode.go#L1067-L1093. +func DisableNormalizeUTF8() EncodeOptionFunc { + return func(opt *EncodeOption) { + opt.Flag &= ^encoder.NormalizeUTF8Option + } +} + // Debug outputs debug information when panic occurs during encoding. func Debug() EncodeOptionFunc { return func(opt *EncodeOption) {