package encoder

import (
	"math/bits"
	"reflect"
	"unsafe"

	"github.com/goccy/go-json/internal/runtime"
)

// Bit patterns used by the word-at-a-time scan in AppendString.
const (
	// lsb has the lowest bit of every byte set.
	lsb = 0x0101010101010101
	// msb has the highest bit of every byte set.
	msb = 0x8080808080808080
)

// needEscapeWithHTML reports, per byte value, whether AppendString must take
// the slow path for that byte when HTML escaping is enabled: the JSON
// specials '"' and '\\', the HTML-sensitive '&', '<' and '>', every control
// byte below 0x20 and every byte at or above 0x80 (start or continuation of a
// multi-byte sequence, which has to be decoded and validated).
var needEscapeWithHTML = [256]bool{
	'"': true, '&': true, '<': true, '>': true, '\\': true,
	0x00: true, 0x01: true, 0x02: true, 0x03: true, 0x04: true, 0x05: true, 0x06: true, 0x07: true,
	0x08: true, 0x09: true, 0x0a: true, 0x0b: true, 0x0c: true, 0x0d: true, 0x0e: true, 0x0f: true,
	0x10: true, 0x11: true, 0x12: true, 0x13: true, 0x14: true, 0x15: true, 0x16: true, 0x17: true,
	0x18: true, 0x19: true, 0x1a: true, 0x1b: true, 0x1c: true, 0x1d: true, 0x1e: true, 0x1f: true,
	/* 0x20 - 0x7f */
	0x80: true, 0x81: true, 0x82: true, 0x83: true, 0x84: true, 0x85: true, 0x86: true, 0x87: true,
	0x88: true, 0x89: true, 0x8a: true, 0x8b: true, 0x8c: true, 0x8d: true, 0x8e: true, 0x8f: true,
	0x90: true, 0x91: true, 0x92: true, 0x93: true, 0x94: true, 0x95: true, 0x96: true, 0x97: true,
	0x98: true, 0x99: true, 0x9a: true, 0x9b: true, 0x9c: true, 0x9d: true, 0x9e: true, 0x9f: true,
	0xa0: true, 0xa1: true, 0xa2: true, 0xa3: true, 0xa4: true, 0xa5: true, 0xa6: true, 0xa7: true,
	0xa8: true, 0xa9: true, 0xaa: true, 0xab: true, 0xac: true, 0xad: true, 0xae: true, 0xaf: true,
	0xb0: true, 0xb1: true, 0xb2: true, 0xb3: true, 0xb4: true, 0xb5: true, 0xb6: true, 0xb7: true,
	0xb8: true, 0xb9: true, 0xba: true, 0xbb: true, 0xbc: true, 0xbd: true, 0xbe: true, 0xbf: true,
	0xc0: true, 0xc1: true, 0xc2: true, 0xc3: true, 0xc4: true, 0xc5: true, 0xc6: true, 0xc7: true,
	0xc8: true, 0xc9: true, 0xca: true, 0xcb: true, 0xcc: true, 0xcd: true, 0xce: true, 0xcf: true,
	0xd0: true, 0xd1: true, 0xd2: true, 0xd3: true, 0xd4: true, 0xd5: true, 0xd6: true, 0xd7: true,
	0xd8: true, 0xd9: true, 0xda: true, 0xdb: true, 0xdc: true, 0xdd: true, 0xde: true, 0xdf: true,
	0xe0: true, 0xe1: true, 0xe2: true, 0xe3: true, 0xe4: true, 0xe5: true, 0xe6: true, 0xe7: true,
	0xe8: true, 0xe9: true, 0xea: true, 0xeb: true, 0xec: true, 0xed: true, 0xee: true, 0xef: true,
	0xf0: true, 0xf1: true, 0xf2: true, 0xf3: true, 0xf4: true, 0xf5: true, 0xf6: true, 0xf7: true,
	0xf8: true, 0xf9: true, 0xfa: true, 0xfb: true, 0xfc: true, 0xfd: true, 0xfe: true, 0xff: true,
}

// needEscape is the non-HTML counterpart of needEscapeWithHTML: it flags '"',
// '\\', every control byte below 0x20 and every byte at or above 0x80, but
// leaves '&', '<' and '>' untouched.
var needEscape = [256]bool{
	'"': true, '\\': true,
	0x00: true, 0x01: true, 0x02: true, 0x03: true, 0x04: true, 0x05: true, 0x06: true, 0x07: true,
	0x08: true, 0x09: true, 0x0a: true, 0x0b: true, 0x0c: true, 0x0d: true, 0x0e: true, 0x0f: true,
	0x10: true, 0x11: true, 0x12: true, 0x13: true, 0x14: true, 0x15: true, 0x16: true, 0x17: true,
	0x18: true, 0x19: true, 0x1a: true, 0x1b: true, 0x1c: true, 0x1d: true, 0x1e: true, 0x1f: true,
	/* 0x20 - 0x7f */
	0x80: true, 0x81: true, 0x82: true, 0x83: true, 0x84: true, 0x85: true, 0x86: true, 0x87: true,
	0x88: true, 0x89: true, 0x8a: true, 0x8b: true, 0x8c: true, 0x8d: true, 0x8e: true, 0x8f: true,
	0x90: true, 0x91: true, 0x92: true, 0x93: true, 0x94: true, 0x95: true, 0x96: true, 0x97: true,
	0x98: true, 0x99: true, 0x9a: true, 0x9b: true, 0x9c: true, 0x9d: true, 0x9e: true, 0x9f: true,
	0xa0: true, 0xa1: true, 0xa2: true, 0xa3: true, 0xa4: true, 0xa5: true, 0xa6: true, 0xa7: true,
	0xa8: true, 0xa9: true, 0xaa: true, 0xab: true, 0xac: true, 0xad: true, 0xae: true, 0xaf: true,
	0xb0: true, 0xb1: true, 0xb2: true, 0xb3: true, 0xb4: true, 0xb5: true, 0xb6: true, 0xb7: true,
	0xb8: true, 0xb9: true, 0xba: true, 0xbb: true, 0xbc: true, 0xbd: true, 0xbe: true, 0xbf: true,
	0xc0: true, 0xc1: true, 0xc2: true, 0xc3: true, 0xc4: true, 0xc5: true, 0xc6: true, 0xc7: true,
	0xc8: true, 0xc9: true, 0xca: true, 0xcb: true, 0xcc: true, 0xcd: true, 0xce: true, 0xcf: true,
	0xd0: true, 0xd1: true, 0xd2: true, 0xd3: true, 0xd4: true, 0xd5: true, 0xd6: true, 0xd7: true,
	0xd8: true, 0xd9: true, 0xda: true, 0xdb: true, 0xdc: true, 0xdd: true, 0xde: true, 0xdf: true,
	0xe0: true, 0xe1: true, 0xe2: true, 0xe3: true, 0xe4: true, 0xe5: true, 0xe6: true, 0xe7: true,
	0xe8: true, 0xe9: true, 0xea: true, 0xeb: true, 0xec: true, 0xed: true, 0xee: true, 0xef: true,
	0xf0: true, 0xf1: true, 0xf2: true, 0xf3: true, 0xf4: true, 0xf5: true, 0xf6: true, 0xf7: true,
	0xf8: true, 0xf9: true, 0xfa: true, 0xfb: true, 0xfc: true, 0xfd: true, 0xfe: true, 0xff: true,
}

// hex holds the lower-case hexadecimal digits used to build \u00XX escapes.
var hex = "0123456789abcdef"

// stringToUint64Slice reinterprets the bytes of s as a []uint64 of
// len(s)/8 elements without copying; any trailing len(s)%8 bytes are not
// covered by the result.
//
// NOTE(review): the words are read in host byte order and the data pointer is
// not required to be 8-byte aligned here — confirm both are acceptable on
// every supported target.
//
//nolint:govet
func stringToUint64Slice(s string) []uint64 {
	return *(*[]uint64)(unsafe.Pointer(&reflect.SliceHeader{
		Data: ((*reflect.StringHeader)(unsafe.Pointer(&s))).Data,
		Len:  len(s) / 8,
		Cap:  len(s) / 8,
	}))
}

// AppendString appends s to buf as a double-quoted JSON string and returns
// the extended buffer.
//
// When ctx.Option.Flag does not have HTMLEscapeOption set the work is
// delegated to appendString. Otherwise '<', '>' and '&' are additionally
// written as \u00XX escapes.
//
// Throughout the loop, i is the start of the run of bytes that has been
// scanned but not yet copied to buf, and j is the scan position. Whenever an
// escape is emitted the pending run s[i:j] is flushed first.
func AppendString(ctx *RuntimeContext, buf []byte, s string) []byte {
	if ctx.Option.Flag&HTMLEscapeOption == 0 {
		return appendString(buf, s)
	}
	valLen := len(s)
	if valLen == 0 {
		return append(buf, `""`...)
	}
	buf = append(buf, '"')
	var (
		i, j int
	)
	orgLen := valLen
	base := (*runtime.SliceHeader)(unsafe.Pointer(&s)).Data
	for {
		// valLen is the number of bytes still to be scanned.
		valLen = len(s) - j
		if valLen <= 0 {
			return append(append(buf, s[i:]...), '"')
		}
		data := unsafe.Pointer(uintptr(base) + uintptr(j))
		// Short tails are checked byte by byte (unrolled); longer ones use a
		// word-at-a-time scan or the wider helpers.
		switch valLen {
		case 1:
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 2:
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 3:
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 4:
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 5:
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 6:
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 7:
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscapeWithHTML[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 8, 9, 10, 11, 12, 13, 14, 15:
			chunks := stringToUint64Slice(s[j:])
			for k, n := range chunks {
				// combine masks before checking for the MSB of each byte. We include
				// `n` in the mask to check whether any of the *input* byte MSBs were
				// set (i.e. the byte was outside the ASCII range).
				mask := n | (n - (lsb * 0x20)) |
					((n ^ (lsb * '"')) - lsb) |
					((n ^ (lsb * '\\')) - lsb) |
					((n ^ (lsb * '<')) - lsb) |
					((n ^ (lsb * '>')) - lsb) |
					((n ^ (lsb * '&')) - lsb)
				if (mask & msb) != 0 {
					// Byte offset of the lowest flagged byte in this word, plus
					// the offset of the word itself. With 8-15 remaining bytes
					// there is exactly one word, so k is always 0 today.
					// NOTE(review): lowest bit == first byte assumes a
					// little-endian load — confirm for big-endian targets.
					j += k*8 + bits.TrailingZeros64(mask&msb)/8
					goto ESCAPE
				}
			}
			// The whole words are clean; the remaining (<8) bytes are handled
			// by the next iteration.
			j += len(chunks) * 8
		case 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31:
			// presumably returns the offset of the first byte that may need
			// escaping, or valLen if there is none — helper is defined
			// elsewhere; verify.
			j += _findHTMLEscapeIndex128(data, valLen)
		default:
			// same assumed contract as _findHTMLEscapeIndex128 — verify.
			j += _findHTMLEscapeIndex256(data, valLen)
		}
		if j >= orgLen {
			return append(append(buf, s[i:]...), '"')
		}
	ESCAPE:
		c := s[j]
		if !needEscapeWithHTML[c] {
			// The scan stopped on a byte that does not need escaping after
			// all; keep it in the pending run and rescan from the next byte.
			j++
			continue
		}
		switch c {
		case '\\', '"':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', c)
			i = j + 1
			j = j + 1
			continue
		case '\n':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 'n')
			i = j + 1
			j = j + 1
			continue
		case '\r':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 'r')
			i = j + 1
			j = j + 1
			continue
		case '\t':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 't')
			i = j + 1
			j = j + 1
			continue
		case '<', '>', '&':
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u00`...)
			buf = append(buf, hex[c>>4], hex[c&0xF])
			i = j + 1
			j = j + 1
			continue
		case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F
			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F
			// Remaining control bytes (\t, \n and \r are handled above).
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u00`...)
			buf = append(buf, hex[c>>4], hex[c&0xF])
			i = j + 1
			j = j + 1
			continue
		}
		// c is >= 0x80 here: classify the multi-byte sequence starting at j.
		state, size := decodeRuneInString(s[j:])
		switch state {
		case runeErrorState:
			// Replace the offending byte with an escaped U+FFFD.
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\ufffd`...)
			i = j + 1
			j = j + 1
			continue
			// U+2028 is LINE SEPARATOR.
			// U+2029 is PARAGRAPH SEPARATOR.
			// They are both technically valid characters in JSON strings,
			// but don't work in JSONP, which has to be evaluated as JavaScript,
			// and can lead to security holes there. It is valid JSON to
			// escape them, so we do so unconditionally.
			// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
		case lineSepState:
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u2028`...)
			i = j + 3
			j = j + 3
			continue
		case paragraphSepState:
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u2029`...)
			i = j + 3
			j = j + 3
			continue
		}
		// Any other rune is copied verbatim: leave it in the pending run
		// (i is intentionally not advanced) and step over it.
		j += size
	}
}

// appendString appends s to buf as a double-quoted JSON string without HTML
// escaping and returns the extended buffer.
//
// As in AppendString, i is the start of the scanned-but-not-yet-copied run
// and j is the scan position. i can lag behind j (after a verbatim
// multi-byte rune, or after a scan stop on a byte that turned out not to
// need escaping), so every exit must flush s[i:], never just a single byte.
func appendString(buf []byte, s string) []byte {
	valLen := len(s)
	if valLen == 0 {
		return append(buf, `""`...)
	}
	buf = append(buf, '"')
	var (
		i, j int
	)
	base := (*runtime.SliceHeader)(unsafe.Pointer(&s)).Data
	orgLen := valLen
	for {
		// valLen is the number of bytes still to be scanned.
		valLen = len(s) - j
		if valLen <= 0 {
			return append(append(buf, s[i:]...), '"')
		}
		data := unsafe.Pointer(uintptr(base) + uintptr(j))
		switch valLen {
		case 1:
			if needEscape[s[j]] {
				goto ESCAPE
			}
			// Flush the whole pending run: i may be smaller than j here, so
			// appending only s[i] would drop bytes.
			return append(append(buf, s[i:]...), '"')
		case 2:
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 3:
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 4:
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 5:
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 6:
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 7:
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			j++
			if needEscape[s[j]] {
				goto ESCAPE
			}
			return append(append(buf, s[i:]...), '"')
		case 8, 9, 10, 11, 12, 13, 14, 15:
			// presumably each helper returns the offset of the first byte that
			// may need escaping, or valLen if there is none — helpers are
			// defined elsewhere; verify.
			j += _findEscapeIndex64(data, valLen)
		case 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31:
			j += _findEscapeIndex128(data, valLen)
		default:
			j += _findEscapeIndex256(data, valLen)
		}
		// ">=" (as in AppendString) so that an over-long helper result ends
		// the scan instead of indexing past the end of s below.
		if j >= orgLen {
			return append(append(buf, s[i:]...), '"')
		}
	ESCAPE:
		c := s[j]
		if !needEscape[c] {
			// The scan stopped on a byte that does not need escaping after
			// all; keep it in the pending run and rescan from the next byte.
			j++
			continue
		}
		switch c {
		case '\\', '"':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', c)
			i = j + 1
			j = j + 1
			continue
		case '\n':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 'n')
			i = j + 1
			j = j + 1
			continue
		case '\r':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 'r')
			i = j + 1
			j = j + 1
			continue
		case '\t':
			buf = append(buf, s[i:j]...)
			buf = append(buf, '\\', 't')
			i = j + 1
			j = j + 1
			continue
		case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F
			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F
			// Remaining control bytes (\t, \n and \r are handled above).
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u00`...)
			buf = append(buf, hex[c>>4], hex[c&0xF])
			i = j + 1
			j = j + 1
			continue
		}
		// c is >= 0x80 here: classify the multi-byte sequence starting at j.
		state, size := decodeRuneInString(s[j:])
		switch state {
		case runeErrorState:
			// Replace the offending byte with an escaped U+FFFD.
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\ufffd`...)
			i = j + 1
			j = j + 1
			continue
			// U+2028 is LINE SEPARATOR.
			// U+2029 is PARAGRAPH SEPARATOR.
			// They are both technically valid characters in JSON strings,
			// but don't work in JSONP, which has to be evaluated as JavaScript,
			// and can lead to security holes there. It is valid JSON to
			// escape them, so we do so unconditionally.
			// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
		case lineSepState:
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u2028`...)
			i = j + 3
			j = j + 3
			continue
		case paragraphSepState:
			buf = append(buf, s[i:j]...)
			buf = append(buf, `\u2029`...)
			i = j + 3
			j = j + 3
			continue
		}
		// Any other rune is copied verbatim: leave it in the pending run
		// (i is intentionally not advanced) and step over it.
		j += size
	}
}