package json

import (
	"unicode/utf8"
)

// htmlSafeSet holds the value true if the ASCII character with the given
// array position can be safely represented inside a JSON string, embedded
// inside of HTML <script> tags, without any additional escaping.
//
// All values are true except for the ASCII control characters (0-31), the
// double quote ("), the backslash character ("\"), HTML opening and closing
// tags ("<" and ">"), and the ampersand ("&").
var htmlSafeSet = [utf8.RuneSelf]bool{
	' ':      true,
	'!':      true,
	'"':      false,
	'#':      true,
	'$':      true,
	'%':      true,
	'&':      false,
	'\'':     true,
	'(':      true,
	')':      true,
	'*':      true,
	'+':      true,
	',':      true,
	'-':      true,
	'.':      true,
	'/':      true,
	'0':      true,
	'1':      true,
	'2':      true,
	'3':      true,
	'4':      true,
	'5':      true,
	'6':      true,
	'7':      true,
	'8':      true,
	'9':      true,
	':':      true,
	';':      true,
	'<':      false,
	'=':      true,
	'>':      false,
	'?':      true,
	'@':      true,
	'A':      true,
	'B':      true,
	'C':      true,
	'D':      true,
	'E':      true,
	'F':      true,
	'G':      true,
	'H':      true,
	'I':      true,
	'J':      true,
	'K':      true,
	'L':      true,
	'M':      true,
	'N':      true,
	'O':      true,
	'P':      true,
	'Q':      true,
	'R':      true,
	'S':      true,
	'T':      true,
	'U':      true,
	'V':      true,
	'W':      true,
	'X':      true,
	'Y':      true,
	'Z':      true,
	'[':      true,
	'\\':     false,
	']':      true,
	'^':      true,
	'_':      true,
	'`':      true,
	'a':      true,
	'b':      true,
	'c':      true,
	'd':      true,
	'e':      true,
	'f':      true,
	'g':      true,
	'h':      true,
	'i':      true,
	'j':      true,
	'k':      true,
	'l':      true,
	'm':      true,
	'n':      true,
	'o':      true,
	'p':      true,
	'q':      true,
	'r':      true,
	's':      true,
	't':      true,
	'u':      true,
	'v':      true,
	'w':      true,
	'x':      true,
	'y':      true,
	'z':      true,
	'{':      true,
	'|':      true,
	'}':      true,
	'~':      true,
	'\u007f': true,
}

// safeSet holds the value true if the ASCII character with the given array
// position can be represented inside a JSON string without any further
// escaping.
//
// All values are true except for the ASCII control characters (0-31), the
// double quote ("), and the backslash character ("\").
var safeSet = [utf8.RuneSelf]bool{
	' ':      true,
	'!':      true,
	'"':      false,
	'#':      true,
	'$':      true,
	'%':      true,
	'&':      true,
	'\'':     true,
	'(':      true,
	')':      true,
	'*':      true,
	'+':      true,
	',':      true,
	'-':      true,
	'.':      true,
	'/':      true,
	'0':      true,
	'1':      true,
	'2':      true,
	'3':      true,
	'4':      true,
	'5':      true,
	'6':      true,
	'7':      true,
	'8':      true,
	'9':      true,
	':':      true,
	';':      true,
	'<':      true,
	'=':      true,
	'>':      true,
	'?':      true,
	'@':      true,
	'A':      true,
	'B':      true,
	'C':      true,
	'D':      true,
	'E':      true,
	'F':      true,
	'G':      true,
	'H':      true,
	'I':      true,
	'J':      true,
	'K':      true,
	'L':      true,
	'M':      true,
	'N':      true,
	'O':      true,
	'P':      true,
	'Q':      true,
	'R':      true,
	'S':      true,
	'T':      true,
	'U':      true,
	'V':      true,
	'W':      true,
	'X':      true,
	'Y':      true,
	'Z':      true,
	'[':      true,
	'\\':     false,
	']':      true,
	'^':      true,
	'_':      true,
	'`':      true,
	'a':      true,
	'b':      true,
	'c':      true,
	'd':      true,
	'e':      true,
	'f':      true,
	'g':      true,
	'h':      true,
	'i':      true,
	'j':      true,
	'k':      true,
	'l':      true,
	'm':      true,
	'n':      true,
	'o':      true,
	'p':      true,
	'q':      true,
	'r':      true,
	's':      true,
	't':      true,
	'u':      true,
	'v':      true,
	'w':      true,
	'x':      true,
	'y':      true,
	'z':      true,
	'{':      true,
	'|':      true,
	'}':      true,
	'~':      true,
	'\u007f': true,
}

// hex is the lowercase hexadecimal digit alphabet used to build \uXXXX
// escape sequences.
const hex = "0123456789abcdef"

// encodeEscapedString appends s to e.buf as a double-quoted JSON string,
// escaping everything htmlSafeSet marks as unsafe (control characters, the
// double quote, the backslash, '<', '>' and '&').
//
// The leading run of safe ASCII bytes is copied with a single append; the
// rest of the string, if any, is handed to
// writeStringSlowPathWithHTMLEscaped.
func (e *Encoder) encodeEscapedString(s string) {
	e.buf = append(e.buf, '"')

	// Fast path: measure the longest prefix that needs neither escaping nor
	// UTF-8 decoding, then copy it in one go.
	i := 0
	for i < len(s) && s[i] < utf8.RuneSelf && htmlSafeSet[s[i]] {
		i++
	}
	e.buf = append(e.buf, s[:i]...)

	if i == len(s) {
		e.buf = append(e.buf, '"')
		return
	}
	e.writeStringSlowPathWithHTMLEscaped(i, s, len(s))
}

// writeStringSlowPathWithHTMLEscaped appends s[i:] to e.buf with JSON and
// HTML escaping applied, followed by the closing double quote.
//
// s[:i] must already have been written by the caller, and valLen is the
// length of s as passed by encodeEscapedString. Invalid UTF-8 bytes are
// replaced by the escape sequence \ufffd, and U+2028 / U+2029 are written
// as \u2028 / \u2029.
func (e *Encoder) writeStringSlowPathWithHTMLEscaped(i int, s string, valLen int) {
	// start marks the beginning of the pending run of bytes that can be
	// copied verbatim once the next byte needing an escape is found.
	start := i
	for i < valLen {
		if b := s[i]; b < utf8.RuneSelf {
			if htmlSafeSet[b] {
				i++
				continue
			}
			if start < i {
				e.buf = append(e.buf, s[start:i]...)
			}
			switch b {
			case '\\', '"':
				e.buf = append(e.buf, '\\', b)
			case '\n':
				e.buf = append(e.buf, '\\', 'n')
			case '\r':
				e.buf = append(e.buf, '\\', 'r')
			case '\t':
				e.buf = append(e.buf, '\\', 't')
			default:
				// This encodes bytes < 0x20 except for \t, \n and \r.
				// It also escapes <, >, and & because they can lead to
				// security holes when user-controlled strings are
				// rendered into JSON and served to some browsers.
				e.buf = append(e.buf, `\u00`...)
				e.buf = append(e.buf, hex[b>>4], hex[b&0xF])
			}
			i++
			start = i
			continue
		}

		c, size := utf8.DecodeRuneInString(s[i:])
		if c == utf8.RuneError && size == 1 {
			// A single invalid byte: emit the replacement character
			// escape instead of the raw byte.
			if start < i {
				e.buf = append(e.buf, s[start:i]...)
			}
			e.buf = append(e.buf, `\ufffd`...)
			i++
			start = i
			continue
		}

		// U+2028 is LINE SEPARATOR.
		// U+2029 is PARAGRAPH SEPARATOR.
		// They are both technically valid characters in JSON strings,
		// but don't work in JSONP, which has to be evaluated as JavaScript,
		// and can lead to security holes there. It is valid JSON to
		// escape them, so this path always does.
		// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
		if c == '\u2028' || c == '\u2029' {
			if start < i {
				e.buf = append(e.buf, s[start:i]...)
			}
			e.buf = append(e.buf, `\u202`...)
			// The low nibble distinguishes 2028 ('8') from 2029 ('9').
			e.buf = append(e.buf, hex[c&0xF])
			i += size
			start = i
			continue
		}
		i += size
	}

	// Flush whatever verbatim run is still pending.
	if start < len(s) {
		e.buf = append(e.buf, s[start:]...)
	}
	e.buf = append(e.buf, '"')
}

// encodeNoEscapedString appends s to e.buf as a double-quoted JSON string,
// escaping only what JSON itself requires: control characters below 0x20,
// the double quote and the backslash.
//
// Bytes >= utf8.RuneSelf are copied through unchanged; this path neither
// validates UTF-8 nor escapes U+2028 / U+2029.
func (e *Encoder) encodeNoEscapedString(s string) {
	e.buf = append(e.buf, '"')

	// Fast path: measure the longest prefix that needs no escaping, then
	// copy it in one go.
	i := 0
	for i < len(s) {
		c := s[i]
		if c <= 31 || c == '"' || c == '\\' {
			break
		}
		i++
	}
	e.buf = append(e.buf, s[:i]...)

	if i == len(s) {
		e.buf = append(e.buf, '"')
		return
	}
	e.writeStringSlowPath(i, s, len(s))
}

// writeStringSlowPath appends s[i:] to e.buf with minimal JSON escaping,
// followed by the closing double quote.
//
// s[:i] must already have been written by the caller, and valLen is the
// length of s as passed by encodeNoEscapedString. Only ASCII bytes that
// safeSet marks as unsafe are escaped; every byte >= utf8.RuneSelf is
// copied verbatim without UTF-8 validation.
func (e *Encoder) writeStringSlowPath(i int, s string, valLen int) {
	// start marks the beginning of the pending run of bytes that can be
	// copied verbatim once the next byte needing an escape is found.
	start := i
	for i < valLen {
		b := s[i]
		if b >= utf8.RuneSelf || safeSet[b] {
			i++
			continue
		}
		if start < i {
			e.buf = append(e.buf, s[start:i]...)
		}
		switch b {
		case '\\', '"':
			e.buf = append(e.buf, '\\', b)
		case '\n':
			e.buf = append(e.buf, '\\', 'n')
		case '\r':
			e.buf = append(e.buf, '\\', 'r')
		case '\t':
			e.buf = append(e.buf, '\\', 't')
		default:
			// Only control characters below 0x20 other than \t, \n and
			// \r reach this point; HTML characters are not escaped on
			// this path.
			e.buf = append(e.buf, `\u00`...)
			e.buf = append(e.buf, hex[b>>4], hex[b&0xF])
		}
		i++
		start = i
	}

	// Flush whatever verbatim run is still pending.
	if start < len(s) {
		e.buf = append(e.buf, s[start:]...)
	}
	e.buf = append(e.buf, '"')
}