inlined unicode decoding

This commit is contained in:
Josh Baker 2017-04-05 10:22:45 -07:00
parent 458c2c85b8
commit 01736e2faf
1 changed files with 64 additions and 9 deletions

View File

@ -2,9 +2,10 @@
package gjson
import (
"encoding/json"
"reflect"
"strconv"
"unicode/utf16"
"unicode/utf8"
"unsafe"
"github.com/tidwall/match"
@ -1358,14 +1359,68 @@ func GetBytes(json []byte, path string) Result {
return result
}
// unescape unescapes a string, we'll use the standard go/json library for this.
func unescape(s string) string {
data := make([]byte, 0, len(s)+2)
data = append(data, '"')
data = append(data, s...)
data = append(data, '"')
json.Unmarshal(data, &s)
return s
// runeit decodes the four hexadecimal digits at the start of json (the XXXX
// part of a \uXXXX escape) into a rune.
//
// The caller must guarantee that json holds at least four bytes. The parse
// error is deliberately dropped: unescape is best-effort, and digits that are
// not valid hexadecimal simply decode to the zero rune.
func runeit(json string) rune {
	n, _ := strconv.ParseUint(json[:4], 16, 64)
	return rune(n)
}

// unescape decodes the escape sequences of a JSON string body (the text
// between the quotes) and returns the decoded string.
//
// Decoding is best-effort and never reports an error. It stops and returns
// what has been decoded so far when it meets a raw control character (a byte
// below ' '), a backslash at the very end of the input, an unknown escape
// letter, or a \u escape with fewer than four characters after the 'u'.
//
// A \uXXXX escape holding a UTF-16 surrogate is combined with an immediately
// following \uXXXX escape. A surrogate that has no such partner is emitted as
// U+FFFD, the Unicode replacement character.
func unescape(json string) string {
	str := make([]byte, 0, len(json))
	for i := 0; i < len(json); i++ {
		switch {
		default:
			str = append(str, json[i])
		case json[i] < ' ':
			// Raw control characters are not allowed inside a JSON string.
			return string(str)
		case json[i] == '\\':
			i++
			if i >= len(json) {
				return string(str)
			}
			switch json[i] {
			default:
				return string(str)
			case '\\':
				str = append(str, '\\')
			case '/':
				str = append(str, '/')
			case 'b':
				str = append(str, '\b')
			case 'f':
				str = append(str, '\f')
			case 'n':
				str = append(str, '\n')
			case 'r':
				str = append(str, '\r')
			case 't':
				str = append(str, '\t')
			case '"':
				str = append(str, '"')
			case 'u':
				// i is on the 'u'; four hex digits must follow it.
				if i+5 > len(json) {
					return string(str)
				}
				r := runeit(json[i+1:])
				i += 5 // i now points just past the four hex digits
				if utf16.IsSurrogate(r) {
					// A second \uXXXX is needed to complete the pair. The
					// check must look at what is left after i, not at the
					// total length, otherwise a surrogate at the end of the
					// input reads past the end of the string.
					if len(json)-i >= 6 && json[i] == '\\' && json[i+1] == 'u' {
						// We expect it to be correct so just consume it.
						r = utf16.DecodeRune(r, runeit(json[i+2:]))
						i += 6
					}
				}
				// EncodeRune writes U+FFFD for an unpaired surrogate.
				var buf [utf8.UTFMax]byte
				n := utf8.EncodeRune(buf[:], r)
				str = append(str, buf[:n]...)
				i-- // backtrack by one; the loop's i++ restores the position
			}
		}
	}
	return string(str)
}
// Less return true if a token is less than another token.