// runeit decodes the four hexadecimal digits at the start of json (the XXXX
// part of a \uXXXX escape) and returns them as a rune.
//
// The caller must guarantee len(json) >= 4; a shorter string panics on the
// slice expression. The parse error is deliberately ignored because unescape
// is best-effort and has no error return: malformed digits yield rune 0.
func runeit(json string) rune {
	n, _ := strconv.ParseUint(json[:4], 16, 64)
	return rune(n)
}

// unescape decodes the backslash escape sequences in json and returns the
// resulting string. Escapes are decoded inline rather than by round-tripping
// the value through encoding/json.
//
// Decoding is best-effort and never reports an error. It stops and returns
// whatever has been decoded so far when it meets:
//   - a raw control character (any byte below ' '),
//   - a backslash that is the last byte of the input,
//   - an escape letter other than \ / b f n r t " u,
//   - a \u escape with fewer than four characters after it.
//
// A \uXXXX high/low surrogate pair is combined into a single code point. A
// surrogate that is not followed by another complete \uXXXX escape is emitted
// as U+FFFD.
func unescape(json string) string {
	str := make([]byte, 0, len(json))
	for i := 0; i < len(json); i++ {
		switch {
		case json[i] < ' ':
			// Raw control characters are not allowed inside a JSON string.
			return string(str)
		case json[i] == '\\':
			i++
			if i >= len(json) {
				return string(str)
			}
			switch json[i] {
			case '\\':
				str = append(str, '\\')
			case '/':
				str = append(str, '/')
			case 'b':
				str = append(str, '\b')
			case 'f':
				str = append(str, '\f')
			case 'n':
				str = append(str, '\n')
			case 'r':
				str = append(str, '\r')
			case 't':
				str = append(str, '\t')
			case '"':
				str = append(str, '"')
			case 'u':
				// Need the 'u' plus four hex digits.
				if i+5 > len(json) {
					return string(str)
				}
				r := runeit(json[i+1:])
				i += 5 // i now points just past the four hex digits
				if utf16.IsSurrogate(r) {
					// A second \uXXXX is needed to complete the pair. The
					// bound must be checked against the REMAINING input,
					// not the total length, otherwise a surrogate at the
					// end of the string indexes out of range.
					if len(json)-i >= 6 && json[i] == '\\' && json[i+1] == 'u' {
						// We expect it to be correct so just consume it.
						r = utf16.DecodeRune(r, runeit(json[i+2:]))
						i += 6
					}
				}
				// EncodeRune writes U+FFFD for an unpaired surrogate.
				var buf [utf8.UTFMax]byte
				n := utf8.EncodeRune(buf[:], r)
				str = append(str, buf[:n]...)
				i-- // backtrack by one; the loop's i++ re-advances
			default:
				// Unknown escape sequence.
				return string(str)
			}
		default:
			str = append(str, json[i])
		}
	}
	return string(str)
}