From 2555fc0b612219669ee6ec6c84324920dedd8b42 Mon Sep 17 00:00:00 2001 From: Josh Baker Date: Mon, 8 May 2017 17:33:03 -0700 Subject: [PATCH] Unmarshal Validation The Unmarshal function now returns an error if the JSON is not valid. --- README.md | 19 ++-- gjson.go | 276 ++++++++++++++++++++++++++++++++++++++++++++++++-- gjson_test.go | 103 +++++++++++++++++++ 3 files changed, 378 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index ee03c8b..e8c6c82 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@

get a json value quickly

GJSON is a Go package that provides a [fast](#performance) and [simple](#get-a-value) way to get values from a json document. -It has features such as [one line retrieval](#get-a-value), [dot notation paths](#path-syntax), [iteration](#iterate-through-an-object-or-array). It can also [unmarshal](#unmarshalling) 3 to 4 times faster than the standard Go `encoding/json` unmarshaller. +It has features such as [one line retrieval](#get-a-value), [dot notation paths](#path-syntax), [iteration](#iterate-through-an-object-or-array). It can also [unmarshal](#unmarshalling) 2 to 3 times faster than the standard Go `encoding/json` unmarshaller. Getting Started =============== @@ -26,7 +26,7 @@ $ go get -u github.com/tidwall/gjson This will retrieve the library. ## Get a value -Get searches json for the specified path. A path is in dot syntax, such as "name.last" or "age". This function expects that the json is well-formed. Bad json will not panic, but it may return back unexpected results. When the value is found it's returned immediately. +Get searches json for the specified path. A path is in dot syntax, such as "name.last" or "age". This function expects that the json is well-formed and validates. Invalid json will not panic, but it may return back unexpected results. When the value is found it's returned immediately. ```go package main @@ -237,16 +237,13 @@ if gjson.Get(json, "name.last").Exists(){ There's a `gjson.Unmarshal` function which loads json data into a value. It's a drop in replacement for `json.Unmarshal` and you can typically -see a 3-4x boost in performance without the need for external generators. +see a 2-3x boost in performance without the need for external generators. -This function works almost identically to `json.Unmarshal` except that it -expects the json to be well-formed prior to being called. Bad json -will not panic or cause a decoding error. - -Another difference is that `gjson.Unmarshal` will automatically attempt to -convert JSON values to any Go type. For example, the JSON string "100" or -the JSON number 100 can be equally assigned to Go string, int, byte, uint64, -etc. This rule applies to all types. +This function works almost identically to `json.Unmarshal` except that +`gjson.Unmarshal` will automatically attempt to convert JSON values to any +Go type. For example, the JSON string "100" or the JSON number 100 can be +equally assigned to Go string, int, byte, uint64, etc. This rule applies to +all types. ```go diff --git a/gjson.go b/gjson.go index 77ed4c6..8e9afcb 100644 --- a/gjson.go +++ b/gjson.go @@ -4,6 +4,7 @@ package gjson import ( "encoding/base64" "encoding/json" + "errors" "reflect" "strconv" "strings" @@ -2060,18 +2061,275 @@ func assign(jsval Result, goval reflect.Value) { // Unmarshal loads the JSON data into the value pointed to by v. // -// This function works almost identically to json.Unmarshal except that it -// expects that the json is well-formed prior to being called. Invalid json -// will not panic, but it may return back unexpected results. Therefore the -// return value of this function will always be nil. -// -// Another difference is that gjson.Unmarshal will automatically attempt to -// convert JSON values to any Go type. For example, the JSON string "100" or -// the JSON number 100 can be equally assigned to Go string, int, byte, uint64, -// etc. This rule applies to all types. +// This function works almost identically to json.Unmarshal except that +// gjson.Unmarshal will automatically attempt to convert JSON values to any Go +// type. For example, the JSON string "100" or the JSON number 100 can be equally +// assigned to Go string, int, byte, uint64, etc. This rule applies to all types. func Unmarshal(data []byte, v interface{}) error { + _, ok := validpayload(data, 0) + if !ok { + return errors.New("invalid json") + } if v := reflect.ValueOf(v); v.Kind() == reflect.Ptr { assign(ParseBytes(data), v) } return nil } + +func validpayload(data []byte, i int) (outi int, ok bool) { + for ; i < len(data); i++ { + switch data[i] { + default: + i, ok = validany(data, i) + if !ok { + return i, false + } + for ; i < len(data); i++ { + switch data[i] { + default: + return i, false + case ' ', '\t', '\n', '\r': + continue + } + } + return i, true + case ' ', '\t', '\n', '\r': + continue + } + } + return i, false +} +func validany(data []byte, i int) (outi int, ok bool) { + for ; i < len(data); i++ { + switch data[i] { + default: + return i, false + case ' ', '\t', '\n', '\r': + continue + case '{': + return validobject(data, i+1) + case '[': + return validarray(data, i+1) + case '"': + return validstring(data, i+1) + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return validnumber(data, i+1) + case 't': + return validtrue(data, i+1) + case 'f': + return validfalse(data, i+1) + case 'n': + return validnull(data, i+1) + } + } + return i, false +} +func validobject(data []byte, i int) (outi int, ok bool) { + for ; i < len(data); i++ { + switch data[i] { + default: + return i, false + case ' ', '\t', '\n', '\r': + continue + case '}': + return i + 1, true + case '"': + key: + if i, ok = validstring(data, i+1); !ok { + return i, false + } + if i, ok = validcolon(data, i); !ok { + return i, false + } + if i, ok = validany(data, i); !ok { + return i, false + } + if i, ok = validcomma(data, i, '}'); !ok { + return i, false + } + if data[i] == '}' { + return i + 1, true + } + for ; i < len(data); i++ { + if data[i] == '"' { + goto key + } + } + return i, false + } + } + return i, false +} +func validcolon(data []byte, i int) (outi int, ok bool) { + for ; i < len(data); i++ { + switch data[i] { + default: + return i, false + case ' ', '\t', '\n', '\r': + continue + case ':': + return i + 1, true + } + } + return i, false +} +func validcomma(data []byte, i int, end byte) (outi int, ok bool) { + for ; i < len(data); i++ { + switch data[i] { + default: + return i, false + case ' ', '\t', '\n', '\r': + continue + case ',': + return i, true + case end: + return i, true + } + } + return i, false +} +func validarray(data []byte, i int) (outi int, ok bool) { + for ; i < len(data); i++ { + switch data[i] { + default: + for ; i < len(data); i++ { + if i, ok = validany(data, i); !ok { + return i, false + } + if i, ok = validcomma(data, i, ']'); !ok { + return i, false + } + if data[i] == ']' { + return i + 1, true + } + } + case ' ', '\t', '\n', '\r': + continue + case ']': + return i + 1, true + } + } + return i, false +} +func validstring(data []byte, i int) (outi int, ok bool) { + for ; i < len(data); i++ { + if data[i] < ' ' { + return i, false + } else if data[i] == '\\' { + i++ + switch data[i] { + default: + return i, false + case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': + case 'u': + for j := 0; j < 4; j++ { + i++ + if i >= len(data) { + return i, false + } + if !((data[i] >= '0' && data[i] <= '9') || + (data[i] >= 'a' && data[i] <= 'f') || + (data[i] >= 'A' && data[i] <= 'F')) { + return i, false + } + } + } + } else if data[i] == '"' { + return i + 1, true + } + } + return i, false +} +func validnumber(data []byte, i int) (outi int, ok bool) { + i-- + // sign + if data[i] == '-' { + i++ + } + // int + if i == len(data) { + return i, false + } + if data[i] == '0' { + i++ + } else { + for ; i < len(data); i++ { + if data[i] >= '0' && data[i] <= '9' { + continue + } + break + } + } + // frac + if i == len(data) { + return i, true + } + if data[i] == '.' { + i++ + if i == len(data) { + return i, false + } + if data[i] < '0' || data[i] > '9' { + return i, false + } + i++ + for ; i < len(data); i++ { + if data[i] >= '0' && data[i] <= '9' { + continue + } + break + } + } + // exp + if i == len(data) { + return i, true + } + if data[i] == 'e' || data[i] == 'E' { + i++ + if i == len(data) { + return i, false + } + if data[i] == '+' || data[i] == '-' { + i++ + } + if i == len(data) { + return i, false + } + if data[i] < '0' || data[i] > '9' { + return i, false + } + i++ + for ; i < len(data); i++ { + if data[i] >= '0' && data[i] <= '9' { + continue + } + break + } + } + return i, true +} + +func validtrue(data []byte, i int) (outi int, ok bool) { + if i+3 <= len(data) && data[i] == 'r' && data[i+1] == 'u' && data[i+2] == 'e' { + return i + 3, true + } + return i, false +} +func validfalse(data []byte, i int) (outi int, ok bool) { + if i+4 <= len(data) && data[i] == 'a' && data[i+1] == 'l' && data[i+2] == 's' && data[i+3] == 'e' { + return i + 4, true + } + return i, false +} +func validnull(data []byte, i int) (outi int, ok bool) { + if i+3 <= len(data) && data[i] == 'u' && data[i+1] == 'l' && data[i+2] == 'l' { + return i + 3, true + } + return i, false +} + +// Valid returns true if the input is valid json. +func Valid(json string) bool { + _, ok := validpayload([]byte(json), 0) + return ok +} diff --git a/gjson_test.go b/gjson_test.go index 235a310..3a25f3d 100644 --- a/gjson_test.go +++ b/gjson_test.go @@ -901,6 +901,109 @@ func TestUnmarshal(t *testing.T) { assert(t, str == Get(complicatedJSON, "LeftOut").String()) } +func testvalid(json string, expect bool) { + _, ok := validpayload([]byte(json), 0) + if ok != expect { + panic("mismatch") + } +} + +func TestValidBasic(t *testing.T) { + testvalid("0", true) + testvalid("00", false) + testvalid("-00", false) + testvalid("-.", false) + testvalid("0.0", true) + testvalid("10.0", true) + testvalid("10e1", true) + testvalid("10EE", false) + testvalid("10E-", false) + testvalid("10E+", false) + testvalid("10E123", true) + testvalid("10E-123", true) + testvalid("10E-0123", true) + testvalid("", false) + testvalid(" ", false) + testvalid("{}", true) + testvalid("{", false) + testvalid("-", false) + testvalid("-1", true) + testvalid("-1.", false) + testvalid("-1.0", true) + testvalid(" -1.0", true) + testvalid(" -1.0 ", true) + testvalid("-1.0 ", true) + testvalid("-1.0 i", false) + testvalid("-1.0 i", false) + testvalid("true", true) + testvalid(" true", true) + testvalid(" true ", true) + testvalid(" True ", false) + testvalid(" tru", false) + testvalid("false", true) + testvalid(" false", true) + testvalid(" false ", true) + testvalid(" False ", false) + testvalid(" fals", false) + testvalid("null", true) + testvalid(" null", true) + testvalid(" null ", true) + testvalid(" Null ", false) + testvalid(" nul", false) + testvalid(" []", true) + testvalid(" [true]", true) + testvalid(" [ true, null ]", true) + testvalid(" [ true,]", false) + testvalid(`{"hello":"world"}`, true) + testvalid(`{ "hello": "world" }`, true) + testvalid(`{ "hello": "world", }`, false) + testvalid(`{"a":"b",}`, false) + testvalid(`{"a":"b","a"}`, false) + testvalid(`{"a":"b","a":}`, false) + testvalid(`{"a":"b","a":1}`, true) + testvalid(`{"a":"b","a": 1, "c":{"hi":"there"} }`, true) + testvalid(`{"a":"b","a": 1, "c":{"hi":"there", "easy":["going",{"mixed":"bag"}]} }`, true) + testvalid(`""`, true) + testvalid(`"`, false) + testvalid(`"\n"`, true) + testvalid(`"\"`, false) + testvalid(`"\\"`, true) + testvalid(`"a\\b"`, true) + testvalid(`"a\\b\\\"a"`, true) + testvalid(`"a\\b\\\uFFAAa"`, true) + testvalid(`"a\\b\\\uFFAZa"`, false) + testvalid(`"a\\b\\\uFFA"`, false) + testvalid(string(complicatedJSON), true) + testvalid(string(exampleJSON), true) +} + +var jsonchars = []string{"{", "[", ",", ":", "}", "]", "1", "0", "true", "false", "null", `""`, `"\""`, `"a"`} + +func makeRandomJSONChars(b []byte) { + var bb []byte + for len(bb) < len(b) { + bb = append(bb, jsonchars[rand.Int()%len(jsonchars)]...) + } + copy(b, bb[:len(b)]) +} +func TestValidRandom(t *testing.T) { + rand.Seed(time.Now().UnixNano()) + b := make([]byte, 100000) + start := time.Now() + for time.Since(start) < time.Second*3 { + n := rand.Int() % len(b) + rand.Read(b[:n]) + validpayload(b[:n], 0) + } + + start = time.Now() + for time.Since(start) < time.Second*3 { + n := rand.Int() % len(b) + makeRandomJSONChars(b[:n]) + validpayload(b[:n], 0) + } +} + type BenchStruct struct { Widget struct { Window struct {