Unmarshal Validation

The Unmarshal function now returns an error if the JSON is not valid.
This commit is contained in:
Josh Baker 2017-05-08 17:33:03 -07:00
parent 0bedaf01cb
commit 2555fc0b61
3 changed files with 378 additions and 20 deletions

View File

@ -10,7 +10,7 @@
<p align="center">get a json value quickly</p> <p align="center">get a json value quickly</p>
GJSON is a Go package that provides a [fast](#performance) and [simple](#get-a-value) way to get values from a json document. GJSON is a Go package that provides a [fast](#performance) and [simple](#get-a-value) way to get values from a json document.
It has features such as [one line retrieval](#get-a-value), [dot notation paths](#path-syntax), [iteration](#iterate-through-an-object-or-array). It can also [unmarshal](#unmarshalling) 3 to 4 times faster than the standard Go `encoding/json` unmarshaller. It has features such as [one line retrieval](#get-a-value), [dot notation paths](#path-syntax), [iteration](#iterate-through-an-object-or-array). It can also [unmarshal](#unmarshalling) 2 to 3 times faster than the standard Go `encoding/json` unmarshaller.
Getting Started Getting Started
=============== ===============
@ -26,7 +26,7 @@ $ go get -u github.com/tidwall/gjson
This will retrieve the library. This will retrieve the library.
## Get a value ## Get a value
Get searches json for the specified path. A path is in dot syntax, such as "name.last" or "age". This function expects that the json is well-formed. Bad json will not panic, but it may return unexpected results. When the value is found it's returned immediately. Get searches json for the specified path. A path is in dot syntax, such as "name.last" or "age". This function expects that the json is well-formed and validates. Invalid json will not panic, but it may return unexpected results. When the value is found it's returned immediately.
```go ```go
package main package main
@ -237,16 +237,13 @@ if gjson.Get(json, "name.last").Exists(){
There's a `gjson.Unmarshal` function which loads json data into a value. There's a `gjson.Unmarshal` function which loads json data into a value.
It's a drop in replacement for `json.Unmarshal` and you can typically It's a drop in replacement for `json.Unmarshal` and you can typically
see a 3-4x boost in performance without the need for external generators. see a 2-3x boost in performance without the need for external generators.
This function works almost identically to `json.Unmarshal` except that it This function works almost identically to `json.Unmarshal` except that
expects the json to be well-formed prior to being called. Bad json `gjson.Unmarshal` will automatically attempt to convert JSON values to any
will not panic or cause a decoding error. Go type. For example, the JSON string "100" or the JSON number 100 can be
equally assigned to Go string, int, byte, uint64, etc. This rule applies to
Another difference is that `gjson.Unmarshal` will automatically attempt to all types.
convert JSON values to any Go type. For example, the JSON string "100" or
the JSON number 100 can be equally assigned to Go string, int, byte, uint64,
etc. This rule applies to all types.
```go ```go

276
gjson.go
View File

@ -4,6 +4,7 @@ package gjson
import ( import (
"encoding/base64" "encoding/base64"
"encoding/json" "encoding/json"
"errors"
"reflect" "reflect"
"strconv" "strconv"
"strings" "strings"
@ -2060,18 +2061,275 @@ func assign(jsval Result, goval reflect.Value) {
// Unmarshal loads the JSON data into the value pointed to by v. // Unmarshal loads the JSON data into the value pointed to by v.
// //
// This function works almost identically to json.Unmarshal except that it // This function works almost identically to json.Unmarshal except that
// expects that the json is well-formed prior to being called. Invalid json // gjson.Unmarshal will automatically attempt to convert JSON values to any Go
// will not panic, but it may return back unexpected results. Therefore the // type. For example, the JSON string "100" or the JSON number 100 can be equally
// return value of this function will always be nil. // assigned to Go string, int, byte, uint64, etc. This rule applies to all types.
//
// Another difference is that gjson.Unmarshal will automatically attempt to
// convert JSON values to any Go type. For example, the JSON string "100" or
// the JSON number 100 can be equally assigned to Go string, int, byte, uint64,
// etc. This rule applies to all types.
func Unmarshal(data []byte, v interface{}) error { func Unmarshal(data []byte, v interface{}) error {
_, ok := validpayload(data, 0)
if !ok {
return errors.New("invalid json")
}
if v := reflect.ValueOf(v); v.Kind() == reflect.Ptr { if v := reflect.ValueOf(v); v.Kind() == reflect.Ptr {
assign(ParseBytes(data), v) assign(ParseBytes(data), v)
} }
return nil return nil
} }
// validpayload reports whether data, starting at index i, holds exactly one
// JSON value optionally surrounded by whitespace. outi is the index at which
// scanning stopped.
func validpayload(data []byte, i int) (outi int, ok bool) {
	// Skip leading whitespace.
	for i < len(data) {
		c := data[i]
		if c != ' ' && c != '\t' && c != '\n' && c != '\r' {
			break
		}
		i++
	}
	if i >= len(data) {
		// Empty or whitespace-only input is not a JSON value.
		return i, false
	}
	pos, valid := validany(data, i)
	if !valid {
		return pos, false
	}
	// Only whitespace may follow the value.
	for ; pos < len(data); pos++ {
		c := data[pos]
		if c != ' ' && c != '\t' && c != '\n' && c != '\r' {
			return pos, false
		}
	}
	return pos, true
}
// validany skips whitespace starting at index i and then validates the JSON
// value that follows by dispatching on its first byte. On success outi is the
// index just past the value.
func validany(data []byte, i int) (outi int, ok bool) {
	for ; i < len(data); i++ {
		c := data[i]
		if c == ' ' || c == '\t' || c == '\n' || c == '\r' {
			continue
		}
		// Each specialised validator is handed the index after the first byte.
		next := i + 1
		switch {
		case c == '{':
			return validobject(data, next)
		case c == '[':
			return validarray(data, next)
		case c == '"':
			return validstring(data, next)
		case c == '-' || (c >= '0' && c <= '9'):
			return validnumber(data, next)
		case c == 't':
			return validtrue(data, next)
		case c == 'f':
			return validfalse(data, next)
		case c == 'n':
			return validnull(data, next)
		}
		return i, false
	}
	return i, false
}
// validobject validates the remainder of a JSON object. i is the index just
// past the opening '{'. On success outi is the index just past the closing
// '}'.
//
// Members must have the form "key" : value and be separated by a single comma;
// between a comma and the next key only whitespace is permitted, and a
// trailing comma is rejected.
func validobject(data []byte, i int) (outi int, ok bool) {
	for ; i < len(data); i++ {
		switch data[i] {
		default:
			return i, false
		case ' ', '\t', '\n', '\r':
			continue
		case '}':
			return i + 1, true
		case '"':
		key:
			// i is at the opening quote of a member name.
			if i, ok = validstring(data, i+1); !ok {
				return i, false
			}
			if i, ok = validcolon(data, i); !ok {
				return i, false
			}
			if i, ok = validany(data, i); !ok {
				return i, false
			}
			// validcomma leaves i on the ',' or '}' it found.
			if i, ok = validcomma(data, i, '}'); !ok {
				return i, false
			}
			if data[i] == '}' {
				return i + 1, true
			}
			// data[i] is ','. The next non-whitespace byte must open the
			// next member name; anything else (including '}') is invalid.
			i++
			for ; i < len(data); i++ {
				switch data[i] {
				default:
					return i, false
				case ' ', '\t', '\n', '\r':
					continue
				case '"':
					goto key
				}
			}
			return i, false
		}
	}
	return i, false
}
// validcolon skips whitespace starting at index i and expects a ':'. On
// success outi is the index just past the colon; on failure it is the index
// of the offending byte, or len(data) when the input ran out.
func validcolon(data []byte, i int) (outi int, ok bool) {
	for i < len(data) {
		c := data[i]
		if c == ':' {
			return i + 1, true
		}
		if c != ' ' && c != '\t' && c != '\n' && c != '\r' {
			return i, false
		}
		i++
	}
	return i, false
}
// validcomma skips whitespace starting at index i and expects either a ','
// or the closing byte end. On success outi is the index OF that byte (it is
// not consumed), so the caller can tell which of the two was found.
func validcomma(data []byte, i int, end byte) (outi int, ok bool) {
	for ; i < len(data); i++ {
		c := data[i]
		if c == ' ' || c == '\t' || c == '\n' || c == '\r' {
			continue
		}
		// First non-whitespace byte decides the outcome.
		return i, c == ',' || c == end
	}
	return i, false
}
// validarray validates the remainder of a JSON array. i is the index just
// past the opening '['. On success outi is the index just past the closing
// ']'.
func validarray(data []byte, i int) (outi int, ok bool) {
	// Skip whitespace after the opening bracket.
	for i < len(data) {
		c := data[i]
		if c != ' ' && c != '\t' && c != '\n' && c != '\r' {
			break
		}
		i++
	}
	if i >= len(data) {
		return i, false
	}
	if data[i] == ']' {
		// Empty array.
		return i + 1, true
	}
	// One or more elements, each followed by ',' or the closing ']'.
	for i < len(data) {
		if i, ok = validany(data, i); !ok {
			return i, false
		}
		if i, ok = validcomma(data, i, ']'); !ok {
			return i, false
		}
		if data[i] == ']' {
			return i + 1, true
		}
		// Step over the comma; validany rejects a ']' that follows directly.
		i++
	}
	// Input ended right after a comma; the reported offset is one past
	// len(data).
	return i + 1, false
}
// validstring validates the remainder of a JSON string. i is the index just
// past the opening quote. On success outi is the index just past the closing
// quote.
//
// Bytes below 0x20 are rejected, and a backslash must be followed by one of
// " \ / b f n r t or by 'u' and exactly four hexadecimal digits. Input that
// ends in the middle of an escape sequence is reported as invalid.
func validstring(data []byte, i int) (outi int, ok bool) {
	for ; i < len(data); i++ {
		c := data[i]
		switch {
		case c < ' ':
			// Raw control characters are not allowed inside a string.
			return i, false
		case c == '"':
			return i + 1, true
		case c == '\\':
			i++
			if i >= len(data) {
				// Dangling backslash at the end of the input.
				return i, false
			}
			switch data[i] {
			default:
				return i, false
			case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
				// Simple one-byte escape.
			case 'u':
				for j := 0; j < 4; j++ {
					i++
					if i >= len(data) {
						return i, false
					}
					h := data[i]
					isHex := (h >= '0' && h <= '9') ||
						(h >= 'a' && h <= 'f') ||
						(h >= 'A' && h <= 'F')
					if !isHex {
						return i, false
					}
				}
			}
		}
	}
	// Ran out of input before the closing quote.
	return i, false
}
// validnumber validates a JSON number. i is the index just past the number's
// first byte, i.e. data[i-1] is the '-' or digit the caller dispatched on.
// On success outi is the index of the first byte that is not part of the
// number.
//
// Accepted form: an optional '-', then either a single '0' or a run of
// digits, then an optional fraction ('.' plus at least one digit), then an
// optional exponent ('e' or 'E', optional sign, at least one digit). A '-'
// that is not followed by a digit is rejected.
func validnumber(data []byte, i int) (outi int, ok bool) {
	// Step back onto the first byte of the number.
	i--
	// sign
	if data[i] == '-' {
		i++
	}
	// int: at least one digit is required, also right after the sign.
	if i == len(data) || data[i] < '0' || data[i] > '9' {
		return i, false
	}
	if data[i] == '0' {
		// A leading zero stands alone; a following digit is left unconsumed
		// for the caller to reject.
		i++
	} else {
		for i < len(data) && data[i] >= '0' && data[i] <= '9' {
			i++
		}
	}
	// frac
	if i == len(data) {
		return i, true
	}
	if data[i] == '.' {
		i++
		if i == len(data) || data[i] < '0' || data[i] > '9' {
			return i, false
		}
		i++
		for i < len(data) && data[i] >= '0' && data[i] <= '9' {
			i++
		}
	}
	// exp
	if i == len(data) {
		return i, true
	}
	if data[i] == 'e' || data[i] == 'E' {
		i++
		if i == len(data) {
			return i, false
		}
		if data[i] == '+' || data[i] == '-' {
			i++
		}
		if i == len(data) || data[i] < '0' || data[i] > '9' {
			return i, false
		}
		i++
		for i < len(data) && data[i] >= '0' && data[i] <= '9' {
			i++
		}
	}
	return i, true
}
// validtrue validates the tail of the literal "true". i is the index just
// past the leading 't'; on success outi is the index just past the literal.
func validtrue(data []byte, i int) (outi int, ok bool) {
	const rest = "rue"
	if i+len(rest) > len(data) {
		return i, false
	}
	if string(data[i:i+len(rest)]) != rest {
		return i, false
	}
	return i + len(rest), true
}
// validfalse validates the tail of the literal "false". i is the index just
// past the leading 'f'; on success outi is the index just past the literal.
func validfalse(data []byte, i int) (outi int, ok bool) {
	const rest = "alse"
	if i+len(rest) > len(data) {
		return i, false
	}
	if string(data[i:i+len(rest)]) != rest {
		return i, false
	}
	return i + len(rest), true
}
// validnull validates the tail of the literal "null". i is the index just
// past the leading 'n'; on success outi is the index just past the literal.
func validnull(data []byte, i int) (outi int, ok bool) {
	const rest = "ull"
	if i+len(rest) > len(data) {
		return i, false
	}
	if string(data[i:i+len(rest)]) != rest {
		return i, false
	}
	return i + len(rest), true
}
// Valid reports whether json is a single valid JSON value, optionally
// surrounded by whitespace.
func Valid(json string) bool {
	_, valid := validpayload([]byte(json), 0)
	return valid
}

View File

@ -901,6 +901,109 @@ func TestUnmarshal(t *testing.T) {
assert(t, str == Get(complicatedJSON, "LeftOut").String()) assert(t, str == Get(complicatedJSON, "LeftOut").String())
} }
// testvalid runs the validator over json and panics when the verdict differs
// from expect.
func testvalid(json string, expect bool) {
	if _, got := validpayload([]byte(json), 0); got != expect {
		panic("mismatch")
	}
}
func TestValidBasic(t *testing.T) {
testvalid("0", true)
testvalid("00", false)
testvalid("-00", false)
testvalid("-.", false)
testvalid("0.0", true)
testvalid("10.0", true)
testvalid("10e1", true)
testvalid("10EE", false)
testvalid("10E-", false)
testvalid("10E+", false)
testvalid("10E123", true)
testvalid("10E-123", true)
testvalid("10E-0123", true)
testvalid("", false)
testvalid(" ", false)
testvalid("{}", true)
testvalid("{", false)
testvalid("-", false)
testvalid("-1", true)
testvalid("-1.", false)
testvalid("-1.0", true)
testvalid(" -1.0", true)
testvalid(" -1.0 ", true)
testvalid("-1.0 ", true)
testvalid("-1.0 i", false)
testvalid("-1.0 i", false)
testvalid("true", true)
testvalid(" true", true)
testvalid(" true ", true)
testvalid(" True ", false)
testvalid(" tru", false)
testvalid("false", true)
testvalid(" false", true)
testvalid(" false ", true)
testvalid(" False ", false)
testvalid(" fals", false)
testvalid("null", true)
testvalid(" null", true)
testvalid(" null ", true)
testvalid(" Null ", false)
testvalid(" nul", false)
testvalid(" []", true)
testvalid(" [true]", true)
testvalid(" [ true, null ]", true)
testvalid(" [ true,]", false)
testvalid(`{"hello":"world"}`, true)
testvalid(`{ "hello": "world" }`, true)
testvalid(`{ "hello": "world", }`, false)
testvalid(`{"a":"b",}`, false)
testvalid(`{"a":"b","a"}`, false)
testvalid(`{"a":"b","a":}`, false)
testvalid(`{"a":"b","a":1}`, true)
testvalid(`{"a":"b","a": 1, "c":{"hi":"there"} }`, true)
testvalid(`{"a":"b","a": 1, "c":{"hi":"there", "easy":["going",{"mixed":"bag"}]} }`, true)
testvalid(`""`, true)
testvalid(`"`, false)
testvalid(`"\n"`, true)
testvalid(`"\"`, false)
testvalid(`"\\"`, true)
testvalid(`"a\\b"`, true)
testvalid(`"a\\b\\\"a"`, true)
testvalid(`"a\\b\\\uFFAAa"`, true)
testvalid(`"a\\b\\\uFFAZa"`, false)
testvalid(`"a\\b\\\uFFA"`, false)
testvalid(string(complicatedJSON), true)
testvalid(string(exampleJSON), true)
}
// jsonchars is the pool of JSON fragments that makeRandomJSONChars draws
// from.
var jsonchars = []string{
	"{", "[", ",", ":", "}", "]",
	"1", "0",
	"true", "false", "null",
	`""`, `"\""`, `"a"`,
}

// makeRandomJSONChars fills b with randomly chosen fragments from jsonchars
// laid end to end; the last fragment is cut off where b ends.
func makeRandomJSONChars(b []byte) {
	for filled := 0; filled < len(b); {
		frag := jsonchars[rand.Int()%len(jsonchars)]
		// copy truncates the fragment when it does not fit in what is left.
		filled += copy(b[filled:], frag)
	}
}
// TestValidRandom feeds the validator random input for about three seconds
// per phase: first raw random bytes, then random sequences of JSON fragments.
// The verdict is ignored; the test only exercises the validator on arbitrary
// input.
func TestValidRandom(t *testing.T) {
	rand.Seed(time.Now().UnixNano())
	buf := make([]byte, 100000)

	// hammer fills a random-length prefix of buf using fill and validates
	// it, repeating until three seconds have elapsed.
	hammer := func(fill func([]byte)) {
		began := time.Now()
		for time.Since(began) < time.Second*3 {
			n := rand.Int() % len(buf)
			fill(buf[:n])
			validpayload(buf[:n], 0)
		}
	}

	hammer(func(p []byte) { rand.Read(p) })
	hammer(makeRandomJSONChars)
}
type BenchStruct struct { type BenchStruct struct {
Widget struct { Widget struct {
Window struct { Window struct {