Updated logic for better ssa compilation in Go 1.7

This commit is contained in:
Josh Baker 2016-08-25 07:21:53 -07:00
parent d37f454044
commit 19f9404e51
3 changed files with 471 additions and 513 deletions

View File

@ -172,7 +172,7 @@ gjson.Get(json, "name.last")
## Check for the existence of a value ## Check for the existence of a value
Sometimes you may want to see if the value actually existed in the json document. Sometimes you just want to know if a value exists.
```go ```go
value := gjson.Get(json, "name.last") value := gjson.Get(json, "name.last")
@ -197,13 +197,13 @@ Benchmarks of GJSON alongside [encoding/json](https://golang.org/pkg/encoding/js
and [jsonparser](https://github.com/buger/jsonparser) and [jsonparser](https://github.com/buger/jsonparser)
``` ```
BenchmarkGJSONGet-8 3000000 373 ns/op 0 B/op 0 allocs/op BenchmarkGJSONGet-8 3000000 368 ns/op 0 B/op 0 allocs/op
BenchmarkJSONUnmarshalMap-8 600000 8884 ns/op 3048 B/op 69 allocs/op BenchmarkJSONUnmarshalMap-8 600000 9181 ns/op 3048 B/op 69 allocs/op
BenchmarkJSONUnmarshalStruct-8 600000 9045 ns/op 1832 B/op 69 allocs/op BenchmarkJSONUnmarshalStruct-8 600000 9256 ns/op 1832 B/op 69 allocs/op
BenchmarkJSONDecoder-8 300000 14134 ns/op 4224 B/op 184 allocs/op BenchmarkJSONDecoder-8 300000 14365 ns/op 4224 B/op 184 allocs/op
BenchmarkFFJSONLexer-8 1500000 3182 ns/op 896 B/op 8 allocs/op BenchmarkFFJSONLexer-8 1500000 3569 ns/op 896 B/op 8 allocs/op
BenchmarkEasyJSONLexer-8 3000000 932 ns/op 613 B/op 6 allocs/op BenchmarkEasyJSONLexer-8 3000000 973 ns/op 613 B/op 6 allocs/op
BenchmarkJSONParserGet-8 3000000 444 ns/op 21 B/op 0 allocs/op BenchmarkJSONParserGet-8 3000000 531 ns/op 21 B/op 0 allocs/op
``` ```
JSON document used: JSON document used:

793
gjson.go
View File

@ -430,98 +430,113 @@ func (t Result) Value() interface{} {
} }
type part struct { func parseString(json string, i int, raw bool) (int, string, bool, bool) {
wild bool var s = i
key string for ; i < len(json); i++ {
if json[i] > '\\' {
continue
}
if json[i] == '"' {
if raw {
return i + 1, json[s-1 : i+1], false, true
} else {
return i + 1, json[s:i], false, true
}
}
if json[i] == '\\' {
i++
for ; i < len(json); i++ {
if json[i] > '\\' {
continue
}
if json[i] == '"' {
// look for an escaped slash
if json[i-1] == '\\' {
n := 0
for j := i - 2; j > 0; j-- {
if json[j] != '\\' {
break
}
n++
}
if n%2 == 0 {
continue
}
}
if raw {
return i + 1, json[s-1 : i+1], true, true
} else {
return i + 1, json[s:i], true, true
}
}
}
break
}
}
if raw {
return i, json[s-1:], false, false
} else {
return i, json[s:], false, false
}
} }
type frame struct { func parseNumber(json string, i int) (int, string) {
key string var s = i
count int i++
stype byte for ; i < len(json); i++ {
if json[i] <= ' ' || json[i] == ',' || json[i] == ']' || json[i] == '}' {
return i, json[s:i]
}
}
return i, json[s:]
} }
// Get searches json for the specified path. func parseLiteral(json string, i int) (int, string) {
// A path is in dot syntax, such as "name.last" or "age". var s = i
// This function expects that the json is well-formed, and does not validate. i++
// Invalid json will not panic, but it may return back unexpected results. for ; i < len(json); i++ {
// When the value is found it's returned immediately. if json[i] < 'a' || json[i] > 'z' {
// return i, json[s:i]
// A path is a series of keys separated by a dot. }
// A key may contain special wildcard characters '*' and '?'. }
// To access an array value use the index as the key. return i, json[s:]
// To get the number of elements in an array or to access a child path, use the '#' character. }
// The dot and wildcard character can be escaped with '\'.
//
// {
// "name": {"first": "Tom", "last": "Anderson"},
// "age":37,
// "children": ["Sara","Alex","Jack"],
// "friends": [
// {"first": "James", "last": "Murphy"},
// {"first": "Roger", "last": "Craig"}
// ]
// }
// "name.last" >> "Anderson"
// "age" >> 37
// "children.#" >> 3
// "children.1" >> "Alex"
// "child*.2" >> "Jack"
// "c?ildren.0" >> "Sara"
// "friends.#.first" >> [ "James", "Roger" ]
//
func Get(json string, path string) Result {
var s int // starting index variable
var wild bool // wildcard indicator
var parts = make([]part, 0, 4) // parsed path parts
var i int // index of current json character
var depth int // the current stack depth
var f frame // the current frame
var matched bool // flag used for key/part matching
var stack = make([]frame, 1, 4) // the frame stack
var value Result // the final value, also used for temp store
var vc byte // the current token value chacter type
var arrch bool
var alogok bool
var alogkey string
var alog []int
var uc bool
// parse the path into multiple parts. func parseArrayPath(path string) (
part string, npath string, more bool, alogok bool, arrch bool, alogkey string,
) {
for i := 0; i < len(path); i++ {
if path[i] == '.' {
return path[:i], path[i+1:], true, alogok, arrch, alogkey
}
if path[i] == '#' {
arrch = true
if i == 0 && len(path) > 1 && path[1] == '.' {
alogok = true
alogkey = path[2:]
path = path[:1]
}
continue
}
}
return path, "", false, alogok, arrch, alogkey
}
func parseObjectPath(path string) (
part string, npath string, wild bool, uc bool, more bool,
) {
for i := 0; i < len(path); i++ { for i := 0; i < len(path); i++ {
if path[i]&0x60 == 0x60 { if path[i]&0x60 == 0x60 {
// alpha lowercase // alpha lowercase
continue continue
} }
if path[i] >= 'A' && path[i] <= 'Z' {
continue
}
if path[i] == '.' { if path[i] == '.' {
// append a new part return path[:i], path[i+1:], wild, uc, true
parts = append(parts, part{wild: wild, key: path[s:i]})
if wild {
wild = false // reset the wild flag
}
// set the starting index to one past the dot.
s = i + 1
continue
}
if (path[i] >= '0' && path[i] <= '9') || path[i] == '_' {
continue
} }
if path[i] == '*' || path[i] == '?' { if path[i] == '*' || path[i] == '?' {
wild = true wild = true
continue continue
} }
if path[i] == '#' {
arrch = true
if s == i && i+1 < len(path) && path[i+1] == '.' {
alogok = true
alogkey = path[i+2:]
path = path[:i+1]
}
continue
}
if path[i] > 0x7f { if path[i] > 0x7f {
uc = true uc = true
continue continue
@ -529,7 +544,7 @@ func Get(json string, path string) Result {
if path[i] == '\\' { if path[i] == '\\' {
// go into escape mode. this is a slower path that // go into escape mode. this is a slower path that
// strips off the escape character from the part. // strips off the escape character from the part.
epart := []byte(path[s:i]) epart := []byte(path[:i])
i++ i++
if i < len(path) { if i < len(path) {
epart = append(epart, path[i]) epart = append(epart, path[i])
@ -546,255 +561,27 @@ func Get(json string, path string) Result {
} }
continue continue
} else if path[i] == '.' { } else if path[i] == '.' {
parts = append(parts, part{ return string(epart), path[i+1:], wild, uc, true
wild: wild, key: string(epart),
})
if wild {
wild = false
}
s = i + 1
i++
goto next_part
} else if path[i] == '*' || path[i] == '?' { } else if path[i] == '*' || path[i] == '?' {
wild = true wild = true
} else if path[i] == '#' {
arrch = true
if s == i && i+1 < len(path) && path[i+1] == '.' {
alogok = true
alogkey = path[i+2:]
path = path[:i+1]
}
} }
epart = append(epart, path[i]) epart = append(epart, path[i])
} }
} }
// append the last part // append the last part
parts = append(parts, part{wild: wild, key: string(epart)}) return string(epart), "", wild, uc, false
goto end_parts
next_part:
continue
} }
} }
// append the last part return path, "", wild, uc, false
parts = append(parts, part{wild: wild, key: path[s:]})
end_parts:
i = 0
// look for first delimiter. only allow arrays and objects, other
// json types will fail. it's ok for control characters to passthrough.
for ; i < len(json); i++ {
if json[i] == '{' {
f.stype = '{'
i++
stack[0].stype = f.stype
break
} else if json[i] == '[' {
f.stype = '['
stack[0].stype = f.stype
i++
break
} else if json[i] <= ' ' {
continue
} else {
return Result{}
}
} }
// assume that the depth is at least one func squashObjectOrArray(json string, i int) (int, string) {
depth = 1 // expects that the lead character is a '[' or '{'
// read the next key from the json string
read_key:
if f.stype == '[' {
// for arrays we use the index of the value as the key.
// so "0" is the key for the first value, and "10" is the
// key for the 10th value.
f.key = strconv.FormatInt(int64(f.count), 10)
f.count++
if alogok && depth == len(parts) {
alog = append(alog, i)
}
} else {
// for objects we must parse the next string. this string will
// become the key that is compared against the path parts.
for ; i < len(json); i++ {
// begin key string reading routine.
if json[i] == '"' {
i++
// set the starting index. the first double-quote has already
// been read.
s = i
// loop through each character in the string looking for the
// the double-quote termination character. it's possible that
// the string contains an escape slash character. if so, we
// must do a nested loop that will look for an isolated
// double-quote terminator.
for ; i < len(json); i++ {
if json[i] > '\\' {
continue
}
if json[i] == '"' {
// a simple string that contains no escape characters.
// assign this to the current frame key and we are
// done parsing the key.
f.key = json[s:i]
i++
break
}
if json[i] == '\\' {
// escape character detected. we now look for the
// the double-quote terminator.
i++
for ; i < len(json); i++ {
if json[i] == '"' {
// possibly the end of the string, but let's
// look to see if the previous character was
// an escape slash. if so then we must keep
// reading backwards to see if the slash has a
// prefixed slashed, and so forth.
if json[i-1] == '\\' {
n := 0
for j := i - 2; j > s-1; j-- {
if json[j] != '\\' {
break
}
n++
}
if n%2 == 0 {
// the double-quote is not a terminator.
// keep reading the string.
continue
}
}
// we found the correct double-quote terminator.
// stop reading the string.
break
}
}
// the string contains escape sequences so we must
// unescape and then assign to the current frame key.
// done parsing the key
f.key = unescape(json[s:i])
i++
break
}
}
break
}
// end of string key reading routine
}
}
// we have a brand new (possibly shiny) key.
// is it the key that we are looking for?
if parts[depth-1].wild {
// the path part contains a wildcard character. we must do a wildcard
// match to determine if it truly matches.
matched = wildcardMatch(f.key, parts[depth-1].key, uc)
} else {
// just a straight up equality check
matched = parts[depth-1].key == f.key
}
// read the value
for ; i < len(json); i++ {
// any thing less than a double-quote is likely whitespace.
// just burn past these.
if json[i] < '"' {
continue
}
// anything less that a dash is likely a double-quote. let's
// assume that it is.
if json[i] < '-' {
i++
vc = '"'
// defer reading the string value until we know for sure
// that we want it. if we don't want it, then we will
// parse it using a quicker method than if we do want it.
goto proc_val
}
// any character less than an open bracket is likely a number.
if json[i] < '[' {
// with one exception, the colon character. we do not care
// about the colon character. just burn past it.
if json[i] == ':' {
continue
}
vc = '0'
s = i
i++
// look for any character that might terminate a number
// break on whitespace, comma, ']', and '}'.
for ; i < len(json); i++ {
// less than dash might have valid characters
if json[i] <= '-' {
if json[i] <= ' ' || json[i] == ',' {
// break on whitespace and comma
break
}
// could be a '+' or '-'. let's assume so.
continue
}
if json[i] < ']' {
// probably a valid number
continue
}
if json[i] == 'e' || json[i] == 'E' {
// allow for exponential numbers
continue
}
// likely a ']' or '}'
break
}
// we have raw number. jump to the process value routine.
goto proc_val
}
// any character less than ']' is likely '['. let's assume
// it's an open-array character.
if json[i] < ']' {
i++
vc = '['
// jump to process delimiter routine.
goto proc_nested
}
// any character less than 'u' likely means tha the value is
// 'true', 'false', or 'null'.
if json[i] < 'u' {
vc = json[i] // assign the vc token character to the actual.
s = i
i++
for ; i < len(json); i++ {
// let's pick up any non-alpha lowercase character as the
// terminator. it doesn't matter.
if json[i] < 'a' || json[i] > 'z' {
break
}
}
// we have raw literal. jump to the process value routine.
goto proc_val
}
// if we reached this far, then the value must be a nested object.
i++
vc = '{'
// jump to process delimiter routine.
goto proc_nested
}
vc = 0
// ran out of json buffer
if i >= len(json) {
return Result{}
}
// process nested array or object
proc_nested:
if (matched && depth == len(parts)) || !matched {
// begin squash
// squash the value, ignoring all nested arrays and objects. // squash the value, ignoring all nested arrays and objects.
s = i - 1
// the first '[' or '{' has already been read // the first '[' or '{' has already been read
s := i
i++
depth := 1 depth := 1
squash:
for ; i < len(json); i++ { for ; i < len(json); i++ {
if json[i] >= '"' && json[i] <= '}' { if json[i] >= '"' && json[i] <= '}' {
switch json[i] { switch json[i] {
@ -828,122 +615,238 @@ proc_nested:
depth-- depth--
if depth == 0 { if depth == 0 {
i++ i++
break squash return i, json[s:i]
} }
} }
} }
} }
// end squash return i, json[s:]
// the 'i' and 's' values should fall-though to the proc_val function
} }
// process the value func parseObject(json string, i int, path string, value *Result) (int, bool) {
proc_val: var match, kesc, vesc, ok, hit bool
if matched { var key, val string
// hit, that's good! part, npath, wild, uc, more := parseObjectPath(path)
if depth == len(parts) { for i < len(json) {
switch vc {
case '{', '[':
value.Type = JSON
value.Raw = json[s:i]
case 'n':
value.Type = Null
value.Raw = json[s:i]
case 't':
value.Type = True
value.Raw = json[s:i]
case 'f':
value.Type = False
value.Raw = json[s:i]
case '"':
value.Type = String
// readstr
// the val has not been read yet
// the first double-quote has already been read
s = i
for ; i < len(json); i++ { for ; i < len(json); i++ {
if json[i] > '\\' {
continue
}
if json[i] == '"' { if json[i] == '"' {
value.Raw = json[s-1 : i+1] i, key, kesc, ok = parseString(json, i+1, false)
value.Str = json[s:i]
break break
} }
if json[i] == '\\' { if json[i] == '}' {
i++ return i + 1, false
for ; i < len(json); i++ {
if json[i] > '\\' {
continue
}
if json[i] == '"' {
// look for an escaped slash
if json[i-1] == '\\' {
n := 0
for j := i - 2; j > s-1; j-- {
if json[j] != '\\' {
break
}
n++
}
if n%2 == 0 {
continue
} }
} }
break if !ok {
return i, false
} }
} if wild {
value.Raw = json[s-1 : i+1] if kesc {
value.Str = unescape(json[s:i]) match = wildcardMatch(unescape(key), part, uc)
break
}
}
// end readstr
case '0':
value.Type = Number
value.Raw = json[s:i]
value.Num, _ = strconv.ParseFloat(value.Raw, 64)
}
return value
} else { } else {
f = frame{stype: vc} match = wildcardMatch(key, part, uc)
stack = append(stack, f) }
depth++ } else {
goto read_key if kesc {
match = part == unescape(key)
} else {
match = part == key
} }
} }
if vc == '"' { hit = match && !more
// readstr
// the val has not been read yet. we can read and throw away.
// the first double-quote has already been read
s = i
for ; i < len(json); i++ {
if json[i] == '"' {
// look for an escaped slash
if json[i-1] == '\\' {
n := 0
for j := i - 2; j > s-1; j-- {
if json[j] != '\\' {
break
}
n++
}
if n%2 == 0 {
continue
}
}
break
}
}
i++
// end readstr
}
// read to the comma or end of object
for ; i < len(json); i++ { for ; i < len(json); i++ {
switch json[i] { switch json[i] {
case '}', ']': default:
if arrch && parts[depth-1].key == "#" { continue
case '"':
i++
i, val, vesc, ok = parseString(json, i, true)
if !ok {
return i, false
}
if hit {
if vesc {
value.Str = unescape(val[1 : len(val)-1])
} else {
value.Str = val[1 : len(val)-1]
}
value.Raw = val
value.Type = String
return i, true
}
case '{':
if match && !hit {
i, hit = parseObject(json, i+1, npath, value)
if hit {
return i, true
}
} else {
i, val = squashObjectOrArray(json, i)
if hit {
value.Raw = val
value.Type = JSON
return i, true
}
}
case '[':
if match && !hit {
i, hit = parseArray(json, i+1, npath, value)
if hit {
return i, true
}
} else {
i, val = squashObjectOrArray(json, i)
if hit {
value.Raw = val
value.Type = JSON
return i, true
}
}
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
i, val = parseNumber(json, i)
if hit {
value.Raw = val
value.Type = Number
value.Num, _ = strconv.ParseFloat(val, 64)
return i, true
}
case 't', 'f', 'n':
vc := json[i]
i, val = parseLiteral(json, i)
if hit {
value.Raw = val
switch vc {
case 't':
value.Type = True
case 'f':
value.Type = False
}
return i, true
}
}
break
}
}
return i, false
}
func parseArray(json string, i int, path string, value *Result) (int, bool) {
var match, vesc, ok, hit bool
var val string
var h int
var alog []int
var partidx int
part, npath, more, alogok, arrch, alogkey := parseArrayPath(path)
if !arrch {
n, err := strconv.ParseUint(part, 10, 64)
if err != nil {
partidx = -1
} else {
partidx = int(n)
}
}
for i < len(json) {
if !arrch {
match = partidx == h
hit = match && !more
}
h++
if alogok {
alog = append(alog, i)
}
for ; i < len(json); i++ {
switch json[i] {
default:
continue
case '"':
i++
i, val, vesc, ok = parseString(json, i, true)
if !ok {
return i, false
}
if hit {
if alogok {
break
}
if vesc {
value.Str = unescape(val[1 : len(val)-1])
} else {
value.Str = val[1 : len(val)-1]
}
value.Raw = val
value.Type = String
return i, true
}
case '{':
if match && !hit {
i, hit = parseObject(json, i+1, npath, value)
if hit {
if alogok {
break
}
return i, true
}
} else {
i, val = squashObjectOrArray(json, i)
if hit {
if alogok {
break
}
value.Raw = val
value.Type = JSON
return i, true
}
}
case '[':
if match && !hit {
i, hit = parseArray(json, i+1, npath, value)
if hit {
if alogok {
break
}
return i, true
}
} else {
i, val = squashObjectOrArray(json, i)
if hit {
if alogok {
break
}
value.Raw = val
value.Type = JSON
return i, true
}
}
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
i, val = parseNumber(json, i)
if hit {
if alogok {
break
}
value.Raw = val
value.Type = Number
value.Num, _ = strconv.ParseFloat(val, 64)
return i, true
}
case 't', 'f', 'n':
vc := json[i]
i, val = parseLiteral(json, i)
if hit {
if alogok {
break
}
value.Raw = val
switch vc {
case 't':
value.Type = True
case 'f':
value.Type = False
}
return i, true
}
case ']':
// TODO... '#' counter?
if arrch && part == "#" {
if alogok { if alogok {
var jsons = make([]byte, 0, 64) var jsons = make([]byte, 0, 64)
jsons = append(jsons, '[') jsons = append(jsons, '[')
@ -957,24 +860,72 @@ proc_val:
} }
} }
jsons = append(jsons, ']') jsons = append(jsons, ']')
return Result{Type: JSON, Raw: string(jsons)} value.Type = JSON
value.Raw = string(jsons)
return i + 1, true
} else { } else {
return Result{Type: Number, Num: float64(f.count)} if alogok {
break
}
value.Raw = val
value.Type = Number
value.Num = float64(h - 1)
return i + 1, true
} }
} }
// step the stack back return i + 1, false
depth--
if depth == 0 {
return Result{}
} }
stack = stack[:len(stack)-1] break
f = stack[len(stack)-1] }
case ',': }
return i, false
}
// Get searches json for the specified path.
// A path is in dot syntax, such as "name.last" or "age".
// This function expects that the json is well-formed, and does not validate.
// Invalid json will not panic, but it may return back unexpected results.
// When the value is found it's returned immediately.
//
// A path is a series of keys separated by a dot.
// A key may contain special wildcard characters '*' and '?'.
// To access an array value use the index as the key.
// To get the number of elements in an array or to access a child path, use the '#' character.
// The dot and wildcard character can be escaped with '\'.
//
// {
// "name": {"first": "Tom", "last": "Anderson"},
// "age":37,
// "children": ["Sara","Alex","Jack"],
// "friends": [
// {"first": "James", "last": "Murphy"},
// {"first": "Roger", "last": "Craig"}
// ]
// }
// "name.last" >> "Anderson"
// "age" >> 37
// "children.#" >> 3
// "children.1" >> "Alex"
// "child*.2" >> "Jack"
// "c?ildren.0" >> "Sara"
// "friends.#.first" >> [ "James", "Roger" ]
//
func Get(json, path string) Result {
var i int
var value Result
for ; i < len(json); i++ {
if json[i] == '{' {
i++ i++
goto read_key parseObject(json, i, path, &value)
break
}
if json[i] == '[' {
i++
parseArray(json, i, path, &value)
break
} }
} }
return Result{} return value
} }
// unescape unescapes a string // unescape unescapes a string

View File

@ -126,6 +126,9 @@ var basicJSON = `{"age":100, "name":{"here":"B\\\"R"},
}` }`
func TestBasic(t *testing.T) { func TestBasic(t *testing.T) {
//fmt.Printf("%v\n", Parse(basicJSON).Get("items.3.tags.#").String())
//return
var mtok Result var mtok Result
mtok = Get(basicJSON, "loggy") mtok = Get(basicJSON, "loggy")
if mtok.Type != JSON { if mtok.Type != JSON {
@ -733,6 +736,10 @@ var exampleJSON = `{
} }
}` }`
func TestNewParse(t *testing.T) {
//fmt.Printf("%v\n", parse2(exampleJSON, "widget").String())
}
func TestUnmarshalMap(t *testing.T) { func TestUnmarshalMap(t *testing.T) {
var m1 = Parse(exampleJSON).Value().(map[string]interface{}) var m1 = Parse(exampleJSON).Value().(map[string]interface{})
var m2 map[string]interface{} var m2 map[string]interface{}