Added GetMany. Query many paths at once.

The `GetMany(json, paths...)` function can be used to get multiple
values at one time from the same json string.
This is preferrable to calling `Get(json, path)` over and over.
It's also optimized to scan over a JSON payload once.

This addresses a feature request by @FZambia, and closes #13.
This commit is contained in:
Josh Baker 2016-11-28 15:10:11 -07:00
parent 72b0cad1c1
commit ae5d307631
3 changed files with 691 additions and 36 deletions

View File

@ -132,7 +132,7 @@ The `result.Value()` function returns an `interface{}` which requires type asser
The `result.Array()` funtion returns back an array of values.
The `result.Array()` function returns back an array of values.
If the result represents a non-existent value, then an empty array will be returned.
If the result is not a JSON array, the return value will be an array containing one result.
@ -213,6 +213,19 @@ if gjson.Get(json, "name.last").Exists(){
}
```
## Getting many paths at once
The `GetMany(json, paths...)` function can be used to get multiple values at one time from the same json string.
This is preferrable to calling `Get(json, path)` over and over.
It's also optimized to scan over a JSON payload once.
```go
results := gjson.GetMany(json, "name.first", "name.last", "age")
```
The return value is a `[]Result` and it will always contain exactly the same number of items as the input paths.
## Unmarshal to a map
To unmarshal to a `map[string]interface{}`:
@ -248,6 +261,9 @@ if result.Index > 0 {
This is a best-effort no allocation sub slice of the original json. This method utilizes the `result.Index` field, which is the position of the raw data in the original json. It's possible that the value of `result.Index` equals zero, in which case the `result.Raw` is converted to a `[]byte`.
## Performance
Benchmarks of GJSON alongside [encoding/json](https://golang.org/pkg/encoding/json/),
@ -266,6 +282,17 @@ BenchmarkEasyJSONLexer-8 3000000 938 ns/op 613 B/op
BenchmarkJSONParserGet-8 3000000 442 ns/op 21 B/op 0 allocs/op
```
Benchmarks for the `GetMany` function:
```
BenchmarkGJSONGetMany4Paths-8 4000000 319 ns/op 112 B/op 0 allocs/op
BenchmarkGJSONGetMany8Paths-8 8000000 218 ns/op 56 B/op 0 allocs/op
BenchmarkGJSONGetMany16Paths-8 16000000 160 ns/op 56 B/op 0 allocs/op
BenchmarkGJSONGetMany32Paths-8 32000000 130 ns/op 64 B/op 0 allocs/op
BenchmarkGJSONGetMany64Paths-8 64000000 117 ns/op 64 B/op 0 allocs/op
BenchmarkGJSONGetMany128Paths-8 128000000 109 ns/op 64 B/op 0 allocs/op
```
JSON document used:
```json
@ -304,6 +331,20 @@ widget.image.hOffset
widget.text.onMouseUp
```
For the `GetMany` benchmarks these paths are used:
```
widget.window.name
widget.image.hOffset
widget.text.onMouseUp
widget.window.title
widget.image.alignment
widget.text.style
widget.window.height
widget.image.src
widget.text.data
widget.text.size
```
*These benchmarks were run on a MacBook Pro 15" 2.8 GHz Intel Core i7 using Go 1.7.*

484
gjson.go
View File

@ -1141,7 +1141,7 @@ type parseContext struct {
// Invalid json will not panic, but it may return back unexpected results.
// When the value is found it's returned immediately.
//
// A path is a series of keys seperated by a dot.
// A path is a series of keys searated by a dot.
// A key may contain special wildcard characters '*' and '?'.
// To access an array value use the index as the key.
// To get the number of elements in an array or to access a child path, use the '#' character.
@ -1190,14 +1190,7 @@ func Get(json, path string) Result {
}
return c.value
}
// GetBytes searches json for the specified path.
// If working with bytes, this method preferred over Get(string(data), path)
func GetBytes(json []byte, path string) Result {
var result Result
if json != nil {
// unsafe cast to string
result = Get(*(*string)(unsafe.Pointer(&json)), path)
func fromBytesGet(result Result) Result {
// safely get the string headers
rawhi := *(*reflect.StringHeader)(unsafe.Pointer(&result.Raw))
strhi := *(*reflect.StringHeader)(unsafe.Pointer(&result.Str))
@ -1232,6 +1225,17 @@ func GetBytes(json []byte, path string) Result {
result.Raw = string(*(*[]byte)(unsafe.Pointer(&rawh)))
result.Str = string(*(*[]byte)(unsafe.Pointer(&strh)))
}
return result
}
// GetBytes searches json for the specified path.
// If working with bytes, this method preferred over Get(string(data), path)
func GetBytes(json []byte, path string) Result {
var result Result
if json != nil {
// unsafe cast to string
result = Get(*(*string)(unsafe.Pointer(&json)), path)
result = fromBytesGet(result)
}
return result
}
@ -1357,3 +1361,465 @@ func stringLessInsensitive(a, b string) bool {
}
return len(a) < len(b)
}
// parseAny parses the next value from a json string.
// A Result is returned when the hit param is set.
// The return values are (i int, res Result, ok bool)
func parseAny(json string, i int, hit bool) (int, Result, bool) {
var res Result
var val string
for ; i < len(json); i++ {
if json[i] == '{' || json[i] == '[' {
i, val = parseSquash(json, i)
if hit {
res.Raw = val
res.Type = JSON
}
return i, res, true
}
if json[i] <= ' ' {
continue
}
switch json[i] {
case '"':
i++
var vesc bool
var ok bool
i, val, vesc, ok = parseString(json, i)
if !ok {
return i, res, false
}
if hit {
res.Type = String
res.Raw = val
if vesc {
res.Str = unescape(val[1 : len(val)-1])
} else {
res.Str = val[1 : len(val)-1]
}
}
return i, res, true
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
i, val = parseNumber(json, i)
if hit {
res.Raw = val
res.Type = Number
res.Num, _ = strconv.ParseFloat(val, 64)
}
return i, res, true
case 't', 'f', 'n':
vc := json[i]
i, val = parseLiteral(json, i)
if hit {
res.Raw = val
switch vc {
case 't':
res.Type = True
case 'f':
res.Type = False
}
return i, res, true
}
}
}
return i, res, false
}
var ( // used for testing
testWatchForFallback bool
testLastWasFallback bool
)
// areSimplePaths returns true if all the paths are simple enough
// to parse quickly for GetMany(). Allows alpha-numeric, dots,
// underscores, and the dollar sign. It does not allow non-alnum,
// escape characters, or keys which start with a numbers.
// For example:
// "name.last" == OK
// "user.id0" == OK
// "user.ID" == OK
// "user.first_name" == OK
// "user.firstName" == OK
// "user.0item" == BAD
// "user.#id" == BAD
// "user\.name" == BAD
func areSimplePaths(paths []string) bool {
for _, path := range paths {
var fi int // first key index, for keys with numeric prefix
for i := 0; i < len(path); i++ {
if path[i] >= 'a' && path[i] <= 'z' {
// a-z is likely to be the highest frequency charater.
continue
}
if path[i] == '.' {
fi = i + 1
continue
}
if path[i] >= 'A' && path[i] <= 'Z' {
continue
}
if path[i] == '_' || path[i] == '$' {
continue
}
if i > fi && path[i] >= '0' && path[i] <= '9' {
continue
}
return false
}
}
return true
}
// GetMany searches json for the multiple paths.
// The return value is a Result array where the number of items
// will be equal to the number of input paths.
func GetMany(json string, paths ...string) []Result {
if len(paths) < 4 {
if testWatchForFallback {
testLastWasFallback = false
}
switch len(paths) {
case 0:
// return nil when no paths are specified.
return nil
case 1:
return []Result{Get(json, paths[0])}
case 2:
return []Result{Get(json, paths[0]), Get(json, paths[1])}
case 3:
return []Result{Get(json, paths[0]), Get(json, paths[1]), Get(json, paths[2])}
}
}
var results []Result
var ok bool
var i int
if len(paths) > 512 {
// we can only support up to 512 paths. Is that too many?
goto fallback
}
if !areSimplePaths(paths) {
// If there is even one path that is not considered "simple" then
// we need to use the fallback method.
goto fallback
}
// locate the object token.
for ; i < len(json); i++ {
if json[i] == '{' {
i++
break
}
if json[i] <= ' ' {
continue
}
goto fallback
}
// use the call function table.
if len(paths) <= 8 {
results, ok = getMany8(json, i, paths)
} else if len(paths) <= 16 {
results, ok = getMany16(json, i, paths)
} else if len(paths) <= 32 {
results, ok = getMany32(json, i, paths)
} else if len(paths) <= 64 {
results, ok = getMany64(json, i, paths)
} else if len(paths) <= 128 {
results, ok = getMany128(json, i, paths)
} else if len(paths) <= 256 {
results, ok = getMany256(json, i, paths)
} else if len(paths) <= 512 {
results, ok = getMany512(json, i, paths)
}
if !ok {
// there was some fault while parsing. we should try the
// fallback method. This could result in performance
// degregation in some cases.
goto fallback
}
if testWatchForFallback {
testLastWasFallback = false
}
return results
fallback:
results = results[:0]
for i := 0; i < len(paths); i++ {
results = append(results, Get(json, paths[i]))
}
if testWatchForFallback {
testLastWasFallback = true
}
return results
}
// GetManyBytes searches json for the specified path.
// If working with bytes, this method preferred over
// GetMany(string(data), paths...)
func GetManyBytes(json []byte, paths ...string) []Result {
if json == nil {
return GetMany("", paths...)
}
results := GetMany(*(*string)(unsafe.Pointer(&json)), paths...)
for i := range results {
results[i] = fromBytesGet(results[i])
}
return results
}
// parseGetMany parses a json object for keys that match against the callers
// paths. It's a best-effort attempt and quickly locating and assigning the
// values to the []Result array. If there are failures such as bad json, or
// invalid input paths, or too much recursion, the function will exit with a
// return value of 'false'.
func parseGetMany(
json string, i int,
level uint, kplen int,
paths []string, completed []bool, matches []uint64, results []Result,
) (int, bool) {
if level > 62 {
// The recursion level is limited because the matches []uint64
// array cannot handle more the 64-bits.
return i, false
}
// At this point the last character read was a '{'.
// Read all object keys and try to match against the paths.
var key string
var val string
var vesc, ok bool
next_key:
for ; i < len(json); i++ {
if json[i] == '"' {
// read the key
i, val, vesc, ok = parseString(json, i+1)
if !ok {
return i, false
}
if vesc {
// the value is escaped
key = unescape(val[1 : len(val)-1])
} else {
// just a plain old ascii key
key = val[1 : len(val)-1]
}
var hasMatch bool
var parsedVal bool
var valOrgIndex int
var valPathIndex int
for j := 0; j < len(key); j++ {
if key[j] == '.' {
// we need to look for keys with dot and ignore them.
if i, _, ok = parseAny(json, i, false); !ok {
return i, false
}
continue next_key
}
}
var usedPaths int
// loop through paths and look for matches
for j := 0; j < len(paths); j++ {
if completed[j] {
usedPaths++
// ignore completed paths
continue
}
if level > 0 && (matches[j]>>(level-1))&1 == 0 {
// ignore unmatched paths
usedPaths++
continue
}
// try to match the key to the path
// this is spaghetti code but the idea is to minimize
// calls and variable assignments when comparing the
// key to paths
if len(paths[j])-kplen >= len(key) {
i, k := kplen, 0
for ; k < len(key); k, i = k+1, i+1 {
if key[k] != paths[j][i] {
// no match
goto nomatch
}
}
if i < len(paths[j]) {
if paths[j][i] == '.' {
// matched, but there still more keys in the path
goto match_not_atend
}
}
// matched and at the end of the path
goto match_atend
}
// no match, jump to the nomatch label
goto nomatch
match_atend:
// found a match
// at the end of the path. we must take the value.
usedPaths++
if !parsedVal {
// the value has not been parsed yet. let's do so.
valOrgIndex = i // keep track of the current position.
i, results[j], ok = parseAny(json, i, true)
if !ok {
return i, false
}
parsedVal = true
valPathIndex = j
} else {
results[j] = results[valPathIndex]
}
// mark as complete
completed[j] = true
// jump over the match_not_atend label
goto nomatch
match_not_atend:
// found a match
// still in the middle of the path.
usedPaths++
// mark the path as matched
matches[j] |= 1 << level
if !hasMatch {
hasMatch = true
}
nomatch: // noop label
}
if !parsedVal {
if hasMatch {
// we found a match and the value has not been parsed yet.
// let's find out if the next value type is an object.
for ; i < len(json); i++ {
if json[i] <= ' ' || json[i] == ':' {
continue
}
break
}
if i < len(json) {
if json[i] == '{' {
// it's an object. let's go deeper
i, ok = parseGetMany(json, i+1, level+1, kplen+len(key)+1, paths, completed, matches, results)
if !ok {
return i, false
}
} else {
// not an object. just parse and ignore.
if i, _, ok = parseAny(json, i, false); !ok {
return i, false
}
}
}
} else {
// Since there was no matches we can just parse the value and
// ignore the result.
if i, _, ok = parseAny(json, i, false); !ok {
return i, false
}
}
} else if hasMatch && len(results[valPathIndex].Raw) > 0 && results[valPathIndex].Raw[0] == '{' {
// The value was already parsed and the value type is an object.
// Rewind the json index and let's parse deeper.
i = valOrgIndex
for ; i < len(json); i++ {
if json[i] == '{' {
break
}
}
i, ok = parseGetMany(json, i+1, level+1, kplen+len(key)+1, paths, completed, matches, results)
if !ok {
return i, false
}
}
if usedPaths == len(paths) {
// all paths have been used, either completed or matched.
// we should stop parsing this object to save CPU cycles.
if level > 0 && i < len(json) {
i, _ = parseSquash(json, i)
}
return i, true
}
} else if json[i] == '}' {
// reached the end of the object. end it here.
return i + 1, true
}
}
return i, true
}
// Call table for GetMany. Using an isolated function allows for allocating
// arrays with know capacities on the stack, as opposed to dynamically
// allocating on the heap. This can provide a tremendous performance boost
// by avoiding the GC.
func getMany8(json string, i int, paths []string) ([]Result, bool) {
const max = 8
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}
func getMany16(json string, i int, paths []string) ([]Result, bool) {
const max = 16
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}
func getMany32(json string, i int, paths []string) ([]Result, bool) {
const max = 32
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}
func getMany64(json string, i int, paths []string) ([]Result, bool) {
const max = 64
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}
func getMany128(json string, i int, paths []string) ([]Result, bool) {
const max = 128
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}
func getMany256(json string, i int, paths []string) ([]Result, bool) {
const max = 256
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}
func getMany512(json string, i int, paths []string) ([]Result, bool) {
const max = 512
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}

View File

@ -468,6 +468,96 @@ func TestSingleArrayValue(t *testing.T) {
}
var manyJSON = ` {
"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{
"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{
"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{
"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{
"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{
"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{
"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"a":{"hello":"world"
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
"position":{"type":"Point","coordinates":[-115.24,33.09]},
"loves":["world peace"],
"name":{"last":"Anderson","first":"Nancy"},
"age":31
"":{"a":"emptya","b":"emptyb"},
"name.last":"Yellow",
"name.first":"Cat",
}`
func combine(results []Result) string {
return fmt.Sprintf("%v", results)
}
func TestManyBasic(t *testing.T) {
testWatchForFallback = true
defer func() {
testWatchForFallback = false
}()
testMany := func(shouldFallback bool, expect string, paths ...string) {
results := GetMany(
manyJSON,
paths...,
)
if len(results) != len(paths) {
t.Fatalf("expected %v, got %v", len(paths), len(results))
}
if fmt.Sprintf("%v", results) != expect {
t.Fatalf("expected %v, got %v", expect, results)
}
return
if testLastWasFallback != shouldFallback {
t.Fatalf("expected %v, got %v", shouldFallback, testLastWasFallback)
}
}
testMany(false, "[Point]", "position.type")
testMany(false, `[emptya ["world peace"] 31]`, ".a", "loves", "age")
testMany(false, `[["world peace"]]`, "loves")
testMany(false, `[{"last":"Anderson","first":"Nancy"} Nancy]`, "name", "name.first")
testMany(true, `[null]`, strings.Repeat("a.", 40)+"hello")
res := Get(manyJSON, strings.Repeat("a.", 48)+"a")
testMany(true, `[`+res.String()+`]`, strings.Repeat("a.", 48)+"a")
// these should fallback
testMany(true, `[Cat Nancy]`, "name\\.first", "name.first")
testMany(true, `[world]`, strings.Repeat("a.", 70)+"hello")
}
func TestRandomMany(t *testing.T) {
var lstr string
defer func() {
if v := recover(); v != nil {
println("'" + hex.EncodeToString([]byte(lstr)) + "'")
println("'" + lstr + "'")
panic(v)
}
}()
rand.Seed(time.Now().UnixNano())
b := make([]byte, 512)
for i := 0; i < 50000; i++ {
n, err := rand.Read(b[:rand.Int()%len(b)])
if err != nil {
t.Fatal(err)
}
lstr = string(b[:n])
paths := make([]string, rand.Int()%64)
for i := range paths {
var b []byte
n := rand.Int() % 5
for j := 0; j < n; j++ {
if j > 0 {
b = append(b, '.')
}
nn := rand.Int() % 10
for k := 0; k < nn; k++ {
b = append(b, 'a'+byte(rand.Int()%26))
}
}
paths[i] = string(b)
}
GetMany(lstr, paths...)
}
}
type BenchStruct struct {
Widget struct {
Window struct {
@ -488,6 +578,19 @@ var benchPaths = []string{
"widget.text.onMouseUp",
}
var benchManyPaths = []string{
"widget.window.name",
"widget.image.hOffset",
"widget.text.onMouseUp",
"widget.window.title",
"widget.image.alignment",
"widget.text.style",
"widget.window.height",
"widget.image.src",
"widget.text.data",
"widget.text.size",
}
func BenchmarkGJSONGet(t *testing.B) {
t.ReportAllocs()
t.ResetTimer()
@ -500,6 +603,51 @@ func BenchmarkGJSONGet(t *testing.B) {
}
t.N *= len(benchPaths) // because we are running against 3 paths
}
func BenchmarkGJSONGetMany4Paths(t *testing.B) {
benchmarkGJSONGetManyN(t, 4)
}
func BenchmarkGJSONGetMany8Paths(t *testing.B) {
benchmarkGJSONGetManyN(t, 8)
}
func BenchmarkGJSONGetMany16Paths(t *testing.B) {
benchmarkGJSONGetManyN(t, 16)
}
func BenchmarkGJSONGetMany32Paths(t *testing.B) {
benchmarkGJSONGetManyN(t, 32)
}
func BenchmarkGJSONGetMany64Paths(t *testing.B) {
benchmarkGJSONGetManyN(t, 64)
}
func BenchmarkGJSONGetMany128Paths(t *testing.B) {
benchmarkGJSONGetManyN(t, 128)
}
func BenchmarkGJSONGetMany256Paths(t *testing.B) {
benchmarkGJSONGetManyN(t, 256)
}
func BenchmarkGJSONGetMany512Paths(t *testing.B) {
benchmarkGJSONGetManyN(t, 512)
}
func benchmarkGJSONGetManyN(t *testing.B, n int) {
var paths []string
for len(paths) < n {
paths = append(paths, benchManyPaths...)
}
paths = paths[:n]
t.ReportAllocs()
t.ResetTimer()
for i := 0; i < t.N; i++ {
results := GetMany(exampleJSON, paths...)
if len(results) == 0 {
t.Fatal("did not find the value")
}
for j := 0; j < len(results); j++ {
if results[j].Type == Null {
t.Fatal("did not find the value")
}
}
}
t.N *= len(paths) // because we are running against 3 paths
}
func BenchmarkGJSONUnmarshalMap(t *testing.B) {
t.ReportAllocs()