match GetMany and Get results, fixes #55

This commit is contained in:
Josh Baker 2017-12-22 06:58:04 -07:00
parent 080cd22816
commit e62d62a3e1
3 changed files with 25 additions and 419 deletions

View File

@ -271,7 +271,7 @@ This is a best-effort no allocation sub slice of the original json. This method
## Get multiple values at once
The `GetMany` function can be used to get multiple values at the same time, and is optimized to scan over a JSON payload once.
The `GetMany` function can be used to get multiple values at the same time.
```go
results := gjson.GetMany(json, "name.first", "name.last", "age")
@ -298,17 +298,6 @@ BenchmarkJSONParserGet-8 3000000 499 ns/op 21 B/op
BenchmarkJSONIterator-8 3000000 812 ns/op 544 B/op 9 allocs/op
```
Benchmarks for the `GetMany` function:
```
BenchmarkGJSONGetMany4Paths-8 4000000 303 ns/op 112 B/op 0 allocs/op
BenchmarkGJSONGetMany8Paths-8 8000000 208 ns/op 56 B/op 0 allocs/op
BenchmarkGJSONGetMany16Paths-8 16000000 156 ns/op 56 B/op 0 allocs/op
BenchmarkGJSONGetMany32Paths-8 32000000 127 ns/op 64 B/op 0 allocs/op
BenchmarkGJSONGetMany64Paths-8 64000000 117 ns/op 64 B/op 0 allocs/op
BenchmarkGJSONGetMany128Paths-8 128000000 109 ns/op 64 B/op 0 allocs/op
```
JSON document used:
```json
@ -347,21 +336,6 @@ widget.image.hOffset
widget.text.onMouseUp
```
For the `GetMany` benchmarks these paths are used:
```
widget.window.name
widget.image.hOffset
widget.text.onMouseUp
widget.window.title
widget.image.alignment
widget.text.style
widget.window.height
widget.image.src
widget.text.data
widget.text.size
```
*These benchmarks were run on a MacBook Pro 15" 2.8 GHz Intel Core i7 using Go 1.8 and can be be found [here](https://github.com/tidwall/gjson-benchmarks).*

405
gjson.go
View File

@ -1595,405 +1595,26 @@ var ( // used for testing
testLastWasFallback bool
)
// areSimplePaths returns true if all the paths are simple enough
// to parse quickly for GetMany(). Allows alpha-numeric, dots,
// underscores, and the dollar sign. It does not allow non-alnum,
// escape characters, or keys which start with a numbers.
// For example:
// "name.last" == OK
// "user.id0" == OK
// "user.ID" == OK
// "user.first_name" == OK
// "user.firstName" == OK
// "user.0item" == BAD
// "user.#id" == BAD
// "user\.name" == BAD
func areSimplePaths(paths []string) bool {
for _, path := range paths {
var fi int // first key index, for keys with numeric prefix
for i := 0; i < len(path); i++ {
if path[i] >= 'a' && path[i] <= 'z' {
// a-z is likely to be the highest frequency charater.
continue
}
if path[i] == '.' {
fi = i + 1
continue
}
if path[i] >= 'A' && path[i] <= 'Z' {
continue
}
if path[i] == '_' || path[i] == '$' {
continue
}
if i > fi && path[i] >= '0' && path[i] <= '9' {
continue
}
return false
}
}
return true
}
// GetMany searches json for the multiple paths.
// The return value is a Result array where the number of items
// will be equal to the number of input paths.
func GetMany(json string, paths ...string) []Result {
if len(paths) < 4 {
if testWatchForFallback {
testLastWasFallback = false
}
switch len(paths) {
case 0:
// return nil when no paths are specified.
return nil
case 1:
return []Result{Get(json, paths[0])}
case 2:
return []Result{Get(json, paths[0]), Get(json, paths[1])}
case 3:
return []Result{Get(json, paths[0]), Get(json, paths[1]), Get(json, paths[2])}
}
func GetMany(json string, path ...string) []Result {
res := make([]Result, len(path))
for i, path := range path {
res[i] = Get(json, path)
}
var results []Result
var ok bool
var i int
if len(paths) > 512 {
// we can only support up to 512 paths. Is that too many?
goto fallback
}
if !areSimplePaths(paths) {
// If there is even one path that is not considered "simple" then
// we need to use the fallback method.
goto fallback
}
// locate the object token.
for ; i < len(json); i++ {
if json[i] == '{' {
i++
break
}
if json[i] <= ' ' {
continue
}
goto fallback
}
// use the call function table.
if len(paths) <= 8 {
results, ok = getMany8(json, i, paths)
} else if len(paths) <= 16 {
results, ok = getMany16(json, i, paths)
} else if len(paths) <= 32 {
results, ok = getMany32(json, i, paths)
} else if len(paths) <= 64 {
results, ok = getMany64(json, i, paths)
} else if len(paths) <= 128 {
results, ok = getMany128(json, i, paths)
} else if len(paths) <= 256 {
results, ok = getMany256(json, i, paths)
} else if len(paths) <= 512 {
results, ok = getMany512(json, i, paths)
}
if !ok {
// there was some fault while parsing. we should try the
// fallback method. This could result in performance
// degregation in some cases.
goto fallback
}
if testWatchForFallback {
testLastWasFallback = false
}
return results
fallback:
results = results[:0]
for i := 0; i < len(paths); i++ {
results = append(results, Get(json, paths[i]))
}
if testWatchForFallback {
testLastWasFallback = true
}
return results
return res
}
// GetManyBytes searches json for the specified path.
// If working with bytes, this method preferred over
// GetMany(string(data), paths...)
func GetManyBytes(json []byte, paths ...string) []Result {
if json == nil {
return GetMany("", paths...)
// GetManyBytes searches json for the multiple paths.
// The return value is a Result array where the number of items
// will be equal to the number of input paths.
func GetManyBytes(json []byte, path ...string) []Result {
res := make([]Result, len(path))
for i, path := range path {
res[i] = GetBytes(json, path)
}
results := GetMany(*(*string)(unsafe.Pointer(&json)), paths...)
for i := range results {
results[i] = fromBytesGet(results[i])
}
return results
}
// parseGetMany parses a json object for keys that match against the callers
// paths. It's a best-effort attempt and quickly locating and assigning the
// values to the []Result array. If there are failures such as bad json, or
// invalid input paths, or too much recursion, the function will exit with a
// return value of 'false'.
func parseGetMany(
json string, i int,
level uint, kplen int,
paths []string, completed []bool, matches []uint64, results []Result,
) (int, bool) {
if level > 62 {
// The recursion level is limited because the matches []uint64
// array cannot handle more the 64-bits.
return i, false
}
// At this point the last character read was a '{'.
// Read all object keys and try to match against the paths.
var key string
var val string
var vesc, ok bool
next_key:
for ; i < len(json); i++ {
if json[i] == '"' {
// read the key
i, val, vesc, ok = parseString(json, i+1)
if !ok {
return i, false
}
if vesc {
// the value is escaped
key = unescape(val[1 : len(val)-1])
} else {
// just a plain old ascii key
key = val[1 : len(val)-1]
}
var hasMatch bool
var parsedVal bool
var valOrgIndex int
var valPathIndex int
for j := 0; j < len(key); j++ {
if key[j] == '.' {
// we need to look for keys with dot and ignore them.
if i, _, ok = parseAny(json, i, false); !ok {
return i, false
}
continue next_key
}
}
var usedPaths int
// loop through paths and look for matches
for j := 0; j < len(paths); j++ {
if completed[j] {
usedPaths++
// ignore completed paths
continue
}
if level > 0 && (matches[j]>>(level-1))&1 == 0 {
// ignore unmatched paths
usedPaths++
continue
}
// try to match the key to the path
// this is spaghetti code but the idea is to minimize
// calls and variable assignments when comparing the
// key to paths
if len(paths[j])-kplen >= len(key) {
i, k := kplen, 0
for ; k < len(key); k, i = k+1, i+1 {
if key[k] != paths[j][i] {
// no match
goto nomatch
}
}
if i < len(paths[j]) {
if paths[j][i] == '.' {
// matched, but there are still more keys in path
goto match_not_atend
}
}
if len(paths[j]) <= len(key) || kplen != 0 {
if len(paths[j]) != i {
goto nomatch
}
// matched and at the end of the path
goto match_atend
}
}
// no match, jump to the nomatch label
goto nomatch
match_atend:
// found a match
// at the end of the path. we must take the value.
usedPaths++
if !parsedVal {
// the value has not been parsed yet. let's do so.
valOrgIndex = i // keep track of the current position.
i, results[j], ok = parseAny(json, i, true)
if !ok {
return i, false
}
parsedVal = true
valPathIndex = j
} else {
results[j] = results[valPathIndex]
}
// mark as complete
completed[j] = true
// jump over the match_not_atend label
goto nomatch
match_not_atend:
// found a match
// still in the middle of the path.
usedPaths++
// mark the path as matched
matches[j] |= 1 << level
if !hasMatch {
hasMatch = true
}
nomatch: // noop label
}
if !hasMatch && i < len(json) && json[i] == '}' {
return i + 1, true
}
if !parsedVal {
if hasMatch {
// we found a match and the value has not been parsed yet.
// let's find out if the next value type is an object.
for ; i < len(json); i++ {
if json[i] <= ' ' || json[i] == ':' {
continue
}
break
}
if i < len(json) {
if json[i] == '{' {
// it's an object. let's go deeper
i, ok = parseGetMany(json, i+1, level+1, kplen+len(key)+1, paths, completed, matches, results)
if !ok {
return i, false
}
} else {
// not an object. just parse and ignore.
if i, _, ok = parseAny(json, i, false); !ok {
return i, false
}
}
}
} else {
// Since there was no matches we can just parse the value and
// ignore the result.
if i, _, ok = parseAny(json, i, true); !ok {
return i, false
}
}
} else if hasMatch && len(results[valPathIndex].Raw) > 0 && results[valPathIndex].Raw[0] == '{' {
// The value was already parsed and the value type is an object.
// Rewind the json index and let's parse deeper.
i = valOrgIndex
for ; i < len(json); i++ {
if json[i] == '{' {
break
}
}
i, ok = parseGetMany(json, i+1, level+1, kplen+len(key)+1, paths, completed, matches, results)
if !ok {
return i, false
}
}
if usedPaths == len(paths) {
// all paths have been used, either completed or matched.
// we should stop parsing this object to save CPU cycles.
if level > 0 && i < len(json) {
i, _ = parseSquash(json, i)
}
return i, true
}
} else if json[i] == '}' {
// reached the end of the object. end it here.
return i + 1, true
}
}
return i, true
}
// Call table for GetMany. Using an isolated function allows for allocating
// arrays with know capacities on the stack, as opposed to dynamically
// allocating on the heap. This can provide a tremendous performance boost
// by avoiding the GC.
func getMany8(json string, i int, paths []string) ([]Result, bool) {
const max = 8
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}
func getMany16(json string, i int, paths []string) ([]Result, bool) {
const max = 16
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}
func getMany32(json string, i int, paths []string) ([]Result, bool) {
const max = 32
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}
func getMany64(json string, i int, paths []string) ([]Result, bool) {
const max = 64
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}
func getMany128(json string, i int, paths []string) ([]Result, bool) {
const max = 128
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}
func getMany256(json string, i int, paths []string) ([]Result, bool) {
const max = 256
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
}
func getMany512(json string, i int, paths []string) ([]Result, bool) {
const max = 512
var completed = make([]bool, 0, max)
var matches = make([]uint64, 0, max)
var results = make([]Result, 0, max)
completed = completed[0:len(paths):max]
matches = matches[0:len(paths):max]
results = results[0:len(paths):max]
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
return results, ok
return res
}
var fieldsmu sync.RWMutex

View File

@ -1278,3 +1278,14 @@ func randomJSON() (json string, keys []string) {
//rand.Seed(time.Now().UnixNano())
return randomObjectOrArray(nil, "", false, 0)
}
func TestIssue55(t *testing.T) {
json := `{"one": {"two": 2, "three": 3}, "four": 4, "five": 5}`
results := GetMany(json, "four", "five", "one.two", "one.six")
expected := []string{"4", "5", "2", ""}
for i, r := range results {
if r.String() != expected[i] {
t.Fatalf("expected %v, got %v", expected[i], r.String())
}
}
}