mirror of https://github.com/tidwall/gjson.git
match GetMany and Get results, fixes #55
This commit is contained in:
parent
080cd22816
commit
e62d62a3e1
28
README.md
28
README.md
|
@ -271,7 +271,7 @@ This is a best-effort no allocation sub slice of the original json. This method
|
|||
|
||||
## Get multiple values at once
|
||||
|
||||
The `GetMany` function can be used to get multiple values at the same time, and is optimized to scan over a JSON payload once.
|
||||
The `GetMany` function can be used to get multiple values at the same time.
|
||||
|
||||
```go
|
||||
results := gjson.GetMany(json, "name.first", "name.last", "age")
|
||||
|
@ -298,17 +298,6 @@ BenchmarkJSONParserGet-8 3000000 499 ns/op 21 B/op
|
|||
BenchmarkJSONIterator-8 3000000 812 ns/op 544 B/op 9 allocs/op
|
||||
```
|
||||
|
||||
Benchmarks for the `GetMany` function:
|
||||
|
||||
```
|
||||
BenchmarkGJSONGetMany4Paths-8 4000000 303 ns/op 112 B/op 0 allocs/op
|
||||
BenchmarkGJSONGetMany8Paths-8 8000000 208 ns/op 56 B/op 0 allocs/op
|
||||
BenchmarkGJSONGetMany16Paths-8 16000000 156 ns/op 56 B/op 0 allocs/op
|
||||
BenchmarkGJSONGetMany32Paths-8 32000000 127 ns/op 64 B/op 0 allocs/op
|
||||
BenchmarkGJSONGetMany64Paths-8 64000000 117 ns/op 64 B/op 0 allocs/op
|
||||
BenchmarkGJSONGetMany128Paths-8 128000000 109 ns/op 64 B/op 0 allocs/op
|
||||
```
|
||||
|
||||
JSON document used:
|
||||
|
||||
```json
|
||||
|
@ -347,21 +336,6 @@ widget.image.hOffset
|
|||
widget.text.onMouseUp
|
||||
```
|
||||
|
||||
For the `GetMany` benchmarks these paths are used:
|
||||
|
||||
```
|
||||
widget.window.name
|
||||
widget.image.hOffset
|
||||
widget.text.onMouseUp
|
||||
widget.window.title
|
||||
widget.image.alignment
|
||||
widget.text.style
|
||||
widget.window.height
|
||||
widget.image.src
|
||||
widget.text.data
|
||||
widget.text.size
|
||||
```
|
||||
|
||||
*These benchmarks were run on a MacBook Pro 15" 2.8 GHz Intel Core i7 using Go 1.8 and can be found [here](https://github.com/tidwall/gjson-benchmarks).*
|
||||
|
||||
|
||||
|
|
405
gjson.go
405
gjson.go
|
@ -1595,405 +1595,26 @@ var ( // used for testing
|
|||
testLastWasFallback bool
|
||||
)
|
||||
|
||||
// areSimplePaths returns true if all the paths are simple enough
|
||||
// to parse quickly for GetMany(). Allows alpha-numeric, dots,
|
||||
// underscores, and the dollar sign. It does not allow non-alnum,
|
||||
// escape characters, or keys which start with a number.
|
||||
// For example:
|
||||
// "name.last" == OK
|
||||
// "user.id0" == OK
|
||||
// "user.ID" == OK
|
||||
// "user.first_name" == OK
|
||||
// "user.firstName" == OK
|
||||
// "user.0item" == BAD
|
||||
// "user.#id" == BAD
|
||||
// "user\.name" == BAD
|
||||
func areSimplePaths(paths []string) bool {
|
||||
for _, path := range paths {
|
||||
var fi int // first key index, for keys with numeric prefix
|
||||
for i := 0; i < len(path); i++ {
|
||||
if path[i] >= 'a' && path[i] <= 'z' {
|
||||
// a-z is likely to be the highest frequency character.
|
||||
continue
|
||||
}
|
||||
if path[i] == '.' {
|
||||
fi = i + 1
|
||||
continue
|
||||
}
|
||||
if path[i] >= 'A' && path[i] <= 'Z' {
|
||||
continue
|
||||
}
|
||||
if path[i] == '_' || path[i] == '$' {
|
||||
continue
|
||||
}
|
||||
if i > fi && path[i] >= '0' && path[i] <= '9' {
|
||||
continue
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// GetMany searches json for the multiple paths.
|
||||
// The return value is a Result array where the number of items
|
||||
// will be equal to the number of input paths.
|
||||
func GetMany(json string, paths ...string) []Result {
|
||||
if len(paths) < 4 {
|
||||
if testWatchForFallback {
|
||||
testLastWasFallback = false
|
||||
}
|
||||
switch len(paths) {
|
||||
case 0:
|
||||
// return nil when no paths are specified.
|
||||
return nil
|
||||
case 1:
|
||||
return []Result{Get(json, paths[0])}
|
||||
case 2:
|
||||
return []Result{Get(json, paths[0]), Get(json, paths[1])}
|
||||
case 3:
|
||||
return []Result{Get(json, paths[0]), Get(json, paths[1]), Get(json, paths[2])}
|
||||
}
|
||||
func GetMany(json string, path ...string) []Result {
|
||||
res := make([]Result, len(path))
|
||||
for i, path := range path {
|
||||
res[i] = Get(json, path)
|
||||
}
|
||||
var results []Result
|
||||
var ok bool
|
||||
var i int
|
||||
if len(paths) > 512 {
|
||||
// we can only support up to 512 paths. Is that too many?
|
||||
goto fallback
|
||||
}
|
||||
if !areSimplePaths(paths) {
|
||||
// If there is even one path that is not considered "simple" then
|
||||
// we need to use the fallback method.
|
||||
goto fallback
|
||||
}
|
||||
// locate the object token.
|
||||
for ; i < len(json); i++ {
|
||||
if json[i] == '{' {
|
||||
i++
|
||||
break
|
||||
}
|
||||
if json[i] <= ' ' {
|
||||
continue
|
||||
}
|
||||
goto fallback
|
||||
}
|
||||
// use the call function table.
|
||||
if len(paths) <= 8 {
|
||||
results, ok = getMany8(json, i, paths)
|
||||
} else if len(paths) <= 16 {
|
||||
results, ok = getMany16(json, i, paths)
|
||||
} else if len(paths) <= 32 {
|
||||
results, ok = getMany32(json, i, paths)
|
||||
} else if len(paths) <= 64 {
|
||||
results, ok = getMany64(json, i, paths)
|
||||
} else if len(paths) <= 128 {
|
||||
results, ok = getMany128(json, i, paths)
|
||||
} else if len(paths) <= 256 {
|
||||
results, ok = getMany256(json, i, paths)
|
||||
} else if len(paths) <= 512 {
|
||||
results, ok = getMany512(json, i, paths)
|
||||
}
|
||||
if !ok {
|
||||
// there was some fault while parsing. we should try the
|
||||
// fallback method. This could result in performance
|
||||
// degradation in some cases.
|
||||
goto fallback
|
||||
}
|
||||
if testWatchForFallback {
|
||||
testLastWasFallback = false
|
||||
}
|
||||
return results
|
||||
fallback:
|
||||
results = results[:0]
|
||||
for i := 0; i < len(paths); i++ {
|
||||
results = append(results, Get(json, paths[i]))
|
||||
}
|
||||
if testWatchForFallback {
|
||||
testLastWasFallback = true
|
||||
}
|
||||
return results
|
||||
return res
|
||||
}
|
||||
|
||||
// GetManyBytes searches json for the specified path.
|
||||
// If working with bytes, this method is preferred over
|
||||
// GetMany(string(data), paths...)
|
||||
func GetManyBytes(json []byte, paths ...string) []Result {
|
||||
if json == nil {
|
||||
return GetMany("", paths...)
|
||||
// GetManyBytes searches json for the multiple paths.
|
||||
// The return value is a Result array where the number of items
|
||||
// will be equal to the number of input paths.
|
||||
func GetManyBytes(json []byte, path ...string) []Result {
|
||||
res := make([]Result, len(path))
|
||||
for i, path := range path {
|
||||
res[i] = GetBytes(json, path)
|
||||
}
|
||||
results := GetMany(*(*string)(unsafe.Pointer(&json)), paths...)
|
||||
for i := range results {
|
||||
results[i] = fromBytesGet(results[i])
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
// parseGetMany parses a json object for keys that match against the callers
|
||||
// paths. It's a best-effort attempt at quickly locating and assigning the
|
||||
// values to the []Result array. If there are failures such as bad json, or
|
||||
// invalid input paths, or too much recursion, the function will exit with a
|
||||
// return value of 'false'.
|
||||
func parseGetMany(
|
||||
json string, i int,
|
||||
level uint, kplen int,
|
||||
paths []string, completed []bool, matches []uint64, results []Result,
|
||||
) (int, bool) {
|
||||
if level > 62 {
|
||||
// The recursion level is limited because the matches []uint64
|
||||
// array cannot handle more than 64 bits.
|
||||
return i, false
|
||||
}
|
||||
// At this point the last character read was a '{'.
|
||||
// Read all object keys and try to match against the paths.
|
||||
var key string
|
||||
var val string
|
||||
var vesc, ok bool
|
||||
next_key:
|
||||
for ; i < len(json); i++ {
|
||||
if json[i] == '"' {
|
||||
// read the key
|
||||
i, val, vesc, ok = parseString(json, i+1)
|
||||
if !ok {
|
||||
return i, false
|
||||
}
|
||||
if vesc {
|
||||
// the value is escaped
|
||||
key = unescape(val[1 : len(val)-1])
|
||||
} else {
|
||||
// just a plain old ascii key
|
||||
key = val[1 : len(val)-1]
|
||||
}
|
||||
var hasMatch bool
|
||||
var parsedVal bool
|
||||
var valOrgIndex int
|
||||
var valPathIndex int
|
||||
for j := 0; j < len(key); j++ {
|
||||
if key[j] == '.' {
|
||||
// we need to look for keys with dot and ignore them.
|
||||
if i, _, ok = parseAny(json, i, false); !ok {
|
||||
return i, false
|
||||
}
|
||||
continue next_key
|
||||
}
|
||||
}
|
||||
var usedPaths int
|
||||
// loop through paths and look for matches
|
||||
for j := 0; j < len(paths); j++ {
|
||||
if completed[j] {
|
||||
usedPaths++
|
||||
// ignore completed paths
|
||||
continue
|
||||
}
|
||||
if level > 0 && (matches[j]>>(level-1))&1 == 0 {
|
||||
// ignore unmatched paths
|
||||
usedPaths++
|
||||
continue
|
||||
}
|
||||
// try to match the key to the path
|
||||
// this is spaghetti code but the idea is to minimize
|
||||
// calls and variable assignments when comparing the
|
||||
// key to paths
|
||||
if len(paths[j])-kplen >= len(key) {
|
||||
i, k := kplen, 0
|
||||
for ; k < len(key); k, i = k+1, i+1 {
|
||||
if key[k] != paths[j][i] {
|
||||
// no match
|
||||
goto nomatch
|
||||
}
|
||||
}
|
||||
if i < len(paths[j]) {
|
||||
if paths[j][i] == '.' {
|
||||
// matched, but there are still more keys in path
|
||||
goto match_not_atend
|
||||
}
|
||||
}
|
||||
if len(paths[j]) <= len(key) || kplen != 0 {
|
||||
if len(paths[j]) != i {
|
||||
goto nomatch
|
||||
}
|
||||
// matched and at the end of the path
|
||||
goto match_atend
|
||||
}
|
||||
}
|
||||
// no match, jump to the nomatch label
|
||||
goto nomatch
|
||||
match_atend:
|
||||
// found a match
|
||||
// at the end of the path. we must take the value.
|
||||
usedPaths++
|
||||
if !parsedVal {
|
||||
// the value has not been parsed yet. let's do so.
|
||||
valOrgIndex = i // keep track of the current position.
|
||||
i, results[j], ok = parseAny(json, i, true)
|
||||
if !ok {
|
||||
return i, false
|
||||
}
|
||||
parsedVal = true
|
||||
valPathIndex = j
|
||||
} else {
|
||||
results[j] = results[valPathIndex]
|
||||
}
|
||||
// mark as complete
|
||||
completed[j] = true
|
||||
// jump over the match_not_atend label
|
||||
goto nomatch
|
||||
match_not_atend:
|
||||
// found a match
|
||||
// still in the middle of the path.
|
||||
usedPaths++
|
||||
// mark the path as matched
|
||||
matches[j] |= 1 << level
|
||||
if !hasMatch {
|
||||
hasMatch = true
|
||||
}
|
||||
nomatch: // noop label
|
||||
}
|
||||
|
||||
if !hasMatch && i < len(json) && json[i] == '}' {
|
||||
return i + 1, true
|
||||
}
|
||||
if !parsedVal {
|
||||
if hasMatch {
|
||||
// we found a match and the value has not been parsed yet.
|
||||
// let's find out if the next value type is an object.
|
||||
for ; i < len(json); i++ {
|
||||
if json[i] <= ' ' || json[i] == ':' {
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
if i < len(json) {
|
||||
if json[i] == '{' {
|
||||
// it's an object. let's go deeper
|
||||
i, ok = parseGetMany(json, i+1, level+1, kplen+len(key)+1, paths, completed, matches, results)
|
||||
if !ok {
|
||||
return i, false
|
||||
}
|
||||
} else {
|
||||
// not an object. just parse and ignore.
|
||||
if i, _, ok = parseAny(json, i, false); !ok {
|
||||
return i, false
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Since there was no matches we can just parse the value and
|
||||
// ignore the result.
|
||||
if i, _, ok = parseAny(json, i, true); !ok {
|
||||
return i, false
|
||||
}
|
||||
}
|
||||
} else if hasMatch && len(results[valPathIndex].Raw) > 0 && results[valPathIndex].Raw[0] == '{' {
|
||||
// The value was already parsed and the value type is an object.
|
||||
// Rewind the json index and let's parse deeper.
|
||||
i = valOrgIndex
|
||||
for ; i < len(json); i++ {
|
||||
if json[i] == '{' {
|
||||
break
|
||||
}
|
||||
}
|
||||
i, ok = parseGetMany(json, i+1, level+1, kplen+len(key)+1, paths, completed, matches, results)
|
||||
if !ok {
|
||||
return i, false
|
||||
}
|
||||
}
|
||||
if usedPaths == len(paths) {
|
||||
// all paths have been used, either completed or matched.
|
||||
// we should stop parsing this object to save CPU cycles.
|
||||
if level > 0 && i < len(json) {
|
||||
i, _ = parseSquash(json, i)
|
||||
}
|
||||
return i, true
|
||||
}
|
||||
} else if json[i] == '}' {
|
||||
// reached the end of the object. end it here.
|
||||
return i + 1, true
|
||||
}
|
||||
}
|
||||
return i, true
|
||||
}
|
||||
|
||||
// Call table for GetMany. Using an isolated function allows for allocating
|
||||
// arrays with known capacities on the stack, as opposed to dynamically
|
||||
// allocating on the heap. This can provide a tremendous performance boost
|
||||
// by avoiding the GC.
|
||||
func getMany8(json string, i int, paths []string) ([]Result, bool) {
|
||||
const max = 8
|
||||
var completed = make([]bool, 0, max)
|
||||
var matches = make([]uint64, 0, max)
|
||||
var results = make([]Result, 0, max)
|
||||
completed = completed[0:len(paths):max]
|
||||
matches = matches[0:len(paths):max]
|
||||
results = results[0:len(paths):max]
|
||||
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
|
||||
return results, ok
|
||||
}
|
||||
func getMany16(json string, i int, paths []string) ([]Result, bool) {
|
||||
const max = 16
|
||||
var completed = make([]bool, 0, max)
|
||||
var matches = make([]uint64, 0, max)
|
||||
var results = make([]Result, 0, max)
|
||||
completed = completed[0:len(paths):max]
|
||||
matches = matches[0:len(paths):max]
|
||||
results = results[0:len(paths):max]
|
||||
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
|
||||
return results, ok
|
||||
}
|
||||
func getMany32(json string, i int, paths []string) ([]Result, bool) {
|
||||
const max = 32
|
||||
var completed = make([]bool, 0, max)
|
||||
var matches = make([]uint64, 0, max)
|
||||
var results = make([]Result, 0, max)
|
||||
completed = completed[0:len(paths):max]
|
||||
matches = matches[0:len(paths):max]
|
||||
results = results[0:len(paths):max]
|
||||
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
|
||||
return results, ok
|
||||
}
|
||||
func getMany64(json string, i int, paths []string) ([]Result, bool) {
|
||||
const max = 64
|
||||
var completed = make([]bool, 0, max)
|
||||
var matches = make([]uint64, 0, max)
|
||||
var results = make([]Result, 0, max)
|
||||
completed = completed[0:len(paths):max]
|
||||
matches = matches[0:len(paths):max]
|
||||
results = results[0:len(paths):max]
|
||||
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
|
||||
return results, ok
|
||||
}
|
||||
func getMany128(json string, i int, paths []string) ([]Result, bool) {
|
||||
const max = 128
|
||||
var completed = make([]bool, 0, max)
|
||||
var matches = make([]uint64, 0, max)
|
||||
var results = make([]Result, 0, max)
|
||||
completed = completed[0:len(paths):max]
|
||||
matches = matches[0:len(paths):max]
|
||||
results = results[0:len(paths):max]
|
||||
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
|
||||
return results, ok
|
||||
}
|
||||
func getMany256(json string, i int, paths []string) ([]Result, bool) {
|
||||
const max = 256
|
||||
var completed = make([]bool, 0, max)
|
||||
var matches = make([]uint64, 0, max)
|
||||
var results = make([]Result, 0, max)
|
||||
completed = completed[0:len(paths):max]
|
||||
matches = matches[0:len(paths):max]
|
||||
results = results[0:len(paths):max]
|
||||
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
|
||||
return results, ok
|
||||
}
|
||||
func getMany512(json string, i int, paths []string) ([]Result, bool) {
|
||||
const max = 512
|
||||
var completed = make([]bool, 0, max)
|
||||
var matches = make([]uint64, 0, max)
|
||||
var results = make([]Result, 0, max)
|
||||
completed = completed[0:len(paths):max]
|
||||
matches = matches[0:len(paths):max]
|
||||
results = results[0:len(paths):max]
|
||||
_, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results)
|
||||
return results, ok
|
||||
return res
|
||||
}
|
||||
|
||||
var fieldsmu sync.RWMutex
|
||||
|
|
|
@ -1278,3 +1278,14 @@ func randomJSON() (json string, keys []string) {
|
|||
//rand.Seed(time.Now().UnixNano())
|
||||
return randomObjectOrArray(nil, "", false, 0)
|
||||
}
|
||||
|
||||
func TestIssue55(t *testing.T) {
|
||||
json := `{"one": {"two": 2, "three": 3}, "four": 4, "five": 5}`
|
||||
results := GetMany(json, "four", "five", "one.two", "one.six")
|
||||
expected := []string{"4", "5", "2", ""}
|
||||
for i, r := range results {
|
||||
if r.String() != expected[i] {
|
||||
t.Fatalf("expected %v, got %v", expected[i], r.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue