diff --git a/README.md b/README.md index 9f3ddb1..2e8744e 100644 --- a/README.md +++ b/README.md @@ -271,7 +271,7 @@ This is a best-effort no allocation sub slice of the original json. This method ## Get multiple values at once -The `GetMany` function can be used to get multiple values at the same time, and is optimized to scan over a JSON payload once. +The `GetMany` function can be used to get multiple values at the same time. ```go results := gjson.GetMany(json, "name.first", "name.last", "age") @@ -298,17 +298,6 @@ BenchmarkJSONParserGet-8 3000000 499 ns/op 21 B/op BenchmarkJSONIterator-8 3000000 812 ns/op 544 B/op 9 allocs/op ``` -Benchmarks for the `GetMany` function: - -``` -BenchmarkGJSONGetMany4Paths-8 4000000 303 ns/op 112 B/op 0 allocs/op -BenchmarkGJSONGetMany8Paths-8 8000000 208 ns/op 56 B/op 0 allocs/op -BenchmarkGJSONGetMany16Paths-8 16000000 156 ns/op 56 B/op 0 allocs/op -BenchmarkGJSONGetMany32Paths-8 32000000 127 ns/op 64 B/op 0 allocs/op -BenchmarkGJSONGetMany64Paths-8 64000000 117 ns/op 64 B/op 0 allocs/op -BenchmarkGJSONGetMany128Paths-8 128000000 109 ns/op 64 B/op 0 allocs/op -``` - JSON document used: ```json @@ -347,21 +336,6 @@ widget.image.hOffset widget.text.onMouseUp ``` -For the `GetMany` benchmarks these paths are used: - -``` -widget.window.name -widget.image.hOffset -widget.text.onMouseUp -widget.window.title -widget.image.alignment -widget.text.style -widget.window.height -widget.image.src -widget.text.data -widget.text.size -``` - *These benchmarks were run on a MacBook Pro 15" 2.8 GHz Intel Core i7 using Go 1.8 and can be be found [here](https://github.com/tidwall/gjson-benchmarks).* diff --git a/gjson.go b/gjson.go index ba86a8b..ef11ecf 100644 --- a/gjson.go +++ b/gjson.go @@ -1595,405 +1595,26 @@ var ( // used for testing testLastWasFallback bool ) -// areSimplePaths returns true if all the paths are simple enough -// to parse quickly for GetMany(). Allows alpha-numeric, dots, -// underscores, and the dollar sign. It does not allow non-alnum, -// escape characters, or keys which start with a numbers. -// For example: -// "name.last" == OK -// "user.id0" == OK -// "user.ID" == OK -// "user.first_name" == OK -// "user.firstName" == OK -// "user.0item" == BAD -// "user.#id" == BAD -// "user\.name" == BAD -func areSimplePaths(paths []string) bool { - for _, path := range paths { - var fi int // first key index, for keys with numeric prefix - for i := 0; i < len(path); i++ { - if path[i] >= 'a' && path[i] <= 'z' { - // a-z is likely to be the highest frequency charater. - continue - } - if path[i] == '.' { - fi = i + 1 - continue - } - if path[i] >= 'A' && path[i] <= 'Z' { - continue - } - if path[i] == '_' || path[i] == '$' { - continue - } - if i > fi && path[i] >= '0' && path[i] <= '9' { - continue - } - return false - } - } - return true -} - // GetMany searches json for the multiple paths. // The return value is a Result array where the number of items // will be equal to the number of input paths. -func GetMany(json string, paths ...string) []Result { - if len(paths) < 4 { - if testWatchForFallback { - testLastWasFallback = false - } - switch len(paths) { - case 0: - // return nil when no paths are specified. - return nil - case 1: - return []Result{Get(json, paths[0])} - case 2: - return []Result{Get(json, paths[0]), Get(json, paths[1])} - case 3: - return []Result{Get(json, paths[0]), Get(json, paths[1]), Get(json, paths[2])} - } +func GetMany(json string, path ...string) []Result { + res := make([]Result, len(path)) + for i, path := range path { + res[i] = Get(json, path) } - var results []Result - var ok bool - var i int - if len(paths) > 512 { - // we can only support up to 512 paths. Is that too many? - goto fallback - } - if !areSimplePaths(paths) { - // If there is even one path that is not considered "simple" then - // we need to use the fallback method. - goto fallback - } - // locate the object token. - for ; i < len(json); i++ { - if json[i] == '{' { - i++ - break - } - if json[i] <= ' ' { - continue - } - goto fallback - } - // use the call function table. - if len(paths) <= 8 { - results, ok = getMany8(json, i, paths) - } else if len(paths) <= 16 { - results, ok = getMany16(json, i, paths) - } else if len(paths) <= 32 { - results, ok = getMany32(json, i, paths) - } else if len(paths) <= 64 { - results, ok = getMany64(json, i, paths) - } else if len(paths) <= 128 { - results, ok = getMany128(json, i, paths) - } else if len(paths) <= 256 { - results, ok = getMany256(json, i, paths) - } else if len(paths) <= 512 { - results, ok = getMany512(json, i, paths) - } - if !ok { - // there was some fault while parsing. we should try the - // fallback method. This could result in performance - // degregation in some cases. - goto fallback - } - if testWatchForFallback { - testLastWasFallback = false - } - return results -fallback: - results = results[:0] - for i := 0; i < len(paths); i++ { - results = append(results, Get(json, paths[i])) - } - if testWatchForFallback { - testLastWasFallback = true - } - return results + return res } -// GetManyBytes searches json for the specified path. -// If working with bytes, this method preferred over -// GetMany(string(data), paths...) -func GetManyBytes(json []byte, paths ...string) []Result { - if json == nil { - return GetMany("", paths...) +// GetManyBytes searches json for the multiple paths. +// The return value is a Result array where the number of items +// will be equal to the number of input paths. +func GetManyBytes(json []byte, path ...string) []Result { + res := make([]Result, len(path)) + for i, path := range path { + res[i] = GetBytes(json, path) } - results := GetMany(*(*string)(unsafe.Pointer(&json)), paths...) - for i := range results { - results[i] = fromBytesGet(results[i]) - } - return results -} - -// parseGetMany parses a json object for keys that match against the callers -// paths. It's a best-effort attempt and quickly locating and assigning the -// values to the []Result array. If there are failures such as bad json, or -// invalid input paths, or too much recursion, the function will exit with a -// return value of 'false'. -func parseGetMany( - json string, i int, - level uint, kplen int, - paths []string, completed []bool, matches []uint64, results []Result, -) (int, bool) { - if level > 62 { - // The recursion level is limited because the matches []uint64 - // array cannot handle more the 64-bits. - return i, false - } - // At this point the last character read was a '{'. - // Read all object keys and try to match against the paths. - var key string - var val string - var vesc, ok bool -next_key: - for ; i < len(json); i++ { - if json[i] == '"' { - // read the key - i, val, vesc, ok = parseString(json, i+1) - if !ok { - return i, false - } - if vesc { - // the value is escaped - key = unescape(val[1 : len(val)-1]) - } else { - // just a plain old ascii key - key = val[1 : len(val)-1] - } - var hasMatch bool - var parsedVal bool - var valOrgIndex int - var valPathIndex int - for j := 0; j < len(key); j++ { - if key[j] == '.' { - // we need to look for keys with dot and ignore them. - if i, _, ok = parseAny(json, i, false); !ok { - return i, false - } - continue next_key - } - } - var usedPaths int - // loop through paths and look for matches - for j := 0; j < len(paths); j++ { - if completed[j] { - usedPaths++ - // ignore completed paths - continue - } - if level > 0 && (matches[j]>>(level-1))&1 == 0 { - // ignore unmatched paths - usedPaths++ - continue - } - // try to match the key to the path - // this is spaghetti code but the idea is to minimize - // calls and variable assignments when comparing the - // key to paths - if len(paths[j])-kplen >= len(key) { - i, k := kplen, 0 - for ; k < len(key); k, i = k+1, i+1 { - if key[k] != paths[j][i] { - // no match - goto nomatch - } - } - if i < len(paths[j]) { - if paths[j][i] == '.' { - // matched, but there are still more keys in path - goto match_not_atend - } - } - if len(paths[j]) <= len(key) || kplen != 0 { - if len(paths[j]) != i { - goto nomatch - } - // matched and at the end of the path - goto match_atend - } - } - // no match, jump to the nomatch label - goto nomatch - match_atend: - // found a match - // at the end of the path. we must take the value. - usedPaths++ - if !parsedVal { - // the value has not been parsed yet. let's do so. - valOrgIndex = i // keep track of the current position. - i, results[j], ok = parseAny(json, i, true) - if !ok { - return i, false - } - parsedVal = true - valPathIndex = j - } else { - results[j] = results[valPathIndex] - } - // mark as complete - completed[j] = true - // jump over the match_not_atend label - goto nomatch - match_not_atend: - // found a match - // still in the middle of the path. - usedPaths++ - // mark the path as matched - matches[j] |= 1 << level - if !hasMatch { - hasMatch = true - } - nomatch: // noop label - } - - if !hasMatch && i < len(json) && json[i] == '}' { - return i + 1, true - } - if !parsedVal { - if hasMatch { - // we found a match and the value has not been parsed yet. - // let's find out if the next value type is an object. - for ; i < len(json); i++ { - if json[i] <= ' ' || json[i] == ':' { - continue - } - break - } - if i < len(json) { - if json[i] == '{' { - // it's an object. let's go deeper - i, ok = parseGetMany(json, i+1, level+1, kplen+len(key)+1, paths, completed, matches, results) - if !ok { - return i, false - } - } else { - // not an object. just parse and ignore. - if i, _, ok = parseAny(json, i, false); !ok { - return i, false - } - } - } - } else { - // Since there was no matches we can just parse the value and - // ignore the result. - if i, _, ok = parseAny(json, i, true); !ok { - return i, false - } - } - } else if hasMatch && len(results[valPathIndex].Raw) > 0 && results[valPathIndex].Raw[0] == '{' { - // The value was already parsed and the value type is an object. - // Rewind the json index and let's parse deeper. - i = valOrgIndex - for ; i < len(json); i++ { - if json[i] == '{' { - break - } - } - i, ok = parseGetMany(json, i+1, level+1, kplen+len(key)+1, paths, completed, matches, results) - if !ok { - return i, false - } - } - if usedPaths == len(paths) { - // all paths have been used, either completed or matched. - // we should stop parsing this object to save CPU cycles. - if level > 0 && i < len(json) { - i, _ = parseSquash(json, i) - } - return i, true - } - } else if json[i] == '}' { - // reached the end of the object. end it here. - return i + 1, true - } - } - return i, true -} - -// Call table for GetMany. Using an isolated function allows for allocating -// arrays with know capacities on the stack, as opposed to dynamically -// allocating on the heap. This can provide a tremendous performance boost -// by avoiding the GC. -func getMany8(json string, i int, paths []string) ([]Result, bool) { - const max = 8 - var completed = make([]bool, 0, max) - var matches = make([]uint64, 0, max) - var results = make([]Result, 0, max) - completed = completed[0:len(paths):max] - matches = matches[0:len(paths):max] - results = results[0:len(paths):max] - _, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results) - return results, ok -} -func getMany16(json string, i int, paths []string) ([]Result, bool) { - const max = 16 - var completed = make([]bool, 0, max) - var matches = make([]uint64, 0, max) - var results = make([]Result, 0, max) - completed = completed[0:len(paths):max] - matches = matches[0:len(paths):max] - results = results[0:len(paths):max] - _, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results) - return results, ok -} -func getMany32(json string, i int, paths []string) ([]Result, bool) { - const max = 32 - var completed = make([]bool, 0, max) - var matches = make([]uint64, 0, max) - var results = make([]Result, 0, max) - completed = completed[0:len(paths):max] - matches = matches[0:len(paths):max] - results = results[0:len(paths):max] - _, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results) - return results, ok -} -func getMany64(json string, i int, paths []string) ([]Result, bool) { - const max = 64 - var completed = make([]bool, 0, max) - var matches = make([]uint64, 0, max) - var results = make([]Result, 0, max) - completed = completed[0:len(paths):max] - matches = matches[0:len(paths):max] - results = results[0:len(paths):max] - _, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results) - return results, ok -} -func getMany128(json string, i int, paths []string) ([]Result, bool) { - const max = 128 - var completed = make([]bool, 0, max) - var matches = make([]uint64, 0, max) - var results = make([]Result, 0, max) - completed = completed[0:len(paths):max] - matches = matches[0:len(paths):max] - results = results[0:len(paths):max] - _, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results) - return results, ok -} -func getMany256(json string, i int, paths []string) ([]Result, bool) { - const max = 256 - var completed = make([]bool, 0, max) - var matches = make([]uint64, 0, max) - var results = make([]Result, 0, max) - completed = completed[0:len(paths):max] - matches = matches[0:len(paths):max] - results = results[0:len(paths):max] - _, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results) - return results, ok -} -func getMany512(json string, i int, paths []string) ([]Result, bool) { - const max = 512 - var completed = make([]bool, 0, max) - var matches = make([]uint64, 0, max) - var results = make([]Result, 0, max) - completed = completed[0:len(paths):max] - matches = matches[0:len(paths):max] - results = results[0:len(paths):max] - _, ok := parseGetMany(json, i, 0, 0, paths, completed, matches, results) - return results, ok + return res } var fieldsmu sync.RWMutex diff --git a/gjson_test.go b/gjson_test.go index fdd0104..871c598 100644 --- a/gjson_test.go +++ b/gjson_test.go @@ -1278,3 +1278,14 @@ func randomJSON() (json string, keys []string) { //rand.Seed(time.Now().UnixNano()) return randomObjectOrArray(nil, "", false, 0) } + +func TestIssue55(t *testing.T) { + json := `{"one": {"two": 2, "three": 3}, "four": 4, "five": 5}` + results := GetMany(json, "four", "five", "one.two", "one.six") + expected := []string{"4", "5", "2", ""} + for i, r := range results { + if r.String() != expected[i] { + t.Fatalf("expected %v, got %v", expected[i], r.String()) + } + } +}