mirror of https://github.com/tidwall/gjson.git
added more comments and a couple minor optimizations
This commit is contained in:
parent
0e6e567424
commit
3a4fec0606
161
gjson.go
161
gjson.go
|
@ -121,23 +121,24 @@ type frame struct {
|
||||||
// "c?ildren.0" >> "Sara"
|
// "c?ildren.0" >> "Sara"
|
||||||
//
|
//
|
||||||
func Get(json string, path string) Result {
|
func Get(json string, path string) Result {
|
||||||
var s int
|
var s int // starting index variable
|
||||||
var wild bool
|
var wild bool // wildcard indicator
|
||||||
var parts = make([]part, 0, 4)
|
var parts = make([]part, 0, 4) // parsed path parts
|
||||||
|
|
||||||
// do nothing when no path specified
|
|
||||||
if len(path) == 0 {
|
if len(path) == 0 {
|
||||||
return Result{} // nothing
|
// do nothing when no path specified and return an empty result.
|
||||||
|
return Result{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// parse the path. just split on the dot
|
// parse the path into multiple parts.
|
||||||
for i := 0; i < len(path); i++ {
|
for i := 0; i < len(path); i++ {
|
||||||
next_part:
|
next_part:
|
||||||
// be optimistic that the path mostly contains lowercase and
|
// be optimistic that the path mostly contains lowercase and
|
||||||
// underscore characters.
|
// underscore characters.
|
||||||
if path[i] <= '\\' {
|
if path[i] <= '\\' {
|
||||||
if path[i] == '\\' {
|
if path[i] == '\\' {
|
||||||
// go into escape mode.
|
// go into escape mode. this is a slower path that
|
||||||
|
// strips off the escape character from the part.
|
||||||
epart := []byte(path[s:i])
|
epart := []byte(path[s:i])
|
||||||
i++
|
i++
|
||||||
if i < len(path) {
|
if i < len(path) {
|
||||||
|
@ -164,32 +165,36 @@ func Get(json string, path string) Result {
|
||||||
epart = append(epart, path[i])
|
epart = append(epart, path[i])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// append the last part
|
||||||
parts = append(parts, part{wild: wild, key: string(epart)})
|
parts = append(parts, part{wild: wild, key: string(epart)})
|
||||||
goto end_parts
|
goto end_parts
|
||||||
} else if path[i] == '.' {
|
} else if path[i] == '.' {
|
||||||
|
// append a new part
|
||||||
parts = append(parts, part{wild: wild, key: path[s:i]})
|
parts = append(parts, part{wild: wild, key: path[s:i]})
|
||||||
if wild {
|
if wild {
|
||||||
wild = false
|
wild = false // reset the wild flag
|
||||||
}
|
}
|
||||||
|
// set the starting index to one past the dot.
|
||||||
s = i + 1
|
s = i + 1
|
||||||
} else if path[i] == '*' || path[i] == '?' {
|
} else if path[i] == '*' || path[i] == '?' {
|
||||||
|
// set the wild flag to indicate that the part is a wildcard.
|
||||||
wild = true
|
wild = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// append the last part
|
||||||
parts = append(parts, part{wild: wild, key: path[s:]})
|
parts = append(parts, part{wild: wild, key: path[s:]})
|
||||||
end_parts:
|
end_parts:
|
||||||
|
var i int // index of current json character
|
||||||
|
var depth int // the current stack depth
|
||||||
|
var f frame // the current frame
|
||||||
|
var matched bool // flag used for key/part matching
|
||||||
|
var stack = make([]frame, 1, 4) // the frame stack
|
||||||
|
var value Result // the final value, also used for temp store
|
||||||
|
var vc byte // the current token value character type
|
||||||
|
|
||||||
var i, depth int
|
// look for first delimiter. only allow arrays and objects, other
|
||||||
var f frame
|
// json types will fail. it's ok for control characters to passthrough.
|
||||||
var matched bool
|
|
||||||
var stack = make([]frame, 1, 4)
|
|
||||||
var value Result
|
|
||||||
var vc byte
|
|
||||||
|
|
||||||
depth = 1
|
|
||||||
|
|
||||||
// look for first delimiter
|
|
||||||
for ; i < len(json); i++ {
|
for ; i < len(json); i++ {
|
||||||
if json[i] == '{' {
|
if json[i] == '{' {
|
||||||
f.stype = '{'
|
f.stype = '{'
|
||||||
|
@ -208,7 +213,10 @@ end_parts:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// read the next key
|
// assume that the depth is at least one
|
||||||
|
depth = 1
|
||||||
|
|
||||||
|
// read the next key from the json string
|
||||||
read_key:
|
read_key:
|
||||||
if f.stype == '[' {
|
if f.stype == '[' {
|
||||||
// for arrays we use the index of the value as the key.
|
// for arrays we use the index of the value as the key.
|
||||||
|
@ -217,24 +225,40 @@ read_key:
|
||||||
f.key = strconv.FormatInt(int64(f.count), 10)
|
f.key = strconv.FormatInt(int64(f.count), 10)
|
||||||
f.count++
|
f.count++
|
||||||
} else {
|
} else {
|
||||||
// for objects we must parse the next string.
|
// for objects we must parse the next string. this string will
|
||||||
|
// become the key that is compared against the path parts.
|
||||||
for ; i < len(json); i++ {
|
for ; i < len(json); i++ {
|
||||||
// read string
|
// begin key string reading routine.
|
||||||
if json[i] == '"' {
|
if json[i] == '"' {
|
||||||
i++
|
i++
|
||||||
// the first double-quote has already been read
|
// set the starting index. the first double-quote has already
|
||||||
|
// been read.
|
||||||
s = i
|
s = i
|
||||||
|
// loop through each character in the string looking for the
|
||||||
|
// double-quote termination character. it's possible that
|
||||||
|
// the string contains an escape slash character. if so, we
|
||||||
|
// must do a nested loop that will look for an isolated
|
||||||
|
// double-quote terminator.
|
||||||
for ; i < len(json); i++ {
|
for ; i < len(json); i++ {
|
||||||
if json[i] == '"' {
|
if json[i] == '"' {
|
||||||
|
// a simple string that contains no escape characters.
|
||||||
|
// assign this to the current frame key and we are
|
||||||
|
// done parsing the key.
|
||||||
f.key = json[s:i]
|
f.key = json[s:i]
|
||||||
i++
|
i++
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if json[i] == '\\' {
|
if json[i] == '\\' {
|
||||||
|
// escape character detected. we now look for the
|
||||||
|
// double-quote terminator.
|
||||||
i++
|
i++
|
||||||
for ; i < len(json); i++ {
|
for ; i < len(json); i++ {
|
||||||
if json[i] == '"' {
|
if json[i] == '"' {
|
||||||
// look for an escaped slash
|
// possibly the end of the string, but let's
|
||||||
|
// look to see if the previous character was
|
||||||
|
// an escape slash. if so then we must keep
|
||||||
|
// reading backwards to see if the slash has a
|
||||||
|
// prefixed slash, and so forth.
|
||||||
if json[i-1] == '\\' {
|
if json[i-1] == '\\' {
|
||||||
n := 0
|
n := 0
|
||||||
for j := i - 2; j > s-1; j-- {
|
for j := i - 2; j > s-1; j-- {
|
||||||
|
@ -244,12 +268,19 @@ read_key:
|
||||||
n++
|
n++
|
||||||
}
|
}
|
||||||
if n%2 == 0 {
|
if n%2 == 0 {
|
||||||
|
// the double-quote is not a terminator.
|
||||||
|
// keep reading the string.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// we found the correct double-quote terminator.
|
||||||
|
// stop reading the string.
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// the string contains escape sequences so we must
|
||||||
|
// unescape and then assign to the current frame key.
|
||||||
|
// done parsing the key
|
||||||
f.key = unescape(json[s:i])
|
f.key = unescape(json[s:i])
|
||||||
i++
|
i++
|
||||||
break
|
break
|
||||||
|
@ -257,82 +288,112 @@ read_key:
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
// end read string
|
// end of string key reading routine
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// we have a brand new (possibly shiny) key.
|
// we have a brand new (possibly shiny) key.
|
||||||
// is it the key that we are looking for?
|
// is it the key that we are looking for?
|
||||||
if parts[depth-1].wild {
|
if parts[depth-1].wild {
|
||||||
// it's a wildcard path element
|
// the path part contains a wildcard character. we must do a wildcard
|
||||||
|
// match to determine if it truly matches.
|
||||||
matched = wildcardMatch(f.key, parts[depth-1].key)
|
matched = wildcardMatch(f.key, parts[depth-1].key)
|
||||||
} else {
|
} else {
|
||||||
// just a straight up equality check
|
// just a straight up equality check
|
||||||
matched = parts[depth-1].key == f.key
|
matched = parts[depth-1].key == f.key
|
||||||
}
|
}
|
||||||
|
|
||||||
// read to the value token
|
// read the value
|
||||||
// there's likely a colon here, but who cares. just burn past it.
|
|
||||||
for ; i < len(json); i++ {
|
for ; i < len(json); i++ {
|
||||||
if json[i] < '"' { // control character
|
// anything less than a double-quote is likely whitespace.
|
||||||
|
// just burn past these.
|
||||||
|
if json[i] < '"' {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if json[i] < '-' { // string
|
// anything less than a dash is likely a double-quote. let's
|
||||||
|
// assume that it is.
|
||||||
|
if json[i] < '-' {
|
||||||
i++
|
i++
|
||||||
// we read the val below
|
|
||||||
vc = '"'
|
vc = '"'
|
||||||
|
// defer reading the string value until we know for sure
|
||||||
|
// that we want it. if we don't want it, then we will
|
||||||
|
// parse it using a quicker method than if we do want it.
|
||||||
goto proc_val
|
goto proc_val
|
||||||
}
|
}
|
||||||
if json[i] < '[' { // number
|
// any character less than an open bracket is likely a number.
|
||||||
|
if json[i] < '[' {
|
||||||
|
// with one exception, the colon character. we do not care
|
||||||
|
// about the colon character. just burn past it.
|
||||||
if json[i] == ':' {
|
if json[i] == ':' {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
vc = '0'
|
vc = '0'
|
||||||
s = i
|
s = i
|
||||||
i++
|
i++
|
||||||
// look for characters that cannot be in a number
|
// look for any character that might terminate a number
|
||||||
|
// break on whitespace, comma, ']', and '}'.
|
||||||
for ; i < len(json); i++ {
|
for ; i < len(json); i++ {
|
||||||
switch json[i] {
|
// less than dash might have valid characters
|
||||||
default:
|
if json[i] <= '-' {
|
||||||
continue
|
if json[i] <= ' ' || json[i] == ',' {
|
||||||
case ' ', '\t', '\r', '\n', ',', ']', '}':
|
// break on whitespace and comma
|
||||||
}
|
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
value.Raw = json[s:i]
|
// could be a '+' or '-'. let's assume so.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if json[i] < ']' {
|
||||||
|
// probably a valid number
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if json[i] == 'e' || json[i] == 'E' {
|
||||||
|
// allow for exponential numbers
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// likely a ']' or '}'
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// we have raw number. jump to the process value routine.
|
||||||
goto proc_val
|
goto proc_val
|
||||||
}
|
}
|
||||||
if json[i] < ']' { // '['
|
// any character less than ']' is likely '['. let's assume
|
||||||
|
// it's an open-array character.
|
||||||
|
if json[i] < ']' {
|
||||||
i++
|
i++
|
||||||
vc = '['
|
vc = '['
|
||||||
goto proc_delim
|
// jump to process delimiter routine.
|
||||||
|
goto proc_nested
|
||||||
}
|
}
|
||||||
if json[i] < 'u' { // true, false, null
|
// any character less than 'u' likely means that the value is
|
||||||
vc = json[i]
|
// 'true', 'false', or 'null'.
|
||||||
|
if json[i] < 'u' {
|
||||||
|
vc = json[i] // assign the vc token character to the actual.
|
||||||
s = i
|
s = i
|
||||||
i++
|
i++
|
||||||
for ; i < len(json); i++ {
|
for ; i < len(json); i++ {
|
||||||
// let's pick up any character. it doesn't matter.
|
// let's pick up any non-alpha lowercase character as the
|
||||||
|
// terminator. it doesn't matter.
|
||||||
if json[i] < 'a' || json[i] > 'z' {
|
if json[i] < 'a' || json[i] > 'z' {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
value.Raw = json[s:i]
|
// we have raw literal. jump to the process value routine.
|
||||||
goto proc_val
|
goto proc_val
|
||||||
}
|
}
|
||||||
// must be an open objet
|
// if we reached this far, then the value must be a nested object.
|
||||||
i++
|
i++
|
||||||
vc = '{'
|
vc = '{'
|
||||||
goto proc_delim
|
// jump to process delimiter routine.
|
||||||
|
goto proc_nested
|
||||||
}
|
}
|
||||||
vc = 0
|
vc = 0
|
||||||
|
// ran out of json buffer
|
||||||
// sanity check before we move on
|
|
||||||
if i >= len(json) {
|
if i >= len(json) {
|
||||||
return Result{}
|
return Result{}
|
||||||
}
|
}
|
||||||
|
|
||||||
proc_delim:
|
// process nested array or object
|
||||||
|
proc_nested:
|
||||||
if (matched && depth == len(parts)) || !matched {
|
if (matched && depth == len(parts)) || !matched {
|
||||||
// begin squash
|
// begin squash
|
||||||
// squash the value, ignoring all nested arrays and objects.
|
// squash the value, ignoring all nested arrays and objects.
|
||||||
|
@ -384,10 +445,10 @@ proc_val:
|
||||||
if matched {
|
if matched {
|
||||||
// hit, that's good!
|
// hit, that's good!
|
||||||
if depth == len(parts) {
|
if depth == len(parts) {
|
||||||
|
value.Raw = json[s:i]
|
||||||
switch vc {
|
switch vc {
|
||||||
case '{', '[':
|
case '{', '[':
|
||||||
value.Type = JSON
|
value.Type = JSON
|
||||||
value.Raw = json[s:i]
|
|
||||||
case 'n':
|
case 'n':
|
||||||
value.Type = Null
|
value.Type = Null
|
||||||
case 't':
|
case 't':
|
||||||
|
|
Loading…
Reference in New Issue