added more comments and a couple minor optimizations

This commit is contained in:
Josh Baker 2016-08-19 13:51:52 -07:00
parent 0e6e567424
commit 3a4fec0606
1 changed files with 109 additions and 48 deletions

157
gjson.go
View File

@ -121,23 +121,24 @@ type frame struct {
// "c?ildren.0" >> "Sara" // "c?ildren.0" >> "Sara"
// //
func Get(json string, path string) Result { func Get(json string, path string) Result {
var s int var s int // starting index variable
var wild bool var wild bool // wildcard indicator
var parts = make([]part, 0, 4) var parts = make([]part, 0, 4) // parsed path parts
// do nothing when no path specified
if len(path) == 0 { if len(path) == 0 {
return Result{} // nothing // do nothing when no path specified and return an empty result.
return Result{}
} }
// parse the path. just split on the dot // parse the path into multiple parts.
for i := 0; i < len(path); i++ { for i := 0; i < len(path); i++ {
next_part: next_part:
// be optimistic that the path mostly contains lowercase and // be optimistic that the path mostly contains lowercase and
// underscore characters. // underscore characters.
if path[i] <= '\\' { if path[i] <= '\\' {
if path[i] == '\\' { if path[i] == '\\' {
// go into escape mode. // go into escape mode. this is a slower path that
// strips off the escape character from the part.
epart := []byte(path[s:i]) epart := []byte(path[s:i])
i++ i++
if i < len(path) { if i < len(path) {
@ -164,32 +165,36 @@ func Get(json string, path string) Result {
epart = append(epart, path[i]) epart = append(epart, path[i])
} }
} }
// append the last part
parts = append(parts, part{wild: wild, key: string(epart)}) parts = append(parts, part{wild: wild, key: string(epart)})
goto end_parts goto end_parts
} else if path[i] == '.' { } else if path[i] == '.' {
// append a new part
parts = append(parts, part{wild: wild, key: path[s:i]}) parts = append(parts, part{wild: wild, key: path[s:i]})
if wild { if wild {
wild = false wild = false // reset the wild flag
} }
// set the starting index to one past the dot.
s = i + 1 s = i + 1
} else if path[i] == '*' || path[i] == '?' { } else if path[i] == '*' || path[i] == '?' {
// set the wild flag to indicate that the part is a wildcard.
wild = true wild = true
} }
} }
} }
// append the last part
parts = append(parts, part{wild: wild, key: path[s:]}) parts = append(parts, part{wild: wild, key: path[s:]})
end_parts: end_parts:
var i int // index of current json character
var depth int // the current stack depth
var f frame // the current frame
var matched bool // flag used for key/part matching
var stack = make([]frame, 1, 4) // the frame stack
var value Result // the final value, also used for temp store
var vc byte // the current token value character type
var i, depth int // look for first delimiter. only allow arrays and objects, other
var f frame // json types will fail. it's ok for control characters to passthrough.
var matched bool
var stack = make([]frame, 1, 4)
var value Result
var vc byte
depth = 1
// look for first delimiter
for ; i < len(json); i++ { for ; i < len(json); i++ {
if json[i] == '{' { if json[i] == '{' {
f.stype = '{' f.stype = '{'
@ -208,7 +213,10 @@ end_parts:
} }
} }
// read the next key // assume that the depth is at least one
depth = 1
// read the next key from the json string
read_key: read_key:
if f.stype == '[' { if f.stype == '[' {
// for arrays we use the index of the value as the key. // for arrays we use the index of the value as the key.
@ -217,24 +225,40 @@ read_key:
f.key = strconv.FormatInt(int64(f.count), 10) f.key = strconv.FormatInt(int64(f.count), 10)
f.count++ f.count++
} else { } else {
// for objects we must parse the next string. // for objects we must parse the next string. this string will
// become the key that is compared against the path parts.
for ; i < len(json); i++ { for ; i < len(json); i++ {
// read string // begin key string reading routine.
if json[i] == '"' { if json[i] == '"' {
i++ i++
// the first double-quote has already been read // set the starting index. the first double-quote has already
// been read.
s = i s = i
// loop through each character in the string looking for the
// double-quote termination character. it's possible that
// the string contains an escape slash character. if so, we
// must do a nested loop that will look for an isolated
// double-quote terminator.
for ; i < len(json); i++ { for ; i < len(json); i++ {
if json[i] == '"' { if json[i] == '"' {
// a simple string that contains no escape characters.
// assign this to the current frame key and we are
// done parsing the key.
f.key = json[s:i] f.key = json[s:i]
i++ i++
break break
} }
if json[i] == '\\' { if json[i] == '\\' {
// escape character detected. we now look for the
// double-quote terminator.
i++ i++
for ; i < len(json); i++ { for ; i < len(json); i++ {
if json[i] == '"' { if json[i] == '"' {
// look for an escaped slash // possibly the end of the string, but let's
// look to see if the previous character was
// an escape slash. if so then we must keep
// reading backwards to see if the slash has a
// prefixed slash, and so forth.
if json[i-1] == '\\' { if json[i-1] == '\\' {
n := 0 n := 0
for j := i - 2; j > s-1; j-- { for j := i - 2; j > s-1; j-- {
@ -244,12 +268,19 @@ read_key:
n++ n++
} }
if n%2 == 0 { if n%2 == 0 {
// the double-quote is not a terminator.
// keep reading the string.
continue continue
} }
} }
// we found the correct double-quote terminator.
// stop reading the string.
break break
} }
} }
// the string contains escape sequences so we must
// unescape and then assign to the current frame key.
// done parsing the key
f.key = unescape(json[s:i]) f.key = unescape(json[s:i])
i++ i++
break break
@ -257,82 +288,112 @@ read_key:
} }
break break
} }
// end read string // end of string key reading routine
} }
} }
// we have a brand new (possibly shiny) key. // we have a brand new (possibly shiny) key.
// is it the key that we are looking for? // is it the key that we are looking for?
if parts[depth-1].wild { if parts[depth-1].wild {
// it's a wildcard path element // the path part contains a wildcard character. we must do a wildcard
// match to determine if it truly matches.
matched = wildcardMatch(f.key, parts[depth-1].key) matched = wildcardMatch(f.key, parts[depth-1].key)
} else { } else {
// just a straight up equality check // just a straight up equality check
matched = parts[depth-1].key == f.key matched = parts[depth-1].key == f.key
} }
// read to the value token // read the value
// there's likely a colon here, but who cares. just burn past it.
for ; i < len(json); i++ { for ; i < len(json); i++ {
if json[i] < '"' { // control character // anything less than a double-quote is likely whitespace.
// just burn past these.
if json[i] < '"' {
continue continue
} }
if json[i] < '-' { // string // anything less than a dash is likely a double-quote. let's
// assume that it is.
if json[i] < '-' {
i++ i++
// we read the val below
vc = '"' vc = '"'
// defer reading the string value until we know for sure
// that we want it. if we don't want it, then we will
// parse it using a quicker method than if we do want it.
goto proc_val goto proc_val
} }
if json[i] < '[' { // number // any character less than an open bracket is likely a number.
if json[i] < '[' {
// with one exception, the colon character. we do not care
// about the colon character. just burn past it.
if json[i] == ':' { if json[i] == ':' {
continue continue
} }
vc = '0' vc = '0'
s = i s = i
i++ i++
// look for characters that cannot be in a number // look for any character that might terminate a number
// break on whitespace, comma, ']', and '}'.
for ; i < len(json); i++ { for ; i < len(json); i++ {
switch json[i] { // less than dash might have valid characters
default: if json[i] <= '-' {
if json[i] <= ' ' || json[i] == ',' {
// break on whitespace and comma
break
}
// could be a '+' or '-'. let's assume so.
continue continue
case ' ', '\t', '\r', '\n', ',', ']', '}':
} }
if json[i] < ']' {
// probably a valid number
continue
}
if json[i] == 'e' || json[i] == 'E' {
// allow for exponential numbers
continue
}
// likely a ']' or '}'
break break
} }
value.Raw = json[s:i] // we have raw number. jump to the process value routine.
goto proc_val goto proc_val
} }
if json[i] < ']' { // '[' // any character less than ']' is likely '['. let's assume
// it's an open-array character.
if json[i] < ']' {
i++ i++
vc = '[' vc = '['
goto proc_delim // jump to process delimiter routine.
goto proc_nested
} }
if json[i] < 'u' { // true, false, null // any character less than 'u' likely means that the value is
vc = json[i] // 'true', 'false', or 'null'.
if json[i] < 'u' {
vc = json[i] // assign the vc token character to the actual.
s = i s = i
i++ i++
for ; i < len(json); i++ { for ; i < len(json); i++ {
// let's pick up any character. it doesn't matter. // let's pick up any non-alpha lowercase character as the
// terminator. it doesn't matter.
if json[i] < 'a' || json[i] > 'z' { if json[i] < 'a' || json[i] > 'z' {
break break
} }
} }
value.Raw = json[s:i] // we have raw literal. jump to the process value routine.
goto proc_val goto proc_val
} }
// must be an open objet // if we reached this far, then the value must be a nested object.
i++ i++
vc = '{' vc = '{'
goto proc_delim // jump to process delimiter routine.
goto proc_nested
} }
vc = 0 vc = 0
// ran out of json buffer
// sanity check before we move on
if i >= len(json) { if i >= len(json) {
return Result{} return Result{}
} }
proc_delim: // process nested array or object
proc_nested:
if (matched && depth == len(parts)) || !matched { if (matched && depth == len(parts)) || !matched {
// begin squash // begin squash
// squash the value, ignoring all nested arrays and objects. // squash the value, ignoring all nested arrays and objects.
@ -384,10 +445,10 @@ proc_val:
if matched { if matched {
// hit, that's good! // hit, that's good!
if depth == len(parts) { if depth == len(parts) {
value.Raw = json[s:i]
switch vc { switch vc {
case '{', '[': case '{', '[':
value.Type = JSON value.Type = JSON
value.Raw = json[s:i]
case 'n': case 'n':
value.Type = Null value.Type = Null
case 't': case 't':