Slightly improve parsing speed

 - Adds a length "hint" argument to tolit so that it no longer needs to loop
   to find the end of a literal
 - Replaces (slow) modulo operations (`n%2`) with bitwise ANDs (`n&0x1`)
 - Most notably, replaces the copying string() conversion in ParseBytes
   with a dangerous but faster zero-copy path. Since gjson only uses strings
   internally and in return values, and cannot modify them, this is safe
   [citation needed]

This improves performance by between 0 and 10%:

```
benchmark                         old ns/op     new ns/op     delta
BenchmarkGJSONGet                 621           577           -7.09%
BenchmarkGJSONGetMany4Paths       524           489           -6.68%
BenchmarkGJSONGetMany8Paths       361           340           -5.82%
BenchmarkGJSONGetMany16Paths      274           259           -5.47%
BenchmarkGJSONGetMany32Paths      219           212           -3.20%
BenchmarkGJSONGetMany64Paths      201           195           -2.99%
BenchmarkGJSONGetMany128Paths     187           182           -2.67%
BenchmarkGJSONUnmarshalMap        6972          6935          -0.53%
BenchmarkGJSONUnmarshalStruct     6606          6128          -7.24%
```
This commit is contained in:
Connor Peet 2017-08-31 23:40:20 -07:00
parent 4e8f0c7f28
commit 6d69ff7f5e
1 changed files with 26 additions and 38 deletions

View File

@ -353,13 +353,13 @@ func (t Result) arrayOrMap(vc byte, valueize bool) (r arrayOrMapResult) {
value.Raw = squash(json[i:])
case 'n':
value.Type = Null
value.Raw = tolit(json[i:])
value.Raw = tolit(json[i:], 4)
case 't':
value.Type = True
value.Raw = tolit(json[i:])
value.Raw = tolit(json[i:], 4)
case 'f':
value.Type = False
value.Raw = tolit(json[i:])
value.Raw = tolit(json[i:], 5)
case '"':
value.Type = String
value.Raw, value.Str = tostr(json[i:])
@ -367,7 +367,7 @@ func (t Result) arrayOrMap(vc byte, valueize bool) (r arrayOrMapResult) {
i += len(value.Raw) - 1
if r.vc == '{' {
if count%2 == 0 {
if count&0x1 == 0 {
key = value
} else {
if valueize {
@ -411,13 +411,13 @@ func Parse(json string) Result {
}
case 'n':
value.Type = Null
value.Raw = tolit(json[i:])
value.Raw = tolit(json[i:], 4)
case 't':
value.Type = True
value.Raw = tolit(json[i:])
value.Raw = tolit(json[i:], 4)
case 'f':
value.Type = False
value.Raw = tolit(json[i:])
value.Raw = tolit(json[i:], 5)
case '"':
value.Type = String
value.Raw, value.Str = tostr(json[i:])
@ -430,7 +430,10 @@ func Parse(json string) Result {
// ParseBytes parses the json and returns a result.
// If working with bytes, this method preferred over Parse(string(data))
func ParseBytes(json []byte) Result {
return Parse(string(json))
return Parse(*(*string)(unsafe.Pointer(&reflect.StringHeader{
Data: uintptr(unsafe.Pointer(&json[0])),
Len: len(json),
})))
}
func squash(json string) string {
@ -458,7 +461,7 @@ func squash(json string) string {
}
n++
}
if n%2 == 0 {
if n&0x1 == 0 {
continue
}
}
@ -480,44 +483,29 @@ func squash(json string) string {
func tonum(json string) (raw string, num float64) {
for i := 1; i < len(json); i++ {
// less than dash might have valid characters
if json[i] <= '-' {
if json[i] <= ' ' || json[i] == ',' {
// break on whitespace and comma
raw = json[:i]
num, _ = strconv.ParseFloat(raw, 64)
return
}
// could be a '+' or '-'. let's assume so.
continue
}
if json[i] < ']' {
// probably a valid number
continue
}
if json[i] == 'e' || json[i] == 'E' {
// allow for exponential numbers
continue
}
// likely a ']' or '}'
// Allow characters > . that don't terminate a JSON type
if json[i] >= '.' && json[i] != ']' && json[i] != '}' {
continue
}
raw = json[:i]
num, _ = strconv.ParseFloat(raw, 64)
return
}
raw = json
num, _ = strconv.ParseFloat(raw, 64)
return
}
func tolit(json string) (raw string) {
for i := 1; i < len(json); i++ {
if json[i] <= 'a' || json[i] >= 'z' {
return json[:i]
}
}
func tolit(json string, chars int) (raw string) {
if chars >= len(json) { // invalid, not enough characters
return json
}
return json[:chars]
}
func tostr(json string) (raw string, str string) {
// expects that the lead character is a '"'
for i := 1; i < len(json); i++ {
@ -543,7 +531,7 @@ func tostr(json string) (raw string, str string) {
}
n++
}
if n%2 == 0 {
if n&0x1 == 0 {
continue
}
}
@ -628,7 +616,7 @@ func parseString(json string, i int) (int, string, bool, bool) {
}
n++
}
if n%2 == 0 {
if n&0x1 == 0 {
continue
}
}
@ -766,7 +754,7 @@ func parseArrayPath(path string) (r arrayPathResult) {
}
n++
}
if n%2 == 0 {
if n&0x1 == 0 {
continue
}
}
@ -880,7 +868,7 @@ func parseSquash(json string, i int) (int, string) {
}
n++
}
if n%2 == 0 {
if n&0x1 == 0 {
continue
}
}
@ -937,7 +925,7 @@ func parseObject(c *parseContext, i int, path string) (int, bool) {
}
n++
}
if n%2 == 0 {
if n&0x1 == 0 {
continue
}
}