Generally faster parsing

This commit includes an optimization that increases overall
performance.

The gains are roughly between 20% and 300% depending on the size
of the JSON document. Larger documents will see the greatest gains,
particularly when searching for keys that are deeply embedded, or
near the end of the document.
This commit is contained in:
tidwall 2024-10-01 20:45:20 -07:00
parent 92dff34b25
commit 133f42ce2d
2 changed files with 131 additions and 35 deletions

View File

@ -438,7 +438,7 @@ Benchmarks of GJSON alongside [encoding/json](https://golang.org/pkg/encoding/js
and [json-iterator](https://github.com/json-iterator/go) and [json-iterator](https://github.com/json-iterator/go)
``` ```
BenchmarkGJSONGet-10 14919366 240.9 ns/op 0 B/op 0 allocs/op BenchmarkGJSONGet-10 17893731 202.1 ns/op 0 B/op 0 allocs/op
BenchmarkGJSONUnmarshalMap-10 1663548 2157 ns/op 1920 B/op 26 allocs/op BenchmarkGJSONUnmarshalMap-10 1663548 2157 ns/op 1920 B/op 26 allocs/op
BenchmarkJSONUnmarshalMap-10 832236 4279 ns/op 2920 B/op 68 allocs/op BenchmarkJSONUnmarshalMap-10 832236 4279 ns/op 2920 B/op 68 allocs/op
BenchmarkJSONUnmarshalStruct-10 1076475 3219 ns/op 920 B/op 12 allocs/op BenchmarkJSONUnmarshalStruct-10 1076475 3219 ns/op 920 B/op 12 allocs/op
@ -489,6 +489,4 @@ widget.text.onMouseUp
** **
Last run: Oct 1, 2024
*These benchmarks were run on a MacBook Pro M1 Max using Go 1.22 and can be found [here](https://github.com/tidwall/gjson-benchmarks).* *These benchmarks were run on a MacBook Pro M1 Max using Go 1.22 and can be found [here](https://github.com/tidwall/gjson-benchmarks).*

162
gjson.go
View File

@ -1040,6 +1040,10 @@ func parseObjectPath(path string) (r objectPathResult) {
return return
} }
// vchars is a byte-indexed lookup table that classifies the characters
// parseSquash has to react to while scanning a value:
//
//	2 - '"', the start of a string
//	3 - an opening token: '{', '[' or '('
//	1 - a closing token: '}', ']' or ')'
//	0 - every other byte (the zero default), which the scanner skips
//
// For the open/close tokens the class minus 2 is the change in nesting
// depth (+1 for an opener, -1 for a closer).
var vchars = [256]byte{
'"': 2, '{': 3, '(': 3, '[': 3, '}': 1, ')': 1, ']': 1,
}
func parseSquash(json string, i int) (int, string) { func parseSquash(json string, i int) (int, string) {
// expects that the lead character is a '[' or '{' or '(' // expects that the lead character is a '[' or '{' or '('
// squash the value, ignoring all nested arrays and objects. // squash the value, ignoring all nested arrays and objects.
@ -1047,43 +1051,137 @@ func parseSquash(json string, i int) (int, string) {
s := i s := i
i++ i++
depth := 1 depth := 1
for ; i < len(json); i++ { var c byte
if json[i] >= '"' && json[i] <= '}' { for i < len(json) {
switch json[i] { for i < len(json)-8 {
case '"': jslice := json[i : i+8]
c = vchars[jslice[0]]
if c != 0 {
i += 0
goto token
}
c = vchars[jslice[1]]
if c != 0 {
i += 1
goto token
}
c = vchars[jslice[2]]
if c != 0 {
i += 2
goto token
}
c = vchars[jslice[3]]
if c != 0 {
i += 3
goto token
}
c = vchars[jslice[4]]
if c != 0 {
i += 4
goto token
}
c = vchars[jslice[5]]
if c != 0 {
i += 5
goto token
}
c = vchars[jslice[6]]
if c != 0 {
i += 6
goto token
}
c = vchars[jslice[7]]
if c != 0 {
i += 7
goto token
}
i += 8
}
c = vchars[json[i]]
if c == 0 {
i++
continue
}
token:
if c == 2 {
// '"' string
i++
s2 := i
nextquote:
for i < len(json)-8 {
jslice := json[i : i+8]
if jslice[0] == '"' {
i += 0
goto strchkesc
}
if jslice[1] == '"' {
i += 1
goto strchkesc
}
if jslice[2] == '"' {
i += 2
goto strchkesc
}
if jslice[3] == '"' {
i += 3
goto strchkesc
}
if jslice[4] == '"' {
i += 4
goto strchkesc
}
if jslice[5] == '"' {
i += 5
goto strchkesc
}
if jslice[6] == '"' {
i += 6
goto strchkesc
}
if jslice[7] == '"' {
i += 7
goto strchkesc
}
i += 8
}
goto strchkstd
strchkesc:
if json[i-1] != '\\' {
i++ i++
s2 := i continue
for ; i < len(json); i++ { }
if json[i] > '\\' { strchkstd:
continue for i < len(json) {
} if json[i] > '\\' || json[i] != '"' {
if json[i] == '"' {
// look for an escaped slash
if json[i-1] == '\\' {
n := 0
for j := i - 2; j > s2-1; j-- {
if json[j] != '\\' {
break
}
n++
}
if n%2 == 0 {
continue
}
}
break
}
}
case '{', '[', '(':
depth++
case '}', ']', ')':
depth--
if depth == 0 {
i++ i++
return i, json[s:i] continue
} }
// look for an escaped slash
if json[i-1] == '\\' {
n := 0
for j := i - 2; j > s2-1; j-- {
if json[j] != '\\' {
break
}
n++
}
if n%2 == 0 {
i++
goto nextquote
}
}
break
}
} else {
// '{', '[', '(', '}', ']', ')'
// open close tokens
depth += int(c) - 2
if depth == 0 {
i++
return i, json[s:i]
} }
} }
i++
} }
return i, json[s:] return i, json[s:]
} }