adding regex support for the key match

This commit is contained in:
Dinesh Kumar 2021-12-28 00:02:37 +05:30
parent ba95ef80b5
commit d0d6d25016
4 changed files with 107 additions and 28 deletions

View File

@ -63,6 +63,9 @@ To access an array value use the index as the key.
To get the number of elements in an array or to access a child path, use the '#' character.
The dot and wildcard characters can be escaped with '\\'.
A path key can also be a valid regular expression. Provide the regular expression string by enclosing it in '~'.
If the provided regular expression is valid, the matched result set is returned. If the regular expression is invalid, an empty result set is returned.
```json
{
"name": {"first": "Tom", "last": "Anderson"},
@ -77,16 +80,19 @@ The dot and wildcard characters can be escaped with '\\'.
}
```
```
"name.last" >> "Anderson"
"age" >> 37
"children" >> ["Sara","Alex","Jack"]
"children.#" >> 3
"children.1" >> "Alex"
"child*.2" >> "Jack"
"c?ildren.0" >> "Sara"
"fav\.movie" >> "Deer Hunter"
"friends.#.first" >> ["Dale","Roger","Jane"]
"friends.1.last" >> "Craig"
"name.last" >> "Anderson"
"age" >> 37
"children" >> ["Sara","Alex","Jack"]
"children.#" >> 3
"children.1" >> "Alex"
"child*.2" >> "Jack"
"c?ildren.0" >> "Sara"
"fav\.movie" >> "Deer Hunter"
"friends.#.first" >> ["Dale","Roger","Jane"]
"friends.1.last" >> "Craig"
"name.~\w~" >> "Tom"
"name.~la*~" >> "Anderson"
"friends.2.~^(\w{3})$~" >> 47
```
You can also query an array for the first match by using `#(...)`, or find all

View File

@ -4,16 +4,21 @@ A GJSON Path is a text string syntax that describes a search pattern for quickly
This document is designed to explain the structure of a GJSON Path through examples.
- [Path structure](#path-structure)
- [Basic](#basic)
- [Wildcards](#wildcards)
- [Escape Character](#escape-character)
- [Arrays](#arrays)
- [Queries](#queries)
- [Dot vs Pipe](#dot-vs-pipe)
- [Modifiers](#modifiers)
- [Multipaths](#multipaths)
- [Literals](#literals)
- [GJSON Path Syntax](#gjson-path-syntax)
- [Path structure](#path-structure)
- [Example](#example)
- [Basic](#basic)
- [Wildcards](#wildcards)
- [Regular Expression](#regular-expression)
- [Escape character](#escape-character)
- [Arrays](#arrays)
- [Queries](#queries)
- [Dot vs Pipe](#dot-vs-pipe)
- [Modifiers](#modifiers)
- [Modifier arguments](#modifier-arguments)
- [Custom modifiers](#custom-modifiers)
- [Multipaths](#multipaths)
- [Literals](#literals)
The definitive implementation is [github.com/tidwall/gjson](https://github.com/tidwall/gjson).
Use the [GJSON Playground](https://gjson.dev) to experiment with the syntax online.
@ -69,6 +74,15 @@ child*.2 "Jack"
c?ildren.0 "Sara"
```
### Regular Expression
Regular-expression-based key matching is supported, but the expression must be enclosed in `~`. If the provided regular expression is invalid, an empty result set is always returned.
```go
~fav.*~ "Deer Hunter"
friends.#.~fir*~ ["Dale","Roger","Jane"]
~[~
```
### Escape character
Special purpose characters, such as `.`, `*`, and `?` can be escaped with `\`.

View File

@ -3,6 +3,7 @@ package gjson
import (
"encoding/json"
"regexp"
"strconv"
"strings"
"time"
@ -943,9 +944,11 @@ type objectPathResult struct {
piped bool
wild bool
more bool
regex bool
}
func parseObjectPath(path string) (r objectPathResult) {
var regexBegin = false
for i := 0; i < len(path); i++ {
if path[i] == '|' {
r.part = path[:i]
@ -953,7 +956,7 @@ func parseObjectPath(path string) (r objectPathResult) {
r.piped = true
return
}
if path[i] == '.' {
if path[i] == '.' && !regexBegin {
r.part = path[:i]
if i < len(path)-1 && isDotPiperChar(path[i+1]) {
r.pipe = path[i+1:]
@ -968,7 +971,17 @@ func parseObjectPath(path string) (r objectPathResult) {
r.wild = true
continue
}
if path[i] == '\\' {
if path[i] == '~' {
if !regexBegin {
regexBegin = true
} else {
r.regex = true
regexBegin = false
}
continue
}
if path[i] == '\\' && !regexBegin {
// go into escape mode. this is a slower path that
// strips off the escape character from the part.
epart := []byte(path[:i])
@ -1123,17 +1136,26 @@ func parseObject(c *parseContext, i int, path string) (int, bool) {
if !ok {
return i, false
}
if rp.wild {
if rp.regex {
if kesc {
pmatch = matchLimit(unescape(key), rp.part)
pmatch = matchRegex(unescape(key), rp.part)
} else {
pmatch = matchLimit(key, rp.part)
pmatch = matchRegex(key, rp.part)
}
} else {
if kesc {
pmatch = rp.part == unescape(key)
if rp.wild {
if kesc {
pmatch = matchLimit(unescape(key), rp.part)
} else {
pmatch = matchLimit(key, rp.part)
}
} else {
pmatch = rp.part == key
if kesc {
pmatch = rp.part == unescape(key)
} else {
pmatch = rp.part == key
}
}
}
hit = pmatch && !rp.more
@ -1232,6 +1254,18 @@ func matchLimit(str, pattern string) bool {
return matched
}
// matchRegex reports whether str matches the regular expression carried in
// pattern.
//
// pattern is expected in its path form, i.e. wrapped in a single pair of '~'
// delimiters ("~expr~"). Exactly one leading and one trailing '~' are
// removed before matching, so a '~' that belongs to the expression itself
// (for example "~^a~~", whose expression is "^a~") is preserved.
//
// An invalid expression is not reported as an error: the function simply
// returns false so the caller sees "no match".
//
// NOTE: regexp.MatchString compiles the expression on every call.
func matchRegex(str, pattern string) bool {
	// Strip only the enclosing delimiters, not every '~' at the edges.
	expr := strings.TrimSuffix(strings.TrimPrefix(pattern, "~"), "~")
	matched, err := regexp.MatchString(expr, str)
	if err != nil {
		// Deliberately swallow the compile error; an invalid regex
		// never matches any key.
		return false
	}
	return matched
}
func queryMatches(rp *arrayPathResult, value Result) bool {
rpv := rp.query.value
if len(rpv) > 0 && rpv[0] == '~' {

View File

@ -105,6 +105,10 @@ func TestEscapePath(t *testing.T) {
testEscapePath(t, json, "test.keyv\\.", "val5")
testEscapePath(t, json, "test.key\\.v", "val6")
testEscapePath(t, json, "test.keyk\\*.key\\?", "val7")
//XXX
//Do we have escaped regex string as key ?
//Rather, let user provide a valid regex to perform the escape.
}
// this json block is poorly formed on purpose.
@ -200,6 +204,27 @@ func TestPath(t *testing.T) {
get("lastly.end\\.\\.\\.ing")
get("lastly.yay")
//XXX
//Regex path testing goes here
regexGet := func(path string, isValid bool) {
r1 := Get(json, path)
if isValid {
assert(t, r1.Raw != "")
} else {
assert(t, r1.Raw == "")
}
}
//XXX
//Testing a valid regular expression path
regexGet("~.~", true)
regexGet("loggy.~programmers~", true)
regexGet("~last*~.~end*~", true)
//Get any 3 word length string from the JSON path
regexGet(`loggy.programmers.2.~^(\w{3})$~`, true)
//Testing an invalid regular expression
regexGet("~[0-9~", false)
}
func TestTimeResult(t *testing.T) {