adding regex support for the key match

This commit is contained in:
Dinesh Kumar 2021-12-28 00:02:37 +05:30
parent ba95ef80b5
commit d0d6d25016
4 changed files with 107 additions and 28 deletions

View File

@ -63,6 +63,9 @@ To access an array value use the index as the key.
To get the number of elements in an array or to access a child path, use the '#' character. To get the number of elements in an array or to access a child path, use the '#' character.
The dot and wildcard characters can be escaped with '\\'. The dot and wildcard characters can be escaped with '\\'.
A path key can also be a valid regular expression. Provide the regular expression string by enclosing it in '~'.
If the provided regular expression is valid, the matching result set is returned. If the regular expression is invalid, an empty result set is returned.
```json ```json
{ {
"name": {"first": "Tom", "last": "Anderson"}, "name": {"first": "Tom", "last": "Anderson"},
@ -77,16 +80,19 @@ The dot and wildcard characters can be escaped with '\\'.
} }
``` ```
``` ```
"name.last" >> "Anderson" "name.last" >> "Anderson"
"age" >> 37 "age" >> 37
"children" >> ["Sara","Alex","Jack"] "children" >> ["Sara","Alex","Jack"]
"children.#" >> 3 "children.#" >> 3
"children.1" >> "Alex" "children.1" >> "Alex"
"child*.2" >> "Jack" "child*.2" >> "Jack"
"c?ildren.0" >> "Sara" "c?ildren.0" >> "Sara"
"fav\.movie" >> "Deer Hunter" "fav\.movie" >> "Deer Hunter"
"friends.#.first" >> ["Dale","Roger","Jane"] "friends.#.first" >> ["Dale","Roger","Jane"]
"friends.1.last" >> "Craig" "friends.1.last" >> "Craig"
"name.~\w~" >> "Tom"
"name.~la*~" >> "Anderson"
"friends.2.~^(\w{3})$~" >> 47
``` ```
You can also query an array for the first match by using `#(...)`, or find all You can also query an array for the first match by using `#(...)`, or find all

View File

@ -4,16 +4,21 @@ A GJSON Path is a text string syntax that describes a search pattern for quickly
This document is designed to explain the structure of a GJSON Path through examples. This document is designed to explain the structure of a GJSON Path through examples.
- [Path structure](#path-structure) - [GJSON Path Syntax](#gjson-path-syntax)
- [Basic](#basic) - [Path structure](#path-structure)
- [Wildcards](#wildcards) - [Example](#example)
- [Escape Character](#escape-character) - [Basic](#basic)
- [Arrays](#arrays) - [Wildcards](#wildcards)
- [Queries](#queries) - [Regular Expression](#regular-expression)
- [Dot vs Pipe](#dot-vs-pipe) - [Escape character](#escape-character)
- [Modifiers](#modifiers) - [Arrays](#arrays)
- [Multipaths](#multipaths) - [Queries](#queries)
- [Literals](#literals) - [Dot vs Pipe](#dot-vs-pipe)
- [Modifiers](#modifiers)
- [Modifier arguments](#modifier-arguments)
- [Custom modifiers](#custom-modifiers)
- [Multipaths](#multipaths)
- [Literals](#literals)
The definitive implementation is [github.com/tidwall/gjson](https://github.com/tidwall/gjson). The definitive implementation is [github.com/tidwall/gjson](https://github.com/tidwall/gjson).
Use the [GJSON Playground](https://gjson.dev) to experiment with the syntax online. Use the [GJSON Playground](https://gjson.dev) to experiment with the syntax online.
@ -69,6 +74,15 @@ child*.2 "Jack"
c?ildren.0 "Sara" c?ildren.0 "Sara"
``` ```
### Regular Expression
Regular-expression-based key matching is supported; the expression must be enclosed in `~`. If the provided regular expression is invalid, the result set is always empty.
```go
~fav.*~ "Deer Hunter"
friends.#.~fir*~ ["Dale","Roger","Jane"]
~[~
```
### Escape character ### Escape character
Special purpose characters, such as `.`, `*`, and `?` can be escaped with `\`. Special purpose characters, such as `.`, `*`, and `?` can be escaped with `\`.

View File

@ -3,6 +3,7 @@ package gjson
import ( import (
"encoding/json" "encoding/json"
"regexp"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -943,9 +944,11 @@ type objectPathResult struct {
piped bool piped bool
wild bool wild bool
more bool more bool
regex bool
} }
func parseObjectPath(path string) (r objectPathResult) { func parseObjectPath(path string) (r objectPathResult) {
var regexBegin = false
for i := 0; i < len(path); i++ { for i := 0; i < len(path); i++ {
if path[i] == '|' { if path[i] == '|' {
r.part = path[:i] r.part = path[:i]
@ -953,7 +956,7 @@ func parseObjectPath(path string) (r objectPathResult) {
r.piped = true r.piped = true
return return
} }
if path[i] == '.' { if path[i] == '.' && !regexBegin {
r.part = path[:i] r.part = path[:i]
if i < len(path)-1 && isDotPiperChar(path[i+1]) { if i < len(path)-1 && isDotPiperChar(path[i+1]) {
r.pipe = path[i+1:] r.pipe = path[i+1:]
@ -968,7 +971,17 @@ func parseObjectPath(path string) (r objectPathResult) {
r.wild = true r.wild = true
continue continue
} }
if path[i] == '\\' { if path[i] == '~' {
if !regexBegin {
regexBegin = true
} else {
r.regex = true
regexBegin = false
}
continue
}
if path[i] == '\\' && !regexBegin {
// go into escape mode. this is a slower path that // go into escape mode. this is a slower path that
// strips off the escape character from the part. // strips off the escape character from the part.
epart := []byte(path[:i]) epart := []byte(path[:i])
@ -1123,17 +1136,26 @@ func parseObject(c *parseContext, i int, path string) (int, bool) {
if !ok { if !ok {
return i, false return i, false
} }
if rp.wild {
if rp.regex {
if kesc { if kesc {
pmatch = matchLimit(unescape(key), rp.part) pmatch = matchRegex(unescape(key), rp.part)
} else { } else {
pmatch = matchLimit(key, rp.part) pmatch = matchRegex(key, rp.part)
} }
} else { } else {
if kesc { if rp.wild {
pmatch = rp.part == unescape(key) if kesc {
pmatch = matchLimit(unescape(key), rp.part)
} else {
pmatch = matchLimit(key, rp.part)
}
} else { } else {
pmatch = rp.part == key if kesc {
pmatch = rp.part == unescape(key)
} else {
pmatch = rp.part == key
}
} }
} }
hit = pmatch && !rp.more hit = pmatch && !rp.more
@ -1232,6 +1254,18 @@ func matchLimit(str, pattern string) bool {
return matched return matched
} }
// matchRegex reports whether str matches the regular expression carried in
// pattern.
//
// pattern is expected to be the expression wrapped in a single '~' delimiter
// on each side (for example "~fir*~"). Exactly one delimiter is removed from
// each end, so an expression that itself begins or ends with '~' (such as
// "~~+~", meaning "one or more tildes") is preserved intact.
//
// An invalid regular expression is not reported as an error; it simply
// matches nothing and false is returned.
//
// Note that the expression is compiled on every call.
func matchRegex(str, pattern string) bool {
	// Strip only the enclosing delimiters. strings.Trim would treat "~" as a
	// cutset and also eat tildes that belong to the expression itself.
	expr := strings.TrimSuffix(strings.TrimPrefix(pattern, "~"), "~")

	re, err := regexp.Compile(expr)
	if err != nil {
		// Deliberately best-effort: a malformed expression yields "no match"
		// rather than an error, so the caller ends up with an empty result.
		return false
	}
	return re.MatchString(str)
}
func queryMatches(rp *arrayPathResult, value Result) bool { func queryMatches(rp *arrayPathResult, value Result) bool {
rpv := rp.query.value rpv := rp.query.value
if len(rpv) > 0 && rpv[0] == '~' { if len(rpv) > 0 && rpv[0] == '~' {

View File

@ -105,6 +105,10 @@ func TestEscapePath(t *testing.T) {
testEscapePath(t, json, "test.keyv\\.", "val5") testEscapePath(t, json, "test.keyv\\.", "val5")
testEscapePath(t, json, "test.key\\.v", "val6") testEscapePath(t, json, "test.key\\.v", "val6")
testEscapePath(t, json, "test.keyk\\*.key\\?", "val7") testEscapePath(t, json, "test.keyk\\*.key\\?", "val7")
//XXX
//Do we have escaped regex string as key ?
//Rather, let user provide a valid regex to perform the escape.
} }
// this json block is poorly formed on purpose. // this json block is poorly formed on purpose.
@ -200,6 +204,27 @@ func TestPath(t *testing.T) {
get("lastly.end\\.\\.\\.ing") get("lastly.end\\.\\.\\.ing")
get("lastly.yay") get("lastly.yay")
//XXX
//Regex path testing goes here
regexGet := func(path string, isValid bool) {
r1 := Get(json, path)
if isValid {
assert(t, r1.Raw != "")
} else {
assert(t, r1.Raw == "")
}
}
//XXX
//Testing a valid regular expression path
regexGet("~.~", true)
regexGet("loggy.~programmers~", true)
regexGet("~last*~.~end*~", true)
//Get any 3 word length string from the JSON path
regexGet(`loggy.programmers.2.~^(\w{3})$~`, true)
//Testing an invalid regular expression
regexGet("~[0-9~", false)
} }
func TestTimeResult(t *testing.T) { func TestTimeResult(t *testing.T) {