diff --git a/README.md b/README.md index bb56b3d..777a4ee 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,9 @@ To access an array value use the index as the key. To get the number of elements in an array or to access a child path, use the '#' character. The dot and wildcard characters can be escaped with '\\'. +A path key can also be a valid regular expression. Provide the regular expression string by enclosing it in '~'. +If the provided regular expression is valid, the matched result set is returned. If the regular expression is invalid, an empty result set is returned. + ```json { "name": {"first": "Tom", "last": "Anderson"}, @@ -77,16 +80,19 @@ The dot and wildcard characters can be escaped with '\\'. } ``` ``` -"name.last" >> "Anderson" -"age" >> 37 -"children" >> ["Sara","Alex","Jack"] -"children.#" >> 3 -"children.1" >> "Alex" -"child*.2" >> "Jack" -"c?ildren.0" >> "Sara" -"fav\.movie" >> "Deer Hunter" -"friends.#.first" >> ["Dale","Roger","Jane"] -"friends.1.last" >> "Craig" +"name.last" >> "Anderson" +"age" >> 37 +"children" >> ["Sara","Alex","Jack"] +"children.#" >> 3 +"children.1" >> "Alex" +"child*.2" >> "Jack" +"c?ildren.0" >> "Sara" +"fav\.movie" >> "Deer Hunter" +"friends.#.first" >> ["Dale","Roger","Jane"] +"friends.1.last" >> "Craig" +"name.~\w~" >> "Tom" +"name.~la*~" >> "Anderson" +"friends.2.~^(\w{3})$~" >> 47 ``` You can also query an array for the first match by using `#(...)`, or find all diff --git a/SYNTAX.md b/SYNTAX.md index 67fa058..c967ca5 100644 --- a/SYNTAX.md +++ b/SYNTAX.md @@ -4,16 +4,21 @@ A GJSON Path is a text string syntax that describes a search pattern for quickly This document is designed to explain the structure of a GJSON Path through examples. 
-- [Path structure](#path-structure) -- [Basic](#basic) -- [Wildcards](#wildcards) -- [Escape Character](#escape-character) -- [Arrays](#arrays) -- [Queries](#queries) -- [Dot vs Pipe](#dot-vs-pipe) -- [Modifiers](#modifiers) -- [Multipaths](#multipaths) -- [Literals](#literals) +- [GJSON Path Syntax](#gjson-path-syntax) + - [Path structure](#path-structure) + - [Example](#example) + - [Basic](#basic) + - [Wildcards](#wildcards) + - [Regular Expression](#regular-expression) + - [Escape character](#escape-character) + - [Arrays](#arrays) + - [Queries](#queries) + - [Dot vs Pipe](#dot-vs-pipe) + - [Modifiers](#modifiers) + - [Modifier arguments](#modifier-arguments) + - [Custom modifiers](#custom-modifiers) + - [Multipaths](#multipaths) + - [Literals](#literals) The definitive implemenation is [github.com/tidwall/gjson](https://github.com/tidwall/gjson). Use the [GJSON Playground](https://gjson.dev) to experiment with the syntax online. @@ -69,6 +74,15 @@ child*.2 "Jack" c?ildren.0 "Sara" ``` +### Regular Expression +Regular expression based key matching is supported; the expression must be enclosed in `~`. If the provided regular expression is invalid, an empty result set is always returned. + +```go +~fav.*~ "Deer Hunter" +friends.#.~fir*~ ["Dale","Roger","Jane"] +~[~ +``` + ### Escape character Special purpose characters, such as `.`, `*`, and `?` can be escaped with `\`. diff --git a/gjson.go b/gjson.go index 9920c4d..6f66738 100644 --- a/gjson.go +++ b/gjson.go @@ -3,6 +3,7 @@ package gjson import ( "encoding/json" + "regexp" "strconv" "strings" "time" @@ -943,9 +944,11 @@ type objectPathResult struct { piped bool wild bool more bool + regex bool } func parseObjectPath(path string) (r objectPathResult) { + var regexBegin = false for i := 0; i < len(path); i++ { if path[i] == '|' { r.part = path[:i] @@ -953,7 +956,7 @@ func parseObjectPath(path string) (r objectPathResult) { r.piped = true return } - if path[i] == '.' { + if path[i] == '.' 
&& !regexBegin { r.part = path[:i] if i < len(path)-1 && isDotPiperChar(path[i+1]) { r.pipe = path[i+1:] @@ -968,7 +971,17 @@ func parseObjectPath(path string) (r objectPathResult) { r.wild = true continue } - if path[i] == '\\' { + if path[i] == '~' { + + if !regexBegin { + regexBegin = true + } else { + r.regex = true + regexBegin = false + } + continue + } + if path[i] == '\\' && !regexBegin { // go into escape mode. this is a slower path that // strips off the escape character from the part. epart := []byte(path[:i]) @@ -1123,17 +1136,26 @@ func parseObject(c *parseContext, i int, path string) (int, bool) { if !ok { return i, false } - if rp.wild { + + if rp.regex { if kesc { - pmatch = matchLimit(unescape(key), rp.part) + pmatch = matchRegex(unescape(key), rp.part) } else { - pmatch = matchLimit(key, rp.part) + pmatch = matchRegex(key, rp.part) } } else { - if kesc { - pmatch = rp.part == unescape(key) + if rp.wild { + if kesc { + pmatch = matchLimit(unescape(key), rp.part) + } else { + pmatch = matchLimit(key, rp.part) + } } else { - pmatch = rp.part == key + if kesc { + pmatch = rp.part == unescape(key) + } else { + pmatch = rp.part == key + } } } hit = pmatch && !rp.more @@ -1232,6 +1254,18 @@ func matchLimit(str, pattern string) bool { return matched } +func matchRegex(str, pattern string) bool { + // Remove the enclosed pattern character "~" + match, err := regexp.MatchString(strings.Trim(pattern, "~"), str) + if err != nil { + // XXX + // If regex is invalid, then do not throw any error. + // Rather, return false to the calling environment. 
+ return false + } + return match +} + func queryMatches(rp *arrayPathResult, value Result) bool { rpv := rp.query.value if len(rpv) > 0 && rpv[0] == '~' { diff --git a/gjson_test.go b/gjson_test.go index d79bcdf..3c2f4e9 100644 --- a/gjson_test.go +++ b/gjson_test.go @@ -105,6 +105,10 @@ func TestEscapePath(t *testing.T) { testEscapePath(t, json, "test.keyv\\.", "val5") testEscapePath(t, json, "test.key\\.v", "val6") testEscapePath(t, json, "test.keyk\\*.key\\?", "val7") + + //XXX + //Do we have escaped regex string as key ? + //Rather, let user provide a valid regex to perform the escape. } // this json block is poorly formed on purpose. @@ -200,6 +204,27 @@ func TestPath(t *testing.T) { get("lastly.end\\.\\.\\.ing") get("lastly.yay") + //XXX + //Regex path testing goes here + regexGet := func(path string, isValid bool) { + r1 := Get(json, path) + if isValid { + assert(t, r1.Raw != "") + } else { + assert(t, r1.Raw == "") + } + } + + //XXX + //Testing a valid regular expression path + regexGet("~.~", true) + regexGet("loggy.~programmers~", true) + regexGet("~last*~.~end*~", true) + //Get any 3 word length string from the JSON path + regexGet(`loggy.programmers.2.~^(\w{3})$~`, true) + + //Testing an invalid regular expression + regexGet("~[0-9~", false) } func TestTimeResult(t *testing.T) {