From 0608a3853a050afc7954e63354518a79af870ffe Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 24 Aug 2016 11:58:05 -0700 Subject: [PATCH] wildcard: Fix wildcard match behavior to support wide range. This helps in supporting all types of patterns in wildcard match. --- gjson.go | 32 +----- match.go | 90 +++++++++++++++ match_test.go | 311 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 402 insertions(+), 31 deletions(-) create mode 100644 match.go create mode 100644 match_test.go diff --git a/gjson.go b/gjson.go index 2e16394..876e766 100644 --- a/gjson.go +++ b/gjson.go @@ -663,7 +663,7 @@ read_key: if parts[depth-1].wild { // the path part contains a wildcard character. we must do a wildcard // match to determine if it truly matches. - matched = wildcardMatch(f.key, parts[depth-1].key) + matched = wildcardMatch(parts[depth-1].key, f.key) } else { // just a straight up equality check matched = parts[depth-1].key == f.key @@ -1070,33 +1070,3 @@ func stringLessInsensitive(a, b string) bool { } return len(a) < len(b) } - -// wilcardMatch returns true if str matches pattern. This is a very -// simple wildcard match where '*' matches on any number characters -// and '?' matches on any one character. 
// wildcardMatch reports whether name matches pattern in its entirety.
//
// Two wildcards are recognised in pattern:
//
//	'*' matches any run of characters, including the empty run
//	'?' matches exactly one character (one UTF-8 encoded rune)
//
// Every other byte of pattern must equal the corresponding byte of name.
// Unlike path.Match, name is treated as a flat namespace: neither wildcard
// gives '/' special treatment, and there are no character classes or escape
// sequences.
func wildcardMatch(pattern, name string) (matched bool) {
Pattern:
	for len(pattern) > 0 {
		var star bool
		var chunk string
		star, chunk, pattern = scanChunk(pattern)
		if star && chunk == "" {
			// A trailing '*' swallows whatever is left of name.
			return true
		}
		last := len(pattern) == 0

		// Try the chunk at the current position first. The final chunk only
		// counts if it also uses up the whole name; otherwise a later
		// position reachable through the star may still succeed.
		rest, ok := matchChunk(chunk, name)
		if ok && (len(rest) == 0 || !last) {
			name = rest
			continue
		}
		if !star {
			// Without a leading '*' the chunk had to match right here.
			return false
		}

		// Let the star absorb one more character at a time and retry. The
		// step is a whole rune, not a byte: restarting in the middle of a
		// multi-byte character would let '?' match a stray continuation
		// byte and report a bogus match (e.g. "*??" against a single
		// three-byte character).
		for i := 0; i < len(name); {
			_, width := utf8.DecodeRuneInString(name[i:])
			i += width
			tail, found := matchChunk(chunk, name[i:])
			if !found {
				continue
			}
			if last && len(tail) > 0 {
				// The final chunk must end exactly at the end of name.
				continue
			}
			name = tail
			continue Pattern
		}
		return false
	}
	return len(name) == 0
}

// scanChunk splits off the next segment of pattern.
//
// star reports whether the segment was preceded by one or more '*' (a run of
// stars is equivalent to a single one). chunk is the star-free text that
// follows, and rest is the remainder of pattern, which is either empty or
// begins with '*'.
func scanChunk(pattern string) (star bool, chunk, rest string) {
	for len(pattern) > 0 && pattern[0] == '*' {
		pattern = pattern[1:]
		star = true
	}
	end := 0
	for end < len(pattern) && pattern[end] != '*' {
		end++
	}
	return star, pattern[:end], pattern[end:]
}

// matchChunk checks whether chunk matches the beginning of s and, if so,
// returns the remainder of s after the matched prefix.
//
// chunk contains only single-character operators: '?', which consumes one
// rune of s (a single byte when s is not valid UTF-8 at that point), and
// literal bytes, which must equal the next byte of s. When there is no match
// the result is ("", false).
func matchChunk(chunk, s string) (rest string, ok bool) {
	for len(chunk) > 0 {
		if len(s) == 0 {
			return "", false
		}
		if chunk[0] == '?' {
			_, width := utf8.DecodeRuneInString(s)
			s = s[width:]
		} else {
			if chunk[0] != s[0] {
				return "", false
			}
			s = s[1:]
		}
		chunk = chunk[1:]
	}
	return s, true
}

// TestWildcardMatch validates wildcardMatch against a table of pattern/text
// pairs covering '*' and '?' in leading, middle and trailing positions.
// Failures are reported with the 1-based position of the entry in the table.
func TestWildcardMatch(t *testing.T) {
	testCases := []struct {
		pattern string
		text    string
		matched bool
	}{
		// A trailing "*" also accepts the bare prefix.
		{pattern: "my-folder/oo*", text: "my-folder/oo", matched: true},
		// "*" at the end of the pattern.
		{pattern: "my-folder/In*", text: "my-folder/India/Karnataka/", matched: true},
		// Prefixes shuffled: must not match.
		{pattern: "my-folder/In*", text: "my-folder/Karnataka/India/", matched: false},
		// Text expanded to the wildcards in the pattern.
		{pattern: "my-folder/In*/Ka*/Ban", text: "my-folder/India/Karnataka/Ban", matched: true},
		// The final key name repeated several times is still a match.
		{pattern: "my-folder/In*/Ka*/Ban", text: "my-folder/India/Karnataka/Ban/Ban/Ban/Ban/Ban", matched: true},
		// "*" can expand into multiple path components.
		{pattern: "my-folder/In*/Ka*/Ban", text: "my-folder/India/Karnataka/Area1/Area2/Area3/Ban", matched: true},
		{pattern: "my-folder/In*/Ka*/Ban", text: "my-folder/India/State1/State2/Karnataka/Area1/Area2/Area3/Ban", matched: true},
		// The literal tail of the pattern must end the text.
		{pattern: "my-folder/In*/Ka*/Ban", text: "my-folder/India/Karnataka/Bangalore", matched: false},
		// ...unless the pattern itself ends with "*".
		{pattern: "my-folder/In*/Ka*/Ban*", text: "my-folder/India/Karnataka/Bangalore", matched: true},
		// The whole key name is a wildcard.
		{pattern: "my-folder/*", text: "my-folder/India", matched: true},
		{pattern: "my-folder/oo*", text: "my-folder/odo", matched: false},

		// Patterns containing '?'.
		// "my-folder?/" matches "my-folder1/", "my-folder2/", ... but not "myfolder/".
		{pattern: "my-folder?/abc*", text: "myfolder/abc", matched: false},
		{pattern: "my-folder?/abc*", text: "my-folder1/abc", matched: true},
		{pattern: "my-?-folder/abc*", text: "my--folder/abc", matched: false},
		{pattern: "my-?-folder/abc*", text: "my-1-folder/abc", matched: true},
		{pattern: "my-?-folder/abc*", text: "my-k-folder/abc", matched: true},
		{pattern: "my??folder/abc*", text: "myfolder/abc", matched: false},
		{pattern: "my??folder/abc*", text: "my4afolder/abc", matched: true},
		{pattern: "my-folder?abc*", text: "my-folder/abc", matched: true},
		// '?' matches '/' too, because the namespace is considered flat:
		// "abc?efg" matches both "abcdefg" and "abc/efg".
		{pattern: "my-folder/abc?efg", text: "my-folder/abcdefg", matched: true},
		{pattern: "my-folder/abc?efg", text: "my-folder/abc/efg", matched: true},
		{pattern: "my-folder/abc????", text: "my-folder/abc", matched: false},
		{pattern: "my-folder/abc????", text: "my-folder/abcde", matched: false},
		{pattern: "my-folder/abc????", text: "my-folder/abcdefg", matched: true},
		// No '*' at all.
		{pattern: "my-folder/abc?", text: "my-folder/abc", matched: false},
		{pattern: "my-folder/abc?", text: "my-folder/abcd", matched: true},
		{pattern: "my-folder/abc?", text: "my-folder/abcde", matched: false},

		// '*' immediately followed by '?'.
		{pattern: "my-folder/mnop*?", text: "my-folder/mnop", matched: false},
		{pattern: "my-folder/mnop*?", text: "my-folder/mnopqrst/mnopqr", matched: true},
		{pattern: "my-folder/mnop*?", text: "my-folder/mnopqrst/mnopqrs", matched: true},
		{pattern: "my-folder/mnop*?", text: "my-folder/mnop", matched: false},
		{pattern: "my-folder/mnop*?", text: "my-folder/mnopq", matched: true},
		{pattern: "my-folder/mnop*?", text: "my-folder/mnopqr", matched: true},
		{pattern: "my-folder/mnop*?and", text: "my-folder/mnopqand", matched: true},
		{pattern: "my-folder/mnop*?and", text: "my-folder/mnopand", matched: false},
		{pattern: "my-folder/mnop*?and", text: "my-folder/mnopqand", matched: true},
		{pattern: "my-folder/mnop*?", text: "my-folder/mn", matched: false},
		{pattern: "my-folder/mnop*?", text: "my-folder/mnopqrst/mnopqrs", matched: true},
		{pattern: "my-folder/mnop*??", text: "my-folder/mnopqrst", matched: true},
		{pattern: "my-folder/mnop*qrst", text: "my-folder/mnopabcdegqrst", matched: true},
		{pattern: "my-folder/mnop*?and", text: "my-folder/mnopqand", matched: true},
		{pattern: "my-folder/mnop*?and", text: "my-folder/mnopand", matched: false},
		{pattern: "my-folder/mnop*?and?", text: "my-folder/mnopqanda", matched: true},
		{pattern: "my-folder/mnop*?and", text: "my-folder/mnopqanda", matched: false},
		{pattern: "my-?-folder/abc*", text: "my-folder/mnopqanda", matched: false},

		// Multi-byte characters: '?' stands for one whole rune, and '*' must
		// never leave the match position inside a rune.
		{pattern: "?", text: "\u4e16", matched: true},
		{pattern: "*?", text: "\u4e16", matched: true},
		{pattern: "??", text: "\u4e16", matched: false},
		{pattern: "*??", text: "\u4e16", matched: false},
		{pattern: "*??", text: "\u4e16\u754c", matched: true},
		{pattern: "my-folder/*??", text: "my-folder/\u4e16", matched: false},
		{pattern: "my-?-folder/abc*", text: "my-\u4e16-folder/abc", matched: true},
	}
	// Iterate over the test cases, call the function under test and assert
	// the output.
	for i, testCase := range testCases {
		actualResult := wildcardMatch(testCase.pattern, testCase.text)
		if testCase.matched != actualResult {
			t.Errorf("Test %d: Expected the result to be `%v`, but instead found it to be `%v`", i+1, testCase.matched, actualResult)
		}
	}
}