From 1801ade38c1d1e12c9828484d41cd062a4b5479e Mon Sep 17 00:00:00 2001 From: gobwas Date: Wed, 24 Feb 2016 23:53:19 +0300 Subject: [PATCH 1/7] quote meta func --- glob.go | 19 +++++++++++++++++++ glob_test.go | 20 ++++++++++++++++++-- lexer.go | 18 ++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/glob.go b/glob.go index 4d1b77b..47a4c47 100644 --- a/glob.go +++ b/glob.go @@ -54,3 +54,22 @@ func MustCompile(pattern string, separators ...rune) Glob { return g } + +// QuoteMeta returns a string that quotes all glob pattern metacharacters +// inside the argument text; For example, QuoteMeta(`[foo*]`) returns `\[foo\*\]`. +func QuoteMeta(s string) string { + b := make([]byte, 2*len(s)) + + // A byte loop is correct because all metacharacters are ASCII. + j := 0 + for i := 0; i < len(s); i++ { + if special(s[i]) { + b[j] = '\\' + j++ + } + b[j] = s[i] + j++ + } + + return string(b[0:j]) +} diff --git a/glob_test.go b/glob_test.go index 4fa836a..5e6c360 100644 --- a/glob_test.go +++ b/glob_test.go @@ -150,6 +150,22 @@ func TestGlob(t *testing.T) { } } +func TestQuoteMeta(t *testing.T) { + for id, test := range []struct { + in, out string + }{ + { + in: `[foo*]`, + out: `\[foo\*\]`, + }, + } { + act := QuoteMeta(test.in) + if act != test.out { + t.Errorf("#%d QuoteMeta(%q) = %q; want %q", id, test.in, act, test.out) + } + } +} + func BenchmarkParseGlob(b *testing.B) { for i := 0; i < b.N; i++ { Compile(pattern_all) @@ -193,12 +209,12 @@ func BenchmarkAllGlobMismatch(b *testing.B) { _ = m.Match(fixture_all_mismatch) } } -func BenchmarkAllGlobMatchParallel(b *testing.B) { +func BenchmarkAllGlobMismatchParallel(b *testing.B) { m, _ := Compile(pattern_all) b.RunParallel(func(pb *testing.PB) { for pb.Next() { - _ = m.Match(fixture_all_match) + _ = m.Match(fixture_all_mismatch) } }) } diff --git a/lexer.go b/lexer.go index 0747e9e..306e856 100644 --- a/lexer.go +++ b/lexer.go @@ -1,6 +1,7 @@ package glob import ( + "bytes" "fmt" "strings" 
"unicode/utf8" @@ -19,6 +20,23 @@ const ( char_range_between = '-' ) +var specials = []byte{ + char_any, + char_separator, + char_single, + char_escape, + char_range_open, + char_range_close, + char_terms_open, + char_terms_close, + char_range_not, + char_range_between, +} + +func special(c byte) bool { + return bytes.IndexByte(specials, c) != -1 +} + var eof rune = 0 type stateFn func(*lexer) stateFn From eccf734cd7d09ae5b23252d96cb1a50558ae13f4 Mon Sep 17 00:00:00 2001 From: gobwas Date: Thu, 25 Feb 2016 00:31:30 +0300 Subject: [PATCH 2/7] add quote meta func --- glob.go | 4 ++-- glob_test.go | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/glob.go b/glob.go index 47a4c47..58f45c9 100644 --- a/glob.go +++ b/glob.go @@ -55,12 +55,12 @@ func MustCompile(pattern string, separators ...rune) Glob { return g } -// QuoteMeta returns a string that quotes all glob pattern metacharacters +// QuoteMeta returns a string that quotes all glob pattern meta characters // inside the argument text; For example, QuoteMeta(`[foo*]`) returns `\[foo\*\]`. func QuoteMeta(s string) string { b := make([]byte, 2*len(s)) - // A byte loop is correct because all metacharacters are ASCII. 
+ // a byte loop is correct because all meta characters are ASCII j := 0 for i := 0; i < len(s); i++ { if special(s[i]) { diff --git a/glob_test.go b/glob_test.go index 5e6c360..6fe73a6 100644 --- a/glob_test.go +++ b/glob_test.go @@ -151,6 +151,12 @@ func TestGlob(t *testing.T) { } func TestQuoteMeta(t *testing.T) { + specialsQuoted := make([]byte, len(specials)*2) + for i, j := 0, 0; i < len(specials); i, j = i+1, j+2 { + specialsQuoted[j] = '\\' + specialsQuoted[j+1] = specials[i] + } + for id, test := range []struct { in, out string }{ @@ -158,11 +164,22 @@ func TestQuoteMeta(t *testing.T) { in: `[foo*]`, out: `\[foo\*\]`, }, + { + in: string(specials), + out: string(specialsQuoted), + }, + { + in: string(append([]byte("some text and"), specials...)), + out: string(append([]byte("some text and"), specialsQuoted...)), + }, } { act := QuoteMeta(test.in) if act != test.out { t.Errorf("#%d QuoteMeta(%q) = %q; want %q", id, test.in, act, test.out) } + if _, err := Compile(act); err != nil { + t.Errorf("#%d _, err := Compile(QuoteMeta(%q) = %q); err = %q", id, test.in, act, err) + } } } From f031a1f239ee8e02036bcb1b7a9022941c9526e7 Mon Sep 17 00:00:00 2001 From: gobwas Date: Thu, 25 Feb 2016 00:31:37 +0300 Subject: [PATCH 3/7] bugfix --- lexer.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lexer.go b/lexer.go index 306e856..0ebf451 100644 --- a/lexer.go +++ b/lexer.go @@ -142,7 +142,7 @@ func newLexer(source string) *lexer { l := &lexer{ input: source, state: lexText, - items: make(chan item, 5), + items: make(chan item, len(source)), termPhrases: make(map[int]int), } return l @@ -299,9 +299,7 @@ func lexText(l *lexer) stateFn { l.unread() l.emitMaybe(item_text) return lexSeparator - } - } if l.pos > l.start { From 084e0669a315116a20eea44b21769311c3f35b25 Mon Sep 17 00:00:00 2001 From: gobwas Date: Thu, 25 Feb 2016 00:41:52 +0300 Subject: [PATCH 4/7] todo --- compiler.go | 1 + todo.txt | 26 -------------------------- 2 files changed, 1 
insertion(+), 26 deletions(-) delete mode 100644 todo.txt diff --git a/compiler.go b/compiler.go index c0d087b..16c241b 100644 --- a/compiler.go +++ b/compiler.go @@ -1,6 +1,7 @@ package glob // TODO use constructor with all matchers, and to their structs private +// TODO glue multiple Text nodes (like after QuoteMeta) import ( "fmt" diff --git a/todo.txt b/todo.txt deleted file mode 100644 index 918063e..0000000 --- a/todo.txt +++ /dev/null @@ -1,26 +0,0 @@ -benchmark old ns/op new ns/op delta - -BenchmarkAllGlobMatch-4 519 1024 +97.30% -BenchmarkMultipleGlobMatch-4 123 218 +77.24% -BenchmarkAlternativesGlobMatch-4 164 283 +72.56% -BenchmarkAlternativesSuffixFirstGlobMatch-4 23.6 23.5 -0.42% -BenchmarkAlternativesSuffixSecondGlobMatch-4 29.7 30.1 +1.35% -BenchmarkAlternativesCombineLiteGlobMatch-4 161 352 +118.63% -BenchmarkAlternativesCombineHardGlobMatch-4 321 649 +102.18% -BenchmarkPlainGlobMatch-4 7.17 7.09 -1.12% -BenchmarkPrefixGlobMatch-4 8.74 8.64 -1.14% -BenchmarkSuffixGlobMatch-4 10.3 9.06 -12.04% -BenchmarkPrefixSuffixGlobMatch-4 31.0 15.1 -51.29% -BenchmarkIndexAny-4 1414 232 -83.59% -BenchmarkIndexContains-4 557 250 -55.12% -BenchmarkIndexList-4 207 42.6 -79.42% -BenchmarkIndexMax-4 630 111 -82.38% -BenchmarkIndexMin-4 515 328 -36.31% -BenchmarkIndexPrefixSuffix-4 97.9 86.2 -11.95% -BenchmarkIndexPrefix-4 86.1 84.0 -2.44% -BenchmarkIndexRange-4 181 144 -20.44% -BenchmarkRowIndex-4 185 127 -31.35% -BenchmarkIndexSingle-4 82.6 16.0 -80.63% -BenchmarkIndexSuffix-4 85.5 84.9 -0.70% -BenchmarkIndexSuper-4 450 196 -56.44% -BenchmarkIndexText-4 85.3 85.9 +0.70% From b7874cc364f1cc248d7884a3a0def0cfa5531633 Mon Sep 17 00:00:00 2001 From: gobwas Date: Thu, 25 Feb 2016 00:42:13 +0300 Subject: [PATCH 5/7] cleanup specials --- lexer.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/lexer.go b/lexer.go index 0ebf451..7454a2e 100644 --- a/lexer.go +++ b/lexer.go @@ -22,15 +22,12 @@ const ( var specials = []byte{ char_any, - char_separator, char_single, 
char_escape, char_range_open, char_range_close, char_terms_open, char_terms_close, - char_range_not, - char_range_between, } func special(c byte) bool { From 04523b0f5970650bffefbce4aa241d4c505f748a Mon Sep 17 00:00:00 2001 From: gobwas Date: Thu, 25 Feb 2016 00:42:26 +0300 Subject: [PATCH 6/7] to string tunage --- match/text.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/match/text.go b/match/text.go index 8b93992..0a17616 100644 --- a/match/text.go +++ b/match/text.go @@ -41,5 +41,5 @@ func (self Text) Index(s string) (int, []int) { } func (self Text) String() string { - return fmt.Sprintf("", self.Str) + return fmt.Sprintf("", self.Str) } From badfcd81714ff7ce6caa40c68f16b8d36662a44e Mon Sep 17 00:00:00 2001 From: gobwas Date: Thu, 25 Feb 2016 00:42:32 +0300 Subject: [PATCH 7/7] update with QuoteMeta --- readme.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/readme.md b/readme.md index dcb4b2a..43f827b 100644 --- a/readme.md +++ b/readme.md @@ -25,6 +25,10 @@ func main() { g = glob.MustCompile("*.github.com") g.Match("api.github.com") // true + // quote meta characters and then create simple glob + g = glob.MustCompile(glob.QuoteMeta("*.github.com")) + g.Match("*.github.com") // true + // create new glob with set of delimiters as ["."] g = glob.MustCompile("api.*.com", '.') g.Match("api.github.com") // true @@ -76,7 +80,6 @@ func main() { g.Match("fat") // true g.Match("at") // false - // create glob with pattern-alternatives list g = glob.MustCompile("{cat,bat,[fr]at}") g.Match("cat") // true