mirror of https://github.com/gobwas/glob.git
Correctly handle non-ASCII runes in patterns (fixes #54)
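When matching a row, we calculate an index into the string, and this index was counted in runes. However, when slicing a string, Go uses byte indexes, so the two disagree as soon as the input contains a multi-byte rune (for example, "ö" is one rune but two bytes in UTF-8). This change tracks both, using the rune count to determine the correct length and the byte index to slice the string.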
This commit is contained in:
parent
e7a84e9525
commit
1d823af501
10
glob_test.go
10
glob_test.go
|
@ -162,6 +162,16 @@ func TestGlob(t *testing.T) {
|
|||
|
||||
glob(true, pattern_prefix_suffix, fixture_prefix_suffix_match),
|
||||
glob(false, pattern_prefix_suffix, fixture_prefix_suffix_mismatch),
|
||||
|
||||
glob(true, "155ö", "155ö"),
|
||||
glob(true, "1?5ö", "155ö"), // <-
|
||||
glob(true, "1?ö5", "15ö5"),
|
||||
glob(true, "155helloö", "155helloö"),
|
||||
glob(true, "1?5helloö", "155helloö"), // <-
|
||||
glob(true, "1?ö5hello", "15ö5hello"),
|
||||
glob(true, "1?5heöllo", "155heöllo"),
|
||||
glob(true, "1ö?5", "1ö55"), // <-
|
||||
glob(true, "ö1?5", "ö155"),
|
||||
} {
|
||||
t.Run("", func(t *testing.T) {
|
||||
g := MustCompile(test.pattern, test.delimiters...)
|
||||
|
|
15
match/row.go
15
match/row.go
|
@ -2,6 +2,7 @@ package match
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type Row struct {
|
||||
|
@ -23,19 +24,21 @@ func (self Row) matchAll(s string) bool {
|
|||
for _, m := range self.Matchers {
|
||||
length := m.Len()
|
||||
|
||||
var next, i int
|
||||
for next = range s[idx:] {
|
||||
i++
|
||||
if i == length {
|
||||
var runeCount, byteIdx int
|
||||
var r rune
|
||||
for _, r = range s[idx:] {
|
||||
runeCount++
|
||||
byteIdx += utf8.RuneLen(r)
|
||||
if runeCount == length {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if i < length || !m.Match(s[idx:idx+next+1]) {
|
||||
if runeCount < length || !m.Match(s[idx:idx+byteIdx]) {
|
||||
return false
|
||||
}
|
||||
|
||||
idx += next + 1
|
||||
idx += byteIdx
|
||||
}
|
||||
|
||||
return true
|
||||
|
|
Loading…
Reference in New Issue