This commit is contained in:
gobwas 2019-02-10 21:25:05 +03:00
parent 1c85fdee98
commit c8369fd9fe
7 changed files with 167 additions and 81 deletions

1
.gitignore vendored
View File

@ -6,3 +6,4 @@ glob.iml
*.dot
*.png
*.svg
patterns.txt

View File

@ -1,6 +1,6 @@
#! /bin/bash
rnd=$(head -c4 </dev/urandom|xxd -p -u)
rnd=$(head -c4 </dev/urandom|xxd -p)
bench() {
local exp=".*"

View File

@ -1,9 +1,11 @@
package main
import (
"bufio"
"flag"
"fmt"
"os"
"os/exec"
"strings"
"unicode/utf8"
@ -12,33 +14,103 @@ import (
)
// main draws glob patterns as Graphviz graphs.
//
// Patterns are collected from the -p flag and, when -file is given, from the
// lines of that file after skipping the first -offset lines. Each pattern is
// compiled with the separators listed in -s and rendered with match.Graphviz.
// With -auto the graph is passed to open() and the user is asked through
// next() whether to go on; without it the Graphviz text is written to stdout.
//
// NOTE(review): this view shows removed and added lines of the commit side by
// side without +/- markers, so some statements below appear twice (the older
// single-pattern flow and the newer multi-pattern flow).
func main() {
pattern := flag.String("p", "", "pattern to draw")
sep := flag.String("s", "", "comma separated list of separators characters")
var (
pattern = flag.String("p", "", "pattern to draw")
sep = flag.String("s", "", "comma separated list of separators characters")
filepath = flag.String("file", "", "path for patterns file")
auto = flag.Bool("auto", false, "autoopen result")
offset = flag.Int("offset", 0, "patterns to skip")
)
flag.Parse()
if *pattern == "" {
flag.Usage()
os.Exit(1)
var patterns []string
if *pattern != "" {
patterns = append(patterns, *pattern)
}
if *filepath != "" {
file, err := os.Open(*filepath)
if err != nil {
fmt.Printf("could not open file: %v\n", err)
os.Exit(1)
}
s := bufio.NewScanner(file)
for s.Scan() {
// NOTE(review): this Println and the "skipped" one below look like leftover
// debug output; they print on every scanned line — confirm they are wanted.
fmt.Println(*offset)
// The first *offset lines of the file are dropped, counting the flag value down.
if *offset > 0 {
*offset--
fmt.Println("skipped")
continue
}
patterns = append(patterns, s.Text())
}
// NOTE(review): s.Err() is not checked after the scan loop, so a read error
// would be indistinguishable from end of file.
file.Close()
}
// Nothing to draw: exit quietly.
if len(patterns) == 0 {
return
}
var separators []rune
if len(*sep) > 0 {
// Every comma-separated item must decode to exactly one rune.
for _, c := range strings.Split(*sep, ",") {
if r, w := utf8.DecodeRuneInString(c); len(c) > w {
fmt.Println("only single charactered separators are allowed")
r, w := utf8.DecodeRuneInString(c)
if len(c) > w {
// NOTE(review): Println does not interpret %+q; the verb is printed
// literally followed by c. Printf with a trailing newline was probably
// intended.
fmt.Println("only single charactered separators are allowed: %+q", c)
os.Exit(1)
} else {
separators = append(separators, r)
}
separators = append(separators, r)
}
}
glob, err := glob.Compile(*pattern, separators...)
if err != nil {
fmt.Println("could not compile pattern:", err)
os.Exit(1)
// One stdin reader is shared by all prompts so buffered input is not lost
// between patterns.
br := bufio.NewReader(os.Stdin)
for _, p := range patterns {
g, err := glob.Compile(p, separators...)
if err != nil {
fmt.Printf("could not compile pattern %+q: %v\n", p, err)
os.Exit(1)
}
s := match.Graphviz(p, g.(match.Matcher))
if *auto {
fmt.Fprintf(os.Stdout, "pattern: %+q: ", p)
if err := open(s); err != nil {
fmt.Printf("could not open graphviz: %v", err)
os.Exit(1)
}
// Stop iterating as soon as next() reports false.
if !next(br) {
return
}
} else {
fmt.Fprintln(os.Stdout, s)
}
}
matcher := glob.(match.Matcher)
fmt.Fprint(os.Stdout, match.Graphviz(*pattern, matcher))
}
// open renders the Graphviz source s into "glob.graphviz.png" by piping it
// through `dot -Tpng`, then launches the `open` command on the resulting file.
//
// The image file is synced and closed before the viewer is started, so the
// viewer never sees a half-written file and a failing Close is reported
// instead of being dropped. Diagnostics printed by dot are forwarded to
// stderr. It returns the first error met, prefixed with the step that failed.
func open(s string) error {
	file, err := os.Create("glob.graphviz.png")
	if err != nil {
		return fmt.Errorf("create image file: %v", err)
	}
	name := file.Name()

	dot := exec.Command("dot", "-Tpng")
	dot.Stdin = strings.NewReader(s)
	dot.Stdout = file
	// Without this a malformed graph only yields "exit status 1".
	dot.Stderr = os.Stderr
	if err := dot.Run(); err != nil {
		file.Close() // best effort: the dot failure is the error worth reporting
		return fmt.Errorf("run dot: %v", err)
	}
	if err := file.Sync(); err != nil {
		file.Close() // best effort: the sync failure is the error worth reporting
		return fmt.Errorf("sync %s: %v", name, err)
	}
	if err := file.Close(); err != nil {
		return fmt.Errorf("close %s: %v", name, err)
	}

	if err := exec.Command("open", name).Run(); err != nil {
		return fmt.Errorf("open %s: %v", name, err)
	}
	return nil
}
// next asks on stdout whether to cancel and reads one line of answer from in.
//
// It returns false (stop) when a full line cannot be read, or when the answer,
// ignoring surrounding whitespace, starts with 'y' or 'Y'. Any other answer,
// including an empty line, returns true (continue). The prompt marks "N" as
// the default because an empty line continues.
func next(in *bufio.Reader) bool {
	fmt.Fprint(os.Stdout, "cancel? [y/N]: ")
	line, err := in.ReadString('\n')
	if err != nil {
		// EOF or a read failure: there is nobody left to ask, so stop.
		return false
	}
	answer := strings.TrimSpace(line)
	if answer != "" && (answer[0] == 'y' || answer[0] == 'Y') {
		return false
	}
	return true
}

View File

@ -1,8 +1,11 @@
package glob
import (
"fmt"
"regexp"
"testing"
"github.com/gobwas/glob/match"
)
const (
@ -57,7 +60,12 @@ type test struct {
}
// glob builds a test case: p is stored as the pattern, m as the match input,
// d as the delimiters and s in the `should` field (presumably whether p is
// expected to match m — confirm against the test runner).
func glob(s bool, p, m string, d ...rune) test {
// NOTE(review): two returns are visible because this view interleaves the
// removed positional literal with the added keyed literal; only the first one
// can ever execute as written.
return test{p, m, s, d}
return test{
should: s,
pattern: p,
match: m,
delimiters: d,
}
}
func globc(p string, d ...rune) test {
@ -245,20 +253,10 @@ func BenchmarkParseRegexp(b *testing.B) {
// BenchmarkAllGlobMatch measures Match on fixture_all_match for the glob
// compiled from pattern_all.
func BenchmarkAllGlobMatch(b *testing.B) {
	m, err := Compile(pattern_all)
	if err != nil {
		// A nil matcher would otherwise panic inside the loop.
		b.Fatalf("compile %q: %v", pattern_all, err)
	}
	// Keep compilation out of the measured region.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = m.Match(fixture_all_match)
	}
}
// BenchmarkAllGlobMatchParallel measures Match on fixture_all_match for the
// glob compiled from pattern_all, sharing one compiled matcher between the
// goroutines started by RunParallel.
func BenchmarkAllGlobMatchParallel(b *testing.B) {
	m, err := Compile(pattern_all)
	if err != nil {
		// A nil matcher would otherwise panic inside the workers.
		b.Fatalf("compile %q: %v", pattern_all, err)
	}
	// Keep compilation out of the measured region.
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			_ = m.Match(fixture_all_match)
		}
	})
}
func BenchmarkAllRegexpMatch(b *testing.B) {
m := regexp.MustCompile(regexp_all)
@ -268,22 +266,24 @@ func BenchmarkAllRegexpMatch(b *testing.B) {
_ = m.Match(f)
}
}
// TestAllGlobMismatch compiles pattern_all, logs the Graphviz description of
// the resulting matcher for inspection, and checks that fixture_all_mismatch
// is rejected.
//
// NOTE(review): the expectation that the fixture must not match is taken from
// its name; confirm against the fixture definition.
func TestAllGlobMismatch(t *testing.T) {
	m := MustCompile(pattern_all)
	// t.Logf keeps the graph out of stdout unless the test fails or -v is set.
	t.Logf("graphviz:\n%s", match.Graphviz(pattern_all, m.(match.Matcher)))
	if m.Match(fixture_all_mismatch) {
		t.Errorf("pattern %q must not match %q", pattern_all, fixture_all_mismatch)
	}
}
// BenchmarkAllGlobMismatch measures Match on fixture_all_mismatch for the glob
// compiled from pattern_all.
//
// NOTE(review): both the removed `Compile` line and the added `MustCompile`
// line are visible here because this view interleaves the two sides of the
// diff.
func BenchmarkAllGlobMismatch(b *testing.B) {
m, _ := Compile(pattern_all)
m := MustCompile(pattern_all)
// NOTE(review): this print runs every time the benchmark function is invoked
// and sits inside the timed region; it looks like temporary debugging output.
fmt.Println(match.Graphviz(pattern_all, m.(match.Matcher)))
for i := 0; i < b.N; i++ {
_ = m.Match(fixture_all_mismatch)
}
}
// BenchmarkAllGlobMismatchParallel measures Match on fixture_all_mismatch for
// the glob compiled from pattern_all, sharing one compiled matcher between the
// goroutines started by RunParallel.
func BenchmarkAllGlobMismatchParallel(b *testing.B) {
	m, err := Compile(pattern_all)
	if err != nil {
		// A nil matcher would otherwise panic inside the workers.
		b.Fatalf("compile %q: %v", pattern_all, err)
	}
	// Keep compilation out of the measured region.
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			_ = m.Match(fixture_all_mismatch)
		}
	})
}
func BenchmarkAllRegexpMismatch(b *testing.B) {
m := regexp.MustCompile(regexp_all)
f := []byte(fixture_all_mismatch)

View File

@ -148,21 +148,22 @@ func Compile(ms []Matcher) (m Matcher, err error) {
right = ms[x+1:]
}
var l, r Matcher
var (
l Matcher = Nothing{}
r Matcher = Nothing{}
)
if len(left) > 0 {
l, err = Compile(left)
if err != nil {
return nil, err
}
}
if len(right) > 0 {
r, err = Compile(right)
if err != nil {
return nil, err
}
}
return NewTree(indexer, l, r), nil
}
@ -279,16 +280,16 @@ type result struct {
}
func compareResult(a, b result) int {
if x := len(a.ms) - len(b.ms); x != 0 {
if x := b.minLen - a.minLen; x != 0 {
return x
}
if x := a.matchers - b.matchers; x != 0 {
return x
}
if x := b.minLen - a.minLen; x != 0 {
if x := a.nesting - b.nesting; x != 0 {
return x
}
if x := a.nesting - b.nesting; x != 0 {
if x := len(a.ms) - len(b.ms); x != 0 {
return x
}
return 0

View File

@ -25,6 +25,12 @@ type SizedTree struct {
Tree
}
// IndexedTree is a tree node whose value and whose left and right parts are
// all MatchIndexers.
type IndexedTree struct {
	value, left, right MatchIndexer
}
// RunesCount reports the rune count recorded in the embedded Tree.
func (st SizedTree) RunesCount() int {
	// runes is promoted from the embedded Tree.
	return st.runes
}
@ -49,24 +55,19 @@ func NewTree(v MatchIndexer, l, r Matcher) Matcher {
)
if lsz {
tree.lrunes = ls.RunesCount()
} else {
tree.lrunes = -1
}
if rsz {
tree.rrunes = rs.RunesCount()
} else {
tree.rrunes = -1
}
if vsz {
tree.vrunes = vs.RunesCount()
} else {
tree.vrunes = -1
}
//li, lix := l.(MatchIndexer)
//ri, rix := r.(MatchIndexer)
if vsz && lsz && rsz {
tree.runes = tree.vrunes + tree.lrunes + tree.rrunes
return SizedTree{tree}
}
tree.runes = -1
return tree
}
@ -90,44 +91,44 @@ func (t Tree) Match(s string) (ok bool) {
defer func() { done(ok) }()
}
n := len(s)
offset, limit := t.offsetLimit(s)
q := s[offset : len(s)-limit]
if debug.Enabled {
debug.Logf("offset/limit: %d/%d: %q of %q", offset, limit, q, s)
}
for len(q) >= t.vrunes {
// search for matching part in substring
index, segments := t.value.Index(q)
for len(s)-offset-limit >= t.vrunes {
if debug.Enabled {
debug.Logf(
"value %s indexing: %q (offset=%d; limit=%d)",
t.value, s[offset:n-limit], offset, limit,
)
}
index, segments := t.value.Index(s[offset : n-limit])
if debug.Enabled {
debug.Logf(
"value %s index: %d; %v",
t.value, index, segments,
)
}
if index == -1 {
releaseSegments(segments)
return false
}
l := s[:offset+index]
var left bool
if t.left != nil {
left = t.left.Match(l)
} else {
left = l == ""
}
if debug.Enabled {
debug.Logf("left %q: -> %t", l, left)
debug.Logf("matching left: %q", s[:offset+index])
}
left := t.left.Match(s[:offset+index])
if debug.Enabled {
debug.Logf("matching left: -> %t", left)
}
if left {
for _, seg := range segments {
var (
right bool
)
r := s[offset+index+seg:]
if t.right != nil {
right = t.right.Match(r)
} else {
right = r == ""
}
if debug.Enabled {
debug.Logf("right %q: -> %t", r, right)
debug.Logf("matching right: %q", s[offset+index+seg:])
}
right := t.right.Match(s[offset+index+seg:])
if debug.Enabled {
debug.Logf("matching right: -> %t", right)
}
if right {
releaseSegments(segments)
@ -136,13 +137,14 @@ func (t Tree) Match(s string) (ok bool) {
}
}
_, x := utf8.DecodeRuneInString(q[index:])
releaseSegments(segments)
q = q[x:]
offset += x
if debug.Enabled {
debug.Logf("tree: sliced to %q", q)
_, x := utf8.DecodeRuneInString(s[offset+index:])
if x == 0 {
// No progress.
break
}
offset = offset + index + x
}
return false

View File

@ -11,6 +11,16 @@ func TestTree(t *testing.T) {
str string
exp bool
}{
{
NewTree(NewText("x"), NewText("y"), NewText("z")),
"0000x1111",
false,
},
{
NewTree(NewText("a"), NewSingle(nil), NewSingle(nil)),
"aaa",
false,
},
{
NewTree(NewText("abc"), NewSuper(), NewSuper()),
"abc",