Merge pull request #4 from gobwas/opti

Optimization with reusable slices
This commit is contained in:
Sergey Kamardin 2016-02-24 23:37:39 +03:00
commit 3deac2392d
51 changed files with 1502 additions and 540 deletions

26
bench.sh Executable file
View File

@ -0,0 +1,26 @@
#! /bin/bash
# bench REF PATTERN
#
# Runs the Go benchmarks selected by PATTERN on git ref REF and stores the
# output in /tmp/REF-PATTERN.bench. If that file already exists it is reused
# and nothing is run. The branch that was checked out on entry is checked out
# again afterwards.
#
# Returns non-zero when REF cannot be checked out or the benchmark run fails.
bench() {
    local filename="/tmp/$1-$2.bench"
    local backup status

    if test -e "${filename}"; then
        echo "Already exists ${filename}"
        return 0
    fi

    backup=$(git rev-parse --abbrev-ref HEAD)

    # Stop here on failure: otherwise the current tree would be benchmarked
    # and its numbers stored under the name of the requested ref.
    if ! git checkout "$1"; then
        echo "Could not checkout $1" >&2
        return 1
    fi

    echo -n "Creating ${filename}... "
    go test ./... -run=NONE -bench="$2" -benchmem > "${filename}"
    status=$?
    if test "${status}" -eq 0; then
        echo "OK"
    else
        echo "FAILED"
        # Do not leave a partial result behind; a later run would reuse it.
        rm -f "${filename}"
    fi

    git checkout "${backup}"
    # Pause between runs (kept from the original script).
    sleep 5
    return "${status}"
}
# Usage: bench.sh REF PATTERN [BENCHCMP_ARG]
#
# Benchmarks REF and the currently checked-out branch, then compares the two
# result files with benchcmp.
to=$1
current=$(git rev-parse --abbrev-ref HEAD)

# Quoted so that a pattern such as "Foo*" reaches bench unchanged instead of
# being split or glob-expanded by the shell.
bench "${to}" "$2"
bench "${current}" "$2"

# $3 is intentionally unquoted: when it is omitted it must expand to nothing
# rather than to an empty-string argument.
benchcmp $3 "/tmp/${to}-$2.bench" "/tmp/${current}-$2.bench"

View File

@ -1,66 +1,19 @@
package main
import (
"bytes"
"flag"
"fmt"
"github.com/gobwas/glob"
"github.com/gobwas/glob/match"
"math/rand"
"github.com/gobwas/glob/match/debug"
"os"
"strings"
"unicode/utf8"
)
// draw returns a complete DOT digraph for matcher m. The graph is labelled
// with pattern and its root node is named with a random hexadecimal id.
func draw(pattern string, m match.Matcher) string {
	root := fmt.Sprintf("%x", rand.Int63())
	body := graphviz(m, root)
	return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, body)
}
// graphviz renders matcher m as a sequence of DOT statements, using id as the
// name of the node that stands for m.
//
// A BTree becomes a node labelled with its Value plus one edge per side; a nil
// side is drawn as a "<nil>" node. AnyOf and EveryOf become a node with one
// edge per contained matcher. Any other matcher becomes a single node labelled
// with its String() value. Child node names are random hexadecimal ids.
func graphviz(m match.Matcher, id string) string {
	buf := &bytes.Buffer{}

	switch matcher := m.(type) {
	case match.BTree:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, matcher.Value.String())
		for _, child := range []match.Matcher{matcher.Left, matcher.Right} {
			switch n := child.(type) {
			case nil:
				rnd := rand.Int63()
				fmt.Fprintf(buf, `"%x"[label="<nil>"];`, rnd)
				fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)

			default:
				sub := fmt.Sprintf("%x", rand.Int63())
				fmt.Fprintf(buf, `"%s"->"%s";`, id, sub)
				// Written verbatim: the nested output is data, not a format
				// string, and may contain '%' coming from matcher labels.
				buf.WriteString(graphviz(n, sub))
			}
		}

	case match.AnyOf:
		fmt.Fprintf(buf, `"%s"[label="AnyOf"];`, id)
		for _, child := range matcher.Matchers {
			rnd := rand.Int63()
			buf.WriteString(graphviz(child, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}

	case match.EveryOf:
		fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id)
		for _, child := range matcher.Matchers {
			rnd := rand.Int63()
			buf.WriteString(graphviz(child, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}

	default:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m.String())
	}

	return buf.String()
}
func main() {
pattern := flag.String("p", "", "pattern to draw")
sep := flag.String("s", "", "comma separated list of separators")
sep := flag.String("s", "", "comma separated list of separators characters")
flag.Parse()
if *pattern == "" {
@ -68,12 +21,24 @@ func main() {
os.Exit(1)
}
glob, err := glob.Compile(*pattern, strings.Split(*sep, ",")...)
var separators []rune
if len(*sep) > 0 {
for _, c := range strings.Split(*sep, ",") {
if r, w := utf8.DecodeRuneInString(c); len(c) > w {
fmt.Println("only single charactered separators are allowed")
os.Exit(1)
} else {
separators = append(separators, r)
}
}
}
glob, err := glob.Compile(*pattern, separators...)
if err != nil {
fmt.Println("could not compile pattern:", err)
os.Exit(1)
}
matcher := glob.(match.Matcher)
fmt.Fprint(os.Stdout, draw(*pattern, matcher))
fmt.Fprint(os.Stdout, debug.Graphviz(*pattern, matcher))
}

View File

@ -7,6 +7,7 @@ import (
"os"
"strings"
"testing"
"unicode/utf8"
)
func benchString(r testing.BenchmarkResult) string {
@ -42,7 +43,16 @@ func main() {
os.Exit(1)
}
separators := strings.Split(*sep, ",")
var separators []rune
for _, c := range strings.Split(*sep, ",") {
if r, w := utf8.DecodeRuneInString(c); len(c) > w {
fmt.Println("only single charactered separators are allowed")
os.Exit(1)
} else {
separators = append(separators, r)
}
}
g, err := glob.Compile(*pattern, separators...)
if err != nil {
fmt.Println("could not compile pattern:", err)

View File

@ -1,18 +1,20 @@
package glob
// TODO use constructor with all matchers, and to their structs private
import (
"fmt"
"github.com/gobwas/glob/match"
"github.com/gobwas/glob/runes"
"reflect"
"unicode/utf8"
)
func optimize(matcher match.Matcher) match.Matcher {
switch m := matcher.(type) {
case match.Any:
if m.Separators == "" {
return match.Super{}
if len(m.Separators) == 0 {
return match.NewSuper()
}
case match.AnyOf:
@ -23,8 +25,8 @@ func optimize(matcher match.Matcher) match.Matcher {
return m
case match.List:
if m.Not == false && utf8.RuneCountInString(m.List) == 1 {
return match.NewText(m.List)
if m.Not == false && len(m.List) == 1 {
return match.NewText(string(m.List))
}
return m
@ -52,23 +54,23 @@ func optimize(matcher match.Matcher) match.Matcher {
rs, rightSuffix := m.Right.(match.Suffix)
if leftSuper && rightSuper {
return match.Contains{r.Str, false}
return match.NewContains(r.Str, false)
}
if leftSuper && rightNil {
return match.Suffix{r.Str}
return match.NewSuffix(r.Str)
}
if rightSuper && leftNil {
return match.Prefix{r.Str}
return match.NewPrefix(r.Str)
}
if leftNil && rightSuffix {
return match.PrefixSuffix{Prefix: r.Str, Suffix: rs.Suffix}
return match.NewPrefixSuffix(r.Str, rs.Suffix)
}
if rightNil && leftPrefix {
return match.PrefixSuffix{Prefix: lp.Prefix, Suffix: r.Str}
return match.NewPrefixSuffix(lp.Prefix, r.Str)
}
return m
@ -122,7 +124,7 @@ func glueAsRow(matchers []match.Matcher) match.Matcher {
}
}
return match.Row{c, l}
return match.NewRow(l, c...)
}
func glueAsEvery(matchers []match.Matcher) match.Matcher {
@ -135,15 +137,15 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher {
hasSuper bool
hasSingle bool
min int
separator string
separator []rune
)
for i, matcher := range matchers {
var sep string
switch m := matcher.(type) {
var sep []rune
switch m := matcher.(type) {
case match.Super:
sep = ""
sep = []rune{}
hasSuper = true
case match.Any:
@ -172,7 +174,7 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher {
separator = sep
}
if sep == separator {
if runes.Equal(sep, separator) {
continue
}
@ -180,29 +182,29 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher {
}
if hasSuper && !hasAny && !hasSingle {
return match.Super{}
return match.NewSuper()
}
if hasAny && !hasSuper && !hasSingle {
return match.Any{separator}
return match.NewAny(separator)
}
if (hasAny || hasSuper) && min > 0 && separator == "" {
return match.Min{min}
if (hasAny || hasSuper) && min > 0 && len(separator) == 0 {
return match.NewMin(min)
}
every := match.EveryOf{}
every := match.NewEveryOf()
if min > 0 {
every.Add(match.Min{min})
every.Add(match.NewMin(min))
if !hasAny && !hasSuper {
every.Add(match.Max{min})
every.Add(match.NewMax(min))
}
}
if separator != "" {
every.Add(match.Contains{separator, true})
if len(separator) > 0 {
every.Add(match.NewContains(string(separator), true))
}
return every
@ -468,11 +470,11 @@ func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
// return sum * k
//}
func doAnyOf(n *nodeAnyOf, s string) (match.Matcher, error) {
func doAnyOf(n *nodeAnyOf, s []rune) (match.Matcher, error) {
var matchers []match.Matcher
for _, desc := range n.children() {
if desc == nil {
matchers = append(matchers, match.Nothing{})
matchers = append(matchers, match.NewNothing())
continue
}
@ -483,10 +485,10 @@ func doAnyOf(n *nodeAnyOf, s string) (match.Matcher, error) {
matchers = append(matchers, optimize(m))
}
return match.AnyOf{matchers}, nil
return match.NewAnyOf(matchers...), nil
}
func do(leaf node, s string) (m match.Matcher, err error) {
func do(leaf node, s []rune) (m match.Matcher, err error) {
switch n := leaf.(type) {
case *nodeAnyOf:
@ -498,7 +500,7 @@ func do(leaf node, s string) (m match.Matcher, err error) {
var matchers []match.Matcher
for _, desc := range n.children() {
if desc == nil {
matchers = append(matchers, match.Nothing{})
matchers = append(matchers, match.NewNothing())
continue
}
@ -509,12 +511,12 @@ func do(leaf node, s string) (m match.Matcher, err error) {
matchers = append(matchers, optimize(m))
}
return match.AnyOf{matchers}, nil
return match.NewAnyOf(matchers...), nil
case *nodePattern:
nodes := leaf.children()
if len(nodes) == 0 {
return match.Nothing{}, nil
return match.NewNothing(), nil
}
var matchers []match.Matcher
@ -532,19 +534,19 @@ func do(leaf node, s string) (m match.Matcher, err error) {
}
case *nodeList:
m = match.List{n.chars, n.not}
m = match.NewList([]rune(n.chars), n.not)
case *nodeRange:
m = match.Range{n.lo, n.hi, n.not}
m = match.NewRange(n.lo, n.hi, n.not)
case *nodeAny:
m = match.Any{s}
m = match.NewAny(s)
case *nodeSuper:
m = match.Super{}
m = match.NewSuper()
case *nodeSingle:
m = match.Single{s}
m = match.NewSingle(s)
case *nodeText:
m = match.NewText(n.text)
@ -556,7 +558,7 @@ func do(leaf node, s string) (m match.Matcher, err error) {
return optimize(m), nil
}
func do2(node node, s string) ([]match.Matcher, error) {
func do2(node node, s []rune) ([]match.Matcher, error) {
var result []match.Matcher
switch n := node.(type) {
@ -631,19 +633,19 @@ func do2(node node, s string) ([]match.Matcher, error) {
}
case *nodeList:
result = append(result, match.List{n.chars, n.not})
result = append(result, match.NewList([]rune(n.chars), n.not))
case *nodeRange:
result = append(result, match.Range{n.lo, n.hi, n.not})
result = append(result, match.NewRange(n.lo, n.hi, n.not))
case *nodeAny:
result = append(result, match.Any{s})
result = append(result, match.NewAny(s))
case *nodeSuper:
result = append(result, match.Super{})
result = append(result, match.NewSuper())
case *nodeSingle:
result = append(result, match.Single{s})
result = append(result, match.NewSingle(s))
case *nodeText:
result = append(result, match.NewText(n.text))
@ -659,7 +661,7 @@ func do2(node node, s string) ([]match.Matcher, error) {
return result, nil
}
func compile(ast *nodePattern, s string) (Glob, error) {
func compile(ast *nodePattern, s []rune) (Glob, error) {
// ms, err := do2(ast, s)
// if err != nil {
// return nil, err
@ -667,7 +669,7 @@ func compile(ast *nodePattern, s string) (Glob, error) {
// if len(ms) == 1 {
// return ms[0], nil
// } else {
// return match.AnyOf{ms}, nil
// return match.NewAnyOf(ms), nil
// }
g, err := do(ast, s)

View File

@ -6,7 +6,7 @@ import (
"testing"
)
const separators = "."
var separators = []rune{'.'}
func TestGlueMatchers(t *testing.T) {
for id, test := range []struct {
@ -15,40 +15,40 @@ func TestGlueMatchers(t *testing.T) {
}{
{
[]match.Matcher{
match.Super{},
match.Single{},
match.NewSuper(),
match.NewSingle(nil),
},
match.Min{1},
match.NewMin(1),
},
{
[]match.Matcher{
match.Any{separators},
match.Single{separators},
match.NewAny(separators),
match.NewSingle(separators),
},
match.EveryOf{match.Matchers{
match.Min{1},
match.Contains{separators, true},
match.NewMin(1),
match.NewContains(string(separators), true),
}},
},
{
[]match.Matcher{
match.Single{},
match.Single{},
match.Single{},
match.NewSingle(nil),
match.NewSingle(nil),
match.NewSingle(nil),
},
match.EveryOf{match.Matchers{
match.Min{3},
match.Max{3},
match.NewMin(3),
match.NewMax(3),
}},
},
{
[]match.Matcher{
match.List{"a", true},
match.Any{"a"},
match.NewList([]rune{'a'}, true),
match.NewAny([]rune{'a'}),
},
match.EveryOf{match.Matchers{
match.Min{1},
match.Contains{"a", true},
match.NewMin(1),
match.NewContains("a", true),
}},
},
} {
@ -59,7 +59,7 @@ func TestGlueMatchers(t *testing.T) {
}
if !reflect.DeepEqual(act, test.exp) {
t.Errorf("#%d unexpected convert matchers result:\nact: %s;\nexp: %s", id, act, test.exp)
t.Errorf("#%d unexpected convert matchers result:\nact: %#v;\nexp: %#v", id, act, test.exp)
continue
}
}
@ -72,15 +72,15 @@ func TestCompileMatchers(t *testing.T) {
}{
{
[]match.Matcher{
match.Super{},
match.Single{separators},
match.NewSuper(),
match.NewSingle(separators),
match.NewText("c"),
},
match.NewBTree(
match.NewText("c"),
match.NewBTree(
match.Single{separators},
match.Super{},
match.NewSingle(separators),
match.NewSuper(),
nil,
),
nil,
@ -88,32 +88,32 @@ func TestCompileMatchers(t *testing.T) {
},
{
[]match.Matcher{
match.Any{},
match.NewAny(nil),
match.NewText("c"),
match.Any{},
match.NewAny(nil),
},
match.NewBTree(
match.NewText("c"),
match.Any{},
match.Any{},
match.NewAny(nil),
match.NewAny(nil),
),
},
{
[]match.Matcher{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.NewRange('a', 'c', true),
match.NewList([]rune{'z', 't', 'e'}, false),
match.NewText("c"),
match.Single{},
match.NewSingle(nil),
},
match.Row{
Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.NewRow(
4,
match.Matchers{
match.NewRange('a', 'c', true),
match.NewList([]rune{'z', 't', 'e'}, false),
match.NewText("c"),
match.Single{},
},
RunesLength: 4,
},
match.NewSingle(nil),
}...,
),
},
} {
act, err := compileMatchers(test.in)
@ -123,7 +123,7 @@ func TestCompileMatchers(t *testing.T) {
}
if !reflect.DeepEqual(act, test.exp) {
t.Errorf("#%d unexpected convert matchers result:\nact: %s;\nexp: %s", id, act, test.exp)
t.Errorf("#%d unexpected convert matchers result:\nact: %#v\nexp: %#v", id, act, test.exp)
continue
}
}
@ -135,52 +135,52 @@ func TestConvertMatchers(t *testing.T) {
}{
{
[]match.Matcher{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.NewRange('a', 'c', true),
match.NewList([]rune{'z', 't', 'e'}, false),
match.NewText("c"),
match.Single{},
match.Any{},
match.NewSingle(nil),
match.NewAny(nil),
},
[]match.Matcher{
match.Row{
Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.NewRow(
4,
[]match.Matcher{
match.NewRange('a', 'c', true),
match.NewList([]rune{'z', 't', 'e'}, false),
match.NewText("c"),
match.Single{},
},
RunesLength: 4,
},
match.Any{},
match.NewSingle(nil),
}...,
),
match.NewAny(nil),
},
},
{
[]match.Matcher{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.NewRange('a', 'c', true),
match.NewList([]rune{'z', 't', 'e'}, false),
match.NewText("c"),
match.Single{},
match.Any{},
match.Single{},
match.Single{},
match.Any{},
match.NewSingle(nil),
match.NewAny(nil),
match.NewSingle(nil),
match.NewSingle(nil),
match.NewAny(nil),
},
[]match.Matcher{
match.Row{
Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.NewRow(
3,
match.Matchers{
match.NewRange('a', 'c', true),
match.NewList([]rune{'z', 't', 'e'}, false),
match.NewText("c"),
},
RunesLength: 3,
},
match.Min{3},
}...,
),
match.NewMin(3),
},
},
} {
act := minimizeMatchers(test.in)
if !reflect.DeepEqual(act, test.exp) {
t.Errorf("#%d unexpected convert matchers 2 result:\nact: %s;\nexp: %s", id, act, test.exp)
t.Errorf("#%d unexpected convert matchers 2 result:\nact: %#v\nexp: %#v", id, act, test.exp)
continue
}
}
@ -204,7 +204,7 @@ func TestCompiler(t *testing.T) {
for id, test := range []struct {
ast *nodePattern
result Glob
sep string
sep []rune
}{
{
ast: pattern(&nodeText{text: "abc"}),
@ -213,20 +213,20 @@ func TestCompiler(t *testing.T) {
{
ast: pattern(&nodeAny{}),
sep: separators,
result: match.Any{separators},
result: match.NewAny(separators),
},
{
ast: pattern(&nodeAny{}),
result: match.Super{},
result: match.NewSuper(),
},
{
ast: pattern(&nodeSuper{}),
result: match.Super{},
result: match.NewSuper(),
},
{
ast: pattern(&nodeSingle{}),
sep: separators,
result: match.Single{separators},
result: match.NewSingle(separators),
},
{
ast: pattern(&nodeRange{
@ -234,39 +234,39 @@ func TestCompiler(t *testing.T) {
hi: 'z',
not: true,
}),
result: match.Range{'a', 'z', true},
result: match.NewRange('a', 'z', true),
},
{
ast: pattern(&nodeList{
chars: "abc",
not: true,
}),
result: match.List{"abc", true},
result: match.NewList([]rune{'a', 'b', 'c'}, true),
},
{
ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}),
sep: separators,
result: match.EveryOf{Matchers: match.Matchers{
match.Min{3},
match.Contains{separators, true},
match.NewMin(3),
match.NewContains(string(separators), true),
}},
},
{
ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}),
result: match.Min{3},
result: match.NewMin(3),
},
{
ast: pattern(&nodeAny{}, &nodeText{text: "abc"}, &nodeSingle{}),
sep: separators,
result: match.NewBTree(
match.Row{
Matchers: match.Matchers{
match.NewRow(
4,
match.Matchers{
match.NewText("abc"),
match.Single{separators},
},
RunesLength: 4,
},
match.Any{separators},
match.NewSingle(separators),
}...,
),
match.NewAny(separators),
nil,
),
},
@ -274,49 +274,49 @@ func TestCompiler(t *testing.T) {
ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSingle{}),
sep: separators,
result: match.NewBTree(
match.Row{
Matchers: match.Matchers{
match.Single{separators},
match.NewRow(
5,
match.Matchers{
match.NewSingle(separators),
match.NewText("abc"),
match.Single{separators},
},
RunesLength: 5,
},
match.Super{},
match.NewSingle(separators),
}...,
),
match.NewSuper(),
nil,
),
},
{
ast: pattern(&nodeAny{}, &nodeText{text: "abc"}),
result: match.Suffix{"abc"},
result: match.NewSuffix("abc"),
},
{
ast: pattern(&nodeText{text: "abc"}, &nodeAny{}),
result: match.Prefix{"abc"},
result: match.NewPrefix("abc"),
},
{
ast: pattern(&nodeText{text: "abc"}, &nodeAny{}, &nodeText{text: "def"}),
result: match.PrefixSuffix{"abc", "def"},
result: match.NewPrefixSuffix("abc", "def"),
},
{
ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}),
result: match.Contains{"abc", false},
result: match.NewContains("abc", false),
},
{
ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}),
sep: separators,
result: match.NewBTree(
match.NewText("abc"),
match.Any{separators},
match.Any{separators},
match.NewAny(separators),
match.NewAny(separators),
),
},
{
ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSuper{}, &nodeSingle{}),
result: match.NewBTree(
match.NewText("abc"),
match.Min{1},
match.Min{1},
match.NewMin(1),
match.NewMin(1),
),
},
{
@ -348,9 +348,9 @@ func TestCompiler(t *testing.T) {
match.NewText("abc"),
nil,
match.AnyOf{Matchers: match.Matchers{
match.Single{},
match.List{List: "def"},
match.Nothing{},
match.NewSingle(nil),
match.NewList([]rune{'d', 'e', 'f'}, false),
match.NewNothing(),
}},
),
},
@ -361,15 +361,15 @@ func TestCompiler(t *testing.T) {
&nodeAny{},
),
result: match.NewBTree(
match.Row{
Matchers: match.Matchers{
match.Range{Lo: 'a', Hi: 'z'},
match.Range{Lo: 'a', Hi: 'x', Not: true},
},
RunesLength: 2,
},
match.NewRow(
2,
match.Matchers{
match.NewRange('a', 'z', false),
match.NewRange('a', 'x', true),
}...,
),
nil,
match.Super{},
match.NewSuper(),
),
},
{
@ -385,17 +385,17 @@ func TestCompiler(t *testing.T) {
&nodeText{text: "ghi"},
),
)),
result: match.Row{
RunesLength: 7,
Matchers: match.Matchers{
result: match.NewRow(
7,
match.Matchers{
match.NewText("abc"),
match.AnyOf{Matchers: match.Matchers{
match.List{List: "abc"},
match.List{List: "def"},
match.NewList([]rune{'a', 'b', 'c'}, false),
match.NewList([]rune{'d', 'e', 'f'}, false),
}},
match.NewText("ghi"),
},
},
}...,
),
},
// {
// ast: pattern(
@ -403,21 +403,21 @@ func TestCompiler(t *testing.T) {
// anyOf(&nodeText{text: "c"}, &nodeText{text: "d"}),
// ),
// result: match.AnyOf{Matchers: match.Matchers{
// match.Row{Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"c", 1}}},
// match.Row{Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"d"}}},
// match.Row{Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"c", 1}}},
// match.Row{Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"d"}}},
// match.NewRow(Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"c", 1}}),
// match.NewRow(Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"d"}}),
// match.NewRow(Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"c", 1}}),
// match.NewRow(Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"d"}}),
// }},
// },
} {
prog, err := compile(test.ast, test.sep)
m, err := compile(test.ast, test.sep)
if err != nil {
t.Errorf("compilation error: %s", err)
continue
}
if !reflect.DeepEqual(prog, test.result) {
t.Errorf("#%d results are not equal:\nexp: %s,\nact: %s", id, test.result, prog)
if !reflect.DeepEqual(m, test.result) {
t.Errorf("#%d results are not equal:\nexp: %#v\nact: %#v", id, test.result, m)
continue
}
}
@ -426,105 +426,105 @@ func TestCompiler(t *testing.T) {
const complexityString = "abcd"
//func BenchmarkComplexityAny(b *testing.B) {
// m := match.Any{}
// m := match.NewAny(nil)
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityContains(b *testing.B) {
// m := match.Contains{}
// m := match.NewContains()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityList(b *testing.B) {
// m := match.List{}
// m := match.NewList()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityMax(b *testing.B) {
// m := match.Max{}
// m := match.NewMax()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityMin(b *testing.B) {
// m := match.Min{}
// m := match.NewMin()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityNothing(b *testing.B) {
// m := match.Nothing{}
// m := match.NewNothing()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityPrefix(b *testing.B) {
// m := match.Prefix{}
// m := match.NewPrefix()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityPrefixSuffix(b *testing.B) {
// m := match.PrefixSuffix{}
// m := match.NewPrefixSuffix()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityRange(b *testing.B) {
// m := match.Range{}
// m := match.NewRange()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityRow(b *testing.B) {
// m := match.Row{}
// m := match.NewRow()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexitySingle(b *testing.B) {
// m := match.Single{}
// m := match.NewSingle(nil)
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexitySuffix(b *testing.B) {
// m := match.Suffix{}
// m := match.NewSuffix()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexitySuper(b *testing.B) {
// m := match.Super{}
// m := match.NewSuper()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityText(b *testing.B) {
// m := match.Text{}
// m := match.NewText()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
// }
//}
//func BenchmarkComplexityAnyOf(b *testing.B) {
// m := match.AnyOf{}
// m := match.NewAnyOf()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)
@ -538,7 +538,7 @@ const complexityString = "abcd"
// }
//}
//func BenchmarkComplexityEveryOf(b *testing.B) {
// m := match.EveryOf{}
// m := match.NewEveryOf()
// for i := 0; i < b.N; i++ {
// _ = m.Match(complexityString)
// _, _ = m.Index(complexityString)

View File

@ -1,7 +1,5 @@
package glob
import "strings"
// Glob represents compiled glob pattern.
type Glob interface {
Match(string) bool
@ -33,13 +31,13 @@ type Glob interface {
// pattern { `,` pattern }
// comma-separated (without spaces) patterns
//
func Compile(pattern string, separators ...string) (Glob, error) {
func Compile(pattern string, separators ...rune) (Glob, error) {
ast, err := parse(newLexer(pattern))
if err != nil {
return nil, err
}
matcher, err := compile(ast, strings.Join(separators, ""))
matcher, err := compile(ast, separators)
if err != nil {
return nil, err
}
@ -48,7 +46,7 @@ func Compile(pattern string, separators ...string) (Glob, error) {
}
// MustCompile is the same as Compile, except that if Compile returns error, this will panic
func MustCompile(pattern string, separators ...string) Glob {
func MustCompile(pattern string, separators ...rune) Glob {
g, err := Compile(pattern, separators...)
if err != nil {
panic(err)

View File

@ -53,10 +53,10 @@ const (
type test struct {
pattern, match string
should bool
delimiters []string
delimiters []rune
}
func glob(s bool, p, m string, d ...string) test {
func glob(s bool, p, m string, d ...rune) test {
return test{p, m, s, d}
}
@ -68,22 +68,22 @@ func TestGlob(t *testing.T) {
glob(true, "a*c", "abc"),
glob(true, "a*c", "a12345c"),
glob(true, "a?c", "a1c"),
glob(true, "a.b", "a.b", "."),
glob(true, "a.*", "a.b", "."),
glob(true, "a.**", "a.b.c", "."),
glob(true, "a.?.c", "a.b.c", "."),
glob(true, "a.?.?", "a.b.c", "."),
glob(true, "a.b", "a.b", '.'),
glob(true, "a.*", "a.b", '.'),
glob(true, "a.**", "a.b.c", '.'),
glob(true, "a.?.c", "a.b.c", '.'),
glob(true, "a.?.?", "a.b.c", '.'),
glob(true, "?at", "cat"),
glob(true, "?at", "fat"),
glob(true, "*", "abc"),
glob(true, `\*`, "*"),
glob(true, "**", "a.b.c", "."),
glob(true, "**", "a.b.c", '.'),
glob(false, "?at", "at"),
glob(false, "?at", "fat", "f"),
glob(false, "a.*", "a.b.c", "."),
glob(false, "a.?.c", "a.bb.c", "."),
glob(false, "*", "a.b.c", "."),
glob(false, "?at", "fat", 'f'),
glob(false, "a.*", "a.b.c", '.'),
glob(false, "a.?.c", "a.bb.c", '.'),
glob(false, "*", "a.b.c", '.'),
glob(true, "*test", "this is a test"),
glob(true, "this*", "this is a test"),
@ -168,6 +168,16 @@ func BenchmarkAllGlobMatch(b *testing.B) {
_ = m.Match(fixture_all_match)
}
}
// BenchmarkAllGlobMatchParallel measures Match of the glob compiled from
// pattern_all against fixture_all_match, driven through b.RunParallel.
func BenchmarkAllGlobMatchParallel(b *testing.B) {
	m, err := Compile(pattern_all)
	if err != nil {
		// Fail with the real cause instead of a nil dereference in the loop.
		b.Fatalf("could not compile pattern: %s", err)
	}

	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			_ = m.Match(fixture_all_match)
		}
	})
}
func BenchmarkAllRegexpMatch(b *testing.B) {
m := regexp.MustCompile(regexp_all)
f := []byte(fixture_all_match)

View File

@ -2,37 +2,36 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
"github.com/gobwas/glob/strings"
)
type Any struct {
Separators string
Separators []rune
}
// NewAny returns an Any matcher whose Separators field is set to s.
// The slice is stored as given, not copied.
func NewAny(s []rune) Any {
	return Any{Separators: s}
}
func (self Any) Match(s string) bool {
return strings.IndexAny(s, self.Separators) == -1
return strings.IndexAnyRunes(s, self.Separators) == -1
}
func (self Any) Index(s string) (int, []int) {
var sub string
found := strings.IndexAny(s, self.Separators)
found := strings.IndexAnyRunes(s, self.Separators)
switch found {
case -1:
sub = s
case 0:
return 0, []int{0}
return 0, segments0
default:
sub = s[:found]
s = s[:found]
}
segments := make([]int, 0, utf8.RuneCountInString(sub)+1)
for i := range sub {
segments := acquireSegments(len(s))
for i := range s {
segments = append(segments, i)
}
segments = append(segments, len(sub))
segments = append(segments, len(s))
return 0, segments
}
@ -42,5 +41,5 @@ func (self Any) Len() int {
}
func (self Any) String() string {
return fmt.Sprintf("<any:![%s]>", self.Separators)
return fmt.Sprintf("<any:![%s]>", string(self.Separators))
}

View File

@ -8,6 +8,10 @@ type AnyOf struct {
Matchers Matchers
}
// NewAnyOf returns an AnyOf holding the given matchers in the order passed.
func NewAnyOf(m ...Matcher) AnyOf {
	return AnyOf{Matchers: Matchers(m)}
}
func (self *AnyOf) Add(m Matcher) error {
self.Matchers = append(self.Matchers, m)
return nil
@ -24,14 +28,9 @@ func (self AnyOf) Match(s string) bool {
}
func (self AnyOf) Index(s string) (int, []int) {
if len(self.Matchers) == 0 {
return -1, nil
}
// segments to merge
var segments [][]int
index := -1
segments := acquireSegments(len(s))
for _, m := range self.Matchers {
idx, seg := m.Index(s)
if idx == -1 {
@ -40,7 +39,7 @@ func (self AnyOf) Index(s string) (int, []int) {
if index == -1 || idx < index {
index = idx
segments = [][]int{seg}
segments = append(segments[:0], seg...)
continue
}
@ -48,14 +47,16 @@ func (self AnyOf) Index(s string) (int, []int) {
continue
}
segments = append(segments, seg)
// here idx == index
segments = appendMerge(segments, seg)
}
if index == -1 {
releaseSegments(segments)
return -1, nil
}
return index, mergeSegments(segments)
return index, segments
}
func (self AnyOf) Len() (l int) {

View File

@ -14,7 +14,7 @@ func TestAnyOfIndex(t *testing.T) {
}{
{
Matchers{
Any{},
NewAny(nil),
NewText("b"),
NewText("c"),
},
@ -24,8 +24,8 @@ func TestAnyOfIndex(t *testing.T) {
},
{
Matchers{
Prefix{"b"},
Suffix{"c"},
NewPrefix("b"),
NewSuffix("c"),
},
"abc",
0,
@ -33,15 +33,15 @@ func TestAnyOfIndex(t *testing.T) {
},
{
Matchers{
List{"[def]", false},
List{"[abc]", false},
NewList([]rune("[def]"), false),
NewList([]rune("[abc]"), false),
},
"abcdef",
0,
[]int{1},
},
} {
everyOf := AnyOf{test.matchers}
everyOf := NewAnyOf(test.matchers...)
index, segments := everyOf.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)

View File

@ -7,25 +7,25 @@ import (
func TestAnyIndex(t *testing.T) {
for id, test := range []struct {
sep string
sep []rune
fixture string
index int
segments []int
}{
{
".",
[]rune{'.'},
"abc",
0,
[]int{0, 1, 2, 3},
},
{
".",
[]rune{'.'},
"abc.def",
0,
[]int{0, 1, 2, 3},
},
} {
p := Any{test.sep}
p := NewAny(test.sep)
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -37,8 +37,21 @@ func TestAnyIndex(t *testing.T) {
}
func BenchmarkIndexAny(b *testing.B) {
p := Any{bench_separators}
m := NewAny(bench_separators)
for i := 0; i < b.N; i++ {
p.Index(bench_pattern)
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
// BenchmarkIndexAnyParallel measures Any.Index on bench_pattern through
// b.RunParallel, handing every returned segments slice back via
// releaseSegments.
func BenchmarkIndexAnyParallel(b *testing.B) {
	m := NewAny(bench_separators)

	worker := func(pb *testing.PB) {
		for pb.Next() {
			_, segments := m.Index(bench_pattern)
			releaseSegments(segments)
		}
	}

	b.RunParallel(worker)
}

View File

@ -65,8 +65,8 @@ func (self BTree) Match(s string) bool {
return false
}
// try to cut unnecessary parts
// by knowledge of length of right and left part
// try to cut unnecessary parts
// by knowledge of length of right and left part
var offset, limit int
if self.LeftLengthRunes >= 0 {
offset = self.LeftLengthRunes
@ -81,6 +81,7 @@ func (self BTree) Match(s string) bool {
// search for matching part in substring
index, segments := self.Value.Index(s[offset:limit])
if index == -1 {
releaseSegments(segments)
return false
}
@ -112,6 +113,7 @@ func (self BTree) Match(s string) bool {
}
if right {
releaseSegments(segments)
return true
}
}
@ -119,11 +121,26 @@ func (self BTree) Match(s string) bool {
_, step := utf8.DecodeRuneInString(s[offset+index:])
offset += index + step
releaseSegments(segments)
}
return false
}
func (self BTree) String() string {
return fmt.Sprintf("<btree:[%s<-%s->%s]>", self.Left, self.Value, self.Right)
const n string = "<nil>"
var l, r string
if self.Left == nil {
l = n
} else {
l = self.Left.String()
}
if self.Right == nil {
r = n
} else {
r = self.Right.String()
}
return fmt.Sprintf("<btree:[%s<-%s->%s]>", l, self.Value, r)
}

View File

@ -11,17 +11,17 @@ func TestBTree(t *testing.T) {
exp bool
}{
{
NewBTree(NewText("abc"), Super{}, Super{}),
NewBTree(NewText("abc"), NewSuper(), NewSuper()),
"abc",
true,
},
{
NewBTree(NewText("a"), Single{}, Single{}),
NewBTree(NewText("a"), NewSingle(nil), NewSingle(nil)),
"aaa",
true,
},
{
NewBTree(NewText("b"), Single{}, nil),
NewBTree(NewText("b"), NewSingle(nil), nil),
"bbb",
false,
},
@ -29,8 +29,8 @@ func TestBTree(t *testing.T) {
NewBTree(
NewText("c"),
NewBTree(
Single{},
Super{},
NewSingle(nil),
NewSuper(),
nil,
),
nil,

View File

@ -3,7 +3,6 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
)
type Contains struct {
@ -11,15 +10,16 @@ type Contains struct {
Not bool
}
// NewContains returns a Contains matcher for needle. The not flag is stored
// in the Not field, which Match uses to invert its result.
func NewContains(needle string, not bool) Contains {
	return Contains{Needle: needle, Not: not}
}
// Match reports whether s contains Needle; when Not is set the result is
// inverted.
func (self Contains) Match(s string) bool {
	found := strings.Contains(s, self.Needle)
	if self.Not {
		return !found
	}
	return found
}
func (self Contains) Index(s string) (int, []int) {
var (
sub string
offset int
)
var offset int
idx := strings.Index(s, self.Needle)
@ -29,27 +29,20 @@ func (self Contains) Index(s string) (int, []int) {
}
offset = idx + len(self.Needle)
if len(s) <= offset {
return 0, []int{offset}
}
sub = s[offset:]
} else {
switch idx {
case -1:
sub = s
default:
sub = s[:idx]
}
s = s[offset:]
} else if idx != -1 {
s = s[:idx]
}
segments := make([]int, 0, utf8.RuneCountInString(sub)+1)
for i, _ := range sub {
segments := acquireSegments(len(s) + 1)
for i, _ := range s {
segments = append(segments, offset+i)
}
return 0, append(segments, offset+len(sub))
return 0, append(segments, offset+len(s))
}
func (self Contains) Len() int {

View File

@ -42,7 +42,7 @@ func TestContainsIndex(t *testing.T) {
[]int{0, 1, 2, 3},
},
} {
p := Contains{test.prefix, test.not}
p := NewContains(test.prefix, test.not)
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -54,8 +54,21 @@ func TestContainsIndex(t *testing.T) {
}
func BenchmarkIndexContains(b *testing.B) {
m := Contains{bench_separators, true}
m := NewContains(string(bench_separators), true)
for i := 0; i < b.N; i++ {
m.Index(bench_pattern)
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
func BenchmarkIndexContainsParallel(b *testing.B) {
m := NewContains(string(bench_separators), true)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
})
}

55
match/debug/debug.go Normal file
View File

@ -0,0 +1,55 @@
package debug
import (
"bytes"
"fmt"
"github.com/gobwas/glob/match"
"math/rand"
)
// Graphviz renders the matcher m as a Graphviz DOT digraph whose graph label
// is pattern. The root node is named with a random hexadecimal identifier;
// the node and edge statements are produced by graphviz_internal.
func Graphviz(pattern string, m match.Matcher) string {
	rootID := fmt.Sprintf("%x", rand.Int63())
	body := graphviz_internal(m, rootID)

	return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, body)
}
// graphviz_internal renders the matcher m, and recursively every matcher
// nested inside it, as a sequence of Graphviz DOT statements.
//
// id is the DOT node name used for m itself. Children of BTree, AnyOf and
// EveryOf matchers get freshly generated random hexadecimal names and an edge
// from id. Any other matcher becomes a single node labelled with its String().
func graphviz_internal(m match.Matcher, id string) string {
	buf := &bytes.Buffer{}

	switch matcher := m.(type) {
	case match.BTree:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, matcher.Value.String())
		for _, m := range []match.Matcher{matcher.Left, matcher.Right} {
			switch n := m.(type) {
			case nil:
				// A missing branch is still drawn, as an explicit "<nil>" leaf.
				rnd := rand.Int63()
				fmt.Fprintf(buf, `"%x"[label="<nil>"];`, rnd)
				fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)

			default:
				sub := fmt.Sprintf("%x", rand.Int63())
				fmt.Fprintf(buf, `"%s"->"%s";`, id, sub)
				// The rendered subtree is data, not a format string: write it
				// verbatim so a '%' inside a matcher label is not interpreted
				// as a formatting verb.
				buf.WriteString(graphviz_internal(n, sub))
			}
		}

	case match.AnyOf:
		fmt.Fprintf(buf, `"%s"[label="AnyOf"];`, id)
		for _, m := range matcher.Matchers {
			rnd := rand.Int63()
			buf.WriteString(graphviz_internal(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}

	case match.EveryOf:
		fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id)
		for _, m := range matcher.Matchers {
			rnd := rand.Int63()
			buf.WriteString(graphviz_internal(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}

	default:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m.String())
	}

	return buf.String()
}

View File

@ -8,6 +8,10 @@ type EveryOf struct {
Matchers Matchers
}
func NewEveryOf(m ...Matcher) EveryOf {
return EveryOf{Matchers(m)}
}
func (self *EveryOf) Add(m Matcher) error {
self.Matchers = append(self.Matchers, m)
return nil
@ -28,40 +32,56 @@ func (self EveryOf) Len() (l int) {
func (self EveryOf) Index(s string) (int, []int) {
var index int
var offset int
var segments []int
// make `in` with cap as len(s),
// cause it is the maximum size of output segments values
next := acquireSegments(len(s))
current := acquireSegments(len(s))
sub := s
for _, m := range self.Matchers {
for i, m := range self.Matchers {
idx, seg := m.Index(sub)
if idx == -1 {
releaseSegments(next)
releaseSegments(current)
return -1, nil
}
var sum []int
if segments == nil {
sum = seg
if i == 0 {
// we use copy here instead of `current = seg`
// cause seg is a slice from reusable buffer `in`
// and it could be overwritten in next iteration
current = append(current, seg...)
} else {
// clear the next
next = next[:0]
delta := index - (idx + offset)
for _, ex := range segments {
for _, ex := range current {
for _, n := range seg {
if ex+delta == n {
sum = append(sum, n)
next = append(next, n)
}
}
}
if len(next) == 0 {
releaseSegments(next)
releaseSegments(current)
return -1, nil
}
current = append(current[:0], next...)
}
if len(sum) == 0 {
return -1, nil
}
segments = sum
index = idx + offset
sub = s[index:]
offset += idx
}
return index, segments
releaseSegments(next)
return index, current
}
func (self EveryOf) Match(s string) bool {

View File

@ -14,26 +14,26 @@ func TestEveryOfIndex(t *testing.T) {
}{
{
Matchers{
Any{},
NewAny(nil),
NewText("b"),
NewText("c"),
},
"abc",
"dbc",
-1,
nil,
},
{
Matchers{
Any{},
Prefix{"b"},
Suffix{"c"},
NewAny(nil),
NewPrefix("b"),
NewSuffix("c"),
},
"abc",
1,
[]int{2},
},
} {
everyOf := EveryOf{test.matchers}
everyOf := NewEveryOf(test.matchers...)
index, segments := everyOf.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)

View File

@ -2,24 +2,26 @@ package match
import (
"fmt"
"strings"
"github.com/gobwas/glob/runes"
"unicode/utf8"
)
type List struct {
List string
List []rune
Not bool
}
func NewList(list []rune, not bool) List {
return List{list, not}
}
func (self List) Match(s string) bool {
// if s 100% have two symbols
// _, w := utf8.DecodeRuneInString(s)
// if len(s) > w {
if len(s) > 4 {
r, w := utf8.DecodeRuneInString(s)
if len(s) > w {
return false
}
inList := strings.Index(self.List, s) != -1
inList := runes.IndexRune(self.List, r) != -1
return inList == !self.Not
}
@ -29,8 +31,8 @@ func (self List) Len() int {
func (self List) Index(s string) (int, []int) {
for i, r := range s {
if self.Not == (strings.IndexRune(self.List, r) == -1) {
return i, []int{utf8.RuneLen(r)}
if self.Not == (runes.IndexRune(self.List, r) == -1) {
return i, segmentsByRuneLength[utf8.RuneLen(r)]
}
}
@ -43,5 +45,5 @@ func (self List) String() string {
not = "!"
}
return fmt.Sprintf("<list:%s[%s]>", not, self.List)
return fmt.Sprintf("<list:%s[%s]>", not, string(self.List))
}

View File

@ -7,28 +7,28 @@ import (
func TestListIndex(t *testing.T) {
for id, test := range []struct {
list string
list []rune
not bool
fixture string
index int
segments []int
}{
{
"ab",
[]rune("ab"),
false,
"abc",
0,
[]int{1},
},
{
"ab",
[]rune("ab"),
true,
"fffabfff",
0,
[]int{1},
},
} {
p := List{test.list, test.not}
p := NewList(test.list, test.not)
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -40,8 +40,19 @@ func TestListIndex(t *testing.T) {
}
func BenchmarkIndexList(b *testing.B) {
m := List{"def", false}
m := NewList([]rune("def"), false)
for i := 0; i < b.N; i++ {
m.Index(bench_pattern)
}
}
func BenchmarkIndexListParallel(b *testing.B) {
m := NewList([]rune("def"), false)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern)
}
})
}

View File

@ -1,5 +1,7 @@
package match
// todo common table of rune's length
import (
"fmt"
"strings"
@ -27,59 +29,53 @@ func (m Matchers) String() string {
return fmt.Sprintf("%s", strings.Join(s, ","))
}
func appendIfNotAsPrevious(target []int, val int) []int {
l := len(target)
if l != 0 && target[l-1] == val {
return target
}
// appendMerge merges and sorts given already SORTED and UNIQUE segments.
func appendMerge(target, sub []int) []int {
lt, ls := len(target), len(sub)
out := make([]int, 0, lt+ls)
return append(target, val)
}
// mergeSegments merges and sorts given already SORTED and UNIQUE segments.
func mergeSegments(segments [][]int) []int {
var current []int
for _, s := range segments {
if current == nil {
current = s
continue
for x, y := 0, 0; x < lt || y < ls; {
if x >= lt {
out = append(out, sub[y:]...)
break
}
var next []int
for x, y := 0, 0; x < len(current) || y < len(s); {
if x >= len(current) {
next = append(next, s[y:]...)
break
}
if y >= len(s) {
next = append(next, current[x:]...)
break
}
xValue := current[x]
yValue := s[y]
switch {
case xValue == yValue:
x++
y++
next = appendIfNotAsPrevious(next, xValue)
case xValue < yValue:
next = appendIfNotAsPrevious(next, xValue)
x++
case yValue < xValue:
next = appendIfNotAsPrevious(next, yValue)
y++
}
if y >= ls {
out = append(out, target[x:]...)
break
}
current = next
xValue := target[x]
yValue := sub[y]
switch {
case xValue == yValue:
out = append(out, xValue)
x++
y++
case xValue < yValue:
out = append(out, xValue)
x++
case yValue < xValue:
out = append(out, yValue)
y++
}
}
return current
target = append(target[:0], out...)
return target
}
func reverseSegments(input []int) {
l := len(input)
m := l / 2
for i := 0; i < m; i++ {
input[i], input[l-i-1] = input[l-i-1], input[i]
}
}

View File

@ -3,38 +3,88 @@ package match
import (
"reflect"
"testing"
"unicode/utf8"
)
const bench_separators = "."
var bench_separators = []rune{'.'}
const bench_pattern = "abcdefghijklmnopqrstuvwxyz0123456789"
func TestMergeSegments(t *testing.T) {
func TestAppendMerge(t *testing.T) {
for id, test := range []struct {
segments [][]int
segments [2][]int
exp []int
}{
{
[][]int{
[2][]int{
[]int{0, 6, 7},
[]int{0, 1, 3},
[]int{2, 4},
},
[]int{0, 1, 2, 3, 4, 6, 7},
[]int{0, 1, 3, 6, 7},
},
{
[][]int{
[2][]int{
[]int{0, 1, 3, 6, 7},
[]int{0, 1, 3},
[]int{2, 4},
[]int{1},
[]int{0, 1, 10},
},
[]int{0, 1, 2, 3, 4, 6, 7},
[]int{0, 1, 3, 6, 7, 10},
},
} {
act := mergeSegments(test.segments)
act := appendMerge(test.segments[0], test.segments[1])
if !reflect.DeepEqual(act, test.exp) {
t.Errorf("#%d merge sort segments unexpected:\nact: %v\nexp:%v", id, act, test.exp)
continue
}
}
}
func BenchmarkAppendMerge(b *testing.B) {
s1 := []int{0, 1, 3, 6, 7}
s2 := []int{0, 1, 3}
for i := 0; i < b.N; i++ {
appendMerge(s1, s2)
}
}
func BenchmarkAppendMergeParallel(b *testing.B) {
s1 := []int{0, 1, 3, 6, 7}
s2 := []int{0, 1, 3}
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
appendMerge(s1, s2)
}
})
}
func BenchmarkReverse(b *testing.B) {
for i := 0; i < b.N; i++ {
reverseSegments([]int{1, 2, 3, 4})
}
}
func getTable() []int {
table := make([]int, utf8.MaxRune+1)
for i := 0; i <= utf8.MaxRune; i++ {
table[i] = utf8.RuneLen(rune(i))
}
return table
}
var table = getTable()
const runeToLen = 'q'
func BenchmarkRuneLenFromTable(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = table[runeToLen]
}
}
func BenchmarkRuneLenFromUTF8(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = utf8.RuneLen(runeToLen)
}
}

View File

@ -9,6 +9,10 @@ type Max struct {
Limit int
}
func NewMax(l int) Max {
return Max{l}
}
func (self Max) Match(s string) bool {
var l int
for range s {
@ -21,7 +25,8 @@ func (self Max) Match(s string) bool {
return true
}
func (self Max) Index(s string) (index int, segments []int) {
func (self Max) Index(s string) (int, []int) {
segments := acquireSegments(self.Limit + 1)
segments = append(segments, 0)
var count int
for i, r := range s {

View File

@ -25,7 +25,7 @@ func TestMaxIndex(t *testing.T) {
[]int{0, 1, 2, 3},
},
} {
p := Max{test.limit}
p := NewMax(test.limit)
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -37,8 +37,21 @@ func TestMaxIndex(t *testing.T) {
}
func BenchmarkIndexMax(b *testing.B) {
m := Max{10}
m := NewMax(10)
for i := 0; i < b.N; i++ {
m.Index(bench_pattern)
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
func BenchmarkIndexMaxParallel(b *testing.B) {
m := NewMax(10)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
})
}

View File

@ -9,6 +9,10 @@ type Min struct {
Limit int
}
func NewMin(l int) Min {
return Min{l}
}
func (self Min) Match(s string) bool {
var l int
for range s {
@ -24,12 +28,12 @@ func (self Min) Match(s string) bool {
func (self Min) Index(s string) (int, []int) {
var count int
c := utf8.RuneCountInString(s)
if c < self.Limit {
c := len(s) - self.Limit + 1
if c <= 0 {
return -1, nil
}
segments := make([]int, 0, c-self.Limit+1)
segments := acquireSegments(c)
for i, r := range s {
count++
if count >= self.Limit {
@ -37,6 +41,10 @@ func (self Min) Index(s string) (int, []int) {
}
}
if len(segments) == 0 {
return -1, nil
}
return 0, segments
}

View File

@ -25,7 +25,7 @@ func TestMinIndex(t *testing.T) {
[]int{3, 4},
},
} {
p := Min{test.limit}
p := NewMin(test.limit)
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -37,8 +37,21 @@ func TestMinIndex(t *testing.T) {
}
func BenchmarkIndexMin(b *testing.B) {
m := Min{10}
m := NewMin(10)
for i := 0; i < b.N; i++ {
m.Index(bench_pattern)
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
func BenchmarkIndexMinParallel(b *testing.B) {
m := NewMin(10)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
})
}

View File

@ -6,12 +6,16 @@ import (
type Nothing struct{}
func NewNothing() Nothing {
return Nothing{}
}
func (self Nothing) Match(s string) bool {
return len(s) == 0
}
func (self Nothing) Index(s string) (int, []int) {
return 0, []int{0}
return 0, segments0
}
func (self Nothing) Len() int {

View File

@ -22,7 +22,7 @@ func TestNothingIndex(t *testing.T) {
[]int{0},
},
} {
p := Nothing{}
p := NewNothing()
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -34,8 +34,21 @@ func TestNothingIndex(t *testing.T) {
}
func BenchmarkIndexNothing(b *testing.B) {
m := Nothing{}
m := NewNothing()
for i := 0; i < b.N; i++ {
m.Index(bench_pattern)
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
func BenchmarkIndexNothingParallel(b *testing.B) {
m := NewNothing()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
})
}

View File

@ -10,6 +10,10 @@ type Prefix struct {
Prefix string
}
func NewPrefix(p string) Prefix {
return Prefix{p}
}
func (self Prefix) Index(s string) (int, []int) {
idx := strings.Index(s, self.Prefix)
if idx == -1 {
@ -24,7 +28,7 @@ func (self Prefix) Index(s string) (int, []int) {
sub = ""
}
segments := make([]int, 0, utf8.RuneCountInString(sub)+1)
segments := acquireSegments(len(sub) + 1)
segments = append(segments, length)
for i, r := range sub {
segments = append(segments, length+i+utf8.RuneLen(r))

View File

@ -9,41 +9,44 @@ type PrefixSuffix struct {
Prefix, Suffix string
}
func NewPrefixSuffix(p, s string) PrefixSuffix {
return PrefixSuffix{p, s}
}
func (self PrefixSuffix) Index(s string) (int, []int) {
prefixIdx := strings.Index(s, self.Prefix)
if prefixIdx == -1 {
return -1, nil
}
var resp []int
suffixLen := len(self.Suffix)
if suffixLen > 0 {
var segments []int
for sub := s[prefixIdx:]; ; {
suffixIdx := strings.LastIndex(sub, self.Suffix)
if suffixIdx == -1 {
break
}
segments = append(segments, suffixIdx+suffixLen)
sub = sub[:suffixIdx]
}
segLen := len(segments)
if segLen == 0 {
return -1, nil
}
resp = make([]int, segLen)
for i, s := range segments {
resp[segLen-i-1] = s
}
} else {
resp = append(resp, len(s)-prefixIdx)
if suffixLen <= 0 {
return prefixIdx, []int{len(s) - prefixIdx}
}
return prefixIdx, resp
if (len(s) - prefixIdx) <= 0 {
return -1, nil
}
segments := acquireSegments(len(s) - prefixIdx)
for sub := s[prefixIdx:]; ; {
suffixIdx := strings.LastIndex(sub, self.Suffix)
if suffixIdx == -1 {
break
}
segments = append(segments, suffixIdx+suffixLen)
sub = sub[:suffixIdx]
}
if len(segments) == 0 {
releaseSegments(segments)
return -1, nil
}
reverseSegments(segments)
return prefixIdx, segments
}
func (self PrefixSuffix) Len() int {

View File

@ -35,7 +35,7 @@ func TestPrefixSuffixIndex(t *testing.T) {
[]int{3},
},
} {
p := PrefixSuffix{test.prefix, test.suffix}
p := NewPrefixSuffix(test.prefix, test.suffix)
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -47,8 +47,21 @@ func TestPrefixSuffixIndex(t *testing.T) {
}
func BenchmarkIndexPrefixSuffix(b *testing.B) {
m := PrefixSuffix{"qew", "sqw"}
m := NewPrefixSuffix("qew", "sqw")
for i := 0; i < b.N; i++ {
m.Index(bench_pattern)
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
func BenchmarkIndexPrefixSuffixParallel(b *testing.B) {
m := NewPrefixSuffix("qew", "sqw")
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
})
}

View File

@ -25,7 +25,7 @@ func TestPrefixIndex(t *testing.T) {
[]int{2, 3, 4, 5},
},
} {
p := Prefix{test.prefix}
p := NewPrefix(test.prefix)
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -37,8 +37,21 @@ func TestPrefixIndex(t *testing.T) {
}
func BenchmarkIndexPrefix(b *testing.B) {
m := Prefix{"qew"}
m := NewPrefix("qew")
for i := 0; i < b.N; i++ {
m.Index(bench_pattern)
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
func BenchmarkIndexPrefixParallel(b *testing.B) {
m := NewPrefix("qew")
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
})
}

View File

@ -10,6 +10,10 @@ type Range struct {
Not bool
}
func NewRange(lo, hi rune, not bool) Range {
return Range{lo, hi, not}
}
func (self Range) Len() int {
return lenOne
}
@ -28,7 +32,7 @@ func (self Range) Match(s string) bool {
func (self Range) Index(s string) (int, []int) {
for i, r := range s {
if self.Not != (r >= self.Lo && r <= self.Hi) {
return i, []int{utf8.RuneLen(r)}
return i, segmentsByRuneLength[utf8.RuneLen(r)]
}
}

View File

@ -35,7 +35,7 @@ func TestRangeIndex(t *testing.T) {
[]int{1},
},
} {
m := Range{test.lo, test.hi, test.not}
m := NewRange(test.lo, test.hi, test.not)
index, segments := m.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -47,8 +47,21 @@ func TestRangeIndex(t *testing.T) {
}
func BenchmarkIndexRange(b *testing.B) {
m := Range{'0', '9', false}
m := NewRange('0', '9', false)
for i := 0; i < b.N; i++ {
m.Index(bench_pattern)
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
func BenchmarkIndexRangeParallel(b *testing.B) {
m := NewRange('0', '9', false)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
})
}

View File

@ -7,6 +7,15 @@ import (
type Row struct {
Matchers Matchers
RunesLength int
Segments []int
}
func NewRow(len int, m ...Matcher) Row {
return Row{
Matchers: Matchers(m),
RunesLength: len,
Segments: []int{len},
}
}
func (self Row) matchAll(s string) bool {
@ -59,14 +68,12 @@ func (self Row) Index(s string) (int, []int) {
for i := range s {
// this is not strict check but useful
// when glob will be refactored for usage with []rune
// it will be better
if len(s[i:]) < self.RunesLength {
break
}
if self.matchAll(s[i:]) {
return i, []int{self.RunesLength}
return i, self.Segments
}
}

View File

@ -5,20 +5,6 @@ import (
"testing"
)
func BenchmarkRowIndex(b *testing.B) {
m := Row{
Matchers: Matchers{
NewText("abc"),
NewText("def"),
Single{},
},
RunesLength: 7,
}
for i := 0; i < b.N; i++ {
m.Index("abcdefghijk")
}
}
func TestRowIndex(t *testing.T) {
for id, test := range []struct {
matchers Matchers
@ -31,7 +17,7 @@ func TestRowIndex(t *testing.T) {
Matchers{
NewText("abc"),
NewText("def"),
Single{},
NewSingle(nil),
},
7,
"qweabcdefghij",
@ -42,7 +28,7 @@ func TestRowIndex(t *testing.T) {
Matchers{
NewText("abc"),
NewText("def"),
Single{},
NewSingle(nil),
},
7,
"abcd",
@ -50,10 +36,7 @@ func TestRowIndex(t *testing.T) {
nil,
},
} {
p := Row{
Matchers: test.matchers,
RunesLength: test.length,
}
p := NewRow(test.length, test.matchers...)
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -63,3 +46,37 @@ func TestRowIndex(t *testing.T) {
}
}
}
func BenchmarkRowIndex(b *testing.B) {
m := NewRow(
7,
Matchers{
NewText("abc"),
NewText("def"),
NewSingle(nil),
}...,
)
for i := 0; i < b.N; i++ {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
func BenchmarkIndexRowParallel(b *testing.B) {
m := NewRow(
7,
Matchers{
NewText("abc"),
NewText("def"),
NewSingle(nil),
}...,
)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
})
}

91
match/segments.go Normal file
View File

@ -0,0 +1,91 @@
package match
import (
"sync"
)
// SomePool describes a pool of reusable []int slices: Get hands a slice out
// and Put takes one back.
// NOTE(review): nothing in the visible code implements or references this
// interface — confirm whether it is still needed.
type SomePool interface {
Get() []int
Put([]int)
}
// segmentsPools holds the pools of reusable segment slices, indexed by
// capacity-1. init installs a New function only in the slots for the
// power-of-two capacities from cacheFrom up to cacheToAndHigher; every other
// slot stays a zero-value sync.Pool.
var segmentsPools [1024]sync.Pool
// toPowerOfTwo rounds v up to the nearest power of two; a value that already
// is a power of two is returned unchanged. The bit smearing covers the low
// 32 bits only. Zero yields zero.
func toPowerOfTwo(v int) int {
	v--
	// Copy the highest set bit into every lower position, doubling the
	// smeared width on each pass (1, 2, 4, 8, 16).
	for shift := uint(1); shift <= 16; shift <<= 1 {
		v |= v >> shift
	}
	return v + 1
}
// Bounds of the pooled capacities and the matching segmentsPools indices
// (each index is the capacity minus one).
const (
// cacheFrom is the smallest capacity served from a pool; acquireSegments
// allocates anything smaller directly.
cacheFrom = 16
// cacheToAndHigher is the largest pooled capacity; getTableIndex maps every
// larger request to the same pool.
cacheToAndHigher = 1024
// cacheFromIndex is the segmentsPools index of the cacheFrom pool.
cacheFromIndex = 15
// cacheToAndHigherIndex is the segmentsPools index of the cacheToAndHigher pool.
cacheToAndHigherIndex = 1023
)
// Preallocated single-element segment slices holding the values 0 to 4.
// They are package-level values handed out as-is (for example by
// Nothing.Index), so a caller that modified one would change it for every
// other user.
var (
segments0 = []int{0}
segments1 = []int{1}
segments2 = []int{2}
segments3 = []int{3}
segments4 = []int{4}
)
// segmentsByRuneLength maps a length n in the range 0..4 to the shared
// one-element slice {n}. The Index methods of List, Range and Single index it
// with utf8.RuneLen(r) instead of allocating a new []int on every call.
var segmentsByRuneLength [5][]int = [5][]int{
0: segments0,
1: segments1,
2: segments2,
3: segments3,
4: segments4,
}
// init sets up one pool per power-of-two capacity between cacheFrom and
// cacheToAndHigher (inclusive). The pool for capacity n is stored at index
// n-1 and creates empty []int slices with capacity n.
func init() {
	for size := cacheToAndHigher; size >= cacheFrom; size >>= 1 {
		// Per-iteration copy, so each New closure keeps its own capacity.
		capacity := size
		segmentsPools[capacity-1] = sync.Pool{New: func() interface{} {
			return make([]int, 0, capacity)
		}}
	}
}
// getTableIndex returns the segmentsPools index responsible for capacity c.
// c is rounded up to a power of two and then clamped to the pooled range:
// results at or above cacheToAndHigher map to cacheToAndHigherIndex, results
// at or below cacheFrom map to cacheFromIndex, anything between maps to the
// rounded value minus one.
func getTableIndex(c int) int {
	size := toPowerOfTwo(c)

	if size >= cacheToAndHigher {
		return cacheToAndHigherIndex
	}
	if size <= cacheFrom {
		return cacheFromIndex
	}
	return size - 1
}
// acquireSegments returns an empty []int intended to hold about c segments.
//
// Requests below cacheFrom are allocated directly, because making such a
// small slice is faster than going through a pool. Larger requests are served
// from the pool chosen by getTableIndex. Requests above cacheToAndHigher share
// the largest pool, whose New function creates capacity cacheToAndHigher, so
// the returned capacity may be below c and callers should fill it with append.
func acquireSegments(c int) []int {
	if c >= cacheFrom {
		pool := &segmentsPools[getTableIndex(c)]
		return pool.Get().([]int)[:0]
	}

	return make([]int, 0, c)
}
// releaseSegments hands s back to the pool selected by its capacity so that a
// later acquireSegments call can reuse it. Slices with capacity below
// cacheFrom are simply dropped: acquireSegments allocates those directly
// rather than pooling them.
func releaseSegments(s []int) {
	if c := cap(s); c >= cacheFrom {
		segmentsPools[getTableIndex(c)].Put(s)
	}
}

83
match/segments_test.go Normal file
View File

@ -0,0 +1,83 @@
package match
import (
"sync"
"testing"
)
// benchPool measures, across parallel goroutines, one Get/Put round trip
// through a sync.Pool whose New function creates empty []int slices of
// capacity i.
func benchPool(i int, b *testing.B) {
pool := sync.Pool{New: func() interface{} {
return make([]int, 0, i)
}}
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
// reslice to length zero, the same way acquireSegments does
s := pool.Get().([]int)[:0]
pool.Put(s)
}
})
}
// benchMake is the baseline for benchPool: it measures, across parallel
// goroutines, making a fresh empty []int of capacity i on every iteration.
func benchMake(i int, b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_ = make([]int, 0, i)
}
})
}
// BenchmarkSegmentsPool_N runs benchPool with slice capacity N, for the
// powers of two from 1 to 256. Compare each with BenchmarkSegmentsMake_N.
func BenchmarkSegmentsPool_1(b *testing.B) {
benchPool(1, b)
}
func BenchmarkSegmentsPool_2(b *testing.B) {
benchPool(2, b)
}
func BenchmarkSegmentsPool_4(b *testing.B) {
benchPool(4, b)
}
func BenchmarkSegmentsPool_8(b *testing.B) {
benchPool(8, b)
}
func BenchmarkSegmentsPool_16(b *testing.B) {
benchPool(16, b)
}
func BenchmarkSegmentsPool_32(b *testing.B) {
benchPool(32, b)
}
func BenchmarkSegmentsPool_64(b *testing.B) {
benchPool(64, b)
}
func BenchmarkSegmentsPool_128(b *testing.B) {
benchPool(128, b)
}
func BenchmarkSegmentsPool_256(b *testing.B) {
benchPool(256, b)
}
// BenchmarkSegmentsMake_N runs benchMake with slice capacity N, for the
// powers of two from 1 to 256. Compare each with BenchmarkSegmentsPool_N.
func BenchmarkSegmentsMake_1(b *testing.B) {
benchMake(1, b)
}
func BenchmarkSegmentsMake_2(b *testing.B) {
benchMake(2, b)
}
func BenchmarkSegmentsMake_4(b *testing.B) {
benchMake(4, b)
}
func BenchmarkSegmentsMake_8(b *testing.B) {
benchMake(8, b)
}
func BenchmarkSegmentsMake_16(b *testing.B) {
benchMake(16, b)
}
func BenchmarkSegmentsMake_32(b *testing.B) {
benchMake(32, b)
}
func BenchmarkSegmentsMake_64(b *testing.B) {
benchMake(64, b)
}
func BenchmarkSegmentsMake_128(b *testing.B) {
benchMake(128, b)
}
func BenchmarkSegmentsMake_256(b *testing.B) {
benchMake(256, b)
}

View File

@ -2,13 +2,17 @@ package match
import (
"fmt"
"strings"
"github.com/gobwas/glob/runes"
"unicode/utf8"
)
// single represents ?
type Single struct {
Separators string
Separators []rune
}
func NewSingle(s []rune) Single {
return Single{s}
}
func (self Single) Match(s string) bool {
@ -17,7 +21,7 @@ func (self Single) Match(s string) bool {
return false
}
return strings.IndexRune(self.Separators, r) == -1
return runes.IndexRune(self.Separators, r) == -1
}
func (self Single) Len() int {
@ -26,8 +30,8 @@ func (self Single) Len() int {
func (self Single) Index(s string) (int, []int) {
for i, r := range s {
if strings.IndexRune(self.Separators, r) == -1 {
return i, []int{utf8.RuneLen(r)}
if runes.IndexRune(self.Separators, r) == -1 {
return i, segmentsByRuneLength[utf8.RuneLen(r)]
}
}
@ -35,5 +39,5 @@ func (self Single) Index(s string) (int, []int) {
}
func (self Single) String() string {
return fmt.Sprintf("<single:![%s]>", self.Separators)
return fmt.Sprintf("<single:![%s]>", string(self.Separators))
}

View File

@ -7,25 +7,25 @@ import (
func TestSingleIndex(t *testing.T) {
for id, test := range []struct {
separators string
separators []rune
fixture string
index int
segments []int
}{
{
".",
[]rune{'.'},
".abc",
1,
[]int{1},
},
{
".",
[]rune{'.'},
".",
-1,
nil,
},
} {
p := Single{test.separators}
p := NewSingle(test.separators)
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -37,8 +37,21 @@ func TestSingleIndex(t *testing.T) {
}
func BenchmarkIndexSingle(b *testing.B) {
m := Single{bench_separators}
m := NewSingle(bench_separators)
for i := 0; i < b.N; i++ {
m.Index(bench_pattern)
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
func BenchmarkIndexSingleParallel(b *testing.B) {
m := NewSingle(bench_separators)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
})
}

View File

@ -9,13 +9,8 @@ type Suffix struct {
Suffix string
}
func (self Suffix) Index(s string) (int, []int) {
idx := strings.Index(s, self.Suffix)
if idx == -1 {
return -1, nil
}
return 0, []int{idx + len(self.Suffix)}
func NewSuffix(s string) Suffix {
return Suffix{s}
}
func (self Suffix) Len() int {
@ -26,6 +21,15 @@ func (self Suffix) Match(s string) bool {
return strings.HasSuffix(s, self.Suffix)
}
func (self Suffix) Index(s string) (int, []int) {
idx := strings.Index(s, self.Suffix)
if idx == -1 {
return -1, nil
}
return 0, []int{idx + len(self.Suffix)}
}
func (self Suffix) String() string {
return fmt.Sprintf("<suffix:%s>", self.Suffix)
}

View File

@ -25,7 +25,7 @@ func TestSuffixIndex(t *testing.T) {
[]int{5},
},
} {
p := Suffix{test.prefix}
p := NewSuffix(test.prefix)
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -37,8 +37,21 @@ func TestSuffixIndex(t *testing.T) {
}
func BenchmarkIndexSuffix(b *testing.B) {
m := Suffix{"qwe"}
m := NewSuffix("qwe")
for i := 0; i < b.N; i++ {
m.Index(bench_pattern)
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
func BenchmarkIndexSuffixParallel(b *testing.B) {
m := NewSuffix("qwe")
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
})
}

View File

@ -2,11 +2,14 @@ package match
import (
"fmt"
"unicode/utf8"
)
type Super struct{}
func NewSuper() Super {
return Super{}
}
func (self Super) Match(s string) bool {
return true
}
@ -16,11 +19,10 @@ func (self Super) Len() int {
}
func (self Super) Index(s string) (int, []int) {
segments := make([]int, 0, utf8.RuneCountInString(s)+1)
segments := acquireSegments(len(s) + 1)
for i := range s {
segments = append(segments, i)
}
segments = append(segments, len(s))
return 0, segments

View File

@ -22,7 +22,7 @@ func TestSuperIndex(t *testing.T) {
[]int{0},
},
} {
p := Super{}
p := NewSuper()
index, segments := p.Index(test.fixture)
if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -34,8 +34,21 @@ func TestSuperIndex(t *testing.T) {
}
func BenchmarkIndexSuper(b *testing.B) {
m := Super{}
m := NewSuper()
for i := 0; i < b.N; i++ {
m.Index(bench_pattern)
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
func BenchmarkIndexSuperParallel(b *testing.B) {
m := NewSuper()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
})
}

View File

@ -11,6 +11,7 @@ type Text struct {
Str string
RunesLength int
BytesLength int
Segments []int
}
func NewText(s string) Text {
@ -18,6 +19,7 @@ func NewText(s string) Text {
Str: s,
RunesLength: utf8.RuneCountInString(s),
BytesLength: len(s),
Segments: []int{len(s)},
}
}
@ -29,15 +31,13 @@ func (self Text) Len() int {
return self.RunesLength
}
func (self Text) Index(s string) (index int, segments []int) {
index = strings.Index(s, self.Str)
func (self Text) Index(s string) (int, []int) {
index := strings.Index(s, self.Str)
if index == -1 {
return
return -1, nil
}
segments = []int{self.BytesLength}
return
return index, self.Segments
}
func (self Text) String() string {

View File

@ -38,7 +38,20 @@ func TestTextIndex(t *testing.T) {
func BenchmarkIndexText(b *testing.B) {
m := NewText("foo")
for i := 0; i < b.N; i++ {
m.Index(bench_pattern)
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
}
func BenchmarkIndexTextParallel(b *testing.B) {
m := NewText("foo")
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, s := m.Index(bench_pattern)
releaseSegments(s)
}
})
}

View File

@ -26,13 +26,13 @@ func main() {
g.Match("api.github.com") // true
// create new glob with set of delimiters as ['.']
g = glob.MustCompile("api.*.com", ".")
g = glob.MustCompile("api.*.com", '.')
g.Match("api.github.com") // true
g.Match("api.gi.hub.com") // false
// create new glob with set of delimiters as ['.']
// but now with super wildcard
g = glob.MustCompile("api.**.com", ".")
g = glob.MustCompile("api.**.com", '.')
g.Match("api.github.com") // true
g.Match("api.gi.hub.com") // true
@ -42,8 +42,8 @@ func main() {
g.Match("fat") // true
g.Match("at") // false
// create glob with single symbol wildcard and delimiters ["f"]
g = glob.MustCompile("?at", "f")
// create glob with single symbol wildcard and delimiters ['f']
g = glob.MustCompile("?at", 'f')
g.Match("cat") // true
g.Match("fat") // false
g.Match("at") // false
@ -100,18 +100,18 @@ Run `go test -bench=.` from source root to see the benchmarks:
Pattern | Fixture | Match | Operations | Speed (ns/op)
--------|---------|-------|------------|--------------
`[a-z][!a-x]*cat*[h][!b]*eyes*` | `my cat has very bright eyes` | `true` | 2000000 | 527
`[a-z][!a-x]*cat*[h][!b]*eyes*` | `my dog has very bright eyes` | `false` | 10000000 | 229
`https://*.google.*` | `https://account.google.com` | `true` | 10000000 | 121
`https://*.google.*` | `https://google.com` | `false` | 20000000 | 68.6
`{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}` | `http://yahoo.com` | `true` | 10000000 | 167
`{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}` | `http://google.com` | `false` | 10000000 | 198
`{https://*gobwas.com,http://exclude.gobwas.com}` | `https://safe.gobwas.com` | `true` | 100000000 | 23.9
`{https://*gobwas.com,http://exclude.gobwas.com}` | `http://safe.gobwas.com` | `false` | 50000000 | 24.7
`abc*` | `abcdef` | `true` | 200000000 | 8.86
`abc*` | `af` | `false` | 300000000 | 4.99
`*def` | `abcdef` | `true` | 200000000 | 9.23
`*def` | `af` | `false` | 300000000 | 5.44
`[a-z][!a-x]*cat*[h][!b]*eyes*` | `my cat has very bright eyes` | `true` | 2000000 | 432
`[a-z][!a-x]*cat*[h][!b]*eyes*` | `my dog has very bright eyes` | `false` | 10000000 | 199
`https://*.google.*` | `https://account.google.com` | `true` | 10000000 | 96
`https://*.google.*` | `https://google.com` | `false` | 20000000 | 66
`{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}` | `http://yahoo.com` | `true` | 10000000 | 163
`{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}` | `http://google.com` | `false` | 10000000 | 197
`{https://*gobwas.com,http://exclude.gobwas.com}` | `https://safe.gobwas.com` | `true` | 100000000 | 22
`{https://*gobwas.com,http://exclude.gobwas.com}` | `http://safe.gobwas.com` | `false` | 50000000 | 24
`abc*` | `abcdef` | `true` | 200000000 | 8.15
`abc*` | `af` | `false` | 300000000 | 5.68
`*def` | `abcdef` | `true` | 200000000 | 8.84
`*def` | `af` | `false` | 300000000 | 5.74
`ab*ef` | `abcdef` | `true` | 100000000 | 15.2
`ab*ef` | `af` | `false` | 100000000 | 10.4

154
runes/runes.go Normal file
View File

@ -0,0 +1,154 @@
package runes
// Index returns the position of the first occurrence of needle in s, or -1
// if needle does not occur in s. An empty needle is found at position 0.
func Index(s, needle []rune) int {
	ls, ln := len(s), len(needle)

	// Cases that can be answered without a general scan.
	if ln == 0 {
		return 0
	}
	if ln == 1 {
		return IndexRune(s, needle[0])
	}
	if ln == ls {
		if Equal(s, needle) {
			return 0
		}
		return -1
	}
	if ln > ls {
		return -1
	}

	// Try every start position that leaves room for the whole needle.
	for start := 0; start+ln <= ls; start++ {
		matched := true
		for k, r := range needle {
			if s[start+k] != r {
				matched = false
				break
			}
		}
		if matched {
			return start
		}
	}

	return -1
}
// LastIndex returns the position of the last occurrence of needle in s, or
// -1 if needle does not occur in s. An empty needle is found at len(s),
// mirroring strings.LastIndex.
func LastIndex(s, needle []rune) int {
	ls, ln := len(s), len(needle)

	if ln == 0 {
		return ls
	}
	if ln > ls {
		return -1
	}

	// Walk the candidate start positions from the rightmost one down to 0,
	// inclusive: position 0 must be tried too, otherwise a needle that only
	// occurs at the very beginning of s is never found.
head:
	for start := ls - ln; start >= 0; start-- {
		for y := 0; y < ln; y++ {
			if s[start+y] != needle[y] {
				continue head
			}
		}
		return start
	}

	return -1
}
// IndexAny reports the position of the first rune of s that also appears in
// chars. It returns -1 when s and chars have no rune in common, which
// includes the case of either slice being empty.
func IndexAny(s, chars []rune) int {
	if len(chars) == 0 {
		return -1
	}

	for i := 0; i < len(s); i++ {
		for j := 0; j < len(chars); j++ {
			if s[i] == chars[j] {
				return i
			}
		}
	}

	return -1
}
// Contains reports whether needle occurs anywhere in s.
func Contains(s, needle []rune) bool {
	pos := Index(s, needle)
	return pos >= 0
}
// Max returns the largest rune in s. The result starts at the zero value, so
// it is 0 when s is empty or when no rune in s is greater than 0.
func Max(s []rune) (max rune) {
	for i := 0; i < len(s); i++ {
		if max < s[i] {
			max = s[i]
		}
	}
	return max
}
// Min returns the smallest rune in s, or -1 when s is empty.
//
// The value -1 doubles as the "nothing seen yet" marker: whenever the running
// minimum equals -1 it is replaced by the next rune unconditionally.
func Min(s []rune) rune {
	lowest := rune(-1)
	for i := 0; i < len(s); i++ {
		if lowest == -1 || s[i] < lowest {
			lowest = s[i]
		}
	}
	return lowest
}
// IndexRune returns the index of the first element of s equal to r, or -1 if
// r is not present in s.
func IndexRune(s []rune, r rune) int {
	for pos := 0; pos < len(s); pos++ {
		if s[pos] == r {
			return pos
		}
	}
	return -1
}
// IndexLastRune returns the index of the last element of s equal to r, or -1
// if r is not present in s.
func IndexLastRune(s []rune, r rune) int {
	// end is one past the candidate position, so the scan runs right to left.
	for end := len(s); end > 0; end-- {
		if s[end-1] == r {
			return end - 1
		}
	}
	return -1
}
// Equal reports whether a and b have the same length and hold the same runes
// in the same order.
func Equal(a, b []rune) bool {
	if len(a) != len(b) {
		return false
	}
	for i, r := range a {
		if r != b[i] {
			return false
		}
	}
	return true
}
// HasPrefix tests whether the rune slice s begins with prefix.
func HasPrefix(s, prefix []rune) bool {
	n := len(prefix)
	if len(s) < n {
		return false
	}
	return Equal(s[:n], prefix)
}
// HasSuffix tests whether the rune slice s ends with suffix.
func HasSuffix(s, suffix []rune) bool {
	offset := len(s) - len(suffix)
	if offset < 0 {
		return false
	}
	return Equal(s[offset:], suffix)
}

222
runes/runes_test.go Normal file
View File

@ -0,0 +1,222 @@
package runes
import (
"strings"
"testing"
)
// indexTest is one table-driven case for the index-style functions: the
// input s, the value searched for in it (sep) and the expected result (out).
type indexTest struct {
s []rune
sep []rune
out int
}
// equalTest is one table-driven case for Equal: the two slices to compare
// (a and b) and the expected result (out).
type equalTest struct {
a []rune
b []rune
out bool
}
// newIndexTest builds an indexTest from string arguments, converting s and
// sep to rune slices.
func newIndexTest(s, sep string, out int) indexTest {
	return indexTest{
		s:   []rune(s),
		sep: []rune(sep),
		out: out,
	}
}
// newEqualTest builds an equalTest from string arguments, converting both
// strings to rune slices.
func newEqualTest(s, sep string, out bool) equalTest {
	return equalTest{
		a:   []rune(s),
		b:   []rune(sep),
		out: out,
	}
}
// dots is a filler string containing no spaces; indexAnyTests concatenates it
// to build a longer input for a search that finds nothing.
var dots = "1....2....3....4"
// indexTests lists the cases run against Index: input, needle and the
// expected index of the first occurrence (-1 when absent).
var indexTests = []indexTest{
newIndexTest("", "", 0),
newIndexTest("", "a", -1),
newIndexTest("", "foo", -1),
newIndexTest("fo", "foo", -1),
newIndexTest("foo", "foo", 0),
newIndexTest("oofofoofooo", "f", 2),
newIndexTest("oofofoofooo", "foo", 4),
newIndexTest("barfoobarfoo", "foo", 3),
newIndexTest("foo", "", 0),
newIndexTest("foo", "o", 1),
newIndexTest("abcABCabc", "A", 3),
// cases with a single-rune needle - test the special case in Index()
newIndexTest("", "a", -1),
newIndexTest("x", "a", -1),
newIndexTest("x", "x", 0),
newIndexTest("abc", "a", 0),
newIndexTest("abc", "b", 1),
newIndexTest("abc", "c", 2),
newIndexTest("abc", "x", -1),
}
// lastIndexTests lists the cases run against LastIndex: input, needle and the
// expected index of the last occurrence (-1 when absent).
//
// NOTE(review): no case here uses a multi-rune needle whose only match is at
// the start of a longer input (for example "foobar" / "foo", expected 0).
var lastIndexTests = []indexTest{
newIndexTest("", "", 0),
newIndexTest("", "a", -1),
newIndexTest("", "foo", -1),
newIndexTest("fo", "foo", -1),
newIndexTest("foo", "foo", 0),
newIndexTest("foo", "f", 0),
newIndexTest("oofofoofooo", "f", 7),
newIndexTest("oofofoofooo", "foo", 7),
newIndexTest("barfoobarfoo", "foo", 9),
newIndexTest("foo", "", 3),
newIndexTest("foo", "o", 2),
newIndexTest("abcABCabc", "A", 3),
newIndexTest("abcABCabc", "a", 6),
}
// indexAnyTests lists the cases run against IndexAny. Here sep holds the set
// of runes to look for and out is the expected index of the first rune of s
// that belongs to that set (-1 when none does).
var indexAnyTests = []indexTest{
newIndexTest("", "", -1),
newIndexTest("", "a", -1),
newIndexTest("", "abc", -1),
newIndexTest("a", "", -1),
newIndexTest("a", "a", 0),
newIndexTest("aaa", "a", 0),
newIndexTest("abc", "xyz", -1),
newIndexTest("abc", "xcz", 2),
newIndexTest("a☺b☻c☹d", "uvw☻xyz", 3),
newIndexTest("aRegExp*", ".(|)*+?^$[]", 7),
newIndexTest(dots+dots+dots, " ", -1),
}
// runIndexTests applies f to every case in testCases and reports a test error
// for each result that differs from the expected one. funcName should be the
// name of f; it is only used in failure reports.
func runIndexTests(t *testing.T, f func(s, sep []rune) int, funcName string, testCases []indexTest) {
	for i := range testCases {
		tc := testCases[i]
		if got := f(tc.s, tc.sep); got != tc.out {
			t.Errorf("%s(%q,%q) = %v; want %v", funcName, tc.s, tc.sep, got, tc.out)
		}
	}
}
// TestIndex runs Index against every case in indexTests.
func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) }
// TestLastIndex runs LastIndex against every case in lastIndexTests.
func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) }
// TestIndexAny runs IndexAny against every case in indexAnyTests.
func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) }
// equalTests lists the cases run against Equal: the two inputs and whether
// they are expected to compare equal.
var equalTests = []equalTest{
newEqualTest("a", "a", true),
newEqualTest("a", "b", false),
newEqualTest("a☺b☻c☹d", "uvw☻xyz", false),
newEqualTest("a☺b☻c☹d", "a☺b☻c☹d", true),
}
// TestEqual checks Equal against every case in equalTests and reports each
// mismatch between the actual and the expected result.
func TestEqual(t *testing.T) {
	for i := range equalTests {
		tc := equalTests[i]
		if got := Equal(tc.a, tc.b); got != tc.out {
			t.Errorf("Equal(%q,%q) = %v; want %v", tc.a, tc.b, got, tc.out)
		}
	}
}
// BenchmarkLastIndexRunes measures LastIndex on a six-rune input with a
// two-rune needle.
func BenchmarkLastIndexRunes(b *testing.B) {
r := []rune("abcdef")
n := []rune("cd")
for i := 0; i < b.N; i++ {
LastIndex(r, n)
}
}
// BenchmarkLastIndexStrings measures the standard library strings.LastIndex
// on the same input and needle that BenchmarkLastIndexRunes uses.
func BenchmarkLastIndexStrings(b *testing.B) {
r := "abcdef"
n := "cd"
for i := 0; i < b.N; i++ {
strings.LastIndex(r, n)
}
}
// BenchmarkIndexAnyRunes measures IndexAny on a seven-rune input with a
// three-rune character set.
func BenchmarkIndexAnyRunes(b *testing.B) {
s := []rune("...b...")
c := []rune("abc")
for i := 0; i < b.N; i++ {
IndexAny(s, c)
}
}
// BenchmarkIndexAnyStrings measures the standard library strings.IndexAny on
// the same input and character set that BenchmarkIndexAnyRunes uses.
func BenchmarkIndexAnyStrings(b *testing.B) {
s := "...b..."
c := "abc"
for i := 0; i < b.N; i++ {
strings.IndexAny(s, c)
}
}
// BenchmarkIndexRuneRunes measures IndexRune searching a seven-rune input for
// a single rune.
func BenchmarkIndexRuneRunes(b *testing.B) {
s := []rune("...b...")
r := 'b'
for i := 0; i < b.N; i++ {
IndexRune(s, r)
}
}
// BenchmarkIndexRuneStrings measures the standard library strings.IndexRune
// on the same input and rune that BenchmarkIndexRuneRunes uses.
func BenchmarkIndexRuneStrings(b *testing.B) {
s := "...b..."
r := 'b'
for i := 0; i < b.N; i++ {
strings.IndexRune(s, r)
}
}
// BenchmarkIndexRunes measures Index on a six-rune input with a two-rune
// needle.
func BenchmarkIndexRunes(b *testing.B) {
r := []rune("abcdef")
n := []rune("cd")
for i := 0; i < b.N; i++ {
Index(r, n)
}
}
// BenchmarkIndexStrings measures the standard library strings.Index on the
// same input and needle that BenchmarkIndexRunes uses.
func BenchmarkIndexStrings(b *testing.B) {
r := "abcdef"
n := "cd"
for i := 0; i < b.N; i++ {
strings.Index(r, n)
}
}
// BenchmarkEqualRunes measures Equal on two rune slices with identical
// content.
func BenchmarkEqualRunes(b *testing.B) {
x := []rune("abc")
y := []rune("abc")
for i := 0; i < b.N; i++ {
// The branch only consumes the result of Equal.
if Equal(x, y) {
continue
}
}
}
// BenchmarkEqualStrings measures the built-in == comparison of two strings
// with identical content; presumably the reference point for
// BenchmarkEqualRunes.
func BenchmarkEqualStrings(b *testing.B) {
x := "abc"
y := "abc"
for i := 0; i < b.N; i++ {
// The branch only consumes the result of the comparison.
if x == y {
continue
}
}
}
// BenchmarkNotEqualRunes measures Equal on two rune slices of different
// length.
func BenchmarkNotEqualRunes(b *testing.B) {
x := []rune("abc")
y := []rune("abcd")
for i := 0; i < b.N; i++ {
// The branch only consumes the result of Equal.
if Equal(x, y) {
continue
}
}
}
// BenchmarkNotEqualStrings measures the built-in == comparison of two strings
// of different length; presumably the reference point for
// BenchmarkNotEqualRunes.
func BenchmarkNotEqualStrings(b *testing.B) {
x := "abc"
y := "abcd"
for i := 0; i < b.N; i++ {
// The branch only consumes the result of the comparison.
if x == y {
continue
}
}
}

13
strings/strings.go Normal file
View File

@ -0,0 +1,13 @@
package strings
import "strings"
// IndexAnyRunes returns the byte index of the first occurrence in s of any
// rune from rs, or -1 if none of the runes in rs occurs in s (including when
// rs is empty).
//
// The result is the earliest position in s regardless of the order of rs.
func IndexAnyRunes(s string, rs []rune) int {
	found := -1
	for _, r := range rs {
		i := strings.IndexRune(s, r)
		if i == -1 {
			continue
		}
		found = i
		if i == 0 {
			// Nothing can occur earlier than the first byte.
			break
		}
		// Only an occurrence before the best hit so far can improve the
		// result, so the remaining runes search just that prefix.
		s = s[:i]
	}
	return found
}

26
todo.txt Normal file
View File

@ -0,0 +1,26 @@
benchmark old ns/op new ns/op delta
BenchmarkAllGlobMatch-4 519 1024 +97.30%
BenchmarkMultipleGlobMatch-4 123 218 +77.24%
BenchmarkAlternativesGlobMatch-4 164 283 +72.56%
BenchmarkAlternativesSuffixFirstGlobMatch-4 23.6 23.5 -0.42%
BenchmarkAlternativesSuffixSecondGlobMatch-4 29.7 30.1 +1.35%
BenchmarkAlternativesCombineLiteGlobMatch-4 161 352 +118.63%
BenchmarkAlternativesCombineHardGlobMatch-4 321 649 +102.18%
BenchmarkPlainGlobMatch-4 7.17 7.09 -1.12%
BenchmarkPrefixGlobMatch-4 8.74 8.64 -1.14%
BenchmarkSuffixGlobMatch-4 10.3 9.06 -12.04%
BenchmarkPrefixSuffixGlobMatch-4 31.0 15.1 -51.29%
BenchmarkIndexAny-4 1414 232 -83.59%
BenchmarkIndexContains-4 557 250 -55.12%
BenchmarkIndexList-4 207 42.6 -79.42%
BenchmarkIndexMax-4 630 111 -82.38%
BenchmarkIndexMin-4 515 328 -36.31%
BenchmarkIndexPrefixSuffix-4 97.9 86.2 -11.95%
BenchmarkIndexPrefix-4 86.1 84.0 -2.44%
BenchmarkIndexRange-4 181 144 -20.44%
BenchmarkRowIndex-4 185 127 -31.35%
BenchmarkIndexSingle-4 82.6 16.0 -80.63%
BenchmarkIndexSuffix-4 85.5 84.9 -0.70%
BenchmarkIndexSuper-4 450 196 -56.44%
BenchmarkIndexText-4 85.3 85.9 +0.70%