Compare commits

...

14 Commits

Author SHA1 Message Date
gobwas 88b0fce4f5 test 2016-02-22 23:34:16 +03:00
gobwas ea3a6f712d t 2016-02-22 23:18:25 +03:00
gobwas 6cb2c11fd7 debug 2016-02-22 23:12:29 +03:00
gobwas 291b8084ed debug 2016-02-22 23:12:17 +03:00
gobwas be21acf93c fix 2016-02-22 22:46:45 +03:00
gobwas 205c640a14 ttt 2016-02-22 22:32:30 +03:00
gobwas 35629e8ad6 ttt 2016-02-22 22:24:05 +03:00
gobwas bff71ed368 test parallel 2016-02-22 22:17:07 +03:00
gobwas 54004631f5 t 2016-02-22 20:32:28 +03:00
gobwas 9b07f114a0 use sequenced pool 2016-02-22 20:22:04 +03:00
gobwas 325689ef4a prog 2016-02-22 20:11:06 +03:00
gobwas 99cf82455b test for nil slice 2016-02-22 19:43:52 +03:00
gobwas 9c0c7cba85 reuse 2016-02-22 18:47:01 +03:00
gobwas 0c30789d3a tune 2016-02-22 17:18:13 +03:00
27 changed files with 436 additions and 102 deletions

View File

@ -1,64 +1,16 @@
package main
import (
"bytes"
"flag"
"fmt"
"github.com/gobwas/glob"
"github.com/gobwas/glob/match"
"math/rand"
"github.com/gobwas/glob/match/debug"
"os"
"strings"
"unicode/utf8"
)
// draw renders matcher m as a complete Graphviz "digraph" document whose graph
// label is pattern. The root node is named with a random hexadecimal number
// taken from math/rand.
func draw(pattern string, m match.Matcher) string {
	rootID := fmt.Sprintf("%x", rand.Int63())
	nodes := graphviz(m, rootID)
	return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, nodes)
}
// graphviz returns the Graphviz statements that describe matcher m, using id
// as the name of the node representing m itself.
//
// BTree, AnyOf and EveryOf matchers are expanded recursively; every child node
// is named with a random hexadecimal number taken from math/rand. Any other
// matcher becomes a single node labelled with its String() value.
func graphviz(m match.Matcher, id string) string {
	buf := &bytes.Buffer{}

	switch matcher := m.(type) {
	case match.BTree:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, matcher.Value.String())
		for _, m := range []match.Matcher{matcher.Left, matcher.Right} {
			switch n := m.(type) {
			case nil:
				// Draw an explicit "<nil>" node so a missing child stays visible.
				rnd := rand.Int63()
				fmt.Fprintf(buf, `"%x"[label="<nil>"];`, rnd)
				fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)

			default:
				sub := fmt.Sprintf("%x", rand.Int63())
				fmt.Fprintf(buf, `"%s"->"%s";`, id, sub)
				// The rendered sub-graph is data, not a format string: a '%' in
				// a matcher label would otherwise be interpreted as a verb.
				buf.WriteString(graphviz(n, sub))
			}
		}

	case match.AnyOf:
		fmt.Fprintf(buf, `"%s"[label="AnyOf"];`, id)
		for _, m := range matcher.Matchers {
			rnd := rand.Int63()
			// Written verbatim for the same reason as in the BTree case.
			buf.WriteString(graphviz(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}

	case match.EveryOf:
		fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id)
		for _, m := range matcher.Matchers {
			rnd := rand.Int63()
			// Written verbatim for the same reason as in the BTree case.
			buf.WriteString(graphviz(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}

	default:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m.String())
	}

	return buf.String()
}
func main() {
pattern := flag.String("p", "", "pattern to draw")
sep := flag.String("s", "", "comma separated list of separators characters")
@ -70,12 +22,14 @@ func main() {
}
var separators []rune
for _, c := range strings.Split(*sep, ",") {
if r, w := utf8.DecodeRuneInString(c); len(c) > w {
fmt.Println("only single charactered separators are allowed")
os.Exit(1)
} else {
separators = append(separators, r)
if len(*sep) > 0 {
for _, c := range strings.Split(*sep, ",") {
if r, w := utf8.DecodeRuneInString(c); len(c) > w {
fmt.Println("only single charactered separators are allowed")
os.Exit(1)
} else {
separators = append(separators, r)
}
}
}
@ -86,5 +40,5 @@ func main() {
}
matcher := glob.(match.Matcher)
fmt.Fprint(os.Stdout, draw(*pattern, matcher))
fmt.Fprint(os.Stdout, debug.Graphviz(*pattern, matcher))
}

View File

@ -18,7 +18,6 @@ func (self Any) Index(s string, segments []int) (int, []int) {
switch found {
case -1:
case 0:
segments = append(segments)
return 0, segments
default:
s = s[:found]
@ -27,7 +26,6 @@ func (self Any) Index(s string, segments []int) (int, []int) {
for i := range s {
segments = append(segments, i)
}
segments = append(segments, len(s))
return 0, segments
@ -38,5 +36,5 @@ func (self Any) Len() int {
}
func (self Any) String() string {
return fmt.Sprintf("<any:![%s]>", self.Separators)
return fmt.Sprintf("<any:![%s]>", string(self.Separators))
}

View File

@ -27,10 +27,14 @@ func (self AnyOf) Index(s string, segments []int) (int, []int) {
index := -1
// create reusable segments
in := make([]int, 0, len(s))
// seg := acquireSegments(len(s))
// defer func() {
// releaseSegments(seg)
// }()
var seg []int
for _, m := range self.Matchers {
idx, seg := m.Index(s, in[:0])
idx, seg := m.Index(s, seg[:0])
if idx == -1 {
continue
}

View File

@ -38,10 +38,10 @@ func TestAnyIndex(t *testing.T) {
func BenchmarkIndexAny(b *testing.B) {
m := Any{bench_separators}
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -51,7 +51,7 @@ func BenchmarkIndexAnyParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

View File

@ -77,13 +77,21 @@ func (self BTree) Match(s string) bool {
limit = inputLen
}
if offset >= limit {
return false
}
// reusable segments list
// inputLen is the maximum size of output segments values
in := make([]int, 0, inputLen)
segments := acquireSegments(inputLen)
defer func() {
releaseSegments(segments)
}()
// segments := make([]int, 0, 4)
for offset < limit {
// search for matching part in substring
index, segments := self.Value.Index(s[offset:limit], in[:0])
index, segments := self.Value.Index(s[offset:limit], segments[:0])
if index == -1 {
return false
}

View File

@ -46,3 +46,44 @@ func TestBTree(t *testing.T) {
}
}
}
// fakeMatcher is a test stub: it matches every input and reports a fixed
// length and name.
type fakeMatcher struct {
	len  int
	name string
}

// Match accepts any string.
func (f *fakeMatcher) Match(string) bool {
	return true
}

// i is the number of segments fakeMatcher.Index appends on each call.
var i = 3

// Index always reports a match at offset 0 and appends the values 0..i-1 to
// seg, returning the extended slice.
func (f *fakeMatcher) Index(s string, seg []int) (int, []int) {
	out := seg
	for next := 0; next < i; next++ {
		out = append(out, next)
	}
	return 0, out
}

// Len returns the length the stub was constructed with.
func (f *fakeMatcher) Len() int {
	return f.len
}

// String returns the name the stub was constructed with.
func (f *fakeMatcher) String() string {
	return f.name
}
// BenchmarkMatchBTree measures Match on the matcher returned by
// NewBTree(v, l, r), where the value and both children are fakeMatcher stubs,
// called repeatedly on one fixed fixture from the goroutines of RunParallel.
func BenchmarkMatchBTree(b *testing.B) {
l := &fakeMatcher{4, "left_fake"}
r := &fakeMatcher{4, "right_fake"}
v := &fakeMatcher{2, "value_fake"}
// must be <= len(l + r + v)
// NOTE(review): the stub lengths above sum to 10 while the fixture below is
// longer than that; confirm which direction the constraint is meant to go.
fixture := "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"
bt := NewBTree(v, l, r)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
bt.Match(fixture)
}
})
}

View File

@ -58,7 +58,7 @@ func BenchmarkIndexContains(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -68,7 +68,7 @@ func BenchmarkIndexContainsParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

55
match/debug/graphviz.go Normal file
View File

@ -0,0 +1,55 @@
package debug
import (
"bytes"
"fmt"
"github.com/gobwas/glob/match"
"math/rand"
)
// Graphviz renders matcher m as a complete Graphviz "digraph" document whose
// graph label is pattern. The root node is named with a random hexadecimal
// number taken from math/rand.
func Graphviz(pattern string, m match.Matcher) string {
	rootID := fmt.Sprintf("%x", rand.Int63())
	nodes := matcherToGraphvizNode(m, rootID)
	return fmt.Sprintf(`digraph G {graph[label="%s"];%s}`, pattern, nodes)
}
// matcherToGraphvizNode returns the Graphviz statements that describe matcher
// m, using id as the name of the node representing m itself.
//
// BTree, AnyOf and EveryOf matchers are expanded recursively; every child node
// is named with a random hexadecimal number taken from math/rand. Any other
// matcher becomes a single node labelled with its String() value.
func matcherToGraphvizNode(m match.Matcher, id string) string {
	buf := &bytes.Buffer{}

	switch matcher := m.(type) {
	case match.BTree:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, matcher.Value.String())
		for _, m := range []match.Matcher{matcher.Left, matcher.Right} {
			switch n := m.(type) {
			case nil:
				// Draw an explicit "<nil>" node so a missing child stays visible.
				rnd := rand.Int63()
				fmt.Fprintf(buf, `"%x"[label="<nil>"];`, rnd)
				fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)

			default:
				sub := fmt.Sprintf("%x", rand.Int63())
				fmt.Fprintf(buf, `"%s"->"%s";`, id, sub)
				// The rendered sub-graph is data, not a format string: a '%' in
				// a matcher label would otherwise be interpreted as a verb.
				buf.WriteString(matcherToGraphvizNode(n, sub))
			}
		}

	case match.AnyOf:
		fmt.Fprintf(buf, `"%s"[label="AnyOf"];`, id)
		for _, m := range matcher.Matchers {
			rnd := rand.Int63()
			// Written verbatim for the same reason as in the BTree case.
			buf.WriteString(matcherToGraphvizNode(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}

	case match.EveryOf:
		fmt.Fprintf(buf, `"%s"[label="EveryOf"];`, id)
		for _, m := range matcher.Matchers {
			rnd := rand.Int63()
			// Written verbatim for the same reason as in the BTree case.
			buf.WriteString(matcherToGraphvizNode(m, fmt.Sprintf("%x", rnd)))
			fmt.Fprintf(buf, `"%s"->"%x";`, id, rnd)
		}

	default:
		fmt.Fprintf(buf, `"%s"[label="%s"];`, id, m.String())
	}

	return buf.String()
}

View File

@ -31,13 +31,24 @@ func (self EveryOf) Index(s string, out []int) (int, []int) {
// make `in` with cap as len(s),
// cause it is the maximum size of output segments values
in := make([]int, 0, len(s))
next := make([]int, 0, len(s))
current := make([]int, 0, len(s))
// seg := acquireSegments(len(s))
// next := acquireSegments(len(s))
// current := acquireSegments(len(s))
// defer func() {
// releaseSegments(seg)
// releaseSegments(next)
// releaseSegments(current)
// }()
var (
seg []int
next []int
current []int
)
sub := s
for i, m := range self.Matchers {
idx, seg := m.Index(sub, in[:0])
var idx int
idx, seg = m.Index(sub, seg[:0])
if idx == -1 {
return -1, nil
}

View File

@ -41,5 +41,10 @@ func (self List) String() string {
not = "!"
}
return fmt.Sprintf("<list:%s[%s]>", not, self.List)
// var list []string
// for _, r := range self.List {
// list = append(list, string(r))
// }
return fmt.Sprintf("<list:%s[%s]>", not, string(self.List))
}

View File

@ -44,7 +44,7 @@ func BenchmarkIndexList(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -54,7 +54,7 @@ func BenchmarkIndexListParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

View File

@ -41,7 +41,7 @@ func BenchmarkIndexMax(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -51,7 +51,7 @@ func BenchmarkIndexMaxParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

View File

@ -41,7 +41,7 @@ func BenchmarkIndexMin(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -51,7 +51,7 @@ func BenchmarkIndexMinParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

View File

@ -38,7 +38,7 @@ func BenchmarkIndexNothing(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -48,7 +48,7 @@ func BenchmarkIndexNothingParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

View File

@ -51,7 +51,7 @@ func BenchmarkIndexPrefixSuffix(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -61,7 +61,7 @@ func BenchmarkIndexPrefixSuffixParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

View File

@ -41,7 +41,7 @@ func BenchmarkIndexPrefix(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -51,7 +51,7 @@ func BenchmarkIndexPrefixParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

View File

@ -35,7 +35,7 @@ func (self Range) Index(s string, segments []int) (int, []int) {
}
}
return -1, nil
return -1, segments
}
func (self Range) String() string {
@ -43,5 +43,5 @@ func (self Range) String() string {
if self.Not {
not = "!"
}
return fmt.Sprintf("<range:%s[%s,%s]>", not, string(self.Lo), string(self.Hi))
return fmt.Sprintf("<range:%s[%s-%s]>", not, string(self.Lo), string(self.Hi))
}

View File

@ -51,7 +51,7 @@ func BenchmarkIndexRange(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -61,7 +61,7 @@ func BenchmarkIndexRangeParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

View File

@ -54,7 +54,7 @@ func (self Row) Len() (l int) {
func (self Row) Index(s string, segments []int) (int, []int) {
if !self.lenOk(s) {
return -1, nil
return -1, segments
}
for i := range s {
@ -70,7 +70,7 @@ func (self Row) Index(s string, segments []int) (int, []int) {
}
}
return -1, nil
return -1, segments
}
func (self Row) String() string {

View File

@ -62,7 +62,7 @@ func BenchmarkRowIndex(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -79,7 +79,7 @@ func BenchmarkIndexRowParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

65
match/segements_test.go Normal file
View File

@ -0,0 +1,65 @@
package match
import (
"testing"
)
// BenchmarkPerfPoolSequenced measures one Get immediately followed by one Put
// on a PoolSequenced built with size 512 and a factory producing slices of
// capacity 16, executed from the goroutines started by RunParallel.
func BenchmarkPerfPoolSequenced(b *testing.B) {
pool := NewPoolSequenced(512, func() []int {
return make([]int, 0, 16)
})
// Per the testing package, RunParallel will use 32*GOMAXPROCS goroutines.
b.SetParallelism(32)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
s := pool.Get()
pool.Put(s)
}
})
}
// BenchmarkPerfPoolSynced measures one Get immediately followed by one Put on
// a PoolSynced built with size 32, executed from the goroutines started by
// RunParallel.
func BenchmarkPerfPoolSynced(b *testing.B) {
pool := NewPoolSynced(32)
// Per the testing package, RunParallel will use 32*GOMAXPROCS goroutines.
b.SetParallelism(32)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
s := pool.Get()
pool.Put(s)
}
})
}
// BenchmarkPerfPoolNative measures one Get immediately followed by one Put on
// a PoolNative whose factory produces slices of capacity 16, executed from the
// goroutines started by RunParallel.
func BenchmarkPerfPoolNative(b *testing.B) {
pool := NewPoolNative(func() []int {
return make([]int, 0, 16)
})
// Per the testing package, RunParallel will use 32*GOMAXPROCS goroutines.
b.SetParallelism(32)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
s := pool.Get()
pool.Put(s)
}
})
}
// BenchmarkPerfPoolStatic measures one Get immediately followed by one Put on
// a PoolStatic built with size 32 and a factory producing slices of capacity
// 16, executed from the goroutines started by RunParallel.
func BenchmarkPerfPoolStatic(b *testing.B) {
pool := NewPoolStatic(32, func() []int {
return make([]int, 0, 16)
})
// Per the testing package, RunParallel will use 32*GOMAXPROCS goroutines.
b.SetParallelism(32)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
// The index returned by Get must be handed back to Put with the slice.
i, v := pool.Get()
pool.Put(i, v)
}
})
}
// BenchmarkPerfMake is the no-pool baseline: every iteration evaluates
// make([]int, 0, 32) and discards the result.
// NOTE(review): the result is never used, so the compiler may be able to
// elide the allocation; confirm the numbers with -benchmem before comparing.
func BenchmarkPerfMake(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = make([]int, 0, 32)
}
}

193
match/segments.go Normal file
View File

@ -0,0 +1,193 @@
package match
import (
"sync"
"sync/atomic"
)
// segmentsPools is the table of segment pools indexed by size class. init
// fills the entry at index size-1 for every power-of-two size between
// minSegment and maxSegment; getIdx only ever returns one of those indexes,
// and the remaining entries stay nil.
var segmentsPools [1024]*PoolNative
// toPowerOfTwo rounds v up to the next power of two; a value that already is a
// power of two is returned unchanged, and 0 yields 0.
//
// It copies the highest set bit of v-1 into every lower bit and then adds one.
// Only shifts of 1, 2, 4, 8 and 16 are applied, which is enough for inputs
// that fit in 32 bits.
func toPowerOfTwo(v int) int {
	n := v - 1
	for shift := uint(1); shift <= 16; shift <<= 1 {
		n |= n >> shift
	}
	return n + 1
}
// Bounds of the pooled size classes. The "MinusOne" values are the
// segmentsPools indexes of the smallest and largest class.
const (
	minSegment = 4
	maxSegment = 1024

	minSegmentMinusOne = minSegment - 1
	maxSegmentMinusOne = maxSegment - 1
)
// init creates one PoolNative for every power-of-two size class from
// maxSegment down to minSegment and stores it at index size-1 of
// segmentsPools. Each pool's factory allocates empty slices with the capacity
// of its class.
func init() {
	for size := maxSegment; size >= minSegment; size >>= 1 {
		// Copy the loop variable so every factory closure keeps its own capacity.
		capacity := size
		segmentsPools[capacity-1] = NewPoolNative(func() []int {
			return make([]int, 0, capacity)
		})
	}
}
// getIdx maps a requested capacity c to the segmentsPools index of its size
// class: c is rounded up with toPowerOfTwo, clamped to the range
// [minSegment, maxSegment], and the result minus one is returned.
func getIdx(c int) int {
	size := toPowerOfTwo(c)
	if size >= maxSegment {
		return maxSegmentMinusOne
	}
	if size <= minSegment {
		return minSegmentMinusOne
	}
	return size - 1
}
// acquireSegments takes a slice from the pool of the size class that getIdx
// selects for capacity c and returns whatever that pool's Get yields.
func acquireSegments(c int) []int {
	pool := segmentsPools[getIdx(c)]
	return pool.Get()
}
// releaseSegments hands s back to the pool of the size class that getIdx
// selects for cap(s).
func releaseSegments(s []int) {
	pool := segmentsPools[getIdx(cap(s))]
	pool.Put(s)
}
// newSegmentsFunc allocates a fresh segments slice for a pool.
type newSegmentsFunc func() []int

// PoolSequenced is a bounded pool of segment slices backed by a buffered
// channel.
type PoolSequenced struct {
	new  newSegmentsFunc
	pool chan []int
}

// NewPoolSequenced creates a pool that retains at most size slices and calls f
// whenever Get finds the pool empty.
func NewPoolSequenced(size int, f newSegmentsFunc) *PoolSequenced {
	p := &PoolSequenced{pool: make(chan []int, size)}
	p.new = f
	return p
}

// Get returns a pooled slice if one is available without blocking, otherwise a
// slice freshly created by the pool's factory. The result always has length 0.
func (p *PoolSequenced) Get() []int {
	select {
	case pooled := <-p.pool:
		return pooled[:0]
	default:
		return p.new()[:0]
	}
}

// Put offers s back to the pool without blocking; when the pool is already
// full, s is simply dropped.
func (p *PoolSequenced) Put(s []int) {
	select {
	case p.pool <- s:
		// Kept for a later Get.
	default:
		// let it go, let it go...
	}
}
// PoolSynced is a mutex-guarded LIFO stack of segment slices.
type PoolSynced struct {
	size int
	mu   sync.Mutex
	list [][]int
}

// NewPoolSynced creates an empty pool; size is the capacity of the slices Get
// allocates when nothing is pooled.
func NewPoolSynced(size int) *PoolSynced {
	p := new(PoolSynced)
	p.size = size
	return p
}

// Get pops the most recently stored slice and returns it with length 0. When
// the pool is empty, or the popped slice is nil, a new slice with the
// configured capacity is allocated instead.
func (p *PoolSynced) Get() []int {
	var s []int

	p.mu.Lock()
	if last := len(p.list) - 1; last >= 0 {
		s = p.list[last]
		p.list = p.list[:last]
	}
	p.mu.Unlock()

	if s != nil {
		return s[:0]
	}
	return make([]int, 0, p.size)
}

// Put pushes s onto the pool. The pool has no upper bound.
func (p *PoolSynced) Put(s []int) {
	p.mu.Lock()
	p.list = append(p.list, s)
	p.mu.Unlock()
}
// PoolNative is a segment-slice pool backed by sync.Pool.
type PoolNative struct {
	pool *sync.Pool
}

// NewPoolNative creates a pool whose underlying sync.Pool calls f to produce a
// slice whenever it has nothing to hand out.
func NewPoolNative(f newSegmentsFunc) *PoolNative {
	factory := func() interface{} {
		return f()
	}
	return &PoolNative{pool: &sync.Pool{New: factory}}
}

// Get returns a slice from the underlying sync.Pool, truncated to length 0.
func (p *PoolNative) Get() []int {
	s := p.pool.Get().([]int)
	return s[:0]
}

// Put returns s to the underlying sync.Pool.
func (p *PoolNative) Put(s []int) {
	p.pool.Put(s)
}
// segments is one slot of a PoolStatic: the pooled slice plus a flag that is 1
// while the slot is handed out and 0 while it is free.
type segments struct {
	data   []int
	locked int32
}

// PoolStatic is a fixed-size pool of segment slices. Slots are claimed and
// released with atomic compare-and-swap operations on their locked flag.
type PoolStatic struct {
	f    newSegmentsFunc
	pool []*segments
}

// NewPoolStatic creates a pool with size slots, each pre-filled with a slice
// produced by f. f is also used by Get when every slot is taken.
func NewPoolStatic(size int, f newSegmentsFunc) *PoolStatic {
	p := &PoolStatic{
		f:    f,
		pool: make([]*segments, 0, size),
	}

	for i := 0; i < size; i++ {
		p.pool = append(p.pool, &segments{
			data: f(),
		})
	}

	return p
}

// Get claims the first free slot and returns its index together with its
// slice. When every slot is taken it returns -1 and a slice freshly created by
// the factory. The returned slice always has length 0, matching the other
// pools in this file, even if the slice was stored with elements by Put.
func (p *PoolStatic) Get() (int, []int) {
	for i, s := range p.pool {
		if atomic.CompareAndSwapInt32(&s.locked, 0, 1) {
			return i, s.data[:0]
		}
	}

	return -1, p.f()[:0]
}

// Put stores s back into slot i and marks the slot free. A negative i (the
// index Get returns for a slice that is not pooled) is ignored.
func (p *PoolStatic) Put(i int, s []int) {
	if i < 0 {
		return
	}

	// Keep s so that any growth of the slice is retained for the next Get.
	p.pool[i].data = s
	atomic.CompareAndSwapInt32(&(p.pool[i].locked), 1, 0)
}

View File

@ -41,7 +41,7 @@ func BenchmarkIndexSingle(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -51,7 +51,7 @@ func BenchmarkIndexSingleParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

View File

@ -41,7 +41,7 @@ func BenchmarkIndexSuffix(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -51,7 +51,7 @@ func BenchmarkIndexSuffixParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

View File

@ -38,7 +38,7 @@ func BenchmarkIndexSuper(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -48,7 +48,7 @@ func BenchmarkIndexSuperParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}

View File

@ -32,7 +32,7 @@ func (self Text) Len() int {
func (self Text) Index(s string, segments []int) (int, []int) {
index := strings.Index(s, self.Str)
if index == -1 {
return -1, nil
return -1, segments
}
return index, append(segments, self.BytesLength)

View File

@ -41,7 +41,7 @@ func BenchmarkIndexText(b *testing.B) {
in := make([]int, 0, len(bench_pattern))
for i := 0; i < b.N; i++ {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
}
@ -51,7 +51,7 @@ func BenchmarkIndexTextParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
m.Index(bench_pattern, in[:0])
_, in = m.Index(bench_pattern, in[:0])
}
})
}