mirror of https://github.com/tidwall/tile38.git
Fix excessive memory usage for objects with TTLs
This commit fixes an issue where Tile38 was using lots of extra memory to track objects that are marked to expire. This was creating problems with applications that set big TTLs. How it worked before: Every collection had a unique hashmap that stores expiration timestamps for every object in that collection. Along with the hashmaps, there's also one big server-wide list that gets appended every time a new SET+EX is performed. From a background routine, this list is looped over at least 10 times per second and is randomly searched for potential candidates that might need expiring. The routine then removes those entries from the list and tests if the objects matching the entries have actually expired. If so, these objects are deleted from the database. When more than 25% of the 20 candidates are deleted the loop is immediately continued, otherwise the loop backs off with a 100ms pause. Why this was a problem: The list grows one entry for every SET+EX. When TTLs are long, like 24 hours or more, it would take at least that much time before the entry is removed. So for databases that have objects that use TTLs and are updated often this could lead to a very large list. How it was fixed: The list was removed and the hashmap is now searched randomly. This required a new hashmap implementation, as the built-in Go map does not provide an operation for randomly getting entries. The chosen implementation is a robinhood-hash because it provides open-addressing, which makes for simple random bucket selections. Issue #502
This commit is contained in:
parent
df477bf3f4
commit
23b016d192
|
@ -48,6 +48,14 @@
|
|||
revision = "8b705a6dec722bcda3a9309c0924d4eca24f7c72"
|
||||
version = "v1.17.14"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:545ae40d6dde46043a71bdfd7f9a17f2353ce16277c83ac685af231b4b7c4beb"
|
||||
name = "github.com/cespare/xxhash"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "de209a9ffae3256185a6bb135d1a0ada7b2b5f09"
|
||||
version = "v2.1.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:56c130d885a4aacae1dd9c7b71cfe39912c7ebc1ff7d2b46083c8812996dc43b"
|
||||
name = "github.com/davecgh/go-spew"
|
||||
|
@ -307,6 +315,14 @@
|
|||
pruneopts = ""
|
||||
revision = "b2b1a7ca20e34ad839fdb81f78e67522c99959f0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:2351ccd20a2fc2ba55096db53b98f6dc4451d2f68b72ab744dd1550adf98e85f"
|
||||
name = "github.com/tidwall/rhh"
|
||||
packages = ["."]
|
||||
pruneopts = ""
|
||||
revision = "86b588640216d02f9c15d34f1f3d546f082dd65e"
|
||||
version = "v1.1.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:5d9d865e55b95f001e52a7f5d1f812e8a80f0f05d5b04ede006f24206ebba33c"
|
||||
|
@ -485,6 +501,7 @@
|
|||
"github.com/tidwall/redbench",
|
||||
"github.com/tidwall/redcon",
|
||||
"github.com/tidwall/resp",
|
||||
"github.com/tidwall/rhh",
|
||||
"github.com/tidwall/sjson",
|
||||
"github.com/tidwall/tinybtree",
|
||||
"github.com/yuin/gopher-lua",
|
||||
|
|
|
@ -29,6 +29,10 @@ required = [
|
|||
branch = "master"
|
||||
name = "github.com/tidwall/tinybtree"
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/tidwall/rhh"
|
||||
version = "1.1.0"
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/tidwall/geojson"
|
||||
version = "1.1.7"
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/tidwall/geojson"
|
||||
"github.com/tidwall/rhh"
|
||||
"github.com/tidwall/tile38/core"
|
||||
"github.com/tidwall/tile38/internal/collection"
|
||||
"github.com/tidwall/tile38/internal/log"
|
||||
|
@ -92,10 +93,13 @@ func (server *Server) aofshrink() {
|
|||
if col == nil {
|
||||
return
|
||||
}
|
||||
var fnames = col.FieldArr() // reload an array of field names to match each object
|
||||
var exm = server.expires[keys[0]] // the expiration map
|
||||
var now = time.Now() // used for expiration
|
||||
var count = 0 // the object count
|
||||
var fnames = col.FieldArr() // reload an array of field names to match each object
|
||||
var exm *rhh.Map // the expiration map
|
||||
if value, ok := server.expires.Get(keys[0]); ok {
|
||||
exm = value.(*rhh.Map)
|
||||
}
|
||||
var now = time.Now() // used for expiration
|
||||
var count = 0 // the object count
|
||||
col.ScanGreaterOrEqual(nextid, false, nil, nil,
|
||||
func(id string, obj geojson.Object, fields []float64) bool {
|
||||
if count == maxids {
|
||||
|
@ -118,9 +122,8 @@ func (server *Server) aofshrink() {
|
|||
}
|
||||
}
|
||||
if exm != nil {
|
||||
at, ok := exm[id]
|
||||
if ok {
|
||||
expires := at.Sub(now)
|
||||
if at, ok := exm.Get(id); ok {
|
||||
expires := at.(time.Time).Sub(now)
|
||||
if expires > 0 {
|
||||
values = append(values, "ex")
|
||||
values = append(values, strconv.FormatFloat(math.Floor(float64(expires)/float64(time.Second)*10)/10, 'f', -1, 64))
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
"github.com/tidwall/geojson/geometry"
|
||||
"github.com/tidwall/rbang"
|
||||
"github.com/tidwall/resp"
|
||||
"github.com/tidwall/rhh"
|
||||
"github.com/tidwall/tile38/internal/collection"
|
||||
"github.com/tidwall/tile38/internal/glob"
|
||||
"github.com/tidwall/tinybtree"
|
||||
|
@ -521,10 +522,7 @@ func (server *Server) cmdFlushDB(msg *Message) (res resp.Value, d commandDetails
|
|||
return
|
||||
}
|
||||
server.cols = tinybtree.BTree{}
|
||||
server.exlistmu.Lock()
|
||||
server.exlist = nil
|
||||
server.exlistmu.Unlock()
|
||||
server.expires = make(map[string]map[string]time.Time)
|
||||
server.expires = rhh.New(0)
|
||||
server.hooks = make(map[string]*Hook)
|
||||
server.hooksOut = make(map[string]*Hook)
|
||||
server.hookTree = rbang.RTree{}
|
||||
|
|
|
@ -1,158 +1,123 @@
|
|||
package server
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math/rand"
|
||||
"time"
|
||||
|
||||
"github.com/tidwall/btree"
|
||||
"github.com/tidwall/rhh"
|
||||
"github.com/tidwall/tile38/internal/log"
|
||||
)
|
||||
|
||||
type exitem struct {
|
||||
key, id string
|
||||
at time.Time
|
||||
}
|
||||
|
||||
func (a *exitem) Less(v btree.Item, ctx interface{}) bool {
|
||||
b := v.(*exitem)
|
||||
if a.at.Before(b.at) {
|
||||
return true
|
||||
}
|
||||
if a.at.After(b.at) {
|
||||
return false
|
||||
}
|
||||
if a.key < b.key {
|
||||
return true
|
||||
}
|
||||
if a.key > b.key {
|
||||
return false
|
||||
}
|
||||
return a.id < b.id
|
||||
}
|
||||
|
||||
// fillExpiresList occurs once at startup
|
||||
func (c *Server) fillExpiresList() {
|
||||
c.exlistmu.Lock()
|
||||
c.exlist = c.exlist[:0]
|
||||
for key, m := range c.expires {
|
||||
for id, at := range m {
|
||||
c.exlist = append(c.exlist, exitem{key, id, at})
|
||||
}
|
||||
}
|
||||
c.exlistmu.Unlock()
|
||||
}
|
||||
|
||||
// clearIDExpires clears a single item from the expires list.
|
||||
func (c *Server) clearIDExpires(key, id string) (cleared bool) {
|
||||
if len(c.expires) == 0 {
|
||||
return false
|
||||
if c.expires.Len() > 0 {
|
||||
if idm, ok := c.expires.Get(key); ok {
|
||||
if _, ok := idm.(*rhh.Map).Delete(id); ok {
|
||||
if idm.(*rhh.Map).Len() == 0 {
|
||||
c.expires.Delete(key)
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
m, ok := c.expires[key]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
_, ok = m[id]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
delete(m, id)
|
||||
return true
|
||||
return false
|
||||
}
|
||||
|
||||
// clearKeyExpires clears all items that are marked to expire from a single key.
|
||||
func (c *Server) clearKeyExpires(key string) {
|
||||
delete(c.expires, key)
|
||||
c.expires.Delete(key)
|
||||
}
|
||||
|
||||
// moveKeyExpires moves all items that are marked to expire from a key to a newKey.
|
||||
func (c *Server) moveKeyExpires(key, newKey string) {
|
||||
val := c.expires[key]
|
||||
delete(c.expires, key)
|
||||
c.expires[newKey] = val
|
||||
if idm, ok := c.expires.Delete(key); ok {
|
||||
c.expires.Set(newKey, idm)
|
||||
}
|
||||
}
|
||||
|
||||
// expireAt marks an item to expire at a specific time.
|
||||
func (c *Server) expireAt(key, id string, at time.Time) {
|
||||
m := c.expires[key]
|
||||
if m == nil {
|
||||
m = make(map[string]time.Time)
|
||||
c.expires[key] = m
|
||||
idm, ok := c.expires.Get(key)
|
||||
if !ok {
|
||||
idm = rhh.New(0)
|
||||
c.expires.Set(key, idm)
|
||||
}
|
||||
m[id] = at
|
||||
c.exlistmu.Lock()
|
||||
c.exlist = append(c.exlist, exitem{key, id, at})
|
||||
c.exlistmu.Unlock()
|
||||
idm.(*rhh.Map).Set(id, at.UnixNano())
|
||||
}
|
||||
|
||||
// getExpires returns the time at which an item expires.
|
||||
func (c *Server) getExpires(key, id string) (at time.Time, ok bool) {
|
||||
if len(c.expires) == 0 {
|
||||
return at, false
|
||||
if c.expires.Len() > 0 {
|
||||
if idm, ok := c.expires.Get(key); ok {
|
||||
if atv, ok := idm.(*rhh.Map).Get(id); ok {
|
||||
return time.Unix(0, atv.(int64)), true
|
||||
}
|
||||
}
|
||||
}
|
||||
m, ok := c.expires[key]
|
||||
if !ok {
|
||||
return at, false
|
||||
}
|
||||
at, ok = m[id]
|
||||
return at, ok
|
||||
return time.Time{}, false
|
||||
}
|
||||
|
||||
// hasExpired returns true if an item has expired.
|
||||
func (c *Server) hasExpired(key, id string) bool {
|
||||
at, ok := c.getExpires(key, id)
|
||||
if !ok {
|
||||
return false
|
||||
if at, ok := c.getExpires(key, id); ok {
|
||||
return time.Now().After(at)
|
||||
}
|
||||
return time.Now().After(at)
|
||||
return false
|
||||
}
|
||||
|
||||
const bgExpireDelay = time.Second / 10
|
||||
const bgExpireSegmentSize = 20
|
||||
|
||||
// expirePurgeSweep is run from the backgroundExpiring operation and performs
|
||||
// a segmented sweep of the expires map
|
||||
func (c *Server) expirePurgeSweep(rng *rand.Rand) (purged int) {
|
||||
now := time.Now().UnixNano()
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
if c.expires.Len() == 0 {
|
||||
return 0
|
||||
}
|
||||
for i := 0; i < bgExpireSegmentSize; i++ {
|
||||
if key, idm, ok := c.expires.GetPos(rng.Uint64()); ok {
|
||||
id, atv, ok := idm.(*rhh.Map).GetPos(rng.Uint64())
|
||||
if ok {
|
||||
if now > atv.(int64) {
|
||||
// expired, purge from database
|
||||
msg := &Message{}
|
||||
msg.Args = []string{"del", key, id}
|
||||
_, d, err := c.cmdDel(msg)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
if err := c.writeAOF(msg.Args, &d); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
purged++
|
||||
}
|
||||
}
|
||||
}
|
||||
// recycle the lock
|
||||
c.mu.Unlock()
|
||||
c.mu.Lock()
|
||||
}
|
||||
return purged
|
||||
}
|
||||
|
||||
// backgroundExpiring watches for when items that have expired must be purged
|
||||
// from the database. It executes 10 times a second.
|
||||
func (c *Server) backgroundExpiring() {
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
var purgelist []exitem
|
||||
rng := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
for {
|
||||
if c.stopServer.on() {
|
||||
return
|
||||
}
|
||||
now := time.Now()
|
||||
purgelist = purgelist[:0]
|
||||
c.exlistmu.Lock()
|
||||
for i := 0; i < 20 && len(c.exlist) > 0; i++ {
|
||||
ix := rand.Int() % len(c.exlist)
|
||||
if now.After(c.exlist[ix].at) {
|
||||
// purge from exlist
|
||||
purgelist = append(purgelist, c.exlist[ix])
|
||||
c.exlist[ix] = c.exlist[len(c.exlist)-1]
|
||||
c.exlist = c.exlist[:len(c.exlist)-1]
|
||||
}
|
||||
purged := c.expirePurgeSweep(rng)
|
||||
if purged > bgExpireSegmentSize/4 {
|
||||
// do another purge immediately
|
||||
continue
|
||||
} else {
|
||||
// back off
|
||||
time.Sleep(bgExpireDelay)
|
||||
}
|
||||
c.exlistmu.Unlock()
|
||||
if len(purgelist) > 0 {
|
||||
c.mu.Lock()
|
||||
for _, item := range purgelist {
|
||||
if c.hasExpired(item.key, item.id) {
|
||||
// purge from database
|
||||
msg := &Message{}
|
||||
msg.Args = []string{"del", item.key, item.id}
|
||||
_, d, err := c.cmdDel(msg)
|
||||
if err != nil {
|
||||
c.mu.Unlock()
|
||||
log.Fatal(err)
|
||||
continue
|
||||
}
|
||||
if err := c.writeAOF(msg.Args, &d); err != nil {
|
||||
c.mu.Unlock()
|
||||
log.Fatal(err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
c.mu.Unlock()
|
||||
if len(purgelist) > 5 {
|
||||
continue
|
||||
}
|
||||
}
|
||||
time.Sleep(time.Second / 10)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import (
|
|||
"github.com/tidwall/rbang"
|
||||
"github.com/tidwall/redcon"
|
||||
"github.com/tidwall/resp"
|
||||
"github.com/tidwall/rhh"
|
||||
"github.com/tidwall/tile38/core"
|
||||
"github.com/tidwall/tile38/internal/collection"
|
||||
"github.com/tidwall/tile38/internal/deadline"
|
||||
|
@ -93,18 +94,15 @@ type Server struct {
|
|||
connsmu sync.RWMutex
|
||||
conns map[int]*Client
|
||||
|
||||
exlistmu sync.RWMutex
|
||||
exlist []exitem
|
||||
|
||||
mu sync.RWMutex
|
||||
aof *os.File // active aof file
|
||||
aofdirty int32 // mark the aofbuf as having data
|
||||
aofbuf []byte // prewrite buffer
|
||||
aofsz int // active size of the aof file
|
||||
qdb *buntdb.DB // hook queue log
|
||||
qidx uint64 // hook queue log last idx
|
||||
cols tinybtree.BTree // data collections
|
||||
expires map[string]map[string]time.Time // synced with cols
|
||||
aof *os.File // active aof file
|
||||
aofdirty int32 // mark the aofbuf as having data
|
||||
aofbuf []byte // prewrite buffer
|
||||
aofsz int // active size of the aof file
|
||||
qdb *buntdb.DB // hook queue log
|
||||
qidx uint64 // hook queue log last idx
|
||||
cols tinybtree.BTree // data collections
|
||||
expires *rhh.Map // map[string]map[string]time.Time
|
||||
|
||||
follows map[*bytes.Buffer]bool
|
||||
fcond *sync.Cond
|
||||
|
@ -148,7 +146,7 @@ func Serve(host string, port int, dir string, http bool) error {
|
|||
hooks: make(map[string]*Hook),
|
||||
hooksOut: make(map[string]*Hook),
|
||||
aofconnM: make(map[net.Conn]bool),
|
||||
expires: make(map[string]map[string]time.Time),
|
||||
expires: rhh.New(0),
|
||||
started: time.Now(),
|
||||
conns: make(map[int]*Client),
|
||||
http: http,
|
||||
|
@ -259,7 +257,7 @@ func Serve(host string, port int, dir string, http bool) error {
|
|||
server.aof.Sync()
|
||||
}()
|
||||
}
|
||||
server.fillExpiresList()
|
||||
// server.fillExpiresList()
|
||||
|
||||
// Start background routines
|
||||
if server.config.followHost() != "" {
|
||||
|
@ -919,10 +917,7 @@ func randomKey(n int) string {
|
|||
func (server *Server) reset() {
|
||||
server.aofsz = 0
|
||||
server.cols = tinybtree.BTree{}
|
||||
server.exlistmu.Lock()
|
||||
server.exlist = nil
|
||||
server.exlistmu.Unlock()
|
||||
server.expires = make(map[string]map[string]time.Time)
|
||||
server.expires = rhh.New(0)
|
||||
}
|
||||
|
||||
func (server *Server) command(msg *Message, client *Client) (
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
language: go
|
||||
go:
|
||||
- "1.x"
|
||||
- master
|
||||
env:
|
||||
- TAGS=""
|
||||
- TAGS="-tags purego"
|
||||
script: go test $TAGS -v ./...
|
|
@ -0,0 +1,22 @@
|
|||
Copyright (c) 2016 Caleb Spare
|
||||
|
||||
MIT License
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,55 @@
|
|||
# xxhash
|
||||
|
||||
[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash)
|
||||
[![Build Status](https://travis-ci.org/cespare/xxhash.svg?branch=master)](https://travis-ci.org/cespare/xxhash)
|
||||
|
||||
xxhash is a Go implementation of the 64-bit
|
||||
[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
|
||||
high-quality hashing algorithm that is much faster than anything in the Go
|
||||
standard library.
|
||||
|
||||
This package provides a straightforward API:
|
||||
|
||||
```
|
||||
func Sum64(b []byte) uint64
|
||||
func Sum64String(s string) uint64
|
||||
type Digest struct{ ... }
|
||||
func New() *Digest
|
||||
```
|
||||
|
||||
The `Digest` type implements hash.Hash64. Its key methods are:
|
||||
|
||||
```
|
||||
func (*Digest) Write([]byte) (int, error)
|
||||
func (*Digest) WriteString(string) (int, error)
|
||||
func (*Digest) Sum64() uint64
|
||||
```
|
||||
|
||||
This implementation provides a fast pure-Go implementation and an even faster
|
||||
assembly implementation for amd64.
|
||||
|
||||
## Benchmarks
|
||||
|
||||
Here are some quick benchmarks comparing the pure-Go and assembly
|
||||
implementations of Sum64.
|
||||
|
||||
| input size | purego | asm |
|
||||
| --- | --- | --- |
|
||||
| 5 B | 979.66 MB/s | 1291.17 MB/s |
|
||||
| 100 B | 7475.26 MB/s | 7973.40 MB/s |
|
||||
| 4 KB | 17573.46 MB/s | 17602.65 MB/s |
|
||||
| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
|
||||
|
||||
These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
|
||||
the following commands under Go 1.11.2:
|
||||
|
||||
```
|
||||
$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
|
||||
$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
|
||||
```
|
||||
|
||||
## Projects using this package
|
||||
|
||||
- [InfluxDB](https://github.com/influxdata/influxdb)
|
||||
- [Prometheus](https://github.com/prometheus/prometheus)
|
||||
- [FreeCache](https://github.com/coocood/freecache)
|
|
@ -0,0 +1,3 @@
|
|||
module github.com/cespare/xxhash/v2
|
||||
|
||||
go 1.13
|
|
@ -0,0 +1,236 @@
|
|||
// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described
|
||||
// at http://cyan4973.github.io/xxHash/.
|
||||
package xxhash
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
const (
|
||||
prime1 uint64 = 11400714785074694791
|
||||
prime2 uint64 = 14029467366897019727
|
||||
prime3 uint64 = 1609587929392839161
|
||||
prime4 uint64 = 9650029242287828579
|
||||
prime5 uint64 = 2870177450012600261
|
||||
)
|
||||
|
||||
// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
|
||||
// possible in the Go code is worth a small (but measurable) performance boost
|
||||
// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
|
||||
// convenience in the Go code in a few places where we need to intentionally
|
||||
// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
|
||||
// result overflows a uint64).
|
||||
var (
|
||||
prime1v = prime1
|
||||
prime2v = prime2
|
||||
prime3v = prime3
|
||||
prime4v = prime4
|
||||
prime5v = prime5
|
||||
)
|
||||
|
||||
// Digest implements hash.Hash64.
|
||||
type Digest struct {
|
||||
v1 uint64
|
||||
v2 uint64
|
||||
v3 uint64
|
||||
v4 uint64
|
||||
total uint64
|
||||
mem [32]byte
|
||||
n int // how much of mem is used
|
||||
}
|
||||
|
||||
// New creates a new Digest that computes the 64-bit xxHash algorithm.
|
||||
func New() *Digest {
|
||||
var d Digest
|
||||
d.Reset()
|
||||
return &d
|
||||
}
|
||||
|
||||
// Reset clears the Digest's state so that it can be reused.
|
||||
func (d *Digest) Reset() {
|
||||
d.v1 = prime1v + prime2
|
||||
d.v2 = prime2
|
||||
d.v3 = 0
|
||||
d.v4 = -prime1v
|
||||
d.total = 0
|
||||
d.n = 0
|
||||
}
|
||||
|
||||
// Size always returns 8 bytes.
|
||||
func (d *Digest) Size() int { return 8 }
|
||||
|
||||
// BlockSize always returns 32 bytes.
|
||||
func (d *Digest) BlockSize() int { return 32 }
|
||||
|
||||
// Write adds more data to d. It always returns len(b), nil.
|
||||
func (d *Digest) Write(b []byte) (n int, err error) {
|
||||
n = len(b)
|
||||
d.total += uint64(n)
|
||||
|
||||
if d.n+n < 32 {
|
||||
// This new data doesn't even fill the current block.
|
||||
copy(d.mem[d.n:], b)
|
||||
d.n += n
|
||||
return
|
||||
}
|
||||
|
||||
if d.n > 0 {
|
||||
// Finish off the partial block.
|
||||
copy(d.mem[d.n:], b)
|
||||
d.v1 = round(d.v1, u64(d.mem[0:8]))
|
||||
d.v2 = round(d.v2, u64(d.mem[8:16]))
|
||||
d.v3 = round(d.v3, u64(d.mem[16:24]))
|
||||
d.v4 = round(d.v4, u64(d.mem[24:32]))
|
||||
b = b[32-d.n:]
|
||||
d.n = 0
|
||||
}
|
||||
|
||||
if len(b) >= 32 {
|
||||
// One or more full blocks left.
|
||||
nw := writeBlocks(d, b)
|
||||
b = b[nw:]
|
||||
}
|
||||
|
||||
// Store any remaining partial block.
|
||||
copy(d.mem[:], b)
|
||||
d.n = len(b)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Sum appends the current hash to b and returns the resulting slice.
|
||||
func (d *Digest) Sum(b []byte) []byte {
|
||||
s := d.Sum64()
|
||||
return append(
|
||||
b,
|
||||
byte(s>>56),
|
||||
byte(s>>48),
|
||||
byte(s>>40),
|
||||
byte(s>>32),
|
||||
byte(s>>24),
|
||||
byte(s>>16),
|
||||
byte(s>>8),
|
||||
byte(s),
|
||||
)
|
||||
}
|
||||
|
||||
// Sum64 returns the current hash.
|
||||
func (d *Digest) Sum64() uint64 {
|
||||
var h uint64
|
||||
|
||||
if d.total >= 32 {
|
||||
v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4
|
||||
h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
|
||||
h = mergeRound(h, v1)
|
||||
h = mergeRound(h, v2)
|
||||
h = mergeRound(h, v3)
|
||||
h = mergeRound(h, v4)
|
||||
} else {
|
||||
h = d.v3 + prime5
|
||||
}
|
||||
|
||||
h += d.total
|
||||
|
||||
i, end := 0, d.n
|
||||
for ; i+8 <= end; i += 8 {
|
||||
k1 := round(0, u64(d.mem[i:i+8]))
|
||||
h ^= k1
|
||||
h = rol27(h)*prime1 + prime4
|
||||
}
|
||||
if i+4 <= end {
|
||||
h ^= uint64(u32(d.mem[i:i+4])) * prime1
|
||||
h = rol23(h)*prime2 + prime3
|
||||
i += 4
|
||||
}
|
||||
for i < end {
|
||||
h ^= uint64(d.mem[i]) * prime5
|
||||
h = rol11(h) * prime1
|
||||
i++
|
||||
}
|
||||
|
||||
h ^= h >> 33
|
||||
h *= prime2
|
||||
h ^= h >> 29
|
||||
h *= prime3
|
||||
h ^= h >> 32
|
||||
|
||||
return h
|
||||
}
|
||||
|
||||
const (
|
||||
magic = "xxh\x06"
|
||||
marshaledSize = len(magic) + 8*5 + 32
|
||||
)
|
||||
|
||||
// MarshalBinary implements the encoding.BinaryMarshaler interface.
|
||||
func (d *Digest) MarshalBinary() ([]byte, error) {
|
||||
b := make([]byte, 0, marshaledSize)
|
||||
b = append(b, magic...)
|
||||
b = appendUint64(b, d.v1)
|
||||
b = appendUint64(b, d.v2)
|
||||
b = appendUint64(b, d.v3)
|
||||
b = appendUint64(b, d.v4)
|
||||
b = appendUint64(b, d.total)
|
||||
b = append(b, d.mem[:d.n]...)
|
||||
b = b[:len(b)+len(d.mem)-d.n]
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface.
|
||||
func (d *Digest) UnmarshalBinary(b []byte) error {
|
||||
if len(b) < len(magic) || string(b[:len(magic)]) != magic {
|
||||
return errors.New("xxhash: invalid hash state identifier")
|
||||
}
|
||||
if len(b) != marshaledSize {
|
||||
return errors.New("xxhash: invalid hash state size")
|
||||
}
|
||||
b = b[len(magic):]
|
||||
b, d.v1 = consumeUint64(b)
|
||||
b, d.v2 = consumeUint64(b)
|
||||
b, d.v3 = consumeUint64(b)
|
||||
b, d.v4 = consumeUint64(b)
|
||||
b, d.total = consumeUint64(b)
|
||||
copy(d.mem[:], b)
|
||||
b = b[len(d.mem):]
|
||||
d.n = int(d.total % uint64(len(d.mem)))
|
||||
return nil
|
||||
}
|
||||
|
||||
func appendUint64(b []byte, x uint64) []byte {
|
||||
var a [8]byte
|
||||
binary.LittleEndian.PutUint64(a[:], x)
|
||||
return append(b, a[:]...)
|
||||
}
|
||||
|
||||
func consumeUint64(b []byte) ([]byte, uint64) {
|
||||
x := u64(b)
|
||||
return b[8:], x
|
||||
}
|
||||
|
||||
func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) }
|
||||
func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) }
|
||||
|
||||
func round(acc, input uint64) uint64 {
|
||||
acc += input * prime2
|
||||
acc = rol31(acc)
|
||||
acc *= prime1
|
||||
return acc
|
||||
}
|
||||
|
||||
func mergeRound(acc, val uint64) uint64 {
|
||||
val = round(0, val)
|
||||
acc ^= val
|
||||
acc = acc*prime1 + prime4
|
||||
return acc
|
||||
}
|
||||
|
||||
func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) }
|
||||
func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) }
|
||||
func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) }
|
||||
func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) }
|
||||
func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) }
|
||||
func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) }
|
||||
func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) }
|
||||
func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) }
|
|
@ -0,0 +1,13 @@
|
|||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !purego
|
||||
|
||||
package xxhash
|
||||
|
||||
// Sum64 computes the 64-bit xxHash digest of b.
|
||||
//
|
||||
//go:noescape
|
||||
func Sum64(b []byte) uint64
|
||||
|
||||
//go:noescape
|
||||
func writeBlocks(*Digest, []byte) int
|
|
@ -0,0 +1,215 @@
|
|||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Register allocation:
|
||||
// AX h
|
||||
// CX pointer to advance through b
|
||||
// DX n
|
||||
// BX loop end
|
||||
// R8 v1, k1
|
||||
// R9 v2
|
||||
// R10 v3
|
||||
// R11 v4
|
||||
// R12 tmp
|
||||
// R13 prime1v
|
||||
// R14 prime2v
|
||||
// R15 prime4v
|
||||
|
||||
// round reads from and advances the buffer pointer in CX.
|
||||
// It assumes that R13 has prime1v and R14 has prime2v.
|
||||
#define round(r) \
|
||||
MOVQ (CX), R12 \
|
||||
ADDQ $8, CX \
|
||||
IMULQ R14, R12 \
|
||||
ADDQ R12, r \
|
||||
ROLQ $31, r \
|
||||
IMULQ R13, r
|
||||
|
||||
// mergeRound applies a merge round on the two registers acc and val.
|
||||
// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v.
|
||||
#define mergeRound(acc, val) \
|
||||
IMULQ R14, val \
|
||||
ROLQ $31, val \
|
||||
IMULQ R13, val \
|
||||
XORQ val, acc \
|
||||
IMULQ R13, acc \
|
||||
ADDQ R15, acc
|
||||
|
||||
// func Sum64(b []byte) uint64
|
||||
TEXT ·Sum64(SB), NOSPLIT, $0-32
|
||||
// Load fixed primes.
|
||||
MOVQ ·prime1v(SB), R13
|
||||
MOVQ ·prime2v(SB), R14
|
||||
MOVQ ·prime4v(SB), R15
|
||||
|
||||
// Load slice.
|
||||
MOVQ b_base+0(FP), CX
|
||||
MOVQ b_len+8(FP), DX
|
||||
LEAQ (CX)(DX*1), BX
|
||||
|
||||
// The first loop limit will be len(b)-32.
|
||||
SUBQ $32, BX
|
||||
|
||||
// Check whether we have at least one block.
|
||||
CMPQ DX, $32
|
||||
JLT noBlocks
|
||||
|
||||
// Set up initial state (v1, v2, v3, v4).
|
||||
MOVQ R13, R8
|
||||
ADDQ R14, R8
|
||||
MOVQ R14, R9
|
||||
XORQ R10, R10
|
||||
XORQ R11, R11
|
||||
SUBQ R13, R11
|
||||
|
||||
// Loop until CX > BX.
|
||||
blockLoop:
|
||||
round(R8)
|
||||
round(R9)
|
||||
round(R10)
|
||||
round(R11)
|
||||
|
||||
CMPQ CX, BX
|
||||
JLE blockLoop
|
||||
|
||||
MOVQ R8, AX
|
||||
ROLQ $1, AX
|
||||
MOVQ R9, R12
|
||||
ROLQ $7, R12
|
||||
ADDQ R12, AX
|
||||
MOVQ R10, R12
|
||||
ROLQ $12, R12
|
||||
ADDQ R12, AX
|
||||
MOVQ R11, R12
|
||||
ROLQ $18, R12
|
||||
ADDQ R12, AX
|
||||
|
||||
mergeRound(AX, R8)
|
||||
mergeRound(AX, R9)
|
||||
mergeRound(AX, R10)
|
||||
mergeRound(AX, R11)
|
||||
|
||||
JMP afterBlocks
|
||||
|
||||
noBlocks:
|
||||
MOVQ ·prime5v(SB), AX
|
||||
|
||||
afterBlocks:
|
||||
ADDQ DX, AX
|
||||
|
||||
// Right now BX has len(b)-32, and we want to loop until CX > len(b)-8.
|
||||
ADDQ $24, BX
|
||||
|
||||
CMPQ CX, BX
|
||||
JG fourByte
|
||||
|
||||
wordLoop:
|
||||
// Calculate k1.
|
||||
MOVQ (CX), R8
|
||||
ADDQ $8, CX
|
||||
IMULQ R14, R8
|
||||
ROLQ $31, R8
|
||||
IMULQ R13, R8
|
||||
|
||||
XORQ R8, AX
|
||||
ROLQ $27, AX
|
||||
IMULQ R13, AX
|
||||
ADDQ R15, AX
|
||||
|
||||
CMPQ CX, BX
|
||||
JLE wordLoop
|
||||
|
||||
fourByte:
|
||||
ADDQ $4, BX
|
||||
CMPQ CX, BX
|
||||
JG singles
|
||||
|
||||
MOVL (CX), R8
|
||||
ADDQ $4, CX
|
||||
IMULQ R13, R8
|
||||
XORQ R8, AX
|
||||
|
||||
ROLQ $23, AX
|
||||
IMULQ R14, AX
|
||||
ADDQ ·prime3v(SB), AX
|
||||
|
||||
singles:
|
||||
ADDQ $4, BX
|
||||
CMPQ CX, BX
|
||||
JGE finalize
|
||||
|
||||
singlesLoop:
|
||||
MOVBQZX (CX), R12
|
||||
ADDQ $1, CX
|
||||
IMULQ ·prime5v(SB), R12
|
||||
XORQ R12, AX
|
||||
|
||||
ROLQ $11, AX
|
||||
IMULQ R13, AX
|
||||
|
||||
CMPQ CX, BX
|
||||
JL singlesLoop
|
||||
|
||||
finalize:
|
||||
MOVQ AX, R12
|
||||
SHRQ $33, R12
|
||||
XORQ R12, AX
|
||||
IMULQ R14, AX
|
||||
MOVQ AX, R12
|
||||
SHRQ $29, R12
|
||||
XORQ R12, AX
|
||||
IMULQ ·prime3v(SB), AX
|
||||
MOVQ AX, R12
|
||||
SHRQ $32, R12
|
||||
XORQ R12, AX
|
||||
|
||||
MOVQ AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
// writeBlocks uses the same registers as above except that it uses AX to store
|
||||
// the d pointer.
|
||||
|
||||
// func writeBlocks(d *Digest, b []byte) int
|
||||
TEXT ·writeBlocks(SB), NOSPLIT, $0-40
|
||||
// Load fixed primes needed for round.
|
||||
MOVQ ·prime1v(SB), R13
|
||||
MOVQ ·prime2v(SB), R14
|
||||
|
||||
// Load slice.
|
||||
MOVQ b_base+8(FP), CX
|
||||
MOVQ b_len+16(FP), DX
|
||||
LEAQ (CX)(DX*1), BX
|
||||
SUBQ $32, BX
|
||||
|
||||
// Load vN from d.
|
||||
MOVQ d+0(FP), AX
|
||||
MOVQ 0(AX), R8 // v1
|
||||
MOVQ 8(AX), R9 // v2
|
||||
MOVQ 16(AX), R10 // v3
|
||||
MOVQ 24(AX), R11 // v4
|
||||
|
||||
// We don't need to check the loop condition here; this function is
|
||||
// always called with at least one block of data to process.
|
||||
blockLoop:
|
||||
round(R8)
|
||||
round(R9)
|
||||
round(R10)
|
||||
round(R11)
|
||||
|
||||
CMPQ CX, BX
|
||||
JLE blockLoop
|
||||
|
||||
// Copy vN back to d.
|
||||
MOVQ R8, 0(AX)
|
||||
MOVQ R9, 8(AX)
|
||||
MOVQ R10, 16(AX)
|
||||
MOVQ R11, 24(AX)
|
||||
|
||||
// The number of bytes written is CX minus the old base pointer.
|
||||
SUBQ b_base+8(FP), CX
|
||||
MOVQ CX, ret+32(FP)
|
||||
|
||||
RET
|
|
@ -0,0 +1,76 @@
|
|||
// +build !amd64 appengine !gc purego
|
||||
|
||||
package xxhash
|
||||
|
||||
// Sum64 computes the 64-bit xxHash digest of b.
|
||||
func Sum64(b []byte) uint64 {
|
||||
// A simpler version would be
|
||||
// d := New()
|
||||
// d.Write(b)
|
||||
// return d.Sum64()
|
||||
// but this is faster, particularly for small inputs.
|
||||
|
||||
n := len(b)
|
||||
var h uint64
|
||||
|
||||
if n >= 32 {
|
||||
v1 := prime1v + prime2
|
||||
v2 := prime2
|
||||
v3 := uint64(0)
|
||||
v4 := -prime1v
|
||||
for len(b) >= 32 {
|
||||
v1 = round(v1, u64(b[0:8:len(b)]))
|
||||
v2 = round(v2, u64(b[8:16:len(b)]))
|
||||
v3 = round(v3, u64(b[16:24:len(b)]))
|
||||
v4 = round(v4, u64(b[24:32:len(b)]))
|
||||
b = b[32:len(b):len(b)]
|
||||
}
|
||||
h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
|
||||
h = mergeRound(h, v1)
|
||||
h = mergeRound(h, v2)
|
||||
h = mergeRound(h, v3)
|
||||
h = mergeRound(h, v4)
|
||||
} else {
|
||||
h = prime5
|
||||
}
|
||||
|
||||
h += uint64(n)
|
||||
|
||||
i, end := 0, len(b)
|
||||
for ; i+8 <= end; i += 8 {
|
||||
k1 := round(0, u64(b[i:i+8:len(b)]))
|
||||
h ^= k1
|
||||
h = rol27(h)*prime1 + prime4
|
||||
}
|
||||
if i+4 <= end {
|
||||
h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
|
||||
h = rol23(h)*prime2 + prime3
|
||||
i += 4
|
||||
}
|
||||
for ; i < end; i++ {
|
||||
h ^= uint64(b[i]) * prime5
|
||||
h = rol11(h) * prime1
|
||||
}
|
||||
|
||||
h ^= h >> 33
|
||||
h *= prime2
|
||||
h ^= h >> 29
|
||||
h *= prime3
|
||||
h ^= h >> 32
|
||||
|
||||
return h
|
||||
}
|
||||
|
||||
func writeBlocks(d *Digest, b []byte) int {
|
||||
v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4
|
||||
n := len(b)
|
||||
for len(b) >= 32 {
|
||||
v1 = round(v1, u64(b[0:8:len(b)]))
|
||||
v2 = round(v2, u64(b[8:16:len(b)]))
|
||||
v3 = round(v3, u64(b[16:24:len(b)]))
|
||||
v4 = round(v4, u64(b[24:32:len(b)]))
|
||||
b = b[32:len(b):len(b)]
|
||||
}
|
||||
d.v1, d.v2, d.v3, d.v4 = v1, v2, v3, v4
|
||||
return n - len(b)
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
// +build appengine
|
||||
|
||||
// This file contains the safe implementations of otherwise unsafe-using code.
|
||||
|
||||
package xxhash
|
||||
|
||||
// Sum64String computes the 64-bit xxHash digest of s.
|
||||
func Sum64String(s string) uint64 {
|
||||
return Sum64([]byte(s))
|
||||
}
|
||||
|
||||
// WriteString adds more data to d. It always returns len(s), nil.
|
||||
func (d *Digest) WriteString(s string) (n int, err error) {
|
||||
return d.Write([]byte(s))
|
||||
}
|
|
@ -0,0 +1,162 @@
|
|||
package xxhash
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestAll(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
input string
|
||||
want uint64
|
||||
}{
|
||||
{"empty", "", 0xef46db3751d8e999},
|
||||
{"a", "a", 0xd24ec4f1a98c6e5b},
|
||||
{"as", "as", 0x1c330fb2d66be179},
|
||||
{"asd", "asd", 0x631c37ce72a97393},
|
||||
{"asdf", "asdf", 0x415872f599cea71e},
|
||||
{
|
||||
"len=63",
|
||||
// Exactly 63 characters, which exercises all code paths.
|
||||
"Call me Ishmael. Some years ago--never mind how long precisely-",
|
||||
0x02a2e85470d6fd96,
|
||||
},
|
||||
} {
|
||||
for chunkSize := 1; chunkSize <= len(tt.input); chunkSize++ {
|
||||
name := fmt.Sprintf("%s,chunkSize=%d", tt.name, chunkSize)
|
||||
t.Run(name, func(t *testing.T) {
|
||||
testDigest(t, tt.input, chunkSize, tt.want)
|
||||
})
|
||||
}
|
||||
t.Run(tt.name, func(t *testing.T) { testSum(t, tt.input, tt.want) })
|
||||
}
|
||||
}
|
||||
|
||||
func testDigest(t *testing.T, input string, chunkSize int, want uint64) {
|
||||
d := New()
|
||||
ds := New() // uses WriteString
|
||||
for i := 0; i < len(input); i += chunkSize {
|
||||
chunk := input[i:]
|
||||
if len(chunk) > chunkSize {
|
||||
chunk = chunk[:chunkSize]
|
||||
}
|
||||
n, err := d.Write([]byte(chunk))
|
||||
if err != nil || n != len(chunk) {
|
||||
t.Fatalf("Digest.Write: got (%d, %v); want (%d, nil)", n, err, len(chunk))
|
||||
}
|
||||
n, err = ds.WriteString(chunk)
|
||||
if err != nil || n != len(chunk) {
|
||||
t.Fatalf("Digest.WriteString: got (%d, %v); want (%d, nil)", n, err, len(chunk))
|
||||
}
|
||||
}
|
||||
if got := d.Sum64(); got != want {
|
||||
t.Fatalf("Digest.Sum64: got 0x%x; want 0x%x", got, want)
|
||||
}
|
||||
if got := ds.Sum64(); got != want {
|
||||
t.Fatalf("Digest.Sum64 (WriteString): got 0x%x; want 0x%x", got, want)
|
||||
}
|
||||
var b [8]byte
|
||||
binary.BigEndian.PutUint64(b[:], want)
|
||||
if got := d.Sum(nil); !bytes.Equal(got, b[:]) {
|
||||
t.Fatalf("Sum: got %v; want %v", got, b[:])
|
||||
}
|
||||
}
|
||||
|
||||
func testSum(t *testing.T, input string, want uint64) {
|
||||
if got := Sum64([]byte(input)); got != want {
|
||||
t.Fatalf("Sum64: got 0x%x; want 0x%x", got, want)
|
||||
}
|
||||
if got := Sum64String(input); got != want {
|
||||
t.Fatalf("Sum64String: got 0x%x; want 0x%x", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReset(t *testing.T) {
|
||||
parts := []string{"The quic", "k br", "o", "wn fox jumps", " ov", "er the lazy ", "dog."}
|
||||
d := New()
|
||||
for _, part := range parts {
|
||||
d.Write([]byte(part))
|
||||
}
|
||||
h0 := d.Sum64()
|
||||
|
||||
d.Reset()
|
||||
d.Write([]byte(strings.Join(parts, "")))
|
||||
h1 := d.Sum64()
|
||||
|
||||
if h0 != h1 {
|
||||
t.Errorf("0x%x != 0x%x", h0, h1)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBinaryMarshaling(t *testing.T) {
|
||||
d := New()
|
||||
d.WriteString("abc")
|
||||
b, err := d.MarshalBinary()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
d = New()
|
||||
d.WriteString("junk")
|
||||
if err := d.UnmarshalBinary(b); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
d.WriteString("def")
|
||||
if got, want := d.Sum64(), Sum64String("abcdef"); got != want {
|
||||
t.Fatalf("after MarshalBinary+UnmarshalBinary, got 0x%x; want 0x%x", got, want)
|
||||
}
|
||||
|
||||
d0 := New()
|
||||
d1 := New()
|
||||
for i := 0; i < 64; i++ {
|
||||
b, err := d0.MarshalBinary()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
d0 = new(Digest)
|
||||
if err := d0.UnmarshalBinary(b); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got, want := d0.Sum64(), d1.Sum64(); got != want {
|
||||
t.Fatalf("after %d Writes, unmarshaled Digest gave sum 0x%x; want 0x%x", i, got, want)
|
||||
}
|
||||
|
||||
d0.Write([]byte{'a'})
|
||||
d1.Write([]byte{'a'})
|
||||
}
|
||||
}
|
||||
|
||||
// sink is a package-level destination for hash results computed by the
// allocation tests.
var sink uint64
|
||||
|
||||
func TestAllocs(t *testing.T) {
|
||||
const shortStr = "abcdefghijklmnop"
|
||||
// Sum64([]byte(shortString)) shouldn't allocate because the
|
||||
// intermediate []byte ought not to escape.
|
||||
// (See https://github.com/cespare/xxhash/pull/2.)
|
||||
t.Run("Sum64", func(t *testing.T) {
|
||||
testAllocs(t, func() {
|
||||
sink = Sum64([]byte(shortStr))
|
||||
})
|
||||
})
|
||||
// Creating and using a Digest shouldn't allocate because its methods
|
||||
// shouldn't make it escape. (A previous version of New returned a
|
||||
// hash.Hash64 which forces an allocation.)
|
||||
t.Run("Digest", func(t *testing.T) {
|
||||
b := []byte("asdf")
|
||||
testAllocs(t, func() {
|
||||
d := New()
|
||||
d.Write(b)
|
||||
sink = d.Sum64()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// testAllocs fails the test if fn performs any allocation, as averaged by
// testing.AllocsPerRun over 10 runs and truncated to an int.
func testAllocs(t *testing.T, fn func()) {
	t.Helper()
	allocs := int(testing.AllocsPerRun(10, fn))
	if allocs <= 0 {
		return
	}
	t.Fatalf("got %d allocation(s) (want zero)", allocs)
}
|
|
@ -0,0 +1,46 @@
|
|||
// +build !appengine
|
||||
|
||||
// This file encapsulates usage of unsafe.
|
||||
// xxhash_safe.go contains the safe implementations.
|
||||
|
||||
package xxhash
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Notes:
|
||||
//
|
||||
// See https://groups.google.com/d/msg/golang-nuts/dcjzJy-bSpw/tcZYBzQqAQAJ
|
||||
// for some discussion about these unsafe conversions.
|
||||
//
|
||||
// In the future it's possible that compiler optimizations will make these
|
||||
// unsafe operations unnecessary: https://golang.org/issue/2205.
|
||||
//
|
||||
// Both of these wrapper functions still incur function call overhead since they
|
||||
// will not be inlined. We could write Go/asm copies of Sum64 and Digest.Write
|
||||
// for strings to squeeze out a bit more speed. Mid-stack inlining should
|
||||
// eventually fix this.
|
||||
|
||||
// Sum64String computes the 64-bit xxHash digest of s.
|
||||
// It may be faster than Sum64([]byte(s)) by avoiding a copy.
|
||||
func Sum64String(s string) uint64 {
|
||||
var b []byte
|
||||
bh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
|
||||
bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
|
||||
bh.Len = len(s)
|
||||
bh.Cap = len(s)
|
||||
return Sum64(b)
|
||||
}
|
||||
|
||||
// WriteString adds more data to d. It always returns len(s), nil.
|
||||
// It may be faster than Write([]byte(s)) by avoiding a copy.
|
||||
func (d *Digest) WriteString(s string) (n int, err error) {
|
||||
var b []byte
|
||||
bh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
|
||||
bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
|
||||
bh.Len = len(s)
|
||||
bh.Cap = len(s)
|
||||
return d.Write(b)
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
// +build !appengine
|
||||
|
||||
package xxhash
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestStringAllocs(t *testing.T) {
|
||||
longStr := strings.Repeat("a", 1000)
|
||||
t.Run("Sum64String", func(t *testing.T) {
|
||||
testAllocs(t, func() {
|
||||
sink = Sum64String(longStr)
|
||||
})
|
||||
})
|
||||
t.Run("Digest.WriteString", func(t *testing.T) {
|
||||
testAllocs(t, func() {
|
||||
d := New()
|
||||
d.WriteString(longStr)
|
||||
sink = d.Sum64()
|
||||
})
|
||||
})
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
module github.com/cespare/xxhash/xxhashbench
|
||||
|
||||
go 1.13
|
||||
|
||||
require (
|
||||
github.com/OneOfOne/xxhash v1.2.5
|
||||
github.com/cespare/xxhash/v2 v2.0.0-00010101000000-000000000000
|
||||
github.com/spaolacci/murmur3 v1.1.0
|
||||
)
|
||||
|
||||
replace github.com/cespare/xxhash/v2 => ../
|
|
@ -0,0 +1,6 @@
|
|||
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
|
||||
github.com/OneOfOne/xxhash v1.2.5 h1:zl/OfRA6nftbBK9qTohYBJ5xvw6C/oNKizR7cZGl3cI=
|
||||
github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
|
||||
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
|
||||
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
|
||||
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
|
|
@ -0,0 +1,160 @@
|
|||
package xxhashbench
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"hash/crc32"
|
||||
"hash/fnv"
|
||||
"testing"
|
||||
|
||||
OneOfOne "github.com/OneOfOne/xxhash"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
"github.com/spaolacci/murmur3"
|
||||
)
|
||||
|
||||
// sink is a package-level destination for the hash results computed inside
// the benchmark loops.
var sink uint64
|
||||
|
||||
var benchmarks = []struct {
|
||||
name string
|
||||
directBytes func([]byte) uint64
|
||||
directString func(string) uint64
|
||||
digestBytes func([]byte) uint64
|
||||
digestString func(string) uint64
|
||||
}{
|
||||
{
|
||||
name: "xxhash",
|
||||
directBytes: xxhash.Sum64,
|
||||
directString: xxhash.Sum64String,
|
||||
digestBytes: func(b []byte) uint64 {
|
||||
h := xxhash.New()
|
||||
h.Write(b)
|
||||
return h.Sum64()
|
||||
},
|
||||
digestString: func(s string) uint64 {
|
||||
h := xxhash.New()
|
||||
h.WriteString(s)
|
||||
return h.Sum64()
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "OneOfOne",
|
||||
directBytes: OneOfOne.Checksum64,
|
||||
directString: OneOfOne.ChecksumString64,
|
||||
digestBytes: func(b []byte) uint64 {
|
||||
h := OneOfOne.New64()
|
||||
h.Write(b)
|
||||
return h.Sum64()
|
||||
},
|
||||
digestString: func(s string) uint64 {
|
||||
h := OneOfOne.New64()
|
||||
h.WriteString(s)
|
||||
return h.Sum64()
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "murmur3",
|
||||
directBytes: murmur3.Sum64,
|
||||
directString: func(s string) uint64 {
|
||||
return murmur3.Sum64([]byte(s))
|
||||
},
|
||||
digestBytes: func(b []byte) uint64 {
|
||||
h := murmur3.New64()
|
||||
h.Write(b)
|
||||
return h.Sum64()
|
||||
},
|
||||
digestString: func(s string) uint64 {
|
||||
h := murmur3.New64()
|
||||
h.Write([]byte(s))
|
||||
return h.Sum64()
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CRC-32",
|
||||
directBytes: func(b []byte) uint64 {
|
||||
return uint64(crc32.ChecksumIEEE(b))
|
||||
},
|
||||
directString: func(s string) uint64 {
|
||||
return uint64(crc32.ChecksumIEEE([]byte(s)))
|
||||
},
|
||||
digestBytes: func(b []byte) uint64 {
|
||||
h := crc32.NewIEEE()
|
||||
h.Write(b)
|
||||
return uint64(h.Sum32())
|
||||
},
|
||||
digestString: func(s string) uint64 {
|
||||
h := crc32.NewIEEE()
|
||||
h.Write([]byte(s))
|
||||
return uint64(h.Sum32())
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FNV-1a",
|
||||
digestBytes: func(b []byte) uint64 {
|
||||
h := fnv.New64()
|
||||
h.Write(b)
|
||||
return h.Sum64()
|
||||
},
|
||||
digestString: func(s string) uint64 {
|
||||
h := fnv.New64a()
|
||||
h.Write([]byte(s))
|
||||
return h.Sum64()
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func BenchmarkHashes(b *testing.B) {
|
||||
for _, bb := range benchmarks {
|
||||
for _, benchSize := range []struct {
|
||||
name string
|
||||
n int
|
||||
}{
|
||||
{"5B", 5},
|
||||
{"100B", 100},
|
||||
{"4KB", 4e3},
|
||||
{"10MB", 10e6},
|
||||
} {
|
||||
input := make([]byte, benchSize.n)
|
||||
for i := range input {
|
||||
input[i] = byte(i)
|
||||
}
|
||||
inputString := string(input)
|
||||
if bb.directBytes != nil {
|
||||
name := fmt.Sprintf("%s,direct,bytes,n=%s", bb.name, benchSize.name)
|
||||
b.Run(name, func(b *testing.B) {
|
||||
benchmarkHashBytes(b, input, bb.directBytes)
|
||||
})
|
||||
}
|
||||
if bb.directString != nil {
|
||||
name := fmt.Sprintf("%s,direct,string,n=%s", bb.name, benchSize.name)
|
||||
b.Run(name, func(b *testing.B) {
|
||||
benchmarkHashString(b, inputString, bb.directString)
|
||||
})
|
||||
}
|
||||
if bb.digestBytes != nil {
|
||||
name := fmt.Sprintf("%s,digest,bytes,n=%s", bb.name, benchSize.name)
|
||||
b.Run(name, func(b *testing.B) {
|
||||
benchmarkHashBytes(b, input, bb.digestBytes)
|
||||
})
|
||||
}
|
||||
if bb.digestString != nil {
|
||||
name := fmt.Sprintf("%s,digest,string,n=%s", bb.name, benchSize.name)
|
||||
b.Run(name, func(b *testing.B) {
|
||||
benchmarkHashString(b, inputString, bb.digestString)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkHashBytes(b *testing.B, input []byte, fn func([]byte) uint64) {
|
||||
b.SetBytes(int64(len(input)))
|
||||
for i := 0; i < b.N; i++ {
|
||||
sink = fn(input)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkHashString(b *testing.B, input string, fn func(string) uint64) {
|
||||
b.SetBytes(int64(len(input)))
|
||||
for i := 0; i < b.N; i++ {
|
||||
sink = fn(input)
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
/xxhsum
|
|
@ -0,0 +1,50 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if contains(os.Args[1:], "-h") {
|
||||
fmt.Fprintf(os.Stderr, `Usage:
|
||||
%s [filenames]
|
||||
If no filenames are provided or only - is given, input is read from stdin.
|
||||
`, os.Args[0])
|
||||
os.Exit(1)
|
||||
}
|
||||
if len(os.Args) < 2 || len(os.Args) == 2 && os.Args[1] == "-" {
|
||||
printHash(os.Stdin, "-")
|
||||
return
|
||||
}
|
||||
for _, path := range os.Args[1:] {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
continue
|
||||
}
|
||||
printHash(f, path)
|
||||
f.Close()
|
||||
}
|
||||
}
|
||||
|
||||
// contains reports whether s is an element of ss.
func contains(ss []string, s string) bool {
	for i := 0; i < len(ss); i++ {
		if ss[i] == s {
			return true
		}
	}
	return false
}
|
||||
|
||||
func printHash(r io.Reader, name string) {
|
||||
h := xxhash.New()
|
||||
if _, err := io.Copy(h, r); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
return
|
||||
}
|
||||
fmt.Printf("%016x %s\n", h.Sum64(), name)
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
Copyright 2019, Joshua J Baker
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software for any
|
||||
purpose with or without fee is hereby granted, provided that the above
|
||||
copyright notice and this permission notice appear in all copies.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
||||
OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
@ -0,0 +1,51 @@
|
|||
# `rhh` (Robin Hood Hashmap)
|
||||
|
||||
[![GoDoc](https://img.shields.io/badge/api-reference-blue.svg?style=flat-square)](https://godoc.org/github.com/tidwall/rhh)
|
||||
|
||||
A simple and efficient hashmap package for Go using the
|
||||
[`xxhash`](http://www.xxhash.com) algorithm,
|
||||
[open addressing](https://en.wikipedia.org/wiki/Hash_table#Open_addressing), and
|
||||
[robin hood hashing](https://en.wikipedia.org/wiki/Hash_table#Robin_Hood_hashing).
|
||||
|
||||
This is an alternative to the standard [Go map](https://golang.org/ref/spec#Map_types).
|
||||
|
||||
# Getting Started
|
||||
|
||||
## Installing
|
||||
|
||||
To start using `rhh`, install Go and run `go get`:
|
||||
|
||||
```sh
|
||||
$ go get -u github.com/tidwall/rhh
|
||||
```
|
||||
|
||||
This will retrieve the library.
|
||||
|
||||
## Usage
|
||||
|
||||
The `Map` type works similarly to a standard Go map, and includes four methods:
|
||||
`Set`, `Get`, `Delete`, `Len`.
|
||||
|
||||
```go
|
||||
var m rhh.Map
|
||||
m.Set("Hello", "Dolly!")
|
||||
val, _ := m.Get("Hello")
|
||||
fmt.Printf("%v\n", val)
|
||||
val, _ = m.Delete("Hello")
|
||||
fmt.Printf("%v\n", val)
|
||||
val, _ = m.Get("Hello")
|
||||
fmt.Printf("%v\n", val)
|
||||
|
||||
// Output:
|
||||
// Dolly!
|
||||
// Dolly!
|
||||
// <nil>
|
||||
```
|
||||
|
||||
## Contact
|
||||
|
||||
Josh Baker [@tidwall](http://twitter.com/tidwall)
|
||||
|
||||
## License
|
||||
|
||||
`rhh` source code is available under the ISC [License](/LICENSE).
|
|
@ -0,0 +1,205 @@
|
|||
// Copyright 2019 Joshua J Baker. All rights reserved.
|
||||
// Use of this source code is governed by an ISC-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rhh
|
||||
|
||||
import (
|
||||
"github.com/cespare/xxhash"
|
||||
)
|
||||
|
||||
const (
|
||||
loadFactor = 0.85 // must be above 50%
|
||||
dibBitSize = 16 // 0xFFFF
|
||||
hashBitSize = 64 - dibBitSize // 0xFFFFFFFFFFFF
|
||||
maxHash = ^uint64(0) >> dibBitSize // max 281,474,976,710,655
|
||||
maxDIB = ^uint64(0) >> hashBitSize // max 65,535
|
||||
)
|
||||
|
||||
// entry is one bucket of a Map. The hash and the dib share a single word:
// the hash sits in the upper 48 bits and the dib in the lower 16 bits.
// A dib of zero marks the bucket as empty.
type entry struct {
	hdib  uint64      // bitfield { hash:48 dib:16 }
	key   string      // user key
	value interface{} // user value
}
|
||||
|
||||
func (e *entry) dib() int {
|
||||
return int(e.hdib & maxDIB)
|
||||
}
|
||||
func (e *entry) hash() int {
|
||||
return int(e.hdib >> dibBitSize)
|
||||
}
|
||||
func (e *entry) setDIB(dib int) {
|
||||
e.hdib = e.hdib>>dibBitSize<<dibBitSize | uint64(dib)&maxDIB
|
||||
}
|
||||
func (e *entry) setHash(hash int) {
|
||||
e.hdib = uint64(hash)<<dibBitSize | e.hdib&maxDIB
|
||||
}
|
||||
func makeHDIB(hash, dib int) uint64 {
|
||||
return uint64(hash)<<dibBitSize | uint64(dib)&maxDIB
|
||||
}
|
||||
|
||||
// hash returns a 48-bit hash for 64-bit environments, or 32-bit hash for
|
||||
// 32-bit environments.
|
||||
func (m *Map) hash(key string) int {
|
||||
return int(xxhash.Sum64String(key) >> dibBitSize)
|
||||
}
|
||||
|
||||
// Map is a hashmap. Like map[string]interface{}
type Map struct {
	cap      int     // capacity passed to New; kept across resizes
	length   int     // number of stored entries
	mask     int     // len(buckets) - 1, used to wrap bucket indexes
	growAt   int     // Set doubles the table once length reaches this
	shrinkAt int     // remove may shrink the table once length drops to this
	buckets  []entry // bucket array; its length is a power of two
}
|
||||
|
||||
// New returns a new Map. Like map[string]interface{}
|
||||
func New(cap int) *Map {
|
||||
m := new(Map)
|
||||
m.cap = cap
|
||||
sz := 8
|
||||
for sz < m.cap {
|
||||
sz *= 2
|
||||
}
|
||||
m.buckets = make([]entry, sz)
|
||||
m.mask = len(m.buckets) - 1
|
||||
m.growAt = int(float64(len(m.buckets)) * loadFactor)
|
||||
m.shrinkAt = int(float64(len(m.buckets)) * (1 - loadFactor))
|
||||
return m
|
||||
}
|
||||
|
||||
func (m *Map) resize(newCap int) {
|
||||
nmap := New(newCap)
|
||||
for i := 0; i < len(m.buckets); i++ {
|
||||
if m.buckets[i].dib() > 0 {
|
||||
nmap.set(m.buckets[i].hash(), m.buckets[i].key, m.buckets[i].value)
|
||||
}
|
||||
}
|
||||
cap := m.cap
|
||||
*m = *nmap
|
||||
m.cap = cap
|
||||
}
|
||||
|
||||
// Set assigns a value to a key.
|
||||
// Returns the previous value, or false when no value was assigned.
|
||||
func (m *Map) Set(key string, value interface{}) (interface{}, bool) {
|
||||
if len(m.buckets) == 0 {
|
||||
*m = *New(0)
|
||||
}
|
||||
if m.length >= m.growAt {
|
||||
m.resize(len(m.buckets) * 2)
|
||||
}
|
||||
return m.set(m.hash(key), key, value)
|
||||
}
|
||||
|
||||
func (m *Map) set(hash int, key string, value interface{}) (interface{}, bool) {
|
||||
e := entry{makeHDIB(hash, 1), key, value}
|
||||
i := e.hash() & m.mask
|
||||
for {
|
||||
if m.buckets[i].dib() == 0 {
|
||||
m.buckets[i] = e
|
||||
m.length++
|
||||
return nil, false
|
||||
}
|
||||
if e.hash() == m.buckets[i].hash() && e.key == m.buckets[i].key {
|
||||
old := m.buckets[i].value
|
||||
m.buckets[i].value = e.value
|
||||
return old, true
|
||||
}
|
||||
if m.buckets[i].dib() < e.dib() {
|
||||
e, m.buckets[i] = m.buckets[i], e
|
||||
}
|
||||
i = (i + 1) & m.mask
|
||||
e.setDIB(e.dib() + 1)
|
||||
}
|
||||
}
|
||||
|
||||
// Get returns a value for a key.
|
||||
// Returns false when no value has been assign for key.
|
||||
func (m *Map) Get(key string) (interface{}, bool) {
|
||||
if len(m.buckets) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
hash := m.hash(key)
|
||||
i := hash & m.mask
|
||||
for {
|
||||
if m.buckets[i].dib() == 0 {
|
||||
return nil, false
|
||||
}
|
||||
if m.buckets[i].hash() == hash && m.buckets[i].key == key {
|
||||
return m.buckets[i].value, true
|
||||
}
|
||||
i = (i + 1) & m.mask
|
||||
}
|
||||
}
|
||||
|
||||
// Len returns the number of key/value pairs currently stored in the map.
func (m *Map) Len() int {
	return m.length
}
|
||||
|
||||
// Delete deletes a value for a key.
|
||||
// Returns the deleted value, or false when no value was assigned.
|
||||
func (m *Map) Delete(key string) (interface{}, bool) {
|
||||
if len(m.buckets) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
hash := m.hash(key)
|
||||
i := hash & m.mask
|
||||
for {
|
||||
if m.buckets[i].dib() == 0 {
|
||||
return nil, false
|
||||
}
|
||||
if m.buckets[i].hash() == hash && m.buckets[i].key == key {
|
||||
old := m.buckets[i].value
|
||||
m.remove(i)
|
||||
return old, true
|
||||
}
|
||||
i = (i + 1) & m.mask
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Map) remove(i int) {
|
||||
m.buckets[i].setDIB(0)
|
||||
for {
|
||||
pi := i
|
||||
i = (i + 1) & m.mask
|
||||
if m.buckets[i].dib() <= 1 {
|
||||
m.buckets[pi] = entry{}
|
||||
break
|
||||
}
|
||||
m.buckets[pi] = m.buckets[i]
|
||||
m.buckets[pi].setDIB(m.buckets[pi].dib() - 1)
|
||||
}
|
||||
m.length--
|
||||
if len(m.buckets) > m.cap && m.length <= m.shrinkAt {
|
||||
m.resize(m.length)
|
||||
}
|
||||
}
|
||||
|
||||
// Range iterates over all key/values.
|
||||
// It's not safe to call or Set or Delete while ranging.
|
||||
func (m *Map) Range(iter func(key string, value interface{}) bool) {
|
||||
for i := 0; i < len(m.buckets); i++ {
|
||||
if m.buckets[i].dib() > 0 {
|
||||
if !iter(m.buckets[i].key, m.buckets[i].value) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// GetPos gets a single keys/value nearby a position
|
||||
// The pos param can be any valid uint64. Useful for grabbing a random item
|
||||
// from the map.
|
||||
// It's not safe to call or Set or Delete while ranging.
|
||||
func (m *Map) GetPos(pos uint64) (key string, value interface{}, ok bool) {
|
||||
for i := 0; i < len(m.buckets); i++ {
|
||||
index := (pos + uint64(i)) & uint64(m.mask)
|
||||
if m.buckets[index].dib() > 0 {
|
||||
return m.buckets[index].key, m.buckets[index].value, true
|
||||
}
|
||||
}
|
||||
return "", nil, false
|
||||
}
|
|
@ -0,0 +1,324 @@
|
|||
// Copyright 2019 Joshua J Baker. All rights reserved.
|
||||
// Use of this source code is governed by an ISC-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rhh
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/tidwall/lotsa"
|
||||
)
|
||||
|
||||
// keyT is the key type used throughout these tests.
type keyT = string
|
||||
// valueT is the value type used throughout these tests.
type valueT = interface{}
|
||||
|
||||
func k(key int) keyT {
|
||||
return strconv.FormatInt(int64(key), 10)
|
||||
}
|
||||
|
||||
func add(x keyT, delta int) int {
|
||||
i, err := strconv.ParseInt(x, 10, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return int(i + int64(delta))
|
||||
}
|
||||
|
||||
///////////////////////////
|
||||
func random(N int, perm bool) []keyT {
|
||||
nums := make([]keyT, N)
|
||||
if perm {
|
||||
for i, x := range rand.Perm(N) {
|
||||
nums[i] = k(x)
|
||||
}
|
||||
} else {
|
||||
m := make(map[keyT]bool)
|
||||
for len(m) < N {
|
||||
m[k(int(rand.Uint64()))] = true
|
||||
}
|
||||
var i int
|
||||
for k := range m {
|
||||
nums[i] = k
|
||||
i++
|
||||
}
|
||||
}
|
||||
return nums
|
||||
}
|
||||
|
||||
func shuffle(nums []keyT) {
|
||||
for i := range nums {
|
||||
j := rand.Intn(i + 1)
|
||||
nums[i], nums[j] = nums[j], nums[i]
|
||||
}
|
||||
}
|
||||
|
||||
// init seeds math/rand from the current time and prints the seed.
func init() {
	// To replay a run, replace the line below with a fixed seed, e.g.
	// var seed int64 = 1519776033517775607
	seed := time.Now().UnixNano()
	println("seed:", seed)
	rand.Seed(seed)
}
|
||||
|
||||
func TestRandomData(t *testing.T) {
|
||||
N := 10000
|
||||
start := time.Now()
|
||||
for time.Since(start) < time.Second*2 {
|
||||
nums := random(N, true)
|
||||
var m *Map
|
||||
switch rand.Int() % 5 {
|
||||
default:
|
||||
m = New(N / ((rand.Int() % 3) + 1))
|
||||
case 1:
|
||||
m = new(Map)
|
||||
case 2:
|
||||
m = New(0)
|
||||
}
|
||||
v, ok := m.Get(k(999))
|
||||
if ok || v != nil {
|
||||
t.Fatalf("expected %v, got %v", nil, v)
|
||||
}
|
||||
v, ok = m.Delete(k(999))
|
||||
if ok || v != nil {
|
||||
t.Fatalf("expected %v, got %v", nil, v)
|
||||
}
|
||||
if m.Len() != 0 {
|
||||
t.Fatalf("expected %v, got %v", 0, m.Len())
|
||||
}
|
||||
// set a bunch of items
|
||||
for i := 0; i < len(nums); i++ {
|
||||
v, ok := m.Set(nums[i], nums[i])
|
||||
if ok || v != nil {
|
||||
t.Fatalf("expected %v, got %v", nil, v)
|
||||
}
|
||||
}
|
||||
if m.Len() != N {
|
||||
t.Fatalf("expected %v, got %v", N, m.Len())
|
||||
}
|
||||
// retrieve all the items
|
||||
shuffle(nums)
|
||||
for i := 0; i < len(nums); i++ {
|
||||
v, ok := m.Get(nums[i])
|
||||
if !ok || v == nil || v != nums[i] {
|
||||
t.Fatalf("expected %v, got %v", nums[i], v)
|
||||
}
|
||||
}
|
||||
// replace all the items
|
||||
shuffle(nums)
|
||||
for i := 0; i < len(nums); i++ {
|
||||
v, ok := m.Set(nums[i], add(nums[i], 1))
|
||||
if !ok || v != nums[i] {
|
||||
t.Fatalf("expected %v, got %v", nums[i], v)
|
||||
}
|
||||
}
|
||||
if m.Len() != N {
|
||||
t.Fatalf("expected %v, got %v", N, m.Len())
|
||||
}
|
||||
// retrieve all the items
|
||||
shuffle(nums)
|
||||
for i := 0; i < len(nums); i++ {
|
||||
v, ok := m.Get(nums[i])
|
||||
if !ok || v != add(nums[i], 1) {
|
||||
t.Fatalf("expected %v, got %v", add(nums[i], 1), v)
|
||||
}
|
||||
}
|
||||
// remove half the items
|
||||
shuffle(nums)
|
||||
for i := 0; i < len(nums)/2; i++ {
|
||||
v, ok := m.Delete(nums[i])
|
||||
if !ok || v != add(nums[i], 1) {
|
||||
t.Fatalf("expected %v, got %v", add(nums[i], 1), v)
|
||||
}
|
||||
}
|
||||
if m.Len() != N/2 {
|
||||
t.Fatalf("expected %v, got %v", N/2, m.Len())
|
||||
}
|
||||
// check to make sure that the items have been removed
|
||||
for i := 0; i < len(nums)/2; i++ {
|
||||
v, ok := m.Get(nums[i])
|
||||
if ok || v != nil {
|
||||
t.Fatalf("expected %v, got %v", nil, v)
|
||||
}
|
||||
}
|
||||
// check the second half of the items
|
||||
for i := len(nums) / 2; i < len(nums); i++ {
|
||||
v, ok := m.Get(nums[i])
|
||||
if !ok || v != add(nums[i], 1) {
|
||||
t.Fatalf("expected %v, got %v", add(nums[i], 1), v)
|
||||
}
|
||||
}
|
||||
// try to delete again, make sure they don't exist
|
||||
for i := 0; i < len(nums)/2; i++ {
|
||||
v, ok := m.Delete(nums[i])
|
||||
if ok || v != nil {
|
||||
t.Fatalf("expected %v, got %v", nil, v)
|
||||
}
|
||||
}
|
||||
if m.Len() != N/2 {
|
||||
t.Fatalf("expected %v, got %v", N/2, m.Len())
|
||||
}
|
||||
m.Range(func(key keyT, value valueT) bool {
|
||||
if value != add(key, 1) {
|
||||
t.Fatalf("expected %v, got %v", add(key, 1), value)
|
||||
}
|
||||
return true
|
||||
})
|
||||
var n int
|
||||
m.Range(func(key keyT, value valueT) bool {
|
||||
n++
|
||||
return false
|
||||
})
|
||||
if n != 1 {
|
||||
t.Fatalf("expected %v, got %v", 1, n)
|
||||
}
|
||||
for i := len(nums) / 2; i < len(nums); i++ {
|
||||
v, ok := m.Delete(nums[i])
|
||||
if !ok || v != add(nums[i], 1) {
|
||||
t.Fatalf("expected %v, got %v", add(nums[i], 1), v)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBench(t *testing.T) {
|
||||
N, _ := strconv.ParseUint(os.Getenv("MAPBENCH"), 10, 64)
|
||||
if N == 0 {
|
||||
fmt.Printf("Enable benchmarks with MAPBENCH=1000000\n")
|
||||
return
|
||||
}
|
||||
nums := random(int(N), false)
|
||||
var pnums []valueT
|
||||
for i := range nums {
|
||||
pnums = append(pnums, valueT(&nums[i]))
|
||||
}
|
||||
fmt.Printf("\n## STRING KEYS\n\n")
|
||||
t.Run("RobinHood", func(t *testing.T) {
|
||||
testPerf(nums, pnums, "robinhood")
|
||||
})
|
||||
t.Run("Stdlib", func(t *testing.T) {
|
||||
testPerf(nums, pnums, "stdlib")
|
||||
})
|
||||
}
|
||||
|
||||
// printItem prints s padded with spaces to at least size bytes, followed by
// one space. With dir == -1 the padding goes after s (left-aligned);
// otherwise it goes before s (right-aligned).
func printItem(s string, size int, dir int) {
	padding := ""
	for width := len(s); width < size; width++ {
		padding += " "
	}
	if dir == -1 {
		s = s + padding
	} else {
		s = padding + s
	}
	fmt.Printf("%s ", s)
}
|
||||
|
||||
func testPerf(nums []keyT, pnums []valueT, which keyT) {
|
||||
var ms1, ms2 runtime.MemStats
|
||||
initSize := 0 //len(nums) * 2
|
||||
defer func() {
|
||||
heapBytes := int(ms2.HeapAlloc - ms1.HeapAlloc)
|
||||
fmt.Printf("memory %13s bytes %19s/entry \n",
|
||||
commaize(heapBytes), commaize(heapBytes/len(nums)))
|
||||
fmt.Printf("\n")
|
||||
}()
|
||||
runtime.GC()
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
runtime.ReadMemStats(&ms1)
|
||||
|
||||
var setop, getop, delop func(int, int)
|
||||
var scnop func()
|
||||
switch which {
|
||||
case "stdlib":
|
||||
m := make(map[keyT]valueT, initSize)
|
||||
setop = func(i, _ int) { m[nums[i]] = pnums[i] }
|
||||
getop = func(i, _ int) { _ = m[nums[i]] }
|
||||
delop = func(i, _ int) { delete(m, nums[i]) }
|
||||
scnop = func() {
|
||||
for range m {
|
||||
}
|
||||
}
|
||||
case "robinhood":
|
||||
m := New(initSize)
|
||||
setop = func(i, _ int) { m.Set(nums[i], pnums[i]) }
|
||||
getop = func(i, _ int) { m.Get(nums[i]) }
|
||||
delop = func(i, _ int) { m.Delete(nums[i]) }
|
||||
scnop = func() {
|
||||
m.Range(func(key keyT, value valueT) bool {
|
||||
return true
|
||||
})
|
||||
}
|
||||
}
|
||||
fmt.Printf("-- %s --", which)
|
||||
fmt.Printf("\n")
|
||||
|
||||
ops := []func(int, int){setop, getop, setop, nil, delop}
|
||||
tags := []keyT{"set", "get", "reset", "scan", "delete"}
|
||||
for i := range ops {
|
||||
shuffle(nums)
|
||||
var na bool
|
||||
var n int
|
||||
start := time.Now()
|
||||
if tags[i] == "scan" {
|
||||
op := scnop
|
||||
if op == nil {
|
||||
na = true
|
||||
} else {
|
||||
n = 20
|
||||
lotsa.Ops(n, 1, func(_, _ int) { op() })
|
||||
}
|
||||
|
||||
} else {
|
||||
n = len(nums)
|
||||
lotsa.Ops(n, 1, ops[i])
|
||||
}
|
||||
dur := time.Since(start)
|
||||
if i == 0 {
|
||||
runtime.GC()
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
runtime.ReadMemStats(&ms2)
|
||||
}
|
||||
printItem(tags[i], 9, -1)
|
||||
if na {
|
||||
printItem("-- unavailable --", 14, 1)
|
||||
} else {
|
||||
if n == -1 {
|
||||
printItem("unknown ops", 14, 1)
|
||||
} else {
|
||||
printItem(fmt.Sprintf("%s ops", commaize(n)), 14, 1)
|
||||
}
|
||||
printItem(fmt.Sprintf("%.0fms", dur.Seconds()*1000), 8, 1)
|
||||
if n != -1 {
|
||||
printItem(fmt.Sprintf("%s/sec", commaize(int(float64(n)/dur.Seconds()))), 18, 1)
|
||||
}
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
}
|
||||
}
|
||||
|
||||
// commaize formats n in base 10 with a comma between each group of three
// digits, counted from the right, e.g. 1234567 -> "1,234,567".
// For negative numbers the minus sign is placed in front of the first digit
// group and is never followed directly by a comma (-123 -> "-123").
func commaize(n int) string {
	digits := fmt.Sprintf("%d", n)
	sign := ""
	if n < 0 {
		// Split the sign off so it is not counted as a digit when grouping.
		sign, digits = digits[:1], digits[1:]
	}
	out := make([]byte, 0, len(sign)+len(digits)+len(digits)/3)
	out = append(out, sign...)
	for i := 0; i < len(digits); i++ {
		// A comma goes before every digit that starts a full group of three,
		// except at the very front.
		if i > 0 && (len(digits)-i)%3 == 0 {
			out = append(out, ',')
		}
		out = append(out, digits[i])
	}
	return string(out)
}
|
||||
|
||||
func TestHashDIB(t *testing.T) {
|
||||
var e entry
|
||||
e.setDIB(100)
|
||||
e.setHash(90000)
|
||||
if e.dib() != 100 {
|
||||
t.Fatalf("expected %v, got %v", 100, e.dib())
|
||||
}
|
||||
if e.hash() != 90000 {
|
||||
t.Fatalf("expected %v, got %v", 90000, e.hash())
|
||||
}
|
||||
}
|
|
@ -0,0 +1,185 @@
|
|||
// Copyright 2019 Joshua J Baker. All rights reserved.
|
||||
// Use of this source code is governed by an ISC-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rhh
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cespare/xxhash"
|
||||
)
|
||||
|
||||
type entryU64 struct {
|
||||
hdib uint64 // bitfield { hash:48 dib:16 }
|
||||
key uint64 // user key
|
||||
value interface{} // user value
|
||||
}
|
||||
|
||||
func (e *entryU64) dib() int {
|
||||
return int(e.hdib & maxDIB)
|
||||
}
|
||||
func (e *entryU64) hash() int {
|
||||
return int(e.hdib >> dibBitSize)
|
||||
}
|
||||
func (e *entryU64) setDIB(dib int) {
|
||||
e.hdib = e.hdib>>dibBitSize<<dibBitSize | uint64(dib)&maxDIB
|
||||
}
|
||||
func (e *entryU64) setHash(hash int) {
|
||||
e.hdib = uint64(hash)<<dibBitSize | e.hdib&maxDIB
|
||||
}
|
||||
|
||||
// hash returns a 48-bit hash for 64-bit environments, or 32-bit hash for
|
||||
// 32-bit environments.
|
||||
func (m *MapU64) hash(key uint64) int {
|
||||
return int(xxhash.Sum64(*(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
||||
Data: uintptr(unsafe.Pointer(&key)), Len: 8, Cap: 8,
|
||||
}))) >> dibBitSize)
|
||||
}
|
||||
|
||||
// MapU64 is a map. Like map[uint64]interface{}
|
||||
type MapU64 struct {
|
||||
cap int
|
||||
length int
|
||||
mask int
|
||||
growAt int
|
||||
shrinkAt int
|
||||
buckets []entryU64
|
||||
}
|
||||
|
||||
// NewU64 returns a new map. Like map[uint64]interface{}
|
||||
func NewU64(cap int) *MapU64 {
|
||||
m := new(MapU64)
|
||||
m.cap = cap
|
||||
sz := 8
|
||||
for sz < m.cap {
|
||||
sz *= 2
|
||||
}
|
||||
m.buckets = make([]entryU64, sz)
|
||||
m.mask = len(m.buckets) - 1
|
||||
m.growAt = int(float64(len(m.buckets)) * loadFactor)
|
||||
m.shrinkAt = int(float64(len(m.buckets)) * (1 - loadFactor))
|
||||
return m
|
||||
}
|
||||
|
||||
func (m *MapU64) resize(newCap int) {
|
||||
nmap := NewU64(newCap)
|
||||
for i := 0; i < len(m.buckets); i++ {
|
||||
if m.buckets[i].dib() > 0 {
|
||||
nmap.set(m.buckets[i].hash(), m.buckets[i].key, m.buckets[i].value)
|
||||
}
|
||||
}
|
||||
cap := m.cap
|
||||
*m = *nmap
|
||||
m.cap = cap
|
||||
}
|
||||
|
||||
// Set assigns a value to a key.
|
||||
// Returns the previous value, or false when no value was assigned.
|
||||
func (m *MapU64) Set(key uint64, value interface{}) (interface{}, bool) {
|
||||
if len(m.buckets) == 0 {
|
||||
*m = *NewU64(0)
|
||||
}
|
||||
if m.length >= m.growAt {
|
||||
m.resize(len(m.buckets) * 2)
|
||||
}
|
||||
return m.set(m.hash(key), key, value)
|
||||
}
|
||||
|
||||
func (m *MapU64) set(hash int, key uint64, value interface{}) (interface{}, bool) {
|
||||
e := entryU64{makeHDIB(hash, 1), key, value}
|
||||
i := e.hash() & m.mask
|
||||
for {
|
||||
if m.buckets[i].dib() == 0 {
|
||||
m.buckets[i] = e
|
||||
m.length++
|
||||
return nil, false
|
||||
}
|
||||
if e.hash() == m.buckets[i].hash() && e.key == m.buckets[i].key {
|
||||
old := m.buckets[i].value
|
||||
m.buckets[i].value = e.value
|
||||
return old, true
|
||||
}
|
||||
if m.buckets[i].dib() < e.dib() {
|
||||
e, m.buckets[i] = m.buckets[i], e
|
||||
}
|
||||
i = (i + 1) & m.mask
|
||||
e.setDIB(e.dib() + 1)
|
||||
}
|
||||
}
|
||||
|
||||
// Get returns a value for a key.
|
||||
// Returns false when no value has been assign for key.
|
||||
func (m *MapU64) Get(key uint64) (interface{}, bool) {
|
||||
if len(m.buckets) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
hash := m.hash(key)
|
||||
i := hash & m.mask
|
||||
for {
|
||||
if m.buckets[i].dib() == 0 {
|
||||
return nil, false
|
||||
}
|
||||
if m.buckets[i].hash() == hash && m.buckets[i].key == key {
|
||||
return m.buckets[i].value, true
|
||||
}
|
||||
i = (i + 1) & m.mask
|
||||
}
|
||||
}
|
||||
|
||||
// Len returns the number of values in map.
|
||||
func (m *MapU64) Len() int {
|
||||
return m.length
|
||||
}
|
||||
|
||||
// Delete deletes a value for a key.
|
||||
// Returns the deleted value, or false when no value was assigned.
|
||||
func (m *MapU64) Delete(key uint64) (interface{}, bool) {
|
||||
if len(m.buckets) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
hash := m.hash(key)
|
||||
i := hash & m.mask
|
||||
for {
|
||||
if m.buckets[i].dib() == 0 {
|
||||
return nil, false
|
||||
}
|
||||
if m.buckets[i].hash() == hash && m.buckets[i].key == key {
|
||||
old := m.buckets[i].value
|
||||
m.remove(i)
|
||||
return old, true
|
||||
}
|
||||
i = (i + 1) & m.mask
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MapU64) remove(i int) {
|
||||
m.buckets[i].setDIB(0)
|
||||
for {
|
||||
pi := i
|
||||
i = (i + 1) & m.mask
|
||||
if m.buckets[i].dib() <= 1 {
|
||||
m.buckets[pi] = entryU64{}
|
||||
break
|
||||
}
|
||||
m.buckets[pi] = m.buckets[i]
|
||||
m.buckets[pi].setDIB(m.buckets[pi].dib() - 1)
|
||||
}
|
||||
m.length--
|
||||
if len(m.buckets) > m.cap && m.length <= m.shrinkAt {
|
||||
m.resize(m.length)
|
||||
}
|
||||
}
|
||||
|
||||
// Range iterates overall all key/values.
|
||||
// It's not safe to call or Set or Delete while ranging.
|
||||
func (m *MapU64) Range(iter func(key uint64, value interface{}) bool) {
|
||||
for i := 0; i < len(m.buckets); i++ {
|
||||
if m.buckets[i].dib() > 0 {
|
||||
if !iter(m.buckets[i].key, m.buckets[i].value) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,298 @@
|
|||
// Copyright 2019 Joshua J Baker. All rights reserved.
|
||||
// Use of this source code is governed by an ISC-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rhh
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/tidwall/lotsa"
|
||||
)
|
||||
|
||||
// Key and value types used by the MapU64 tests and benchmarks.
type (
	keyTU64   = uint64
	valueTU64 = interface{}
)
|
||||
|
||||
func kU64(key int) keyTU64 {
|
||||
return uint64(key)
|
||||
}
|
||||
|
||||
func addU64(x keyTU64, delta int) int {
|
||||
return int(x) + delta
|
||||
}
|
||||
|
||||
///////////////////////////
|
||||
func randomU64(N int, perm bool) []keyTU64 {
|
||||
nums := make([]keyTU64, N)
|
||||
if perm {
|
||||
for i, x := range rand.Perm(N) {
|
||||
nums[i] = kU64(x)
|
||||
}
|
||||
} else {
|
||||
m := make(map[keyTU64]bool)
|
||||
for len(m) < N {
|
||||
m[kU64(int(rand.Uint64()))] = true
|
||||
}
|
||||
var i int
|
||||
for k := range m {
|
||||
nums[i] = k
|
||||
i++
|
||||
}
|
||||
}
|
||||
return nums
|
||||
}
|
||||
|
||||
func shuffleU64(nums []keyTU64) {
|
||||
for i := range nums {
|
||||
j := rand.Intn(i + 1)
|
||||
nums[i], nums[j] = nums[j], nums[i]
|
||||
}
|
||||
}
|
||||
|
||||
// init seeds math/rand from the current time and prints the seed, so a run
// can be reproduced by hard-coding the printed value below.
func init() {
	// var seed int64 = 1519776033517775607
	now := time.Now()
	seed := now.UnixNano()
	println("seed:", seed)
	rand.Seed(seed)
}
|
||||
|
||||
func TestRandomDataU64(t *testing.T) {
|
||||
N := 10000
|
||||
start := time.Now()
|
||||
for time.Since(start) < time.Second*2 {
|
||||
nums := randomU64(N, true)
|
||||
var m *MapU64
|
||||
switch rand.Int() % 5 {
|
||||
default:
|
||||
m = NewU64(N / ((rand.Int() % 3) + 1))
|
||||
case 1:
|
||||
m = new(MapU64)
|
||||
case 2:
|
||||
m = NewU64(0)
|
||||
}
|
||||
v, ok := m.Get(kU64(999))
|
||||
if ok || v != nil {
|
||||
t.Fatalf("expected %v, got %v", nil, v)
|
||||
}
|
||||
v, ok = m.Delete(kU64(999))
|
||||
if ok || v != nil {
|
||||
t.Fatalf("expected %v, got %v", nil, v)
|
||||
}
|
||||
if m.Len() != 0 {
|
||||
t.Fatalf("expected %v, got %v", 0, m.Len())
|
||||
}
|
||||
// set a bunch of items
|
||||
for i := 0; i < len(nums); i++ {
|
||||
v, ok := m.Set(nums[i], nums[i])
|
||||
if ok || v != nil {
|
||||
t.Fatalf("expected %v, got %v", nil, v)
|
||||
}
|
||||
}
|
||||
if m.Len() != N {
|
||||
t.Fatalf("expected %v, got %v", N, m.Len())
|
||||
}
|
||||
// retrieve all the items
|
||||
shuffleU64(nums)
|
||||
for i := 0; i < len(nums); i++ {
|
||||
v, ok := m.Get(nums[i])
|
||||
if !ok || v == nil || v != nums[i] {
|
||||
t.Fatalf("expected %v, got %v", nums[i], v)
|
||||
}
|
||||
}
|
||||
// replace all the items
|
||||
shuffleU64(nums)
|
||||
for i := 0; i < len(nums); i++ {
|
||||
v, ok := m.Set(nums[i], addU64(nums[i], 1))
|
||||
if !ok || v != nums[i] {
|
||||
t.Fatalf("expected %v, got %v", nums[i], v)
|
||||
}
|
||||
}
|
||||
if m.Len() != N {
|
||||
t.Fatalf("expected %v, got %v", N, m.Len())
|
||||
}
|
||||
// retrieve all the items
|
||||
shuffleU64(nums)
|
||||
for i := 0; i < len(nums); i++ {
|
||||
v, ok := m.Get(nums[i])
|
||||
if !ok || v != addU64(nums[i], 1) {
|
||||
t.Fatalf("expected %v, got %v", addU64(nums[i], 1), v)
|
||||
}
|
||||
}
|
||||
// remove half the items
|
||||
shuffleU64(nums)
|
||||
for i := 0; i < len(nums)/2; i++ {
|
||||
v, ok := m.Delete(nums[i])
|
||||
if !ok || v != addU64(nums[i], 1) {
|
||||
t.Fatalf("expected %v, got %v", addU64(nums[i], 1), v)
|
||||
}
|
||||
}
|
||||
if m.Len() != N/2 {
|
||||
t.Fatalf("expected %v, got %v", N/2, m.Len())
|
||||
}
|
||||
// check to make sure that the items have been removed
|
||||
for i := 0; i < len(nums)/2; i++ {
|
||||
v, ok := m.Get(nums[i])
|
||||
if ok || v != nil {
|
||||
t.Fatalf("expected %v, got %v", nil, v)
|
||||
}
|
||||
}
|
||||
// check the second half of the items
|
||||
for i := len(nums) / 2; i < len(nums); i++ {
|
||||
v, ok := m.Get(nums[i])
|
||||
if !ok || v != addU64(nums[i], 1) {
|
||||
t.Fatalf("expected %v, got %v", addU64(nums[i], 1), v)
|
||||
}
|
||||
}
|
||||
// try to delete again, make sure they don't exist
|
||||
for i := 0; i < len(nums)/2; i++ {
|
||||
v, ok := m.Delete(nums[i])
|
||||
if ok || v != nil {
|
||||
t.Fatalf("expected %v, got %v", nil, v)
|
||||
}
|
||||
}
|
||||
if m.Len() != N/2 {
|
||||
t.Fatalf("expected %v, got %v", N/2, m.Len())
|
||||
}
|
||||
m.Range(func(key keyTU64, value valueTU64) bool {
|
||||
if value != addU64(key, 1) {
|
||||
t.Fatalf("expected %v, got %v", addU64(key, 1), value)
|
||||
}
|
||||
return true
|
||||
})
|
||||
var n int
|
||||
m.Range(func(key keyTU64, value valueTU64) bool {
|
||||
n++
|
||||
return false
|
||||
})
|
||||
if n != 1 {
|
||||
t.Fatalf("expected %v, got %v", 1, n)
|
||||
}
|
||||
for i := len(nums) / 2; i < len(nums); i++ {
|
||||
v, ok := m.Delete(nums[i])
|
||||
if !ok || v != addU64(nums[i], 1) {
|
||||
t.Fatalf("expected %v, got %v", addU64(nums[i], 1), v)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBenchU64(t *testing.T) {
|
||||
N, _ := strconv.ParseUint(os.Getenv("MAPBENCH"), 10, 64)
|
||||
if N == 0 {
|
||||
fmt.Printf("Enable benchmarks with MAPBENCH=1000000\n")
|
||||
return
|
||||
}
|
||||
nums := randomU64(int(N), false)
|
||||
var pnums []valueTU64
|
||||
for i := range nums {
|
||||
pnums = append(pnums, valueTU64(&nums[i]))
|
||||
}
|
||||
fmt.Printf("\n## UINT64 KEYS\n\n")
|
||||
t.Run("RobinHood", func(t *testing.T) {
|
||||
testPerfU64(nums, pnums, "robinhood")
|
||||
})
|
||||
t.Run("Stdlib", func(t *testing.T) {
|
||||
testPerfU64(nums, pnums, "stdlib")
|
||||
})
|
||||
}
|
||||
|
||||
func testPerfU64(nums []keyTU64, pnums []valueTU64, which string) {
|
||||
var ms1, ms2 runtime.MemStats
|
||||
initSize := 0 //len(nums) * 2
|
||||
defer func() {
|
||||
heapBytes := int(ms2.HeapAlloc - ms1.HeapAlloc)
|
||||
fmt.Printf("memory %13s bytes %19s/entry \n",
|
||||
commaize(heapBytes), commaize(heapBytes/len(nums)))
|
||||
fmt.Printf("\n")
|
||||
}()
|
||||
runtime.GC()
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
runtime.ReadMemStats(&ms1)
|
||||
|
||||
var setop, getop, delop func(int, int)
|
||||
var scnop func()
|
||||
switch which {
|
||||
case "stdlib":
|
||||
m := make(map[keyTU64]valueTU64, initSize)
|
||||
setop = func(i, _ int) { m[nums[i]] = pnums[i] }
|
||||
getop = func(i, _ int) { _ = m[nums[i]] }
|
||||
delop = func(i, _ int) { delete(m, nums[i]) }
|
||||
scnop = func() {
|
||||
for range m {
|
||||
}
|
||||
}
|
||||
case "robinhood":
|
||||
m := NewU64(initSize)
|
||||
setop = func(i, _ int) { m.Set(nums[i], pnums[i]) }
|
||||
getop = func(i, _ int) { m.Get(nums[i]) }
|
||||
delop = func(i, _ int) { m.Delete(nums[i]) }
|
||||
scnop = func() {
|
||||
m.Range(func(key keyTU64, value valueTU64) bool {
|
||||
return true
|
||||
})
|
||||
}
|
||||
}
|
||||
fmt.Printf("-- %s --", which)
|
||||
fmt.Printf("\n")
|
||||
|
||||
ops := []func(int, int){setop, getop, setop, nil, delop}
|
||||
tags := []string{"set", "get", "reset", "scan", "delete"}
|
||||
for i := range ops {
|
||||
shuffleU64(nums)
|
||||
var na bool
|
||||
var n int
|
||||
start := time.Now()
|
||||
if tags[i] == "scan" {
|
||||
op := scnop
|
||||
if op == nil {
|
||||
na = true
|
||||
} else {
|
||||
n = 20
|
||||
lotsa.Ops(n, 1, func(_, _ int) { op() })
|
||||
}
|
||||
|
||||
} else {
|
||||
n = len(nums)
|
||||
lotsa.Ops(n, 1, ops[i])
|
||||
}
|
||||
dur := time.Since(start)
|
||||
if i == 0 {
|
||||
runtime.GC()
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
runtime.ReadMemStats(&ms2)
|
||||
}
|
||||
printItem(tags[i], 9, -1)
|
||||
if na {
|
||||
printItem("-- unavailable --", 14, 1)
|
||||
} else {
|
||||
if n == -1 {
|
||||
printItem("unknown ops", 14, 1)
|
||||
} else {
|
||||
printItem(fmt.Sprintf("%s ops", commaize(n)), 14, 1)
|
||||
}
|
||||
printItem(fmt.Sprintf("%.0fms", dur.Seconds()*1000), 8, 1)
|
||||
if n != -1 {
|
||||
printItem(fmt.Sprintf("%s/sec", commaize(int(float64(n)/dur.Seconds()))), 18, 1)
|
||||
}
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHashDIBU64(t *testing.T) {
|
||||
var e entryU64
|
||||
e.setDIB(100)
|
||||
e.setHash(90000)
|
||||
if e.dib() != 100 {
|
||||
t.Fatalf("expected %v, got %v", 100, e.dib())
|
||||
}
|
||||
if e.hash() != 90000 {
|
||||
t.Fatalf("expected %v, got %v", 90000, e.hash())
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue