update godep

This commit is contained in:
siddontang 2015-06-18 16:23:42 +08:00
parent c80cda80d8
commit a298919dd0
55 changed files with 2846 additions and 1747 deletions

17
Godeps/Godeps.json generated
View File

@ -1,18 +1,19 @@
{
"ImportPath": "github.com/siddontang/ledisdb",
"GoVersion": "go1.3.3",
"GoVersion": "go1.4.2",
"Packages": [
"./..."
],
"Deps": [
{
"ImportPath": "github.com/BurntSushi/toml",
"Comment": "v0.1.0",
"Rev": "2ceedfee35ad3848e49308ab0c9a4f640cfb5fb2"
},
{
"ImportPath": "github.com/boltdb/bolt",
"Comment": "v1.0-62-gee95430",
"Rev": "ee954308d64186f0fc9b7022b6178977848c17a3"
"Comment": "v1.0-111-g04a3e85",
"Rev": "04a3e85793043e76d41164037d0d7f9d53eecae3"
},
{
"ImportPath": "github.com/cupcake/rdb",
@ -22,6 +23,10 @@
"ImportPath": "github.com/edsrzf/mmap-go",
"Rev": "6c75090c55983bef2e129e173681b20d24871ef8"
},
{
"ImportPath": "github.com/google/go-snappy/snappy",
"Rev": "eaa750b9bf4dcb7cb20454be850613b66cda3273"
},
{
"ImportPath": "github.com/siddontang/go/bson",
"Rev": "530a23162549a31baa14dfa3b647a9eccee8878f"
@ -64,11 +69,7 @@
},
{
"ImportPath": "github.com/syndtr/goleveldb/leveldb",
"Rev": "4875955338b0a434238a31165cb87255ab6e9e4a"
},
{
"ImportPath": "github.com/syndtr/gosnappy/snappy",
"Rev": "156a073208e131d7d2e212cb749feae7c339e846"
"Rev": "a06509502ca32565bdf74afc1e573050023f261c"
},
{
"ImportPath": "github.com/ugorji/go/codec",

View File

@ -1,3 +1,4 @@
*.prof
*.test
*.swp
/bin/

View File

@ -87,6 +87,11 @@ are not thread safe. To work with data in multiple goroutines you must start
a transaction for each one or use locking to ensure only one goroutine accesses
a transaction at a time. Creating transaction from the `DB` is thread safe.
Read-only transactions and read-write transactions should not depend on one
another and generally shouldn't be opened simultaneously in the same goroutine.
This can cause a deadlock as the read-write transaction needs to periodically
re-map the data file but it cannot do so while a read-only transaction is open.
#### Read-write transactions
@ -446,6 +451,21 @@ It's also useful to pipe these stats to a service such as statsd for monitoring
or to provide an HTTP endpoint that will perform a fixed-length sample.
### Read-Only Mode
Sometimes it is useful to create a shared, read-only Bolt database. To this,
set the `Options.ReadOnly` flag when opening your database. Read-only mode
uses a shared lock to allow multiple processes to read from the database but
it will block any processes from opening the database in read-write mode.
```go
db, err := bolt.Open("my.db", 0666, &bolt.Options{ReadOnly: true})
if err != nil {
log.Fatal(err)
}
```
## Resources
For more information on getting started with Bolt, check out the following articles:
@ -550,6 +570,11 @@ Here are a few things to note when evaluating and using Bolt:
However, this is expected and the OS will release memory as needed. Bolt can
handle databases much larger than the available physical RAM.
* The data structures in the Bolt database are memory mapped so the data file
will be endian specific. This means that you cannot copy a Bolt file from a
little endian machine to a big endian machine and have it work. For most
users this is not a concern since most modern CPUs are little endian.
* Because of the way pages are laid out on disk, Bolt cannot truncate data files
and return free pages back to the disk. Instead, Bolt maintains a free list
of unused pages within its data file. These free pages can be reused by later
@ -586,5 +611,10 @@ Below is a list of public, open source projects that use Bolt:
* [tentacool](https://github.com/optiflows/tentacool) - REST api server to manage system stuff (IP, DNS, Gateway...) on a linux server.
* [SkyDB](https://github.com/skydb/sky) - Behavioral analytics database.
* [Seaweed File System](https://github.com/chrislusf/weed-fs) - Highly scalable distributed key~file system with O(1) disk read.
* [InfluxDB](http://influxdb.com) - Scalable datastore for metrics, events, and real-time analytics.
* [Freehold](http://tshannon.bitbucket.org/freehold/) - An open, secure, and lightweight platform for your files and data.
* [Prometheus Annotation Server](https://github.com/oliver006/prom_annotation_server) - Annotation server for PromDash & Prometheus service monitoring system.
* [Consul](https://github.com/hashicorp/consul) - Consul is service discovery and configuration made easy. Distributed, highly available, and datacenter-aware.
* [Kala](https://github.com/ajvb/kala) - Kala is a modern job scheduler optimized to run on a single node. It is persistant, JSON over HTTP API, ISO 8601 duration notation, and dependent jobs.
If you are using Bolt in a project please send a pull request to add it to the list.

View File

@ -20,6 +20,9 @@ import (
// take permanent effect only after a successful return is seen in
// caller.
//
// The maximum batch size and delay can be adjusted with DB.MaxBatchSize
// and DB.MaxBatchDelay, respectively.
//
// Batch is only useful when there are multiple goroutines calling it.
func (db *DB) Batch(fn func(*Tx) error) error {
errCh := make(chan error, 1)

View File

@ -11,7 +11,7 @@ import (
)
// flock acquires an advisory lock on a file descriptor.
func flock(f *os.File, timeout time.Duration) error {
func flock(f *os.File, exclusive bool, timeout time.Duration) error {
var t time.Time
for {
// If we're beyond our timeout then return an error.
@ -21,9 +21,13 @@ func flock(f *os.File, timeout time.Duration) error {
} else if timeout > 0 && time.Since(t) > timeout {
return ErrTimeout
}
flag := syscall.LOCK_SH
if exclusive {
flag = syscall.LOCK_EX
}
// Otherwise attempt to obtain an exclusive lock.
err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
err := syscall.Flock(int(f.Fd()), flag|syscall.LOCK_NB)
if err == nil {
return nil
} else if err != syscall.EWOULDBLOCK {
@ -44,12 +48,14 @@ func funlock(f *os.File) error {
func mmap(db *DB, sz int) error {
// Truncate and fsync to ensure file size metadata is flushed.
// https://github.com/boltdb/bolt/issues/284
if !db.NoGrowSync && !db.readOnly {
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("file resize error: %s", err)
}
if err := db.file.Sync(); err != nil {
return fmt.Errorf("file sync error: %s", err)
}
}
// Map the data file to memory.
b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED)
@ -57,6 +63,11 @@ func mmap(db *DB, sz int) error {
return err
}
// Advise the kernel that the mmap is accessed randomly.
if err := madvise(b, syscall.MADV_RANDOM); err != nil {
return fmt.Errorf("madvise: %s", err)
}
// Save the original byte slice and convert to a byte array pointer.
db.dataref = b
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
@ -78,3 +89,12 @@ func munmap(db *DB) error {
db.datasz = 0
return err
}
// NOTE: This function is copied from stdlib because it is not available on darwin.
func madvise(b []byte, advice int) (err error) {
_, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice))
if e1 != 0 {
err = e1
}
return
}

View File

@ -16,7 +16,7 @@ func fdatasync(db *DB) error {
}
// flock acquires an advisory lock on a file descriptor.
func flock(f *os.File, _ time.Duration) error {
func flock(f *os.File, _ bool, _ time.Duration) error {
return nil
}
@ -28,10 +28,12 @@ func funlock(f *os.File) error {
// mmap memory maps a DB's data file.
// Based on: https://github.com/edsrzf/mmap-go
func mmap(db *DB, sz int) error {
if !db.readOnly {
// Truncate the database to the size of the mmap.
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("truncate: %s", err)
}
}
// Open a file mapping handle.
sizelo := uint32(sz >> 32)

View File

@ -640,6 +640,22 @@ func TestBucket_Put_KeyTooLarge(t *testing.T) {
})
}
// Ensure that an error is returned when inserting a value that's too large.
func TestBucket_Put_ValueTooLarge(t *testing.T) {
if os.Getenv("DRONE") == "true" {
t.Skip("not enough RAM for test")
}
db := NewTestDB()
defer db.Close()
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
err := tx.Bucket([]byte("widgets")).Put([]byte("foo"), make([]byte, bolt.MaxValueSize+1))
equals(t, err, bolt.ErrValueTooLarge)
return nil
})
}
// Ensure a bucket can calculate stats.
func TestBucket_Stats(t *testing.T) {
db := NewTestDB()

View File

@ -1,421 +0,0 @@
package main
import (
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"math/rand"
"os"
"runtime"
"runtime/pprof"
"time"
"github.com/boltdb/bolt"
)
// File handlers for the various profiles.
var cpuprofile, memprofile, blockprofile *os.File
var benchBucketName = []byte("bench")
// Bench executes a customizable, synthetic benchmark against Bolt.
func Bench(options *BenchOptions) {
var results BenchResults
// Validate options.
if options.BatchSize == 0 {
options.BatchSize = options.Iterations
} else if options.Iterations%options.BatchSize != 0 {
fatal("number of iterations must be divisible by the batch size")
}
// Find temporary location.
path := tempfile()
if options.Clean {
defer os.Remove(path)
} else {
println("work:", path)
}
// Create database.
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
db.NoSync = options.NoSync
defer db.Close()
// Enable streaming stats.
if options.StatsInterval > 0 {
go printStats(db, options.StatsInterval)
}
// Start profiling for writes.
if options.ProfileMode == "rw" || options.ProfileMode == "w" {
benchStartProfiling(options)
}
// Write to the database.
if err := benchWrite(db, options, &results); err != nil {
fatal("bench: write: ", err)
}
// Stop profiling for writes only.
if options.ProfileMode == "w" {
benchStopProfiling()
}
// Start profiling for reads.
if options.ProfileMode == "r" {
benchStartProfiling(options)
}
// Read from the database.
if err := benchRead(db, options, &results); err != nil {
fatal("bench: read: ", err)
}
// Stop profiling for writes only.
if options.ProfileMode == "rw" || options.ProfileMode == "r" {
benchStopProfiling()
}
// Print results.
fmt.Fprintf(os.Stderr, "# Write\t%v\t(%v/op)\t(%v op/sec)\n", results.WriteDuration, results.WriteOpDuration(), results.WriteOpsPerSecond())
fmt.Fprintf(os.Stderr, "# Read\t%v\t(%v/op)\t(%v op/sec)\n", results.ReadDuration, results.ReadOpDuration(), results.ReadOpsPerSecond())
fmt.Fprintln(os.Stderr, "")
}
// Writes to the database.
func benchWrite(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
var err error
var t = time.Now()
switch options.WriteMode {
case "seq":
err = benchWriteSequential(db, options, results)
case "rnd":
err = benchWriteRandom(db, options, results)
case "seq-nest":
err = benchWriteSequentialNested(db, options, results)
case "rnd-nest":
err = benchWriteRandomNested(db, options, results)
default:
return fmt.Errorf("invalid write mode: %s", options.WriteMode)
}
results.WriteDuration = time.Since(t)
return err
}
func benchWriteSequential(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
var i = uint32(0)
return benchWriteWithSource(db, options, results, func() uint32 { i++; return i })
}
func benchWriteRandom(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
return benchWriteWithSource(db, options, results, func() uint32 { return r.Uint32() })
}
func benchWriteSequentialNested(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
var i = uint32(0)
return benchWriteNestedWithSource(db, options, results, func() uint32 { i++; return i })
}
func benchWriteRandomNested(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
return benchWriteNestedWithSource(db, options, results, func() uint32 { return r.Uint32() })
}
func benchWriteWithSource(db *bolt.DB, options *BenchOptions, results *BenchResults, keySource func() uint32) error {
results.WriteOps = options.Iterations
for i := 0; i < options.Iterations; i += options.BatchSize {
err := db.Update(func(tx *bolt.Tx) error {
b, _ := tx.CreateBucketIfNotExists(benchBucketName)
b.FillPercent = options.FillPercent
for j := 0; j < options.BatchSize; j++ {
var key = make([]byte, options.KeySize)
var value = make([]byte, options.ValueSize)
binary.BigEndian.PutUint32(key, keySource())
if err := b.Put(key, value); err != nil {
return err
}
}
return nil
})
if err != nil {
return err
}
}
return nil
}
func benchWriteNestedWithSource(db *bolt.DB, options *BenchOptions, results *BenchResults, keySource func() uint32) error {
results.WriteOps = options.Iterations
for i := 0; i < options.Iterations; i += options.BatchSize {
err := db.Update(func(tx *bolt.Tx) error {
top, _ := tx.CreateBucketIfNotExists(benchBucketName)
top.FillPercent = options.FillPercent
var name = make([]byte, options.KeySize)
binary.BigEndian.PutUint32(name, keySource())
b, _ := top.CreateBucketIfNotExists(name)
b.FillPercent = options.FillPercent
for j := 0; j < options.BatchSize; j++ {
var key = make([]byte, options.KeySize)
var value = make([]byte, options.ValueSize)
binary.BigEndian.PutUint32(key, keySource())
if err := b.Put(key, value); err != nil {
return err
}
}
return nil
})
if err != nil {
return err
}
}
return nil
}
// Reads from the database.
func benchRead(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
var err error
var t = time.Now()
switch options.ReadMode {
case "seq":
if options.WriteMode == "seq-nest" || options.WriteMode == "rnd-nest" {
err = benchReadSequentialNested(db, options, results)
} else {
err = benchReadSequential(db, options, results)
}
default:
return fmt.Errorf("invalid read mode: %s", options.ReadMode)
}
results.ReadDuration = time.Since(t)
return err
}
func benchReadSequential(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
return db.View(func(tx *bolt.Tx) error {
var t = time.Now()
for {
c := tx.Bucket(benchBucketName).Cursor()
var count int
for k, v := c.First(); k != nil; k, v = c.Next() {
if v == nil {
return errors.New("invalid value")
}
count++
}
if options.WriteMode == "seq" && count != options.Iterations {
return fmt.Errorf("read seq: iter mismatch: expected %d, got %d", options.Iterations, count)
}
results.ReadOps += count
// Make sure we do this for at least a second.
if time.Since(t) >= time.Second {
break
}
}
return nil
})
}
func benchReadSequentialNested(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
return db.View(func(tx *bolt.Tx) error {
var t = time.Now()
for {
var count int
var top = tx.Bucket(benchBucketName)
top.ForEach(func(name, _ []byte) error {
c := top.Bucket(name).Cursor()
for k, v := c.First(); k != nil; k, v = c.Next() {
if v == nil {
return errors.New("invalid value")
}
count++
}
return nil
})
if options.WriteMode == "seq-nest" && count != options.Iterations {
return fmt.Errorf("read seq-nest: iter mismatch: expected %d, got %d", options.Iterations, count)
}
results.ReadOps += count
// Make sure we do this for at least a second.
if time.Since(t) >= time.Second {
break
}
}
return nil
})
}
// Starts all profiles set on the options.
func benchStartProfiling(options *BenchOptions) {
var err error
// Start CPU profiling.
if options.CPUProfile != "" {
cpuprofile, err = os.Create(options.CPUProfile)
if err != nil {
fatalf("bench: could not create cpu profile %q: %v", options.CPUProfile, err)
}
pprof.StartCPUProfile(cpuprofile)
}
// Start memory profiling.
if options.MemProfile != "" {
memprofile, err = os.Create(options.MemProfile)
if err != nil {
fatalf("bench: could not create memory profile %q: %v", options.MemProfile, err)
}
runtime.MemProfileRate = 4096
}
// Start fatal profiling.
if options.BlockProfile != "" {
blockprofile, err = os.Create(options.BlockProfile)
if err != nil {
fatalf("bench: could not create block profile %q: %v", options.BlockProfile, err)
}
runtime.SetBlockProfileRate(1)
}
}
// Stops all profiles.
func benchStopProfiling() {
if cpuprofile != nil {
pprof.StopCPUProfile()
cpuprofile.Close()
cpuprofile = nil
}
if memprofile != nil {
pprof.Lookup("heap").WriteTo(memprofile, 0)
memprofile.Close()
memprofile = nil
}
if blockprofile != nil {
pprof.Lookup("block").WriteTo(blockprofile, 0)
blockprofile.Close()
blockprofile = nil
runtime.SetBlockProfileRate(0)
}
}
// Continuously prints stats on the database at given intervals.
func printStats(db *bolt.DB, interval time.Duration) {
var prevStats = db.Stats()
var encoder = json.NewEncoder(os.Stdout)
for {
// Wait for the stats interval.
time.Sleep(interval)
// Retrieve new stats and find difference from previous iteration.
var stats = db.Stats()
var diff = stats.Sub(&prevStats)
// Print as JSON to STDOUT.
if err := encoder.Encode(diff); err != nil {
fatal(err)
}
// Save stats for next iteration.
prevStats = stats
}
}
// BenchOptions represents the set of options that can be passed to Bench().
type BenchOptions struct {
ProfileMode string
WriteMode string
ReadMode string
Iterations int
BatchSize int
KeySize int
ValueSize int
CPUProfile string
MemProfile string
BlockProfile string
StatsInterval time.Duration
FillPercent float64
NoSync bool
Clean bool
}
// BenchResults represents the performance results of the benchmark.
type BenchResults struct {
WriteOps int
WriteDuration time.Duration
ReadOps int
ReadDuration time.Duration
}
// Returns the duration for a single write operation.
func (r *BenchResults) WriteOpDuration() time.Duration {
if r.WriteOps == 0 {
return 0
}
return r.WriteDuration / time.Duration(r.WriteOps)
}
// Returns average number of write operations that can be performed per second.
func (r *BenchResults) WriteOpsPerSecond() int {
var op = r.WriteOpDuration()
if op == 0 {
return 0
}
return int(time.Second) / int(op)
}
// Returns the duration for a single read operation.
func (r *BenchResults) ReadOpDuration() time.Duration {
if r.ReadOps == 0 {
return 0
}
return r.ReadDuration / time.Duration(r.ReadOps)
}
// Returns average number of read operations that can be performed per second.
func (r *BenchResults) ReadOpsPerSecond() int {
var op = r.ReadOpDuration()
if op == 0 {
return 0
}
return int(time.Second) / int(op)
}
// tempfile returns a temporary file path.
func tempfile() string {
f, _ := ioutil.TempFile("", "bolt-bench-")
f.Close()
os.Remove(f.Name())
return f.Name()
}

View File

@ -1,33 +0,0 @@
package main
import (
"os"
"github.com/boltdb/bolt"
)
// Buckets prints a list of all buckets.
func Buckets(path string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
err = db.View(func(tx *bolt.Tx) error {
return tx.ForEach(func(name []byte, _ *bolt.Bucket) error {
println(string(name))
return nil
})
})
if err != nil {
fatal(err)
return
}
}

View File

@ -1,31 +0,0 @@
package main_test
import (
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
)
// Ensure that a list of buckets can be retrieved.
func TestBuckets(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("woojits"))
tx.CreateBucket([]byte("widgets"))
tx.CreateBucket([]byte("whatchits"))
return nil
})
db.Close()
output := run("buckets", path)
equals(t, "whatchits\nwidgets\nwoojits", output)
})
}
// Ensure that an error is reported if the database is not found.
func TestBucketsDBNotFound(t *testing.T) {
SetTestMode(true)
output := run("buckets", "no/such/db")
equals(t, "stat no/such/db: no such file or directory", output)
}

View File

@ -1,47 +0,0 @@
package main
import (
"os"
"github.com/boltdb/bolt"
)
// Check performs a consistency check on the database and prints any errors found.
func Check(path string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
// Perform consistency check.
_ = db.View(func(tx *bolt.Tx) error {
var count int
ch := tx.Check()
loop:
for {
select {
case err, ok := <-ch:
if !ok {
break loop
}
println(err)
count++
}
}
// Print summary of errors.
if count > 0 {
fatalf("%d errors found", count)
} else {
println("OK")
}
return nil
})
}

View File

@ -1,45 +0,0 @@
package main
import (
"os"
"github.com/boltdb/bolt"
)
// Get retrieves the value for a given bucket/key.
func Get(path, name, key string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
err = db.View(func(tx *bolt.Tx) error {
// Find bucket.
b := tx.Bucket([]byte(name))
if b == nil {
fatalf("bucket not found: %s", name)
return nil
}
// Find value for a given key.
value := b.Get([]byte(key))
if value == nil {
fatalf("key not found: %s", key)
return nil
}
println(string(value))
return nil
})
if err != nil {
fatal(err)
return
}
}

View File

@ -1,54 +0,0 @@
package main_test
import (
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
)
// Ensure that a value can be retrieved from the CLI.
func TestGet(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar"))
return nil
})
db.Close()
output := run("get", path, "widgets", "foo")
equals(t, "bar", output)
})
}
// Ensure that an error is reported if the database is not found.
func TestGetDBNotFound(t *testing.T) {
SetTestMode(true)
output := run("get", "no/such/db", "widgets", "foo")
equals(t, "stat no/such/db: no such file or directory", output)
}
// Ensure that an error is reported if the bucket is not found.
func TestGetBucketNotFound(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Close()
output := run("get", path, "widgets", "foo")
equals(t, "bucket not found: widgets", output)
})
}
// Ensure that an error is reported if the key is not found.
func TestGetKeyNotFound(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Update(func(tx *bolt.Tx) error {
_, err := tx.CreateBucket([]byte("widgets"))
return err
})
db.Close()
output := run("get", path, "widgets", "foo")
equals(t, "key not found: foo", output)
})
}

View File

@ -1,26 +0,0 @@
package main
import (
"os"
"github.com/boltdb/bolt"
)
// Info prints basic information about a database.
func Info(path string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
// Print basic database info.
var info = db.Info()
printf("Page Size: %d\n", info.PageSize)
}

View File

@ -1,31 +0,0 @@
package main_test
import (
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
)
// Ensure that a database info can be printed.
func TestInfo(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
b := tx.Bucket([]byte("widgets"))
b.Put([]byte("foo"), []byte("0000"))
return nil
})
db.Close()
output := run("info", path)
equals(t, `Page Size: 4096`, output)
})
}
// Ensure that an error is reported if the database is not found.
func TestInfo_NotFound(t *testing.T) {
SetTestMode(true)
output := run("info", "no/such/db")
equals(t, "stat no/such/db: no such file or directory", output)
}

View File

@ -1,41 +0,0 @@
package main
import (
"os"
"github.com/boltdb/bolt"
)
// Keys retrieves a list of keys for a given bucket.
func Keys(path, name string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
err = db.View(func(tx *bolt.Tx) error {
// Find bucket.
b := tx.Bucket([]byte(name))
if b == nil {
fatalf("bucket not found: %s", name)
return nil
}
// Iterate over each key.
return b.ForEach(func(key, _ []byte) error {
println(string(key))
return nil
})
})
if err != nil {
fatal(err)
return
}
}

View File

@ -1,42 +0,0 @@
package main_test
import (
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
)
// Ensure that a list of keys can be retrieved for a given bucket.
func TestKeys(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
tx.Bucket([]byte("widgets")).Put([]byte("0002"), []byte(""))
tx.Bucket([]byte("widgets")).Put([]byte("0001"), []byte(""))
tx.Bucket([]byte("widgets")).Put([]byte("0003"), []byte(""))
return nil
})
db.Close()
output := run("keys", path, "widgets")
equals(t, "0001\n0002\n0003", output)
})
}
// Ensure that an error is reported if the database is not found.
func TestKeysDBNotFound(t *testing.T) {
SetTestMode(true)
output := run("keys", "no/such/db", "widgets")
equals(t, "stat no/such/db: no such file or directory", output)
}
// Ensure that an error is reported if the bucket is not found.
func TestKeysBucketNotFound(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Close()
output := run("keys", path, "widgets")
equals(t, "bucket not found: widgets", output)
})
}

File diff suppressed because it is too large Load Diff

View File

@ -1,69 +1,145 @@
package main_test
import (
"fmt"
"bytes"
"io/ioutil"
"os"
"path/filepath"
"reflect"
"runtime"
"strings"
"strconv"
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
"github.com/boltdb/bolt/cmd/bolt"
)
// open creates and opens a Bolt database in the temp directory.
func open(fn func(*bolt.DB, string)) {
path := tempfile()
defer os.RemoveAll(path)
// Ensure the "info" command can print information about a database.
func TestInfoCommand_Run(t *testing.T) {
db := MustOpen(0666, nil)
db.DB.Close()
defer db.Close()
db, err := bolt.Open(path, 0600, nil)
if err != nil {
panic("db open error: " + err.Error())
// Run the info command.
m := NewMain()
if err := m.Run("info", db.Path); err != nil {
t.Fatal(err)
}
fn(db, path)
}
// run executes a command against the CLI and returns the output.
func run(args ...string) string {
args = append([]string{"bolt"}, args...)
NewApp().Run(args)
return strings.TrimSpace(LogBuffer())
// Ensure the "stats" command can execute correctly.
func TestStatsCommand_Run(t *testing.T) {
// Ignore
if os.Getpagesize() != 4096 {
t.Skip("system does not use 4KB page size")
}
db := MustOpen(0666, nil)
defer db.Close()
if err := db.Update(func(tx *bolt.Tx) error {
// Create "foo" bucket.
b, err := tx.CreateBucket([]byte("foo"))
if err != nil {
return err
}
for i := 0; i < 10; i++ {
if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil {
return err
}
}
// Create "bar" bucket.
b, err = tx.CreateBucket([]byte("bar"))
if err != nil {
return err
}
for i := 0; i < 100; i++ {
if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil {
return err
}
}
// Create "baz" bucket.
b, err = tx.CreateBucket([]byte("baz"))
if err != nil {
return err
}
if err := b.Put([]byte("key"), []byte("value")); err != nil {
return err
}
return nil
}); err != nil {
t.Fatal(err)
}
db.DB.Close()
// Generate expected result.
exp := "Aggregate statistics for 3 buckets\n\n" +
"Page count statistics\n" +
"\tNumber of logical branch pages: 0\n" +
"\tNumber of physical branch overflow pages: 0\n" +
"\tNumber of logical leaf pages: 1\n" +
"\tNumber of physical leaf overflow pages: 0\n" +
"Tree statistics\n" +
"\tNumber of keys/value pairs: 111\n" +
"\tNumber of levels in B+tree: 1\n" +
"Page size utilization\n" +
"\tBytes allocated for physical branch pages: 0\n" +
"\tBytes actually used for branch data: 0 (0%)\n" +
"\tBytes allocated for physical leaf pages: 4096\n" +
"\tBytes actually used for leaf data: 1996 (48%)\n" +
"Bucket statistics\n" +
"\tTotal number of buckets: 3\n" +
"\tTotal number on inlined buckets: 2 (66%)\n" +
"\tBytes used for inlined buckets: 236 (11%)\n"
// Run the command.
m := NewMain()
if err := m.Run("stats", db.Path); err != nil {
t.Fatal(err)
} else if m.Stdout.String() != exp {
t.Fatalf("unexpected stdout:\n\n%s", m.Stdout.String())
}
}
// tempfile returns a temporary file path.
func tempfile() string {
// Main represents a test wrapper for main.Main that records output.
type Main struct {
*main.Main
Stdin bytes.Buffer
Stdout bytes.Buffer
Stderr bytes.Buffer
}
// NewMain returns a new instance of Main.
func NewMain() *Main {
m := &Main{Main: main.NewMain()}
m.Main.Stdin = &m.Stdin
m.Main.Stdout = &m.Stdout
m.Main.Stderr = &m.Stderr
return m
}
// MustOpen creates a Bolt database in a temporary location.
func MustOpen(mode os.FileMode, options *bolt.Options) *DB {
// Create temporary path.
f, _ := ioutil.TempFile("", "bolt-")
f.Close()
os.Remove(f.Name())
return f.Name()
}
// assert fails the test if the condition is false.
func assert(tb testing.TB, condition bool, msg string, v ...interface{}) {
if !condition {
_, file, line, _ := runtime.Caller(1)
fmt.Printf("\033[31m%s:%d: "+msg+"\033[39m\n\n", append([]interface{}{filepath.Base(file), line}, v...)...)
tb.FailNow()
}
}
// ok fails the test if an err is not nil.
func ok(tb testing.TB, err error) {
db, err := bolt.Open(f.Name(), mode, options)
if err != nil {
_, file, line, _ := runtime.Caller(1)
fmt.Printf("\033[31m%s:%d: unexpected error: %s\033[39m\n\n", filepath.Base(file), line, err.Error())
tb.FailNow()
panic(err.Error())
}
return &DB{DB: db, Path: f.Name()}
}
// equals fails the test if exp is not equal to act.
func equals(tb testing.TB, exp, act interface{}) {
if !reflect.DeepEqual(exp, act) {
_, file, line, _ := runtime.Caller(1)
fmt.Printf("\033[31m%s:%d:\n\n\texp: %#v\n\n\tgot: %#v\033[39m\n\n", filepath.Base(file), line, exp, act)
tb.FailNow()
}
// DB is a test wrapper for bolt.DB.
type DB struct {
*bolt.DB
Path string
}
// Close closes and removes the database.
func (db *DB) Close() error {
defer os.Remove(db.Path)
return db.DB.Close()
}

View File

@ -1,57 +0,0 @@
package main
import (
"os"
"strconv"
"github.com/boltdb/bolt"
)
// Pages prints a list of all pages in a database.
func Pages(path string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
println("ID TYPE ITEMS OVRFLW")
println("======== ========== ====== ======")
db.Update(func(tx *bolt.Tx) error {
var id int
for {
p, err := tx.Page(id)
if err != nil {
fatalf("page error: %d: %s", id, err)
} else if p == nil {
break
}
// Only display count and overflow if this is a non-free page.
var count, overflow string
if p.Type != "free" {
count = strconv.Itoa(p.Count)
if p.OverflowCount > 0 {
overflow = strconv.Itoa(p.OverflowCount)
}
}
// Print table row.
printf("%-8d %-10s %-6s %-6s\n", p.ID, p.Type, count, overflow)
// Move to the next non-overflow page.
id += 1
if p.Type != "free" {
id += p.OverflowCount
}
}
return nil
})
}

View File

@ -1,77 +0,0 @@
package main
import (
"bytes"
"os"
"github.com/boltdb/bolt"
)
// Collect stats for all top level buckets matching the prefix.
func Stats(path, prefix string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
err = db.View(func(tx *bolt.Tx) error {
var s bolt.BucketStats
var count int
var prefix = []byte(prefix)
tx.ForEach(func(name []byte, b *bolt.Bucket) error {
if bytes.HasPrefix(name, prefix) {
s.Add(b.Stats())
count += 1
}
return nil
})
printf("Aggregate statistics for %d buckets\n\n", count)
println("Page count statistics")
printf("\tNumber of logical branch pages: %d\n", s.BranchPageN)
printf("\tNumber of physical branch overflow pages: %d\n", s.BranchOverflowN)
printf("\tNumber of logical leaf pages: %d\n", s.LeafPageN)
printf("\tNumber of physical leaf overflow pages: %d\n", s.LeafOverflowN)
println("Tree statistics")
printf("\tNumber of keys/value pairs: %d\n", s.KeyN)
printf("\tNumber of levels in B+tree: %d\n", s.Depth)
println("Page size utilization")
printf("\tBytes allocated for physical branch pages: %d\n", s.BranchAlloc)
var percentage int
if s.BranchAlloc != 0 {
percentage = int(float32(s.BranchInuse) * 100.0 / float32(s.BranchAlloc))
}
printf("\tBytes actually used for branch data: %d (%d%%)\n", s.BranchInuse, percentage)
printf("\tBytes allocated for physical leaf pages: %d\n", s.LeafAlloc)
percentage = 0
if s.LeafAlloc != 0 {
percentage = int(float32(s.LeafInuse) * 100.0 / float32(s.LeafAlloc))
}
printf("\tBytes actually used for leaf data: %d (%d%%)\n", s.LeafInuse, percentage)
println("Bucket statistics")
printf("\tTotal number of buckets: %d\n", s.BucketN)
percentage = int(float32(s.InlineBucketN) * 100.0 / float32(s.BucketN))
printf("\tTotal number on inlined buckets: %d (%d%%)\n", s.InlineBucketN, percentage)
percentage = 0
if s.LeafInuse != 0 {
percentage = int(float32(s.InlineBucketInuse) * 100.0 / float32(s.LeafInuse))
}
printf("\tBytes used for inlined buckets: %d (%d%%)\n", s.InlineBucketInuse, percentage)
return nil
})
if err != nil {
fatal(err)
return
}
}

View File

@ -1,61 +0,0 @@
package main_test
import (
"os"
"strconv"
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
)
func TestStats(t *testing.T) {
if os.Getpagesize() != 4096 {
t.Skip()
}
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("foo"))
if err != nil {
return err
}
for i := 0; i < 10; i++ {
b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i)))
}
b, err = tx.CreateBucket([]byte("bar"))
if err != nil {
return err
}
for i := 0; i < 100; i++ {
b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i)))
}
b, err = tx.CreateBucket([]byte("baz"))
if err != nil {
return err
}
b.Put([]byte("key"), []byte("value"))
return nil
})
db.Close()
output := run("stats", path, "b")
equals(t, "Aggregate statistics for 2 buckets\n\n"+
"Page count statistics\n"+
"\tNumber of logical branch pages: 0\n"+
"\tNumber of physical branch overflow pages: 0\n"+
"\tNumber of logical leaf pages: 1\n"+
"\tNumber of physical leaf overflow pages: 0\n"+
"Tree statistics\n"+
"\tNumber of keys/value pairs: 101\n"+
"\tNumber of levels in B+tree: 1\n"+
"Page size utilization\n"+
"\tBytes allocated for physical branch pages: 0\n"+
"\tBytes actually used for branch data: 0 (0%)\n"+
"\tBytes allocated for physical leaf pages: 4096\n"+
"\tBytes actually used for leaf data: 1996 (48%)\n"+
"Bucket statistics\n"+
"\tTotal number of buckets: 2\n"+
"\tTotal number on inlined buckets: 1 (50%)\n"+
"\tBytes used for inlined buckets: 40 (2%)", output)
})
}

View File

@ -55,6 +55,14 @@ type DB struct {
// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
NoSync bool
// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
// bypasses a truncate() and fsync() syscall on remapping.
//
// https://github.com/boltdb/bolt/issues/284
NoGrowSync bool
// MaxBatchSize is the maximum size of a batch. Default value is
// copied from DefaultMaxBatchSize in Open.
//
@ -96,6 +104,10 @@ type DB struct {
ops struct {
writeAt func(b []byte, off int64) (n int, err error)
}
// Read only mode.
// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
readOnly bool
}
// Path returns the path to currently open database file.
@ -123,24 +135,34 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
if options == nil {
options = DefaultOptions
}
db.NoGrowSync = options.NoGrowSync
// Set default values for later DB operations.
db.MaxBatchSize = DefaultMaxBatchSize
db.MaxBatchDelay = DefaultMaxBatchDelay
flag := os.O_RDWR
if options.ReadOnly {
flag = os.O_RDONLY
db.readOnly = true
}
// Open data file and separate sync handler for metadata writes.
db.path = path
var err error
if db.file, err = os.OpenFile(db.path, os.O_RDWR|os.O_CREATE, mode); err != nil {
if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
_ = db.close()
return nil, err
}
// Lock file so that other processes using Bolt cannot use the database
// at the same time. This would cause corruption since the two processes
// would write meta pages and free pages separately.
if err := flock(db.file, options.Timeout); err != nil {
// Lock file so that other processes using Bolt in read-write mode cannot
// use the database at the same time. This would cause corruption since
// the two processes would write meta pages and free pages separately.
// The database file is locked exclusively (only one process can grab the lock)
// if !options.ReadOnly.
// The database file is locked using the shared lock (more than one process may
// hold a lock at the same time) otherwise (options.ReadOnly is set).
if err := flock(db.file, !db.readOnly, options.Timeout); err != nil {
_ = db.close()
return nil, err
}
@ -247,8 +269,8 @@ func (db *DB) munmap() error {
// of the database. The minimum size is 1MB and doubles until it reaches 1GB.
// Returns an error if the new mmap size is greater than the max allowed.
func (db *DB) mmapSize(size int) (int, error) {
// Double the size from 1MB until 1GB.
for i := uint(20); i <= 30; i++ {
// Double the size from 32KB until 1GB.
for i := uint(15); i <= 30; i++ {
if size <= 1<<i {
return 1 << i, nil
}
@ -329,8 +351,15 @@ func (db *DB) init() error {
// Close releases all database resources.
// All transactions must be closed before closing the database.
func (db *DB) Close() error {
db.rwlock.Lock()
defer db.rwlock.Unlock()
db.metalock.Lock()
defer db.metalock.Unlock()
db.mmaplock.RLock()
defer db.mmaplock.RUnlock()
return db.close()
}
@ -350,8 +379,11 @@ func (db *DB) close() error {
// Close file handles.
if db.file != nil {
// No need to unlock read-only file.
if !db.readOnly {
// Unlock the file.
_ = funlock(db.file)
}
// Close the file descriptor.
if err := db.file.Close(); err != nil {
@ -369,6 +401,11 @@ func (db *DB) close() error {
// will cause the calls to block and be serialized until the current write
// transaction finishes.
//
// Transactions should not be depedent on one another. Opening a read
// transaction and a write transaction in the same goroutine can cause the
// writer to deadlock because the database periodically needs to re-mmap itself
// as it grows and it cannot do that while a read transaction is open.
//
// IMPORTANT: You must close read-only transactions after you are finished or
// else the database will not reclaim old pages.
func (db *DB) Begin(writable bool) (*Tx, error) {
@ -417,6 +454,11 @@ func (db *DB) beginTx() (*Tx, error) {
}
func (db *DB) beginRWTx() (*Tx, error) {
// If the database was opened with Options.ReadOnly, return an error.
if db.readOnly {
return nil, ErrDatabaseReadOnly
}
// Obtain writer lock. This is released by the transaction when it closes.
// This enforces only one writer transaction at a time.
db.rwlock.Lock()
@ -547,6 +589,12 @@ func (db *DB) View(fn func(*Tx) error) error {
return nil
}
// Sync executes fdatasync() against the database file handle.
//
// This is not necessary under normal operation, however, if you use NoSync
// then it allows you to force the database file to sync against the disk.
func (db *DB) Sync() error { return fdatasync(db) }
// Stats retrieves ongoing performance stats for the database.
// This is only updated when a transaction closes.
func (db *DB) Stats() Stats {
@ -607,18 +655,30 @@ func (db *DB) allocate(count int) (*page, error) {
return p, nil
}
func (db *DB) IsReadOnly() bool {
return db.readOnly
}
// Options represents the options that can be set when opening a database.
type Options struct {
// Timeout is the amount of time to wait to obtain a file lock.
// When set to zero it will wait indefinitely. This option is only
// available on Darwin and Linux.
Timeout time.Duration
// Sets the DB.NoGrowSync flag before memory mapping the file.
NoGrowSync bool
// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
// grab a shared lock (UNIX).
ReadOnly bool
}
// DefaultOptions represent the options used if nil options are passed into Open().
// No timeout is used which will cause Bolt to wait indefinitely for a lock.
var DefaultOptions = &Options{
Timeout: 0,
NoGrowSync: false,
}
// Stats represents statistics about the database.

View File

@ -224,6 +224,76 @@ func TestDB_Open_FileTooSmall(t *testing.T) {
equals(t, errors.New("file size too small"), err)
}
// Ensure that a database can be opened in read-only mode by multiple processes
// and that a database can not be opened in read-write mode and in read-only
// mode at the same time.
func TestOpen_ReadOnly(t *testing.T) {
bucket, key, value := []byte(`bucket`), []byte(`key`), []byte(`value`)
path := tempfile()
defer os.Remove(path)
// Open in read-write mode.
db, err := bolt.Open(path, 0666, nil)
ok(t, db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket(bucket)
if err != nil {
return err
}
return b.Put(key, value)
}))
assert(t, db != nil, "")
assert(t, !db.IsReadOnly(), "")
ok(t, err)
ok(t, db.Close())
// Open in read-only mode.
db0, err := bolt.Open(path, 0666, &bolt.Options{ReadOnly: true})
ok(t, err)
defer db0.Close()
// Opening in read-write mode should return an error.
_, err = bolt.Open(path, 0666, &bolt.Options{Timeout: time.Millisecond * 100})
assert(t, err != nil, "")
// And again (in read-only mode).
db1, err := bolt.Open(path, 0666, &bolt.Options{ReadOnly: true})
ok(t, err)
defer db1.Close()
// Verify both read-only databases are accessible.
for _, db := range []*bolt.DB{db0, db1} {
// Verify is is in read only mode indeed.
assert(t, db.IsReadOnly(), "")
// Read-only databases should not allow updates.
assert(t,
bolt.ErrDatabaseReadOnly == db.Update(func(*bolt.Tx) error {
panic(`should never get here`)
}),
"")
// Read-only databases should not allow beginning writable txns.
_, err = db.Begin(true)
assert(t, bolt.ErrDatabaseReadOnly == err, "")
// Verify the data.
ok(t, db.View(func(tx *bolt.Tx) error {
b := tx.Bucket(bucket)
if b == nil {
return fmt.Errorf("expected bucket `%s`", string(bucket))
}
got := string(b.Get(key))
expected := string(value)
if got != expected {
return fmt.Errorf("expected `%s`, got `%s`", expected, got)
}
return nil
}))
}
}
// TODO(benbjohnson): Test corruption at every byte of the first two pages.
// Ensure that a database cannot open a transaction when it's not open.
@ -254,6 +324,49 @@ func TestDB_BeginRW_Closed(t *testing.T) {
assert(t, tx == nil, "")
}
func TestDB_Close_PendingTx_RW(t *testing.T) { testDB_Close_PendingTx(t, true) }
func TestDB_Close_PendingTx_RO(t *testing.T) { testDB_Close_PendingTx(t, false) }
// Ensure that a database cannot close while transactions are open.
func testDB_Close_PendingTx(t *testing.T, writable bool) {
db := NewTestDB()
defer db.Close()
// Start transaction.
tx, err := db.Begin(true)
if err != nil {
t.Fatal(err)
}
// Open update in separate goroutine.
done := make(chan struct{})
go func() {
db.Close()
close(done)
}()
// Ensure database hasn't closed.
time.Sleep(100 * time.Millisecond)
select {
case <-done:
t.Fatal("database closed too early")
default:
}
// Commit transaction.
if err := tx.Commit(); err != nil {
t.Fatal(err)
}
// Ensure database closed now.
time.Sleep(100 * time.Millisecond)
select {
case <-done:
default:
t.Fatal("database did not close")
}
}
// Ensure a database can provide a transactional block.
func TestDB_Update(t *testing.T) {
db := NewTestDB()
@ -678,7 +791,7 @@ func (db *TestDB) PrintStats() {
// MustCheck runs a consistency check on the database and panics if any errors are found.
func (db *TestDB) MustCheck() {
db.View(func(tx *bolt.Tx) error {
db.Update(func(tx *bolt.Tx) error {
// Collect all the errors.
var errors []error
for err := range tx.Check() {

View File

@ -36,6 +36,10 @@ var (
// ErrTxClosed is returned when committing or rolling back a transaction
// that has already been committed or rolled back.
ErrTxClosed = errors.New("tx closed")
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
// read-only database.
ErrDatabaseReadOnly = errors.New("database is in read-only mode")
)
// These errors can occur when putting or deleting a value or a bucket.

View File

@ -48,15 +48,14 @@ func (f *freelist) pending_count() int {
// all returns a list of all free ids and all pending ids in one sorted list.
func (f *freelist) all() []pgid {
ids := make([]pgid, len(f.ids))
copy(ids, f.ids)
m := make(pgids, 0)
for _, list := range f.pending {
ids = append(ids, list...)
m = append(m, list...)
}
sort.Sort(pgids(ids))
return ids
sort.Sort(m)
return pgids(f.ids).merge(m)
}
// allocate returns the starting page id of a contiguous list of pages of a given size.
@ -127,15 +126,17 @@ func (f *freelist) free(txid txid, p *page) {
// release moves all page ids for a transaction id (or older) to the freelist.
func (f *freelist) release(txid txid) {
m := make(pgids, 0)
for tid, ids := range f.pending {
if tid <= txid {
// Move transaction's pending pages to the available freelist.
// Don't remove from the cache since the page is still free.
f.ids = append(f.ids, ids...)
m = append(m, ids...)
delete(f.pending, tid)
}
}
sort.Sort(pgids(f.ids))
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
}
// rollback removes the pages from a given pending tx.

View File

@ -1,7 +1,9 @@
package bolt
import (
"math/rand"
"reflect"
"sort"
"testing"
"unsafe"
)
@ -127,3 +129,28 @@ func TestFreelist_write(t *testing.T) {
t.Fatalf("exp=%v; got=%v", exp, f2.ids)
}
}
func Benchmark_FreelistRelease10K(b *testing.B) { benchmark_FreelistRelease(b, 10000) }
func Benchmark_FreelistRelease100K(b *testing.B) { benchmark_FreelistRelease(b, 100000) }
func Benchmark_FreelistRelease1000K(b *testing.B) { benchmark_FreelistRelease(b, 1000000) }
func Benchmark_FreelistRelease10000K(b *testing.B) { benchmark_FreelistRelease(b, 10000000) }
func benchmark_FreelistRelease(b *testing.B, size int) {
ids := randomPgids(size)
pending := randomPgids(len(ids) / 400)
b.ResetTimer()
for i := 0; i < b.N; i++ {
f := &freelist{ids: ids, pending: map[txid][]pgid{1: pending}}
f.release(1)
}
}
func randomPgids(n int) []pgid {
rand.Seed(42)
pgids := make(pgids, n)
for i := range pgids {
pgids[i] = pgid(rand.Int63())
}
sort.Sort(pgids)
return pgids
}

View File

@ -221,11 +221,20 @@ func (n *node) write(p *page) {
_assert(elem.pgid != p.id, "write: circular dependency occurred")
}
// If the length of key+value is larger than the max allocation size
// then we need to reallocate the byte array pointer.
//
// See: https://github.com/boltdb/bolt/pull/335
klen, vlen := len(item.key), len(item.value)
if len(b) < klen+vlen {
b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:]
}
// Write data for the element to the end of the page.
copy(b[0:], item.key)
b = b[len(item.key):]
b = b[klen:]
copy(b[0:], item.value)
b = b[len(item.value):]
b = b[vlen:]
}
// DEBUG ONLY: n.dump()

View File

@ -3,6 +3,7 @@ package bolt
import (
"fmt"
"os"
"sort"
"unsafe"
)
@ -96,7 +97,7 @@ type branchPageElement struct {
// key returns a byte slice of the node key.
func (n *branchPageElement) key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return buf[n.pos : n.pos+n.ksize]
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
}
// leafPageElement represents a node on a leaf page.
@ -110,13 +111,13 @@ type leafPageElement struct {
// key returns a byte slice of the node key.
func (n *leafPageElement) key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return buf[n.pos : n.pos+n.ksize]
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
}
// value returns a byte slice of the node value.
func (n *leafPageElement) value() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize]
}
// PageInfo represents human readable information about a page.
@ -132,3 +133,40 @@ type pgids []pgid
func (s pgids) Len() int { return len(s) }
func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
// merge returns the sorted union of a and b.
func (a pgids) merge(b pgids) pgids {
// Return the opposite slice if one is nil.
if len(a) == 0 {
return b
} else if len(b) == 0 {
return a
}
// Create a list to hold all elements from both lists.
merged := make(pgids, 0, len(a)+len(b))
// Assign lead to the slice with a lower starting value, follow to the higher value.
lead, follow := a, b
if b[0] < a[0] {
lead, follow = b, a
}
// Continue while there are elements in the lead.
for len(lead) > 0 {
// Merge largest prefix of lead that is ahead of follow[0].
n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
merged = append(merged, lead[:n]...)
if n >= len(lead) {
break
}
// Swap lead and follow.
lead, follow = follow, lead[n:]
}
// Append what's left in follow.
merged = append(merged, follow...)
return merged
}

View File

@ -1,7 +1,10 @@
package bolt
import (
"reflect"
"sort"
"testing"
"testing/quick"
)
// Ensure that the page type can be returned in human readable format.
@ -27,3 +30,43 @@ func TestPage_typ(t *testing.T) {
func TestPage_dump(t *testing.T) {
(&page{id: 256}).hexdump(16)
}
func TestPgids_merge(t *testing.T) {
a := pgids{4, 5, 6, 10, 11, 12, 13, 27}
b := pgids{1, 3, 8, 9, 25, 30}
c := a.merge(b)
if !reflect.DeepEqual(c, pgids{1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30}) {
t.Errorf("mismatch: %v", c)
}
a = pgids{4, 5, 6, 10, 11, 12, 13, 27, 35, 36}
b = pgids{8, 9, 25, 30}
c = a.merge(b)
if !reflect.DeepEqual(c, pgids{4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30, 35, 36}) {
t.Errorf("mismatch: %v", c)
}
}
func TestPgids_merge_quick(t *testing.T) {
if err := quick.Check(func(a, b pgids) bool {
// Sort incoming lists.
sort.Sort(a)
sort.Sort(b)
// Merge the two lists together.
got := a.merge(b)
// The expected value should be the two lists combined and sorted.
exp := append(a, b...)
sort.Sort(exp)
if !reflect.DeepEqual(exp, got) {
t.Errorf("\nexp=%+v\ngot=%+v\n", exp, got)
return false
}
return true
}, nil); err != nil {
t.Fatal(err)
}
}

View File

@ -127,7 +127,8 @@ func (tx *Tx) OnCommit(fn func()) {
}
// Commit writes all changes to disk and updates the meta page.
// Returns an error if a disk write error occurs.
// Returns an error if a disk write error occurs, or if Commit is
// called on a read-only transaction.
func (tx *Tx) Commit() error {
_assert(!tx.managed, "managed tx commit not allowed")
if tx.db == nil {
@ -203,7 +204,8 @@ func (tx *Tx) Commit() error {
return nil
}
// Rollback closes the transaction and ignores all previous updates.
// Rollback closes the transaction and ignores all previous updates. Read-only
// transactions must be rolled back and not committed.
func (tx *Tx) Rollback() error {
_assert(!tx.managed, "managed tx rollback not allowed")
if tx.db == nil {
@ -421,15 +423,39 @@ func (tx *Tx) write() error {
// Write pages to disk in order.
for _, p := range pages {
size := (int(p.overflow) + 1) * tx.db.pageSize
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:size]
offset := int64(p.id) * int64(tx.db.pageSize)
// Write out page in "max allocation" sized chunks.
ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p))
for {
// Limit our write to our max allocation size.
sz := size
if sz > maxAllocSize-1 {
sz = maxAllocSize - 1
}
// Write chunk to disk.
buf := ptr[:sz]
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
return err
}
// Update statistics.
tx.stats.Write++
// Exit inner for loop if we've written all the chunks.
size -= sz
if size == 0 {
break
}
// Otherwise move offset forward and move pointer to next chunk.
offset += int64(sz)
ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz]))
}
}
// Ignore file sync if flag is set on DB.
if !tx.db.NoSync || IgnoreNoSync {
if err := fdatasync(tx.db); err != nil {
return err

View File

@ -252,6 +252,38 @@ func TestTx_DeleteBucket_NotFound(t *testing.T) {
})
}
// Ensure that no error is returned when a tx.ForEach function does not return
// an error.
func TestTx_ForEach_NoError(t *testing.T) {
db := NewTestDB()
defer db.Close()
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar"))
equals(t, nil, tx.ForEach(func(name []byte, b *bolt.Bucket) error {
return nil
}))
return nil
})
}
// Ensure that an error is returned when a tx.ForEach function returns an error.
func TestTx_ForEach_WithError(t *testing.T) {
db := NewTestDB()
defer db.Close()
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar"))
err := errors.New("foo")
equals(t, err, tx.ForEach(func(name []byte, b *bolt.Bucket) error {
return err
}))
return nil
})
}
// Ensure that Tx commit handlers are called after a transaction successfully commits.
func TestTx_OnCommit(t *testing.T) {
var x int

View File

@ -69,6 +69,7 @@ type DB struct {
compErrC chan error
compPerErrC chan error
compErrSetC chan error
compWriteLocking bool
compStats []cStats
// Close.
@ -108,6 +109,16 @@ func openDB(s *session) (*DB, error) {
closeC: make(chan struct{}),
}
// Read-only mode.
readOnly := s.o.GetReadOnly()
if readOnly {
// Recover journals (read-only mode).
if err := db.recoverJournalRO(); err != nil {
return nil, err
}
} else {
// Recover journals.
if err := db.recoverJournal(); err != nil {
return nil, err
}
@ -122,14 +133,20 @@ func openDB(s *session) (*DB, error) {
return nil, err
}
}
// Doesn't need to be included in the wait group.
go db.compactionError()
go db.mpoolDrain()
if readOnly {
db.SetReadOnly()
} else {
db.closeW.Add(3)
go db.tCompaction()
go db.mCompaction()
go db.jWriter()
}
s.logf("db@open done T·%v", time.Since(start))
@ -275,7 +292,7 @@ func recoverTable(s *session, o *opt.Options) error {
// We will drop corrupted table.
strict = o.GetStrict(opt.StrictRecovery)
rec = &sessionRecord{numLevel: o.GetNumLevel()}
rec = &sessionRecord{}
bpool = util.NewBufferPool(o.GetBlockSize() + 5)
)
buildTable := func(iter iterator.Iterator) (tmp storage.File, size int64, err error) {
@ -450,78 +467,92 @@ func recoverTable(s *session, o *opt.Options) error {
}
func (db *DB) recoverJournal() error {
// Get all tables and sort it by file number.
journalFiles_, err := db.s.getFiles(storage.TypeJournal)
// Get all journals and sort it by file number.
allJournalFiles, err := db.s.getFiles(storage.TypeJournal)
if err != nil {
return err
}
journalFiles := files(journalFiles_)
journalFiles.sort()
files(allJournalFiles).sort()
// Discard older journal.
prev := -1
for i, file := range journalFiles {
if file.Num() >= db.s.stJournalNum {
if prev >= 0 {
i--
journalFiles[i] = journalFiles[prev]
}
journalFiles = journalFiles[i:]
break
} else if file.Num() == db.s.stPrevJournalNum {
prev = i
// Journals that will be recovered.
var recJournalFiles []storage.File
for _, jf := range allJournalFiles {
if jf.Num() >= db.s.stJournalNum || jf.Num() == db.s.stPrevJournalNum {
recJournalFiles = append(recJournalFiles, jf)
}
}
var jr *journal.Reader
var of storage.File
var mem *memdb.DB
batch := new(Batch)
cm := newCMem(db.s)
buf := new(util.Buffer)
var (
of storage.File // Obsolete file.
rec = &sessionRecord{}
)
// Recover journals.
if len(recJournalFiles) > 0 {
db.logf("journal@recovery F·%d", len(recJournalFiles))
// Mark file number as used.
db.s.markFileNum(recJournalFiles[len(recJournalFiles)-1].Num())
var (
// Options.
strict := db.s.o.GetStrict(opt.StrictJournal)
checksum := db.s.o.GetStrict(opt.StrictJournalChecksum)
writeBuffer := db.s.o.GetWriteBuffer()
recoverJournal := func(file storage.File) error {
db.logf("journal@recovery recovering @%d", file.Num())
reader, err := file.Open()
strict = db.s.o.GetStrict(opt.StrictJournal)
checksum = db.s.o.GetStrict(opt.StrictJournalChecksum)
writeBuffer = db.s.o.GetWriteBuffer()
jr *journal.Reader
mdb = memdb.New(db.s.icmp, writeBuffer)
buf = &util.Buffer{}
batch = &Batch{}
)
for _, jf := range recJournalFiles {
db.logf("journal@recovery recovering @%d", jf.Num())
fr, err := jf.Open()
if err != nil {
return err
}
defer reader.Close()
// Create/reset journal reader instance.
// Create or reset journal reader instance.
if jr == nil {
jr = journal.NewReader(reader, dropper{db.s, file}, strict, checksum)
jr = journal.NewReader(fr, dropper{db.s, jf}, strict, checksum)
} else {
jr.Reset(reader, dropper{db.s, file}, strict, checksum)
jr.Reset(fr, dropper{db.s, jf}, strict, checksum)
}
// Flush memdb and remove obsolete journal file.
if of != nil {
if mem.Len() > 0 {
if err := cm.flush(mem, 0); err != nil {
if mdb.Len() > 0 {
if _, err := db.s.flushMemdb(rec, mdb, -1); err != nil {
fr.Close()
return err
}
}
if err := cm.commit(file.Num(), db.seq); err != nil {
rec.setJournalNum(jf.Num())
rec.setSeqNum(db.seq)
if err := db.s.commit(rec); err != nil {
fr.Close()
return err
}
cm.reset()
rec.resetAddedTables()
of.Remove()
of = nil
}
// Replay journal to memdb.
mem.Reset()
mdb.Reset()
for {
r, err := jr.Next()
if err != nil {
if err == io.EOF {
break
}
return errors.SetFile(err, file)
fr.Close()
return errors.SetFile(err, jf)
}
buf.Reset()
@ -529,53 +560,43 @@ func (db *DB) recoverJournal() error {
if err == io.ErrUnexpectedEOF {
// This is error returned due to corruption, with strict == false.
continue
} else {
return errors.SetFile(err, file)
}
fr.Close()
return errors.SetFile(err, jf)
}
if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mem); err != nil {
if strict || !errors.IsCorrupted(err) {
return errors.SetFile(err, file)
} else {
if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mdb); err != nil {
if !strict && errors.IsCorrupted(err) {
db.s.logf("journal error: %v (skipped)", err)
// We won't apply sequence number as it might be corrupted.
continue
}
fr.Close()
return errors.SetFile(err, jf)
}
// Save sequence number.
db.seq = batch.seq + uint64(batch.Len())
// Flush it if large enough.
if mem.Size() >= writeBuffer {
if err := cm.flush(mem, 0); err != nil {
if mdb.Size() >= writeBuffer {
if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
fr.Close()
return err
}
mem.Reset()
mdb.Reset()
}
}
of = file
return nil
fr.Close()
of = jf
}
// Recover all journals.
if len(journalFiles) > 0 {
db.logf("journal@recovery F·%d", len(journalFiles))
// Mark file number as used.
db.s.markFileNum(journalFiles[len(journalFiles)-1].Num())
mem = memdb.New(db.s.icmp, writeBuffer)
for _, file := range journalFiles {
if err := recoverJournal(file); err != nil {
return err
}
}
// Flush the last journal.
if mem.Len() > 0 {
if err := cm.flush(mem, 0); err != nil {
// Flush the last memdb.
if mdb.Len() > 0 {
if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
return err
}
}
@ -587,8 +608,10 @@ func (db *DB) recoverJournal() error {
}
// Commit.
if err := cm.commit(db.journalFile.Num(), db.seq); err != nil {
// Close journal.
rec.setJournalNum(db.journalFile.Num())
rec.setSeqNum(db.seq)
if err := db.s.commit(rec); err != nil {
// Close journal on error.
if db.journal != nil {
db.journal.Close()
db.journalWriter.Close()
@ -604,6 +627,103 @@ func (db *DB) recoverJournal() error {
return nil
}
func (db *DB) recoverJournalRO() error {
// Get all journals and sort it by file number.
allJournalFiles, err := db.s.getFiles(storage.TypeJournal)
if err != nil {
return err
}
files(allJournalFiles).sort()
// Journals that will be recovered.
var recJournalFiles []storage.File
for _, jf := range allJournalFiles {
if jf.Num() >= db.s.stJournalNum || jf.Num() == db.s.stPrevJournalNum {
recJournalFiles = append(recJournalFiles, jf)
}
}
var (
// Options.
strict = db.s.o.GetStrict(opt.StrictJournal)
checksum = db.s.o.GetStrict(opt.StrictJournalChecksum)
writeBuffer = db.s.o.GetWriteBuffer()
mdb = memdb.New(db.s.icmp, writeBuffer)
)
// Recover journals.
if len(recJournalFiles) > 0 {
db.logf("journal@recovery RO·Mode F·%d", len(recJournalFiles))
var (
jr *journal.Reader
buf = &util.Buffer{}
batch = &Batch{}
)
for _, jf := range recJournalFiles {
db.logf("journal@recovery recovering @%d", jf.Num())
fr, err := jf.Open()
if err != nil {
return err
}
// Create or reset journal reader instance.
if jr == nil {
jr = journal.NewReader(fr, dropper{db.s, jf}, strict, checksum)
} else {
jr.Reset(fr, dropper{db.s, jf}, strict, checksum)
}
// Replay journal to memdb.
for {
r, err := jr.Next()
if err != nil {
if err == io.EOF {
break
}
fr.Close()
return errors.SetFile(err, jf)
}
buf.Reset()
if _, err := buf.ReadFrom(r); err != nil {
if err == io.ErrUnexpectedEOF {
// This is error returned due to corruption, with strict == false.
continue
}
fr.Close()
return errors.SetFile(err, jf)
}
if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mdb); err != nil {
if !strict && errors.IsCorrupted(err) {
db.s.logf("journal error: %v (skipped)", err)
// We won't apply sequence number as it might be corrupted.
continue
}
fr.Close()
return errors.SetFile(err, jf)
}
// Save sequence number.
db.seq = batch.seq + uint64(batch.Len())
}
fr.Close()
}
}
// Set memDB.
db.mem = &memDB{db: db, DB: mdb, ref: 1}
return nil
}
func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
ikey := newIkey(key, seq, ktSeek)
@ -614,7 +734,7 @@ func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, er
}
defer m.decref()
mk, mv, me := m.mdb.Find(ikey)
mk, mv, me := m.Find(ikey)
if me == nil {
ukey, _, kt, kerr := parseIkey(mk)
if kerr != nil {
@ -652,7 +772,7 @@ func (db *DB) has(key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err er
}
defer m.decref()
mk, _, me := m.mdb.Find(ikey)
mk, _, me := m.Find(ikey)
if me == nil {
ukey, _, kt, kerr := parseIkey(mk)
if kerr != nil {
@ -784,7 +904,7 @@ func (db *DB) GetProperty(name string) (value string, err error) {
const prefix = "leveldb."
if !strings.HasPrefix(name, prefix) {
return "", errors.New("leveldb: GetProperty: unknown property: " + name)
return "", ErrNotFound
}
p := name[len(prefix):]
@ -798,7 +918,7 @@ func (db *DB) GetProperty(name string) (value string, err error) {
var rest string
n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
if n != 1 || int(level) >= db.s.o.GetNumLevel() {
err = errors.New("leveldb: GetProperty: invalid property: " + name)
err = ErrNotFound
} else {
value = fmt.Sprint(v.tLen(int(level)))
}
@ -837,7 +957,7 @@ func (db *DB) GetProperty(name string) (value string, err error) {
case p == "aliveiters":
value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveIters))
default:
err = errors.New("leveldb: GetProperty: unknown property: " + name)
err = ErrNotFound
}
return
@ -900,6 +1020,9 @@ func (db *DB) Close() error {
var err error
select {
case err = <-db.compErrC:
if err == ErrReadOnly {
err = nil
}
default:
}

View File

@ -11,7 +11,6 @@ import (
"time"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/memdb"
"github.com/syndtr/goleveldb/leveldb/opt"
)
@ -62,58 +61,8 @@ func (p *cStatsStaging) stopTimer() {
}
}
type cMem struct {
s *session
level int
rec *sessionRecord
}
func newCMem(s *session) *cMem {
return &cMem{s: s, rec: &sessionRecord{numLevel: s.o.GetNumLevel()}}
}
func (c *cMem) flush(mem *memdb.DB, level int) error {
s := c.s
// Write memdb to table.
iter := mem.NewIterator(nil)
defer iter.Release()
t, n, err := s.tops.createFrom(iter)
if err != nil {
return err
}
// Pick level.
if level < 0 {
v := s.version()
level = v.pickLevel(t.imin.ukey(), t.imax.ukey())
v.release()
}
c.rec.addTableFile(level, t)
s.logf("mem@flush created L%d@%d N·%d S·%s %q:%q", level, t.file.Num(), n, shortenb(int(t.size)), t.imin, t.imax)
c.level = level
return nil
}
func (c *cMem) reset() {
c.rec = &sessionRecord{numLevel: c.s.o.GetNumLevel()}
}
func (c *cMem) commit(journal, seq uint64) error {
c.rec.setJournalNum(journal)
c.rec.setSeqNum(seq)
// Commit changes.
return c.s.commit(c.rec)
}
func (db *DB) compactionError() {
var (
err error
wlocked bool
)
var err error
noerr:
// No error.
for {
@ -121,7 +70,7 @@ noerr:
case err = <-db.compErrSetC:
switch {
case err == nil:
case errors.IsCorrupted(err):
case err == ErrReadOnly, errors.IsCorrupted(err):
goto hasperr
default:
goto haserr
@ -139,7 +88,7 @@ haserr:
switch {
case err == nil:
goto noerr
case errors.IsCorrupted(err):
case err == ErrReadOnly, errors.IsCorrupted(err):
goto hasperr
default:
}
@ -155,9 +104,9 @@ hasperr:
case db.compPerErrC <- err:
case db.writeLockC <- struct{}{}:
// Hold write lock, so that write won't pass-through.
wlocked = true
db.compWriteLocking = true
case _, _ = <-db.closeC:
if wlocked {
if db.compWriteLocking {
// We should release the lock or Close will hang.
<-db.writeLockC
}
@ -287,21 +236,18 @@ func (db *DB) compactionExitTransact() {
}
func (db *DB) memCompaction() {
mem := db.getFrozenMem()
if mem == nil {
mdb := db.getFrozenMem()
if mdb == nil {
return
}
defer mem.decref()
defer mdb.decref()
c := newCMem(db.s)
stats := new(cStatsStaging)
db.logf("mem@flush N·%d S·%s", mem.mdb.Len(), shortenb(mem.mdb.Size()))
db.logf("memdb@flush N·%d S·%s", mdb.Len(), shortenb(mdb.Size()))
// Don't compact empty memdb.
if mem.mdb.Len() == 0 {
db.logf("mem@flush skipping")
// drop frozen mem
if mdb.Len() == 0 {
db.logf("memdb@flush skipping")
// drop frozen memdb
db.dropFrozenMem()
return
}
@ -317,13 +263,20 @@ func (db *DB) memCompaction() {
return
}
db.compactionTransactFunc("mem@flush", func(cnt *compactionTransactCounter) (err error) {
var (
rec = &sessionRecord{}
stats = &cStatsStaging{}
flushLevel int
)
db.compactionTransactFunc("memdb@flush", func(cnt *compactionTransactCounter) (err error) {
stats.startTimer()
defer stats.stopTimer()
return c.flush(mem.mdb, -1)
flushLevel, err = db.s.flushMemdb(rec, mdb.DB, -1)
stats.stopTimer()
return
}, func() error {
for _, r := range c.rec.addedTables {
db.logf("mem@flush revert @%d", r.num)
for _, r := range rec.addedTables {
db.logf("memdb@flush revert @%d", r.num)
f := db.s.getTableFile(r.num)
if err := f.Remove(); err != nil {
return err
@ -332,20 +285,23 @@ func (db *DB) memCompaction() {
return nil
})
db.compactionTransactFunc("mem@commit", func(cnt *compactionTransactCounter) (err error) {
db.compactionTransactFunc("memdb@commit", func(cnt *compactionTransactCounter) (err error) {
stats.startTimer()
defer stats.stopTimer()
return c.commit(db.journalFile.Num(), db.frozenSeq)
rec.setJournalNum(db.journalFile.Num())
rec.setSeqNum(db.frozenSeq)
err = db.s.commit(rec)
stats.stopTimer()
return
}, nil)
db.logf("mem@flush committed F·%d T·%v", len(c.rec.addedTables), stats.duration)
db.logf("memdb@flush committed F·%d T·%v", len(rec.addedTables), stats.duration)
for _, r := range c.rec.addedTables {
for _, r := range rec.addedTables {
stats.write += r.size
}
db.compStats[c.level].add(stats)
db.compStats[flushLevel].add(stats)
// Drop frozen mem.
// Drop frozen memdb.
db.dropFrozenMem()
// Resume table compaction.
@ -557,7 +513,7 @@ func (b *tableCompactionBuilder) revert() error {
func (db *DB) tableCompaction(c *compaction, noTrivial bool) {
defer c.release()
rec := &sessionRecord{numLevel: db.s.o.GetNumLevel()}
rec := &sessionRecord{}
rec.addCompPtr(c.level, c.imax)
if !noTrivial && c.trivial() {

View File

@ -40,11 +40,11 @@ func (db *DB) newRawIterator(slice *util.Range, ro *opt.ReadOptions) iterator.It
ti := v.getIterators(slice, ro)
n := len(ti) + 2
i := make([]iterator.Iterator, 0, n)
emi := em.mdb.NewIterator(slice)
emi := em.NewIterator(slice)
emi.SetReleaser(&memdbReleaser{m: em})
i = append(i, emi)
if fm != nil {
fmi := fm.mdb.NewIterator(slice)
fmi := fm.NewIterator(slice)
fmi.SetReleaser(&memdbReleaser{m: fm})
i = append(i, fmi)
}

View File

@ -16,7 +16,7 @@ import (
type memDB struct {
db *DB
mdb *memdb.DB
*memdb.DB
ref int32
}
@ -27,12 +27,12 @@ func (m *memDB) incref() {
func (m *memDB) decref() {
if ref := atomic.AddInt32(&m.ref, -1); ref == 0 {
// Only put back memdb with std capacity.
if m.mdb.Capacity() == m.db.s.o.GetWriteBuffer() {
m.mdb.Reset()
m.db.mpoolPut(m.mdb)
if m.Capacity() == m.db.s.o.GetWriteBuffer() {
m.Reset()
m.db.mpoolPut(m.DB)
}
m.db = nil
m.mdb = nil
m.DB = nil
} else if ref < 0 {
panic("negative memdb ref")
}
@ -126,7 +126,7 @@ func (db *DB) newMem(n int) (mem *memDB, err error) {
}
mem = &memDB{
db: db,
mdb: mdb,
DB: mdb,
ref: 2,
}
db.mem = mem

View File

@ -2445,7 +2445,7 @@ func TestDB_TableCompactionBuilder(t *testing.T) {
if err != nil {
t.Fatal(err)
}
rec := &sessionRecord{numLevel: s.o.GetNumLevel()}
rec := &sessionRecord{}
rec.addTableFile(i, tf)
if err := s.commit(rec); err != nil {
t.Fatal(err)
@ -2455,7 +2455,7 @@ func TestDB_TableCompactionBuilder(t *testing.T) {
// Build grandparent.
v := s.version()
c := newCompaction(s, v, 1, append(tFiles{}, v.tables[1]...))
rec := &sessionRecord{numLevel: s.o.GetNumLevel()}
rec := &sessionRecord{}
b := &tableCompactionBuilder{
s: s,
c: c,
@ -2479,7 +2479,7 @@ func TestDB_TableCompactionBuilder(t *testing.T) {
// Build level-1.
v = s.version()
c = newCompaction(s, v, 0, append(tFiles{}, v.tables[0]...))
rec = &sessionRecord{numLevel: s.o.GetNumLevel()}
rec = &sessionRecord{}
b = &tableCompactionBuilder{
s: s,
c: c,
@ -2523,7 +2523,7 @@ func TestDB_TableCompactionBuilder(t *testing.T) {
// Compaction with transient error.
v = s.version()
c = newCompaction(s, v, 1, append(tFiles{}, v.tables[1]...))
rec = &sessionRecord{numLevel: s.o.GetNumLevel()}
rec = &sessionRecord{}
b = &tableCompactionBuilder{
s: s,
c: c,
@ -2663,3 +2663,39 @@ func TestDB_IterTriggeredCompaction(t *testing.T) {
func TestDB_IterTriggeredCompactionHalf(t *testing.T) {
testDB_IterTriggeredCompaction(t, 2)
}
func TestDB_ReadOnly(t *testing.T) {
h := newDbHarness(t)
defer h.close()
h.put("foo", "v1")
h.put("bar", "v2")
h.compactMem()
h.put("xfoo", "v1")
h.put("xbar", "v2")
t.Log("Trigger read-only")
if err := h.db.SetReadOnly(); err != nil {
h.close()
t.Fatalf("SetReadOnly error: %v", err)
}
h.stor.SetEmuErr(storage.TypeAll, tsOpCreate, tsOpReplace, tsOpRemove, tsOpWrite, tsOpWrite, tsOpSync)
ro := func(key, value, wantValue string) {
if err := h.db.Put([]byte(key), []byte(value), h.wo); err != ErrReadOnly {
t.Fatalf("unexpected error: %v", err)
}
h.getVal(key, wantValue)
}
ro("foo", "vx", "v1")
h.o.ReadOnly = true
h.reopenDB()
ro("foo", "vx", "v1")
ro("bar", "vx", "v2")
h.assertNumKeys(4)
}

View File

@ -63,24 +63,24 @@ func (db *DB) rotateMem(n int) (mem *memDB, err error) {
return
}
func (db *DB) flush(n int) (mem *memDB, nn int, err error) {
func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) {
delayed := false
flush := func() (retry bool) {
v := db.s.version()
defer v.release()
mem = db.getEffectiveMem()
mdb = db.getEffectiveMem()
defer func() {
if retry {
mem.decref()
mem = nil
mdb.decref()
mdb = nil
}
}()
nn = mem.mdb.Free()
mdbFree = mdb.Free()
switch {
case v.tLen(0) >= db.s.o.GetWriteL0SlowdownTrigger() && !delayed:
delayed = true
time.Sleep(time.Millisecond)
case nn >= n:
case mdbFree >= n:
return false
case v.tLen(0) >= db.s.o.GetWriteL0PauseTrigger():
delayed = true
@ -90,15 +90,15 @@ func (db *DB) flush(n int) (mem *memDB, nn int, err error) {
}
default:
// Allow memdb to grow if it has no entry.
if mem.mdb.Len() == 0 {
nn = n
if mdb.Len() == 0 {
mdbFree = n
} else {
mem.decref()
mem, err = db.rotateMem(n)
mdb.decref()
mdb, err = db.rotateMem(n)
if err == nil {
nn = mem.mdb.Free()
mdbFree = mdb.Free()
} else {
nn = 0
mdbFree = 0
}
}
return false
@ -157,18 +157,18 @@ func (db *DB) Write(b *Batch, wo *opt.WriteOptions) (err error) {
}
}()
mem, memFree, err := db.flush(b.size())
mdb, mdbFree, err := db.flush(b.size())
if err != nil {
return
}
defer mem.decref()
defer mdb.decref()
// Calculate maximum size of the batch.
m := 1 << 20
if x := b.size(); x <= 128<<10 {
m = x + (128 << 10)
}
m = minInt(m, memFree)
m = minInt(m, mdbFree)
// Merge with other batch.
drain:
@ -197,7 +197,7 @@ drain:
select {
case db.journalC <- b:
// Write into memdb
if berr := b.memReplay(mem.mdb); berr != nil {
if berr := b.memReplay(mdb.DB); berr != nil {
panic(berr)
}
case err = <-db.compPerErrC:
@ -211,7 +211,7 @@ drain:
case err = <-db.journalAckC:
if err != nil {
// Revert memdb if error detected
if berr := b.revertMemReplay(mem.mdb); berr != nil {
if berr := b.revertMemReplay(mdb.DB); berr != nil {
panic(berr)
}
return
@ -225,7 +225,7 @@ drain:
if err != nil {
return
}
if berr := b.memReplay(mem.mdb); berr != nil {
if berr := b.memReplay(mdb.DB); berr != nil {
panic(berr)
}
}
@ -233,7 +233,7 @@ drain:
// Set last seq number.
db.addSeq(uint64(b.Len()))
if b.size() >= memFree {
if b.size() >= mdbFree {
db.rotateMem(0)
}
return
@ -249,8 +249,7 @@ func (db *DB) Put(key, value []byte, wo *opt.WriteOptions) error {
return db.Write(b, wo)
}
// Delete deletes the value for the given key. It returns ErrNotFound if
// the DB does not contain the key.
// Delete deletes the value for the given key.
//
// It is safe to modify the contents of the arguments after Delete returns.
func (db *DB) Delete(key []byte, wo *opt.WriteOptions) error {
@ -290,9 +289,9 @@ func (db *DB) CompactRange(r util.Range) error {
}
// Check for overlaps in memdb.
mem := db.getEffectiveMem()
defer mem.decref()
if isMemOverlaps(db.s.icmp, mem.mdb, r.Start, r.Limit) {
mdb := db.getEffectiveMem()
defer mdb.decref()
if isMemOverlaps(db.s.icmp, mdb.DB, r.Start, r.Limit) {
// Memdb compaction.
if _, err := db.rotateMem(0); err != nil {
<-db.writeLockC
@ -309,3 +308,31 @@ func (db *DB) CompactRange(r util.Range) error {
// Table compaction.
return db.compSendRange(db.tcompCmdC, -1, r.Start, r.Limit)
}
// SetReadOnly makes DB read-only. It will stay read-only until reopened.
func (db *DB) SetReadOnly() error {
if err := db.ok(); err != nil {
return err
}
// Lock writer.
select {
case db.writeLockC <- struct{}{}:
db.compWriteLocking = true
case err := <-db.compPerErrC:
return err
case _, _ = <-db.closeC:
return ErrClosed
}
// Set compaction read-only.
select {
case db.compErrSetC <- ErrReadOnly:
case perr := <-db.compPerErrC:
return perr
case _, _ = <-db.closeC:
return ErrClosed
}
return nil
}

View File

@ -12,6 +12,7 @@ import (
var (
ErrNotFound = errors.ErrNotFound
ErrReadOnly = errors.New("leveldb: read-only mode")
ErrSnapshotReleased = errors.New("leveldb: snapshot released")
ErrIterReleased = errors.New("leveldb: iterator released")
ErrClosed = errors.New("leveldb: closed")

View File

@ -206,6 +206,7 @@ func (p *DB) randHeight() (h int) {
return
}
// Must hold RW-lock if prev == true, as it use shared prevNode slice.
func (p *DB) findGE(key []byte, prev bool) (int, bool) {
node := 0
h := p.maxHeight - 1
@ -302,7 +303,7 @@ func (p *DB) Put(key []byte, value []byte) error {
node := len(p.nodeData)
p.nodeData = append(p.nodeData, kvOffset, len(key), len(value), h)
for i, n := range p.prevNode[:h] {
m := n + 4 + i
m := n + nNext + i
p.nodeData = append(p.nodeData, p.nodeData[m])
p.nodeData[m] = node
}
@ -434,20 +435,22 @@ func (p *DB) Len() int {
// Reset resets the DB to initial empty state. Allows reuse the buffer.
func (p *DB) Reset() {
p.mu.Lock()
p.rnd = rand.New(rand.NewSource(0xdeadbeef))
p.maxHeight = 1
p.n = 0
p.kvSize = 0
p.kvData = p.kvData[:0]
p.nodeData = p.nodeData[:4+tMaxHeight]
p.nodeData = p.nodeData[:nNext+tMaxHeight]
p.nodeData[nKV] = 0
p.nodeData[nKey] = 0
p.nodeData[nVal] = 0
p.nodeData[nHeight] = tMaxHeight
for n := 0; n < tMaxHeight; n++ {
p.nodeData[4+n] = 0
p.nodeData[nNext+n] = 0
p.prevNode[n] = 0
}
p.mu.Unlock()
}
// New creates a new initalized in-memory key/value DB. The capacity

View File

@ -250,6 +250,11 @@ type Options struct {
// The default value (DefaultCompression) uses snappy compression.
Compression Compression
// DisableBufferPool allows disable use of util.BufferPool functionality.
//
// The default value is false.
DisableBufferPool bool
// DisableBlockCache allows disable use of cache.Cache functionality on
// 'sorted table' block.
//
@ -321,6 +326,11 @@ type Options struct {
// The default value is 500.
OpenFilesCacheCapacity int
// If true then opens DB in read-only mode.
//
// The default value is false.
ReadOnly bool
// Strict defines the DB strict level.
Strict Strict
@ -472,6 +482,20 @@ func (o *Options) GetCompression() Compression {
return o.Compression
}
func (o *Options) GetDisableBufferPool() bool {
if o == nil {
return false
}
return o.DisableBufferPool
}
func (o *Options) GetDisableBlockCache() bool {
if o == nil {
return false
}
return o.DisableBlockCache
}
func (o *Options) GetDisableCompactionBackoff() bool {
if o == nil {
return false
@ -548,6 +572,13 @@ func (o *Options) GetOpenFilesCacheCapacity() int {
return o.OpenFilesCacheCapacity
}
func (o *Options) GetReadOnly() bool {
if o == nil {
return false
}
return o.ReadOnly
}
func (o *Options) GetStrict(strict Strict) bool {
if o == nil || o.Strict == 0 {
return DefaultStrict&strict != 0

View File

@ -11,10 +11,8 @@ import (
"io"
"os"
"sync"
"sync/atomic"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/journal"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/storage"
@ -127,11 +125,16 @@ func (s *session) recover() (err error) {
return
}
defer reader.Close()
strict := s.o.GetStrict(opt.StrictManifest)
jr := journal.NewReader(reader, dropper{s, m}, strict, true)
staging := s.stVersion.newStaging()
rec := &sessionRecord{numLevel: s.o.GetNumLevel()}
var (
// Options.
numLevel = s.o.GetNumLevel()
strict = s.o.GetStrict(opt.StrictManifest)
jr = journal.NewReader(reader, dropper{s, m}, strict, true)
rec = &sessionRecord{}
staging = s.stVersion.newStaging()
)
for {
var r io.Reader
r, err = jr.Next()
@ -143,7 +146,7 @@ func (s *session) recover() (err error) {
return errors.SetFile(err, m)
}
err = rec.decode(r)
err = rec.decode(r, numLevel)
if err == nil {
// save compact pointers
for _, r := range rec.compPtrs {
@ -206,250 +209,3 @@ func (s *session) commit(r *sessionRecord) (err error) {
return
}
// Pick a compaction based on current state; need external synchronization.
func (s *session) pickCompaction() *compaction {
v := s.version()
var level int
var t0 tFiles
if v.cScore >= 1 {
level = v.cLevel
cptr := s.stCompPtrs[level]
tables := v.tables[level]
for _, t := range tables {
if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 {
t0 = append(t0, t)
break
}
}
if len(t0) == 0 {
t0 = append(t0, tables[0])
}
} else {
if p := atomic.LoadPointer(&v.cSeek); p != nil {
ts := (*tSet)(p)
level = ts.level
t0 = append(t0, ts.table)
} else {
v.release()
return nil
}
}
return newCompaction(s, v, level, t0)
}
// Create compaction from given level and range; need external synchronization.
func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction {
v := s.version()
t0 := v.tables[level].getOverlaps(nil, s.icmp, umin, umax, level == 0)
if len(t0) == 0 {
v.release()
return nil
}
// Avoid compacting too much in one shot in case the range is large.
// But we cannot do this for level-0 since level-0 files can overlap
// and we must not pick one file and drop another older file if the
// two files overlap.
if level > 0 {
limit := uint64(v.s.o.GetCompactionSourceLimit(level))
total := uint64(0)
for i, t := range t0 {
total += t.size
if total >= limit {
s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1)
t0 = t0[:i+1]
break
}
}
}
return newCompaction(s, v, level, t0)
}
func newCompaction(s *session, v *version, level int, t0 tFiles) *compaction {
c := &compaction{
s: s,
v: v,
level: level,
tables: [2]tFiles{t0, nil},
maxGPOverlaps: uint64(s.o.GetCompactionGPOverlaps(level)),
tPtrs: make([]int, s.o.GetNumLevel()),
}
c.expand()
c.save()
return c
}
// compaction represent a compaction state.
type compaction struct {
s *session
v *version
level int
tables [2]tFiles
maxGPOverlaps uint64
gp tFiles
gpi int
seenKey bool
gpOverlappedBytes uint64
imin, imax iKey
tPtrs []int
released bool
snapGPI int
snapSeenKey bool
snapGPOverlappedBytes uint64
snapTPtrs []int
}
func (c *compaction) save() {
c.snapGPI = c.gpi
c.snapSeenKey = c.seenKey
c.snapGPOverlappedBytes = c.gpOverlappedBytes
c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...)
}
func (c *compaction) restore() {
c.gpi = c.snapGPI
c.seenKey = c.snapSeenKey
c.gpOverlappedBytes = c.snapGPOverlappedBytes
c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...)
}
func (c *compaction) release() {
if !c.released {
c.released = true
c.v.release()
}
}
// Expand compacted tables; need external synchronization.
func (c *compaction) expand() {
limit := uint64(c.s.o.GetCompactionExpandLimit(c.level))
vt0, vt1 := c.v.tables[c.level], c.v.tables[c.level+1]
t0, t1 := c.tables[0], c.tables[1]
imin, imax := t0.getRange(c.s.icmp)
// We expand t0 here just incase ukey hop across tables.
t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.level == 0)
if len(t0) != len(c.tables[0]) {
imin, imax = t0.getRange(c.s.icmp)
}
t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false)
// Get entire range covered by compaction.
amin, amax := append(t0, t1...).getRange(c.s.icmp)
// See if we can grow the number of inputs in "level" without
// changing the number of "level+1" files we pick up.
if len(t1) > 0 {
exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.level == 0)
if len(exp0) > len(t0) && t1.size()+exp0.size() < limit {
xmin, xmax := exp0.getRange(c.s.icmp)
exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false)
if len(exp1) == len(t1) {
c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
c.level, c.level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size())))
imin, imax = xmin, xmax
t0, t1 = exp0, exp1
amin, amax = append(t0, t1...).getRange(c.s.icmp)
}
}
}
// Compute the set of grandparent files that overlap this compaction
// (parent == level+1; grandparent == level+2)
if c.level+2 < c.s.o.GetNumLevel() {
c.gp = c.v.tables[c.level+2].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
}
c.tables[0], c.tables[1] = t0, t1
c.imin, c.imax = imin, imax
}
// Check whether compaction is trivial.
func (c *compaction) trivial() bool {
return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= c.maxGPOverlaps
}
func (c *compaction) baseLevelForKey(ukey []byte) bool {
for level, tables := range c.v.tables[c.level+2:] {
for c.tPtrs[level] < len(tables) {
t := tables[c.tPtrs[level]]
if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 {
// We've advanced far enough.
if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
// Key falls in this file's range, so definitely not base level.
return false
}
break
}
c.tPtrs[level]++
}
}
return true
}
func (c *compaction) shouldStopBefore(ikey iKey) bool {
for ; c.gpi < len(c.gp); c.gpi++ {
gp := c.gp[c.gpi]
if c.s.icmp.Compare(ikey, gp.imax) <= 0 {
break
}
if c.seenKey {
c.gpOverlappedBytes += gp.size
}
}
c.seenKey = true
if c.gpOverlappedBytes > c.maxGPOverlaps {
// Too much overlap for current output; start new output.
c.gpOverlappedBytes = 0
return true
}
return false
}
// Creates an iterator.
func (c *compaction) newIterator() iterator.Iterator {
// Creates iterator slice.
icap := len(c.tables)
if c.level == 0 {
// Special case for level-0
icap = len(c.tables[0]) + 1
}
its := make([]iterator.Iterator, 0, icap)
// Options.
ro := &opt.ReadOptions{
DontFillCache: true,
Strict: opt.StrictOverride,
}
strict := c.s.o.GetStrict(opt.StrictCompaction)
if strict {
ro.Strict |= opt.StrictReader
}
for i, tables := range c.tables {
if len(tables) == 0 {
continue
}
// Level-0 is not sorted and may overlaps each other.
if c.level+i == 0 {
for _, t := range tables {
its = append(its, c.s.tops.newIterator(t, nil, ro))
}
} else {
it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict)
its = append(its, it)
}
}
return iterator.NewMergedIterator(its, c.s.icmp, strict)
}

View File

@ -0,0 +1,287 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"sync/atomic"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/memdb"
"github.com/syndtr/goleveldb/leveldb/opt"
)
func (s *session) pickMemdbLevel(umin, umax []byte) int {
v := s.version()
defer v.release()
return v.pickMemdbLevel(umin, umax)
}
func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, level int) (level_ int, err error) {
// Create sorted table.
iter := mdb.NewIterator(nil)
defer iter.Release()
t, n, err := s.tops.createFrom(iter)
if err != nil {
return level, err
}
// Pick level and add to record.
if level < 0 {
level = s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey())
}
rec.addTableFile(level, t)
s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", level, t.file.Num(), n, shortenb(int(t.size)), t.imin, t.imax)
return level, nil
}
// Pick a compaction based on current state; need external synchronization.
func (s *session) pickCompaction() *compaction {
v := s.version()
var level int
var t0 tFiles
if v.cScore >= 1 {
level = v.cLevel
cptr := s.stCompPtrs[level]
tables := v.tables[level]
for _, t := range tables {
if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 {
t0 = append(t0, t)
break
}
}
if len(t0) == 0 {
t0 = append(t0, tables[0])
}
} else {
if p := atomic.LoadPointer(&v.cSeek); p != nil {
ts := (*tSet)(p)
level = ts.level
t0 = append(t0, ts.table)
} else {
v.release()
return nil
}
}
return newCompaction(s, v, level, t0)
}
// Create compaction from given level and range; need external synchronization.
func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction {
v := s.version()
t0 := v.tables[level].getOverlaps(nil, s.icmp, umin, umax, level == 0)
if len(t0) == 0 {
v.release()
return nil
}
// Avoid compacting too much in one shot in case the range is large.
// But we cannot do this for level-0 since level-0 files can overlap
// and we must not pick one file and drop another older file if the
// two files overlap.
if level > 0 {
limit := uint64(v.s.o.GetCompactionSourceLimit(level))
total := uint64(0)
for i, t := range t0 {
total += t.size
if total >= limit {
s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1)
t0 = t0[:i+1]
break
}
}
}
return newCompaction(s, v, level, t0)
}
func newCompaction(s *session, v *version, level int, t0 tFiles) *compaction {
c := &compaction{
s: s,
v: v,
level: level,
tables: [2]tFiles{t0, nil},
maxGPOverlaps: uint64(s.o.GetCompactionGPOverlaps(level)),
tPtrs: make([]int, s.o.GetNumLevel()),
}
c.expand()
c.save()
return c
}
// compaction represent a compaction state.
type compaction struct {
s *session
v *version
level int
tables [2]tFiles
maxGPOverlaps uint64
gp tFiles
gpi int
seenKey bool
gpOverlappedBytes uint64
imin, imax iKey
tPtrs []int
released bool
snapGPI int
snapSeenKey bool
snapGPOverlappedBytes uint64
snapTPtrs []int
}
func (c *compaction) save() {
c.snapGPI = c.gpi
c.snapSeenKey = c.seenKey
c.snapGPOverlappedBytes = c.gpOverlappedBytes
c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...)
}
func (c *compaction) restore() {
c.gpi = c.snapGPI
c.seenKey = c.snapSeenKey
c.gpOverlappedBytes = c.snapGPOverlappedBytes
c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...)
}
func (c *compaction) release() {
if !c.released {
c.released = true
c.v.release()
}
}
// Expand compacted tables; need external synchronization.
func (c *compaction) expand() {
limit := uint64(c.s.o.GetCompactionExpandLimit(c.level))
vt0, vt1 := c.v.tables[c.level], c.v.tables[c.level+1]
t0, t1 := c.tables[0], c.tables[1]
imin, imax := t0.getRange(c.s.icmp)
// We expand t0 here just incase ukey hop across tables.
t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.level == 0)
if len(t0) != len(c.tables[0]) {
imin, imax = t0.getRange(c.s.icmp)
}
t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false)
// Get entire range covered by compaction.
amin, amax := append(t0, t1...).getRange(c.s.icmp)
// See if we can grow the number of inputs in "level" without
// changing the number of "level+1" files we pick up.
if len(t1) > 0 {
exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.level == 0)
if len(exp0) > len(t0) && t1.size()+exp0.size() < limit {
xmin, xmax := exp0.getRange(c.s.icmp)
exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false)
if len(exp1) == len(t1) {
c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
c.level, c.level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size())))
imin, imax = xmin, xmax
t0, t1 = exp0, exp1
amin, amax = append(t0, t1...).getRange(c.s.icmp)
}
}
}
// Compute the set of grandparent files that overlap this compaction
// (parent == level+1; grandparent == level+2)
if c.level+2 < c.s.o.GetNumLevel() {
c.gp = c.v.tables[c.level+2].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
}
c.tables[0], c.tables[1] = t0, t1
c.imin, c.imax = imin, imax
}
// Check whether compaction is trivial.
func (c *compaction) trivial() bool {
return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= c.maxGPOverlaps
}
func (c *compaction) baseLevelForKey(ukey []byte) bool {
for level, tables := range c.v.tables[c.level+2:] {
for c.tPtrs[level] < len(tables) {
t := tables[c.tPtrs[level]]
if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 {
// We've advanced far enough.
if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
// Key falls in this file's range, so definitely not base level.
return false
}
break
}
c.tPtrs[level]++
}
}
return true
}
func (c *compaction) shouldStopBefore(ikey iKey) bool {
for ; c.gpi < len(c.gp); c.gpi++ {
gp := c.gp[c.gpi]
if c.s.icmp.Compare(ikey, gp.imax) <= 0 {
break
}
if c.seenKey {
c.gpOverlappedBytes += gp.size
}
}
c.seenKey = true
if c.gpOverlappedBytes > c.maxGPOverlaps {
// Too much overlap for current output; start new output.
c.gpOverlappedBytes = 0
return true
}
return false
}
// Creates an iterator.
func (c *compaction) newIterator() iterator.Iterator {
// Creates iterator slice.
icap := len(c.tables)
if c.level == 0 {
// Special case for level-0.
icap = len(c.tables[0]) + 1
}
its := make([]iterator.Iterator, 0, icap)
// Options.
ro := &opt.ReadOptions{
DontFillCache: true,
Strict: opt.StrictOverride,
}
strict := c.s.o.GetStrict(opt.StrictCompaction)
if strict {
ro.Strict |= opt.StrictReader
}
for i, tables := range c.tables {
if len(tables) == 0 {
continue
}
// Level-0 is not sorted and may overlaps each other.
if c.level+i == 0 {
for _, t := range tables {
its = append(its, c.s.tops.newIterator(t, nil, ro))
}
} else {
it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict)
its = append(its, it)
}
}
return iterator.NewMergedIterator(its, c.s.icmp, strict)
}

View File

@ -52,8 +52,6 @@ type dtRecord struct {
}
type sessionRecord struct {
numLevel int
hasRec int
comparer string
journalNum uint64
@ -230,7 +228,7 @@ func (p *sessionRecord) readBytes(field string, r byteReader) []byte {
return x
}
func (p *sessionRecord) readLevel(field string, r io.ByteReader) int {
func (p *sessionRecord) readLevel(field string, r io.ByteReader, numLevel int) int {
if p.err != nil {
return 0
}
@ -238,14 +236,14 @@ func (p *sessionRecord) readLevel(field string, r io.ByteReader) int {
if p.err != nil {
return 0
}
if x >= uint64(p.numLevel) {
if x >= uint64(numLevel) {
p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "invalid level number"})
return 0
}
return int(x)
}
func (p *sessionRecord) decode(r io.Reader) error {
func (p *sessionRecord) decode(r io.Reader, numLevel int) error {
br, ok := r.(byteReader)
if !ok {
br = bufio.NewReader(r)
@ -286,13 +284,13 @@ func (p *sessionRecord) decode(r io.Reader) error {
p.setSeqNum(x)
}
case recCompPtr:
level := p.readLevel("comp-ptr.level", br)
level := p.readLevel("comp-ptr.level", br, numLevel)
ikey := p.readBytes("comp-ptr.ikey", br)
if p.err == nil {
p.addCompPtr(level, iKey(ikey))
}
case recAddTable:
level := p.readLevel("add-table.level", br)
level := p.readLevel("add-table.level", br, numLevel)
num := p.readUvarint("add-table.num", br)
size := p.readUvarint("add-table.size", br)
imin := p.readBytes("add-table.imin", br)
@ -301,7 +299,7 @@ func (p *sessionRecord) decode(r io.Reader) error {
p.addTable(level, num, size, imin, imax)
}
case recDelTable:
level := p.readLevel("del-table.level", br)
level := p.readLevel("del-table.level", br, numLevel)
num := p.readUvarint("del-table.num", br)
if p.err == nil {
p.delTable(level, num)

View File

@ -19,8 +19,8 @@ func decodeEncode(v *sessionRecord) (res bool, err error) {
if err != nil {
return
}
v2 := &sessionRecord{numLevel: opt.DefaultNumLevel}
err = v.decode(b)
v2 := &sessionRecord{}
err = v.decode(b, opt.DefaultNumLevel)
if err != nil {
return
}
@ -34,7 +34,7 @@ func decodeEncode(v *sessionRecord) (res bool, err error) {
func TestSessionRecord_EncodeDecode(t *testing.T) {
big := uint64(1) << 50
v := &sessionRecord{numLevel: opt.DefaultNumLevel}
v := &sessionRecord{}
i := uint64(0)
test := func() {
res, err := decodeEncode(v)

View File

@ -182,7 +182,7 @@ func (s *session) newManifest(rec *sessionRecord, v *version) (err error) {
defer v.release()
}
if rec == nil {
rec = &sessionRecord{numLevel: s.o.GetNumLevel()}
rec = &sessionRecord{}
}
s.fillRecord(rec, true)
v.fillRecord(rec)

View File

@ -42,6 +42,8 @@ type tsOp uint
const (
tsOpOpen tsOp = iota
tsOpCreate
tsOpReplace
tsOpRemove
tsOpRead
tsOpReadAt
tsOpWrite
@ -241,6 +243,10 @@ func (tf tsFile) Replace(newfile storage.File) (err error) {
if err != nil {
return
}
if tf.shouldErr(tsOpReplace) {
err = errors.New("leveldb.testStorage: emulated create error")
return
}
err = tf.File.Replace(newfile.(tsFile).File)
if err != nil {
ts.t.Errorf("E: cannot replace file, num=%d type=%v: %v", tf.Num(), tf.Type(), err)
@ -258,6 +264,10 @@ func (tf tsFile) Remove() (err error) {
if err != nil {
return
}
if tf.shouldErr(tsOpRemove) {
err = errors.New("leveldb.testStorage: emulated create error")
return
}
err = tf.File.Remove()
if err != nil {
ts.t.Errorf("E: cannot remove file, num=%d type=%v: %v", tf.Num(), tf.Type(), err)

View File

@ -441,22 +441,26 @@ func newTableOps(s *session) *tOps {
var (
cacher cache.Cacher
bcache *cache.Cache
bpool *util.BufferPool
)
if s.o.GetOpenFilesCacheCapacity() > 0 {
cacher = cache.NewLRU(s.o.GetOpenFilesCacheCapacity())
}
if !s.o.DisableBlockCache {
if !s.o.GetDisableBlockCache() {
var bcacher cache.Cacher
if s.o.GetBlockCacheCapacity() > 0 {
bcacher = cache.NewLRU(s.o.GetBlockCacheCapacity())
}
bcache = cache.NewCache(bcacher)
}
if !s.o.GetDisableBufferPool() {
bpool = util.NewBufferPool(s.o.GetBlockSize() + 5)
}
return &tOps{
s: s,
cache: cache.NewCache(cacher),
bcache: bcache,
bpool: util.NewBufferPool(s.o.GetBlockSize() + 5),
bpool: bpool,
}
}

View File

@ -14,7 +14,7 @@ import (
"strings"
"sync"
"github.com/syndtr/gosnappy/snappy"
"github.com/google/go-snappy/snappy"
"github.com/syndtr/goleveldb/leveldb/cache"
"github.com/syndtr/goleveldb/leveldb/comparer"

View File

@ -12,7 +12,7 @@ import (
"fmt"
"io"
"github.com/syndtr/gosnappy/snappy"
"github.com/google/go-snappy/snappy"
"github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/syndtr/goleveldb/leveldb/filter"

View File

@ -300,7 +300,7 @@ func (v *version) offsetOf(ikey iKey) (n uint64, err error) {
return
}
func (v *version) pickLevel(umin, umax []byte) (level int) {
func (v *version) pickMemdbLevel(umin, umax []byte) (level int) {
if !v.tables[0].overlaps(v.s.icmp, umin, umax, true) {
var overlaps tFiles
maxLevel := v.s.o.GetMaxMemCompationLevel()