update godep

siddontang 2015-06-18 16:23:42 +08:00
parent c80cda80d8
commit a298919dd0
55 changed files with 2846 additions and 1747 deletions

Godeps/Godeps.json (generated)

@@ -1,18 +1,19 @@
 {
 "ImportPath": "github.com/siddontang/ledisdb",
-"GoVersion": "go1.3.3",
+"GoVersion": "go1.4.2",
 "Packages": [
 "./..."
 ],
 "Deps": [
 {
 "ImportPath": "github.com/BurntSushi/toml",
 "Comment": "v0.1.0",
 "Rev": "2ceedfee35ad3848e49308ab0c9a4f640cfb5fb2"
 },
 {
 "ImportPath": "github.com/boltdb/bolt",
-"Comment": "v1.0-62-gee95430",
-"Rev": "ee954308d64186f0fc9b7022b6178977848c17a3"
+"Comment": "v1.0-111-g04a3e85",
+"Rev": "04a3e85793043e76d41164037d0d7f9d53eecae3"
 },
 {
 "ImportPath": "github.com/cupcake/rdb",
@@ -22,6 +23,10 @@
 "ImportPath": "github.com/edsrzf/mmap-go",
 "Rev": "6c75090c55983bef2e129e173681b20d24871ef8"
 },
+{
+"ImportPath": "github.com/google/go-snappy/snappy",
+"Rev": "eaa750b9bf4dcb7cb20454be850613b66cda3273"
+},
 {
 "ImportPath": "github.com/siddontang/go/bson",
 "Rev": "530a23162549a31baa14dfa3b647a9eccee8878f"
@@ -64,11 +69,7 @@
 },
 {
 "ImportPath": "github.com/syndtr/goleveldb/leveldb",
-"Rev": "4875955338b0a434238a31165cb87255ab6e9e4a"
-},
-{
-"ImportPath": "github.com/syndtr/gosnappy/snappy",
-"Rev": "156a073208e131d7d2e212cb749feae7c339e846"
+"Rev": "a06509502ca32565bdf74afc1e573050023f261c"
 },
 {
 "ImportPath": "github.com/ugorji/go/codec",


@ -1,3 +1,4 @@
*.prof
*.test
*.swp
/bin/


@ -87,6 +87,11 @@ are not thread safe. To work with data in multiple goroutines you must start
a transaction for each one or use locking to ensure only one goroutine accesses
a transaction at a time. Creating a transaction from the `DB` is thread safe.
Read-only transactions and read-write transactions should not depend on one
another and generally shouldn't be opened simultaneously in the same goroutine.
This can cause a deadlock as the read-write transaction needs to periodically
re-map the data file, but it cannot do so while a read-only transaction is open.
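
A minimal sketch of the hazard this paragraph warns about (illustrative only; the path and bucket name are placeholders, not from the original README):

```go
package main

import (
	"log"

	"github.com/boltdb/bolt"
)

func main() {
	db, err := bolt.Open("my.db", 0666, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// ANTI-PATTERN: a read-only transaction is still open in this
	// goroutine when the read-write transaction starts. If the writer
	// needs to grow and re-map the data file, it waits for the reader
	// to finish -- which never happens here, so it deadlocks.
	roTx, err := db.Begin(false)
	if err != nil {
		log.Fatal(err)
	}
	err = db.Update(func(tx *bolt.Tx) error { // may block forever
		_, cerr := tx.CreateBucketIfNotExists([]byte("widgets"))
		return cerr
	})
	if err != nil {
		log.Fatal(err)
	}
	_ = roTx.Rollback()
}
```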
#### Read-write transactions
@ -446,6 +451,21 @@ It's also useful to pipe these stats to a service such as statsd for monitoring
or to provide an HTTP endpoint that will perform a fixed-length sample.
### Read-Only Mode
Sometimes it is useful to create a shared, read-only Bolt database. To do this,
set the `Options.ReadOnly` flag when opening your database. Read-only mode
uses a shared lock to allow multiple processes to read from the database but
it will block any processes from opening the database in read-write mode.
```go
db, err := bolt.Open("my.db", 0666, &bolt.Options{ReadOnly: true})
if err != nil {
log.Fatal(err)
}
```
## Resources
For more information on getting started with Bolt, check out the following articles:
@ -550,6 +570,11 @@ Here are a few things to note when evaluating and using Bolt:
However, this is expected and the OS will release memory as needed. Bolt can
handle databases much larger than the available physical RAM.
* The data structures in the Bolt database are memory mapped so the data file
will be endian specific. This means that you cannot copy a Bolt file from a
little endian machine to a big endian machine and have it work. For most
users this is not a concern since most modern CPUs are little endian.
* Because of the way pages are laid out on disk, Bolt cannot truncate data files
and return free pages back to the disk. Instead, Bolt maintains a free list
of unused pages within its data file. These free pages can be reused by later
@ -586,5 +611,10 @@ Below is a list of public, open source projects that use Bolt:
* [tentacool](https://github.com/optiflows/tentacool) - REST API server to manage system stuff (IP, DNS, Gateway...) on a Linux server.
* [SkyDB](https://github.com/skydb/sky) - Behavioral analytics database.
* [Seaweed File System](https://github.com/chrislusf/weed-fs) - Highly scalable distributed key~file system with O(1) disk read.
* [InfluxDB](http://influxdb.com) - Scalable datastore for metrics, events, and real-time analytics.
* [Freehold](http://tshannon.bitbucket.org/freehold/) - An open, secure, and lightweight platform for your files and data.
* [Prometheus Annotation Server](https://github.com/oliver006/prom_annotation_server) - Annotation server for PromDash & Prometheus service monitoring system.
* [Consul](https://github.com/hashicorp/consul) - Consul is service discovery and configuration made easy. Distributed, highly available, and datacenter-aware.
* [Kala](https://github.com/ajvb/kala) - Kala is a modern job scheduler optimized to run on a single node. It is persistent, offers a JSON-over-HTTP API, and supports ISO 8601 duration notation and dependent jobs.
If you are using Bolt in a project please send a pull request to add it to the list.


@ -20,6 +20,9 @@ import (
// take permanent effect only after a successful return is seen in
// caller.
//
// The maximum batch size and delay can be adjusted with DB.MaxBatchSize
// and DB.MaxBatchDelay, respectively.
//
// Batch is only useful when there are multiple goroutines calling it.
func (db *DB) Batch(fn func(*Tx) error) error {
errCh := make(chan error, 1)
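
As a usage sketch (not part of this diff), concurrent `Batch` calls can be coalesced into a single commit, with the two new knobs tuned on the `DB`; the bucket name and sizes below are illustrative:

```go
package main

import (
	"log"
	"strconv"
	"sync"
	"time"

	"github.com/boltdb/bolt"
)

func main() {
	db, err := bolt.Open("batch.db", 0666, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Cap how many calls may be combined and how long Batch waits
	// for more callers before committing.
	db.MaxBatchSize = 500
	db.MaxBatchDelay = 10 * time.Millisecond

	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			// fn may run more than once if a batch is retried,
			// so it should be idempotent.
			err := db.Batch(func(tx *bolt.Tx) error {
				b, err := tx.CreateBucketIfNotExists([]byte("events"))
				if err != nil {
					return err
				}
				return b.Put([]byte(strconv.Itoa(i)), []byte("x"))
			})
			if err != nil {
				log.Println("batch:", err)
			}
		}(i)
	}
	wg.Wait()
}
```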


@ -11,7 +11,7 @@ import (
)
// flock acquires an advisory lock on a file descriptor.
func flock(f *os.File, timeout time.Duration) error {
func flock(f *os.File, exclusive bool, timeout time.Duration) error {
var t time.Time
for {
// If we're beyond our timeout then return an error.
@ -21,9 +21,13 @@ func flock(f *os.File, timeout time.Duration) error {
} else if timeout > 0 && time.Since(t) > timeout {
return ErrTimeout
}
flag := syscall.LOCK_SH
if exclusive {
flag = syscall.LOCK_EX
}
// Otherwise attempt to obtain the lock.
err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
err := syscall.Flock(int(f.Fd()), flag|syscall.LOCK_NB)
if err == nil {
return nil
} else if err != syscall.EWOULDBLOCK {
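
A standalone, Unix-only sketch (not from this diff) of the shared-versus-exclusive semantics the new `exclusive` parameter selects between; `my.db` is a placeholder and must already exist:

```go
package main

import (
	"fmt"
	"os"
	"syscall"
)

func main() {
	f, err := os.Open("my.db")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer f.Close()

	// LOCK_SH admits many concurrent holders (readers); LOCK_EX admits
	// exactly one (the single writer). LOCK_NB makes the call fail with
	// EWOULDBLOCK instead of blocking, which is what the retry loop
	// above relies on.
	if err := syscall.Flock(int(f.Fd()), syscall.LOCK_SH|syscall.LOCK_NB); err != nil {
		fmt.Println("shared lock unavailable:", err)
		return
	}
	defer syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
	fmt.Println("holding shared lock")
}
```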
@ -44,11 +48,13 @@ func funlock(f *os.File) error {
func mmap(db *DB, sz int) error {
// Truncate and fsync to ensure file size metadata is flushed.
// https://github.com/boltdb/bolt/issues/284
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("file resize error: %s", err)
}
if err := db.file.Sync(); err != nil {
return fmt.Errorf("file sync error: %s", err)
if !db.NoGrowSync && !db.readOnly {
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("file resize error: %s", err)
}
if err := db.file.Sync(); err != nil {
return fmt.Errorf("file sync error: %s", err)
}
}
// Map the data file to memory.
@ -57,6 +63,11 @@ func mmap(db *DB, sz int) error {
return err
}
// Advise the kernel that the mmap is accessed randomly.
if err := madvise(b, syscall.MADV_RANDOM); err != nil {
return fmt.Errorf("madvise: %s", err)
}
// Save the original byte slice and convert to a byte array pointer.
db.dataref = b
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
@ -78,3 +89,12 @@ func munmap(db *DB) error {
db.datasz = 0
return err
}
// NOTE: This function is copied from stdlib because it is not available on darwin.
func madvise(b []byte, advice int) (err error) {
_, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice))
if e1 != 0 {
err = e1
}
return
}


@ -16,7 +16,7 @@ func fdatasync(db *DB) error {
}
// flock acquires an advisory lock on a file descriptor.
func flock(f *os.File, _ time.Duration) error {
func flock(f *os.File, _ bool, _ time.Duration) error {
return nil
}
@ -28,9 +28,11 @@ func funlock(f *os.File) error {
// mmap memory maps a DB's data file.
// Based on: https://github.com/edsrzf/mmap-go
func mmap(db *DB, sz int) error {
// Truncate the database to the size of the mmap.
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("truncate: %s", err)
if !db.readOnly {
// Truncate the database to the size of the mmap.
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("truncate: %s", err)
}
}
// Open a file mapping handle.


@ -640,6 +640,22 @@ func TestBucket_Put_KeyTooLarge(t *testing.T) {
})
}
// Ensure that an error is returned when inserting a value that's too large.
func TestBucket_Put_ValueTooLarge(t *testing.T) {
if os.Getenv("DRONE") == "true" {
t.Skip("not enough RAM for test")
}
db := NewTestDB()
defer db.Close()
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
err := tx.Bucket([]byte("widgets")).Put([]byte("foo"), make([]byte, bolt.MaxValueSize+1))
equals(t, err, bolt.ErrValueTooLarge)
return nil
})
}
// Ensure a bucket can calculate stats.
func TestBucket_Stats(t *testing.T) {
db := NewTestDB()


@ -1,421 +0,0 @@
package main
import (
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"math/rand"
"os"
"runtime"
"runtime/pprof"
"time"
"github.com/boltdb/bolt"
)
// File handlers for the various profiles.
var cpuprofile, memprofile, blockprofile *os.File
var benchBucketName = []byte("bench")
// Bench executes a customizable, synthetic benchmark against Bolt.
func Bench(options *BenchOptions) {
var results BenchResults
// Validate options.
if options.BatchSize == 0 {
options.BatchSize = options.Iterations
} else if options.Iterations%options.BatchSize != 0 {
fatal("number of iterations must be divisible by the batch size")
}
// Find temporary location.
path := tempfile()
if options.Clean {
defer os.Remove(path)
} else {
println("work:", path)
}
// Create database.
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
db.NoSync = options.NoSync
defer db.Close()
// Enable streaming stats.
if options.StatsInterval > 0 {
go printStats(db, options.StatsInterval)
}
// Start profiling for writes.
if options.ProfileMode == "rw" || options.ProfileMode == "w" {
benchStartProfiling(options)
}
// Write to the database.
if err := benchWrite(db, options, &results); err != nil {
fatal("bench: write: ", err)
}
// Stop profiling for writes only.
if options.ProfileMode == "w" {
benchStopProfiling()
}
// Start profiling for reads.
if options.ProfileMode == "r" {
benchStartProfiling(options)
}
// Read from the database.
if err := benchRead(db, options, &results); err != nil {
fatal("bench: read: ", err)
}
// Stop profiling for reads.
if options.ProfileMode == "rw" || options.ProfileMode == "r" {
benchStopProfiling()
}
// Print results.
fmt.Fprintf(os.Stderr, "# Write\t%v\t(%v/op)\t(%v op/sec)\n", results.WriteDuration, results.WriteOpDuration(), results.WriteOpsPerSecond())
fmt.Fprintf(os.Stderr, "# Read\t%v\t(%v/op)\t(%v op/sec)\n", results.ReadDuration, results.ReadOpDuration(), results.ReadOpsPerSecond())
fmt.Fprintln(os.Stderr, "")
}
// Writes to the database.
func benchWrite(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
var err error
var t = time.Now()
switch options.WriteMode {
case "seq":
err = benchWriteSequential(db, options, results)
case "rnd":
err = benchWriteRandom(db, options, results)
case "seq-nest":
err = benchWriteSequentialNested(db, options, results)
case "rnd-nest":
err = benchWriteRandomNested(db, options, results)
default:
return fmt.Errorf("invalid write mode: %s", options.WriteMode)
}
results.WriteDuration = time.Since(t)
return err
}
func benchWriteSequential(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
var i = uint32(0)
return benchWriteWithSource(db, options, results, func() uint32 { i++; return i })
}
func benchWriteRandom(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
return benchWriteWithSource(db, options, results, func() uint32 { return r.Uint32() })
}
func benchWriteSequentialNested(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
var i = uint32(0)
return benchWriteNestedWithSource(db, options, results, func() uint32 { i++; return i })
}
func benchWriteRandomNested(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
return benchWriteNestedWithSource(db, options, results, func() uint32 { return r.Uint32() })
}
func benchWriteWithSource(db *bolt.DB, options *BenchOptions, results *BenchResults, keySource func() uint32) error {
results.WriteOps = options.Iterations
for i := 0; i < options.Iterations; i += options.BatchSize {
err := db.Update(func(tx *bolt.Tx) error {
b, _ := tx.CreateBucketIfNotExists(benchBucketName)
b.FillPercent = options.FillPercent
for j := 0; j < options.BatchSize; j++ {
var key = make([]byte, options.KeySize)
var value = make([]byte, options.ValueSize)
binary.BigEndian.PutUint32(key, keySource())
if err := b.Put(key, value); err != nil {
return err
}
}
return nil
})
if err != nil {
return err
}
}
return nil
}
func benchWriteNestedWithSource(db *bolt.DB, options *BenchOptions, results *BenchResults, keySource func() uint32) error {
results.WriteOps = options.Iterations
for i := 0; i < options.Iterations; i += options.BatchSize {
err := db.Update(func(tx *bolt.Tx) error {
top, _ := tx.CreateBucketIfNotExists(benchBucketName)
top.FillPercent = options.FillPercent
var name = make([]byte, options.KeySize)
binary.BigEndian.PutUint32(name, keySource())
b, _ := top.CreateBucketIfNotExists(name)
b.FillPercent = options.FillPercent
for j := 0; j < options.BatchSize; j++ {
var key = make([]byte, options.KeySize)
var value = make([]byte, options.ValueSize)
binary.BigEndian.PutUint32(key, keySource())
if err := b.Put(key, value); err != nil {
return err
}
}
return nil
})
if err != nil {
return err
}
}
return nil
}
// Reads from the database.
func benchRead(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
var err error
var t = time.Now()
switch options.ReadMode {
case "seq":
if options.WriteMode == "seq-nest" || options.WriteMode == "rnd-nest" {
err = benchReadSequentialNested(db, options, results)
} else {
err = benchReadSequential(db, options, results)
}
default:
return fmt.Errorf("invalid read mode: %s", options.ReadMode)
}
results.ReadDuration = time.Since(t)
return err
}
func benchReadSequential(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
return db.View(func(tx *bolt.Tx) error {
var t = time.Now()
for {
c := tx.Bucket(benchBucketName).Cursor()
var count int
for k, v := c.First(); k != nil; k, v = c.Next() {
if v == nil {
return errors.New("invalid value")
}
count++
}
if options.WriteMode == "seq" && count != options.Iterations {
return fmt.Errorf("read seq: iter mismatch: expected %d, got %d", options.Iterations, count)
}
results.ReadOps += count
// Make sure we do this for at least a second.
if time.Since(t) >= time.Second {
break
}
}
return nil
})
}
func benchReadSequentialNested(db *bolt.DB, options *BenchOptions, results *BenchResults) error {
return db.View(func(tx *bolt.Tx) error {
var t = time.Now()
for {
var count int
var top = tx.Bucket(benchBucketName)
top.ForEach(func(name, _ []byte) error {
c := top.Bucket(name).Cursor()
for k, v := c.First(); k != nil; k, v = c.Next() {
if v == nil {
return errors.New("invalid value")
}
count++
}
return nil
})
if options.WriteMode == "seq-nest" && count != options.Iterations {
return fmt.Errorf("read seq-nest: iter mismatch: expected %d, got %d", options.Iterations, count)
}
results.ReadOps += count
// Make sure we do this for at least a second.
if time.Since(t) >= time.Second {
break
}
}
return nil
})
}
// Starts all profiles set on the options.
func benchStartProfiling(options *BenchOptions) {
var err error
// Start CPU profiling.
if options.CPUProfile != "" {
cpuprofile, err = os.Create(options.CPUProfile)
if err != nil {
fatalf("bench: could not create cpu profile %q: %v", options.CPUProfile, err)
}
pprof.StartCPUProfile(cpuprofile)
}
// Start memory profiling.
if options.MemProfile != "" {
memprofile, err = os.Create(options.MemProfile)
if err != nil {
fatalf("bench: could not create memory profile %q: %v", options.MemProfile, err)
}
runtime.MemProfileRate = 4096
}
// Start block profiling.
if options.BlockProfile != "" {
blockprofile, err = os.Create(options.BlockProfile)
if err != nil {
fatalf("bench: could not create block profile %q: %v", options.BlockProfile, err)
}
runtime.SetBlockProfileRate(1)
}
}
// Stops all profiles.
func benchStopProfiling() {
if cpuprofile != nil {
pprof.StopCPUProfile()
cpuprofile.Close()
cpuprofile = nil
}
if memprofile != nil {
pprof.Lookup("heap").WriteTo(memprofile, 0)
memprofile.Close()
memprofile = nil
}
if blockprofile != nil {
pprof.Lookup("block").WriteTo(blockprofile, 0)
blockprofile.Close()
blockprofile = nil
runtime.SetBlockProfileRate(0)
}
}
// Continuously prints stats on the database at given intervals.
func printStats(db *bolt.DB, interval time.Duration) {
var prevStats = db.Stats()
var encoder = json.NewEncoder(os.Stdout)
for {
// Wait for the stats interval.
time.Sleep(interval)
// Retrieve new stats and find difference from previous iteration.
var stats = db.Stats()
var diff = stats.Sub(&prevStats)
// Print as JSON to STDOUT.
if err := encoder.Encode(diff); err != nil {
fatal(err)
}
// Save stats for next iteration.
prevStats = stats
}
}
// BenchOptions represents the set of options that can be passed to Bench().
type BenchOptions struct {
ProfileMode string
WriteMode string
ReadMode string
Iterations int
BatchSize int
KeySize int
ValueSize int
CPUProfile string
MemProfile string
BlockProfile string
StatsInterval time.Duration
FillPercent float64
NoSync bool
Clean bool
}
// BenchResults represents the performance results of the benchmark.
type BenchResults struct {
WriteOps int
WriteDuration time.Duration
ReadOps int
ReadDuration time.Duration
}
// Returns the duration for a single write operation.
func (r *BenchResults) WriteOpDuration() time.Duration {
if r.WriteOps == 0 {
return 0
}
return r.WriteDuration / time.Duration(r.WriteOps)
}
// Returns average number of write operations that can be performed per second.
func (r *BenchResults) WriteOpsPerSecond() int {
var op = r.WriteOpDuration()
if op == 0 {
return 0
}
return int(time.Second) / int(op)
}
// Returns the duration for a single read operation.
func (r *BenchResults) ReadOpDuration() time.Duration {
if r.ReadOps == 0 {
return 0
}
return r.ReadDuration / time.Duration(r.ReadOps)
}
// Returns average number of read operations that can be performed per second.
func (r *BenchResults) ReadOpsPerSecond() int {
var op = r.ReadOpDuration()
if op == 0 {
return 0
}
return int(time.Second) / int(op)
}
// tempfile returns a temporary file path.
func tempfile() string {
f, _ := ioutil.TempFile("", "bolt-bench-")
f.Close()
os.Remove(f.Name())
return f.Name()
}


@ -1,33 +0,0 @@
package main
import (
"os"
"github.com/boltdb/bolt"
)
// Buckets prints a list of all buckets.
func Buckets(path string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
err = db.View(func(tx *bolt.Tx) error {
return tx.ForEach(func(name []byte, _ *bolt.Bucket) error {
println(string(name))
return nil
})
})
if err != nil {
fatal(err)
return
}
}


@ -1,31 +0,0 @@
package main_test
import (
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
)
// Ensure that a list of buckets can be retrieved.
func TestBuckets(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("woojits"))
tx.CreateBucket([]byte("widgets"))
tx.CreateBucket([]byte("whatchits"))
return nil
})
db.Close()
output := run("buckets", path)
equals(t, "whatchits\nwidgets\nwoojits", output)
})
}
// Ensure that an error is reported if the database is not found.
func TestBucketsDBNotFound(t *testing.T) {
SetTestMode(true)
output := run("buckets", "no/such/db")
equals(t, "stat no/such/db: no such file or directory", output)
}


@ -1,47 +0,0 @@
package main
import (
"os"
"github.com/boltdb/bolt"
)
// Check performs a consistency check on the database and prints any errors found.
func Check(path string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
// Perform consistency check.
_ = db.View(func(tx *bolt.Tx) error {
var count int
ch := tx.Check()
loop:
for {
select {
case err, ok := <-ch:
if !ok {
break loop
}
println(err)
count++
}
}
// Print summary of errors.
if count > 0 {
fatalf("%d errors found", count)
} else {
println("OK")
}
return nil
})
}


@ -1,45 +0,0 @@
package main
import (
"os"
"github.com/boltdb/bolt"
)
// Get retrieves the value for a given bucket/key.
func Get(path, name, key string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
err = db.View(func(tx *bolt.Tx) error {
// Find bucket.
b := tx.Bucket([]byte(name))
if b == nil {
fatalf("bucket not found: %s", name)
return nil
}
// Find value for a given key.
value := b.Get([]byte(key))
if value == nil {
fatalf("key not found: %s", key)
return nil
}
println(string(value))
return nil
})
if err != nil {
fatal(err)
return
}
}


@ -1,54 +0,0 @@
package main_test
import (
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
)
// Ensure that a value can be retrieved from the CLI.
func TestGet(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar"))
return nil
})
db.Close()
output := run("get", path, "widgets", "foo")
equals(t, "bar", output)
})
}
// Ensure that an error is reported if the database is not found.
func TestGetDBNotFound(t *testing.T) {
SetTestMode(true)
output := run("get", "no/such/db", "widgets", "foo")
equals(t, "stat no/such/db: no such file or directory", output)
}
// Ensure that an error is reported if the bucket is not found.
func TestGetBucketNotFound(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Close()
output := run("get", path, "widgets", "foo")
equals(t, "bucket not found: widgets", output)
})
}
// Ensure that an error is reported if the key is not found.
func TestGetKeyNotFound(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Update(func(tx *bolt.Tx) error {
_, err := tx.CreateBucket([]byte("widgets"))
return err
})
db.Close()
output := run("get", path, "widgets", "foo")
equals(t, "key not found: foo", output)
})
}


@ -1,26 +0,0 @@
package main
import (
"os"
"github.com/boltdb/bolt"
)
// Info prints basic information about a database.
func Info(path string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
// Print basic database info.
var info = db.Info()
printf("Page Size: %d\n", info.PageSize)
}


@ -1,31 +0,0 @@
package main_test
import (
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
)
// Ensure that a database info can be printed.
func TestInfo(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
b := tx.Bucket([]byte("widgets"))
b.Put([]byte("foo"), []byte("0000"))
return nil
})
db.Close()
output := run("info", path)
equals(t, `Page Size: 4096`, output)
})
}
// Ensure that an error is reported if the database is not found.
func TestInfo_NotFound(t *testing.T) {
SetTestMode(true)
output := run("info", "no/such/db")
equals(t, "stat no/such/db: no such file or directory", output)
}


@ -1,41 +0,0 @@
package main
import (
"os"
"github.com/boltdb/bolt"
)
// Keys retrieves a list of keys for a given bucket.
func Keys(path, name string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
err = db.View(func(tx *bolt.Tx) error {
// Find bucket.
b := tx.Bucket([]byte(name))
if b == nil {
fatalf("bucket not found: %s", name)
return nil
}
// Iterate over each key.
return b.ForEach(func(key, _ []byte) error {
println(string(key))
return nil
})
})
if err != nil {
fatal(err)
return
}
}


@ -1,42 +0,0 @@
package main_test
import (
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
)
// Ensure that a list of keys can be retrieved for a given bucket.
func TestKeys(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
tx.Bucket([]byte("widgets")).Put([]byte("0002"), []byte(""))
tx.Bucket([]byte("widgets")).Put([]byte("0001"), []byte(""))
tx.Bucket([]byte("widgets")).Put([]byte("0003"), []byte(""))
return nil
})
db.Close()
output := run("keys", path, "widgets")
equals(t, "0001\n0002\n0003", output)
})
}
// Ensure that an error is reported if the database is not found.
func TestKeysDBNotFound(t *testing.T) {
SetTestMode(true)
output := run("keys", "no/such/db", "widgets")
equals(t, "stat no/such/db: no such file or directory", output)
}
// Ensure that an error is reported if the bucket is not found.
func TestKeysBucketNotFound(t *testing.T) {
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Close()
output := run("keys", path, "widgets")
equals(t, "bucket not found: widgets", output)
})
}

File diff suppressed because it is too large.


@ -1,69 +1,145 @@
package main_test
import (
"fmt"
"bytes"
"io/ioutil"
"os"
"path/filepath"
"reflect"
"runtime"
"strings"
"strconv"
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
"github.com/boltdb/bolt/cmd/bolt"
)
// open creates and opens a Bolt database in the temp directory.
func open(fn func(*bolt.DB, string)) {
path := tempfile()
defer os.RemoveAll(path)
// Ensure the "info" command can print information about a database.
func TestInfoCommand_Run(t *testing.T) {
db := MustOpen(0666, nil)
db.DB.Close()
defer db.Close()
db, err := bolt.Open(path, 0600, nil)
if err != nil {
panic("db open error: " + err.Error())
// Run the info command.
m := NewMain()
if err := m.Run("info", db.Path); err != nil {
t.Fatal(err)
}
fn(db, path)
}
// run executes a command against the CLI and returns the output.
func run(args ...string) string {
args = append([]string{"bolt"}, args...)
NewApp().Run(args)
return strings.TrimSpace(LogBuffer())
// Ensure the "stats" command can execute correctly.
func TestStatsCommand_Run(t *testing.T) {
// Skip if the system page size is not 4KB.
if os.Getpagesize() != 4096 {
t.Skip("system does not use 4KB page size")
}
db := MustOpen(0666, nil)
defer db.Close()
if err := db.Update(func(tx *bolt.Tx) error {
// Create "foo" bucket.
b, err := tx.CreateBucket([]byte("foo"))
if err != nil {
return err
}
for i := 0; i < 10; i++ {
if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil {
return err
}
}
// Create "bar" bucket.
b, err = tx.CreateBucket([]byte("bar"))
if err != nil {
return err
}
for i := 0; i < 100; i++ {
if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil {
return err
}
}
// Create "baz" bucket.
b, err = tx.CreateBucket([]byte("baz"))
if err != nil {
return err
}
if err := b.Put([]byte("key"), []byte("value")); err != nil {
return err
}
return nil
}); err != nil {
t.Fatal(err)
}
db.DB.Close()
// Generate expected result.
exp := "Aggregate statistics for 3 buckets\n\n" +
"Page count statistics\n" +
"\tNumber of logical branch pages: 0\n" +
"\tNumber of physical branch overflow pages: 0\n" +
"\tNumber of logical leaf pages: 1\n" +
"\tNumber of physical leaf overflow pages: 0\n" +
"Tree statistics\n" +
"\tNumber of keys/value pairs: 111\n" +
"\tNumber of levels in B+tree: 1\n" +
"Page size utilization\n" +
"\tBytes allocated for physical branch pages: 0\n" +
"\tBytes actually used for branch data: 0 (0%)\n" +
"\tBytes allocated for physical leaf pages: 4096\n" +
"\tBytes actually used for leaf data: 1996 (48%)\n" +
"Bucket statistics\n" +
"\tTotal number of buckets: 3\n" +
"\tTotal number on inlined buckets: 2 (66%)\n" +
"\tBytes used for inlined buckets: 236 (11%)\n"
// Run the command.
m := NewMain()
if err := m.Run("stats", db.Path); err != nil {
t.Fatal(err)
} else if m.Stdout.String() != exp {
t.Fatalf("unexpected stdout:\n\n%s", m.Stdout.String())
}
}
// tempfile returns a temporary file path.
func tempfile() string {
// Main represents a test wrapper for main.Main that records output.
type Main struct {
*main.Main
Stdin bytes.Buffer
Stdout bytes.Buffer
Stderr bytes.Buffer
}
// NewMain returns a new instance of Main.
func NewMain() *Main {
m := &Main{Main: main.NewMain()}
m.Main.Stdin = &m.Stdin
m.Main.Stdout = &m.Stdout
m.Main.Stderr = &m.Stderr
return m
}
// MustOpen creates a Bolt database in a temporary location.
func MustOpen(mode os.FileMode, options *bolt.Options) *DB {
// Create temporary path.
f, _ := ioutil.TempFile("", "bolt-")
f.Close()
os.Remove(f.Name())
return f.Name()
}
// assert fails the test if the condition is false.
func assert(tb testing.TB, condition bool, msg string, v ...interface{}) {
if !condition {
_, file, line, _ := runtime.Caller(1)
fmt.Printf("\033[31m%s:%d: "+msg+"\033[39m\n\n", append([]interface{}{filepath.Base(file), line}, v...)...)
tb.FailNow()
}
}
// ok fails the test if an err is not nil.
func ok(tb testing.TB, err error) {
db, err := bolt.Open(f.Name(), mode, options)
if err != nil {
_, file, line, _ := runtime.Caller(1)
fmt.Printf("\033[31m%s:%d: unexpected error: %s\033[39m\n\n", filepath.Base(file), line, err.Error())
tb.FailNow()
panic(err.Error())
}
return &DB{DB: db, Path: f.Name()}
}
// equals fails the test if exp is not equal to act.
func equals(tb testing.TB, exp, act interface{}) {
if !reflect.DeepEqual(exp, act) {
_, file, line, _ := runtime.Caller(1)
fmt.Printf("\033[31m%s:%d:\n\n\texp: %#v\n\n\tgot: %#v\033[39m\n\n", filepath.Base(file), line, exp, act)
tb.FailNow()
}
// DB is a test wrapper for bolt.DB.
type DB struct {
*bolt.DB
Path string
}
// Close closes and removes the database.
func (db *DB) Close() error {
defer os.Remove(db.Path)
return db.DB.Close()
}


@ -1,57 +0,0 @@
package main
import (
"os"
"strconv"
"github.com/boltdb/bolt"
)
// Pages prints a list of all pages in a database.
func Pages(path string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
println("ID TYPE ITEMS OVRFLW")
println("======== ========== ====== ======")
db.Update(func(tx *bolt.Tx) error {
var id int
for {
p, err := tx.Page(id)
if err != nil {
fatalf("page error: %d: %s", id, err)
} else if p == nil {
break
}
// Only display count and overflow if this is a non-free page.
var count, overflow string
if p.Type != "free" {
count = strconv.Itoa(p.Count)
if p.OverflowCount > 0 {
overflow = strconv.Itoa(p.OverflowCount)
}
}
// Print table row.
printf("%-8d %-10s %-6s %-6s\n", p.ID, p.Type, count, overflow)
// Move to the next non-overflow page.
id += 1
if p.Type != "free" {
id += p.OverflowCount
}
}
return nil
})
}


@ -1,77 +0,0 @@
package main
import (
"bytes"
"os"
"github.com/boltdb/bolt"
)
// Collect stats for all top level buckets matching the prefix.
func Stats(path, prefix string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600, nil)
if err != nil {
fatal(err)
return
}
defer db.Close()
err = db.View(func(tx *bolt.Tx) error {
var s bolt.BucketStats
var count int
var prefix = []byte(prefix)
tx.ForEach(func(name []byte, b *bolt.Bucket) error {
if bytes.HasPrefix(name, prefix) {
s.Add(b.Stats())
count += 1
}
return nil
})
printf("Aggregate statistics for %d buckets\n\n", count)
println("Page count statistics")
printf("\tNumber of logical branch pages: %d\n", s.BranchPageN)
printf("\tNumber of physical branch overflow pages: %d\n", s.BranchOverflowN)
printf("\tNumber of logical leaf pages: %d\n", s.LeafPageN)
printf("\tNumber of physical leaf overflow pages: %d\n", s.LeafOverflowN)
println("Tree statistics")
printf("\tNumber of keys/value pairs: %d\n", s.KeyN)
printf("\tNumber of levels in B+tree: %d\n", s.Depth)
println("Page size utilization")
printf("\tBytes allocated for physical branch pages: %d\n", s.BranchAlloc)
var percentage int
if s.BranchAlloc != 0 {
percentage = int(float32(s.BranchInuse) * 100.0 / float32(s.BranchAlloc))
}
printf("\tBytes actually used for branch data: %d (%d%%)\n", s.BranchInuse, percentage)
printf("\tBytes allocated for physical leaf pages: %d\n", s.LeafAlloc)
percentage = 0
if s.LeafAlloc != 0 {
percentage = int(float32(s.LeafInuse) * 100.0 / float32(s.LeafAlloc))
}
printf("\tBytes actually used for leaf data: %d (%d%%)\n", s.LeafInuse, percentage)
println("Bucket statistics")
printf("\tTotal number of buckets: %d\n", s.BucketN)
percentage = int(float32(s.InlineBucketN) * 100.0 / float32(s.BucketN))
printf("\tTotal number on inlined buckets: %d (%d%%)\n", s.InlineBucketN, percentage)
percentage = 0
if s.LeafInuse != 0 {
percentage = int(float32(s.InlineBucketInuse) * 100.0 / float32(s.LeafInuse))
}
printf("\tBytes used for inlined buckets: %d (%d%%)\n", s.InlineBucketInuse, percentage)
return nil
})
if err != nil {
fatal(err)
return
}
}


@ -1,61 +0,0 @@
package main_test
import (
"os"
"strconv"
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
)
func TestStats(t *testing.T) {
if os.Getpagesize() != 4096 {
t.Skip()
}
SetTestMode(true)
open(func(db *bolt.DB, path string) {
db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("foo"))
if err != nil {
return err
}
for i := 0; i < 10; i++ {
b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i)))
}
b, err = tx.CreateBucket([]byte("bar"))
if err != nil {
return err
}
for i := 0; i < 100; i++ {
b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i)))
}
b, err = tx.CreateBucket([]byte("baz"))
if err != nil {
return err
}
b.Put([]byte("key"), []byte("value"))
return nil
})
db.Close()
output := run("stats", path, "b")
equals(t, "Aggregate statistics for 2 buckets\n\n"+
"Page count statistics\n"+
"\tNumber of logical branch pages: 0\n"+
"\tNumber of physical branch overflow pages: 0\n"+
"\tNumber of logical leaf pages: 1\n"+
"\tNumber of physical leaf overflow pages: 0\n"+
"Tree statistics\n"+
"\tNumber of keys/value pairs: 101\n"+
"\tNumber of levels in B+tree: 1\n"+
"Page size utilization\n"+
"\tBytes allocated for physical branch pages: 0\n"+
"\tBytes actually used for branch data: 0 (0%)\n"+
"\tBytes allocated for physical leaf pages: 4096\n"+
"\tBytes actually used for leaf data: 1996 (48%)\n"+
"Bucket statistics\n"+
"\tTotal number of buckets: 2\n"+
"\tTotal number on inlined buckets: 1 (50%)\n"+
"\tBytes used for inlined buckets: 40 (2%)", output)
})
}


@ -55,6 +55,14 @@ type DB struct {
// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
NoSync bool
// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
// bypasses a truncate() and fsync() syscall on remapping.
//
// https://github.com/boltdb/bolt/issues/284
NoGrowSync bool
// MaxBatchSize is the maximum size of a batch. Default value is
// copied from DefaultMaxBatchSize in Open.
//
@ -96,6 +104,10 @@ type DB struct {
ops struct {
writeAt func(b []byte, off int64) (n int, err error)
}
// Read only mode.
// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
readOnly bool
}
// Path returns the path to the currently open database file.
@ -123,24 +135,34 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
if options == nil {
options = DefaultOptions
}
db.NoGrowSync = options.NoGrowSync
// Set default values for later DB operations.
db.MaxBatchSize = DefaultMaxBatchSize
db.MaxBatchDelay = DefaultMaxBatchDelay
flag := os.O_RDWR
if options.ReadOnly {
flag = os.O_RDONLY
db.readOnly = true
}
// Open data file and separate sync handler for metadata writes.
db.path = path
var err error
if db.file, err = os.OpenFile(db.path, os.O_RDWR|os.O_CREATE, mode); err != nil {
if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
_ = db.close()
return nil, err
}
// Lock file so that other processes using Bolt cannot use the database
// at the same time. This would cause corruption since the two processes
// would write meta pages and free pages separately.
if err := flock(db.file, options.Timeout); err != nil {
// Lock file so that other processes using Bolt in read-write mode cannot
// use the database at the same time. This would cause corruption since
// the two processes would write meta pages and free pages separately.
// The database file is locked exclusively (only one process can grab the lock)
// when opened read-write, and with a shared lock (more than one process may
// hold the lock at the same time) when options.ReadOnly is set.
if err := flock(db.file, !db.readOnly, options.Timeout); err != nil {
_ = db.close()
return nil, err
}
@ -247,8 +269,8 @@ func (db *DB) munmap() error {
// of the database. The minimum size is 1MB and doubles until it reaches 1GB.
// Returns an error if the new mmap size is greater than the max allowed.
func (db *DB) mmapSize(size int) (int, error) {
// Double the size from 1MB until 1GB.
for i := uint(20); i <= 30; i++ {
// Double the size from 32KB until 1GB.
for i := uint(15); i <= 30; i++ {
if size <= 1<<i {
return 1 << i, nil
}
@ -329,8 +351,15 @@ func (db *DB) init() error {
// Close releases all database resources.
// All transactions must be closed before closing the database.
func (db *DB) Close() error {
db.rwlock.Lock()
defer db.rwlock.Unlock()
db.metalock.Lock()
defer db.metalock.Unlock()
db.mmaplock.RLock()
defer db.mmaplock.RUnlock()
return db.close()
}
@ -350,8 +379,11 @@ func (db *DB) close() error {
// Close file handles.
if db.file != nil {
// Unlock the file.
_ = funlock(db.file)
// No need to unlock read-only file.
if !db.readOnly {
// Unlock the file.
_ = funlock(db.file)
}
// Close the file descriptor.
if err := db.file.Close(); err != nil {
@ -369,6 +401,11 @@ func (db *DB) close() error {
// will cause the calls to block and be serialized until the current write
// transaction finishes.
//
// Transactions should not be dependent on one another. Opening a read
// transaction and a write transaction in the same goroutine can cause the
// writer to deadlock because the database periodically needs to re-mmap itself
// as it grows and it cannot do that while a read transaction is open.
//
// IMPORTANT: You must close read-only transactions after you are finished or
// else the database will not reclaim old pages.
func (db *DB) Begin(writable bool) (*Tx, error) {
@ -417,6 +454,11 @@ func (db *DB) beginTx() (*Tx, error) {
}
func (db *DB) beginRWTx() (*Tx, error) {
// If the database was opened with Options.ReadOnly, return an error.
if db.readOnly {
return nil, ErrDatabaseReadOnly
}
// Obtain writer lock. This is released by the transaction when it closes.
// This enforces only one writer transaction at a time.
db.rwlock.Lock()
@ -547,6 +589,12 @@ func (db *DB) View(fn func(*Tx) error) error {
return nil
}
// Sync executes fdatasync() against the database file handle.
//
// This is not necessary under normal operation, however, if you use NoSync
// then it allows you to force the database file to sync against the disk.
func (db *DB) Sync() error { return fdatasync(db) }
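
An illustrative bulk-load sketch (not part of this diff) pairing `NoSync` with the new `Sync` method; the path, bucket name, and counts are placeholders:

```go
package main

import (
	"encoding/binary"
	"log"

	"github.com/boltdb/bolt"
)

func main() {
	db, err := bolt.Open("bulk.db", 0666, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	db.NoSync = true // skip fsync on every commit during the load

	if err := db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists([]byte("bulk"))
		if err != nil {
			return err
		}
		for i := uint32(0); i < 1000; i++ {
			key := make([]byte, 4) // fresh slice: Put keeps a reference until commit
			binary.BigEndian.PutUint32(key, i)
			if err := b.Put(key, []byte("v")); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		log.Fatal(err)
	}

	// One explicit fdatasync at the end makes the load durable.
	if err := db.Sync(); err != nil {
		log.Fatal(err)
	}
}
```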
// Stats retrieves ongoing performance stats for the database.
// This is only updated when a transaction closes.
func (db *DB) Stats() Stats {
@ -607,18 +655,30 @@ func (db *DB) allocate(count int) (*page, error) {
return p, nil
}
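// IsReadOnly returns whether the database was opened with Options.ReadOnly.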
func (db *DB) IsReadOnly() bool {
return db.readOnly
}
// Options represents the options that can be set when opening a database.
type Options struct {
// Timeout is the amount of time to wait to obtain a file lock.
// When set to zero it will wait indefinitely. This option is only
// available on Darwin and Linux.
Timeout time.Duration
// Sets the DB.NoGrowSync flag before memory mapping the file.
NoGrowSync bool
// Open database in read-only mode. Uses flock(..., LOCK_SH|LOCK_NB) to
// grab a shared lock (UNIX).
ReadOnly bool
}
// DefaultOptions represent the options used if nil options are passed into Open().
// No timeout is used which will cause Bolt to wait indefinitely for a lock.
var DefaultOptions = &Options{
Timeout: 0,
Timeout: 0,
NoGrowSync: false,
}
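
An illustrative open call (not from this diff) exercising the new options; the path and timeout are placeholders:

```go
package main

import (
	"log"
	"time"

	"github.com/boltdb/bolt"
)

func main() {
	// Fail fast if another process holds the exclusive lock, and skip
	// the truncate/fsync on remap (only safe on non-ext3/ext4 systems).
	db, err := bolt.Open("my.db", 0666, &bolt.Options{
		Timeout:    time.Second,
		NoGrowSync: true,
	})
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
}
```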
// Stats represents statistics about the database.


@ -224,6 +224,76 @@ func TestDB_Open_FileTooSmall(t *testing.T) {
equals(t, errors.New("file size too small"), err)
}
// Ensure that a database can be opened in read-only mode by multiple processes
// and that a database cannot be opened in read-write mode and in read-only
// mode at the same time.
func TestOpen_ReadOnly(t *testing.T) {
bucket, key, value := []byte(`bucket`), []byte(`key`), []byte(`value`)
path := tempfile()
defer os.Remove(path)
// Open in read-write mode.
db, err := bolt.Open(path, 0666, nil)
ok(t, db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket(bucket)
if err != nil {
return err
}
return b.Put(key, value)
}))
assert(t, db != nil, "")
assert(t, !db.IsReadOnly(), "")
ok(t, err)
ok(t, db.Close())
// Open in read-only mode.
db0, err := bolt.Open(path, 0666, &bolt.Options{ReadOnly: true})
ok(t, err)
defer db0.Close()
// Opening in read-write mode should return an error.
_, err = bolt.Open(path, 0666, &bolt.Options{Timeout: time.Millisecond * 100})
assert(t, err != nil, "")
// And again (in read-only mode).
db1, err := bolt.Open(path, 0666, &bolt.Options{ReadOnly: true})
ok(t, err)
defer db1.Close()
// Verify both read-only databases are accessible.
for _, db := range []*bolt.DB{db0, db1} {
// Verify it is indeed in read-only mode.
assert(t, db.IsReadOnly(), "")
// Read-only databases should not allow updates.
assert(t,
bolt.ErrDatabaseReadOnly == db.Update(func(*bolt.Tx) error {
panic(`should never get here`)
}),
"")
// Read-only databases should not allow beginning writable txns.
_, err = db.Begin(true)
assert(t, bolt.ErrDatabaseReadOnly == err, "")
// Verify the data.
ok(t, db.View(func(tx *bolt.Tx) error {
b := tx.Bucket(bucket)
if b == nil {
return fmt.Errorf("expected bucket `%s`", string(bucket))
}
got := string(b.Get(key))
expected := string(value)
if got != expected {
return fmt.Errorf("expected `%s`, got `%s`", expected, got)
}
return nil
}))
}
}
// TODO(benbjohnson): Test corruption at every byte of the first two pages.
// Ensure that a database cannot open a transaction when it's not open.
@ -254,6 +324,49 @@ func TestDB_BeginRW_Closed(t *testing.T) {
assert(t, tx == nil, "")
}
func TestDB_Close_PendingTx_RW(t *testing.T) { testDB_Close_PendingTx(t, true) }
func TestDB_Close_PendingTx_RO(t *testing.T) { testDB_Close_PendingTx(t, false) }
// Ensure that a database cannot close while transactions are open.
func testDB_Close_PendingTx(t *testing.T, writable bool) {
db := NewTestDB()
defer db.Close()
// Start transaction.
tx, err := db.Begin(true)
if err != nil {
t.Fatal(err)
}
// Close the database in a separate goroutine.
done := make(chan struct{})
go func() {
db.Close()
close(done)
}()
// Ensure database hasn't closed.
time.Sleep(100 * time.Millisecond)
select {
case <-done:
t.Fatal("database closed too early")
default:
}
// Commit transaction.
if err := tx.Commit(); err != nil {
t.Fatal(err)
}
// Ensure database closed now.
time.Sleep(100 * time.Millisecond)
select {
case <-done:
default:
t.Fatal("database did not close")
}
}
// Ensure a database can provide a transactional block.
func TestDB_Update(t *testing.T) {
db := NewTestDB()
@ -678,7 +791,7 @@ func (db *TestDB) PrintStats() {
// MustCheck runs a consistency check on the database and panics if any errors are found.
func (db *TestDB) MustCheck() {
db.View(func(tx *bolt.Tx) error {
db.Update(func(tx *bolt.Tx) error {
// Collect all the errors.
var errors []error
for err := range tx.Check() {


@ -36,6 +36,10 @@ var (
// ErrTxClosed is returned when committing or rolling back a transaction
// that has already been committed or rolled back.
ErrTxClosed = errors.New("tx closed")
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
// read-only database.
ErrDatabaseReadOnly = errors.New("database is in read-only mode")
)
// These errors can occur when putting or deleting a value or a bucket.


@ -48,15 +48,14 @@ func (f *freelist) pending_count() int {
// all returns a list of all free ids and all pending ids in one sorted list.
func (f *freelist) all() []pgid {
ids := make([]pgid, len(f.ids))
copy(ids, f.ids)
m := make(pgids, 0)
for _, list := range f.pending {
ids = append(ids, list...)
m = append(m, list...)
}
sort.Sort(pgids(ids))
return ids
sort.Sort(m)
return pgids(f.ids).merge(m)
}
// allocate returns the starting page id of a contiguous list of pages of a given size.
@ -127,15 +126,17 @@ func (f *freelist) free(txid txid, p *page) {
// release moves all page ids for a transaction id (or older) to the freelist.
func (f *freelist) release(txid txid) {
m := make(pgids, 0)
for tid, ids := range f.pending {
if tid <= txid {
// Move transaction's pending pages to the available freelist.
// Don't remove from the cache since the page is still free.
f.ids = append(f.ids, ids...)
m = append(m, ids...)
delete(f.pending, tid)
}
}
sort.Sort(pgids(f.ids))
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
}
// rollback removes the pages from a given pending tx.


@ -1,7 +1,9 @@
package bolt
import (
"math/rand"
"reflect"
"sort"
"testing"
"unsafe"
)
@ -127,3 +129,28 @@ func TestFreelist_write(t *testing.T) {
t.Fatalf("exp=%v; got=%v", exp, f2.ids)
}
}
func Benchmark_FreelistRelease10K(b *testing.B) { benchmark_FreelistRelease(b, 10000) }
func Benchmark_FreelistRelease100K(b *testing.B) { benchmark_FreelistRelease(b, 100000) }
func Benchmark_FreelistRelease1000K(b *testing.B) { benchmark_FreelistRelease(b, 1000000) }
func Benchmark_FreelistRelease10000K(b *testing.B) { benchmark_FreelistRelease(b, 10000000) }
func benchmark_FreelistRelease(b *testing.B, size int) {
ids := randomPgids(size)
pending := randomPgids(len(ids) / 400)
b.ResetTimer()
for i := 0; i < b.N; i++ {
f := &freelist{ids: ids, pending: map[txid][]pgid{1: pending}}
f.release(1)
}
}
func randomPgids(n int) []pgid {
rand.Seed(42)
pgids := make(pgids, n)
for i := range pgids {
pgids[i] = pgid(rand.Int63())
}
sort.Sort(pgids)
return pgids
}


@ -221,11 +221,20 @@ func (n *node) write(p *page) {
_assert(elem.pgid != p.id, "write: circular dependency occurred")
}
// If the length of key+value is larger than the max allocation size
// then we need to reallocate the byte array pointer.
//
// See: https://github.com/boltdb/bolt/pull/335
klen, vlen := len(item.key), len(item.value)
if len(b) < klen+vlen {
b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:]
}
// Write data for the element to the end of the page.
copy(b[0:], item.key)
b = b[len(item.key):]
b = b[klen:]
copy(b[0:], item.value)
b = b[len(item.value):]
b = b[vlen:]
}
// DEBUG ONLY: n.dump()


@ -3,6 +3,7 @@ package bolt
import (
"fmt"
"os"
"sort"
"unsafe"
)
@ -96,7 +97,7 @@ type branchPageElement struct {
// key returns a byte slice of the node key.
func (n *branchPageElement) key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return buf[n.pos : n.pos+n.ksize]
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
}
// leafPageElement represents a node on a leaf page.
@ -110,13 +111,13 @@ type leafPageElement struct {
// key returns a byte slice of the node key.
func (n *leafPageElement) key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return buf[n.pos : n.pos+n.ksize]
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
}
// value returns a byte slice of the node value.
func (n *leafPageElement) value() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize]
}
// PageInfo represents human readable information about a page.
@ -132,3 +133,40 @@ type pgids []pgid
func (s pgids) Len() int { return len(s) }
func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
// merge returns the sorted union of a and b.
func (a pgids) merge(b pgids) pgids {
// Return the opposite slice if one is nil.
if len(a) == 0 {
return b
} else if len(b) == 0 {
return a
}
// Create a list to hold all elements from both lists.
merged := make(pgids, 0, len(a)+len(b))
// Assign lead to the slice with a lower starting value, follow to the higher value.
lead, follow := a, b
if b[0] < a[0] {
lead, follow = b, a
}
// Continue while there are elements in the lead.
for len(lead) > 0 {
// Merge largest prefix of lead that is ahead of follow[0].
n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
merged = append(merged, lead[:n]...)
if n >= len(lead) {
break
}
// Swap lead and follow.
lead, follow = follow, lead[n:]
}
// Append what's left in follow.
merged = append(merged, follow...)
return merged
}


@ -1,7 +1,10 @@
package bolt
import (
"reflect"
"sort"
"testing"
"testing/quick"
)
// Ensure that the page type can be returned in human readable format.
@ -27,3 +30,43 @@ func TestPage_typ(t *testing.T) {
func TestPage_dump(t *testing.T) {
(&page{id: 256}).hexdump(16)
}
func TestPgids_merge(t *testing.T) {
a := pgids{4, 5, 6, 10, 11, 12, 13, 27}
b := pgids{1, 3, 8, 9, 25, 30}
c := a.merge(b)
if !reflect.DeepEqual(c, pgids{1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30}) {
t.Errorf("mismatch: %v", c)
}
a = pgids{4, 5, 6, 10, 11, 12, 13, 27, 35, 36}
b = pgids{8, 9, 25, 30}
c = a.merge(b)
if !reflect.DeepEqual(c, pgids{4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30, 35, 36}) {
t.Errorf("mismatch: %v", c)
}
}
func TestPgids_merge_quick(t *testing.T) {
if err := quick.Check(func(a, b pgids) bool {
// Sort incoming lists.
sort.Sort(a)
sort.Sort(b)
// Merge the two lists together.
got := a.merge(b)
// The expected value should be the two lists combined and sorted.
exp := append(a, b...)
sort.Sort(exp)
if !reflect.DeepEqual(exp, got) {
t.Errorf("\nexp=%+v\ngot=%+v\n", exp, got)
return false
}
return true
}, nil); err != nil {
t.Fatal(err)
}
}


@ -127,7 +127,8 @@ func (tx *Tx) OnCommit(fn func()) {
}
// Commit writes all changes to disk and updates the meta page.
// Returns an error if a disk write error occurs.
// Returns an error if a disk write error occurs, or if Commit is
// called on a read-only transaction.
func (tx *Tx) Commit() error {
_assert(!tx.managed, "managed tx commit not allowed")
if tx.db == nil {
@ -203,7 +204,8 @@ func (tx *Tx) Commit() error {
return nil
}
// Rollback closes the transaction and ignores all previous updates.
// Rollback closes the transaction and ignores all previous updates. Read-only
// transactions must be rolled back and not committed.
func (tx *Tx) Rollback() error {
_assert(!tx.managed, "managed tx rollback not allowed")
if tx.db == nil {
@ -421,15 +423,39 @@ func (tx *Tx) write() error {
// Write pages to disk in order.
for _, p := range pages {
size := (int(p.overflow) + 1) * tx.db.pageSize
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:size]
offset := int64(p.id) * int64(tx.db.pageSize)
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
return err
}
// Update statistics.
tx.stats.Write++
// Write out page in "max allocation" sized chunks.
ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p))
for {
// Limit our write to our max allocation size.
sz := size
if sz > maxAllocSize-1 {
sz = maxAllocSize - 1
}
// Write chunk to disk.
buf := ptr[:sz]
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
return err
}
// Update statistics.
tx.stats.Write++
// Exit inner for loop if we've written all the chunks.
size -= sz
if size == 0 {
break
}
// Otherwise move offset forward and move pointer to next chunk.
offset += int64(sz)
ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz]))
}
}
// Ignore file sync if flag is set on DB.
if !tx.db.NoSync || IgnoreNoSync {
if err := fdatasync(tx.db); err != nil {
return err


@ -252,6 +252,38 @@ func TestTx_DeleteBucket_NotFound(t *testing.T) {
})
}
// Ensure that no error is returned when a tx.ForEach function does not return
// an error.
func TestTx_ForEach_NoError(t *testing.T) {
db := NewTestDB()
defer db.Close()
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar"))
equals(t, nil, tx.ForEach(func(name []byte, b *bolt.Bucket) error {
return nil
}))
return nil
})
}
// Ensure that an error is returned when a tx.ForEach function returns an error.
func TestTx_ForEach_WithError(t *testing.T) {
db := NewTestDB()
defer db.Close()
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucket([]byte("widgets"))
tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar"))
err := errors.New("foo")
equals(t, err, tx.ForEach(func(name []byte, b *bolt.Bucket) error {
return err
}))
return nil
})
}
// Ensure that Tx commit handlers are called after a transaction successfully commits.
func TestTx_OnCommit(t *testing.T) {
var x int


@ -63,13 +63,14 @@ type DB struct {
journalAckC chan error
// Compaction.
tcompCmdC chan cCmd
tcompPauseC chan chan<- struct{}
mcompCmdC chan cCmd
compErrC chan error
compPerErrC chan error
compErrSetC chan error
compStats []cStats
tcompCmdC chan cCmd
tcompPauseC chan chan<- struct{}
mcompCmdC chan cCmd
compErrC chan error
compPerErrC chan error
compErrSetC chan error
compWriteLocking bool
compStats []cStats
// Close.
closeW sync.WaitGroup
@ -108,28 +109,44 @@ func openDB(s *session) (*DB, error) {
closeC: make(chan struct{}),
}
if err := db.recoverJournal(); err != nil {
return nil, err
}
// Read-only mode.
readOnly := s.o.GetReadOnly()
// Remove any obsolete files.
if err := db.checkAndCleanFiles(); err != nil {
// Close journal.
if db.journal != nil {
db.journal.Close()
db.journalWriter.Close()
if readOnly {
// Recover journals (read-only mode).
if err := db.recoverJournalRO(); err != nil {
return nil, err
}
return nil, err
} else {
// Recover journals.
if err := db.recoverJournal(); err != nil {
return nil, err
}
// Remove any obsolete files.
if err := db.checkAndCleanFiles(); err != nil {
// Close journal.
if db.journal != nil {
db.journal.Close()
db.journalWriter.Close()
}
return nil, err
}
}
// Doesn't need to be included in the wait group.
go db.compactionError()
go db.mpoolDrain()
db.closeW.Add(3)
go db.tCompaction()
go db.mCompaction()
go db.jWriter()
if readOnly {
db.SetReadOnly()
} else {
db.closeW.Add(3)
go db.tCompaction()
go db.mCompaction()
go db.jWriter()
}
s.logf("db@open done T·%v", time.Since(start))
@ -275,7 +292,7 @@ func recoverTable(s *session, o *opt.Options) error {
// We will drop corrupted table.
strict = o.GetStrict(opt.StrictRecovery)
rec = &sessionRecord{numLevel: o.GetNumLevel()}
rec = &sessionRecord{}
bpool = util.NewBufferPool(o.GetBlockSize() + 5)
)
buildTable := func(iter iterator.Iterator) (tmp storage.File, size int64, err error) {
@ -450,132 +467,136 @@ func recoverTable(s *session, o *opt.Options) error {
}
func (db *DB) recoverJournal() error {
// Get all tables and sort it by file number.
journalFiles_, err := db.s.getFiles(storage.TypeJournal)
// Get all journals and sort them by file number.
allJournalFiles, err := db.s.getFiles(storage.TypeJournal)
if err != nil {
return err
}
journalFiles := files(journalFiles_)
journalFiles.sort()
files(allJournalFiles).sort()
// Discard older journal.
prev := -1
for i, file := range journalFiles {
if file.Num() >= db.s.stJournalNum {
if prev >= 0 {
i--
journalFiles[i] = journalFiles[prev]
}
journalFiles = journalFiles[i:]
break
} else if file.Num() == db.s.stPrevJournalNum {
prev = i
// Journals that will be recovered.
var recJournalFiles []storage.File
for _, jf := range allJournalFiles {
if jf.Num() >= db.s.stJournalNum || jf.Num() == db.s.stPrevJournalNum {
recJournalFiles = append(recJournalFiles, jf)
}
}
var jr *journal.Reader
var of storage.File
var mem *memdb.DB
batch := new(Batch)
cm := newCMem(db.s)
buf := new(util.Buffer)
// Options.
strict := db.s.o.GetStrict(opt.StrictJournal)
checksum := db.s.o.GetStrict(opt.StrictJournalChecksum)
writeBuffer := db.s.o.GetWriteBuffer()
recoverJournal := func(file storage.File) error {
db.logf("journal@recovery recovering @%d", file.Num())
reader, err := file.Open()
if err != nil {
return err
}
defer reader.Close()
var (
of storage.File // Obsolete file.
rec = &sessionRecord{}
)
// Create/reset journal reader instance.
if jr == nil {
jr = journal.NewReader(reader, dropper{db.s, file}, strict, checksum)
} else {
jr.Reset(reader, dropper{db.s, file}, strict, checksum)
}
// Flush memdb and remove obsolete journal file.
if of != nil {
if mem.Len() > 0 {
if err := cm.flush(mem, 0); err != nil {
return err
}
}
if err := cm.commit(file.Num(), db.seq); err != nil {
return err
}
cm.reset()
of.Remove()
of = nil
}
// Replay journal to memdb.
mem.Reset()
for {
r, err := jr.Next()
if err != nil {
if err == io.EOF {
break
}
return errors.SetFile(err, file)
}
buf.Reset()
if _, err := buf.ReadFrom(r); err != nil {
if err == io.ErrUnexpectedEOF {
// This is the error returned due to corruption when strict == false.
continue
} else {
return errors.SetFile(err, file)
}
}
if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mem); err != nil {
if strict || !errors.IsCorrupted(err) {
return errors.SetFile(err, file)
} else {
db.s.logf("journal error: %v (skipped)", err)
// We won't apply sequence number as it might be corrupted.
continue
}
}
// Save sequence number.
db.seq = batch.seq + uint64(batch.Len())
// Flush it if large enough.
if mem.Size() >= writeBuffer {
if err := cm.flush(mem, 0); err != nil {
return err
}
mem.Reset()
}
}
of = file
return nil
}
// Recover all journals.
if len(journalFiles) > 0 {
db.logf("journal@recovery F·%d", len(journalFiles))
// Recover journals.
if len(recJournalFiles) > 0 {
db.logf("journal@recovery F·%d", len(recJournalFiles))
// Mark file number as used.
db.s.markFileNum(journalFiles[len(journalFiles)-1].Num())
db.s.markFileNum(recJournalFiles[len(recJournalFiles)-1].Num())
mem = memdb.New(db.s.icmp, writeBuffer)
for _, file := range journalFiles {
if err := recoverJournal(file); err != nil {
var (
// Options.
strict = db.s.o.GetStrict(opt.StrictJournal)
checksum = db.s.o.GetStrict(opt.StrictJournalChecksum)
writeBuffer = db.s.o.GetWriteBuffer()
jr *journal.Reader
mdb = memdb.New(db.s.icmp, writeBuffer)
buf = &util.Buffer{}
batch = &Batch{}
)
for _, jf := range recJournalFiles {
db.logf("journal@recovery recovering @%d", jf.Num())
fr, err := jf.Open()
if err != nil {
return err
}
// Create or reset journal reader instance.
if jr == nil {
jr = journal.NewReader(fr, dropper{db.s, jf}, strict, checksum)
} else {
jr.Reset(fr, dropper{db.s, jf}, strict, checksum)
}
// Flush memdb and remove obsolete journal file.
if of != nil {
if mdb.Len() > 0 {
if _, err := db.s.flushMemdb(rec, mdb, -1); err != nil {
fr.Close()
return err
}
}
rec.setJournalNum(jf.Num())
rec.setSeqNum(db.seq)
if err := db.s.commit(rec); err != nil {
fr.Close()
return err
}
rec.resetAddedTables()
of.Remove()
of = nil
}
// Replay journal to memdb.
mdb.Reset()
for {
r, err := jr.Next()
if err != nil {
if err == io.EOF {
break
}
fr.Close()
return errors.SetFile(err, jf)
}
buf.Reset()
if _, err := buf.ReadFrom(r); err != nil {
if err == io.ErrUnexpectedEOF {
// This is the error returned due to corruption when strict == false.
continue
}
fr.Close()
return errors.SetFile(err, jf)
}
if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mdb); err != nil {
if !strict && errors.IsCorrupted(err) {
db.s.logf("journal error: %v (skipped)", err)
// We won't apply sequence number as it might be corrupted.
continue
}
fr.Close()
return errors.SetFile(err, jf)
}
// Save sequence number.
db.seq = batch.seq + uint64(batch.Len())
// Flush it if large enough.
if mdb.Size() >= writeBuffer {
if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
fr.Close()
return err
}
mdb.Reset()
}
}
fr.Close()
of = jf
}
// Flush the last journal.
if mem.Len() > 0 {
if err := cm.flush(mem, 0); err != nil {
// Flush the last memdb.
if mdb.Len() > 0 {
if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
return err
}
}
@ -587,8 +608,10 @@ func (db *DB) recoverJournal() error {
}
// Commit.
if err := cm.commit(db.journalFile.Num(), db.seq); err != nil {
// Close journal.
rec.setJournalNum(db.journalFile.Num())
rec.setSeqNum(db.seq)
if err := db.s.commit(rec); err != nil {
// Close journal on error.
if db.journal != nil {
db.journal.Close()
db.journalWriter.Close()
@ -604,6 +627,103 @@ func (db *DB) recoverJournal() error {
return nil
}
func (db *DB) recoverJournalRO() error {
// Get all journals and sort them by file number.
allJournalFiles, err := db.s.getFiles(storage.TypeJournal)
if err != nil {
return err
}
files(allJournalFiles).sort()
// Journals that will be recovered.
var recJournalFiles []storage.File
for _, jf := range allJournalFiles {
if jf.Num() >= db.s.stJournalNum || jf.Num() == db.s.stPrevJournalNum {
recJournalFiles = append(recJournalFiles, jf)
}
}
var (
// Options.
strict = db.s.o.GetStrict(opt.StrictJournal)
checksum = db.s.o.GetStrict(opt.StrictJournalChecksum)
writeBuffer = db.s.o.GetWriteBuffer()
mdb = memdb.New(db.s.icmp, writeBuffer)
)
// Recover journals.
if len(recJournalFiles) > 0 {
db.logf("journal@recovery RO·Mode F·%d", len(recJournalFiles))
var (
jr *journal.Reader
buf = &util.Buffer{}
batch = &Batch{}
)
for _, jf := range recJournalFiles {
db.logf("journal@recovery recovering @%d", jf.Num())
fr, err := jf.Open()
if err != nil {
return err
}
// Create or reset journal reader instance.
if jr == nil {
jr = journal.NewReader(fr, dropper{db.s, jf}, strict, checksum)
} else {
jr.Reset(fr, dropper{db.s, jf}, strict, checksum)
}
// Replay journal to memdb.
for {
r, err := jr.Next()
if err != nil {
if err == io.EOF {
break
}
fr.Close()
return errors.SetFile(err, jf)
}
buf.Reset()
if _, err := buf.ReadFrom(r); err != nil {
if err == io.ErrUnexpectedEOF {
// This is the error returned due to corruption when strict == false.
continue
}
fr.Close()
return errors.SetFile(err, jf)
}
if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mdb); err != nil {
if !strict && errors.IsCorrupted(err) {
db.s.logf("journal error: %v (skipped)", err)
// We won't apply sequence number as it might be corrupted.
continue
}
fr.Close()
return errors.SetFile(err, jf)
}
// Save sequence number.
db.seq = batch.seq + uint64(batch.Len())
}
fr.Close()
}
}
// Set memDB.
db.mem = &memDB{db: db, DB: mdb, ref: 1}
return nil
}
func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
ikey := newIkey(key, seq, ktSeek)
@ -614,7 +734,7 @@ func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, er
}
defer m.decref()
mk, mv, me := m.mdb.Find(ikey)
mk, mv, me := m.Find(ikey)
if me == nil {
ukey, _, kt, kerr := parseIkey(mk)
if kerr != nil {
@ -652,7 +772,7 @@ func (db *DB) has(key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err er
}
defer m.decref()
mk, _, me := m.mdb.Find(ikey)
mk, _, me := m.Find(ikey)
if me == nil {
ukey, _, kt, kerr := parseIkey(mk)
if kerr != nil {
@ -784,7 +904,7 @@ func (db *DB) GetProperty(name string) (value string, err error) {
const prefix = "leveldb."
if !strings.HasPrefix(name, prefix) {
return "", errors.New("leveldb: GetProperty: unknown property: " + name)
return "", ErrNotFound
}
p := name[len(prefix):]
@ -798,7 +918,7 @@ func (db *DB) GetProperty(name string) (value string, err error) {
var rest string
n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
if n != 1 || int(level) >= db.s.o.GetNumLevel() {
err = errors.New("leveldb: GetProperty: invalid property: " + name)
err = ErrNotFound
} else {
value = fmt.Sprint(v.tLen(int(level)))
}
@ -837,7 +957,7 @@ func (db *DB) GetProperty(name string) (value string, err error) {
case p == "aliveiters":
value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveIters))
default:
err = errors.New("leveldb: GetProperty: unknown property: " + name)
err = ErrNotFound
}
return
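With this hunk, unknown or malformed property names surface as `ErrNotFound` rather than ad-hoc error strings, so callers can treat "no such property" as a distinct, comparable case. A hedged sketch of the calling side:

```go
package main

import (
	"fmt"

	"github.com/syndtr/goleveldb/leveldb"
)

// printLevel0Files queries a per-level file count; the property name
// follows the "leveldb." prefix convention handled above.
func printLevel0Files(db *leveldb.DB) {
	value, err := db.GetProperty("leveldb.num-files-at-level0")
	switch {
	case err == nil:
		fmt.Println("files at L0:", value)
	case err == leveldb.ErrNotFound:
		// Unknown or unsupported property name.
		fmt.Println("property not supported")
	default:
		fmt.Println("db error:", err)
	}
}
```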
@ -900,6 +1020,9 @@ func (db *DB) Close() error {
var err error
select {
case err = <-db.compErrC:
if err == ErrReadOnly {
err = nil
}
default:
}

View File

@ -11,7 +11,6 @@ import (
"time"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/memdb"
"github.com/syndtr/goleveldb/leveldb/opt"
)
@ -62,58 +61,8 @@ func (p *cStatsStaging) stopTimer() {
}
}
type cMem struct {
s *session
level int
rec *sessionRecord
}
func newCMem(s *session) *cMem {
return &cMem{s: s, rec: &sessionRecord{numLevel: s.o.GetNumLevel()}}
}
func (c *cMem) flush(mem *memdb.DB, level int) error {
s := c.s
// Write memdb to table.
iter := mem.NewIterator(nil)
defer iter.Release()
t, n, err := s.tops.createFrom(iter)
if err != nil {
return err
}
// Pick level.
if level < 0 {
v := s.version()
level = v.pickLevel(t.imin.ukey(), t.imax.ukey())
v.release()
}
c.rec.addTableFile(level, t)
s.logf("mem@flush created L%d@%d N·%d S·%s %q:%q", level, t.file.Num(), n, shortenb(int(t.size)), t.imin, t.imax)
c.level = level
return nil
}
func (c *cMem) reset() {
c.rec = &sessionRecord{numLevel: c.s.o.GetNumLevel()}
}
func (c *cMem) commit(journal, seq uint64) error {
c.rec.setJournalNum(journal)
c.rec.setSeqNum(seq)
// Commit changes.
return c.s.commit(c.rec)
}
func (db *DB) compactionError() {
var (
err error
wlocked bool
)
var err error
noerr:
// No error.
for {
@ -121,7 +70,7 @@ noerr:
case err = <-db.compErrSetC:
switch {
case err == nil:
case errors.IsCorrupted(err):
case err == ErrReadOnly, errors.IsCorrupted(err):
goto hasperr
default:
goto haserr
@ -139,7 +88,7 @@ haserr:
switch {
case err == nil:
goto noerr
case errors.IsCorrupted(err):
case err == ErrReadOnly, errors.IsCorrupted(err):
goto hasperr
default:
}
@ -155,9 +104,9 @@ hasperr:
case db.compPerErrC <- err:
case db.writeLockC <- struct{}{}:
// Hold write lock, so that write won't pass-through.
wlocked = true
db.compWriteLocking = true
case _, _ = <-db.closeC:
if wlocked {
if db.compWriteLocking {
// We should release the lock or Close will hang.
<-db.writeLockC
}
@ -287,21 +236,18 @@ func (db *DB) compactionExitTransact() {
}
func (db *DB) memCompaction() {
mem := db.getFrozenMem()
if mem == nil {
mdb := db.getFrozenMem()
if mdb == nil {
return
}
defer mem.decref()
defer mdb.decref()
c := newCMem(db.s)
stats := new(cStatsStaging)
db.logf("mem@flush N·%d S·%s", mem.mdb.Len(), shortenb(mem.mdb.Size()))
db.logf("memdb@flush N·%d S·%s", mdb.Len(), shortenb(mdb.Size()))
// Don't compact empty memdb.
if mem.mdb.Len() == 0 {
db.logf("mem@flush skipping")
// drop frozen mem
if mdb.Len() == 0 {
db.logf("memdb@flush skipping")
// drop frozen memdb
db.dropFrozenMem()
return
}
@ -317,13 +263,20 @@ func (db *DB) memCompaction() {
return
}
db.compactionTransactFunc("mem@flush", func(cnt *compactionTransactCounter) (err error) {
var (
rec = &sessionRecord{}
stats = &cStatsStaging{}
flushLevel int
)
db.compactionTransactFunc("memdb@flush", func(cnt *compactionTransactCounter) (err error) {
stats.startTimer()
defer stats.stopTimer()
return c.flush(mem.mdb, -1)
flushLevel, err = db.s.flushMemdb(rec, mdb.DB, -1)
stats.stopTimer()
return
}, func() error {
for _, r := range c.rec.addedTables {
db.logf("mem@flush revert @%d", r.num)
for _, r := range rec.addedTables {
db.logf("memdb@flush revert @%d", r.num)
f := db.s.getTableFile(r.num)
if err := f.Remove(); err != nil {
return err
@ -332,20 +285,23 @@ func (db *DB) memCompaction() {
return nil
})
db.compactionTransactFunc("mem@commit", func(cnt *compactionTransactCounter) (err error) {
db.compactionTransactFunc("memdb@commit", func(cnt *compactionTransactCounter) (err error) {
stats.startTimer()
defer stats.stopTimer()
return c.commit(db.journalFile.Num(), db.frozenSeq)
rec.setJournalNum(db.journalFile.Num())
rec.setSeqNum(db.frozenSeq)
err = db.s.commit(rec)
stats.stopTimer()
return
}, nil)
db.logf("mem@flush committed F·%d T·%v", len(c.rec.addedTables), stats.duration)
db.logf("memdb@flush committed F·%d T·%v", len(rec.addedTables), stats.duration)
for _, r := range c.rec.addedTables {
for _, r := range rec.addedTables {
stats.write += r.size
}
db.compStats[c.level].add(stats)
db.compStats[flushLevel].add(stats)
// Drop frozen mem.
// Drop frozen memdb.
db.dropFrozenMem()
// Resume table compaction.
@ -557,7 +513,7 @@ func (b *tableCompactionBuilder) revert() error {
func (db *DB) tableCompaction(c *compaction, noTrivial bool) {
defer c.release()
rec := &sessionRecord{numLevel: db.s.o.GetNumLevel()}
rec := &sessionRecord{}
rec.addCompPtr(c.level, c.imax)
if !noTrivial && c.trivial() {

View File

@ -40,11 +40,11 @@ func (db *DB) newRawIterator(slice *util.Range, ro *opt.ReadOptions) iterator.It
ti := v.getIterators(slice, ro)
n := len(ti) + 2
i := make([]iterator.Iterator, 0, n)
emi := em.mdb.NewIterator(slice)
emi := em.NewIterator(slice)
emi.SetReleaser(&memdbReleaser{m: em})
i = append(i, emi)
if fm != nil {
fmi := fm.mdb.NewIterator(slice)
fmi := fm.NewIterator(slice)
fmi.SetReleaser(&memdbReleaser{m: fm})
i = append(i, fmi)
}

View File

@ -15,8 +15,8 @@ import (
)
type memDB struct {
db *DB
mdb *memdb.DB
db *DB
*memdb.DB
ref int32
}
@ -27,12 +27,12 @@ func (m *memDB) incref() {
func (m *memDB) decref() {
if ref := atomic.AddInt32(&m.ref, -1); ref == 0 {
// Only put back memdb with std capacity.
if m.mdb.Capacity() == m.db.s.o.GetWriteBuffer() {
m.mdb.Reset()
m.db.mpoolPut(m.mdb)
if m.Capacity() == m.db.s.o.GetWriteBuffer() {
m.Reset()
m.db.mpoolPut(m.DB)
}
m.db = nil
m.mdb = nil
m.DB = nil
} else if ref < 0 {
panic("negative memdb ref")
}
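The switch from a named `mdb *memdb.DB` field to an embedded `*memdb.DB` is what lets the call sites in this commit shorten `m.mdb.Find(...)` to `m.Find(...)`: Go promotes the embedded type's methods onto `memDB`. A minimal illustration of the mechanism, with placeholder types that are not part of the diff:

```go
package main

import "fmt"

type inner struct{}

// Find stands in for the promoted memdb.DB methods.
func (inner) Find(key string) string { return "value-for-" + key }

type outer struct {
	inner     // embedding promotes inner's methods onto outer
	ref int32
}

func main() {
	o := outer{ref: 1}
	// Equivalent to o.inner.Find("k"); no wrapper method needed.
	fmt.Println(o.Find("k"))
}
```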
@ -126,7 +126,7 @@ func (db *DB) newMem(n int) (mem *memDB, err error) {
}
mem = &memDB{
db: db,
mdb: mdb,
DB: mdb,
ref: 2,
}
db.mem = mem

View File

@ -2445,7 +2445,7 @@ func TestDB_TableCompactionBuilder(t *testing.T) {
if err != nil {
t.Fatal(err)
}
rec := &sessionRecord{numLevel: s.o.GetNumLevel()}
rec := &sessionRecord{}
rec.addTableFile(i, tf)
if err := s.commit(rec); err != nil {
t.Fatal(err)
@ -2455,7 +2455,7 @@ func TestDB_TableCompactionBuilder(t *testing.T) {
// Build grandparent.
v := s.version()
c := newCompaction(s, v, 1, append(tFiles{}, v.tables[1]...))
rec := &sessionRecord{numLevel: s.o.GetNumLevel()}
rec := &sessionRecord{}
b := &tableCompactionBuilder{
s: s,
c: c,
@ -2479,7 +2479,7 @@ func TestDB_TableCompactionBuilder(t *testing.T) {
// Build level-1.
v = s.version()
c = newCompaction(s, v, 0, append(tFiles{}, v.tables[0]...))
rec = &sessionRecord{numLevel: s.o.GetNumLevel()}
rec = &sessionRecord{}
b = &tableCompactionBuilder{
s: s,
c: c,
@ -2523,7 +2523,7 @@ func TestDB_TableCompactionBuilder(t *testing.T) {
// Compaction with transient error.
v = s.version()
c = newCompaction(s, v, 1, append(tFiles{}, v.tables[1]...))
rec = &sessionRecord{numLevel: s.o.GetNumLevel()}
rec = &sessionRecord{}
b = &tableCompactionBuilder{
s: s,
c: c,
@ -2663,3 +2663,39 @@ func TestDB_IterTriggeredCompaction(t *testing.T) {
func TestDB_IterTriggeredCompactionHalf(t *testing.T) {
testDB_IterTriggeredCompaction(t, 2)
}
func TestDB_ReadOnly(t *testing.T) {
h := newDbHarness(t)
defer h.close()
h.put("foo", "v1")
h.put("bar", "v2")
h.compactMem()
h.put("xfoo", "v1")
h.put("xbar", "v2")
t.Log("Trigger read-only")
if err := h.db.SetReadOnly(); err != nil {
h.close()
t.Fatalf("SetReadOnly error: %v", err)
}
h.stor.SetEmuErr(storage.TypeAll, tsOpCreate, tsOpReplace, tsOpRemove, tsOpWrite, tsOpWrite, tsOpSync)
ro := func(key, value, wantValue string) {
if err := h.db.Put([]byte(key), []byte(value), h.wo); err != ErrReadOnly {
t.Fatalf("unexpected error: %v", err)
}
h.getVal(key, wantValue)
}
ro("foo", "vx", "v1")
h.o.ReadOnly = true
h.reopenDB()
ro("foo", "vx", "v1")
ro("bar", "vx", "v2")
h.assertNumKeys(4)
}

View File

@ -63,24 +63,24 @@ func (db *DB) rotateMem(n int) (mem *memDB, err error) {
return
}
func (db *DB) flush(n int) (mem *memDB, nn int, err error) {
func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) {
delayed := false
flush := func() (retry bool) {
v := db.s.version()
defer v.release()
mem = db.getEffectiveMem()
mdb = db.getEffectiveMem()
defer func() {
if retry {
mem.decref()
mem = nil
mdb.decref()
mdb = nil
}
}()
nn = mem.mdb.Free()
mdbFree = mdb.Free()
switch {
case v.tLen(0) >= db.s.o.GetWriteL0SlowdownTrigger() && !delayed:
delayed = true
time.Sleep(time.Millisecond)
case nn >= n:
case mdbFree >= n:
return false
case v.tLen(0) >= db.s.o.GetWriteL0PauseTrigger():
delayed = true
@ -90,15 +90,15 @@ func (db *DB) flush(n int) (mem *memDB, nn int, err error) {
}
default:
// Allow memdb to grow if it has no entry.
if mem.mdb.Len() == 0 {
nn = n
if mdb.Len() == 0 {
mdbFree = n
} else {
mem.decref()
mem, err = db.rotateMem(n)
mdb.decref()
mdb, err = db.rotateMem(n)
if err == nil {
nn = mem.mdb.Free()
mdbFree = mdb.Free()
} else {
nn = 0
mdbFree = 0
}
}
return false
@ -157,18 +157,18 @@ func (db *DB) Write(b *Batch, wo *opt.WriteOptions) (err error) {
}
}()
mem, memFree, err := db.flush(b.size())
mdb, mdbFree, err := db.flush(b.size())
if err != nil {
return
}
defer mem.decref()
defer mdb.decref()
// Calculate maximum size of the batch.
m := 1 << 20
if x := b.size(); x <= 128<<10 {
m = x + (128 << 10)
}
m = minInt(m, memFree)
m = minInt(m, mdbFree)
// Merge with other batch.
drain:
@ -197,7 +197,7 @@ drain:
select {
case db.journalC <- b:
// Write into memdb
if berr := b.memReplay(mem.mdb); berr != nil {
if berr := b.memReplay(mdb.DB); berr != nil {
panic(berr)
}
case err = <-db.compPerErrC:
@ -211,7 +211,7 @@ drain:
case err = <-db.journalAckC:
if err != nil {
// Revert memdb if error detected
if berr := b.revertMemReplay(mem.mdb); berr != nil {
if berr := b.revertMemReplay(mdb.DB); berr != nil {
panic(berr)
}
return
@ -225,7 +225,7 @@ drain:
if err != nil {
return
}
if berr := b.memReplay(mem.mdb); berr != nil {
if berr := b.memReplay(mdb.DB); berr != nil {
panic(berr)
}
}
@ -233,7 +233,7 @@ drain:
// Set last seq number.
db.addSeq(uint64(b.Len()))
if b.size() >= memFree {
if b.size() >= mdbFree {
db.rotateMem(0)
}
return
@ -249,8 +249,7 @@ func (db *DB) Put(key, value []byte, wo *opt.WriteOptions) error {
return db.Write(b, wo)
}
// Delete deletes the value for the given key. It returns ErrNotFound if
// the DB does not contain the key.
// Delete deletes the value for the given key.
//
// It is safe to modify the contents of the arguments after Delete returns.
func (db *DB) Delete(key []byte, wo *opt.WriteOptions) error {
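Note the doc change just above: `Delete` no longer claims to return `ErrNotFound` for a missing key, which matches its batch-backed behavior. A short sketch of the three write entry points, assuming an already-open `*leveldb.DB`:

```go
package main

import (
	"log"

	"github.com/syndtr/goleveldb/leveldb"
)

func writeExamples(db *leveldb.DB) {
	// Put wraps the key/value in a one-entry batch internally.
	if err := db.Put([]byte("k1"), []byte("v1"), nil); err != nil {
		log.Fatal(err)
	}

	// Deleting a key that does not exist is not an error.
	if err := db.Delete([]byte("missing"), nil); err != nil {
		log.Fatal(err)
	}

	// Explicit batches go through the same flush/merge path in Write.
	b := new(leveldb.Batch)
	b.Put([]byte("k2"), []byte("v2"))
	b.Delete([]byte("k1"))
	if err := db.Write(b, nil); err != nil {
		log.Fatal(err)
	}
}
```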
@ -290,9 +289,9 @@ func (db *DB) CompactRange(r util.Range) error {
}
// Check for overlaps in memdb.
mem := db.getEffectiveMem()
defer mem.decref()
if isMemOverlaps(db.s.icmp, mem.mdb, r.Start, r.Limit) {
mdb := db.getEffectiveMem()
defer mdb.decref()
if isMemOverlaps(db.s.icmp, mdb.DB, r.Start, r.Limit) {
// Memdb compaction.
if _, err := db.rotateMem(0); err != nil {
<-db.writeLockC
@ -309,3 +308,31 @@ func (db *DB) CompactRange(r util.Range) error {
// Table compaction.
return db.compSendRange(db.tcompCmdC, -1, r.Start, r.Limit)
}
// SetReadOnly makes DB read-only. It will stay read-only until reopened.
func (db *DB) SetReadOnly() error {
if err := db.ok(); err != nil {
return err
}
// Lock writer.
select {
case db.writeLockC <- struct{}{}:
db.compWriteLocking = true
case err := <-db.compPerErrC:
return err
case _, _ = <-db.closeC:
return ErrClosed
}
// Set compaction read-only.
select {
case db.compErrSetC <- ErrReadOnly:
case perr := <-db.compPerErrC:
return perr
case _, _ = <-db.closeC:
return ErrClosed
}
return nil
}
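`SetReadOnly` flips a live handle into read-only mode by taking the write lock and feeding `ErrReadOnly` into the compaction error channel, mirroring what `TestDB_ReadOnly` exercises. A usage sketch:

```go
package main

import (
	"log"

	"github.com/syndtr/goleveldb/leveldb"
)

// freeze makes db read-only for the rest of its lifetime; only a
// reopen (without Options.ReadOnly) restores write access.
func freeze(db *leveldb.DB) {
	if err := db.SetReadOnly(); err != nil {
		log.Fatal(err)
	}

	// Subsequent writes are rejected.
	if err := db.Put([]byte("k"), []byte("v"), nil); err != leveldb.ErrReadOnly {
		log.Printf("unexpected error: %v", err)
	}
}
```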

View File

@ -12,6 +12,7 @@ import (
var (
ErrNotFound = errors.ErrNotFound
ErrReadOnly = errors.New("leveldb: read-only mode")
ErrSnapshotReleased = errors.New("leveldb: snapshot released")
ErrIterReleased = errors.New("leveldb: iterator released")
ErrClosed = errors.New("leveldb: closed")

View File

@ -206,6 +206,7 @@ func (p *DB) randHeight() (h int) {
return
}
// Must hold RW-lock if prev == true, as it uses the shared prevNode slice.
func (p *DB) findGE(key []byte, prev bool) (int, bool) {
node := 0
h := p.maxHeight - 1
@ -302,7 +303,7 @@ func (p *DB) Put(key []byte, value []byte) error {
node := len(p.nodeData)
p.nodeData = append(p.nodeData, kvOffset, len(key), len(value), h)
for i, n := range p.prevNode[:h] {
m := n + 4 + i
m := n + nNext + i
p.nodeData = append(p.nodeData, p.nodeData[m])
p.nodeData[m] = node
}
@ -434,20 +435,22 @@ func (p *DB) Len() int {
// Reset resets the DB to initial empty state. Allows reusing the buffer.
func (p *DB) Reset() {
p.mu.Lock()
p.rnd = rand.New(rand.NewSource(0xdeadbeef))
p.maxHeight = 1
p.n = 0
p.kvSize = 0
p.kvData = p.kvData[:0]
p.nodeData = p.nodeData[:4+tMaxHeight]
p.nodeData = p.nodeData[:nNext+tMaxHeight]
p.nodeData[nKV] = 0
p.nodeData[nKey] = 0
p.nodeData[nVal] = 0
p.nodeData[nHeight] = tMaxHeight
for n := 0; n < tMaxHeight; n++ {
p.nodeData[4+n] = 0
p.nodeData[nNext+n] = 0
p.prevNode[n] = 0
}
p.mu.Unlock()
}
// New creates a new initialized in-memory key/value DB. The capacity
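The `Put` and `Reset` hunks above replace the magic number `4` with `nNext`, which reads more clearly against the node layout implied by the constants used here (`nKV`, `nKey`, `nVal`, `nHeight`, `nNext`). A sketch of that assumed layout:

```go
package memdb

// Assumed layout of one skiplist node inside nodeData, as implied by
// the constants referenced in Put and Reset above (offsets are
// relative to the node's start index).
const (
	nKV     = iota // offset of the key/value bytes in kvData
	nKey           // key length
	nVal           // value length
	nHeight        // tower height h
	nNext          // first of h next-pointer slots: [nNext, nNext+h)
)

// The level-i successor of a node therefore lives at
// nodeData[node+nNext+i], which is exactly the index rewritten in
// Put and zeroed in Reset.
```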

View File

@ -250,6 +250,11 @@ type Options struct {
// The default value (DefaultCompression) uses snappy compression.
Compression Compression
// DisableBufferPool allows disabling the use of util.BufferPool functionality.
//
// The default value is false.
DisableBufferPool bool
// DisableBlockCache allows disabling the use of cache.Cache functionality on
// 'sorted table' blocks.
//
@ -321,6 +326,11 @@ type Options struct {
// The default value is 500.
OpenFilesCacheCapacity int
// If true then opens DB in read-only mode.
//
// The default value is false.
ReadOnly bool
// Strict defines the DB strict level.
Strict Strict
@ -472,6 +482,20 @@ func (o *Options) GetCompression() Compression {
return o.Compression
}
func (o *Options) GetDisableBufferPool() bool {
if o == nil {
return false
}
return o.DisableBufferPool
}
func (o *Options) GetDisableBlockCache() bool {
if o == nil {
return false
}
return o.DisableBlockCache
}
func (o *Options) GetDisableCompactionBackoff() bool {
if o == nil {
return false
@ -548,6 +572,13 @@ func (o *Options) GetOpenFilesCacheCapacity() int {
return o.OpenFilesCacheCapacity
}
func (o *Options) GetReadOnly() bool {
if o == nil {
return false
}
return o.ReadOnly
}
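All the getters added here follow the package's nil-receiver convention: internal code may hold a nil `*Options`, and each `GetX` returns the documented default in that case, so call sites never need nil checks. The shape, sketched with a hypothetical option that is not part of the diff:

```go
package opt

// OptionsSketch and HypotheticalFlag are illustrative only.
type OptionsSketch struct {
	HypotheticalFlag bool
}

// GetHypotheticalFlag is nil-safe: a nil receiver yields the default.
func (o *OptionsSketch) GetHypotheticalFlag() bool {
	if o == nil {
		return false
	}
	return o.HypotheticalFlag
}
```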
func (o *Options) GetStrict(strict Strict) bool {
if o == nil || o.Strict == 0 {
return DefaultStrict&strict != 0

View File

@ -11,10 +11,8 @@ import (
"io"
"os"
"sync"
"sync/atomic"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/journal"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/storage"
@ -127,11 +125,16 @@ func (s *session) recover() (err error) {
return
}
defer reader.Close()
strict := s.o.GetStrict(opt.StrictManifest)
jr := journal.NewReader(reader, dropper{s, m}, strict, true)
staging := s.stVersion.newStaging()
rec := &sessionRecord{numLevel: s.o.GetNumLevel()}
var (
// Options.
numLevel = s.o.GetNumLevel()
strict = s.o.GetStrict(opt.StrictManifest)
jr = journal.NewReader(reader, dropper{s, m}, strict, true)
rec = &sessionRecord{}
staging = s.stVersion.newStaging()
)
for {
var r io.Reader
r, err = jr.Next()
@ -143,7 +146,7 @@ func (s *session) recover() (err error) {
return errors.SetFile(err, m)
}
err = rec.decode(r)
err = rec.decode(r, numLevel)
if err == nil {
// save compact pointers
for _, r := range rec.compPtrs {
@ -206,250 +209,3 @@ func (s *session) commit(r *sessionRecord) (err error) {
return
}
// Pick a compaction based on current state; need external synchronization.
func (s *session) pickCompaction() *compaction {
v := s.version()
var level int
var t0 tFiles
if v.cScore >= 1 {
level = v.cLevel
cptr := s.stCompPtrs[level]
tables := v.tables[level]
for _, t := range tables {
if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 {
t0 = append(t0, t)
break
}
}
if len(t0) == 0 {
t0 = append(t0, tables[0])
}
} else {
if p := atomic.LoadPointer(&v.cSeek); p != nil {
ts := (*tSet)(p)
level = ts.level
t0 = append(t0, ts.table)
} else {
v.release()
return nil
}
}
return newCompaction(s, v, level, t0)
}
// Create compaction from given level and range; need external synchronization.
func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction {
v := s.version()
t0 := v.tables[level].getOverlaps(nil, s.icmp, umin, umax, level == 0)
if len(t0) == 0 {
v.release()
return nil
}
// Avoid compacting too much in one shot in case the range is large.
// But we cannot do this for level-0 since level-0 files can overlap
// and we must not pick one file and drop another older file if the
// two files overlap.
if level > 0 {
limit := uint64(v.s.o.GetCompactionSourceLimit(level))
total := uint64(0)
for i, t := range t0 {
total += t.size
if total >= limit {
s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1)
t0 = t0[:i+1]
break
}
}
}
return newCompaction(s, v, level, t0)
}
func newCompaction(s *session, v *version, level int, t0 tFiles) *compaction {
c := &compaction{
s: s,
v: v,
level: level,
tables: [2]tFiles{t0, nil},
maxGPOverlaps: uint64(s.o.GetCompactionGPOverlaps(level)),
tPtrs: make([]int, s.o.GetNumLevel()),
}
c.expand()
c.save()
return c
}
// compaction represents a compaction state.
type compaction struct {
s *session
v *version
level int
tables [2]tFiles
maxGPOverlaps uint64
gp tFiles
gpi int
seenKey bool
gpOverlappedBytes uint64
imin, imax iKey
tPtrs []int
released bool
snapGPI int
snapSeenKey bool
snapGPOverlappedBytes uint64
snapTPtrs []int
}
func (c *compaction) save() {
c.snapGPI = c.gpi
c.snapSeenKey = c.seenKey
c.snapGPOverlappedBytes = c.gpOverlappedBytes
c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...)
}
func (c *compaction) restore() {
c.gpi = c.snapGPI
c.seenKey = c.snapSeenKey
c.gpOverlappedBytes = c.snapGPOverlappedBytes
c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...)
}
func (c *compaction) release() {
if !c.released {
c.released = true
c.v.release()
}
}
// Expand compacted tables; need external synchronization.
func (c *compaction) expand() {
limit := uint64(c.s.o.GetCompactionExpandLimit(c.level))
vt0, vt1 := c.v.tables[c.level], c.v.tables[c.level+1]
t0, t1 := c.tables[0], c.tables[1]
imin, imax := t0.getRange(c.s.icmp)
// We expand t0 here just in case ukey hops across tables.
t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.level == 0)
if len(t0) != len(c.tables[0]) {
imin, imax = t0.getRange(c.s.icmp)
}
t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false)
// Get entire range covered by compaction.
amin, amax := append(t0, t1...).getRange(c.s.icmp)
// See if we can grow the number of inputs in "level" without
// changing the number of "level+1" files we pick up.
if len(t1) > 0 {
exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.level == 0)
if len(exp0) > len(t0) && t1.size()+exp0.size() < limit {
xmin, xmax := exp0.getRange(c.s.icmp)
exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false)
if len(exp1) == len(t1) {
c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
c.level, c.level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size())))
imin, imax = xmin, xmax
t0, t1 = exp0, exp1
amin, amax = append(t0, t1...).getRange(c.s.icmp)
}
}
}
// Compute the set of grandparent files that overlap this compaction
// (parent == level+1; grandparent == level+2)
if c.level+2 < c.s.o.GetNumLevel() {
c.gp = c.v.tables[c.level+2].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
}
c.tables[0], c.tables[1] = t0, t1
c.imin, c.imax = imin, imax
}
// Check whether compaction is trivial.
func (c *compaction) trivial() bool {
return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= c.maxGPOverlaps
}
func (c *compaction) baseLevelForKey(ukey []byte) bool {
for level, tables := range c.v.tables[c.level+2:] {
for c.tPtrs[level] < len(tables) {
t := tables[c.tPtrs[level]]
if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 {
// We've advanced far enough.
if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
// Key falls in this file's range, so definitely not base level.
return false
}
break
}
c.tPtrs[level]++
}
}
return true
}
func (c *compaction) shouldStopBefore(ikey iKey) bool {
for ; c.gpi < len(c.gp); c.gpi++ {
gp := c.gp[c.gpi]
if c.s.icmp.Compare(ikey, gp.imax) <= 0 {
break
}
if c.seenKey {
c.gpOverlappedBytes += gp.size
}
}
c.seenKey = true
if c.gpOverlappedBytes > c.maxGPOverlaps {
// Too much overlap for current output; start new output.
c.gpOverlappedBytes = 0
return true
}
return false
}
// Creates an iterator.
func (c *compaction) newIterator() iterator.Iterator {
// Creates iterator slice.
icap := len(c.tables)
if c.level == 0 {
// Special case for level-0
icap = len(c.tables[0]) + 1
}
its := make([]iterator.Iterator, 0, icap)
// Options.
ro := &opt.ReadOptions{
DontFillCache: true,
Strict: opt.StrictOverride,
}
strict := c.s.o.GetStrict(opt.StrictCompaction)
if strict {
ro.Strict |= opt.StrictReader
}
for i, tables := range c.tables {
if len(tables) == 0 {
continue
}
// Level-0 is not sorted and its files may overlap each other.
if c.level+i == 0 {
for _, t := range tables {
its = append(its, c.s.tops.newIterator(t, nil, ro))
}
} else {
it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict)
its = append(its, it)
}
}
return iterator.NewMergedIterator(its, c.s.icmp, strict)
}

View File

@ -0,0 +1,287 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"sync/atomic"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/memdb"
"github.com/syndtr/goleveldb/leveldb/opt"
)
func (s *session) pickMemdbLevel(umin, umax []byte) int {
v := s.version()
defer v.release()
return v.pickMemdbLevel(umin, umax)
}
func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, level int) (level_ int, err error) {
// Create sorted table.
iter := mdb.NewIterator(nil)
defer iter.Release()
t, n, err := s.tops.createFrom(iter)
if err != nil {
return level, err
}
// Pick level and add to record.
if level < 0 {
level = s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey())
}
rec.addTableFile(level, t)
s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", level, t.file.Num(), n, shortenb(int(t.size)), t.imin, t.imax)
return level, nil
}
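`flushMemdb` folds the old `cMem.flush` and level picking into the session: write the memdb out as a sorted table, pick a target level when the caller passes `-1`, and record the new table in `rec`. Its callers in this commit then finish with `setJournalNum`/`setSeqNum` and `commit`. A condensed sketch of that pattern, using names from the diff (the method itself is hypothetical and error handling is trimmed):

```go
// flushAndCommitSketch is not part of the commit; it condenses the
// sequence used by recoverJournal and memCompaction above.
func (db *DB) flushAndCommitSketch(mdb *memDB) error {
	rec := &sessionRecord{}

	// 1. Write the memdb out as a sorted table; level -1 lets the
	//    session pick a target level via pickMemdbLevel.
	if _, err := db.s.flushMemdb(rec, mdb.DB, -1); err != nil {
		return err
	}

	// 2. Point the manifest at the current journal and sequence number.
	rec.setJournalNum(db.journalFile.Num())
	rec.setSeqNum(db.frozenSeq)

	// 3. Commit the session record, writing a new manifest entry.
	return db.s.commit(rec)
}
```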
// Pick a compaction based on current state; need external synchronization.
func (s *session) pickCompaction() *compaction {
v := s.version()
var level int
var t0 tFiles
if v.cScore >= 1 {
level = v.cLevel
cptr := s.stCompPtrs[level]
tables := v.tables[level]
for _, t := range tables {
if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 {
t0 = append(t0, t)
break
}
}
if len(t0) == 0 {
t0 = append(t0, tables[0])
}
} else {
if p := atomic.LoadPointer(&v.cSeek); p != nil {
ts := (*tSet)(p)
level = ts.level
t0 = append(t0, ts.table)
} else {
v.release()
return nil
}
}
return newCompaction(s, v, level, t0)
}
// Create compaction from given level and range; need external synchronization.
func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction {
v := s.version()
t0 := v.tables[level].getOverlaps(nil, s.icmp, umin, umax, level == 0)
if len(t0) == 0 {
v.release()
return nil
}
// Avoid compacting too much in one shot in case the range is large.
// But we cannot do this for level-0 since level-0 files can overlap
// and we must not pick one file and drop another older file if the
// two files overlap.
if level > 0 {
limit := uint64(v.s.o.GetCompactionSourceLimit(level))
total := uint64(0)
for i, t := range t0 {
total += t.size
if total >= limit {
s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1)
t0 = t0[:i+1]
break
}
}
}
return newCompaction(s, v, level, t0)
}
func newCompaction(s *session, v *version, level int, t0 tFiles) *compaction {
c := &compaction{
s: s,
v: v,
level: level,
tables: [2]tFiles{t0, nil},
maxGPOverlaps: uint64(s.o.GetCompactionGPOverlaps(level)),
tPtrs: make([]int, s.o.GetNumLevel()),
}
c.expand()
c.save()
return c
}
// compaction represents a compaction state.
type compaction struct {
s *session
v *version
level int
tables [2]tFiles
maxGPOverlaps uint64
gp tFiles
gpi int
seenKey bool
gpOverlappedBytes uint64
imin, imax iKey
tPtrs []int
released bool
snapGPI int
snapSeenKey bool
snapGPOverlappedBytes uint64
snapTPtrs []int
}
func (c *compaction) save() {
c.snapGPI = c.gpi
c.snapSeenKey = c.seenKey
c.snapGPOverlappedBytes = c.gpOverlappedBytes
c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...)
}
func (c *compaction) restore() {
c.gpi = c.snapGPI
c.seenKey = c.snapSeenKey
c.gpOverlappedBytes = c.snapGPOverlappedBytes
c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...)
}
func (c *compaction) release() {
if !c.released {
c.released = true
c.v.release()
}
}
// Expand compacted tables; need external synchronization.
func (c *compaction) expand() {
limit := uint64(c.s.o.GetCompactionExpandLimit(c.level))
vt0, vt1 := c.v.tables[c.level], c.v.tables[c.level+1]
t0, t1 := c.tables[0], c.tables[1]
imin, imax := t0.getRange(c.s.icmp)
// We expand t0 here just in case ukey hops across tables.
t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.level == 0)
if len(t0) != len(c.tables[0]) {
imin, imax = t0.getRange(c.s.icmp)
}
t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false)
// Get entire range covered by compaction.
amin, amax := append(t0, t1...).getRange(c.s.icmp)
// See if we can grow the number of inputs in "level" without
// changing the number of "level+1" files we pick up.
if len(t1) > 0 {
exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.level == 0)
if len(exp0) > len(t0) && t1.size()+exp0.size() < limit {
xmin, xmax := exp0.getRange(c.s.icmp)
exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false)
if len(exp1) == len(t1) {
c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
c.level, c.level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size())))
imin, imax = xmin, xmax
t0, t1 = exp0, exp1
amin, amax = append(t0, t1...).getRange(c.s.icmp)
}
}
}
// Compute the set of grandparent files that overlap this compaction
// (parent == level+1; grandparent == level+2)
if c.level+2 < c.s.o.GetNumLevel() {
c.gp = c.v.tables[c.level+2].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
}
c.tables[0], c.tables[1] = t0, t1
c.imin, c.imax = imin, imax
}
// Check whether compaction is trivial.
func (c *compaction) trivial() bool {
return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= c.maxGPOverlaps
}
func (c *compaction) baseLevelForKey(ukey []byte) bool {
for level, tables := range c.v.tables[c.level+2:] {
for c.tPtrs[level] < len(tables) {
t := tables[c.tPtrs[level]]
if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 {
// We've advanced far enough.
if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
// Key falls in this file's range, so definitely not base level.
return false
}
break
}
c.tPtrs[level]++
}
}
return true
}
func (c *compaction) shouldStopBefore(ikey iKey) bool {
for ; c.gpi < len(c.gp); c.gpi++ {
gp := c.gp[c.gpi]
if c.s.icmp.Compare(ikey, gp.imax) <= 0 {
break
}
if c.seenKey {
c.gpOverlappedBytes += gp.size
}
}
c.seenKey = true
if c.gpOverlappedBytes > c.maxGPOverlaps {
// Too much overlap for current output; start new output.
c.gpOverlappedBytes = 0
return true
}
return false
}
// Creates an iterator.
func (c *compaction) newIterator() iterator.Iterator {
// Creates iterator slice.
icap := len(c.tables)
if c.level == 0 {
// Special case for level-0.
icap = len(c.tables[0]) + 1
}
its := make([]iterator.Iterator, 0, icap)
// Options.
ro := &opt.ReadOptions{
DontFillCache: true,
Strict: opt.StrictOverride,
}
strict := c.s.o.GetStrict(opt.StrictCompaction)
if strict {
ro.Strict |= opt.StrictReader
}
for i, tables := range c.tables {
if len(tables) == 0 {
continue
}
// Level-0 is not sorted and its files may overlap each other.
if c.level+i == 0 {
for _, t := range tables {
its = append(its, c.s.tops.newIterator(t, nil, ro))
}
} else {
it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict)
its = append(its, it)
}
}
return iterator.NewMergedIterator(its, c.s.icmp, strict)
}

View File

@ -52,8 +52,6 @@ type dtRecord struct {
}
type sessionRecord struct {
numLevel int
hasRec int
comparer string
journalNum uint64
@ -230,7 +228,7 @@ func (p *sessionRecord) readBytes(field string, r byteReader) []byte {
return x
}
func (p *sessionRecord) readLevel(field string, r io.ByteReader) int {
func (p *sessionRecord) readLevel(field string, r io.ByteReader, numLevel int) int {
if p.err != nil {
return 0
}
@ -238,14 +236,14 @@ func (p *sessionRecord) readLevel(field string, r io.ByteReader) int {
if p.err != nil {
return 0
}
if x >= uint64(p.numLevel) {
if x >= uint64(numLevel) {
p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "invalid level number"})
return 0
}
return int(x)
}
func (p *sessionRecord) decode(r io.Reader) error {
func (p *sessionRecord) decode(r io.Reader, numLevel int) error {
br, ok := r.(byteReader)
if !ok {
br = bufio.NewReader(r)
@ -286,13 +284,13 @@ func (p *sessionRecord) decode(r io.Reader) error {
p.setSeqNum(x)
}
case recCompPtr:
level := p.readLevel("comp-ptr.level", br)
level := p.readLevel("comp-ptr.level", br, numLevel)
ikey := p.readBytes("comp-ptr.ikey", br)
if p.err == nil {
p.addCompPtr(level, iKey(ikey))
}
case recAddTable:
level := p.readLevel("add-table.level", br)
level := p.readLevel("add-table.level", br, numLevel)
num := p.readUvarint("add-table.num", br)
size := p.readUvarint("add-table.size", br)
imin := p.readBytes("add-table.imin", br)
@ -301,7 +299,7 @@ func (p *sessionRecord) decode(r io.Reader) error {
p.addTable(level, num, size, imin, imax)
}
case recDelTable:
level := p.readLevel("del-table.level", br)
level := p.readLevel("del-table.level", br, numLevel)
num := p.readUvarint("del-table.num", br)
if p.err == nil {
p.delTable(level, num)

View File

@ -19,8 +19,8 @@ func decodeEncode(v *sessionRecord) (res bool, err error) {
if err != nil {
return
}
v2 := &sessionRecord{numLevel: opt.DefaultNumLevel}
err = v.decode(b)
v2 := &sessionRecord{}
err = v.decode(b, opt.DefaultNumLevel)
if err != nil {
return
}
@ -34,7 +34,7 @@ func decodeEncode(v *sessionRecord) (res bool, err error) {
func TestSessionRecord_EncodeDecode(t *testing.T) {
big := uint64(1) << 50
v := &sessionRecord{numLevel: opt.DefaultNumLevel}
v := &sessionRecord{}
i := uint64(0)
test := func() {
res, err := decodeEncode(v)

View File

@ -182,7 +182,7 @@ func (s *session) newManifest(rec *sessionRecord, v *version) (err error) {
defer v.release()
}
if rec == nil {
rec = &sessionRecord{numLevel: s.o.GetNumLevel()}
rec = &sessionRecord{}
}
s.fillRecord(rec, true)
v.fillRecord(rec)

View File

@ -42,6 +42,8 @@ type tsOp uint
const (
tsOpOpen tsOp = iota
tsOpCreate
tsOpReplace
tsOpRemove
tsOpRead
tsOpReadAt
tsOpWrite
@ -241,6 +243,10 @@ func (tf tsFile) Replace(newfile storage.File) (err error) {
if err != nil {
return
}
if tf.shouldErr(tsOpReplace) {
err = errors.New("leveldb.testStorage: emulated create error")
return
}
err = tf.File.Replace(newfile.(tsFile).File)
if err != nil {
ts.t.Errorf("E: cannot replace file, num=%d type=%v: %v", tf.Num(), tf.Type(), err)
@ -258,6 +264,10 @@ func (tf tsFile) Remove() (err error) {
if err != nil {
return
}
if tf.shouldErr(tsOpRemove) {
err = errors.New("leveldb.testStorage: emulated create error")
return
}
err = tf.File.Remove()
if err != nil {
ts.t.Errorf("E: cannot remove file, num=%d type=%v: %v", tf.Num(), tf.Type(), err)

View File

@ -441,22 +441,26 @@ func newTableOps(s *session) *tOps {
var (
cacher cache.Cacher
bcache *cache.Cache
bpool *util.BufferPool
)
if s.o.GetOpenFilesCacheCapacity() > 0 {
cacher = cache.NewLRU(s.o.GetOpenFilesCacheCapacity())
}
if !s.o.DisableBlockCache {
if !s.o.GetDisableBlockCache() {
var bcacher cache.Cacher
if s.o.GetBlockCacheCapacity() > 0 {
bcacher = cache.NewLRU(s.o.GetBlockCacheCapacity())
}
bcache = cache.NewCache(bcacher)
}
if !s.o.GetDisableBufferPool() {
bpool = util.NewBufferPool(s.o.GetBlockSize() + 5)
}
return &tOps{
s: s,
cache: cache.NewCache(cacher),
bcache: bcache,
bpool: util.NewBufferPool(s.o.GetBlockSize() + 5),
bpool: bpool,
}
}

View File

@ -14,7 +14,7 @@ import (
"strings"
"sync"
"github.com/syndtr/gosnappy/snappy"
"github.com/google/go-snappy/snappy"
"github.com/syndtr/goleveldb/leveldb/cache"
"github.com/syndtr/goleveldb/leveldb/comparer"

View File

@ -12,7 +12,7 @@ import (
"fmt"
"io"
"github.com/syndtr/gosnappy/snappy"
"github.com/google/go-snappy/snappy"
"github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/syndtr/goleveldb/leveldb/filter"

View File

@ -300,7 +300,7 @@ func (v *version) offsetOf(ikey iKey) (n uint64, err error) {
return
}
func (v *version) pickLevel(umin, umax []byte) (level int) {
func (v *version) pickMemdbLevel(umin, umax []byte) (level int) {
if !v.tables[0].overlaps(v.s.icmp, umin, umax, true) {
var overlaps tFiles
maxLevel := v.s.o.GetMaxMemCompationLevel()