go-ethereum/vendor/github.com/syndtr/goleveldb/leveldb/db.go

1176 lines
28 KiB
Go
Raw Normal View History

2015-02-16 13:28:33 +00:00
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"container/list"
2015-02-16 13:28:33 +00:00
"fmt"
"io"
"os"
"runtime"
"strings"
"sync"
"sync/atomic"
2015-02-16 13:28:33 +00:00
"time"
"github.com/syndtr/goleveldb/leveldb/errors"
2015-02-16 13:28:33 +00:00
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/journal"
"github.com/syndtr/goleveldb/leveldb/memdb"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/storage"
"github.com/syndtr/goleveldb/leveldb/table"
"github.com/syndtr/goleveldb/leveldb/util"
)
// DB is a LevelDB database.
type DB struct {
// Need 64-bit alignment.
seq uint64
// Stats. Need 64-bit alignment.
cWriteDelay int64 // The cumulative duration of write delays
cWriteDelayN int32 // The cumulative number of write delays
inWritePaused int32 // The indicator whether write operation is paused by compaction
aliveSnaps, aliveIters int32
// Session.
2015-02-16 13:28:33 +00:00
s *session
// MemDB.
memMu sync.RWMutex
memPool chan *memdb.DB
mem, frozenMem *memDB
journal *journal.Writer
journalWriter storage.Writer
journalFd storage.FileDesc
frozenJournalFd storage.FileDesc
frozenSeq uint64
2015-02-16 13:28:33 +00:00
// Snapshot.
2015-02-16 13:28:33 +00:00
snapsMu sync.Mutex
snapsList *list.List
2015-02-16 13:28:33 +00:00
// Write.
batchPool sync.Pool
writeMergeC chan writeMerge
2015-02-16 13:28:33 +00:00
writeMergedC chan bool
writeLockC chan struct{}
writeAckC chan error
writeDelay time.Duration
writeDelayN int
tr *Transaction
2015-02-16 13:28:33 +00:00
// Compaction.
compCommitLk sync.Mutex
tcompCmdC chan cCmd
tcompPauseC chan chan<- struct{}
mcompCmdC chan cCmd
compErrC chan error
compPerErrC chan error
compErrSetC chan error
compWriteLocking bool
compStats cStats
memdbMaxLevel int // For testing.
// Close.
2015-02-16 13:28:33 +00:00
closeW sync.WaitGroup
closeC chan struct{}
closed uint32
closer io.Closer
}
func openDB(s *session) (*DB, error) {
s.log("db@open opening")
start := time.Now()
db := &DB{
s: s,
// Initial sequence
seq: s.stSeqNum,
// MemDB
memPool: make(chan *memdb.DB, 1),
// Snapshot
snapsList: list.New(),
2015-02-16 13:28:33 +00:00
// Write
batchPool: sync.Pool{New: newBatch},
writeMergeC: make(chan writeMerge),
2015-02-16 13:28:33 +00:00
writeMergedC: make(chan bool),
writeLockC: make(chan struct{}, 1),
writeAckC: make(chan error),
// Compaction
tcompCmdC: make(chan cCmd),
tcompPauseC: make(chan chan<- struct{}),
mcompCmdC: make(chan cCmd),
compErrC: make(chan error),
compPerErrC: make(chan error),
compErrSetC: make(chan error),
2015-02-16 13:28:33 +00:00
// Close
closeC: make(chan struct{}),
}
// Read-only mode.
readOnly := s.o.GetReadOnly()
2015-02-16 13:28:33 +00:00
if readOnly {
// Recover journals (read-only mode).
if err := db.recoverJournalRO(); err != nil {
return nil, err
2015-02-16 13:28:33 +00:00
}
} else {
// Recover journals.
if err := db.recoverJournal(); err != nil {
return nil, err
}
// Remove any obsolete files.
if err := db.checkAndCleanFiles(); err != nil {
// Close journal.
if db.journal != nil {
db.journal.Close()
db.journalWriter.Close()
}
return nil, err
}
2015-02-16 13:28:33 +00:00
}
// Doesn't need to be included in the wait group.
2015-02-16 13:28:33 +00:00
go db.compactionError()
go db.mpoolDrain()
2015-02-16 13:28:33 +00:00
if readOnly {
db.SetReadOnly()
} else {
db.closeW.Add(2)
go db.tCompaction()
go db.mCompaction()
// go db.jWriter()
}
2015-02-16 13:28:33 +00:00
s.logf("db@open done T·%v", time.Since(start))
runtime.SetFinalizer(db, (*DB).Close)
return db, nil
}
// Open opens or creates a DB for the given storage.
// The DB will be created if not exist, unless ErrorIfMissing is true.
// Also, if ErrorIfExist is true and the DB exist Open will returns
// os.ErrExist error.
//
// Open will return an error with type of ErrCorrupted if corruption
// detected in the DB. Use errors.IsCorrupted to test whether an error is
// due to corruption. Corrupted DB can be recovered with Recover function.
2015-02-16 13:28:33 +00:00
//
// The returned DB instance is safe for concurrent use.
2015-02-16 13:28:33 +00:00
// The DB must be closed after use, by calling Close method.
func Open(stor storage.Storage, o *opt.Options) (db *DB, err error) {
s, err := newSession(stor, o)
2015-02-16 13:28:33 +00:00
if err != nil {
return
}
defer func() {
if err != nil {
s.close()
s.release()
}
}()
err = s.recover()
if err != nil {
if !os.IsNotExist(err) || s.o.GetErrorIfMissing() {
return
}
err = s.create()
if err != nil {
return
}
} else if s.o.GetErrorIfExist() {
err = os.ErrExist
return
}
return openDB(s)
}
// OpenFile opens or creates a DB for the given path.
// The DB will be created if not exist, unless ErrorIfMissing is true.
// Also, if ErrorIfExist is true and the DB exist OpenFile will returns
// os.ErrExist error.
//
// OpenFile uses standard file-system backed storage implementation as
// described in the leveldb/storage package.
2015-02-16 13:28:33 +00:00
//
// OpenFile will return an error with type of ErrCorrupted if corruption
// detected in the DB. Use errors.IsCorrupted to test whether an error is
// due to corruption. Corrupted DB can be recovered with Recover function.
2015-02-16 13:28:33 +00:00
//
// The returned DB instance is safe for concurrent use.
2015-02-16 13:28:33 +00:00
// The DB must be closed after use, by calling Close method.
func OpenFile(path string, o *opt.Options) (db *DB, err error) {
stor, err := storage.OpenFile(path, o.GetReadOnly())
2015-02-16 13:28:33 +00:00
if err != nil {
return
}
db, err = Open(stor, o)
if err != nil {
stor.Close()
} else {
db.closer = stor
}
return
}
// Recover recovers and opens a DB with missing or corrupted manifest files
// for the given storage. It will ignore any manifest files, valid or not.
// The DB must already exist or it will returns an error.
// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options.
//
// The returned DB instance is safe for concurrent use.
2015-02-16 13:28:33 +00:00
// The DB must be closed after use, by calling Close method.
func Recover(stor storage.Storage, o *opt.Options) (db *DB, err error) {
s, err := newSession(stor, o)
2015-02-16 13:28:33 +00:00
if err != nil {
return
}
defer func() {
if err != nil {
s.close()
s.release()
}
}()
err = recoverTable(s, o)
if err != nil {
return
}
return openDB(s)
}
// RecoverFile recovers and opens a DB with missing or corrupted manifest files
// for the given path. It will ignore any manifest files, valid or not.
// The DB must already exist or it will returns an error.
// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options.
//
// RecoverFile uses standard file-system backed storage implementation as described
2015-02-16 13:28:33 +00:00
// in the leveldb/storage package.
//
// The returned DB instance is safe for concurrent use.
2015-02-16 13:28:33 +00:00
// The DB must be closed after use, by calling Close method.
func RecoverFile(path string, o *opt.Options) (db *DB, err error) {
stor, err := storage.OpenFile(path, false)
2015-02-16 13:28:33 +00:00
if err != nil {
return
}
db, err = Recover(stor, o)
if err != nil {
stor.Close()
} else {
db.closer = stor
}
return
}
func recoverTable(s *session, o *opt.Options) error {
o = dupOptions(o)
// Mask StrictReader, lets StrictRecovery doing its job.
o.Strict &= ^opt.StrictReader
// Get all tables and sort it by file number.
fds, err := s.stor.List(storage.TypeTable)
2015-02-16 13:28:33 +00:00
if err != nil {
return err
}
sortFds(fds)
2015-02-16 13:28:33 +00:00
var (
maxSeq uint64
recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int
// We will drop corrupted table.
strict = o.GetStrict(opt.StrictRecovery)
noSync = o.GetNoSync()
rec = &sessionRecord{}
bpool = util.NewBufferPool(o.GetBlockSize() + 5)
)
buildTable := func(iter iterator.Iterator) (tmpFd storage.FileDesc, size int64, err error) {
tmpFd = s.newTemp()
writer, err := s.stor.Create(tmpFd)
2015-02-16 13:28:33 +00:00
if err != nil {
return
}
defer func() {
writer.Close()
if err != nil {
s.stor.Remove(tmpFd)
tmpFd = storage.FileDesc{}
2015-02-16 13:28:33 +00:00
}
}()
// Copy entries.
2015-02-16 13:28:33 +00:00
tw := table.NewWriter(writer, o)
for iter.Next() {
key := iter.Key()
if validInternalKey(key) {
2015-02-16 13:28:33 +00:00
err = tw.Append(key, iter.Value())
if err != nil {
return
}
}
}
err = iter.Error()
if err != nil && !errors.IsCorrupted(err) {
2015-02-16 13:28:33 +00:00
return
}
err = tw.Close()
if err != nil {
return
}
if !noSync {
err = writer.Sync()
if err != nil {
return
}
2015-02-16 13:28:33 +00:00
}
size = int64(tw.BytesLen())
return
}
recoverTable := func(fd storage.FileDesc) error {
s.logf("table@recovery recovering @%d", fd.Num)
reader, err := s.stor.Open(fd)
2015-02-16 13:28:33 +00:00
if err != nil {
return err
}
var closed bool
defer func() {
if !closed {
reader.Close()
}
}()
2015-02-16 13:28:33 +00:00
// Get file size.
size, err := reader.Seek(0, 2)
if err != nil {
return err
}
var (
tSeq uint64
tgoodKey, tcorruptedKey, tcorruptedBlock int
imin, imax []byte
)
tr, err := table.NewReader(reader, size, fd, nil, bpool, o)
if err != nil {
return err
}
2015-02-16 13:28:33 +00:00
iter := tr.NewIterator(nil, nil)
if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok {
itererr.SetErrorCallback(func(err error) {
if errors.IsCorrupted(err) {
s.logf("table@recovery block corruption @%d %q", fd.Num, err)
tcorruptedBlock++
}
})
}
2015-02-16 13:28:33 +00:00
// Scan the table.
for iter.Next() {
key := iter.Key()
_, seq, _, kerr := parseInternalKey(key)
if kerr != nil {
tcorruptedKey++
2015-02-16 13:28:33 +00:00
continue
}
tgoodKey++
2015-02-16 13:28:33 +00:00
if seq > tSeq {
tSeq = seq
}
if imin == nil {
imin = append([]byte{}, key...)
2015-02-16 13:28:33 +00:00
}
imax = append(imax[:0], key...)
2015-02-16 13:28:33 +00:00
}
if err := iter.Error(); err != nil && !errors.IsCorrupted(err) {
2015-02-16 13:28:33 +00:00
iter.Release()
return err
}
iter.Release()
goodKey += tgoodKey
corruptedKey += tcorruptedKey
corruptedBlock += tcorruptedBlock
if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
droppedTable++
s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
return nil
}
if tgoodKey > 0 {
if tcorruptedKey > 0 || tcorruptedBlock > 0 {
2015-02-16 13:28:33 +00:00
// Rebuild the table.
s.logf("table@recovery rebuilding @%d", fd.Num)
2015-02-16 13:28:33 +00:00
iter := tr.NewIterator(nil, nil)
tmpFd, newSize, err := buildTable(iter)
2015-02-16 13:28:33 +00:00
iter.Release()
if err != nil {
return err
}
closed = true
2015-02-16 13:28:33 +00:00
reader.Close()
if err := s.stor.Rename(tmpFd, fd); err != nil {
2015-02-16 13:28:33 +00:00
return err
}
size = newSize
}
if tSeq > maxSeq {
maxSeq = tSeq
2015-02-16 13:28:33 +00:00
}
recoveredKey += tgoodKey
2015-02-16 13:28:33 +00:00
// Add table to level 0.
rec.addTable(0, fd.Num, size, imin, imax)
s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
2015-02-16 13:28:33 +00:00
} else {
droppedTable++
s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", fd.Num, tcorruptedKey, tcorruptedBlock, size)
2015-02-16 13:28:33 +00:00
}
return nil
}
2015-02-16 13:28:33 +00:00
// Recover all tables.
if len(fds) > 0 {
s.logf("table@recovery F·%d", len(fds))
// Mark file number as used.
s.markFileNum(fds[len(fds)-1].Num)
for _, fd := range fds {
if err := recoverTable(fd); err != nil {
2015-02-16 13:28:33 +00:00
return err
}
}
s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(fds), recoveredKey, goodKey, corruptedKey, maxSeq)
2015-02-16 13:28:33 +00:00
}
2015-02-16 13:28:33 +00:00
// Set sequence number.
rec.setSeqNum(maxSeq)
2015-02-16 13:28:33 +00:00
// Create new manifest.
if err := s.create(); err != nil {
return err
}
2015-02-16 13:28:33 +00:00
// Commit.
return s.commit(rec)
}
func (db *DB) recoverJournal() error {
// Get all journals and sort it by file number.
rawFds, err := db.s.stor.List(storage.TypeJournal)
2015-02-16 13:28:33 +00:00
if err != nil {
return err
}
sortFds(rawFds)
// Journals that will be recovered.
var fds []storage.FileDesc
for _, fd := range rawFds {
if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
fds = append(fds, fd)
2015-02-16 13:28:33 +00:00
}
}
var (
ofd storage.FileDesc // Obsolete file.
rec = &sessionRecord{}
)
// Recover journals.
if len(fds) > 0 {
db.logf("journal@recovery F·%d", len(fds))
// Mark file number as used.
db.s.markFileNum(fds[len(fds)-1].Num)
var (
// Options.
strict = db.s.o.GetStrict(opt.StrictJournal)
checksum = db.s.o.GetStrict(opt.StrictJournalChecksum)
writeBuffer = db.s.o.GetWriteBuffer()
jr *journal.Reader
mdb = memdb.New(db.s.icmp, writeBuffer)
buf = &util.Buffer{}
batchSeq uint64
batchLen int
)
for _, fd := range fds {
db.logf("journal@recovery recovering @%d", fd.Num)
fr, err := db.s.stor.Open(fd)
if err != nil {
2015-02-16 13:28:33 +00:00
return err
}
// Create or reset journal reader instance.
if jr == nil {
jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
} else {
jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
2015-02-16 13:28:33 +00:00
}
// Flush memdb and remove obsolete journal file.
if !ofd.Zero() {
if mdb.Len() > 0 {
if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
fr.Close()
return err
}
2015-02-16 13:28:33 +00:00
}
rec.setJournalNum(fd.Num)
rec.setSeqNum(db.seq)
if err := db.s.commit(rec); err != nil {
fr.Close()
return err
}
rec.resetAddedTables()
db.s.stor.Remove(ofd)
ofd = storage.FileDesc{}
2015-02-16 13:28:33 +00:00
}
// Replay journal to memdb.
mdb.Reset()
for {
r, err := jr.Next()
if err != nil {
if err == io.EOF {
break
}
fr.Close()
return errors.SetFd(err, fd)
2015-02-16 13:28:33 +00:00
}
buf.Reset()
if _, err := buf.ReadFrom(r); err != nil {
if err == io.ErrUnexpectedEOF {
// This is error returned due to corruption, with strict == false.
continue
}
fr.Close()
return errors.SetFd(err, fd)
}
batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
if err != nil {
if !strict && errors.IsCorrupted(err) {
db.s.logf("journal error: %v (skipped)", err)
// We won't apply sequence number as it might be corrupted.
continue
}
fr.Close()
return errors.SetFd(err, fd)
}
// Save sequence number.
db.seq = batchSeq + uint64(batchLen)
// Flush it if large enough.
if mdb.Size() >= writeBuffer {
if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
fr.Close()
return err
}
mdb.Reset()
}
2015-02-16 13:28:33 +00:00
}
fr.Close()
ofd = fd
2015-02-16 13:28:33 +00:00
}
// Flush the last memdb.
if mdb.Len() > 0 {
if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
2015-02-16 13:28:33 +00:00
return err
}
}
}
2015-02-16 13:28:33 +00:00
// Create a new journal.
if _, err := db.newMem(0); err != nil {
2015-02-16 13:28:33 +00:00
return err
}
2015-02-16 13:28:33 +00:00
// Commit.
rec.setJournalNum(db.journalFd.Num)
rec.setSeqNum(db.seq)
if err := db.s.commit(rec); err != nil {
// Close journal on error.
if db.journal != nil {
db.journal.Close()
db.journalWriter.Close()
2015-02-16 13:28:33 +00:00
}
return err
}
// Remove the last obsolete journal file.
if !ofd.Zero() {
db.s.stor.Remove(ofd)
}
return nil
}
func (db *DB) recoverJournalRO() error {
// Get all journals and sort it by file number.
rawFds, err := db.s.stor.List(storage.TypeJournal)
if err != nil {
return err
}
sortFds(rawFds)
// Journals that will be recovered.
var fds []storage.FileDesc
for _, fd := range rawFds {
if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
fds = append(fds, fd)
}
}
var (
// Options.
strict = db.s.o.GetStrict(opt.StrictJournal)
checksum = db.s.o.GetStrict(opt.StrictJournalChecksum)
writeBuffer = db.s.o.GetWriteBuffer()
mdb = memdb.New(db.s.icmp, writeBuffer)
)
// Recover journals.
if len(fds) > 0 {
db.logf("journal@recovery RO·Mode F·%d", len(fds))
var (
jr *journal.Reader
buf = &util.Buffer{}
batchSeq uint64
batchLen int
)
for _, fd := range fds {
db.logf("journal@recovery recovering @%d", fd.Num)
fr, err := db.s.stor.Open(fd)
if err != nil {
return err
}
// Create or reset journal reader instance.
if jr == nil {
jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
} else {
jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
}
// Replay journal to memdb.
for {
r, err := jr.Next()
if err != nil {
if err == io.EOF {
break
}
fr.Close()
return errors.SetFd(err, fd)
}
buf.Reset()
if _, err := buf.ReadFrom(r); err != nil {
if err == io.ErrUnexpectedEOF {
// This is error returned due to corruption, with strict == false.
continue
}
fr.Close()
return errors.SetFd(err, fd)
}
batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
if err != nil {
if !strict && errors.IsCorrupted(err) {
db.s.logf("journal error: %v (skipped)", err)
// We won't apply sequence number as it might be corrupted.
continue
}
fr.Close()
return errors.SetFd(err, fd)
}
// Save sequence number.
db.seq = batchSeq + uint64(batchLen)
}
fr.Close()
}
2015-02-16 13:28:33 +00:00
}
// Set memDB.
db.mem = &memDB{db: db, DB: mdb, ref: 1}
2015-02-16 13:28:33 +00:00
return nil
}
func memGet(mdb *memdb.DB, ikey internalKey, icmp *iComparer) (ok bool, mv []byte, err error) {
mk, mv, err := mdb.Find(ikey)
if err == nil {
ukey, _, kt, kerr := parseInternalKey(mk)
if kerr != nil {
// Shouldn't have had happen.
panic(kerr)
}
if icmp.uCompare(ukey, ikey.ukey()) == 0 {
if kt == keyTypeDel {
return true, nil, ErrNotFound
}
return true, mv, nil
}
} else if err != ErrNotFound {
return true, nil, err
}
return
}
func (db *DB) get(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
ikey := makeInternalKey(nil, key, seq, keyTypeSeek)
if auxm != nil {
if ok, mv, me := memGet(auxm, ikey, db.s.icmp); ok {
return append([]byte{}, mv...), me
}
}
2015-02-16 13:28:33 +00:00
em, fm := db.getMems()
for _, m := range [...]*memDB{em, fm} {
2015-02-16 13:28:33 +00:00
if m == nil {
continue
}
defer m.decref()
if ok, mv, me := memGet(m.DB, ikey, db.s.icmp); ok {
return append([]byte{}, mv...), me
2015-02-16 13:28:33 +00:00
}
}
v := db.s.version()
value, cSched, err := v.get(auxt, ikey, ro, false)
2015-02-16 13:28:33 +00:00
v.release()
if cSched {
// Trigger table compaction.
db.compTrigger(db.tcompCmdC)
}
return
}
func nilIfNotFound(err error) error {
if err == ErrNotFound {
return nil
}
return err
}
func (db *DB) has(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) {
ikey := makeInternalKey(nil, key, seq, keyTypeSeek)
if auxm != nil {
if ok, _, me := memGet(auxm, ikey, db.s.icmp); ok {
return me == nil, nilIfNotFound(me)
}
}
em, fm := db.getMems()
for _, m := range [...]*memDB{em, fm} {
if m == nil {
continue
}
defer m.decref()
if ok, _, me := memGet(m.DB, ikey, db.s.icmp); ok {
return me == nil, nilIfNotFound(me)
}
}
v := db.s.version()
_, cSched, err := v.get(auxt, ikey, ro, true)
v.release()
if cSched {
// Trigger table compaction.
db.compTrigger(db.tcompCmdC)
}
if err == nil {
ret = true
} else if err == ErrNotFound {
err = nil
2015-02-16 13:28:33 +00:00
}
return
}
// Get gets the value for the given key. It returns ErrNotFound if the
// DB does not contains the key.
2015-02-16 13:28:33 +00:00
//
// The returned slice is its own copy, it is safe to modify the contents
// of the returned slice.
// It is safe to modify the contents of the argument after Get returns.
func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
err = db.ok()
2015-02-16 13:28:33 +00:00
if err != nil {
return
}
se := db.acquireSnapshot()
defer db.releaseSnapshot(se)
return db.get(nil, nil, key, se.seq, ro)
}
// Has returns true if the DB does contains the given key.
//
// It is safe to modify the contents of the argument after Has returns.
func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
err = db.ok()
if err != nil {
return
}
se := db.acquireSnapshot()
defer db.releaseSnapshot(se)
return db.has(nil, nil, key, se.seq, ro)
2015-02-16 13:28:33 +00:00
}
// NewIterator returns an iterator for the latest snapshot of the
// underlying DB.
// The returned iterator is not safe for concurrent use, but it is safe to use
2015-02-16 13:28:33 +00:00
// multiple iterators concurrently, with each in a dedicated goroutine.
// It is also safe to use an iterator concurrently with modifying its
// underlying DB. The resultant key/value pairs are guaranteed to be
// consistent.
//
// Slice allows slicing the iterator to only contains keys in the given
// range. A nil Range.Start is treated as a key before all keys in the
// DB. And a nil Range.Limit is treated as a key after all keys in
// the DB.
//
// The iterator must be released after use, by calling Release method.
//
// Also read Iterator documentation of the leveldb/iterator package.
func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
if err := db.ok(); err != nil {
2015-02-16 13:28:33 +00:00
return iterator.NewEmptyIterator(err)
}
se := db.acquireSnapshot()
defer db.releaseSnapshot(se)
// Iterator holds 'version' lock, 'version' is immutable so snapshot
// can be released after iterator created.
return db.newIterator(nil, nil, se.seq, slice, ro)
2015-02-16 13:28:33 +00:00
}
// GetSnapshot returns a latest snapshot of the underlying DB. A snapshot
// is a frozen snapshot of a DB state at a particular point in time. The
// content of snapshot are guaranteed to be consistent.
//
// The snapshot must be released after use, by calling Release method.
func (db *DB) GetSnapshot() (*Snapshot, error) {
if err := db.ok(); err != nil {
2015-02-16 13:28:33 +00:00
return nil, err
}
return db.newSnapshot(), nil
2015-02-16 13:28:33 +00:00
}
// GetProperty returns value of the given property name.
//
// Property names:
// leveldb.num-files-at-level{n}
// Returns the number of files at level 'n'.
2015-02-16 13:28:33 +00:00
// leveldb.stats
// Returns statistics of the underlying DB.
// leveldb.iostats
// Returns statistics of effective disk read and write.
// leveldb.writedelay
// Returns cumulative write delay caused by compaction.
2015-02-16 13:28:33 +00:00
// leveldb.sstables
// Returns sstables list for each level.
// leveldb.blockpool
// Returns block pool stats.
// leveldb.cachedblock
// Returns size of cached block.
// leveldb.openedtables
// Returns number of opened tables.
// leveldb.alivesnaps
// Returns number of alive snapshots.
// leveldb.aliveiters
// Returns number of alive iterators.
func (db *DB) GetProperty(name string) (value string, err error) {
err = db.ok()
2015-02-16 13:28:33 +00:00
if err != nil {
return
}
const prefix = "leveldb."
if !strings.HasPrefix(name, prefix) {
return "", ErrNotFound
2015-02-16 13:28:33 +00:00
}
p := name[len(prefix):]
v := db.s.version()
2015-02-16 13:28:33 +00:00
defer v.release()
numFilesPrefix := "num-files-at-level"
2015-02-16 13:28:33 +00:00
switch {
case strings.HasPrefix(p, numFilesPrefix):
2015-02-16 13:28:33 +00:00
var level uint
var rest string
n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
if n != 1 {
err = ErrNotFound
2015-02-16 13:28:33 +00:00
} else {
value = fmt.Sprint(v.tLen(int(level)))
}
case p == "stats":
value = "Compactions\n" +
" Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)\n" +
"-------+------------+---------------+---------------+---------------+---------------\n"
for level, tables := range v.levels {
duration, read, write := db.compStats.getStat(level)
if len(tables) == 0 && duration == 0 {
2015-02-16 13:28:33 +00:00
continue
}
value += fmt.Sprintf(" %3d | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
level, len(tables), float64(tables.size())/1048576.0, duration.Seconds(),
2015-02-16 13:28:33 +00:00
float64(read)/1048576.0, float64(write)/1048576.0)
}
case p == "iostats":
value = fmt.Sprintf("Read(MB):%.5f Write(MB):%.5f",
float64(db.s.stor.reads())/1048576.0,
float64(db.s.stor.writes())/1048576.0)
case p == "writedelay":
writeDelayN, writeDelay := atomic.LoadInt32(&db.cWriteDelayN), time.Duration(atomic.LoadInt64(&db.cWriteDelay))
paused := atomic.LoadInt32(&db.inWritePaused) == 1
value = fmt.Sprintf("DelayN:%d Delay:%s Paused:%t", writeDelayN, writeDelay, paused)
2015-02-16 13:28:33 +00:00
case p == "sstables":
for level, tables := range v.levels {
2015-02-16 13:28:33 +00:00
value += fmt.Sprintf("--- level %d ---\n", level)
for _, t := range tables {
value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.fd.Num, t.size, t.imin, t.imax)
2015-02-16 13:28:33 +00:00
}
}
case p == "blockpool":
value = fmt.Sprintf("%v", db.s.tops.bpool)
case p == "cachedblock":
if db.s.tops.bcache != nil {
value = fmt.Sprintf("%d", db.s.tops.bcache.Size())
} else {
value = "<nil>"
}
case p == "openedtables":
value = fmt.Sprintf("%d", db.s.tops.cache.Size())
case p == "alivesnaps":
value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveSnaps))
case p == "aliveiters":
value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveIters))
2015-02-16 13:28:33 +00:00
default:
err = ErrNotFound
2015-02-16 13:28:33 +00:00
}
return
}
// DBStats is database statistics.
type DBStats struct {
WriteDelayCount int32
WriteDelayDuration time.Duration
WritePaused bool
AliveSnapshots int32
AliveIterators int32
IOWrite uint64
IORead uint64
BlockCacheSize int
OpenedTablesCount int
LevelSizes []int64
LevelTablesCounts []int
LevelRead []int64
LevelWrite []int64
LevelDurations []time.Duration
}
// Stats populates s with database statistics.
func (db *DB) Stats(s *DBStats) error {
err := db.ok()
if err != nil {
return err
}
s.IORead = db.s.stor.reads()
s.IOWrite = db.s.stor.writes()
s.WriteDelayCount = atomic.LoadInt32(&db.cWriteDelayN)
s.WriteDelayDuration = time.Duration(atomic.LoadInt64(&db.cWriteDelay))
s.WritePaused = atomic.LoadInt32(&db.inWritePaused) == 1
s.OpenedTablesCount = db.s.tops.cache.Size()
if db.s.tops.bcache != nil {
s.BlockCacheSize = db.s.tops.bcache.Size()
} else {
s.BlockCacheSize = 0
}
s.AliveIterators = atomic.LoadInt32(&db.aliveIters)
s.AliveSnapshots = atomic.LoadInt32(&db.aliveSnaps)
s.LevelDurations = s.LevelDurations[:0]
s.LevelRead = s.LevelRead[:0]
s.LevelWrite = s.LevelWrite[:0]
s.LevelSizes = s.LevelSizes[:0]
s.LevelTablesCounts = s.LevelTablesCounts[:0]
v := db.s.version()
defer v.release()
for level, tables := range v.levels {
duration, read, write := db.compStats.getStat(level)
if len(tables) == 0 && duration == 0 {
continue
}
s.LevelDurations = append(s.LevelDurations, duration)
s.LevelRead = append(s.LevelRead, read)
s.LevelWrite = append(s.LevelWrite, write)
s.LevelSizes = append(s.LevelSizes, tables.size())
s.LevelTablesCounts = append(s.LevelTablesCounts, len(tables))
}
return nil
}
2015-02-16 13:28:33 +00:00
// SizeOf calculates approximate sizes of the given key ranges.
// The length of the returned sizes are equal with the length of the given
// ranges. The returned sizes measure storage space usage, so if the user
// data compresses by a factor of ten, the returned sizes will be one-tenth
// the size of the corresponding user data size.
// The results may not include the sizes of recently written data.
func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) {
if err := db.ok(); err != nil {
2015-02-16 13:28:33 +00:00
return nil, err
}
v := db.s.version()
2015-02-16 13:28:33 +00:00
defer v.release()
sizes := make(Sizes, 0, len(ranges))
for _, r := range ranges {
imin := makeInternalKey(nil, r.Start, keyMaxSeq, keyTypeSeek)
imax := makeInternalKey(nil, r.Limit, keyMaxSeq, keyTypeSeek)
start, err := v.offsetOf(imin)
2015-02-16 13:28:33 +00:00
if err != nil {
return nil, err
}
limit, err := v.offsetOf(imax)
2015-02-16 13:28:33 +00:00
if err != nil {
return nil, err
}
var size int64
2015-02-16 13:28:33 +00:00
if limit >= start {
size = limit - start
}
sizes = append(sizes, size)
}
return sizes, nil
}
// Close closes the DB. This will also releases any outstanding snapshot,
// abort any in-flight compaction and discard open transaction.
2015-02-16 13:28:33 +00:00
//
// It is not safe to close a DB until all outstanding iterators are released.
// It is valid to call Close multiple times. Other methods should not be
// called after the DB has been closed.
func (db *DB) Close() error {
if !db.setClosed() {
2015-02-16 13:28:33 +00:00
return ErrClosed
}
start := time.Now()
db.log("db@close closing")
2015-02-16 13:28:33 +00:00
// Clear the finalizer.
runtime.SetFinalizer(db, nil)
2015-02-16 13:28:33 +00:00
// Get compaction error.
var err error
select {
case err = <-db.compErrC:
if err == ErrReadOnly {
err = nil
}
2015-02-16 13:28:33 +00:00
default:
}
// Signal all goroutines.
close(db.closeC)
2015-02-16 13:28:33 +00:00
// Discard open transaction.
if db.tr != nil {
db.tr.Discard()
}
// Acquire writer lock.
db.writeLockC <- struct{}{}
// Wait for all gorotines to exit.
db.closeW.Wait()
// Closes journal.
if db.journal != nil {
db.journal.Close()
db.journalWriter.Close()
db.journal = nil
db.journalWriter = nil
}
2015-02-16 13:28:33 +00:00
if db.writeDelayN > 0 {
db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
2015-02-16 13:28:33 +00:00
}
// Close session.
db.s.close()
db.logf("db@close done T·%v", time.Since(start))
db.s.release()
2015-02-16 13:28:33 +00:00
if db.closer != nil {
if err1 := db.closer.Close(); err == nil {
2015-02-16 13:28:33 +00:00
err = err1
}
db.closer = nil
2015-02-16 13:28:33 +00:00
}
// Clear memdbs.
db.clearMems()
2015-02-16 13:28:33 +00:00
return err
}