package db import ( "bytes" "context" "fmt" "sync" "github.com/google/btree" corestore "cosmossdk.io/core/store" "cosmossdk.io/store/v2/errors" ) const ( // The approximate number of items and children per B-tree node. Tuned with benchmarks. bTreeDegree = 32 ) // item is a btree.Item with byte slices as keys and values type item struct { key []byte value []byte } // Less implements btree.Item. func (i item) Less(other btree.Item) bool { // this considers nil == []byte{}, but that's ok since we handle nil endpoints // in iterators specially anyway return bytes.Compare(i.key, other.(item).key) == -1 } // newKey creates a new key item. func newKey(key []byte) item { return item{key: key} } // newPair creates a new pair item. func newPair(key, value []byte) item { return item{key: key, value: value} } var _ corestore.KVStoreWithBatch = (*MemDB)(nil) // MemDB is an in-memory database backend using a B-tree for storage. // // For performance reasons, all given and returned keys and values are pointers to the in-memory // database, so modifying them will cause the stored values to be modified as well. All DB methods // already specify that keys and values should be considered read-only, but this is especially // important with MemDB. type MemDB struct { mtx sync.RWMutex btree *btree.BTree } // NewMemDB creates a new in-memory database. func NewMemDB() *MemDB { database := &MemDB{ btree: btree.New(bTreeDegree), } return database } // Get implements DB. func (db *MemDB) Get(key []byte) ([]byte, error) { if len(key) == 0 { return nil, errors.ErrKeyEmpty } db.mtx.RLock() defer db.mtx.RUnlock() i := db.btree.Get(newKey(key)) if i != nil { return i.(item).value, nil } return nil, nil } // Has implements DB. func (db *MemDB) Has(key []byte) (bool, error) { if len(key) == 0 { return false, errors.ErrKeyEmpty } db.mtx.RLock() defer db.mtx.RUnlock() return db.btree.Has(newKey(key)), nil } // Set implements DB. func (db *MemDB) Set(key, value []byte) error { if len(key) == 0 { return errors.ErrKeyEmpty } if value == nil { return errors.ErrValueNil } db.mtx.Lock() defer db.mtx.Unlock() db.set(key, value) return nil } // set sets a value without locking the mutex. func (db *MemDB) set(key, value []byte) { db.btree.ReplaceOrInsert(newPair(key, value)) } // SetSync implements DB. func (db *MemDB) SetSync(key, value []byte) error { return db.Set(key, value) } // Delete implements DB. func (db *MemDB) Delete(key []byte) error { if len(key) == 0 { return errors.ErrKeyEmpty } db.mtx.Lock() defer db.mtx.Unlock() db.delete(key) return nil } // delete deletes a key without locking the mutex. func (db *MemDB) delete(key []byte) { db.btree.Delete(newKey(key)) } // DeleteSync implements DB. func (db *MemDB) DeleteSync(key []byte) error { return db.Delete(key) } // Close implements DB. func (db *MemDB) Close() error { // Close is a noop since for an in-memory database, we don't have a destination to flush // contents to nor do we want any data loss on invoking Close(). // See the discussion in https://github.com/tendermint/tendermint/libs/pull/56 return nil } // Print implements DB. func (db *MemDB) Print() error { db.mtx.RLock() defer db.mtx.RUnlock() db.btree.Ascend(func(i btree.Item) bool { item := i.(item) fmt.Printf("[%X]:\t[%X]\n", item.key, item.value) return true }) return nil } // Stats implements DB. func (db *MemDB) Stats() map[string]string { db.mtx.RLock() defer db.mtx.RUnlock() stats := make(map[string]string) stats["database.type"] = "memDB" stats["database.size"] = fmt.Sprintf("%d", db.btree.Len()) return stats } // NewBatch implements DB. func (db *MemDB) NewBatch() corestore.Batch { return newMemDBBatch(db) } // NewBatchWithSize implements DB. // It does the same thing as NewBatch because we can't pre-allocate memDBBatch func (db *MemDB) NewBatchWithSize(size int) corestore.Batch { return newMemDBBatch(db) } // Iterator implements DB. // Takes out a read-lock on the database until the iterator is closed. func (db *MemDB) Iterator(start, end []byte) (corestore.Iterator, error) { if (start != nil && len(start) == 0) || (end != nil && len(end) == 0) { return nil, errors.ErrKeyEmpty } return newMemDBIterator(db, start, end, false), nil } // ReverseIterator implements DB. // Takes out a read-lock on the database until the iterator is closed. func (db *MemDB) ReverseIterator(start, end []byte) (corestore.Iterator, error) { if (start != nil && len(start) == 0) || (end != nil && len(end) == 0) { return nil, errors.ErrKeyEmpty } return newMemDBIterator(db, start, end, true), nil } // IteratorNoMtx makes an iterator with no mutex. func (db *MemDB) IteratorNoMtx(start, end []byte) (corestore.Iterator, error) { if (start != nil && len(start) == 0) || (end != nil && len(end) == 0) { return nil, errors.ErrKeyEmpty } return newMemDBIteratorMtxChoice(db, start, end, false, false), nil } // ReverseIteratorNoMtx makes an iterator with no mutex. func (db *MemDB) ReverseIteratorNoMtx(start, end []byte) (corestore.Iterator, error) { if (start != nil && len(start) == 0) || (end != nil && len(end) == 0) { return nil, errors.ErrKeyEmpty } return newMemDBIteratorMtxChoice(db, start, end, true, false), nil } const ( // Size of the channel buffer between traversal goroutine and iterator. Using an unbuffered // channel causes two context switches per item sent, while buffering allows more work per // context switch. Tuned with benchmarks. chBufferSize = 64 ) // memDBIterator is a memDB iterator. type memDBIterator struct { ch <-chan *item cancel context.CancelFunc item *item start []byte end []byte useMtx bool } var _ corestore.Iterator = (*memDBIterator)(nil) // newMemDBIterator creates a new memDBIterator. func newMemDBIterator(db *MemDB, start, end []byte, reverse bool) *memDBIterator { return newMemDBIteratorMtxChoice(db, start, end, reverse, true) } func newMemDBIteratorMtxChoice(db *MemDB, start, end []byte, reverse, useMtx bool) *memDBIterator { ctx, cancel := context.WithCancel(context.Background()) ch := make(chan *item, chBufferSize) iter := &memDBIterator{ ch: ch, cancel: cancel, start: start, end: end, useMtx: useMtx, } if useMtx { db.mtx.RLock() } go func() { if useMtx { defer db.mtx.RUnlock() } // Because we use [start, end) for reverse ranges, while btree uses (start, end], we need // the following variables to handle some reverse iteration conditions ourselves. var ( skipEqual []byte abortLessThan []byte ) visitor := func(i btree.Item) bool { item := i.(item) if skipEqual != nil && bytes.Equal(item.key, skipEqual) { skipEqual = nil return true } if abortLessThan != nil && bytes.Compare(item.key, abortLessThan) == -1 { return false } select { case <-ctx.Done(): return false case ch <- &item: return true } } switch { case start == nil && end == nil && !reverse: db.btree.Ascend(visitor) case start == nil && end == nil && reverse: db.btree.Descend(visitor) case end == nil && !reverse: // must handle this specially, since nil is considered less than anything else db.btree.AscendGreaterOrEqual(newKey(start), visitor) case !reverse: db.btree.AscendRange(newKey(start), newKey(end), visitor) case end == nil: // abort after start, since we use [start, end) while btree uses (start, end] abortLessThan = start db.btree.Descend(visitor) default: // skip end and abort after start, since we use [start, end) while btree uses (start, end] skipEqual = end abortLessThan = start db.btree.DescendLessOrEqual(newKey(end), visitor) } close(ch) }() // prime the iterator with the first value, if any if item, ok := <-ch; ok { iter.item = item } return iter } // Close implements Iterator. func (i *memDBIterator) Close() error { i.cancel() for range i.ch { // drain channel } i.item = nil return nil } // Domain implements Iterator. func (i *memDBIterator) Domain() ([]byte, []byte) { return i.start, i.end } // Valid implements Iterator. func (i *memDBIterator) Valid() bool { return i.item != nil } // Next implements Iterator. func (i *memDBIterator) Next() { i.assertIsValid() item, ok := <-i.ch switch { case ok: i.item = item default: i.item = nil } } // Error implements Iterator. func (i *memDBIterator) Error() error { return nil // famous last words } // Key implements Iterator. func (i *memDBIterator) Key() []byte { i.assertIsValid() return i.item.key } // Value implements Iterator. func (i *memDBIterator) Value() []byte { i.assertIsValid() return i.item.value } func (i *memDBIterator) assertIsValid() { if !i.Valid() { panic("iterator is invalid") } } // memDBBatch operations type opType int const ( opTypeSet opType = iota + 1 opTypeDelete ) type operation struct { opType key []byte value []byte } // memDBBatch handles in-memory batching. type memDBBatch struct { db *MemDB ops []operation size int } var _ corestore.Batch = (*memDBBatch)(nil) // newMemDBBatch creates a new memDBBatch func newMemDBBatch(db *MemDB) *memDBBatch { return &memDBBatch{ db: db, ops: []operation{}, size: 0, } } // Set implements Batch. func (b *memDBBatch) Set(key, value []byte) error { if len(key) == 0 { return errors.ErrKeyEmpty } if value == nil { return errors.ErrValueNil } if b.ops == nil { return errors.ErrBatchClosed } b.size += len(key) + len(value) b.ops = append(b.ops, operation{opTypeSet, key, value}) return nil } // Delete implements Batch. func (b *memDBBatch) Delete(key []byte) error { if len(key) == 0 { return errors.ErrKeyEmpty } if b.ops == nil { return errors.ErrBatchClosed } b.size += len(key) b.ops = append(b.ops, operation{opTypeDelete, key, nil}) return nil } // Write implements Batch. func (b *memDBBatch) Write() error { if b.ops == nil { return errors.ErrBatchClosed } b.db.mtx.Lock() defer b.db.mtx.Unlock() for _, op := range b.ops { switch op.opType { case opTypeSet: b.db.set(op.key, op.value) case opTypeDelete: b.db.delete(op.key) default: return fmt.Errorf("unknown operation type %v (%v)", op.opType, op) } } // Make sure batch cannot be used afterwards. Callers should still call Close(), for errors. return b.Close() } // WriteSync implements Batch. func (b *memDBBatch) WriteSync() error { return b.Write() } // Close implements Batch. func (b *memDBBatch) Close() error { b.ops = nil b.size = 0 return nil } // GetByteSize implements Batch func (b *memDBBatch) GetByteSize() (int, error) { if b.ops == nil { return 0, errors.ErrBatchClosed } return b.size, nil }