cosmos-sdk/store/cachekv/store.go
Emmanuel T Odeke 5399e72f32
perf: store/cachekv: avoid a map lookup if unnecessary, clear maps fast (#10486)
We can shave off some milliseconds and also cut down some megabytes of
RAM by looking up a key in the cache only when its value is actually
needed, and by using the map clearing idiom, which the Go compiler
recognizes and turns into fast code (see the sketch below).
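
For reference, a minimal sketch of the idiom (the helper name is illustrative, not part of the change):

```go
// clearMap empties m in place. The Go compiler recognizes this exact
// range-and-delete pattern and lowers it to a single runtime map-clear
// call, so the map's buckets are kept and reused rather than being
// garbage collected and reallocated as `m = make(map[string]struct{})`
// would require.
func clearMap(m map[string]struct{}) {
	for k := range m {
		delete(m, k)
	}
}
```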

Noticed in profiles from Tharsis' Ethermint per https://github.com/tharsis/ethermint/issues/710

- Before
* Memory profiles
```shell
   19.50MB    19.50MB    134:	store.cache = make(map[string]*cValue)
   18.50MB    18.50MB    135:	store.deleted = make(map[string]struct{})
   15.50MB    15.50MB    136:	store.unsortedCache = make(map[string]struct{})
```

* CPU profiles
```shell
         .          .    118:	// TODO: Consider allowing usage of Batch, which would allow the write to
         .          .    119:	// at least happen atomically.
     150ms      150ms    120:	for _, key := range keys {
     220ms      3.64s    121:		cacheValue := store.cache[key]
         .          .    122:
         .          .    123:		switch {
         .      250ms    124:		case store.isDeleted(key):
         .          .    125:			store.parent.Delete([]byte(key))
     210ms      210ms    126:		case cacheValue.value == nil:
         .          .    127:			// Skip, it already doesn't exist in parent.
         .          .    128:		default:
     240ms     27.94s    129:			store.parent.Set([]byte(key), cacheValue.value)
         .          .    130:		}
         .          .    131:	}

...

      10ms       60ms    134:	store.cache = make(map[string]*cValue)
         .       40ms    135:	store.deleted = make(map[string]struct{})
         .       50ms    136:	store.unsortedCache = make(map[string]struct{})
         .      110ms    137:	store.sortedCache = dbm.NewMemDB()
```

- After
* Memory profiles
```shell
         .          .    130:	// Clear the cache using the map clearing idiom
         .          .    131:	// and not allocating fresh objects.
         .          .    132:	// Please see https://bencher.orijtech.com/perfclinic/mapclearing/
         .          .    133:	for key := range store.cache {
         .          .    134:		delete(store.cache, key)
         .          .    135:	}
         .          .    136:	for key := range store.deleted {
         .          .    137:		delete(store.deleted, key)
         .          .    138:	}
         .          .    139:	for key := range store.unsortedCache {
         .          .    140:		delete(store.unsortedCache, key)
         .          .    141:	}
```

* CPU profiles
```shell
         .          .    111:	// TODO: Consider allowing usage of Batch, which would allow the write to
         .          .    112:	// at least happen atomically.
     110ms      110ms    113:	for _, key := range keys {
         .      210ms    114:		if store.isDeleted(key) {
         .          .    115:			// We use []byte(key) instead of conv.UnsafeStrToBytes because we cannot
         .          .    116:			// be sure if the underlying store might do a save with the byteslice or
         .          .    117:			// not. Once we get confirmation that .Delete is guaranteed not to
         .          .    118:			// save the byteslice, then we can assume only a read-only copy is sufficient.
         .          .    119:			store.parent.Delete([]byte(key))
         .          .    120:			continue
         .          .    121:		}
         .          .    122:
      50ms      2.45s    123:		cacheValue := store.cache[key]
     910ms      920ms    124:		if cacheValue.value != nil {
         .          .    125:			// It already exists in the parent, hence delete it.
     120ms     29.56s    126:			store.parent.Set([]byte(key), cacheValue.value)
         .          .    127:		}
         .          .    128:	}
         .          .    129:
         .          .    130:	// Clear the cache using the map clearing idiom
         .          .    131:	// and not allocating fresh objects.
         .          .    132:	// Please see https://bencher.orijtech.com/perfclinic/mapclearing/
         .      210ms    133:	for key := range store.cache {
         .          .    134:		delete(store.cache, key)
         .          .    135:	}
         .       10ms    136:	for key := range store.deleted {
         .          .    137:		delete(store.deleted, key)
         .          .    138:	}
         .      170ms    139:	for key := range store.unsortedCache {
         .          .    140:		delete(store.unsortedCache, key)
         .          .    141:	}
         .      260ms    142:	store.sortedCache = dbm.NewMemDB()
         .       10ms    143:}

```
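
To sanity-check the map-reset change in isolation, a minimal illustrative benchmark (not part of this commit; all names are hypothetical) could look like:

```go
package cachekv_test

import (
	"strconv"
	"testing"
)

// fill populates m with a fixed set of keys.
func fill(m map[string]struct{}) {
	for i := 0; i < 1024; i++ {
		m[strconv.Itoa(i)] = struct{}{}
	}
}

// Reset by reallocating: the old buckets become garbage on every round.
func BenchmarkResetByRealloc(b *testing.B) {
	m := make(map[string]struct{})
	for i := 0; i < b.N; i++ {
		fill(m)
		m = make(map[string]struct{})
	}
}

// Reset with the clearing idiom: one runtime map-clear per round, and
// the buckets are reused by the next fill.
func BenchmarkResetByClear(b *testing.B) {
	m := make(map[string]struct{})
	for i := 0; i < b.N; i++ {
		fill(m)
		for k := range m {
			delete(m, k)
		}
	}
}
```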

Fixes #10487
Updates https://github.com/tharsis/ethermint/issues/710
2021-11-08 23:49:13 +00:00


package cachekv

import (
	"bytes"
	"io"
	"sort"
	"sync"

	dbm "github.com/tendermint/tm-db"

	"github.com/cosmos/cosmos-sdk/internal/conv"
	"github.com/cosmos/cosmos-sdk/store/listenkv"
	"github.com/cosmos/cosmos-sdk/store/tracekv"
	"github.com/cosmos/cosmos-sdk/store/types"
	"github.com/cosmos/cosmos-sdk/types/kv"
)

// If value is nil but deleted is false, it means the parent doesn't have the
// key. (No need to delete upon Write())
type cValue struct {
	value []byte
	dirty bool
}

// Store wraps an in-memory cache around an underlying types.KVStore.
type Store struct {
	mtx           sync.Mutex
	cache         map[string]*cValue
	deleted       map[string]struct{}
	unsortedCache map[string]struct{}
	sortedCache   *dbm.MemDB // always ascending sorted
	parent        types.KVStore
}

var _ types.CacheKVStore = (*Store)(nil)

// NewStore creates a new Store object
func NewStore(parent types.KVStore) *Store {
	return &Store{
		cache:         make(map[string]*cValue),
		deleted:       make(map[string]struct{}),
		unsortedCache: make(map[string]struct{}),
		sortedCache:   dbm.NewMemDB(),
		parent:        parent,
	}
}

// GetStoreType implements Store.
func (store *Store) GetStoreType() types.StoreType {
	return store.parent.GetStoreType()
}

// Get implements types.KVStore.
func (store *Store) Get(key []byte) (value []byte) {
	store.mtx.Lock()
	defer store.mtx.Unlock()

	types.AssertValidKey(key)

	cacheValue, ok := store.cache[conv.UnsafeBytesToStr(key)]
	if !ok {
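		// Cache miss: fall through to the parent store and memoize the
		// result (even a nil value) so repeat reads skip the parent.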
		value = store.parent.Get(key)
		store.setCacheValue(key, value, false, false)
	} else {
		value = cacheValue.value
	}

	return value
}

// Set implements types.KVStore.
func (store *Store) Set(key []byte, value []byte) {
	store.mtx.Lock()
	defer store.mtx.Unlock()

	types.AssertValidKey(key)
	types.AssertValidValue(value)

	store.setCacheValue(key, value, false, true)
}

// Has implements types.KVStore.
func (store *Store) Has(key []byte) bool {
	value := store.Get(key)
	return value != nil
}

// Delete implements types.KVStore.
func (store *Store) Delete(key []byte) {
	store.mtx.Lock()
	defer store.mtx.Unlock()

	types.AssertValidKey(key)

	store.setCacheValue(key, nil, true, true)
}

// Write implements types.CacheKVStore. It writes all dirty entries in the
// cache through to the parent store.
func (store *Store) Write() {
	store.mtx.Lock()
	defer store.mtx.Unlock()

	// We need a copy of all of the keys.
	// Not the best, but probably not a bottleneck depending on usage.
	keys := make([]string, 0, len(store.cache))
	for key, dbValue := range store.cache {
		if dbValue.dirty {
			keys = append(keys, key)
		}
	}

	sort.Strings(keys)

	// TODO: Consider allowing usage of Batch, which would allow the write to
	// at least happen atomically.
	for _, key := range keys {
		if store.isDeleted(key) {
			// We use []byte(key) instead of conv.UnsafeStrToBytes because we cannot
			// be sure if the underlying store might do a save with the byteslice or
			// not. Once we get confirmation that .Delete is guaranteed not to
			// save the byteslice, then we can assume only a read-only copy is sufficient.
			store.parent.Delete([]byte(key))
			continue
		}

		cacheValue := store.cache[key]
		if cacheValue.value != nil {
			// A nil value means the key already doesn't exist in the parent,
			// so only non-nil values need to be written through.
			store.parent.Set([]byte(key), cacheValue.value)
		}
	}

	// Clear the cache using the map clearing idiom
	// and not allocating fresh objects.
	// Please see https://bencher.orijtech.com/perfclinic/mapclearing/
	for key := range store.cache {
		delete(store.cache, key)
	}
	for key := range store.deleted {
		delete(store.deleted, key)
	}
	for key := range store.unsortedCache {
		delete(store.unsortedCache, key)
	}
	store.sortedCache = dbm.NewMemDB()
}

// CacheWrap implements CacheWrapper.
func (store *Store) CacheWrap() types.CacheWrap {
	return NewStore(store)
}

// CacheWrapWithTrace implements the CacheWrapper interface.
func (store *Store) CacheWrapWithTrace(w io.Writer, tc types.TraceContext) types.CacheWrap {
	return NewStore(tracekv.NewStore(store, w, tc))
}

// CacheWrapWithListeners implements the CacheWrapper interface.
func (store *Store) CacheWrapWithListeners(storeKey types.StoreKey, listeners []types.WriteListener) types.CacheWrap {
	return NewStore(listenkv.NewStore(store, storeKey, listeners))
}

//----------------------------------------
// Iteration

// Iterator implements types.KVStore.
func (store *Store) Iterator(start, end []byte) types.Iterator {
	return store.iterator(start, end, true)
}

// ReverseIterator implements types.KVStore.
func (store *Store) ReverseIterator(start, end []byte) types.Iterator {
	return store.iterator(start, end, false)
}

func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
	store.mtx.Lock()
	defer store.mtx.Unlock()

	var parent, cache types.Iterator

	if ascending {
		parent = store.parent.Iterator(start, end)
	} else {
		parent = store.parent.ReverseIterator(start, end)
	}
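
	// Materialize the dirty, in-domain cache entries into sortedCache, then
	// merge them with the parent iterator so callers see the cached overlay.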
	store.dirtyItems(start, end)
	cache = newMemIterator(start, end, store.sortedCache, store.deleted, ascending)

	return newCacheMergeIterator(parent, cache, ascending)
}
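
// findStartIndex returns the index of the first element in strL that is
// greater than or equal to startQ, or -1 if no such element exists.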
func findStartIndex(strL []string, startQ string) int {
	// Modified binary search to find the very first element >= startQ.
	if len(strL) == 0 {
		return -1
	}

	var left, right, mid int
	right = len(strL) - 1
	for left <= right {
		mid = (left + right) >> 1
		midStr := strL[mid]
		if midStr == startQ {
			// Handle the condition where there might be multiple values equal to startQ.
			// Scanning backwards, we look for the last value < midStr, so that i+1
			// is the first element == midStr.
			for i := mid - 1; i >= 0; i-- {
				if strL[i] != midStr {
					return i + 1
				}
			}
			return 0
		}
		if midStr < startQ {
			left = mid + 1
		} else { // midStr > startQ
			right = mid - 1
		}
	}

	if left >= 0 && left < len(strL) && strL[left] >= startQ {
		return left
	}
	return -1
}
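
// findEndIndex returns the index of the first occurrence of endQ in strL when
// endQ is present, otherwise the index of the last element less than endQ;
// it returns -1 if no element is less than endQ.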
func findEndIndex(strL []string, endQ string) int {
	if len(strL) == 0 {
		return -1
	}

	// Modified binary search to find the first occurrence of endQ, or the
	// last element < endQ if endQ is absent.
	var left, right, mid int
	right = len(strL) - 1
	for left <= right {
		mid = (left + right) >> 1
		midStr := strL[mid]
		if midStr == endQ {
			// Handle the condition where there might be multiple values equal to endQ.
			// Scanning backwards, we look for the last value < midStr, so that i+1
			// is the first element == midStr.
			for i := mid - 1; i >= 0; i-- {
				if strL[i] < midStr {
					return i + 1
				}
			}
			return 0
		}
		if midStr < endQ {
			left = mid + 1
		} else { // midStr > endQ
			right = mid - 1
		}
	}

	// Binary search failed, now let's find a value less than endQ.
	for i := right; i >= 0; i-- {
		if strL[i] < endQ {
			return i
		}
	}

	return -1
}
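
// sortState denotes whether the keys of the pairs passed to
// clearUnsortedCacheSubset are already sorted (stateAlreadySorted)
// or still need sorting (stateUnsorted).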
type sortState int

const (
	stateUnsorted sortState = iota
	stateAlreadySorted
)

// Constructs a slice of dirty items, to use w/ memIterator.
func (store *Store) dirtyItems(start, end []byte) {
	startStr, endStr := conv.UnsafeBytesToStr(start), conv.UnsafeBytesToStr(end)
	if startStr > endStr {
		// Nothing to do here.
		return
	}

	n := len(store.unsortedCache)
	unsorted := make([]*kv.Pair, 0)
	// If the unsortedCache is too big, it costs too much to determine
	// what's in the subset we are concerned about.
	// If you are interleaving iterator calls with writes, this can easily become an
	// O(N^2) overhead.
	// Even without that, too many range checks eventually become more expensive
	// than just not having the cache.
	if n < 1024 {
		for key := range store.unsortedCache {
			if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) {
				cacheValue := store.cache[key]
				unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
			}
		}
		store.clearUnsortedCacheSubset(unsorted, stateUnsorted)
		return
	}

	// Otherwise it is large so perform a modified binary search to find
	// the target ranges for the keys that we should be looking for.
	strL := make([]string, 0, n)
	for key := range store.unsortedCache {
		strL = append(strL, key)
	}
	sort.Strings(strL)

	// Now find the values within the domain
	//  [start, end)
	startIndex := findStartIndex(strL, startStr)
	endIndex := findEndIndex(strL, endStr)

	if endIndex < 0 {
		endIndex = len(strL) - 1
	}
	if startIndex < 0 {
		startIndex = 0
	}

	kvL := make([]*kv.Pair, 0)
	for i := startIndex; i <= endIndex; i++ {
		key := strL[i]
		cacheValue := store.cache[key]
		kvL = append(kvL, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
	}

	// kvL was already sorted so pass it in as is.
	store.clearUnsortedCacheSubset(kvL, stateAlreadySorted)
}
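
// clearUnsortedCacheSubset deletes the given pairs from unsortedCache and
// copies them into sortedCache, sorting them first unless sortState says
// they are already in ascending key order.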
func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair, sortState sortState) {
	n := len(store.unsortedCache)
	if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
		for key := range store.unsortedCache {
			delete(store.unsortedCache, key)
		}
	} else { // Otherwise, normally delete the unsorted keys from the map.
		for _, kv := range unsorted {
			delete(store.unsortedCache, conv.UnsafeBytesToStr(kv.Key))
		}
	}

	if sortState == stateUnsorted {
		sort.Slice(unsorted, func(i, j int) bool {
			return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0
		})
	}

	for _, item := range unsorted {
		if item.Value == nil {
			// deleted element, tracked by store.deleted
			// setting arbitrary value
			store.sortedCache.Set(item.Key, []byte{})
			continue
		}
		err := store.sortedCache.Set(item.Key, item.Value)
		if err != nil {
			panic(err)
		}
	}
}

//----------------------------------------
// etc

// Only entrypoint to mutate store.cache.
func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) {
	keyStr := conv.UnsafeBytesToStr(key)
	store.cache[keyStr] = &cValue{
		value: value,
		dirty: dirty,
	}
	if deleted {
		store.deleted[keyStr] = struct{}{}
	} else {
		delete(store.deleted, keyStr)
	}
	if dirty {
		store.unsortedCache[keyStr] = struct{}{}
	}
}
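
// isDeleted reports whether key is marked as deleted in this cache layer
// and is therefore pending deletion from the parent on Write.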
func (store *Store) isDeleted(key string) bool {
	_, ok := store.deleted[key]
	return ok
}
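
For orientation, a minimal usage sketch of this store follows; it assumes the sibling dbadapter package for the parent store, but any types.KVStore implementation works:

```go
package main

import (
	"fmt"

	dbm "github.com/tendermint/tm-db"

	"github.com/cosmos/cosmos-sdk/store/cachekv"
	"github.com/cosmos/cosmos-sdk/store/dbadapter"
)

func main() {
	// Writes to the cachekv branch are buffered until Write is called.
	parent := dbadapter.Store{DB: dbm.NewMemDB()}
	branch := cachekv.NewStore(parent)

	branch.Set([]byte("k"), []byte("v"))
	fmt.Println(parent.Has([]byte("k"))) // false: the write is still buffered

	branch.Write() // flush dirty entries to the parent
	fmt.Println(parent.Has([]byte("k"))) // true
}
```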