ethdb/leveldb: disable seek compaction (#20130)

* vendor: update leveldb

* ethdb/leveldb: disable seek compaction and add metrics

* vendor: update latest leveldb

* ethdb/leveldb: fix typo
This commit is contained in:
gary rong 2019-09-26 22:44:00 +08:00 committed by Péter Szilágyi
parent ead711779d
commit df89233b57
15 changed files with 196 additions and 69 deletions

View File

@ -70,6 +70,10 @@ type Database struct {
diskSizeGauge metrics.Gauge // Gauge for tracking the size of all the levels in the database
diskReadMeter metrics.Meter // Meter for measuring the effective amount of data read
diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written
memCompGauge metrics.Gauge // Gauge for tracking the number of memory compaction
level0CompGauge metrics.Gauge // Gauge for tracking the number of table compaction in level0
nonlevel0CompGauge metrics.Gauge // Gauge for tracking the number of table compaction in non0 level
seekCompGauge metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt
quitLock sync.Mutex // Mutex protecting the quit channel access
quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
@ -96,6 +100,7 @@ func New(file string, cache int, handles int, namespace string) (*Database, erro
BlockCacheCapacity: cache / 2 * opt.MiB,
WriteBuffer: cache / 4 * opt.MiB, // Two of these are used internally
Filter: filter.NewBloomFilter(10),
DisableSeeksCompaction: true,
})
if _, corrupted := err.(*errors.ErrCorrupted); corrupted {
db, err = leveldb.RecoverFile(file, nil)
@ -118,6 +123,10 @@ func New(file string, cache int, handles int, namespace string) (*Database, erro
ldb.diskWriteMeter = metrics.NewRegisteredMeter(namespace+"disk/write", nil)
ldb.writeDelayMeter = metrics.NewRegisteredMeter(namespace+"compact/writedelay/duration", nil)
ldb.writeDelayNMeter = metrics.NewRegisteredMeter(namespace+"compact/writedelay/counter", nil)
ldb.memCompGauge = metrics.NewRegisteredGauge(namespace+"compact/memory", nil)
ldb.level0CompGauge = metrics.NewRegisteredGauge(namespace+"compact/level0", nil)
ldb.nonlevel0CompGauge = metrics.NewRegisteredGauge(namespace+"compact/nonlevel0", nil)
ldb.seekCompGauge = metrics.NewRegisteredGauge(namespace+"compact/seek", nil)
// Start up the metrics gathering and return
go ldb.meter(metricsGatheringInterval)
@ -375,6 +384,29 @@ func (db *Database) meter(refresh time.Duration) {
}
iostats[0], iostats[1] = nRead, nWrite
compCount, err := db.db.GetProperty("leveldb.compcount")
if err != nil {
db.log.Error("Failed to read database iostats", "err", err)
merr = err
continue
}
var (
memComp uint32
level0Comp uint32
nonLevel0Comp uint32
seekComp uint32
)
if n, err := fmt.Sscanf(compCount, "MemComp:%d Level0Comp:%d NonLevel0Comp:%d SeekComp:%d", &memComp, &level0Comp, &nonLevel0Comp, &seekComp); n != 4 || err != nil {
db.log.Error("Compaction count statistic not found")
merr = err
continue
}
db.memCompGauge.Update(int64(memComp))
db.level0CompGauge.Update(int64(level0Comp))
db.nonlevel0CompGauge.Update(int64(nonLevel0Comp))
db.seekCompGauge.Update(int64(seekComp))
// Sleep a bit, then repeat the stats collection
select {
case errc = <-db.quitChan:

View File

@ -238,6 +238,11 @@ func newBatch() interface{} {
return &Batch{}
}
// MakeBatch creates an empty Batch whose data buffer is preallocated with
// capacity for n bytes.
func MakeBatch(n int) *Batch {
	b := new(Batch)
	b.data = make([]byte, 0, n)
	return b
}
func decodeBatch(data []byte, fn func(i int, index batchIndex) error) error {
var index batchIndex
for i, o := 0, 0; o < len(data); i++ {

View File

@ -38,6 +38,12 @@ type DB struct {
inWritePaused int32 // The indicator whether write operation is paused by compaction
aliveSnaps, aliveIters int32
// Compaction statistic
memComp uint32 // The cumulative number of memory compaction
level0Comp uint32 // The cumulative number of level0 compaction
nonLevel0Comp uint32 // The cumulative number of non-level0 compaction
seekComp uint32 // The cumulative number of seek compaction
// Session.
s *session
@ -978,6 +984,8 @@ func (db *DB) GetProperty(name string) (value string, err error) {
value += fmt.Sprintf(" Total | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
totalTables, float64(totalSize)/1048576.0, totalDuration.Seconds(),
float64(totalRead)/1048576.0, float64(totalWrite)/1048576.0)
case p == "compcount":
value = fmt.Sprintf("MemComp:%d Level0Comp:%d NonLevel0Comp:%d SeekComp:%d", atomic.LoadUint32(&db.memComp), atomic.LoadUint32(&db.level0Comp), atomic.LoadUint32(&db.nonLevel0Comp), atomic.LoadUint32(&db.seekComp))
case p == "iostats":
value = fmt.Sprintf("Read(MB):%.5f Write(MB):%.5f",
float64(db.s.stor.reads())/1048576.0,
@ -1034,6 +1042,11 @@ type DBStats struct {
LevelRead Sizes
LevelWrite Sizes
LevelDurations []time.Duration
MemComp uint32
Level0Comp uint32
NonLevel0Comp uint32
SeekComp uint32
}
// Stats populates s with database statistics.
@ -1070,16 +1083,17 @@ func (db *DB) Stats(s *DBStats) error {
for level, tables := range v.levels {
duration, read, write := db.compStats.getStat(level)
if len(tables) == 0 && duration == 0 {
continue
}
s.LevelDurations = append(s.LevelDurations, duration)
s.LevelRead = append(s.LevelRead, read)
s.LevelWrite = append(s.LevelWrite, write)
s.LevelSizes = append(s.LevelSizes, tables.size())
s.LevelTablesCounts = append(s.LevelTablesCounts, len(tables))
}
s.MemComp = atomic.LoadUint32(&db.memComp)
s.Level0Comp = atomic.LoadUint32(&db.level0Comp)
s.NonLevel0Comp = atomic.LoadUint32(&db.nonLevel0Comp)
s.SeekComp = atomic.LoadUint32(&db.seekComp)
return nil
}

View File

@ -8,6 +8,7 @@ package leveldb
import (
"sync"
"sync/atomic"
"time"
"github.com/syndtr/goleveldb/leveldb/errors"
@ -324,10 +325,12 @@ func (db *DB) memCompaction() {
db.logf("memdb@flush committed F·%d T·%v", len(rec.addedTables), stats.duration)
// Save compaction stats
for _, r := range rec.addedTables {
stats.write += r.size
}
db.compStats.addStat(flushLevel, stats)
atomic.AddUint32(&db.memComp, 1)
// Drop frozen memdb.
db.dropFrozenMem()
@ -588,6 +591,14 @@ func (db *DB) tableCompaction(c *compaction, noTrivial bool) {
for i := range stats {
db.compStats.addStat(c.sourceLevel+1, &stats[i])
}
switch c.typ {
case level0Compaction:
atomic.AddUint32(&db.level0Comp, 1)
case nonLevel0Compaction:
atomic.AddUint32(&db.nonLevel0Comp, 1)
case seekCompaction:
atomic.AddUint32(&db.seekComp, 1)
}
}
func (db *DB) tableRangeCompaction(level int, umin, umax []byte) error {

View File

@ -83,9 +83,13 @@ func (db *DB) newIterator(auxm *memDB, auxt tFiles, seq uint64, slice *util.Rang
iter: rawIter,
seq: seq,
strict: opt.GetStrict(db.s.o.Options, ro, opt.StrictReader),
disableSampling: db.s.o.GetDisableSeeksCompaction() || db.s.o.GetIteratorSamplingRate() <= 0,
key: make([]byte, 0),
value: make([]byte, 0),
}
if !iter.disableSampling {
iter.samplingGap = db.iterSamplingRate()
}
atomic.AddInt32(&db.aliveIters, 1)
runtime.SetFinalizer(iter, (*dbIter).Release)
return iter
@ -112,8 +116,9 @@ type dbIter struct {
iter iterator.Iterator
seq uint64
strict bool
disableSampling bool
smaplingGap int
samplingGap int
dir dir
key []byte
value []byte
@ -122,10 +127,14 @@ type dbIter struct {
}
func (i *dbIter) sampleSeek() {
if i.disableSampling {
return
}
ikey := i.iter.Key()
i.smaplingGap -= len(ikey) + len(i.iter.Value())
for i.smaplingGap < 0 {
i.smaplingGap += i.db.iterSamplingRate()
i.samplingGap -= len(ikey) + len(i.iter.Value())
for i.samplingGap < 0 {
i.samplingGap += i.db.iterSamplingRate()
i.db.sampleSeek(ikey)
}
}

View File

@ -69,6 +69,9 @@ func (tr *Transaction) Has(key []byte, ro *opt.ReadOptions) (bool, error) {
// DB. And a nil Range.Limit is treated as a key after all keys in
// the DB.
//
// The returned iterator holds locks on its own resources, so it can outlive
// the transaction that created it.
//
// WARNING: The content of any slice returned by the iterator (e.g. a slice
// returned by calling the Iterator.Key() or Iterator.Value() methods) should
// not be modified unless noted otherwise.
@ -252,13 +255,14 @@ func (tr *Transaction) discard() {
// Discard transaction.
for _, t := range tr.tables {
tr.db.logf("transaction@discard @%d", t.fd.Num)
if err1 := tr.db.s.stor.Remove(t.fd); err1 == nil {
tr.db.s.reuseFileNum(t.fd.Num)
}
// Iterator may still use the table, so we use tOps.remove here.
tr.db.s.tops.remove(t.fd)
}
}
// Discard discards the transaction.
// This method is noop if transaction is already closed (either committed or
// discarded)
//
// Other methods should not be called after transaction has been discarded.
func (tr *Transaction) Discard() {
@ -282,8 +286,10 @@ func (db *DB) waitCompaction() error {
// until in-flight transaction is committed or discarded.
// The returned transaction handle is safe for concurrent use.
//
// Transaction is expensive and can overwhelm compaction, especially if
// Transaction is very expensive and can overwhelm compaction, especially if
// transaction size is small. Use with caution.
// As a rule of thumb, use a transaction only if you need to merge at least
// `Options.WriteBuffer` worth of data; otherwise don't.
//
// The transaction must be closed once done, either by committing or discarding
// the transaction.

View File

@ -16,7 +16,7 @@ func bloomHash(key []byte) uint32 {
type bloomFilter int
// The bloom filter serializes its parameters and is backward compatible
// Name: The bloom filter serializes its parameters and is backward compatible
// with respect to them. Therefore, its parameters are not added to its
// name.
func (bloomFilter) Name() string {

View File

@ -397,6 +397,10 @@ func (p *DB) Find(key []byte) (rkey, value []byte, err error) {
// DB. And a nil Range.Limit is treated as a key after all keys in
// the DB.
//
// WARNING: The content of any slice returned by the iterator (e.g. a slice
// returned by calling the Iterator.Key() or Iterator.Value() methods) should
// not be modified unless noted otherwise.
//
// The iterator must be released after use, by calling Release method.
//
// Also read Iterator documentation of the leveldb/iterator package.

View File

@ -278,6 +278,14 @@ type Options struct {
// The default is false.
DisableLargeBatchTransaction bool
// DisableSeeksCompaction allows disabling 'seeks triggered compaction'.
// The purpose of 'seeks triggered compaction' is to optimize the database so
// that 'level seeks' can be minimized; however, this might generate many
// small compactions, which may not be preferable.
//
// The default is false.
DisableSeeksCompaction bool
// ErrorIfExist defines whether an error should be returned if the DB already
// exists.
//
@ -309,6 +317,8 @@ type Options struct {
// IteratorSamplingRate defines approximate gap (in bytes) between read
// sampling of an iterator. The samples will be used to determine when
// compaction should be triggered.
// Use a negative value to disable iterator sampling.
// The iterator sampling is disabled if DisableSeeksCompaction is true.
//
// The default is 1MiB.
IteratorSamplingRate int
@ -526,6 +536,13 @@ func (o *Options) GetDisableLargeBatchTransaction() bool {
return o.DisableLargeBatchTransaction
}
// GetDisableSeeksCompaction reports whether 'seeks triggered compaction' is
// disabled. It is safe to call on a nil *Options, in which case it returns
// false.
func (o *Options) GetDisableSeeksCompaction() bool {
	return o != nil && o.DisableSeeksCompaction
}
func (o *Options) GetErrorIfExist() bool {
if o == nil {
return false
@ -548,8 +565,10 @@ func (o *Options) GetFilter() filter.Filter {
}
func (o *Options) GetIteratorSamplingRate() int {
if o == nil || o.IteratorSamplingRate <= 0 {
if o == nil || o.IteratorSamplingRate == 0 {
return DefaultIteratorSamplingRate
} else if o.IteratorSamplingRate < 0 {
return 0
}
return o.IteratorSamplingRate
}

View File

@ -14,6 +14,13 @@ import (
"github.com/syndtr/goleveldb/leveldb/opt"
)
// Compaction types, stored in compaction.typ and used to pick which
// cumulative compaction counter is incremented after a table compaction.
const (
// undefinedCompaction is the zero value of the compaction type.
undefinedCompaction = iota
// level0Compaction marks a table compaction whose source level is 0.
level0Compaction
// nonLevel0Compaction marks a table compaction whose source level is not 0.
nonLevel0Compaction
// seekCompaction marks a table compaction picked from the version's cSeek
// pointer rather than from the compaction score.
seekCompaction
)
func (s *session) pickMemdbLevel(umin, umax []byte, maxLevel int) int {
v := s.version()
defer v.release()
@ -50,6 +57,7 @@ func (s *session) pickCompaction() *compaction {
var sourceLevel int
var t0 tFiles
var typ int
if v.cScore >= 1 {
sourceLevel = v.cLevel
cptr := s.getCompPtr(sourceLevel)
@ -63,18 +71,24 @@ func (s *session) pickCompaction() *compaction {
if len(t0) == 0 {
t0 = append(t0, tables[0])
}
if sourceLevel == 0 {
typ = level0Compaction
} else {
typ = nonLevel0Compaction
}
} else {
if p := atomic.LoadPointer(&v.cSeek); p != nil {
ts := (*tSet)(p)
sourceLevel = ts.level
t0 = append(t0, ts.table)
typ = seekCompaction
} else {
v.release()
return nil
}
}
return newCompaction(s, v, sourceLevel, t0)
return newCompaction(s, v, sourceLevel, t0, typ)
}
// Create compaction from given level and range; need external synchronization.
@ -109,13 +123,18 @@ func (s *session) getCompactionRange(sourceLevel int, umin, umax []byte, noLimit
}
}
return newCompaction(s, v, sourceLevel, t0)
typ := level0Compaction
if sourceLevel != 0 {
typ = nonLevel0Compaction
}
return newCompaction(s, v, sourceLevel, t0, typ)
}
func newCompaction(s *session, v *version, sourceLevel int, t0 tFiles) *compaction {
func newCompaction(s *session, v *version, sourceLevel int, t0 tFiles, typ int) *compaction {
c := &compaction{
s: s,
v: v,
typ: typ,
sourceLevel: sourceLevel,
levels: [2]tFiles{t0, nil},
maxGPOverlaps: int64(s.o.GetCompactionGPOverlaps(sourceLevel)),
@ -131,6 +150,7 @@ type compaction struct {
s *session
v *version
typ int
sourceLevel int
levels [2]tFiles
maxGPOverlaps int64

View File

@ -308,7 +308,7 @@ func (s *session) setNextFileNum(num int64) {
func (s *session) markFileNum(num int64) {
nextFileNum := num + 1
for {
old, x := s.stNextFileNum, nextFileNum
old, x := atomic.LoadInt64(&s.stNextFileNum), nextFileNum
if old > x {
x = old
}
@ -326,7 +326,7 @@ func (s *session) allocFileNum() int64 {
// Reuse given file number.
func (s *session) reuseFileNum(num int64) {
for {
old, x := s.stNextFileNum, num
old, x := atomic.LoadInt64(&s.stNextFileNum), num
if old != x+1 {
x = old
}

View File

@ -493,6 +493,8 @@ func (t *tOps) remove(fd storage.FileDesc) {
if t.evictRemoved && t.bcache != nil {
t.bcache.EvictNS(uint64(fd.Num))
}
// Try to reuse file num, useful for discarded transaction.
t.s.reuseFileNum(fd.Num)
})
}

View File

@ -787,6 +787,10 @@ func (r *Reader) getDataIterErr(dataBH blockHandle, slice *util.Range, verifyChe
// table. And a nil Range.Limit is treated as a key after all keys in
// the table.
//
// WARNING: The content of any slice returned by the iterator (e.g. a slice
// returned by calling the Iterator.Key() or Iterator.Value() methods) should
// not be modified unless noted otherwise.
//
// The returned iterator is not safe for concurrent use and should be released
// after use.
//

View File

@ -144,6 +144,7 @@ func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue
}
ukey := ikey.ukey()
sampleSeeks := !v.s.o.GetDisableSeeksCompaction()
var (
tset *tSet
@ -161,7 +162,7 @@ func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue
// Since entries never hop across level, finding key/value
// in smaller level make later levels irrelevant.
v.walkOverlapping(aux, ikey, func(level int, t *tFile) bool {
if level >= 0 && !tseek {
if sampleSeeks && level >= 0 && !tseek {
if tset == nil {
tset = &tSet{level, t}
} else {

58
vendor/vendor.json vendored
View File

@ -455,76 +455,76 @@
"revisionTime": "2017-07-05T02:17:15Z"
},
{
"checksumSHA1": "4NTmfUj7H5J59M2wCnp3/8FWt1I=",
"checksumSHA1": "Bl4KYAyUkgJSjcdEyv3VhHQ8PVs=",
"path": "github.com/syndtr/goleveldb/leveldb",
"revision": "c3a204f8e96543bb0cc090385c001078f184fc46",
"revisionTime": "2019-03-18T03:00:20Z"
"revision": "758128399b1df3a87e92df6c26c1d2063da8fabe",
"revisionTime": "2019-09-23T12:57:48Z"
},
{
"checksumSHA1": "mPNraL2edpk/2FYq26rSXfMHbJg=",
"path": "github.com/syndtr/goleveldb/leveldb/cache",
"revision": "b001fa50d6b27f3f0bb175a87d0cb55426d0a0ae",
"revisionTime": "2018-11-28T10:09:59Z"
"revision": "758128399b1df3a87e92df6c26c1d2063da8fabe",
"revisionTime": "2019-09-23T12:57:48Z"
},
{
"checksumSHA1": "UA+PKDKWlDnE2OZblh23W6wZwbY=",
"path": "github.com/syndtr/goleveldb/leveldb/comparer",
"revision": "b001fa50d6b27f3f0bb175a87d0cb55426d0a0ae",
"revisionTime": "2018-11-28T10:09:59Z"
"revision": "758128399b1df3a87e92df6c26c1d2063da8fabe",
"revisionTime": "2019-09-23T12:57:48Z"
},
{
"checksumSHA1": "1DRAxdlWzS4U0xKN/yQ/fdNN7f0=",
"path": "github.com/syndtr/goleveldb/leveldb/errors",
"revision": "b001fa50d6b27f3f0bb175a87d0cb55426d0a0ae",
"revisionTime": "2018-11-28T10:09:59Z"
"revision": "758128399b1df3a87e92df6c26c1d2063da8fabe",
"revisionTime": "2019-09-23T12:57:48Z"
},
{
"checksumSHA1": "eqKeD6DS7eNCtxVYZEHHRKkyZrw=",
"checksumSHA1": "iBorxU3FBbau81WSyVa8KwcutzA=",
"path": "github.com/syndtr/goleveldb/leveldb/filter",
"revision": "b001fa50d6b27f3f0bb175a87d0cb55426d0a0ae",
"revisionTime": "2018-11-28T10:09:59Z"
"revision": "758128399b1df3a87e92df6c26c1d2063da8fabe",
"revisionTime": "2019-09-23T12:57:48Z"
},
{
"checksumSHA1": "hPyFsMiqZ1OB7MX+6wIAA6nsdtc=",
"path": "github.com/syndtr/goleveldb/leveldb/iterator",
"revision": "b001fa50d6b27f3f0bb175a87d0cb55426d0a0ae",
"revisionTime": "2018-11-28T10:09:59Z"
"revision": "758128399b1df3a87e92df6c26c1d2063da8fabe",
"revisionTime": "2019-09-23T12:57:48Z"
},
{
"checksumSHA1": "gJY7bRpELtO0PJpZXgPQ2BYFJ88=",
"path": "github.com/syndtr/goleveldb/leveldb/journal",
"revision": "b001fa50d6b27f3f0bb175a87d0cb55426d0a0ae",
"revisionTime": "2018-11-28T10:09:59Z"
"revision": "758128399b1df3a87e92df6c26c1d2063da8fabe",
"revisionTime": "2019-09-23T12:57:48Z"
},
{
"checksumSHA1": "MtYY1b2234y/MlS+djL8tXVAcQs=",
"checksumSHA1": "2ncG38FDk2thSlrHd7JFmiuvnxA=",
"path": "github.com/syndtr/goleveldb/leveldb/memdb",
"revision": "b001fa50d6b27f3f0bb175a87d0cb55426d0a0ae",
"revisionTime": "2018-11-28T10:09:59Z"
"revision": "758128399b1df3a87e92df6c26c1d2063da8fabe",
"revisionTime": "2019-09-23T12:57:48Z"
},
{
"checksumSHA1": "o2TorI3z+vc+EBMJ8XeFoUmXBtU=",
"checksumSHA1": "LC+WnyNq4O2J9SHuVfWL19wZH48=",
"path": "github.com/syndtr/goleveldb/leveldb/opt",
"revision": "b001fa50d6b27f3f0bb175a87d0cb55426d0a0ae",
"revisionTime": "2018-11-28T10:09:59Z"
"revision": "758128399b1df3a87e92df6c26c1d2063da8fabe",
"revisionTime": "2019-09-23T12:57:48Z"
},
{
"checksumSHA1": "ZnyuciM+R19NG8L5YS3TIJdo1e8=",
"path": "github.com/syndtr/goleveldb/leveldb/storage",
"revision": "b001fa50d6b27f3f0bb175a87d0cb55426d0a0ae",
"revisionTime": "2018-11-28T10:09:59Z"
"revision": "758128399b1df3a87e92df6c26c1d2063da8fabe",
"revisionTime": "2019-09-23T12:57:48Z"
},
{
"checksumSHA1": "gWFPMz8OQeul0t54RM66yMTX49g=",
"checksumSHA1": "DS0i9KReIeZn3T1Bpu31xPMtzio=",
"path": "github.com/syndtr/goleveldb/leveldb/table",
"revision": "b001fa50d6b27f3f0bb175a87d0cb55426d0a0ae",
"revisionTime": "2018-11-28T10:09:59Z"
"revision": "758128399b1df3a87e92df6c26c1d2063da8fabe",
"revisionTime": "2019-09-23T12:57:48Z"
},
{
"checksumSHA1": "V/Dh7NV0/fy/5jX1KaAjmGcNbzI=",
"path": "github.com/syndtr/goleveldb/leveldb/util",
"revision": "b001fa50d6b27f3f0bb175a87d0cb55426d0a0ae",
"revisionTime": "2018-11-28T10:09:59Z"
"revision": "758128399b1df3a87e92df6c26c1d2063da8fabe",
"revisionTime": "2019-09-23T12:57:48Z"
},
{
"checksumSHA1": "SsMMqb3xn7hg1ZX5ugwZz5rzpx0=",