Merge pull request #10391 from filecoin-project/feat/record-hotstore-space

feat:splitstore:Configure max space used by hotstore and GC makes best effort to respect
This commit is contained in:
Łukasz Magiera 2023-03-09 17:41:36 +01:00 committed by GitHub
commit faedc12531
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 265 additions and 64 deletions

View File

@ -115,6 +115,23 @@ type Config struct {
// A positive value is the number of compactions before a full GC is performed;
// a value of 1 will perform full GC in every compaction.
HotStoreFullGCFrequency uint64
// HotstoreMaxSpaceTarget suggests the max allowed space the hotstore can take.
// This is not a hard limit, it is possible for the hotstore to exceed the target
// for example if state grows massively between compactions. The splitstore
// will make a best effort to avoid overflowing the target and in practice should
// never overflow. This field is used when doing GC at the end of a compaction to
// adaptively choose moving GC
HotstoreMaxSpaceTarget uint64
// Moving GC will be triggered when total moving size exceeds
// HotstoreMaxSpaceTarget - HotstoreMaxSpaceThreshold
HotstoreMaxSpaceThreshold uint64
// Safety buffer to prevent moving GC from overflowing disk.
// Moving GC will not occur when total moving size exceeds
// HotstoreMaxSpaceTarget - HotstoreMaxSpaceSafetyBuffer
HotstoreMaxSpaceSafetyBuffer uint64
}
// ChainAccessor allows the Splitstore to access the chain. It will most likely
@ -165,6 +182,7 @@ type SplitStore struct {
compactionIndex int64
pruneIndex int64
onlineGCCnt int64
ctx context.Context
cancel func()
@ -195,6 +213,17 @@ type SplitStore struct {
// registered protectors
protectors []func(func(cid.Cid) error) error
// dag sizes measured during latest compaction
// logged and used for GC strategy
// protected by compaction lock
szWalk int64
szProtectedTxns int64
szKeys int64 // approximate, not counting keys protected when entering critical section
// protected by txnLk
szMarkedLiveRefs int64
}
var _ bstore.Blockstore = (*SplitStore)(nil)

View File

@ -95,7 +95,7 @@ func (s *SplitStore) doCheck(curTs *types.TipSet) error {
}
defer visitor.Close() //nolint
err = s.walkChain(curTs, boundaryEpoch, boundaryEpoch, visitor,
size := s.walkChain(curTs, boundaryEpoch, boundaryEpoch, visitor,
func(c cid.Cid) error {
if isUnitaryObject(c) {
return errStopWalk
@ -133,7 +133,7 @@ func (s *SplitStore) doCheck(curTs *types.TipSet) error {
return err
}
log.Infow("check done", "cold", *coldCnt, "missing", *missingCnt)
log.Infow("check done", "cold", *coldCnt, "missing", *missingCnt, "walk size", size)
write("--")
write("cold: %d missing: %d", *coldCnt, *missingCnt)
write("DONE")

View File

@ -66,7 +66,8 @@ var (
)
const (
batchSize = 16384
batchSize = 16384
cidKeySize = 128
)
func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
@ -199,9 +200,11 @@ func (s *SplitStore) markLiveRefs(cids []cid.Cid) {
log.Debugf("marking %d live refs", len(cids))
startMark := time.Now()
szMarked := new(int64)
count := new(int32)
visitor := newConcurrentVisitor()
walkObject := func(c cid.Cid) error {
walkObject := func(c cid.Cid) (int64, error) {
return s.walkObjectIncomplete(c, visitor,
func(c cid.Cid) error {
if isUnitaryObject(c) {
@ -228,10 +231,12 @@ func (s *SplitStore) markLiveRefs(cids []cid.Cid) {
// optimize the common case of single put
if len(cids) == 1 {
if err := walkObject(cids[0]); err != nil {
sz, err := walkObject(cids[0])
if err != nil {
log.Errorf("error marking tipset refs: %s", err)
}
log.Debugw("marking live refs done", "took", time.Since(startMark), "marked", *count)
atomic.AddInt64(szMarked, sz)
return
}
@ -243,9 +248,11 @@ func (s *SplitStore) markLiveRefs(cids []cid.Cid) {
worker := func() error {
for c := range workch {
if err := walkObject(c); err != nil {
sz, err := walkObject(c)
if err != nil {
return err
}
atomic.AddInt64(szMarked, sz)
}
return nil
@ -268,7 +275,8 @@ func (s *SplitStore) markLiveRefs(cids []cid.Cid) {
log.Errorf("error marking tipset refs: %s", err)
}
log.Debugw("marking live refs done", "took", time.Since(startMark), "marked", *count)
log.Debugw("marking live refs done", "took", time.Since(startMark), "marked", *count, "size marked", *szMarked)
s.szMarkedLiveRefs += atomic.LoadInt64(szMarked)
}
// transactionally protect a view
@ -361,6 +369,7 @@ func (s *SplitStore) protectTxnRefs(markSet MarkSet) error {
log.Infow("protecting transactional references", "refs", len(txnRefs))
count := 0
sz := new(int64)
workch := make(chan cid.Cid, len(txnRefs))
startProtect := time.Now()
@ -393,10 +402,11 @@ func (s *SplitStore) protectTxnRefs(markSet MarkSet) error {
worker := func() error {
for c := range workch {
err := s.doTxnProtect(c, markSet)
szTxn, err := s.doTxnProtect(c, markSet)
if err != nil {
return xerrors.Errorf("error protecting transactional references to %s: %w", c, err)
}
atomic.AddInt64(sz, szTxn)
}
return nil
}
@ -409,16 +419,16 @@ func (s *SplitStore) protectTxnRefs(markSet MarkSet) error {
if err := g.Wait(); err != nil {
return err
}
log.Infow("protecting transactional refs done", "took", time.Since(startProtect), "protected", count)
s.szProtectedTxns += atomic.LoadInt64(sz)
log.Infow("protecting transactional refs done", "took", time.Since(startProtect), "protected", count, "protected size", sz)
}
}
// transactionally protect a reference by walking the object and marking.
// concurrent markings are short circuited by checking the markset.
func (s *SplitStore) doTxnProtect(root cid.Cid, markSet MarkSet) error {
func (s *SplitStore) doTxnProtect(root cid.Cid, markSet MarkSet) (int64, error) {
if err := s.checkClosing(); err != nil {
return err
return 0, err
}
// Note: cold objects are deleted heaviest first, so the consituents of an object
@ -509,6 +519,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
// might be potentially inconsistent; abort compaction and notify the user to intervene.
return xerrors.Errorf("checkpoint exists; aborting compaction")
}
s.clearSizeMeasurements()
currentEpoch := curTs.Height()
boundaryEpoch := currentEpoch - CompactionBoundary
@ -598,7 +609,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
}
err = s.walkChain(curTs, boundaryEpoch, inclMsgsEpoch, &noopVisitor{}, fHot, fCold)
if err != nil {
return xerrors.Errorf("error marking: %w", err)
}
@ -638,7 +648,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
defer purgew.Close() //nolint:errcheck
// some stats for logging
var hotCnt, coldCnt, purgeCnt int
var hotCnt, coldCnt, purgeCnt int64
err = s.hot.ForEachKey(func(c cid.Cid) error {
// was it marked?
mark, err := markSet.Has(c)
@ -690,8 +700,9 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
log.Infow("cold collection done", "took", time.Since(startCollect))
log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "purge", purgeCnt)
stats.Record(s.ctx, metrics.SplitstoreCompactionHot.M(int64(hotCnt)))
stats.Record(s.ctx, metrics.SplitstoreCompactionCold.M(int64(coldCnt)))
s.szKeys = hotCnt * cidKeySize
stats.Record(s.ctx, metrics.SplitstoreCompactionHot.M(hotCnt))
stats.Record(s.ctx, metrics.SplitstoreCompactionCold.M(coldCnt))
if err := s.checkClosing(); err != nil {
return err
@ -773,8 +784,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
return xerrors.Errorf("error purging cold objects: %w", err)
}
log.Infow("purging cold objects from hotstore done", "took", time.Since(startPurge))
s.endCriticalSection()
log.Infow("critical section done", "total protected size", s.szProtectedTxns, "total marked live size", s.szMarkedLiveRefs)
if err := checkpoint.Close(); err != nil {
log.Warnf("error closing checkpoint: %s", err)
@ -907,6 +918,7 @@ func (s *SplitStore) walkChain(ts *types.TipSet, inclState, inclMsgs abi.ChainEp
copy(toWalk, ts.Cids())
walkCnt := new(int64)
scanCnt := new(int64)
szWalk := new(int64)
tsRef := func(blkCids []cid.Cid) (cid.Cid, error) {
return types.NewTipSetKey(blkCids...).Cid()
@ -942,48 +954,64 @@ func (s *SplitStore) walkChain(ts *types.TipSet, inclState, inclMsgs abi.ChainEp
if err != nil {
return xerrors.Errorf("error computing cid reference to parent tipset")
}
if err := s.walkObjectIncomplete(pRef, visitor, fHot, stopWalk); err != nil {
sz, err := s.walkObjectIncomplete(pRef, visitor, fHot, stopWalk)
if err != nil {
return xerrors.Errorf("error walking parent tipset cid reference")
}
atomic.AddInt64(szWalk, sz)
// message are retained if within the inclMsgs boundary
if hdr.Height >= inclMsgs && hdr.Height > 0 {
if inclMsgs < inclState {
// we need to use walkObjectIncomplete here, as messages/receipts may be missing early on if we
// synced from snapshot and have a long HotStoreMessageRetentionPolicy.
if err := s.walkObjectIncomplete(hdr.Messages, visitor, fHot, stopWalk); err != nil {
sz, err := s.walkObjectIncomplete(hdr.Messages, visitor, fHot, stopWalk)
if err != nil {
return xerrors.Errorf("error walking messages (cid: %s): %w", hdr.Messages, err)
}
atomic.AddInt64(szWalk, sz)
if err := s.walkObjectIncomplete(hdr.ParentMessageReceipts, visitor, fHot, stopWalk); err != nil {
sz, err = s.walkObjectIncomplete(hdr.ParentMessageReceipts, visitor, fHot, stopWalk)
if err != nil {
return xerrors.Errorf("error walking messages receipts (cid: %s): %w", hdr.ParentMessageReceipts, err)
}
atomic.AddInt64(szWalk, sz)
} else {
if err := s.walkObject(hdr.Messages, visitor, fHot); err != nil {
sz, err = s.walkObject(hdr.Messages, visitor, fHot)
if err != nil {
return xerrors.Errorf("error walking messages (cid: %s): %w", hdr.Messages, err)
}
atomic.AddInt64(szWalk, sz)
if err := s.walkObject(hdr.ParentMessageReceipts, visitor, fHot); err != nil {
sz, err := s.walkObject(hdr.ParentMessageReceipts, visitor, fHot)
if err != nil {
return xerrors.Errorf("error walking message receipts (cid: %s): %w", hdr.ParentMessageReceipts, err)
}
atomic.AddInt64(szWalk, sz)
}
}
// messages and receipts outside of inclMsgs are included in the cold store
if hdr.Height < inclMsgs && hdr.Height > 0 {
if err := s.walkObjectIncomplete(hdr.Messages, visitor, fCold, stopWalk); err != nil {
sz, err := s.walkObjectIncomplete(hdr.Messages, visitor, fCold, stopWalk)
if err != nil {
return xerrors.Errorf("error walking messages (cid: %s): %w", hdr.Messages, err)
}
if err := s.walkObjectIncomplete(hdr.ParentMessageReceipts, visitor, fCold, stopWalk); err != nil {
atomic.AddInt64(szWalk, sz)
sz, err = s.walkObjectIncomplete(hdr.ParentMessageReceipts, visitor, fCold, stopWalk)
if err != nil {
return xerrors.Errorf("error walking messages receipts (cid: %s): %w", hdr.ParentMessageReceipts, err)
}
atomic.AddInt64(szWalk, sz)
}
// state is only retained if within the inclState boundary, with the exception of genesis
if hdr.Height >= inclState || hdr.Height == 0 {
if err := s.walkObject(hdr.ParentStateRoot, visitor, fHot); err != nil {
sz, err := s.walkObject(hdr.ParentStateRoot, visitor, fHot)
if err != nil {
return xerrors.Errorf("error walking state root (cid: %s): %w", hdr.ParentStateRoot, err)
}
atomic.AddInt64(szWalk, sz)
atomic.AddInt64(scanCnt, 1)
}
@ -1001,9 +1029,11 @@ func (s *SplitStore) walkChain(ts *types.TipSet, inclState, inclMsgs abi.ChainEp
if err != nil {
return xerrors.Errorf("error computing cid reference to parent tipset")
}
if err := s.walkObjectIncomplete(hRef, visitor, fHot, stopWalk); err != nil {
sz, err := s.walkObjectIncomplete(hRef, visitor, fHot, stopWalk)
if err != nil {
return xerrors.Errorf("error walking parent tipset cid reference")
}
atomic.AddInt64(szWalk, sz)
for len(toWalk) > 0 {
// walking can take a while, so check this with every opportunity
@ -1047,123 +1077,129 @@ func (s *SplitStore) walkChain(ts *types.TipSet, inclState, inclMsgs abi.ChainEp
}
}
log.Infow("chain walk done", "walked", *walkCnt, "scanned", *scanCnt)
log.Infow("chain walk done", "walked", *walkCnt, "scanned", *scanCnt, "walk size", szWalk)
s.szWalk = atomic.LoadInt64(szWalk)
return nil
}
func (s *SplitStore) walkObject(c cid.Cid, visitor ObjectVisitor, f func(cid.Cid) error) error {
func (s *SplitStore) walkObject(c cid.Cid, visitor ObjectVisitor, f func(cid.Cid) error) (int64, error) {
var sz int64
visit, err := visitor.Visit(c)
if err != nil {
return xerrors.Errorf("error visiting object: %w", err)
return 0, xerrors.Errorf("error visiting object: %w", err)
}
if !visit {
return nil
return sz, nil
}
if err := f(c); err != nil {
if err == errStopWalk {
return nil
return sz, nil
}
return err
return 0, err
}
if c.Prefix().Codec != cid.DagCBOR {
return nil
return sz, nil
}
// check this before recursing
if err := s.checkClosing(); err != nil {
return err
return 0, err
}
var links []cid.Cid
err = s.view(c, func(data []byte) error {
sz += int64(len(data))
return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) {
links = append(links, c)
})
})
if err != nil {
return xerrors.Errorf("error scanning linked block (cid: %s): %w", c, err)
return 0, xerrors.Errorf("error scanning linked block (cid: %s): %w", c, err)
}
for _, c := range links {
err := s.walkObject(c, visitor, f)
szLink, err := s.walkObject(c, visitor, f)
if err != nil {
return xerrors.Errorf("error walking link (cid: %s): %w", c, err)
return 0, xerrors.Errorf("error walking link (cid: %s): %w", c, err)
}
sz += szLink
}
return nil
return sz, nil
}
// like walkObject, but the object may be potentially incomplete (references missing)
func (s *SplitStore) walkObjectIncomplete(c cid.Cid, visitor ObjectVisitor, f, missing func(cid.Cid) error) error {
func (s *SplitStore) walkObjectIncomplete(c cid.Cid, visitor ObjectVisitor, f, missing func(cid.Cid) error) (int64, error) {
var sz int64
visit, err := visitor.Visit(c)
if err != nil {
return xerrors.Errorf("error visiting object: %w", err)
return 0, xerrors.Errorf("error visiting object: %w", err)
}
if !visit {
return nil
return sz, nil
}
// occurs check -- only for DAGs
if c.Prefix().Codec == cid.DagCBOR {
has, err := s.has(c)
if err != nil {
return xerrors.Errorf("error occur checking %s: %w", c, err)
return 0, xerrors.Errorf("error occur checking %s: %w", c, err)
}
if !has {
err = missing(c)
if err == errStopWalk {
return nil
return sz, nil
}
return err
return 0, err
}
}
if err := f(c); err != nil {
if err == errStopWalk {
return nil
return sz, nil
}
return err
return 0, err
}
if c.Prefix().Codec != cid.DagCBOR {
return nil
return sz, nil
}
// check this before recursing
if err := s.checkClosing(); err != nil {
return err
return sz, err
}
var links []cid.Cid
err = s.view(c, func(data []byte) error {
sz += int64(len(data))
return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) {
links = append(links, c)
})
})
if err != nil {
return xerrors.Errorf("error scanning linked block (cid: %s): %w", c, err)
return 0, xerrors.Errorf("error scanning linked block (cid: %s): %w", c, err)
}
for _, c := range links {
err := s.walkObjectIncomplete(c, visitor, f, missing)
szLink, err := s.walkObjectIncomplete(c, visitor, f, missing)
if err != nil {
return xerrors.Errorf("error walking link (cid: %s): %w", c, err)
return 0, xerrors.Errorf("error walking link (cid: %s): %w", c, err)
}
sz += szLink
}
return nil
return sz, nil
}
// internal version used during compaction and related operations
@ -1429,8 +1465,9 @@ func (s *SplitStore) completeCompaction() error {
}
s.compactType = none
// Note: at this point we can start the splitstore; a compaction should run on
// the first head change, which will trigger gc on the hotstore.
// Note: at this point we can start the splitstore; base epoch is not
// incremented here so a compaction should run on the first head
// change, which will trigger gc on the hotstore.
// We don't mind the second (back-to-back) compaction as the head will
// have advanced during marking and coldset accumulation.
return nil
@ -1488,6 +1525,13 @@ func (s *SplitStore) completePurge(coldr *ColdSetReader, checkpoint *Checkpoint,
return nil
}
func (s *SplitStore) clearSizeMeasurements() {
s.szKeys = 0
s.szMarkedLiveRefs = 0
s.szProtectedTxns = 0
s.szWalk = 0
}
// I really don't like having this code, but we seem to have some occasional DAG references with
// missing constituents. During testing in mainnet *some* of these references *sometimes* appeared
// after a little bit.
@ -1528,7 +1572,7 @@ func (s *SplitStore) waitForMissingRefs(markSet MarkSet) {
missing = make(map[cid.Cid]struct{})
for c := range towalk {
err := s.walkObjectIncomplete(c, visitor,
_, err := s.walkObjectIncomplete(c, visitor,
func(c cid.Cid) error {
if isUnitaryObject(c) {
return errStopWalk

View File

@ -7,15 +7,61 @@ import (
bstore "github.com/filecoin-project/lotus/blockstore"
)
const (
// Fraction of garbage in badger vlog for online GC traversal to collect garbage
AggressiveOnlineGCThreshold = 0.0001
)
func (s *SplitStore) gcHotAfterCompaction() {
// Measure hotstore size, determine if we should do full GC, determine if we can do full GC.
// We should do full GC if
// FullGCFrequency is specified and compaction index matches frequency
// OR HotstoreMaxSpaceTarget is specified and total moving space is within 150 GB of target
// We can do full if
// HotstoreMaxSpaceTarget is not specified
// OR total moving space would not exceed 50 GB below target
//
// a) If we should not do full GC => online GC
// b) If we should do full GC and can => moving GC
// c) If we should do full GC and can't => aggressive online GC
getSize := func() int64 {
sizer, ok := s.hot.(bstore.BlockstoreSize)
if ok {
size, err := sizer.Size()
if err != nil {
log.Warnf("error getting hotstore size: %s, estimating empty hot store for targeting", err)
return 0
}
return size
}
log.Errorf("Could not measure hotstore size, assuming it is 0 bytes, which it is not")
return 0
}
hotSize := getSize()
copySizeApprox := s.szKeys + s.szMarkedLiveRefs + s.szProtectedTxns + s.szWalk
shouldTarget := s.cfg.HotstoreMaxSpaceTarget > 0 && hotSize+copySizeApprox > int64(s.cfg.HotstoreMaxSpaceTarget)-int64(s.cfg.HotstoreMaxSpaceThreshold)
shouldFreq := s.cfg.HotStoreFullGCFrequency > 0 && s.compactionIndex%int64(s.cfg.HotStoreFullGCFrequency) == 0
shouldDoFull := shouldTarget || shouldFreq
canDoFull := s.cfg.HotstoreMaxSpaceTarget == 0 || hotSize+copySizeApprox < int64(s.cfg.HotstoreMaxSpaceTarget)-int64(s.cfg.HotstoreMaxSpaceSafetyBuffer)
log.Debugw("approximating new hot store size", "key size", s.szKeys, "marked live refs", s.szMarkedLiveRefs, "protected txns", s.szProtectedTxns, "walked DAG", s.szWalk)
log.Infof("measured hot store size: %d, approximate new size: %d, should do full %t, can do full %t", hotSize, copySizeApprox, shouldDoFull, canDoFull)
var opts []bstore.BlockstoreGCOption
if s.cfg.HotStoreFullGCFrequency > 0 && s.compactionIndex%int64(s.cfg.HotStoreFullGCFrequency) == 0 {
if shouldDoFull && canDoFull {
opts = append(opts, bstore.WithFullGC(true))
} else if shouldDoFull && !canDoFull {
log.Warnf("Attention! Estimated moving GC size %d is not within safety buffer %d of target max %d, performing aggressive online GC to attempt to bring hotstore size down safely", copySizeApprox, s.cfg.HotstoreMaxSpaceSafetyBuffer, s.cfg.HotstoreMaxSpaceTarget)
log.Warn("If problem continues you can 1) temporarily allocate more disk space to hotstore and 2) reflect in HotstoreMaxSpaceTarget OR trigger manual move with `lotus chain prune hot-moving`")
log.Warn("If problem continues and you do not have any more disk space you can run continue to manually trigger online GC at aggressive thresholds (< 0.01) with `lotus chain prune hot`")
opts = append(opts, bstore.WithThreshold(AggressiveOnlineGCThreshold))
}
if err := s.gcBlockstore(s.hot, opts); err != nil {
log.Warnf("error garbage collecting hostore: %s", err)
}
log.Infof("measured hot store size after GC: %d", getSize())
}
func (s *SplitStore) gcBlockstore(b bstore.Blockstore, opts []bstore.BlockstoreGCOption) error {

View File

@ -101,7 +101,7 @@ func (s *SplitStore) doReify(c cid.Cid) {
defer s.txnLk.RUnlock()
count := 0
err := s.walkObjectIncomplete(c, newTmpVisitor(),
_, err := s.walkObjectIncomplete(c, newTmpVisitor(),
func(c cid.Cid) error {
if isUnitaryObject(c) {
return errStopWalk

View File

@ -230,6 +230,35 @@
# env var: LOTUS_CHAINSTORE_SPLITSTORE_HOTSTOREFULLGCFREQUENCY
#HotStoreFullGCFrequency = 20
# HotStoreMaxSpaceTarget sets a target max disk size for the hotstore. Splitstore GC
# will run moving GC if disk utilization gets within a threshold (150 GB) of the target.
# Splitstore GC will NOT run moving GC if the total size of the move would get
# within 50 GB of the target, and instead will run a more aggressive online GC.
# If both HotStoreFullGCFrequency and HotStoreMaxSpaceTarget are set then splitstore
# GC will trigger moving GC if either configuration condition is met.
# A reasonable minimum is 2x fully GCed hotstore size + 50 G buffer.
# At this minimum size moving GC happens every time, any smaller and moving GC won't
# be able to run. In spring 2023 this minimum is ~550 GB.
#
# type: uint64
# env var: LOTUS_CHAINSTORE_SPLITSTORE_HOTSTOREMAXSPACETARGET
#HotStoreMaxSpaceTarget = 0
# When HotStoreMaxSpaceTarget is set Moving GC will be triggered when total moving size
# exceeds HotstoreMaxSpaceTarget - HotstoreMaxSpaceThreshold
#
# type: uint64
# env var: LOTUS_CHAINSTORE_SPLITSTORE_HOTSTOREMAXSPACETHRESHOLD
#HotStoreMaxSpaceThreshold = 150000000000
# Safety buffer to prevent moving GC from overflowing disk when HotStoreMaxSpaceTarget
# is set. Moving GC will not occur when total moving size exceeds
# HotstoreMaxSpaceTarget - HotstoreMaxSpaceSafetyBuffer
#
# type: uint64
# env var: LOTUS_CHAINSTORE_SPLITSTORE_HOTSTOREMAXSPACESAFETYBUFFER
#HotstoreMaxSpaceSafetyBuffer = 50000000000
[Cluster]
# EXPERIMENTAL. config to enabled node cluster with raft consensus

View File

@ -95,7 +95,9 @@ func DefaultFullNode() *FullNode {
HotStoreType: "badger",
MarkSetType: "badger",
HotStoreFullGCFrequency: 20,
HotStoreFullGCFrequency: 20,
HotStoreMaxSpaceThreshold: 150_000_000_000,
HotstoreMaxSpaceSafetyBuffer: 50_000_000_000,
},
},
Cluster: *DefaultUserRaftConfig(),

View File

@ -1286,6 +1286,35 @@ the compaction boundary; default is 0.`,
A value of 0 disables, while a value 1 will do full GC in every compaction.
Default is 20 (about once a week).`,
},
{
Name: "HotStoreMaxSpaceTarget",
Type: "uint64",
Comment: `HotStoreMaxSpaceTarget sets a target max disk size for the hotstore. Splitstore GC
will run moving GC if disk utilization gets within a threshold (150 GB) of the target.
Splitstore GC will NOT run moving GC if the total size of the move would get
within 50 GB of the target, and instead will run a more aggressive online GC.
If both HotStoreFullGCFrequency and HotStoreMaxSpaceTarget are set then splitstore
GC will trigger moving GC if either configuration condition is met.
A reasonable minimum is 2x fully GCed hotstore size + 50 G buffer.
At this minimum size moving GC happens every time, any smaller and moving GC won't
be able to run. In spring 2023 this minimum is ~550 GB.`,
},
{
Name: "HotStoreMaxSpaceThreshold",
Type: "uint64",
Comment: `When HotStoreMaxSpaceTarget is set Moving GC will be triggered when total moving size
exceeds HotstoreMaxSpaceTarget - HotstoreMaxSpaceThreshold`,
},
{
Name: "HotstoreMaxSpaceSafetyBuffer",
Type: "uint64",
Comment: `Safety buffer to prevent moving GC from overflowing disk when HotStoreMaxSpaceTarget
is set. Moving GC will not occur when total moving size exceeds
HotstoreMaxSpaceTarget - HotstoreMaxSpaceSafetyBuffer`,
},
},
"StorageMiner": []DocField{
{

View File

@ -601,6 +601,25 @@ type Splitstore struct {
// A value of 0 disables, while a value 1 will do full GC in every compaction.
// Default is 20 (about once a week).
HotStoreFullGCFrequency uint64
// HotStoreMaxSpaceTarget sets a target max disk size for the hotstore. Splitstore GC
// will run moving GC if disk utilization gets within a threshold (150 GB) of the target.
// Splitstore GC will NOT run moving GC if the total size of the move would get
// within 50 GB of the target, and instead will run a more aggressive online GC.
// If both HotStoreFullGCFrequency and HotStoreMaxSpaceTarget are set then splitstore
// GC will trigger moving GC if either configuration condition is met.
// A reasonable minimum is 2x fully GCed hotstore size + 50 G buffer.
// At this minimum size moving GC happens every time, any smaller and moving GC won't
// be able to run. In spring 2023 this minimum is ~550 GB.
HotStoreMaxSpaceTarget uint64
// When HotStoreMaxSpaceTarget is set Moving GC will be triggered when total moving size
// exceeds HotstoreMaxSpaceTarget - HotstoreMaxSpaceThreshold
HotStoreMaxSpaceThreshold uint64
// Safety buffer to prevent moving GC from overflowing disk when HotStoreMaxSpaceTarget
// is set. Moving GC will not occur when total moving size exceeds
// HotstoreMaxSpaceTarget - HotstoreMaxSpaceSafetyBuffer
HotstoreMaxSpaceSafetyBuffer uint64
}
// // Full Node

View File

@ -82,11 +82,14 @@ func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.Locked
}
cfg := &splitstore.Config{
MarkSetType: cfg.Splitstore.MarkSetType,
DiscardColdBlocks: cfg.Splitstore.ColdStoreType == "discard",
UniversalColdBlocks: cfg.Splitstore.ColdStoreType == "universal",
HotStoreMessageRetention: cfg.Splitstore.HotStoreMessageRetention,
HotStoreFullGCFrequency: cfg.Splitstore.HotStoreFullGCFrequency,
MarkSetType: cfg.Splitstore.MarkSetType,
DiscardColdBlocks: cfg.Splitstore.ColdStoreType == "discard",
UniversalColdBlocks: cfg.Splitstore.ColdStoreType == "universal",
HotStoreMessageRetention: cfg.Splitstore.HotStoreMessageRetention,
HotStoreFullGCFrequency: cfg.Splitstore.HotStoreFullGCFrequency,
HotstoreMaxSpaceTarget: cfg.Splitstore.HotStoreMaxSpaceTarget,
HotstoreMaxSpaceThreshold: cfg.Splitstore.HotStoreMaxSpaceThreshold,
HotstoreMaxSpaceSafetyBuffer: cfg.Splitstore.HotstoreMaxSpaceSafetyBuffer,
}
ss, err := splitstore.Open(path, ds, hot, cold, cfg)
if err != nil {