feat:chain:splitstore auto prune (#9123)
Auto Prune Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com>
This commit is contained in:
parent
0c91b0dc10
commit
8b7be6d47e
@ -46,6 +46,9 @@ var (
|
||||
// stores the prune index (serial number)
|
||||
pruneIndexKey = dstore.NewKey("/splitstore/pruneIndex")
|
||||
|
||||
// stores the base epoch of last prune in the metadata store
|
||||
pruneEpochKey = dstore.NewKey("/splitstore/pruneEpoch")
|
||||
|
||||
log = logging.Logger("splitstore")
|
||||
|
||||
errClosing = errors.New("splitstore is closing")
|
||||
@ -108,6 +111,21 @@ type Config struct {
|
||||
// A positive value is the number of compactions before a full GC is performed;
|
||||
// a value of 1 will perform full GC in every compaction.
|
||||
HotStoreFullGCFrequency uint64
|
||||
|
||||
// EnableColdStoreAutoPrune turns on compaction of the cold store i.e. pruning
|
||||
// where hotstore compaction occurs every finality, pruning happens every 3 finalities.
|
||||
// Default is false
|
||||
EnableColdStoreAutoPrune bool
|
||||
|
||||
// ColdStoreFullGCFrequency specifies how often to perform a full (moving) GC on the coldstore.
|
||||
// Only applies if auto prune is enabled. A value of 0 disables while a value of 1 will do
|
||||
// full GC in every prune.
|
||||
// Default is 7 (about once a week)
|
||||
ColdStoreFullGCFrequency uint64
|
||||
|
||||
// ColdStoreRetention specifies the retention policy for data reachable from the chain, in
|
||||
// finalities beyond the compaction boundary, default is 0, -1 retains everything
|
||||
ColdStoreRetention int64
|
||||
}
|
||||
|
||||
// ChainAccessor allows the Splitstore to access the chain. It will most likely
|
||||
@ -142,6 +160,7 @@ type SplitStore struct {
|
||||
mx sync.Mutex
|
||||
warmupEpoch abi.ChainEpoch // protected by mx
|
||||
baseEpoch abi.ChainEpoch // protected by compaction lock
|
||||
pruneEpoch abi.ChainEpoch // protected by compaction lock
|
||||
|
||||
headChangeMx sync.Mutex
|
||||
|
||||
@ -676,6 +695,23 @@ func (s *SplitStore) Start(chain ChainAccessor, us stmgr.UpgradeSchedule) error
|
||||
return xerrors.Errorf("error loading base epoch: %w", err)
|
||||
}
|
||||
|
||||
// load prune epoch from metadata ds
|
||||
bs, err = s.ds.Get(s.ctx, pruneEpochKey)
|
||||
switch err {
|
||||
case nil:
|
||||
s.pruneEpoch = bytesToEpoch(bs)
|
||||
case dstore.ErrNotFound:
|
||||
if curTs == nil {
|
||||
//this can happen in some tests
|
||||
break
|
||||
}
|
||||
if err := s.setPruneEpoch(curTs.Height()); err != nil {
|
||||
return xerrors.Errorf("error saving prune epoch: %w", err)
|
||||
}
|
||||
default:
|
||||
return xerrors.Errorf("error loading prune epoch: %w", err)
|
||||
}
|
||||
|
||||
// load warmup epoch from metadata ds
|
||||
bs, err = s.ds.Get(s.ctx, warmupEpochKey)
|
||||
switch err {
|
||||
@ -775,3 +811,8 @@ func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error {
|
||||
s.baseEpoch = epoch
|
||||
return s.ds.Put(s.ctx, baseEpochKey, epochToBytes(epoch))
|
||||
}
|
||||
|
||||
func (s *SplitStore) setPruneEpoch(epoch abi.ChainEpoch) error {
|
||||
s.pruneEpoch = epoch
|
||||
return s.ds.Put(s.ctx, pruneEpochKey, epochToBytes(epoch))
|
||||
}
|
||||
|
@ -20,6 +20,7 @@ import (
|
||||
|
||||
"github.com/filecoin-project/go-state-types/abi"
|
||||
|
||||
bstore "github.com/filecoin-project/lotus/blockstore"
|
||||
"github.com/filecoin-project/lotus/build"
|
||||
"github.com/filecoin-project/lotus/chain/types"
|
||||
"github.com/filecoin-project/lotus/metrics"
|
||||
@ -115,6 +116,8 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Prioritize hot store compaction over cold store prune
|
||||
|
||||
if epoch-s.baseEpoch > CompactionThreshold {
|
||||
// it's time to compact -- prepare the transaction and go!
|
||||
s.beginTxnProtect()
|
||||
@ -130,6 +133,40 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
|
||||
|
||||
log.Infow("compaction done", "took", time.Since(start))
|
||||
}()
|
||||
// only prune if auto prune is enabled and after at least one compaction
|
||||
} else if s.cfg.EnableColdStoreAutoPrune && epoch-s.pruneEpoch > PruneThreshold && s.compactionIndex > 0 {
|
||||
s.beginTxnProtect()
|
||||
s.compactType = cold
|
||||
go func() {
|
||||
defer atomic.StoreInt32(&s.compacting, 0)
|
||||
defer s.endTxnProtect()
|
||||
|
||||
log.Info("pruning splitstore")
|
||||
start := time.Now()
|
||||
|
||||
var retainP func(int64) bool
|
||||
switch {
|
||||
case s.cfg.ColdStoreRetention > int64(0):
|
||||
retainP = func(depth int64) bool {
|
||||
return depth <= int64(CompactionBoundary)+s.cfg.ColdStoreRetention*int64(build.Finality)
|
||||
}
|
||||
case s.cfg.ColdStoreRetention < 0:
|
||||
retainP = func(_ int64) bool { return true }
|
||||
default:
|
||||
retainP = func(depth int64) bool {
|
||||
return depth <= int64(CompactionBoundary)
|
||||
}
|
||||
}
|
||||
movingGC := s.cfg.ColdStoreFullGCFrequency > 0 && s.pruneIndex%int64(s.cfg.ColdStoreFullGCFrequency) == 0
|
||||
var gcOpts []bstore.BlockstoreGCOption
|
||||
if movingGC {
|
||||
gcOpts = append(gcOpts, bstore.WithFullGC(true))
|
||||
}
|
||||
doGC := func() error { return s.gcBlockstore(s.cold, gcOpts) }
|
||||
|
||||
s.prune(curTs, retainP, doGC)
|
||||
log.Infow("prune done", "took", time.Since(start))
|
||||
}()
|
||||
} else {
|
||||
// no compaction necessary
|
||||
atomic.StoreInt32(&s.compacting, 0)
|
||||
|
@ -41,6 +41,10 @@ var (
|
||||
// - if it is a positive integer, then it's the number of finalities past the compaction boundary
|
||||
// for which chain-reachable state objects are retained.
|
||||
PruneRetainState = "splitstore.PruneRetainState"
|
||||
|
||||
// PruneThreshold is the number of epochs that need to have elapsed
|
||||
// from the previously pruned epoch to trigger a new prune
|
||||
PruneThreshold = 7 * build.Finality
|
||||
)
|
||||
|
||||
// PruneChain instructs the SplitStore to prune chain state in the coldstore, according to the
|
||||
@ -132,7 +136,9 @@ func (s *SplitStore) prune(curTs *types.TipSet, retainStateP func(int64) bool, d
|
||||
|
||||
func (s *SplitStore) doPrune(curTs *types.TipSet, retainStateP func(int64) bool, doGC func() error) error {
|
||||
currentEpoch := curTs.Height()
|
||||
log.Infow("running prune", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch)
|
||||
boundaryEpoch := currentEpoch - CompactionBoundary
|
||||
|
||||
log.Infow("running prune", "currentEpoch", currentEpoch, "pruneEpoch", s.pruneEpoch)
|
||||
|
||||
markSet, err := s.markSetEnv.New("live", s.markSetSize)
|
||||
if err != nil {
|
||||
@ -318,6 +324,10 @@ func (s *SplitStore) doPrune(curTs *types.TipSet, retainStateP func(int64) bool,
|
||||
log.Warnf("error garbage collecting cold store: %s", err)
|
||||
}
|
||||
|
||||
if err := s.setPruneEpoch(boundaryEpoch); err != nil {
|
||||
return xerrors.Errorf("error saving prune base epoch: %w", err)
|
||||
}
|
||||
|
||||
s.pruneIndex++
|
||||
// Persist the prune index that was just incremented. Writing the compaction
// index here would store an unrelated counter under pruneIndexKey.
err = s.ds.Put(s.ctx, pruneIndexKey, int64ToBytes(s.pruneIndex))
|
||||
if err != nil {
|
||||
|
@ -201,4 +201,28 @@
|
||||
# env var: LOTUS_CHAINSTORE_SPLITSTORE_HOTSTOREFULLGCFREQUENCY
|
||||
#HotStoreFullGCFrequency = 20
|
||||
|
||||
# EnableColdStoreAutoPrune turns on compaction of the cold store i.e. pruning
|
||||
# where hotstore compaction occurs every finality, pruning happens every 3 finalities.
|
||||
# Default is false
|
||||
#
|
||||
# type: bool
|
||||
# env var: LOTUS_CHAINSTORE_SPLITSTORE_ENABLECOLDSTOREAUTOPRUNE
|
||||
#EnableColdStoreAutoPrune = false
|
||||
|
||||
# ColdStoreFullGCFrequency specifies how often to perform a full (moving) GC on the coldstore.
|
||||
# Only applies if auto prune is enabled. A value of 0 disables while a value of 1 will do
|
||||
# full GC in every prune.
|
||||
# Default is 7 (about once a week)
|
||||
#
|
||||
# type: uint64
|
||||
# env var: LOTUS_CHAINSTORE_SPLITSTORE_COLDSTOREFULLGCFREQUENCY
|
||||
#ColdStoreFullGCFrequency = 7
|
||||
|
||||
# ColdStoreRetention specifies the retention policy for data reachable from the chain, in
|
||||
# finalities beyond the compaction boundary, default is 0, -1 retains everything
|
||||
#
|
||||
# type: int64
|
||||
# env var: LOTUS_CHAINSTORE_SPLITSTORE_COLDSTORERETENTION
|
||||
#ColdStoreRetention = 0
|
||||
|
||||
|
||||
|
@ -551,9 +551,6 @@ func (n *Ensemble) Start() *Ensemble {
|
||||
|
||||
cfg.Subsystems.SectorIndexApiInfo = fmt.Sprintf("%s:%s", token, m.options.mainMiner.ListenAddr)
|
||||
cfg.Subsystems.SealerApiInfo = fmt.Sprintf("%s:%s", token, m.options.mainMiner.ListenAddr)
|
||||
|
||||
fmt.Println("config for market node, setting SectorIndexApiInfo to: ", cfg.Subsystems.SectorIndexApiInfo)
|
||||
fmt.Println("config for market node, setting SealerApiInfo to: ", cfg.Subsystems.SealerApiInfo)
|
||||
}
|
||||
|
||||
err = lr.SetConfig(func(raw interface{}) {
|
||||
|
@ -277,3 +277,11 @@ func SplitstoreUniversal() NodeOpt {
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func SplitstoreAutoPrune() NodeOpt {
|
||||
return WithCfgOpt(func(cfg *config.FullNode) error {
|
||||
cfg.Chainstore.Splitstore.EnableColdStoreAutoPrune = true // turn on
|
||||
cfg.Chainstore.Splitstore.ColdStoreFullGCFrequency = 0 // turn off full gc
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
@ -168,6 +168,60 @@ func TestColdStorePrune(t *testing.T) {
|
||||
assert.False(g.t, g.Exists(ctx, garbage), "Garbage should be removed from cold store after prune but it's still there")
|
||||
}
|
||||
|
||||
func TestAutoPrune(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
// disable sync checking because efficient itests require that the node is out of sync : /
|
||||
splitstore.CheckSyncGap = false
|
||||
opts := []interface{}{kit.MockProofs(), kit.SplitstoreUniversal(), kit.SplitstoreAutoPrune(), kit.FsRepo()}
|
||||
full, genesisMiner, ens := kit.EnsembleMinimal(t, opts...)
|
||||
bm := ens.InterconnectAll().BeginMining(4 * time.Millisecond)[0]
|
||||
_ = full
|
||||
_ = genesisMiner
|
||||
|
||||
// create garbage
|
||||
g := NewGarbager(ctx, t, full)
|
||||
garbage, e := g.Drop(ctx)
|
||||
assert.True(g.t, g.Exists(ctx, garbage), "Garbage not found in splitstore")
|
||||
|
||||
// calculate next compaction where we should actually see cleanup
|
||||
|
||||
// pause, check for compacting and get compaction info
|
||||
// we do this to remove the (very unlikely) race where compaction index
|
||||
// and compaction epoch are in the middle of update, or a whole compaction
|
||||
// runs between the two
|
||||
for {
|
||||
bm.Pause()
|
||||
if splitStoreCompacting(ctx, t, full) {
|
||||
bm.Restart()
|
||||
time.Sleep(3 * time.Second)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
lastCompactionEpoch := splitStoreBaseEpoch(ctx, t, full)
|
||||
garbageCompactionIndex := splitStoreCompactionIndex(ctx, t, full) + 1
|
||||
boundary := lastCompactionEpoch + splitstore.CompactionThreshold - splitstore.CompactionBoundary
|
||||
|
||||
for e > boundary {
|
||||
boundary += splitstore.CompactionThreshold - splitstore.CompactionBoundary
|
||||
garbageCompactionIndex++
|
||||
}
|
||||
bm.Restart()
|
||||
|
||||
// wait for compaction to occur
|
||||
waitForCompaction(ctx, t, garbageCompactionIndex, full)
|
||||
|
||||
bm.Pause()
|
||||
|
||||
// This data should now be moved to the coldstore.
|
||||
// Access it without hotview to keep it there while checking that it still exists
|
||||
// Only state compute uses hot view so garbager Exists backed by ChainReadObj is all good
|
||||
assert.True(g.t, g.Exists(ctx, garbage), "Garbage not found in splitstore")
|
||||
bm.Restart()
|
||||
waitForPrune(ctx, t, 1, full)
|
||||
assert.False(g.t, g.Exists(ctx, garbage), "Garbage should be removed from cold store through auto prune but it's still there")
|
||||
}
|
||||
|
||||
func waitForCompaction(ctx context.Context, t *testing.T, cIdx int64, n *kit.TestFullNode) {
|
||||
for {
|
||||
if splitStoreCompactionIndex(ctx, t, n) >= cIdx {
|
||||
|
@ -95,7 +95,8 @@ func DefaultFullNode() *FullNode {
|
||||
HotStoreType: "badger",
|
||||
MarkSetType: "badger",
|
||||
|
||||
HotStoreFullGCFrequency: 20,
|
||||
HotStoreFullGCFrequency: 20,
|
||||
ColdStoreFullGCFrequency: 7,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -1106,6 +1106,30 @@ the compaction boundary; default is 0.`,
|
||||
A value of 0 disables, while a value 1 will do full GC in every compaction.
|
||||
Default is 20 (about once a week).`,
|
||||
},
|
||||
{
|
||||
Name: "EnableColdStoreAutoPrune",
|
||||
Type: "bool",
|
||||
|
||||
Comment: `EnableColdStoreAutoPrune turns on compaction of the cold store i.e. pruning
|
||||
where hotstore compaction occurs every finality epochs pruning happens every 3 finalities
|
||||
Default is false`,
|
||||
},
|
||||
{
|
||||
Name: "ColdStoreFullGCFrequency",
|
||||
Type: "uint64",
|
||||
|
||||
Comment: `ColdStoreFullGCFrequency specifies how often to performa a full (moving) GC on the coldstore.
|
||||
Only applies if auto prune is enabled. A value of 0 disables while a value of 1 will do
|
||||
full GC in every prune.
|
||||
Default is 7 (about once every a week)`,
|
||||
},
|
||||
{
|
||||
Name: "ColdStoreRetention",
|
||||
Type: "int64",
|
||||
|
||||
Comment: `ColdStoreRetention specifies the retention policy for data reachable from the chain, in
|
||||
finalities beyond the compaction boundary, default is 0, -1 retains everything`,
|
||||
},
|
||||
},
|
||||
"StorageMiner": []DocField{
|
||||
{
|
||||
|
@ -545,6 +545,21 @@ type Splitstore struct {
|
||||
// A value of 0 disables, while a value 1 will do full GC in every compaction.
|
||||
// Default is 20 (about once a week).
|
||||
HotStoreFullGCFrequency uint64
|
||||
|
||||
// EnableColdStoreAutoPrune turns on compaction of the cold store i.e. pruning
|
||||
// where hotstore compaction occurs every finality, pruning happens every 3 finalities.
|
||||
// Default is false
|
||||
EnableColdStoreAutoPrune bool
|
||||
|
||||
// ColdStoreFullGCFrequency specifies how often to perform a full (moving) GC on the coldstore.
|
||||
// Only applies if auto prune is enabled. A value of 0 disables while a value of 1 will do
|
||||
// full GC in every prune.
|
||||
// Default is 7 (about once a week)
|
||||
ColdStoreFullGCFrequency uint64
|
||||
|
||||
// ColdStoreRetention specifies the retention policy for data reachable from the chain, in
|
||||
// finalities beyond the compaction boundary, default is 0, -1 retains everything
|
||||
ColdStoreRetention int64
|
||||
}
|
||||
|
||||
// // Full Node
|
||||
|
@ -86,6 +86,9 @@ func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.Locked
|
||||
DiscardColdBlocks: cfg.Splitstore.ColdStoreType == "discard",
|
||||
HotStoreMessageRetention: cfg.Splitstore.HotStoreMessageRetention,
|
||||
HotStoreFullGCFrequency: cfg.Splitstore.HotStoreFullGCFrequency,
|
||||
EnableColdStoreAutoPrune: cfg.Splitstore.EnableColdStoreAutoPrune,
|
||||
ColdStoreFullGCFrequency: cfg.Splitstore.ColdStoreFullGCFrequency,
|
||||
ColdStoreRetention: cfg.Splitstore.ColdStoreRetention,
|
||||
}
|
||||
ss, err := splitstore.Open(path, ds, hot, cold, cfg)
|
||||
if err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user