feat:chain:splitstore auto prune (#9123)

Auto Prune

Co-authored-by: zenground0 <ZenGround0@users.noreply.github.com>
ZenGround0 2022-08-08 16:06:32 -04:00 committed by GitHub
parent 0c91b0dc10
commit 8b7be6d47e
11 changed files with 219 additions and 5 deletions


@ -46,6 +46,9 @@ var (
// stores the prune index (serial number)
pruneIndexKey = dstore.NewKey("/splitstore/pruneIndex")
// stores the base epoch of last prune in the metadata store
pruneEpochKey = dstore.NewKey("/splitstore/pruneEpoch")
log = logging.Logger("splitstore")
errClosing = errors.New("splitstore is closing")
@ -108,6 +111,21 @@ type Config struct {
// A positive value is the number of compactions before a full GC is performed;
// a value of 1 will perform full GC in every compaction.
HotStoreFullGCFrequency uint64
// EnableColdStoreAutoPrune turns on compaction of the cold store, i.e. pruning.
// Where hotstore compaction occurs once per finality, pruning happens every 3 finalities.
// Default is false.
EnableColdStoreAutoPrune bool
// ColdStoreFullGCFrequency specifies how often to perform a full (moving) GC on the coldstore.
// Only applies if auto prune is enabled. A value of 0 disables full GC, while a value of 1 will do
// full GC in every prune.
// Default is 7 (about once a week).
ColdStoreFullGCFrequency uint64
// ColdStoreRetention specifies the retention policy for data reachable from the chain, in
// finalities beyond the compaction boundary; default is 0, while -1 retains everything.
ColdStoreRetention int64
}
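
For illustration, a minimal sketch of a splitstore.Config with only the new fields set; the remaining fields and the call to splitstore.Open are elided, and the values are examples rather than recommendations:

package main

import "github.com/filecoin-project/lotus/blockstore/splitstore"

// Sketch only: other Config fields are omitted for brevity.
func exampleAutoPruneConfig() *splitstore.Config {
	return &splitstore.Config{
		EnableColdStoreAutoPrune: true, // prune the coldstore automatically
		ColdStoreFullGCFrequency: 7,    // full (moving) GC on every 7th prune
		ColdStoreRetention:       2,    // keep 2 extra finalities of reachable state
	}
}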
// ChainAccessor allows the Splitstore to access the chain. It will most likely
@ -142,6 +160,7 @@ type SplitStore struct {
mx sync.Mutex
warmupEpoch abi.ChainEpoch // protected by mx
baseEpoch abi.ChainEpoch // protected by compaction lock
pruneEpoch abi.ChainEpoch // protected by compaction lock
headChangeMx sync.Mutex
@ -676,6 +695,23 @@ func (s *SplitStore) Start(chain ChainAccessor, us stmgr.UpgradeSchedule) error
return xerrors.Errorf("error loading base epoch: %w", err)
}
// load prune epoch from metadata ds
bs, err = s.ds.Get(s.ctx, pruneEpochKey)
switch err {
case nil:
s.pruneEpoch = bytesToEpoch(bs)
case dstore.ErrNotFound:
if curTs == nil {
// this can happen in some tests
break
}
if err := s.setPruneEpoch(curTs.Height()); err != nil {
return xerrors.Errorf("error saving prune epoch: %w", err)
}
default:
return xerrors.Errorf("error loading prune epoch: %w", err)
}
// load warmup epoch from metadata ds
bs, err = s.ds.Get(s.ctx, warmupEpochKey)
switch err {
@ -775,3 +811,8 @@ func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error {
s.baseEpoch = epoch
return s.ds.Put(s.ctx, baseEpochKey, epochToBytes(epoch))
}
func (s *SplitStore) setPruneEpoch(epoch abi.ChainEpoch) error {
s.pruneEpoch = epoch
return s.ds.Put(s.ctx, pruneEpochKey, epochToBytes(epoch))
}
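
A minimal sketch of the persistence round-trip that setPruneEpoch relies on, using an in-memory datastore; the real epochToBytes/bytesToEpoch helpers live elsewhere in splitstore.go and may use a different encoding than the big-endian one shown here:

package main

import (
	"context"
	"encoding/binary"
	"fmt"

	dstore "github.com/ipfs/go-datastore"
)

// Sketch only: persist an epoch under the prune epoch key, then reload it
// the way Start does after a restart.
func main() {
	ctx := context.Background()
	ds := dstore.NewMapDatastore()
	key := dstore.NewKey("/splitstore/pruneEpoch")

	buf := make([]byte, 8)
	binary.BigEndian.PutUint64(buf, 123456) // example epoch
	if err := ds.Put(ctx, key, buf); err != nil {
		panic(err)
	}

	raw, err := ds.Get(ctx, key)
	if err != nil {
		panic(err)
	}
	fmt.Println("restored prune epoch:", binary.BigEndian.Uint64(raw))
}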


@ -20,6 +20,7 @@ import (
"github.com/filecoin-project/go-state-types/abi"
bstore "github.com/filecoin-project/lotus/blockstore"
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/chain/types"
"github.com/filecoin-project/lotus/metrics"
@ -115,6 +116,8 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
return nil
}
// Prioritize hot store compaction over cold store prune
if epoch-s.baseEpoch > CompactionThreshold {
// it's time to compact -- prepare the transaction and go!
s.beginTxnProtect()
@ -130,6 +133,40 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
log.Infow("compaction done", "took", time.Since(start))
}()
// only prune if auto prune is enabled and after at least one compaction
} else if s.cfg.EnableColdStoreAutoPrune && epoch-s.pruneEpoch > PruneThreshold && s.compactionIndex > 0 {
s.beginTxnProtect()
s.compactType = cold
go func() {
defer atomic.StoreInt32(&s.compacting, 0)
defer s.endTxnProtect()
log.Info("pruning splitstore")
start := time.Now()
var retainP func(int64) bool
switch {
case s.cfg.ColdStoreRetention > int64(0):
retainP = func(depth int64) bool {
return depth <= int64(CompactionBoundary)+s.cfg.ColdStoreRetention*int64(build.Finality)
}
case s.cfg.ColdStoreRetention < 0:
retainP = func(_ int64) bool { return true }
default:
retainP = func(depth int64) bool {
return depth <= int64(CompactionBoundary)
}
}
movingGC := s.cfg.ColdStoreFullGCFrequency > 0 && s.pruneIndex%int64(s.cfg.ColdStoreFullGCFrequency) == 0
var gcOpts []bstore.BlockstoreGCOption
if movingGC {
gcOpts = append(gcOpts, bstore.WithFullGC(true))
}
doGC := func() error { return s.gcBlockstore(s.cold, gcOpts) }
s.prune(curTs, retainP, doGC)
log.Infow("prune done", "took", time.Since(start))
}()
} else {
// no compaction necessary
atomic.StoreInt32(&s.compacting, 0)
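
The retention predicate above is the core of the pruning policy. Below is a standalone sketch of the same three cases (positive retention, unlimited with -1, and the default of 0); the boundary and finality values are stand-ins for splitstore.CompactionBoundary and build.Finality, not the real build constants:

package main

import "fmt"

// Stand-in values for illustration only.
const (
	compactionBoundary = 4 * 900 // epochs
	finality           = 900     // epochs
)

// makeRetainP mirrors the retention-policy switch in HeadChange above.
func makeRetainP(coldStoreRetention int64) func(int64) bool {
	switch {
	case coldStoreRetention > 0:
		// keep N additional finalities of chain-reachable state past the boundary
		return func(depth int64) bool {
			return depth <= compactionBoundary+coldStoreRetention*finality
		}
	case coldStoreRetention < 0:
		// any negative value: retain everything reachable from the chain
		return func(int64) bool { return true }
	default:
		// 0: retain only state up to the compaction boundary
		return func(depth int64) bool { return depth <= compactionBoundary }
	}
}

func main() {
	for _, retention := range []int64{0, 2, -1} {
		retainP := makeRetainP(retention)
		fmt.Printf("retention %d keeps an object at depth 5000: %v\n", retention, retainP(5000))
	}
}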


@ -41,6 +41,10 @@ var (
// - if it is a positive integer, then it's the number of finalities past the compaction boundary
// for which chain-reachable state objects are retained.
PruneRetainState = "splitstore.PruneRetainState"
// PruneThreshold is the number of epochs that need to have elapsed
// from the previously pruned epoch to trigger a new prune
PruneThreshold = 7 * build.Finality
)
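
For a rough sense of the cadence this constant implies, a small sketch converting PruneThreshold to wall-clock time; the 900-epoch finality and 30-second epoch duration are assumed mainnet values, not taken from this diff:

package main

import (
	"fmt"
	"time"
)

func main() {
	const (
		finalityEpochs = 900              // assumed mainnet finality, in epochs
		epochDuration  = 30 * time.Second // assumed mainnet epoch duration
	)
	pruneThreshold := 7 * finalityEpochs // mirrors PruneThreshold = 7 * build.Finality
	fmt.Println("prune threshold:", pruneThreshold, "epochs, roughly",
		time.Duration(pruneThreshold)*epochDuration, "between prunes") // ~52h30m
}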
// PruneChain instructs the SplitStore to prune chain state in the coldstore, according to the
@ -132,7 +136,9 @@ func (s *SplitStore) prune(curTs *types.TipSet, retainStateP func(int64) bool, d
func (s *SplitStore) doPrune(curTs *types.TipSet, retainStateP func(int64) bool, doGC func() error) error {
currentEpoch := curTs.Height()
log.Infow("running prune", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch)
boundaryEpoch := currentEpoch - CompactionBoundary
log.Infow("running prune", "currentEpoch", currentEpoch, "pruneEpoch", s.pruneEpoch)
markSet, err := s.markSetEnv.New("live", s.markSetSize)
if err != nil {
@ -318,6 +324,10 @@ func (s *SplitStore) doPrune(curTs *types.TipSet, retainStateP func(int64) bool,
log.Warnf("error garbage collecting cold store: %s", err)
}
if err := s.setPruneEpoch(boundaryEpoch); err != nil {
return xerrors.Errorf("error saving prune base epoch: %w", err)
}
s.pruneIndex++
err = s.ds.Put(s.ctx, pruneIndexKey, int64ToBytes(s.pruneIndex))
if err != nil {


@ -201,4 +201,28 @@
# env var: LOTUS_CHAINSTORE_SPLITSTORE_HOTSTOREFULLGCFREQUENCY
#HotStoreFullGCFrequency = 20
# EnableColdStoreAutoPrune turns on compaction of the cold store, i.e. pruning.
# Where hotstore compaction occurs once per finality, pruning happens every 3 finalities.
# Default is false.
#
# type: bool
# env var: LOTUS_CHAINSTORE_SPLITSTORE_ENABLECOLDSTOREAUTOPRUNE
#EnableColdStoreAutoPrune = false
# ColdStoreFullGCFrequency specifies how often to perform a full (moving) GC on the coldstore.
# Only applies if auto prune is enabled. A value of 0 disables full GC, while a value of 1 will do
# full GC in every prune.
# Default is 7 (about once a week).
#
# type: uint64
# env var: LOTUS_CHAINSTORE_SPLITSTORE_COLDSTOREFULLGCFREQUENCY
#ColdStoreFullGCFrequency = 7
# ColdStoreRetention specifies the retention policy for data reachable from the chain, in
# finalities beyond the compaction boundary; default is 0, while -1 retains everything.
#
# type: int64
# env var: LOTUS_CHAINSTORE_SPLITSTORE_COLDSTORERETENTION
#ColdStoreRetention = 0


@ -551,9 +551,6 @@ func (n *Ensemble) Start() *Ensemble {
cfg.Subsystems.SectorIndexApiInfo = fmt.Sprintf("%s:%s", token, m.options.mainMiner.ListenAddr)
cfg.Subsystems.SealerApiInfo = fmt.Sprintf("%s:%s", token, m.options.mainMiner.ListenAddr)
fmt.Println("config for market node, setting SectorIndexApiInfo to: ", cfg.Subsystems.SectorIndexApiInfo)
fmt.Println("config for market node, setting SealerApiInfo to: ", cfg.Subsystems.SealerApiInfo)
}
err = lr.SetConfig(func(raw interface{}) {


@ -277,3 +277,11 @@ func SplitstoreUniversal() NodeOpt {
return nil
})
}
func SplitstoreAutoPrune() NodeOpt {
return WithCfgOpt(func(cfg *config.FullNode) error {
cfg.Chainstore.Splitstore.EnableColdStoreAutoPrune = true // turn on
cfg.Chainstore.Splitstore.ColdStoreFullGCFrequency = 0 // turn off full gc
return nil
})
}
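
A usage sketch (hypothetical helper) showing how the option composes with the other itest options; it mirrors the TestAutoPrune setup later in this commit:

package itests

import (
	"testing"

	"github.com/filecoin-project/lotus/itests/kit"
)

// Sketch only: build an ensemble with splitstore auto pruning enabled.
func autoPruneEnsemble(t *testing.T) (*kit.TestFullNode, *kit.TestMiner, *kit.Ensemble) {
	opts := []interface{}{kit.MockProofs(), kit.SplitstoreUniversal(), kit.SplitstoreAutoPrune(), kit.FsRepo()}
	return kit.EnsembleMinimal(t, opts...)
}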


@ -168,6 +168,60 @@ func TestColdStorePrune(t *testing.T) {
assert.False(g.t, g.Exists(ctx, garbage), "Garbage should be removed from cold store after prune but it's still there")
}
func TestAutoPrune(t *testing.T) {
ctx := context.Background()
// disable sync checking because efficient itests require that the node is out of sync : /
splitstore.CheckSyncGap = false
opts := []interface{}{kit.MockProofs(), kit.SplitstoreUniversal(), kit.SplitstoreAutoPrune(), kit.FsRepo()}
full, genesisMiner, ens := kit.EnsembleMinimal(t, opts...)
bm := ens.InterconnectAll().BeginMining(4 * time.Millisecond)[0]
_ = full
_ = genesisMiner
// create garbage
g := NewGarbager(ctx, t, full)
garbage, e := g.Drop(ctx)
assert.True(g.t, g.Exists(ctx, garbage), "Garbage not found in splitstore")
// calculate next compaction where we should actually see cleanup
// pause, check for compacting and get compaction info
// we do this to remove the (very unlikely) race where compaction index
// and compaction epoch are in the middle of an update, or a whole compaction
// runs between the two
for {
bm.Pause()
if splitStoreCompacting(ctx, t, full) {
bm.Restart()
time.Sleep(3 * time.Second)
} else {
break
}
}
lastCompactionEpoch := splitStoreBaseEpoch(ctx, t, full)
garbageCompactionIndex := splitStoreCompactionIndex(ctx, t, full) + 1
boundary := lastCompactionEpoch + splitstore.CompactionThreshold - splitstore.CompactionBoundary
for e > boundary {
boundary += splitstore.CompactionThreshold - splitstore.CompactionBoundary
garbageCompactionIndex++
}
bm.Restart()
// wait for compaction to occur
waitForCompaction(ctx, t, garbageCompactionIndex, full)
bm.Pause()
// This data should now be moved to the coldstore.
// Access it without the hot view to keep it there while checking that it still exists.
// Only state compute uses the hot view, so the garbager's Exists check, backed by ChainReadObj, is fine.
assert.True(g.t, g.Exists(ctx, garbage), "Garbage not found in splitstore")
bm.Restart()
waitForPrune(ctx, t, 1, full)
assert.False(g.t, g.Exists(ctx, garbage), "Garbage should be removed from cold store through auto prune but it's still there")
}
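
waitForPrune is referenced above but its body falls outside this hunk; below is a plausible sketch of its shape, modeled on the waitForCompaction loop that follows, with the prune-index accessor left as an explicit stand-in rather than an actual helper from the test file:

package itests

import (
	"context"
	"testing"
	"time"

	"github.com/filecoin-project/lotus/itests/kit"
)

// Sketch only: poll until the splitstore prune index reaches pIdx.
// readPruneIndex stands in for whatever accessor the real test uses.
func waitForPruneSketch(ctx context.Context, t *testing.T, pIdx int64, n *kit.TestFullNode,
	readPruneIndex func(context.Context, *testing.T, *kit.TestFullNode) int64) {
	t.Helper()
	for {
		if readPruneIndex(ctx, t, n) >= pIdx {
			return
		}
		time.Sleep(time.Second)
	}
}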
func waitForCompaction(ctx context.Context, t *testing.T, cIdx int64, n *kit.TestFullNode) {
for {
if splitStoreCompactionIndex(ctx, t, n) >= cIdx {


@ -96,6 +96,7 @@ func DefaultFullNode() *FullNode {
MarkSetType: "badger",
HotStoreFullGCFrequency: 20,
ColdStoreFullGCFrequency: 7,
},
},
}
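
A small sketch of turning auto prune on programmatically, starting from these defaults; the helper name is illustrative and the field paths come from the kit option earlier in this commit:

package main

import "github.com/filecoin-project/lotus/node/config"

// Sketch only (hypothetical helper): start from the node defaults and enable
// cold store auto pruning explicitly.
func defaultsWithAutoPrune() *config.FullNode {
	cfg := config.DefaultFullNode()
	cfg.Chainstore.Splitstore.EnableColdStoreAutoPrune = true
	cfg.Chainstore.Splitstore.ColdStoreFullGCFrequency = 7 // full coldstore GC on every 7th prune
	cfg.Chainstore.Splitstore.ColdStoreRetention = 0       // keep only up to the compaction boundary
	return cfg
}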


@ -1106,6 +1106,30 @@ the compaction boundary; default is 0.`,
A value of 0 disables, while a value 1 will do full GC in every compaction.
Default is 20 (about once a week).`,
},
{
Name: "EnableColdStoreAutoPrune",
Type: "bool",
Comment: `EnableColdStoreAutoPrune turns on compaction of the cold store, i.e. pruning.
Where hotstore compaction occurs once per finality, pruning happens every 3 finalities.
Default is false.`,
},
{
Name: "ColdStoreFullGCFrequency",
Type: "uint64",
Comment: `ColdStoreFullGCFrequency specifies how often to perform a full (moving) GC on the coldstore.
Only applies if auto prune is enabled. A value of 0 disables full GC, while a value of 1 will do
full GC in every prune.
Default is 7 (about once a week).`,
},
{
Name: "ColdStoreRetention",
Type: "int64",
Comment: `ColdStoreRetention specifies the retention policy for data reachable from the chain, in
finalities beyond the compaction boundary; default is 0, while -1 retains everything.`,
},
},
"StorageMiner": []DocField{
{


@ -545,6 +545,21 @@ type Splitstore struct {
// A value of 0 disables, while a value 1 will do full GC in every compaction.
// Default is 20 (about once a week).
HotStoreFullGCFrequency uint64
// EnableColdStoreAutoPrune turns on compaction of the cold store, i.e. pruning.
// Where hotstore compaction occurs once per finality, pruning happens every 3 finalities.
// Default is false.
EnableColdStoreAutoPrune bool
// ColdStoreFullGCFrequency specifies how often to perform a full (moving) GC on the coldstore.
// Only applies if auto prune is enabled. A value of 0 disables full GC, while a value of 1 will do
// full GC in every prune.
// Default is 7 (about once a week).
ColdStoreFullGCFrequency uint64
// ColdStoreRetention specifies the retention policy for data reachable from the chain, in
// finalities beyond the compaction boundary; default is 0, while -1 retains everything.
ColdStoreRetention int64
}
// // Full Node


@ -86,6 +86,9 @@ func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.Locked
DiscardColdBlocks: cfg.Splitstore.ColdStoreType == "discard",
HotStoreMessageRetention: cfg.Splitstore.HotStoreMessageRetention,
HotStoreFullGCFrequency: cfg.Splitstore.HotStoreFullGCFrequency,
EnableColdStoreAutoPrune: cfg.Splitstore.EnableColdStoreAutoPrune,
ColdStoreFullGCFrequency: cfg.Splitstore.ColdStoreFullGCFrequency,
ColdStoreRetention: cfg.Splitstore.ColdStoreRetention,
}
ss, err := splitstore.Open(path, ds, hot, cold, cfg)
if err != nil {
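
For reference, the same wiring sketched as a standalone helper. The dstore.Datastore and bstore.Blockstore parameter types are assumptions here, chosen to match how ds, hot, and cold are used above, and error handling beyond Open is elided:

package main

import (
	bstore "github.com/filecoin-project/lotus/blockstore"
	"github.com/filecoin-project/lotus/blockstore/splitstore"
	"github.com/filecoin-project/lotus/node/config"
	dstore "github.com/ipfs/go-datastore"
)

// Sketch only: mirrors the SplitBlockstore wiring above, including the new
// auto prune fields; other Config fields (e.g. the markset type) are elided.
func openSplitstore(path string, ds dstore.Datastore, hot, cold bstore.Blockstore,
	c *config.Chainstore) (*splitstore.SplitStore, error) {
	cfg := &splitstore.Config{
		DiscardColdBlocks:        c.Splitstore.ColdStoreType == "discard",
		HotStoreMessageRetention: c.Splitstore.HotStoreMessageRetention,
		HotStoreFullGCFrequency:  c.Splitstore.HotStoreFullGCFrequency,
		EnableColdStoreAutoPrune: c.Splitstore.EnableColdStoreAutoPrune,
		ColdStoreFullGCFrequency: c.Splitstore.ColdStoreFullGCFrequency,
		ColdStoreRetention:       c.Splitstore.ColdStoreRetention,
	}
	return splitstore.Open(path, ds, hot, cold, cfg)
}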