GC respects target for max hotstore space

zenground0 2023-03-07 07:38:27 -07:00
parent 0fe91846cd
commit a994153e27
8 changed files with 103 additions and 12 deletions

View File

@ -115,6 +115,14 @@ type Config struct {
// A positive value is the number of compactions before a full GC is performed;
// a value of 1 will perform full GC in every compaction.
HotStoreFullGCFrequency uint64
// HotstoreMaxSpaceTarget suggests the max allowed space the hotstore can take.
// This is not a hard limit; it is possible for the hotstore to exceed the target,
// for example if state grows massively between compactions. The splitstore
// will make a best effort to avoid overflowing the target and in practice should
// never overflow. This field is used when doing GC at the end of a compaction to
// adaptively choose whether to run moving GC.
HotstoreMaxSpaceTarget uint64
}
// ChainAccessor allows the Splitstore to access the chain. It will most likely
@ -165,6 +173,7 @@ type SplitStore struct {
compactionIndex int64
pruneIndex int64
onlineGCCnt int64
ctx context.Context
cancel func()
@ -203,6 +212,7 @@ type SplitStore struct {
szWalk int64
szProtectedTxns int64
szToPurge int64 // expected purge size, before accounting for critical section protections and live marking
szKeys int64 // approximate, not counting keys protected when entering critical section
// protected by txnLk
szMarkedLiveRefs int64
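
For orientation, here is a minimal sketch of how a caller might set the new field when building the splitstore configuration. The Config type and the field names HotstoreMaxSpaceTarget and HotStoreFullGCFrequency come from the hunks above; the import path, the sample values, and the standalone program are illustrative assumptions, not lotus code.

package main

import (
	"fmt"

	"github.com/filecoin-project/lotus/blockstore/splitstore"
)

func main() {
	// Sketch: a 650 GB soft cap on the hotstore, with full GC still forced
	// every 20 compactions as before. Values are illustrative.
	cfg := &splitstore.Config{
		HotStoreFullGCFrequency: 20,              // full (moving) GC roughly once a week
		HotstoreMaxSpaceTarget:  650_000_000_000, // best-effort max hotstore size in bytes
	}
	// In lotus this config is then passed to splitstore.Open(path, ds, hot, cold, cfg),
	// as shown in the module wiring at the end of this diff.
	fmt.Println("hotstore space target:", cfg.HotstoreMaxSpaceTarget)
}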

View File

@ -95,7 +95,7 @@ func (s *SplitStore) doCheck(curTs *types.TipSet) error {
}
defer visitor.Close() //nolint
size, err := s.walkChain(curTs, boundaryEpoch, boundaryEpoch, visitor,
size := s.walkChain(curTs, boundaryEpoch, boundaryEpoch, visitor,
func(c cid.Cid) error {
if isUnitaryObject(c) {
return errStopWalk

View File

@ -66,7 +66,8 @@ var (
)
const (
batchSize = 16384
batchSize = 16384
cidKeySize = 32
)
func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
@ -518,6 +519,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
// might be potentially inconsistent; abort compaction and notify the user to intervene.
return xerrors.Errorf("checkpoint exists; aborting compaction")
}
s.clearSizeMeasurements()
currentEpoch := curTs.Height()
boundaryEpoch := currentEpoch - CompactionBoundary
@ -709,6 +711,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "purge", purgeCnt, "purge size", szPurge)
s.szToPurge = int64(szPurge)
s.szKeys = int64(hotCnt) * cidKeySize
stats.Record(s.ctx, metrics.SplitstoreCompactionHot.M(int64(hotCnt)))
stats.Record(s.ctx, metrics.SplitstoreCompactionCold.M(int64(coldCnt)))
@ -1473,8 +1476,9 @@ func (s *SplitStore) completeCompaction() error {
}
s.compactType = none
// Note: at this point we can start the splitstore; a compaction should run on
// the first head change, which will trigger gc on the hotstore.
// Note: at this point we can start the splitstore; base epoch is not
// incremented here so a compaction should run on the first head
// change, which will trigger gc on the hotstore.
// We don't mind the second (back-to-back) compaction as the head will
// have advanced during marking and coldset accumulation.
return nil
@ -1532,6 +1536,14 @@ func (s *SplitStore) completePurge(coldr *ColdSetReader, checkpoint *Checkpoint,
return nil
}
func (s *SplitStore) clearSizeMeasurements() {
s.szKeys = 0
s.szMarkedLiveRefs = 0
s.szProtectedTxns = 0
s.szToPurge = 0
s.szWalk = 0
}
// I really don't like having this code, but we seem to have some occasional DAG references with
// missing constituents. During testing in mainnet *some* of these references *sometimes* appeared
// after a little bit.
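
Stepping back from the hunks above: each compaction now resets the size counters up front (clearSizeMeasurements) and then records szToPurge and szKeys, where the key-space term is simply the hot object count times a fixed 32-byte key size. A rough, standalone arithmetic sketch of that term; the sample hotCnt is made up for illustration.

package main

import "fmt"

func main() {
	const cidKeySize = 32         // bytes per key, matching the const added above
	hotCnt := int64(100_000_000)  // sample: 100M hot objects counted during compaction
	szKeys := hotCnt * cidKeySize // ≈ 3.2 GB of key space added to the copy-size estimate
	fmt.Printf("szKeys ≈ %.1f GB\n", float64(szKeys)/1e9)
}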

View File

@ -7,17 +7,56 @@ import (
bstore "github.com/filecoin-project/lotus/blockstore"
)
const (
// When < 150 GB of space would remain below the target during moving GC, trigger moving GC
targetThreshold = 150_000_000_000
// Don't attempt moving GC if 50 GB or less would remain below the target during moving GC
targetBuffer = 50_000_000_000
// Fraction of garbage a badger vlog must contain for aggressive online GC to rewrite it
aggressiveOnlineGCThreshold = 0.0001
)
func (s *SplitStore) gcHotAfterCompaction() {
// TODO size aware GC
// 1. Add a config value to specify targeted max number of bytes M
// 2. Use measurement of marked hotstore size H (we now have this), actual hotstore size T (need to compute this), total move size H + T, approximate purged size P
// 3. Trigger moving GC whenever H + T is within 50 GB of M
// 4. if H + T > M use aggressive online threshold
// 5. Use threshold that covers 3 std devs of vlogs when doing aggressive online GC. Mean == (H + P) / T, assume normal distribution
// 6. Use threshold that covers 1 or 2 std devs of vlogs when doing regular online GC
// Measure the hotstore size, then determine whether we should do full GC and whether we can do full GC.
// We should do full GC if
// FullGCFrequency is specified and the compaction index matches the frequency
// OR HotstoreMaxSpaceTarget is specified and the total moving space is within 150 GB of the target
// We can do full GC if
// HotstoreMaxSpaceTarget is not specified
// OR the total moving space would stay more than 50 GB below the target
//
// a) If we should not do full GC => online GC
// b) If we should do full GC and can => moving GC
// c) If we should do full GC and can't => aggressive online GC
var hotSize int64
var err error
sizer, ok := s.hot.(bstore.BlockstoreSize)
if ok {
hotSize, err = sizer.Size()
if err != nil {
log.Warnf("error getting hotstore size: %s, estimating empty hot store for targeting", err)
hotSize = 0
}
} else {
hotSize = 0
}
copySizeApprox := s.szKeys + s.szMarkedLiveRefs + s.szProtectedTxns + s.szWalk
shouldTarget := s.cfg.HotstoreMaxSpaceTarget > 0 && hotSize+copySizeApprox > int64(s.cfg.HotstoreMaxSpaceTarget)-targetThreshold
shouldFreq := s.cfg.HotStoreFullGCFrequency > 0 && s.compactionIndex%int64(s.cfg.HotStoreFullGCFrequency) == 0
shouldDoFull := shouldTarget || shouldFreq
canDoFull := s.cfg.HotstoreMaxSpaceTarget == 0 || hotSize+copySizeApprox < int64(s.cfg.HotstoreMaxSpaceTarget)-targetBuffer
log.Infof("measured hot store size: %d, approximate new size: %d, should do full %t, can do full %t", hotSize, copySizeApprox, shouldDoFull, canDoFull)
var opts []bstore.BlockstoreGCOption
if s.cfg.HotStoreFullGCFrequency > 0 && s.compactionIndex%int64(s.cfg.HotStoreFullGCFrequency) == 0 {
if shouldDoFull && canDoFull {
opts = append(opts, bstore.WithFullGC(true))
} else if shouldDoFull && !canDoFull {
log.Warnf("Attention! Estimated moving GC size %d is not within safety buffer %d of target max %d, performing aggressive online GC to attempt to bring hotstore size down safely", copySizeApprox, targetBuffer, s.cfg.HotstoreMaxSpaceTarget)
log.Warn("If problem continues you can 1) temporarily allocate more disk space to hotstore and 2) reflect in HotstoreMaxSpaceTarget OR trigger manual move with `lotus chain prune hot-moving`")
log.Warn("If problem continues and you do not have any more disk space you can run continue to manually trigger online GC at agressive thresholds (< 0.01) with `lotus chain prune hot`")
opts = append(opts, bstore.WithThreshold(aggressiveOnlineGCThreshold))
}
if err := s.gcBlockstore(s.hot, opts); err != nil {
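
To make the decision rule concrete, here is a self-contained sketch that plugs sample numbers into the same thresholds. The constants and the shouldTarget/shouldFreq/canDoFull expressions mirror the hunk above; the helper function, its signature, and the sample sizes are illustrative assumptions, not lotus code.

package main

import "fmt"

const (
	targetThreshold = 150_000_000_000 // consider full GC when within 150 GB of the target
	targetBuffer    = 50_000_000_000  // refuse moving GC when within 50 GB of the target
)

// decideGC mirrors the shouldDoFull/canDoFull logic from gcHotAfterCompaction above.
func decideGC(target, hotSize, copySizeApprox, compactionIndex, fullGCFreq int64) (shouldDoFull, canDoFull bool) {
	shouldTarget := target > 0 && hotSize+copySizeApprox > target-targetThreshold
	shouldFreq := fullGCFreq > 0 && compactionIndex%fullGCFreq == 0
	shouldDoFull = shouldTarget || shouldFreq
	canDoFull = target == 0 || hotSize+copySizeApprox < target-targetBuffer
	return shouldDoFull, canDoFull
}

func main() {
	// Sample: 650 GB target, 450 GB currently on disk, ~120 GB estimated copy size.
	// 450+120 = 570 GB > 650-150 = 500 GB  => should do full GC
	// 570 GB < 650-50  = 600 GB            => can do full GC   => moving GC
	shouldFull, canFull := decideGC(650_000_000_000, 450_000_000_000, 120_000_000_000, 3, 20)
	fmt.Println("should do full GC:", shouldFull, "can do full GC:", canFull) // true true
}

When shouldDoFull comes out true but canDoFull is false, the code above instead falls back to aggressive online GC at the 0.0001 threshold.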

View File

@ -230,6 +230,17 @@
# env var: LOTUS_CHAINSTORE_SPLITSTORE_HOTSTOREFULLGCFREQUENCY
#HotStoreFullGCFrequency = 20
# HotStoreMaxSpaceTarget sets a target max disk size for the hotstore. Splitstore GC
# will run moving GC if disk utilization gets within a threshold (150 GB) of the target.
# Splitstore GC will NOT run moving GC if the total size of the move would get
# within 50 GB of the target, and instead will run a more aggressive online GC.
# If both HotStoreFullGCFrequency and HotStoreMaxSpaceTarget are set then splitstore
# GC will trigger moving GC if either configuration condition is met.
#
# type: uint64
# env var: LOTUS_CHAINSTORE_SPLITSTORE_HOTSTOREMAXSPACETARGET
#HotStoreMaxSpaceTarget = 0
[Cluster]
# EXPERIMENTAL. Config to enable node cluster with raft consensus

View File

@ -1286,6 +1286,17 @@ the compaction boundary; default is 0.`,
A value of 0 disables, while a value 1 will do full GC in every compaction.
Default is 20 (about once a week).`,
},
{
Name: "HotStoreMaxSpaceTarget",
Type: "uint64",
Comment: `HotStoreMaxSpaceTarget sets a target max disk size for the hotstore. Splitstore GC
will run moving GC if disk utilization gets within a threshold (150 GB) of the target.
Splitstore GC will NOT run moving GC if the total size of the move would get
within 50 GB of the target, and instead will run a more aggressive online GC.
If both HotStoreFullGCFrequency and HotStoreMaxSpaceTarget are set then splitstore
GC will trigger moving GC if either configuration condition is met.`,
},
},
"StorageMiner": []DocField{
{

View File

@ -601,6 +601,13 @@ type Splitstore struct {
// A value of 0 disables, while a value 1 will do full GC in every compaction.
// Default is 20 (about once a week).
HotStoreFullGCFrequency uint64
// HotStoreMaxSpaceTarget sets a target max disk size for the hotstore. Splitstore GC
// will run moving GC if disk utilization gets within a threshold (150 GB) of the target.
// Splitstore GC will NOT run moving GC if the total size of the move would get
// within 50 GB of the target, and instead will run a more aggressive online GC.
// If both HotStoreFullGCFrequency and HotStoreMaxSpaceTarget are set then splitstore
// GC will trigger moving GC if either configuration condition is met.
HotStoreMaxSpaceTarget uint64
}
// // Full Node

View File

@ -87,6 +87,7 @@ func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.Locked
UniversalColdBlocks: cfg.Splitstore.ColdStoreType == "universal",
HotStoreMessageRetention: cfg.Splitstore.HotStoreMessageRetention,
HotStoreFullGCFrequency: cfg.Splitstore.HotStoreFullGCFrequency,
HotstoreMaxSpaceTarget: cfg.Splitstore.HotStoreMaxSpaceTarget,
}
ss, err := splitstore.Open(path, ds, hot, cold, cfg)
if err != nil {