GC respects target for max hotstore space
This commit is contained in:
parent
0fe91846cd
commit
a994153e27
@ -115,6 +115,14 @@ type Config struct {
|
||||
// A positive value is the number of compactions before a full GC is performed;
|
||||
// a value of 1 will perform full GC in every compaction.
|
||||
HotStoreFullGCFrequency uint64
|
||||
|
||||
// HotstoreMaxSpaceTarget suggests the max allowed space the hotstore can take.
|
||||
// This is not a hard limit: it is possible for the hotstore to exceed the target,
|
||||
// for example if state grows massively between compactions. The splitstore
|
||||
// will make a best effort to avoid overflowing the target and in practice should
|
||||
// never overflow. This field is used when doing GC at the end of a compaction to
|
||||
// adaptively choose between moving GC and online GC.
|
||||
HotstoreMaxSpaceTarget uint64
|
||||
}
|
||||
|
||||
// ChainAccessor allows the Splitstore to access the chain. It will most likely
|
||||
@ -165,6 +173,7 @@ type SplitStore struct {
|
||||
|
||||
compactionIndex int64
|
||||
pruneIndex int64
|
||||
onlineGCCnt int64
|
||||
|
||||
ctx context.Context
|
||||
cancel func()
|
||||
@ -203,6 +212,7 @@ type SplitStore struct {
|
||||
szWalk int64
|
||||
szProtectedTxns int64
|
||||
szToPurge int64 // expected purges before critical section protections and live marking
|
||||
szKeys int64 // approximate, not counting keys protected when entering critical section
|
||||
|
||||
// protected by txnLk
|
||||
szMarkedLiveRefs int64
|
||||
|
@ -95,7 +95,7 @@ func (s *SplitStore) doCheck(curTs *types.TipSet) error {
|
||||
}
|
||||
defer visitor.Close() //nolint
|
||||
|
||||
size, err := s.walkChain(curTs, boundaryEpoch, boundaryEpoch, visitor,
|
||||
size := s.walkChain(curTs, boundaryEpoch, boundaryEpoch, visitor,
|
||||
func(c cid.Cid) error {
|
||||
if isUnitaryObject(c) {
|
||||
return errStopWalk
|
||||
|
@ -67,6 +67,7 @@ var (
|
||||
|
||||
const (
|
||||
// batchSize is a fixed batch length of 16384 entries.
// NOTE(review): its consumers are not visible in this hunk — confirm what is batched.
batchSize = 16384
|
||||
// cidKeySize is the assumed size in bytes of one hotstore key; doCompact multiplies
// the hot object count by it to approximate the total key size (szKeys).
cidKeySize = 32
|
||||
)
|
||||
|
||||
func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
|
||||
@ -518,6 +519,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
|
||||
// might be potentially inconsistent; abort compaction and notify the user to intervene.
|
||||
return xerrors.Errorf("checkpoint exists; aborting compaction")
|
||||
}
|
||||
s.clearSizeMeasurements()
|
||||
|
||||
currentEpoch := curTs.Height()
|
||||
boundaryEpoch := currentEpoch - CompactionBoundary
|
||||
@ -709,6 +711,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
|
||||
|
||||
log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "purge", purgeCnt, "purge size", szPurge)
|
||||
s.szToPurge = int64(szPurge)
|
||||
s.szKeys = int64(hotCnt) * cidKeySize
|
||||
stats.Record(s.ctx, metrics.SplitstoreCompactionHot.M(int64(hotCnt)))
|
||||
stats.Record(s.ctx, metrics.SplitstoreCompactionCold.M(int64(coldCnt)))
|
||||
|
||||
@ -1473,8 +1476,9 @@ func (s *SplitStore) completeCompaction() error {
|
||||
}
|
||||
s.compactType = none
|
||||
|
||||
// Note: at this point we can start the splitstore; a compaction should run on
|
||||
// the first head change, which will trigger gc on the hotstore.
|
||||
// Note: at this point we can start the splitstore; base epoch is not
|
||||
// incremented here so a compaction should run on the first head
|
||||
// change, which will trigger gc on the hotstore.
|
||||
// We don't mind the second (back-to-back) compaction as the head will
|
||||
// have advanced during marking and coldset accumulation.
|
||||
return nil
|
||||
@ -1532,6 +1536,14 @@ func (s *SplitStore) completePurge(coldr *ColdSetReader, checkpoint *Checkpoint,
|
||||
return nil
|
||||
}
|
||||
|
||||
// clearSizeMeasurements resets every per-compaction size counter (szKeys,
// szMarkedLiveRefs, szProtectedTxns, szToPurge, szWalk) to zero so that the
// next compaction starts its size accounting from scratch.
//
// NOTE(review): szMarkedLiveRefs is documented on the struct as protected by
// txnLk, but this function does not take the lock itself — confirm that callers
// either hold it or run before any concurrent writer can touch the field.
func (s *SplitStore) clearSizeMeasurements() {
|
||||
s.szKeys = 0
|
||||
s.szMarkedLiveRefs = 0
|
||||
s.szProtectedTxns = 0
|
||||
s.szToPurge = 0
|
||||
s.szWalk = 0
|
||||
}
|
||||
|
||||
// I really don't like having this code, but we seem to have some occasional DAG references with
|
||||
// missing constituents. During testing in mainnet *some* of these references *sometimes* appeared
|
||||
// after a little bit.
|
||||
|
@ -7,17 +7,56 @@ import (
|
||||
bstore "github.com/filecoin-project/lotus/blockstore"
|
||||
)
|
||||
|
||||
const (
|
||||
// targetThreshold (bytes, 150 GB): moving GC is triggered once the estimated moving GC
// footprint (current hotstore size plus approximate copy size) comes within this
// distance of HotstoreMaxSpaceTarget.
targetThreshold = 150_000_000_000
|
||||
// targetBuffer (bytes, 50 GB): moving GC is not attempted when the estimated moving GC
// footprint would leave this much headroom or less below HotstoreMaxSpaceTarget.
targetBuffer = 50_000_000_000
|
||||
// aggressiveOnlineGCThreshold is the fraction of garbage in a badger vlog at which the
// online GC traversal collects it; it is passed via bstore.WithThreshold when a full GC
// is wanted but cannot be done safely.
aggressiveOnlineGCThreshold = 0.0001
|
||||
)
|
||||
|
||||
func (s *SplitStore) gcHotAfterCompaction() {
|
||||
// TODO size aware GC
|
||||
// 1. Add a config value to specify the targeted max number of bytes M
|
||||
// 2. Use measurement of marked hotstore size H (we now have this), actual hotstore size T (need to compute this), total move size H + T, approximate purged size P
|
||||
// 3. Trigger moving GC whenever H + T is within 50 GB of M
|
||||
// 4. if H + T > M use aggressive online threshold
|
||||
// 5. Use threshold that covers 3 std devs of vlogs when doing aggressive online GC. Mean == (H + P) / T, assume normal distribution
|
||||
// 6. Use threshold that covers 1 or 2 std devs of vlogs when doing regular online GC
|
||||
// Measure hotstore size, determine if we should do full GC, determine if we can do full GC.
|
||||
// We should do full GC if
|
||||
// FullGCFrequency is specified and compaction index matches frequency
|
||||
// OR HotstoreMaxSpaceTarget is specified and total moving space is within 150 GB of target
|
||||
// We can do full if
|
||||
// HotstoreMaxSpaceTarget is not specified
|
||||
// OR total moving space would not exceed 50 GB below target
|
||||
//
|
||||
// a) If we should not do full GC => online GC
|
||||
// b) If we should do full GC and can => moving GC
|
||||
// c) If we should do full GC and can't => aggressive online GC
|
||||
var hotSize int64
|
||||
var err error
|
||||
sizer, ok := s.hot.(bstore.BlockstoreSize)
|
||||
if ok {
|
||||
hotSize, err = sizer.Size()
|
||||
if err != nil {
|
||||
log.Warnf("error getting hotstore size: %s, estimating empty hot store for targeting", err)
|
||||
hotSize = 0
|
||||
}
|
||||
} else {
|
||||
hotSize = 0
|
||||
}
|
||||
|
||||
copySizeApprox := s.szKeys + s.szMarkedLiveRefs + s.szProtectedTxns + s.szWalk
|
||||
shouldTarget := s.cfg.HotstoreMaxSpaceTarget > 0 && hotSize+copySizeApprox > int64(s.cfg.HotstoreMaxSpaceTarget)-targetThreshold
|
||||
shouldFreq := s.cfg.HotStoreFullGCFrequency > 0 && s.compactionIndex%int64(s.cfg.HotStoreFullGCFrequency) == 0
|
||||
shouldDoFull := shouldTarget || shouldFreq
|
||||
canDoFull := s.cfg.HotstoreMaxSpaceTarget == 0 || hotSize+copySizeApprox < int64(s.cfg.HotstoreMaxSpaceTarget)-targetBuffer
|
||||
log.Infof("measured hot store size: %d, approximate new size: %d, should do full %t, can do full %t", hotSize, copySizeApprox, shouldDoFull, canDoFull)
|
||||
|
||||
var opts []bstore.BlockstoreGCOption
|
||||
if s.cfg.HotStoreFullGCFrequency > 0 && s.compactionIndex%int64(s.cfg.HotStoreFullGCFrequency) == 0 {
|
||||
if shouldDoFull && canDoFull {
|
||||
opts = append(opts, bstore.WithFullGC(true))
|
||||
} else if shouldDoFull && !canDoFull {
|
||||
log.Warnf("Attention! Estimated moving GC size %d is not within safety buffer %d of target max %d, performing aggressive online GC to attempt to bring hotstore size down safely", copySizeApprox, targetBuffer, s.cfg.HotstoreMaxSpaceTarget)
|
||||
log.Warn("If problem continues you can 1) temporarily allocate more disk space to hotstore and 2) reflect in HotstoreMaxSpaceTarget OR trigger manual move with `lotus chain prune hot-moving`")
|
||||
log.Warn("If problem continues and you do not have any more disk space you can run continue to manually trigger online GC at agressive thresholds (< 0.01) with `lotus chain prune hot`")
|
||||
|
||||
opts = append(opts, bstore.WithThreshold(aggressiveOnlineGCThreshold))
|
||||
}
|
||||
|
||||
if err := s.gcBlockstore(s.hot, opts); err != nil {
|
||||
|
@ -230,6 +230,17 @@
|
||||
# env var: LOTUS_CHAINSTORE_SPLITSTORE_HOTSTOREFULLGCFREQUENCY
|
||||
#HotStoreFullGCFrequency = 20
|
||||
|
||||
# HotStoreMaxSpaceTarget sets a target max disk size for the hotstore. Splitstore GC
|
||||
# will run moving GC if disk utilization gets within a threshold (150 GB) of the target.
|
||||
# Splitstore GC will NOT run moving GC if the total size of the move would get
|
||||
# within 50 GB of the target, and instead will run a more aggressive online GC.
|
||||
# If both HotStoreFullGCFrequency and HotStoreMaxSpaceTarget are set then splitstore
|
||||
# GC will trigger moving GC if either configuration condition is met.
|
||||
#
|
||||
# type: uint64
|
||||
# env var: LOTUS_CHAINSTORE_SPLITSTORE_HOTSTOREMAXSPACETARGET
|
||||
#HotStoreMaxSpaceTarget = 0
|
||||
|
||||
|
||||
[Cluster]
|
||||
# EXPERIMENTAL. config to enable node cluster with raft consensus
|
||||
|
@ -1286,6 +1286,17 @@ the compaction boundary; default is 0.`,
|
||||
A value of 0 disables, while a value 1 will do full GC in every compaction.
|
||||
Default is 20 (about once a week).`,
|
||||
},
|
||||
{
|
||||
Name: "HotStoreMaxSpaceTarget",
|
||||
Type: "uint64",
|
||||
|
||||
Comment: `HotStoreMaxSpaceTarget sets a target max disk size for the hotstore. Splitstore GC
|
||||
will run moving GC if disk utilization gets within a threshold (150 GB) of the target.
|
||||
Splitstore GC will NOT run moving GC if the total size of the move would get
|
||||
within 50 GB of the target, and instead will run a more aggressive online GC.
|
||||
If both HotStoreFullGCFrequency and HotStoreMaxSpaceTarget are set then splitstore
|
||||
GC will trigger moving GC if either configuration condition is met.`,
|
||||
},
|
||||
},
|
||||
"StorageMiner": []DocField{
|
||||
{
|
||||
|
@ -601,6 +601,13 @@ type Splitstore struct {
|
||||
// A value of 0 disables, while a value 1 will do full GC in every compaction.
|
||||
// Default is 20 (about once a week).
|
||||
HotStoreFullGCFrequency uint64
|
||||
// HotStoreMaxSpaceTarget sets a target max disk size for the hotstore. Splitstore GC
|
||||
// will run moving GC if disk utilization gets within a threshold (150 GB) of the target.
|
||||
// Splitstore GC will NOT run moving GC if the total size of the move would get
|
||||
// within 50 GB of the target, and instead will run a more aggressive online GC.
|
||||
// If both HotStoreFullGCFrequency and HotStoreMaxSpaceTarget are set then splitstore
|
||||
// GC will trigger moving GC if either configuration condition is met.
|
||||
HotStoreMaxSpaceTarget uint64
|
||||
}
|
||||
|
||||
// // Full Node
|
||||
|
@ -87,6 +87,7 @@ func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.Locked
|
||||
UniversalColdBlocks: cfg.Splitstore.ColdStoreType == "universal",
|
||||
HotStoreMessageRetention: cfg.Splitstore.HotStoreMessageRetention,
|
||||
HotStoreFullGCFrequency: cfg.Splitstore.HotStoreFullGCFrequency,
|
||||
HotstoreMaxSpaceTarget: cfg.Splitstore.HotStoreMaxSpaceTarget,
|
||||
}
|
||||
ss, err := splitstore.Open(path, ds, hot, cold, cfg)
|
||||
if err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user