2021-07-11 05:38:57 +00:00
package splitstore
import (
2021-07-12 08:14:36 +00:00
"fmt"
2021-07-11 05:38:57 +00:00
"time"
bstore "github.com/filecoin-project/lotus/blockstore"
)
2023-03-07 14:38:27 +00:00
// AggressiveOnlineGCThreshold is the fraction of garbage in a badger vlog for
// an online GC traversal to collect garbage. gcHotAfterCompaction passes it to
// bstore.WithThreshold when it falls back to aggressive online GC.
const AggressiveOnlineGCThreshold = 0.0001
2023-03-03 16:14:52 +00:00
func ( s * SplitStore ) gcHotAfterCompaction ( ) {
2023-03-07 14:38:27 +00:00
// Measure hotstore size, determine if we should do full GC, determine if we can do full GC.
// We should do full GC if
// FullGCFrequency is specified and compaction index matches frequency
// OR HotstoreMaxSpaceTarget is specified and total moving space is within 150 GB of target
// We can do full if
// HotstoreMaxSpaceTarget is not specified
// OR total moving space would not exceed 50 GB below target
//
// a) If we should not do full GC => online GC
// b) If we should do full GC and can => moving GC
// c) If we should do full GC and can't => aggressive online GC
2023-03-09 01:11:39 +00:00
getSize := func ( ) int64 {
sizer , ok := s . hot . ( bstore . BlockstoreSize )
if ok {
size , err := sizer . Size ( )
if err != nil {
log . Warnf ( "error getting hotstore size: %s, estimating empty hot store for targeting" , err )
return 0
}
return size
2023-03-07 14:38:27 +00:00
}
2023-03-09 01:20:27 +00:00
log . Errorf ( "Could not measure hotstore size, assuming it is 0 bytes, which it is not" )
return 0
2023-03-07 14:38:27 +00:00
}
2023-03-09 01:11:39 +00:00
hotSize := getSize ( )
2023-03-07 14:38:27 +00:00
copySizeApprox := s . szKeys + s . szMarkedLiveRefs + s . szProtectedTxns + s . szWalk
2023-03-09 15:40:14 +00:00
shouldTarget := s . cfg . HotstoreMaxSpaceTarget > 0 && hotSize + copySizeApprox > int64 ( s . cfg . HotstoreMaxSpaceTarget ) - int64 ( s . cfg . HotstoreMaxSpaceThreshold )
2023-03-07 14:38:27 +00:00
shouldFreq := s . cfg . HotStoreFullGCFrequency > 0 && s . compactionIndex % int64 ( s . cfg . HotStoreFullGCFrequency ) == 0
shouldDoFull := shouldTarget || shouldFreq
2023-03-09 15:40:14 +00:00
canDoFull := s . cfg . HotstoreMaxSpaceTarget == 0 || hotSize + copySizeApprox < int64 ( s . cfg . HotstoreMaxSpaceTarget ) - int64 ( s . cfg . HotstoreMaxSpaceSafetyBuffer )
2023-03-08 16:43:37 +00:00
log . Debugw ( "approximating new hot store size" , "key size" , s . szKeys , "marked live refs" , s . szMarkedLiveRefs , "protected txns" , s . szProtectedTxns , "walked DAG" , s . szWalk )
2023-03-07 14:38:27 +00:00
log . Infof ( "measured hot store size: %d, approximate new size: %d, should do full %t, can do full %t" , hotSize , copySizeApprox , shouldDoFull , canDoFull )
2021-07-27 06:53:22 +00:00
var opts [ ] bstore . BlockstoreGCOption
2023-03-07 14:38:27 +00:00
if shouldDoFull && canDoFull {
2021-07-27 06:53:22 +00:00
opts = append ( opts , bstore . WithFullGC ( true ) )
2023-03-07 14:38:27 +00:00
} else if shouldDoFull && ! canDoFull {
2023-03-09 15:57:14 +00:00
log . Warnf ( "Attention! Estimated moving GC size %d is not within safety buffer %d of target max %d, performing aggressive online GC to attempt to bring hotstore size down safely" , copySizeApprox , s . cfg . HotstoreMaxSpaceSafetyBuffer , s . cfg . HotstoreMaxSpaceTarget )
2023-03-07 14:38:27 +00:00
log . Warn ( "If problem continues you can 1) temporarily allocate more disk space to hotstore and 2) reflect in HotstoreMaxSpaceTarget OR trigger manual move with `lotus chain prune hot-moving`" )
2023-03-07 15:21:15 +00:00
log . Warn ( "If problem continues and you do not have any more disk space you can run continue to manually trigger online GC at aggressive thresholds (< 0.01) with `lotus chain prune hot`" )
2023-03-07 14:38:27 +00:00
2023-03-09 14:57:35 +00:00
opts = append ( opts , bstore . WithThreshold ( AggressiveOnlineGCThreshold ) )
2021-07-23 19:55:03 +00:00
}
if err := s . gcBlockstore ( s . hot , opts ) ; err != nil {
2021-07-12 08:14:36 +00:00
log . Warnf ( "error garbage collecting hostore: %s" , err )
}
2023-03-09 01:11:39 +00:00
log . Infof ( "measured hot store size after GC: %d" , getSize ( ) )
2021-07-12 08:14:36 +00:00
}
2021-07-27 06:53:22 +00:00
func ( s * SplitStore ) gcBlockstore ( b bstore . Blockstore , opts [ ] bstore . BlockstoreGCOption ) error {
2023-03-04 14:54:56 +00:00
if err := s . checkYield ( ) ; err != nil {
return err
}
2021-07-12 08:14:36 +00:00
if gc , ok := b . ( bstore . BlockstoreGC ) ; ok {
log . Info ( "garbage collecting blockstore" )
2021-07-11 05:38:57 +00:00
startGC := time . Now ( )
2021-07-12 08:14:36 +00:00
2023-03-04 15:23:02 +00:00
opts = append ( opts , bstore . WithCheckFreq ( 90 * time . Second ) )
opts = append ( opts , bstore . WithCheck ( s . checkYield ) )
2023-03-03 16:14:52 +00:00
if err := gc . CollectGarbage ( s . ctx , opts ... ) ; err != nil {
2021-07-12 08:14:36 +00:00
return err
2021-07-11 05:38:57 +00:00
}
2022-08-05 20:34:16 +00:00
log . Infow ( "garbage collecting blockstore done" , "took" , time . Since ( startGC ) )
2021-07-12 08:14:36 +00:00
return nil
2021-07-11 05:38:57 +00:00
}
2021-07-23 19:55:03 +00:00
return fmt . Errorf ( "blockstore doesn't support garbage collection: %T" , b )
2021-07-11 05:38:57 +00:00
}
2023-03-03 16:14:52 +00:00
// gcBlockstoreOnce invokes GCOnce on b with the supplied options.
//
// It returns an error naming b's concrete type if b does not implement
// bstore.BlockstoreGCOnce, or whatever GCOnce returns. On success it logs the
// elapsed time at debug level.
func (s *SplitStore) gcBlockstoreOnce(b bstore.Blockstore, opts []bstore.BlockstoreGCOption) error {
	once, supported := b.(bstore.BlockstoreGCOnce)
	if !supported {
		return fmt.Errorf("blockstore doesn't support gc once: %T", b)
	}

	log.Debug("gc blockstore once")
	began := time.Now()

	err := once.GCOnce(s.ctx, opts...)
	if err != nil {
		return err
	}

	log.Debugw("gc blockstore once done", "took", time.Since(began))
	return nil
}