mark from current epoch to boundary epoch when necessary
this is necessary to avoid wearing clown shoes when the node stays offline for an extended period of time (more than 1 finality). Basically it gets quite slow if we do the full 2 finality walk, so we try to avoid it unless necessary. The conditions under which a full walk is necessary is if there is a sync gap (most likely because the node was offline) during which the tracking of writes is inaccurate because we have not yet delivered the HeadChange notification. In this case, it is possible to have actually hot blocks to be tracked before the boundary and fail to mark them accordingly. So when we detect a sync gap, we do the full walk; if there is no sync gap, we can just use the much faster boundary epoch walk.
This commit is contained in:
parent
d7ceef104e
commit
3a9b7c592d
@ -52,6 +52,10 @@ var (
|
||||
// CompactionBoundary is the number of epochs from the current epoch at which
|
||||
// we will walk the chain for live objects.
|
||||
CompactionBoundary = 2 * build.Finality
|
||||
|
||||
// SyncGapTime is the time delay from a tipset's min timestamp before we decide
|
||||
// there is a sync gap
|
||||
SyncGapTime = 5 * time.Minute
|
||||
)
|
||||
|
||||
var (
|
||||
@ -64,6 +68,11 @@ var (
|
||||
// all active blocks into the hotstore.
|
||||
warmupEpochKey = dstore.NewKey("/splitstore/warmupEpoch")
|
||||
|
||||
// syncGapEpochKey stores the last epoch where a sync gap was detected.
|
||||
// If there is a sync gap after the boundary epoch, compaction will perform
|
||||
// a slower full walk from the current epoch to the boundary epoch
|
||||
syncGapEpochKey = dstore.NewKey("/splitstore/syncGapEpoch")
|
||||
|
||||
// markSetSizeKey stores the current estimate for the mark set size.
|
||||
// this is first computed at warmup and updated in every compaction
|
||||
markSetSizeKey = dstore.NewKey("/splitstore/markSetSize")
|
||||
@ -102,9 +111,10 @@ type SplitStore struct {
|
||||
critsection int32 // compaction critical section
|
||||
closing int32 // the split store is closing
|
||||
|
||||
baseEpoch abi.ChainEpoch
|
||||
warmupEpoch abi.ChainEpoch
|
||||
warm bool
|
||||
baseEpoch abi.ChainEpoch
|
||||
syncGapEpoch abi.ChainEpoch
|
||||
warmupEpoch abi.ChainEpoch
|
||||
warm bool
|
||||
|
||||
coldPurgeSize int
|
||||
|
||||
@ -348,6 +358,17 @@ func (s *SplitStore) Start(chain ChainAccessor) error {
|
||||
return xerrors.Errorf("error loading warmup epoch: %w", err)
|
||||
}
|
||||
|
||||
// load sync gap epoch from metadata ds
|
||||
bs, err = s.ds.Get(syncGapEpochKey)
|
||||
switch err {
|
||||
case nil:
|
||||
s.syncGapEpoch = bytesToEpoch(bs)
|
||||
|
||||
case dstore.ErrNotFound:
|
||||
default:
|
||||
return xerrors.Errorf("error loading sync gap epoch: %w", err)
|
||||
}
|
||||
|
||||
// load markSetSize from metadata ds
|
||||
// if none, the splitstore will compute it during warmup and update in every compaction
|
||||
bs, err = s.ds.Get(markSetSizeKey)
|
||||
@ -393,6 +414,14 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
|
||||
s.curTs = curTs
|
||||
s.mx.Unlock()
|
||||
|
||||
timestamp := time.Unix(int64(curTs.MinTimestamp()), 0)
|
||||
if time.Since(timestamp) > SyncGapTime {
|
||||
err := s.setSyncGapEpoch(epoch)
|
||||
if err != nil {
|
||||
log.Warnf("error saving sync gap epoch: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) {
|
||||
// we are currently compacting, do nothing and wait for the next head change
|
||||
return nil
|
||||
@ -432,7 +461,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
|
||||
log.Info("compacting splitstore")
|
||||
start := time.Now()
|
||||
|
||||
s.compact(curTs)
|
||||
s.compact(curTs, s.syncGapEpoch)
|
||||
|
||||
log.Infow("compaction done", "took", time.Since(start))
|
||||
}()
|
||||
@ -513,7 +542,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error {
|
||||
}
|
||||
}
|
||||
|
||||
log.Infow("warmup stats", "visited", count, "cold", xcount, "missing", missing)
|
||||
log.Infow("warmup stats", "visited", count, "warm", xcount, "missing", missing)
|
||||
|
||||
if count > s.markSetSize {
|
||||
s.markSetSize = count + count>>2 // overestimate a bit
|
||||
@ -536,7 +565,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error {
|
||||
}
|
||||
|
||||
// Compaction/GC Algorithm
|
||||
func (s *SplitStore) compact(curTs *types.TipSet) {
|
||||
func (s *SplitStore) compact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) {
|
||||
var err error
|
||||
if s.markSetSize == 0 {
|
||||
start := time.Now()
|
||||
@ -552,7 +581,7 @@ func (s *SplitStore) compact(curTs *types.TipSet) {
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
err = s.doCompact(curTs)
|
||||
err = s.doCompact(curTs, syncGapEpoch)
|
||||
took := time.Since(start).Milliseconds()
|
||||
stats.Record(context.Background(), metrics.SplitstoreCompactionTimeSeconds.M(float64(took)/1e3))
|
||||
|
||||
@ -579,33 +608,48 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SplitStore) doCompact(curTs *types.TipSet) error {
|
||||
func (s *SplitStore) doCompact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) error {
|
||||
coldEpoch := s.baseEpoch + CompactionCold
|
||||
currentEpoch := curTs.Height()
|
||||
boundaryEpoch := currentEpoch - CompactionBoundary
|
||||
|
||||
log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch, "boundaryEpoch", boundaryEpoch)
|
||||
|
||||
coldSet, err := s.env.Create("cold", s.markSetSize)
|
||||
markSet, err := s.env.Create("live", s.markSetSize)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("error creating mark set: %w", err)
|
||||
}
|
||||
defer coldSet.Close() //nolint:errcheck
|
||||
defer markSet.Close() //nolint:errcheck
|
||||
|
||||
// 1. mark reachable cold objects by looking at the objects reachable only from the cold epoch
|
||||
log.Infow("marking reachable cold blocks", "boundaryEpoch", boundaryEpoch)
|
||||
// 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch
|
||||
log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch)
|
||||
startMark := time.Now()
|
||||
|
||||
boundaryTs, err := s.chain.GetTipsetByHeight(context.Background(), boundaryEpoch, curTs, true)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("error getting tipset at boundary epoch: %w", err)
|
||||
var markTs *types.TipSet
|
||||
if syncGapEpoch > boundaryEpoch {
|
||||
// There is a sync gap that may have caused writes that are logically after the boundary
|
||||
// epoch to be written before the respective head change notification and hence be tracked
|
||||
// at the wrong epoch.
|
||||
// This can happen if the node is offline or falls out of sync for an extended period of time.
|
||||
// In this case we perform a full walk to avoid pathologies with pushing actually hot
|
||||
// objects into the coldstore.
|
||||
markTs = curTs
|
||||
log.Infof("sync gap detected at epoch %d; marking from current epoch to boundary epoch", syncGapEpoch)
|
||||
} else {
|
||||
// There is no pathological sync gap, so we can use the much faster single tipset walk at
|
||||
// the boundary epoch.
|
||||
boundaryTs, err := s.chain.GetTipsetByHeight(context.Background(), boundaryEpoch, curTs, true)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("error getting tipset at boundary epoch: %w", err)
|
||||
}
|
||||
markTs = boundaryTs
|
||||
}
|
||||
|
||||
var count int64
|
||||
err = s.walk(boundaryTs, boundaryEpoch,
|
||||
err = s.walk(markTs, boundaryEpoch,
|
||||
func(cid cid.Cid) error {
|
||||
count++
|
||||
return coldSet.Mark(cid)
|
||||
return markSet.Mark(cid)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
@ -637,9 +681,9 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
|
||||
}
|
||||
|
||||
// check whether it is reachable in the cold boundary
|
||||
mark, err := coldSet.Has(cid)
|
||||
mark, err := markSet.Has(cid)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("error checkiing cold set for %s: %w", cid, err)
|
||||
return xerrors.Errorf("error checkiing mark set for %s: %w", cid, err)
|
||||
}
|
||||
|
||||
if mark {
|
||||
@ -909,10 +953,14 @@ func (s *SplitStore) gcHotstore() {
|
||||
|
||||
func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error {
|
||||
s.baseEpoch = epoch
|
||||
// write to datastore
|
||||
return s.ds.Put(baseEpochKey, epochToBytes(epoch))
|
||||
}
|
||||
|
||||
func (s *SplitStore) setSyncGapEpoch(epoch abi.ChainEpoch) error {
|
||||
s.syncGapEpoch = epoch
|
||||
return s.ds.Put(syncGapEpochKey, epochToBytes(epoch))
|
||||
}
|
||||
|
||||
func epochToBytes(epoch abi.ChainEpoch) []byte {
|
||||
return uint64ToBytes(uint64(epoch))
|
||||
}
|
||||
|
@ -46,6 +46,7 @@ func testSplitStore(t *testing.T, cfg *Config) {
|
||||
genBlock.Messages = garbage.Cid()
|
||||
genBlock.ParentMessageReceipts = garbage.Cid()
|
||||
genBlock.ParentStateRoot = garbage.Cid()
|
||||
genBlock.Timestamp = uint64(time.Now().Unix())
|
||||
|
||||
genTs := mock.TipSet(genBlock)
|
||||
chain.push(genTs)
|
||||
@ -80,6 +81,7 @@ func testSplitStore(t *testing.T, cfg *Config) {
|
||||
blk.Messages = garbage.Cid()
|
||||
blk.ParentMessageReceipts = garbage.Cid()
|
||||
blk.ParentStateRoot = garbage.Cid()
|
||||
blk.Timestamp = uint64(time.Now().Unix())
|
||||
|
||||
sblk, err := blk.ToStorageBlock()
|
||||
if err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user