mark from current epoch to boundary epoch when necessary

this is necessary to avoid pathological behavior (hot blocks being
misclassified as cold) when the node stays offline for an extended
period of time (more than 1 finality).

The full 2-finality walk is quite slow, so we try to avoid it unless
it is necessary.
A full walk is necessary when there is a sync gap (most likely because
the node was offline), during which the tracking of writes is
inaccurate because the HeadChange notification has not yet been
delivered.  In this case, blocks that are actually hot may be tracked
before the boundary and fail to be marked accordingly.  So when we
detect a sync gap, we do the full walk; if there is no sync gap, we
can just use the much faster boundary epoch walk.
vyzo 2021-03-14 12:32:05 +02:00
parent d7ceef104e
commit 3a9b7c592d
2 changed files with 70 additions and 20 deletions
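
In outline, the change has two parts: HeadChange records the epoch at which a
sync gap is detected (a new head whose timestamp is older than SyncGapTime),
and doCompact compares that epoch against the compaction boundary to choose
between the slow full walk and the fast boundary-epoch walk. The standalone Go
sketch below condenses that decision; the constant values, the epoch type, and
the helper names (detectSyncGap, chooseWalkStart) are illustrative stand-ins
rather than the actual code, which follows in the diff.

package main

import (
	"fmt"
	"time"
)

// Values mirroring the diff below; Finality stands in for build.Finality
// (900 epochs on mainnet).
const (
	Finality           = 900
	CompactionBoundary = 2 * Finality
	SyncGapTime        = 5 * time.Minute
)

type epoch int64

// detectSyncGap mirrors the HeadChange check: if the new head's minimum
// timestamp is older than SyncGapTime, the node fell out of sync, so the
// head's epoch is recorded as the sync gap epoch.
func detectSyncGap(headTimestamp time.Time, headEpoch epoch, syncGapEpoch *epoch) {
	if time.Since(headTimestamp) > SyncGapTime {
		*syncGapEpoch = headEpoch
	}
}

// chooseWalkStart mirrors the doCompact decision: a sync gap past the
// boundary forces the slow full walk from the current epoch; otherwise the
// single-tipset walk at the boundary epoch suffices.
func chooseWalkStart(currentEpoch, syncGapEpoch epoch) (startEpoch epoch, fullWalk bool) {
	boundaryEpoch := currentEpoch - CompactionBoundary
	if syncGapEpoch > boundaryEpoch {
		return currentEpoch, true
	}
	return boundaryEpoch, false
}

func main() {
	var syncGapEpoch epoch
	// A head whose timestamp is 10 minutes old signals a sync gap at epoch 100000.
	detectSyncGap(time.Now().Add(-10*time.Minute), 100000, &syncGapEpoch)
	// At compaction, epoch 100000 is past the boundary (101000-1800), so do a full walk.
	start, full := chooseWalkStart(101000, syncGapEpoch)
	fmt.Println(start, full) // 101000 true
}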


@@ -52,6 +52,10 @@ var (
// CompactionBoundary is the number of epochs from the current epoch at which
// we will walk the chain for live objects.
CompactionBoundary = 2 * build.Finality
+ // SyncGapTime is the time delay from a tipset's min timestamp before we decide
+ // there is a sync gap
+ SyncGapTime = 5 * time.Minute
)
var (
@@ -64,6 +68,11 @@ var (
// all active blocks into the hotstore.
warmupEpochKey = dstore.NewKey("/splitstore/warmupEpoch")
+ // syncGapEpochKey stores the last epoch where a sync gap was detected.
+ // If there is a sync gap after the boundary epoch, compaction will perform
+ // a slower full walk from the current epoch to the boundary epoch
+ syncGapEpochKey = dstore.NewKey("/splitstore/syncGapEpoch")
// markSetSizeKey stores the current estimate for the mark set size.
// this is first computed at warmup and updated in every compaction
markSetSizeKey = dstore.NewKey("/splitstore/markSetSize")
@@ -102,9 +111,10 @@ type SplitStore struct {
critsection int32 // compaction critical section
closing int32 // the split store is closing
- baseEpoch abi.ChainEpoch
- warmupEpoch abi.ChainEpoch
- warm bool
+ baseEpoch abi.ChainEpoch
+ syncGapEpoch abi.ChainEpoch
+ warmupEpoch abi.ChainEpoch
+ warm bool
coldPurgeSize int
@@ -348,6 +358,17 @@ func (s *SplitStore) Start(chain ChainAccessor) error {
return xerrors.Errorf("error loading warmup epoch: %w", err)
}
+ // load sync gap epoch from metadata ds
+ bs, err = s.ds.Get(syncGapEpochKey)
+ switch err {
+ case nil:
+ s.syncGapEpoch = bytesToEpoch(bs)
+ case dstore.ErrNotFound:
+ default:
+ return xerrors.Errorf("error loading sync gap epoch: %w", err)
+ }
// load markSetSize from metadata ds
// if none, the splitstore will compute it during warmup and update in every compaction
bs, err = s.ds.Get(markSetSizeKey)
@@ -393,6 +414,14 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
s.curTs = curTs
s.mx.Unlock()
+ timestamp := time.Unix(int64(curTs.MinTimestamp()), 0)
+ if time.Since(timestamp) > SyncGapTime {
+ err := s.setSyncGapEpoch(epoch)
+ if err != nil {
+ log.Warnf("error saving sync gap epoch: %s", err)
+ }
+ }
if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) {
// we are currently compacting, do nothing and wait for the next head change
return nil
@@ -432,7 +461,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
log.Info("compacting splitstore")
start := time.Now()
- s.compact(curTs)
+ s.compact(curTs, s.syncGapEpoch)
log.Infow("compaction done", "took", time.Since(start))
}()
@@ -513,7 +542,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error {
}
}
log.Infow("warmup stats", "visited", count, "cold", xcount, "missing", missing)
log.Infow("warmup stats", "visited", count, "warm", xcount, "missing", missing)
if count > s.markSetSize {
s.markSetSize = count + count>>2 // overestimate a bit
@@ -536,7 +565,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error {
}
// Compaction/GC Algorithm
- func (s *SplitStore) compact(curTs *types.TipSet) {
+ func (s *SplitStore) compact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) {
var err error
if s.markSetSize == 0 {
start := time.Now()
@@ -552,7 +581,7 @@ func (s *SplitStore) compact(curTs *types.TipSet) {
}
start := time.Now()
- err = s.doCompact(curTs)
+ err = s.doCompact(curTs, syncGapEpoch)
took := time.Since(start).Milliseconds()
stats.Record(context.Background(), metrics.SplitstoreCompactionTimeSeconds.M(float64(took)/1e3))
@@ -579,33 +608,48 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error {
return nil
}
- func (s *SplitStore) doCompact(curTs *types.TipSet) error {
+ func (s *SplitStore) doCompact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) error {
coldEpoch := s.baseEpoch + CompactionCold
currentEpoch := curTs.Height()
boundaryEpoch := currentEpoch - CompactionBoundary
log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch, "boundaryEpoch", boundaryEpoch)
- coldSet, err := s.env.Create("cold", s.markSetSize)
+ markSet, err := s.env.Create("live", s.markSetSize)
if err != nil {
return xerrors.Errorf("error creating mark set: %w", err)
}
- defer coldSet.Close() //nolint:errcheck
+ defer markSet.Close() //nolint:errcheck
- // 1. mark reachable cold objects by looking at the objects reachable only from the cold epoch
- log.Infow("marking reachable cold blocks", "boundaryEpoch", boundaryEpoch)
+ // 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch
+ log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch)
startMark := time.Now()
- boundaryTs, err := s.chain.GetTipsetByHeight(context.Background(), boundaryEpoch, curTs, true)
- if err != nil {
- return xerrors.Errorf("error getting tipset at boundary epoch: %w", err)
+ var markTs *types.TipSet
+ if syncGapEpoch > boundaryEpoch {
+ // There is a sync gap that may have caused writes that are logically after the boundary
+ // epoch to be written before the respective head change notification and hence be tracked
+ // at the wrong epoch.
+ // This can happen if the node is offline or falls out of sync for an extended period of time.
+ // In this case we perform a full walk to avoid pathologies with pushing actually hot
+ // objects into the coldstore.
+ markTs = curTs
+ log.Infof("sync gap detected at epoch %d; marking from current epoch to boundary epoch", syncGapEpoch)
+ } else {
+ // There is no pathological sync gap, so we can use the much faster single tipset walk at
+ // the boundary epoch.
+ boundaryTs, err := s.chain.GetTipsetByHeight(context.Background(), boundaryEpoch, curTs, true)
+ if err != nil {
+ return xerrors.Errorf("error getting tipset at boundary epoch: %w", err)
+ }
+ markTs = boundaryTs
}
var count int64
- err = s.walk(boundaryTs, boundaryEpoch,
+ err = s.walk(markTs, boundaryEpoch,
func(cid cid.Cid) error {
count++
- return coldSet.Mark(cid)
+ return markSet.Mark(cid)
})
if err != nil {
@@ -637,9 +681,9 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
}
// check whether it is reachable in the cold boundary
- mark, err := coldSet.Has(cid)
+ mark, err := markSet.Has(cid)
if err != nil {
return xerrors.Errorf("error checkiing cold set for %s: %w", cid, err)
return xerrors.Errorf("error checkiing mark set for %s: %w", cid, err)
}
if mark {
@@ -909,10 +953,14 @@ func (s *SplitStore) gcHotstore() {
func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error {
s.baseEpoch = epoch
// write to datastore
return s.ds.Put(baseEpochKey, epochToBytes(epoch))
}
+ func (s *SplitStore) setSyncGapEpoch(epoch abi.ChainEpoch) error {
+ s.syncGapEpoch = epoch
+ return s.ds.Put(syncGapEpochKey, epochToBytes(epoch))
+ }
func epochToBytes(epoch abi.ChainEpoch) []byte {
return uint64ToBytes(uint64(epoch))
}


@@ -46,6 +46,7 @@ func testSplitStore(t *testing.T, cfg *Config) {
genBlock.Messages = garbage.Cid()
genBlock.ParentMessageReceipts = garbage.Cid()
genBlock.ParentStateRoot = garbage.Cid()
+ genBlock.Timestamp = uint64(time.Now().Unix())
genTs := mock.TipSet(genBlock)
chain.push(genTs)
@@ -80,6 +81,7 @@ func testSplitStore(t *testing.T, cfg *Config) {
blk.Messages = garbage.Cid()
blk.ParentMessageReceipts = garbage.Cid()
blk.ParentStateRoot = garbage.Cid()
+ blk.Timestamp = uint64(time.Now().Unix())
sblk, err := blk.ToStorageBlock()
if err != nil {