fix some residual purge races

2021-07-03 08:13:26 +03:00 · 2021-07-03 08:13:26 +03:00 · e4bb4be855
commit e4bb4be855
parent 68bc5d2291
1 changed files with 41 additions and 26 deletions
--- a/blockstore/splitstore/splitstore.go
+++ b/blockstore/splitstore/splitstore.go
@ -236,17 +236,19 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) {
 	}
 	if has {
-		// treat it as an implicit Write, absence options -- the vm uses this check to avoid duplicate
+		// treat it as an implicit (recursive) Write, absence options -- the vm uses this check to avoid
-		// writes on Flush. When we have options in the API, the vm can explicitly signal that this is
+		// duplicate writes on Copy.
-		// an implicit Write.
+		// When we have options in the API, the vm can explicitly signal that this is an implicit Write.
 		s.trackWrite(c, true)
 		// also make sure the object is considered live during compaction in case we have already
-		// flushed pending writes and started compaction
+		// flushed pending writes and started compaction.
 		// in case of a race with purge, this will return a track error, which we can use to
 		// signal to the vm that the object is not fully present.
 		trackErr := s.trackTxnRef(c, true)
 		// if we failed to track the object and all its dependencies, then return false so as
-		// to cause the vm to recompute
+		// to cause the vm to copy
 		return trackErr == nil, nil
 	}
@ -673,6 +675,16 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, implicit bool) error {
 		err = s.txnProtect.Mark(c)
 	} else {
 		err = s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error {
 			// this check is necessary to avoid races because objects are purged in random order
 			has, err := s.hot.Has(c)
 			if err != nil {
 				return err
 			}
 			if !has {
 				return xerrors.Errorf("object (cid: %s) has been purged", c)
 			}
 			return s.txnProtect.Mark(c)
 		})
 	}
@ -949,28 +961,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 		return xerrors.Errorf("error creating mark set: %w", err)
 	}
 	defer markSet.Close() //nolint:errcheck
 	// create the pruge protect filter
 	s.txnLk.Lock()
 	s.txnProtect, err = s.txnEnv.Create("protected", s.markSetSize)
 	if err != nil {
 		s.txnLk.Unlock()
 		return xerrors.Errorf("error creating transactional mark set: %w", err)
 	}
 	s.txnLk.Unlock()
 	defer func() {
 		s.txnLk.Lock()
 		_ = s.txnProtect.Close()
 		s.txnProtect = nil
 		s.txnLk.Unlock()
 	}()
 	defer s.debug.Flush()
 	// flush pending writes to update the tracker
 	s.flushPendingWrites(false)
 	// 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch
 	log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch)
 	startMark := time.Now()
@ -992,6 +984,28 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 	log.Infow("marking done", "took", time.Since(startMark), "marked", count)
 	// create the transaction protect filter
 	s.txnLk.Lock()
 	s.txnProtect, err = s.txnEnv.Create("protected", s.markSetSize)
 	if err != nil {
 		s.txnLk.Unlock()
 		return xerrors.Errorf("error creating transactional mark set: %w", err)
 	}
 	s.txnLk.Unlock()
 	defer func() {
 		s.txnLk.Lock()
 		_ = s.txnProtect.Close()
 		s.txnProtect = nil
 		s.txnLk.Unlock()
 	}()
 	// flush pending writes to update the tracker
 	log.Info("flushing pending writes")
 	startFlush := time.Now()
 	s.flushPendingWrites(false)
 	log.Infow("flushing done", "took", time.Since(startFlush))
 	// 2. move cold unreachable objects to the coldstore
 	log.Info("collecting cold objects")
 	startCollect := time.Now()
@ -1071,6 +1085,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 		}
 		log.Infow("moving done", "took", time.Since(startMove))
 	}
 	// 2.3 purge cold objects from the hotstore
 	log.Info("purging cold objects from the hotstore")
 	startPurge := time.Now()
@ -1293,7 +1308,7 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error {
 	deadCids := make([]cid.Cid, 0, batchSize)
 	var purgeCnt, liveCnt int
 	defer func() {
-		log.Infow("purged objects", "purged", purgeCnt, "live", liveCnt)
+		log.Infow("purged cold objects", "purged", purgeCnt, "live", liveCnt)
 	}()
 	return s.purgeBatch(cids,