check the closing state variable often
so that we have a reasonably quick graceful shutdown
This commit is contained in:
parent
4f808367f8
commit
f5c45bd517
@ -807,6 +807,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
|
|||||||
defer markSet.Close() //nolint:errcheck
|
defer markSet.Close() //nolint:errcheck
|
||||||
defer s.debug.Flush()
|
defer s.debug.Flush()
|
||||||
|
|
||||||
|
if err := s.checkClosing(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// 1. mark reachable objects by walking the chain from the current epoch; we keep state roots
|
// 1. mark reachable objects by walking the chain from the current epoch; we keep state roots
|
||||||
// and messages until the boundary epoch.
|
// and messages until the boundary epoch.
|
||||||
log.Info("marking reachable objects")
|
log.Info("marking reachable objects")
|
||||||
@ -815,6 +819,11 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
|
|||||||
var count int64
|
var count int64
|
||||||
err = s.walkChain(curTs, boundaryEpoch, true,
|
err = s.walkChain(curTs, boundaryEpoch, true,
|
||||||
func(c cid.Cid) error {
|
func(c cid.Cid) error {
|
||||||
|
// marking takes a while, so check this with every opportunity
|
||||||
|
if err := s.checkClosing(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
if isFilCommitment(c) {
|
if isFilCommitment(c) {
|
||||||
return errStopWalk
|
return errStopWalk
|
||||||
}
|
}
|
||||||
@ -831,6 +840,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
|
|||||||
|
|
||||||
log.Infow("marking done", "took", time.Since(startMark), "marked", count)
|
log.Infow("marking done", "took", time.Since(startMark), "marked", count)
|
||||||
|
|
||||||
|
if err := s.checkClosing(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// begin transactional protection with concurrent marking and fetch references created while marking
|
// begin transactional protection with concurrent marking and fetch references created while marking
|
||||||
txnRefs := s.beginTxnConcurrentMarking(markSet)
|
txnRefs := s.beginTxnConcurrentMarking(markSet)
|
||||||
|
|
||||||
@ -842,6 +855,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
|
|||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
for c := range txnRefs {
|
for c := range txnRefs {
|
||||||
|
if err := s.checkClosing(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
if isFilCommitment(c) {
|
if isFilCommitment(c) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -906,6 +923,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
|
|||||||
log.Infow("update mark set done", "took", time.Since(startMark), "marked", count)
|
log.Infow("update mark set done", "took", time.Since(startMark), "marked", count)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := s.checkClosing(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// 2. iterate through the hotstore to collect cold objects
|
// 2. iterate through the hotstore to collect cold objects
|
||||||
log.Info("collecting cold objects")
|
log.Info("collecting cold objects")
|
||||||
startCollect := time.Now()
|
startCollect := time.Now()
|
||||||
@ -947,11 +968,19 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
|
|||||||
stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt)))
|
stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt)))
|
||||||
stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt)))
|
stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt)))
|
||||||
|
|
||||||
|
if err := s.checkClosing(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// now that we have collected cold objects, check for missing references from transactional i/o
|
// now that we have collected cold objects, check for missing references from transactional i/o
|
||||||
// and disable further collection of such references (they will not be acted upon as we can't
|
// and disable further collection of such references (they will not be acted upon as we can't
|
||||||
// possibly delete objects we didn't have when we were collecting cold objects)
|
// possibly delete objects we didn't have when we were collecting cold objects)
|
||||||
s.waitForMissingRefs()
|
s.waitForMissingRefs()
|
||||||
|
|
||||||
|
if err := s.checkClosing(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// 3. copy the cold objects to the coldstore -- if we have one
|
// 3. copy the cold objects to the coldstore -- if we have one
|
||||||
if !s.cfg.DiscardColdBlocks {
|
if !s.cfg.DiscardColdBlocks {
|
||||||
log.Info("moving cold objects to the coldstore")
|
log.Info("moving cold objects to the coldstore")
|
||||||
@ -980,9 +1009,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
|
|||||||
defer atomic.StoreInt32(&s.critsection, 0)
|
defer atomic.StoreInt32(&s.critsection, 0)
|
||||||
|
|
||||||
// check to see if we are closing first; if that's the case just return
|
// check to see if we are closing first; if that's the case just return
|
||||||
if atomic.LoadInt32(&s.closing) == 1 {
|
if err := s.checkClosing(); err != nil {
|
||||||
log.Info("splitstore is closing; aborting compaction")
|
return err
|
||||||
return xerrors.Errorf("compaction aborted")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 5. purge cold objects from the hotstore, taking protected references into account
|
// 5. purge cold objects from the hotstore, taking protected references into account
|
||||||
@ -1232,6 +1260,15 @@ func (s *SplitStore) has(c cid.Cid) (bool, error) {
|
|||||||
return s.cold.Has(c)
|
return s.cold.Has(c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *SplitStore) checkClosing() error {
|
||||||
|
if atomic.LoadInt32(&s.closing) == 1 {
|
||||||
|
log.Info("splitstore is closing; aborting compaction")
|
||||||
|
return xerrors.Errorf("compaction aborted")
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s *SplitStore) isOldBlockHeader(c cid.Cid, epoch abi.ChainEpoch) (isOldBlock bool, err error) {
|
func (s *SplitStore) isOldBlockHeader(c cid.Cid, epoch abi.ChainEpoch) (isOldBlock bool, err error) {
|
||||||
if c.Prefix().Codec != cid.DagCBOR {
|
if c.Prefix().Codec != cid.DagCBOR {
|
||||||
return false, nil
|
return false, nil
|
||||||
@ -1252,6 +1289,10 @@ func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error {
|
|||||||
batch := make([]blocks.Block, 0, batchSize)
|
batch := make([]blocks.Block, 0, batchSize)
|
||||||
|
|
||||||
for _, c := range cold {
|
for _, c := range cold {
|
||||||
|
if err := s.checkClosing(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
blk, err := s.hot.Get(c)
|
blk, err := s.hot.Get(c)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == bstore.ErrNotFound {
|
if err == bstore.ErrNotFound {
|
||||||
@ -1300,6 +1341,11 @@ func (s *SplitStore) sortObjects(cids []cid.Cid) error {
|
|||||||
// compute sorting weights as the cumulative number of DAG links
|
// compute sorting weights as the cumulative number of DAG links
|
||||||
weights := make(map[string]int)
|
weights := make(map[string]int)
|
||||||
for _, c := range cids {
|
for _, c := range cids {
|
||||||
|
// this can take quite a while, so check for shutdown with every opportunity
|
||||||
|
if err := s.checkClosing(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
w := s.getObjectWeight(c, weights, key)
|
w := s.getObjectWeight(c, weights, key)
|
||||||
weights[key(c)] = w
|
weights[key(c)] = w
|
||||||
}
|
}
|
||||||
@ -1402,9 +1448,8 @@ func (s *SplitStore) purge(cids []cid.Cid) error {
|
|||||||
func(cids []cid.Cid) error {
|
func(cids []cid.Cid) error {
|
||||||
deadCids := deadCids[:0]
|
deadCids := deadCids[:0]
|
||||||
|
|
||||||
if atomic.LoadInt32(&s.closing) == 1 {
|
if err := s.checkClosing(); err != nil {
|
||||||
log.Info("splitstore is closing; aborting purge")
|
return err
|
||||||
return xerrors.Errorf("compaction aborted")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
s.txnLk.Lock()
|
s.txnLk.Lock()
|
||||||
@ -1461,6 +1506,10 @@ func (s *SplitStore) waitForMissingRefs() {
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
for i := 0; i < 3 && len(missing) > 0; i++ {
|
for i := 0; i < 3 && len(missing) > 0; i++ {
|
||||||
|
if err := s.checkClosing(); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
wait := time.Duration(i) * time.Minute
|
wait := time.Duration(i) * time.Minute
|
||||||
log.Infof("retrying for %d missing references in %s (attempt: %d)", len(missing), wait, i+1)
|
log.Infof("retrying for %d missing references in %s (attempt: %d)", len(missing), wait, i+1)
|
||||||
if wait > 0 {
|
if wait > 0 {
|
||||||
|
Loading…
Reference in New Issue
Block a user