2021-07-25 07:35:37 +00:00
|
|
|
package splitstore
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2022-01-26 07:01:51 +00:00
|
|
|
"sync"
|
2021-07-25 07:35:37 +00:00
|
|
|
"sync/atomic"
|
|
|
|
"time"
|
|
|
|
|
2022-06-15 10:06:22 +00:00
|
|
|
"github.com/ipfs/go-cid"
|
2022-06-14 15:00:51 +00:00
|
|
|
"golang.org/x/xerrors"
|
2021-07-26 05:45:46 +00:00
|
|
|
|
|
|
|
bstore "github.com/filecoin-project/lotus/blockstore"
|
|
|
|
"github.com/filecoin-project/lotus/chain/types"
|
2021-07-25 07:35:37 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// performs an asynchronous health-check on the splitstore; results are appended to
|
|
|
|
// <splitstore-path>/check.txt
|
|
|
|
func (s *SplitStore) Check() error {
|
|
|
|
s.headChangeMx.Lock()
|
|
|
|
defer s.headChangeMx.Unlock()
|
|
|
|
|
|
|
|
// try to take compaction lock and inhibit compaction while the health-check is running
|
|
|
|
if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) {
|
|
|
|
return xerrors.Errorf("can't acquire compaction lock; compacting operation in progress")
|
|
|
|
}
|
2022-08-05 20:34:16 +00:00
|
|
|
s.compactType = check
|
2021-07-25 07:35:37 +00:00
|
|
|
|
|
|
|
if s.compactionIndex == 0 {
|
|
|
|
atomic.StoreInt32(&s.compacting, 0)
|
|
|
|
return xerrors.Errorf("splitstore hasn't compacted yet; health check is not meaningful")
|
|
|
|
}
|
|
|
|
|
|
|
|
// check if we are actually closing first
|
|
|
|
if err := s.checkClosing(); err != nil {
|
|
|
|
atomic.StoreInt32(&s.compacting, 0)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
curTs := s.chain.GetHeaviestTipSet()
|
|
|
|
go func() {
|
|
|
|
defer atomic.StoreInt32(&s.compacting, 0)
|
|
|
|
|
|
|
|
log.Info("checking splitstore health")
|
|
|
|
start := time.Now()
|
|
|
|
|
|
|
|
err := s.doCheck(curTs)
|
|
|
|
if err != nil {
|
|
|
|
log.Errorf("error checking splitstore health: %s", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Infow("health check done", "took", time.Since(start))
|
|
|
|
}()
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *SplitStore) doCheck(curTs *types.TipSet) error {
|
|
|
|
currentEpoch := curTs.Height()
|
|
|
|
boundaryEpoch := currentEpoch - CompactionBoundary
|
|
|
|
|
|
|
|
outputPath := filepath.Join(s.path, "check.txt")
|
|
|
|
output, err := os.OpenFile(outputPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
|
|
|
|
if err != nil {
|
|
|
|
return xerrors.Errorf("error opening check output file %s: %w", outputPath, err)
|
|
|
|
}
|
|
|
|
defer output.Close() //nolint:errcheck
|
|
|
|
|
2022-01-26 07:01:51 +00:00
|
|
|
var mx sync.Mutex
|
2021-07-25 07:35:37 +00:00
|
|
|
write := func(format string, args ...interface{}) {
|
2022-01-26 07:01:51 +00:00
|
|
|
mx.Lock()
|
|
|
|
defer mx.Unlock()
|
2021-07-25 08:30:58 +00:00
|
|
|
_, err := fmt.Fprintf(output, format+"\n", args...)
|
2021-07-25 07:35:37 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Warnf("error writing check output: %s", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ts, _ := time.Now().MarshalText()
|
2021-07-25 08:30:58 +00:00
|
|
|
write("---------------------------------------------")
|
|
|
|
write("start check at %s", ts)
|
|
|
|
write("current epoch: %d", currentEpoch)
|
|
|
|
write("boundary epoch: %d", boundaryEpoch)
|
|
|
|
write("compaction index: %d", s.compactionIndex)
|
|
|
|
write("--")
|
2021-07-25 07:35:37 +00:00
|
|
|
|
2022-01-26 07:01:51 +00:00
|
|
|
coldCnt := new(int64)
|
|
|
|
missingCnt := new(int64)
|
2021-07-30 06:51:09 +00:00
|
|
|
|
2022-01-28 13:41:33 +00:00
|
|
|
visitor, err := s.markSetEnv.New("check", 0)
|
2021-07-30 06:51:09 +00:00
|
|
|
if err != nil {
|
|
|
|
return xerrors.Errorf("error creating visitor: %w", err)
|
|
|
|
}
|
|
|
|
defer visitor.Close() //nolint
|
|
|
|
|
2023-03-04 16:38:18 +00:00
|
|
|
size, err := s.walkChain(curTs, boundaryEpoch, boundaryEpoch, visitor,
|
2021-07-25 07:35:37 +00:00
|
|
|
func(c cid.Cid) error {
|
|
|
|
if isUnitaryObject(c) {
|
|
|
|
return errStopWalk
|
|
|
|
}
|
|
|
|
|
2021-12-11 21:03:00 +00:00
|
|
|
has, err := s.hot.Has(s.ctx, c)
|
2021-07-25 07:35:37 +00:00
|
|
|
if err != nil {
|
|
|
|
return xerrors.Errorf("error checking hotstore: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if has {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-12-11 21:03:00 +00:00
|
|
|
has, err = s.cold.Has(s.ctx, c)
|
2021-07-25 07:35:37 +00:00
|
|
|
if err != nil {
|
|
|
|
return xerrors.Errorf("error checking coldstore: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if has {
|
2022-01-26 07:01:51 +00:00
|
|
|
atomic.AddInt64(coldCnt, 1)
|
2021-07-25 07:35:37 +00:00
|
|
|
write("cold object reference: %s", c)
|
|
|
|
} else {
|
2022-01-26 07:01:51 +00:00
|
|
|
atomic.AddInt64(missingCnt, 1)
|
2021-07-25 07:35:37 +00:00
|
|
|
write("missing object reference: %s", c)
|
2021-07-25 10:42:20 +00:00
|
|
|
return errStopWalk
|
2021-07-25 07:35:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2022-11-07 21:31:12 +00:00
|
|
|
}, func(cid.Cid) error { return nil })
|
2021-07-25 07:35:37 +00:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
err = xerrors.Errorf("error walking chain: %w", err)
|
2021-07-25 08:30:58 +00:00
|
|
|
write("ERROR: %s", err)
|
2021-07-25 07:35:37 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-03-04 16:38:18 +00:00
|
|
|
log.Infow("check done", "cold", *coldCnt, "missing", *missingCnt, "walk size", size)
|
2021-07-25 08:42:13 +00:00
|
|
|
write("--")
|
2022-01-26 07:01:51 +00:00
|
|
|
write("cold: %d missing: %d", *coldCnt, *missingCnt)
|
2021-07-25 08:42:13 +00:00
|
|
|
write("DONE")
|
2021-07-25 07:35:37 +00:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
2021-07-26 05:45:46 +00:00
|
|
|
|
|
|
|
// provides some basic information about the splitstore
|
|
|
|
func (s *SplitStore) Info() map[string]interface{} {
|
|
|
|
info := make(map[string]interface{})
|
|
|
|
info["base epoch"] = s.baseEpoch
|
|
|
|
info["warmup epoch"] = s.warmupEpoch
|
|
|
|
info["compactions"] = s.compactionIndex
|
2022-08-05 20:34:16 +00:00
|
|
|
info["prunes"] = s.pruneIndex
|
|
|
|
info["compacting"] = s.compacting == 1
|
2021-07-26 05:45:46 +00:00
|
|
|
|
|
|
|
sizer, ok := s.hot.(bstore.BlockstoreSize)
|
|
|
|
if ok {
|
|
|
|
size, err := sizer.Size()
|
|
|
|
if err != nil {
|
|
|
|
log.Warnf("error getting hotstore size: %s", err)
|
|
|
|
} else {
|
|
|
|
info["hotstore size"] = size
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return info
|
|
|
|
}
|