lotus/extern/sector-storage/faults.go

118 lines
3.5 KiB
Go
Raw Normal View History

package sectorstorage
2020-05-16 21:03:29 +00:00
import (
"context"
"fmt"
2020-06-08 16:47:59 +00:00
"os"
"path/filepath"
2020-05-16 21:03:29 +00:00
"golang.org/x/xerrors"
2020-09-07 03:49:10 +00:00
"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/specs-storage/storage"
"github.com/filecoin-project/lotus/extern/sector-storage/storiface"
2020-05-16 21:03:29 +00:00
)
2020-08-16 10:40:35 +00:00
// FaultTracker TODO: Track things more actively
2020-05-16 21:03:29 +00:00
type FaultTracker interface {
2020-11-26 07:02:43 +00:00
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef) (map[abi.SectorID]string, error)
2020-05-16 21:03:29 +00:00
}
2020-08-16 10:40:35 +00:00
// CheckProvable returns unprovable sectors
2020-11-26 07:02:43 +00:00
func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef) (map[abi.SectorID]string, error) {
var bad = make(map[abi.SectorID]string)
2020-05-16 21:03:29 +00:00
ssize, err := pp.SectorSize()
2020-06-15 10:50:53 +00:00
if err != nil {
return nil, err
}
2020-05-16 21:03:29 +00:00
// TODO: More better checks
for _, sector := range sectors {
err := func() error {
2020-06-08 16:47:59 +00:00
ctx, cancel := context.WithCancel(ctx)
defer cancel()
locked, err := m.index.StorageTryLock(ctx, sector.ID, storiface.FTSealed|storiface.FTCache, storiface.FTNone)
2020-06-08 16:47:59 +00:00
if err != nil {
return xerrors.Errorf("acquiring sector lock: %w", err)
}
if !locked {
2020-11-19 15:48:28 +00:00
log.Warnw("CheckProvable Sector FAULT: can't acquire read lock", "sector", sector)
2020-11-26 07:02:43 +00:00
bad[sector.ID] = fmt.Sprint("can't acquire read lock")
2020-06-08 16:47:59 +00:00
return nil
}
lp, _, err := m.localStore.AcquireSector(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
2020-05-16 21:03:29 +00:00
if err != nil {
2020-07-06 14:13:42 +00:00
log.Warnw("CheckProvable Sector FAULT: acquire sector in checkProvable", "sector", sector, "error", err)
2020-11-26 07:02:43 +00:00
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
2020-07-06 14:13:42 +00:00
return nil
2020-05-16 21:03:29 +00:00
}
if lp.Sealed == "" || lp.Cache == "" {
2020-11-26 07:02:43 +00:00
log.Warnw("CheckProvable Sector FAULT: cache and/or sealed paths not found", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache)
bad[sector.ID] = fmt.Sprintf("cache and/or sealed paths not found, cache %q, sealed %q", lp.Cache, lp.Sealed)
2020-05-16 21:03:29 +00:00
return nil
}
2020-06-15 10:50:53 +00:00
toCheck := map[string]int64{
lp.Sealed: 1,
filepath.Join(lp.Cache, "t_aux"): 0,
filepath.Join(lp.Cache, "p_aux"): 0,
2020-06-08 16:47:59 +00:00
}
addCachePathsForSectorSize(toCheck, lp.Cache, ssize)
2020-06-15 10:50:53 +00:00
for p, sz := range toCheck {
st, err := os.Stat(p)
2020-06-08 16:47:59 +00:00
if err != nil {
log.Warnw("CheckProvable Sector FAULT: sector file stat error", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "file", p, "err", err)
2020-11-26 07:02:43 +00:00
bad[sector.ID] = fmt.Sprintf("%s", err)
2020-06-08 16:47:59 +00:00
return nil
}
2020-06-15 10:50:53 +00:00
if sz != 0 {
if st.Size() != int64(ssize)*sz {
log.Warnw("CheckProvable Sector FAULT: sector file is wrong size", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "file", p, "size", st.Size(), "expectSize", int64(ssize)*sz)
2020-11-26 07:02:43 +00:00
bad[sector.ID] = fmt.Sprintf("%s is wrong size (got %d, expect %d)", p, st.Size(), int64(ssize)*sz)
2020-06-15 10:50:53 +00:00
return nil
}
}
2020-06-08 16:47:59 +00:00
}
2020-05-16 21:03:29 +00:00
return nil
}()
if err != nil {
return nil, err
}
}
return bad, nil
}
// addCachePathsForSectorSize adds to chk the cache files that are expected
// to exist inside cacheDir for a sector of size ssize. Every path is stored
// with the value 0.
//
// 2KiB, 8MiB and 512MiB sectors get a single "sc-02-data-tree-r-last.dat"
// entry; 32GiB sectors get eight numbered "sc-02-data-tree-r-last-<i>.dat"
// entries and 64GiB sectors get sixteen. For any other size nothing is added
// and a warning is logged.
func addCachePathsForSectorSize(chk map[string]int64, cacheDir string, ssize abi.SectorSize) {
	var treeFiles int

	switch ssize {
	case 2 << 10, 8 << 20, 512 << 20:
		chk[filepath.Join(cacheDir, "sc-02-data-tree-r-last.dat")] = 0
		return
	case 32 << 30:
		treeFiles = 8
	case 64 << 30:
		treeFiles = 16
	default:
		log.Warnf("not checking cache files of %s sectors for faults", ssize)
		return
	}

	for idx := 0; idx < treeFiles; idx++ {
		name := fmt.Sprintf("sc-02-data-tree-r-last-%d.dat", idx)
		chk[filepath.Join(cacheDir, name)] = 0
	}
}
2020-05-16 21:03:29 +00:00
// Compile-time assertion that *Manager satisfies FaultTracker.
var _ FaultTracker = (*Manager)(nil)