address review comments

vyzo 2021-07-10 16:30:27 +03:00
parent 870a47f55d
commit 0c5e336ff1


@@ -13,6 +13,7 @@ import (
     "time"

     "go.uber.org/multierr"
+    "golang.org/x/sync/errgroup"
     "golang.org/x/xerrors"

     blocks "github.com/ipfs/go-block-format"
@@ -110,9 +111,15 @@ type ChainAccessor interface {
     SubscribeHeadChanges(change func(revert []*types.TipSet, apply []*types.TipSet) error)
 }

+// hotstore is the interface that must be satisfied by the hot blockstore; it is an extension
+// of the Blockstore interface with the traits we need for compaction.
+type hotstore interface {
+    bstore.Blockstore
+    bstore.BlockstoreIterator
+}
+
 type SplitStore struct {
     compacting  int32 // compaction (or warm up) in progress
-    critsection int32 // compaction critical section
     closing     int32 // the splitstore is closing

     cfg *Config
@@ -125,8 +132,8 @@ type SplitStore struct {
     chain ChainAccessor
     ds    dstore.Datastore

-    hot  bstore.Blockstore
     cold bstore.Blockstore
+    hot  hotstore

     markSetEnv  MarkSetEnv
     markSetSize int64
@@ -139,7 +146,7 @@ type SplitStore struct {
     // transactional protection for concurrent read/writes during compaction
     txnLk      sync.RWMutex
     txnActive  bool
-    txnViews   *sync.WaitGroup
+    txnViews   sync.WaitGroup
     txnProtect MarkSet
     txnRefsMx  sync.Mutex
     txnRefs    map[cid.Cid]struct{}
@@ -162,11 +169,17 @@ func init() {
 // is backed by the provided hot and cold stores. The returned SplitStore MUST be
 // attached to the ChainStore with Start in order to trigger compaction.
 func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Config) (*SplitStore, error) {
-    // hot blockstore must support BlockstoreIterator
-    if _, ok := hot.(bstore.BlockstoreIterator); !ok {
-        return nil, xerrors.Errorf("hot blockstore does not support efficient iteration: %T", hot)
+    // hot blockstore must support the hotstore interface
+    hots, ok := hot.(hotstore)
+    if !ok {
+        // be specific about what is missing
+        if _, ok := hot.(bstore.BlockstoreIterator); !ok {
+            return nil, xerrors.Errorf("hot blockstore does not support efficient iteration: %T", hot)
+        }
+
+        return nil, xerrors.Errorf("hot blockstore does not support the necessary traits: %T", hot)
     }

     // the markset env
     markSetEnv, err := OpenMarkSetEnv(path, cfg.MarkSetType)
     if err != nil {
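The new hotstore interface bundles the two capabilities Open needs from the hot blockstore, so the constructor can assert a single composite interface and still report exactly which trait is missing. A minimal sketch of that pattern, with toy Blockstore and BlockstoreIterator stand-ins in place of the real bstore types:

```go
package main

import "fmt"

// Toy stand-ins for the interfaces the diff assumes from the bstore package.
type Blockstore interface{ Has(key string) bool }
type BlockstoreIterator interface{ ForEachKey(f func(key string) error) error }

// Composite interface in the spirit of the hotstore interface above.
type hotStore interface {
	Blockstore
	BlockstoreIterator
}

func open(hot Blockstore) (hotStore, error) {
	// assert the composite interface first
	hots, ok := hot.(hotStore)
	if !ok {
		// then probe the embedded interfaces to report what is missing
		if _, ok := hot.(BlockstoreIterator); !ok {
			return nil, fmt.Errorf("hot blockstore does not support efficient iteration: %T", hot)
		}
		return nil, fmt.Errorf("hot blockstore does not support the necessary traits: %T", hot)
	}
	return hots, nil
}

// badStore implements only Blockstore, so open rejects it with a specific error.
type badStore struct{}

func (badStore) Has(key string) bool { return false }

func main() {
	_, err := open(badStore{})
	fmt.Println(err)
}
```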
@@ -177,12 +190,10 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Config) (*SplitStore, error) {
     ss := &SplitStore{
         cfg:        cfg,
         ds:         ds,
-        hot:        hot,
         cold:       cold,
+        hot:        hots,
         markSetEnv: markSetEnv,

-        txnViews: new(sync.WaitGroup),
-
         coldPurgeSize: defaultColdPurgeSize,
     }

@@ -252,18 +263,13 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) {
         return blk, nil

     case bstore.ErrNotFound:
-        if s.debug != nil {
-            s.mx.Lock()
-            warm := s.warmupEpoch > 0
-            s.mx.Unlock()
-            if warm {
-                s.debug.LogReadMiss(cid)
-            }
+        if s.isWarm() {
+            s.debug.LogReadMiss(cid)
         }

         blk, err = s.cold.Get(cid)
         if err == nil {
-            stats.Record(context.Background(), metrics.SplitstoreMiss.M(1))
+            stats.Record(s.ctx, metrics.SplitstoreMiss.M(1))
         }

         return blk, err
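Get (and, below, GetSize and View) now share one miss path: try the hot store, log a read miss only once the store is warm, then fall back to the cold store. A rough sketch of that read-through shape, with plain maps standing in for the hot and cold blockstores and a print in place of the debug logger:

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

var errNotFound = errors.New("not found") // stands in for bstore.ErrNotFound

type store struct {
	mx          sync.Mutex
	warmupEpoch int64
	hot, cold   map[string][]byte
}

// isWarm mirrors the helper introduced later in this commit.
func (s *store) isWarm() bool {
	s.mx.Lock()
	defer s.mx.Unlock()
	return s.warmupEpoch > 0
}

// Get reads through: hot first, then cold on a miss.
func (s *store) Get(key string) ([]byte, error) {
	if v, ok := s.hot[key]; ok {
		return v, nil
	}
	if s.isWarm() {
		fmt.Println("read miss in hot store:", key) // debug.LogReadMiss analogue
	}
	if v, ok := s.cold[key]; ok {
		// a real implementation would record a splitstore miss metric here
		return v, nil
	}
	return nil, errNotFound
}

func main() {
	s := &store{
		warmupEpoch: 1,
		hot:         map[string][]byte{},
		cold:        map[string][]byte{"a": []byte("x")},
	}
	v, err := s.Get("a")
	fmt.Println(string(v), err)
}
```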
@@ -294,18 +300,13 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) {
         return size, nil

     case bstore.ErrNotFound:
-        if s.debug != nil {
-            s.mx.Lock()
-            warm := s.warmupEpoch > 0
-            s.mx.Unlock()
-            if warm {
-                s.debug.LogReadMiss(cid)
-            }
+        if s.isWarm() {
+            s.debug.LogReadMiss(cid)
         }

         size, err = s.cold.GetSize(cid)
         if err == nil {
-            stats.Record(context.Background(), metrics.SplitstoreMiss.M(1))
+            stats.Record(s.ctx, metrics.SplitstoreMiss.M(1))
         }

         return size, err
@@ -393,15 +394,21 @@ func (s *SplitStore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) {
         return nil, err
     }

-    ch := make(chan cid.Cid)
+    seen := cid.NewSet()
+    ch := make(chan cid.Cid, 8) // buffer is arbitrary, just enough to avoid context switches
     go func() {
         defer cancel()
         defer close(ch)

         for _, in := range []<-chan cid.Cid{chHot, chCold} {
-            for cid := range in {
+            for c := range in {
+                // ensure we only emit each key once
+                if !seen.Visit(c) {
+                    continue
+                }
+
                 select {
-                case ch <- cid:
+                case ch <- c:
                 case <-ctx.Done():
                     return
                 }
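AllKeysChan merges the hot and cold key channels through a seen-set so a key present in both stores is emitted only once, and gives the output channel a small buffer. A generic sketch of that merge, using a plain map where the splitstore uses cid.NewSet:

```go
package main

import (
	"context"
	"fmt"
)

// mergeUnique forwards keys from each input channel exactly once.
func mergeUnique(ctx context.Context, inputs ...<-chan string) <-chan string {
	seen := make(map[string]struct{})
	out := make(chan string, 8) // small buffer, enough to avoid context switches
	go func() {
		defer close(out)
		for _, in := range inputs {
			for k := range in {
				if _, dup := seen[k]; dup {
					continue // already emitted
				}
				seen[k] = struct{}{}
				select {
				case out <- k:
				case <-ctx.Done():
					return
				}
			}
		}
	}()
	return out
}

func main() {
	hot := make(chan string, 2)
	cold := make(chan string, 2)
	hot <- "a"
	hot <- "b"
	close(hot)
	cold <- "b" // duplicate of a hot key, emitted only once
	cold <- "c"
	close(cold)
	for k := range mergeUnique(context.Background(), hot, cold) {
		fmt.Println(k)
	}
}
```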
@@ -443,18 +450,13 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error {
     err := s.hot.View(cid, cb)
     switch err {
     case bstore.ErrNotFound:
-        if s.debug != nil {
-            s.mx.Lock()
-            warm := s.warmupEpoch > 0
-            s.mx.Unlock()
-            if warm {
-                s.debug.LogReadMiss(cid)
-            }
+        if s.isWarm() {
+            s.debug.LogReadMiss(cid)
         }

         err = s.cold.View(cid, cb)
         if err == nil {
-            stats.Record(context.Background(), metrics.SplitstoreMiss.M(1))
+            stats.Record(s.ctx, metrics.SplitstoreMiss.M(1))
         }

         return err
@@ -463,6 +465,12 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error {
     }
 }

+func (s *SplitStore) isWarm() bool {
+    s.mx.Lock()
+    defer s.mx.Unlock()
+    return s.warmupEpoch > 0
+}
+
 // State tracking
 func (s *SplitStore) Start(chain ChainAccessor) error {
     s.chain = chain
@@ -527,11 +535,14 @@ func (s *SplitStore) Start(chain ChainAccessor) error {
 }

 func (s *SplitStore) Close() error {
-    atomic.StoreInt32(&s.closing, 1)
+    if !atomic.CompareAndSwapInt32(&s.closing, 0, 1) {
+        // already closing
+        return nil
+    }

-    if atomic.LoadInt32(&s.critsection) == 1 {
-        log.Warn("ongoing compaction in critical section; waiting for it to finish...")
-        for atomic.LoadInt32(&s.critsection) == 1 {
+    if atomic.LoadInt32(&s.compacting) == 1 {
+        log.Warn("close with ongoing compaction in progress; waiting for it to finish...")
+        for atomic.LoadInt32(&s.compacting) == 1 {
             time.Sleep(time.Second)
         }
     }
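Close is now idempotent: CompareAndSwap flips the closing flag exactly once, and an in-flight compaction is waited out by polling the compacting flag instead of the removed critsection flag. A stripped-down sketch of that shutdown path, assuming the same one-second polling as the diff:

```go
package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

type splitStoreLike struct {
	closing    int32
	compacting int32
}

func (s *splitStoreLike) Close() error {
	if !atomic.CompareAndSwapInt32(&s.closing, 0, 1) {
		// already closing; later calls are no-ops
		return nil
	}
	if atomic.LoadInt32(&s.compacting) == 1 {
		fmt.Println("ongoing compaction; waiting for it to finish...")
		for atomic.LoadInt32(&s.compacting) == 1 {
			time.Sleep(time.Second)
		}
	}
	return nil
}

func main() {
	s := &splitStoreLike{}
	_ = s.Close()
	_ = s.Close() // second call returns immediately
}
```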
@@ -549,12 +560,24 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
     curTs := apply[len(apply)-1]
     epoch := curTs.Height()

+    // NOTE: there is an implicit invariant assumption that HeadChange is invoked
+    // synchronously and no other HeadChange can be invoked while one is in
+    // progress.
+    // this is guaranteed by the chainstore, and it is pervasive in all lotus
+    // -- if that ever changes then all hell will break loose in general and
+    // we will have a race to protectTipSets here.
     if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) {
         // we are currently compacting -- protect the new tipset(s)
         s.protectTipSets(apply)
         return nil
     }

+    // check if we are actually closing first
+    if atomic.LoadInt32(&s.closing) == 1 {
+        atomic.StoreInt32(&s.compacting, 0)
+        return nil
+    }
+
     timestamp := time.Unix(int64(curTs.MinTimestamp()), 0)
     if time.Since(timestamp) > SyncGapTime {
         // don't attempt compaction before we have caught up syncing
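HeadChange uses the compacting flag as a gate: the caller that wins the CompareAndSwap may start compaction, a losing caller only protects the new tipsets, and the winner still rechecks the closing flag before doing any work. A schematic sketch of that gate; the real code runs the compaction in a background goroutine, which is elided here:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

type compactorLike struct {
	compacting int32
	closing    int32
}

func (c *compactorLike) headChange(tipsets []string) {
	if !atomic.CompareAndSwapInt32(&c.compacting, 0, 1) {
		// a compaction is already running -- just protect the new tipsets
		fmt.Println("protecting tipsets:", tipsets)
		return
	}
	// we won the gate; bail out if the store is shutting down
	if atomic.LoadInt32(&c.closing) == 1 {
		atomic.StoreInt32(&c.compacting, 0)
		return
	}
	defer atomic.StoreInt32(&c.compacting, 0)
	fmt.Println("compacting up to", tipsets[len(tipsets)-1])
}

func main() {
	c := &compactorLike{}
	c.headChange([]string{"ts1"})
	c.headChange([]string{"ts2"})
}
```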
@@ -608,7 +631,7 @@ func (s *SplitStore) protectView(c cid.Cid) *sync.WaitGroup {
     if !s.txnActive {
         s.txnViews.Add(1)
-        return s.txnViews
+        return &s.txnViews
     }

     s.trackTxnRef(c)
@@ -653,6 +676,8 @@ func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) {
     }

     s.txnRefsMx.Lock()
+    defer s.txnRefsMx.Unlock()
+
     quiet := false
     for _, c := range cids {
         if isUnitaryObject(c) {
@@ -676,7 +701,7 @@ func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) {
         s.txnRefs[c] = struct{}{}
     }

-    s.txnRefsMx.Unlock()
     return
 }
@@ -714,6 +739,7 @@ func (s *SplitStore) protectTxnRefs(markSet MarkSet) error {
            workch <- c
            count++
        }
+       close(workch)

        if count == 0 {
            return nil
@@ -727,31 +753,23 @@ func (s *SplitStore) protectTxnRefs(markSet MarkSet) error {
            workers = count
        }

-       close(workch)
-
-       worker := func(wg *sync.WaitGroup) {
-           if wg != nil {
-               defer wg.Done()
-           }
-
+       worker := func() error {
            for c := range workch {
                err := s.doTxnProtect(c, markSet)
                if err != nil {
-                   log.Warnf("error protecting transactional references: %s", err)
-                   return
+                   return xerrors.Errorf("error protecting transactional references to %s: %w", c, err)
                }
            }
+           return nil
        }

-       if workers > 1 {
-           wg := new(sync.WaitGroup)
-           for i := 0; i < workers; i++ {
-               wg.Add(1)
-               go worker(wg)
-           }
-           wg.Wait()
-       } else {
-           worker(nil)
+       g := new(errgroup.Group)
+       for i := 0; i < workers; i++ {
+           g.Go(worker)
+       }
+
+       if err := g.Wait(); err != nil {
+           return err
        }

        log.Infow("protecting transactional refs done", "took", time.Since(startProtect), "protected", count)
@@ -761,6 +779,10 @@ func (s *SplitStore) protectTxnRefs(markSet MarkSet) error {
 // transactionally protect a reference by walking the object and marking.
 // concurrent markings are short circuited by checking the markset.
 func (s *SplitStore) doTxnProtect(root cid.Cid, markSet MarkSet) error {
+    if err := s.checkClosing(); err != nil {
+        return err
+    }
+
     // Note: cold objects are deleted heaviest first, so the constituents of an object
     // cannot be deleted before the object itself.
     return s.walkObjectIncomplete(root, cid.NewSet(),
@@ -918,7 +940,7 @@ func (s *SplitStore) compact(curTs *types.TipSet, wg *sync.WaitGroup) {
     start = time.Now()
     err := s.doCompact(curTs)
     took := time.Since(start).Milliseconds()
-    stats.Record(context.Background(), metrics.SplitstoreCompactionTimeSeconds.M(float64(took)/1e3))
+    stats.Record(s.ctx, metrics.SplitstoreCompactionTimeSeconds.M(float64(took)/1e3))

     if err != nil {
         log.Errorf("COMPACTION ERROR: %s", err)
@@ -991,7 +1013,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
     var hotCnt, coldCnt int
     cold := make([]cid.Cid, 0, s.coldPurgeSize)

-    err = s.hot.(bstore.BlockstoreIterator).ForEachKey(func(c cid.Cid) error {
+    err = s.hot.ForEachKey(func(c cid.Cid) error {
        // was it marked?
        mark, err := markSet.Has(c)
        if err != nil {
@@ -1021,8 +1043,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
     }

     log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt)
-    stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt)))
-    stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt)))
+    stats.Record(s.ctx, metrics.SplitstoreCompactionHot.M(int64(hotCnt)))
+    stats.Record(s.ctx, metrics.SplitstoreCompactionCold.M(int64(coldCnt)))

     if err := s.checkClosing(); err != nil {
         return err
@@ -1064,6 +1086,9 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
     log.Infow("sorting done", "took", time.Since(startSort))

     // 4.1 protect transactional refs once more
+    // strictly speaking, this is not necessary as purge will do it before deleting each
+    // batch. however, there is likely a largish number of references accumulated during
+    // the sort and this protects before entering purge context.
     err = s.protectTxnRefs(markSet)
     if err != nil {
         return xerrors.Errorf("error protecting transactional refs: %w", err)
@@ -1073,16 +1098,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
         return err
     }

-    // Enter critical section
-    log.Info("entering critical section")
-    atomic.StoreInt32(&s.critsection, 1)
-    defer atomic.StoreInt32(&s.critsection, 0)
-
-    // check to see if we are closing first; if that's the case just return
-    if err := s.checkClosing(); err != nil {
-        return err
-    }
-
     // 5. purge cold objects from the hotstore, taking protected references into account
     log.Info("purging cold objects from the hotstore")
     startPurge := time.Now()
@@ -1119,10 +1134,7 @@ func (s *SplitStore) beginTxnProtect() *sync.WaitGroup {
     s.txnRefs = make(map[cid.Cid]struct{})
     s.txnMissing = make(map[cid.Cid]struct{})

-    wg := s.txnViews
-    s.txnViews = nil
-
-    return wg
+    return &s.txnViews
 }

 func (s *SplitStore) beginTxnMarking(markSet MarkSet) {
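With txnViews now a plain sync.WaitGroup value, beginTxnProtect and protectView hand out &s.txnViews rather than swapping pointer fields; a WaitGroup must not be copied, so callers always go through the address of the one field. A small sketch of that ownership pattern, with illustrative names:

```go
package main

import (
	"fmt"
	"sync"
)

type viewTracker struct {
	mu    sync.Mutex
	views sync.WaitGroup // value field; never copied, only referenced
}

// beginView registers an in-flight view and returns the WaitGroup
// the caller must signal when the view completes.
func (t *viewTracker) beginView() *sync.WaitGroup {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.views.Add(1)
	return &t.views
}

// waitViews blocks until all registered views have completed.
func (t *viewTracker) waitViews() {
	t.views.Wait()
}

func main() {
	t := &viewTracker{}
	wg := t.beginView()
	go func() {
		defer wg.Done()
		fmt.Println("view running")
	}()
	t.waitViews()
}
```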
@@ -1141,11 +1153,13 @@ func (s *SplitStore) endTxnProtect() {
         return
     }

+    // release markset memory
+    s.txnProtect.Close()
+
     s.txnActive = false
     s.txnProtect = nil
     s.txnRefs = nil
     s.txnMissing = nil
-    s.txnViews = new(sync.WaitGroup)
 }

 func (s *SplitStore) walkChain(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bool,
@@ -1238,6 +1252,11 @@ func (s *SplitStore) walkObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) error) error {
         return nil
     }

+    // check this before recursing
+    if err := s.checkClosing(); err != nil {
+        return err
+    }
+
     var links []cid.Cid
     err := s.view(c, func(data []byte) error {
         return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) {
@@ -1294,6 +1313,11 @@ func (s *SplitStore) walkObjectIncomplete(c cid.Cid, walked *cid.Set, f, missing func(cid.Cid) error) error {
         return nil
     }

+    // check this before recursing
+    if err := s.checkClosing(); err != nil {
+        return err
+    }
+
     var links []cid.Cid
     err := s.view(c, func(data []byte) error {
         return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) {
@@ -1522,24 +1546,28 @@ func (s *SplitStore) purge(cids []cid.Cid, markSet MarkSet) error {
        func(cids []cid.Cid) error {
            deadCids := deadCids[:0]

-       again:
+           for {
                if err := s.checkClosing(); err != nil {
                    return err
                }

                s.txnLk.Lock()
-               if len(s.txnRefs) > 0 {
+               if len(s.txnRefs) == 0 {
+                   // keep the lock!
+                   break
+               }
+
+               // unlock and protect
                s.txnLk.Unlock()

                err := s.protectTxnRefs(markSet)
                if err != nil {
                    return xerrors.Errorf("error protecting transactional refs: %w", err)
                }
-
-               goto again
            }
            defer s.txnLk.Unlock()

            for _, c := range cids {
                live, err := markSet.Has(c)
                if err != nil {