From 6577cc8ea66e5d700daf717aef3d6841c667c2d4 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 24 Nov 2020 16:51:00 +0200 Subject: [PATCH 001/148] splitstore struct and Blockstore interface implementation --- chain/store/splitstore.go | 161 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 chain/store/splitstore.go diff --git a/chain/store/splitstore.go b/chain/store/splitstore.go new file mode 100644 index 000000000..9419450bf --- /dev/null +++ b/chain/store/splitstore.go @@ -0,0 +1,161 @@ +package store + +import ( + "context" + "errors" + + blocks "github.com/ipfs/go-block-format" + cid "github.com/ipfs/go-cid" + bstore "github.com/ipfs/go-ipfs-blockstore" + + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/lotus/chain/types" + bstore2 "github.com/filecoin-project/lotus/lib/blockstore" +) + +type SplitStore struct { + baseEpoch abi.ChainEpoch + curTs *types.TipSet + + hot bstore2.Blockstore + cold bstore2.Blockstore + + snoop TrackingStore + sweep TrackingStore +} + +type TrackingStore interface { + Put(cid.Cid, abi.ChainEpoch) error + PutBatch([]cid.Cid, abi.ChainEpoch) error + Get(cid.Cid) (abi.ChainEpoch, error) + Delete(cid.Cid) error + Has(cid.Cid) (bool, error) + Keys() (<-chan cid.Cid, error) +} + +var _ bstore2.Blockstore = (*SplitStore)(nil) + +// Blockstore interface +func (s *SplitStore) DeleteBlock(cid cid.Cid) error { + // afaict we don't seem to be using this method, so it's not implemented + return errors.New("DeleteBlock not implemented on SplitStore; don't do this Luke!") +} + +func (s *SplitStore) Has(cid cid.Cid) (bool, error) { + has, err := s.hot.Has(cid) + + if err != nil { + return false, err + } + + if has { + return true, nil + } + + return s.cold.Has(cid) +} + +func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { + blk, err := s.hot.Get(cid) + + switch err { + case nil: + return blk, nil + + case bstore.ErrNotFound: + return s.cold.Get(cid) + + default: + 
return nil, err + } +} + +func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { + size, err := s.hot.GetSize(cid) + + switch err { + case nil: + return size, nil + + case bstore.ErrNotFound: + return s.cold.GetSize(cid) + + default: + return 0, err + } +} + +func (s *SplitStore) Put(blk blocks.Block) error { + err := s.hot.Put(blk) + if err != nil { + return err + } + + epoch := s.curTs.Height() + + return s.snoop.Put(blk.Cid(), epoch) +} + +func (s *SplitStore) PutMany(blks []blocks.Block) error { + err := s.hot.PutMany(blks) + if err != nil { + return err + } + + epoch := s.curTs.Height() + + batch := make([]cid.Cid, 0, len(blks)) + for _, blk := range blks { + batch = append(batch, blk.Cid()) + } + + return s.snoop.PutBatch(batch, epoch) +} + +func (s *SplitStore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { + ctx, cancel := context.WithCancel(ctx) + + chHot, err := s.hot.AllKeysChan(ctx) + if err != nil { + return nil, err + } + + chCold, err := s.cold.AllKeysChan(ctx) + if err != nil { + cancel() + return nil, err + } + + ch := make(chan cid.Cid) + go func() { + defer cancel() + + for _, in := range []<-chan cid.Cid{chHot, chCold} { + for cid := range in { + select { + case ch <- cid: + case <-ctx.Done(): + return + } + } + } + }() + + return ch, nil +} + +func (s *SplitStore) HashOnRead(enabled bool) { + s.hot.HashOnRead(enabled) + s.cold.HashOnRead(enabled) +} + +func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { + err := s.hot.View(cid, cb) + switch err { + case bstore.ErrNotFound: + return s.cold.View(cid, cb) + + default: + return err + } +} From c8f1139e0d4c8aadf3e1047769a0e46fa9e90c03 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 24 Nov 2020 19:15:38 +0200 Subject: [PATCH 002/148] compaction algorithm --- chain/store/splitstore.go | 114 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 110 insertions(+), 4 deletions(-) diff --git a/chain/store/splitstore.go b/chain/store/splitstore.go index 9419450bf..eef2781ce 
100644 --- a/chain/store/splitstore.go +++ b/chain/store/splitstore.go @@ -9,6 +9,7 @@ import ( bstore "github.com/ipfs/go-ipfs-blockstore" "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/types" bstore2 "github.com/filecoin-project/lotus/lib/blockstore" ) @@ -17,6 +18,8 @@ type SplitStore struct { baseEpoch abi.ChainEpoch curTs *types.TipSet + cs *ChainStore + hot bstore2.Blockstore cold bstore2.Blockstore @@ -86,14 +89,13 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { } func (s *SplitStore) Put(blk blocks.Block) error { - err := s.hot.Put(blk) + epoch := s.curTs.Height() + err := s.snoop.Put(blk.Cid(), epoch) if err != nil { return err } - epoch := s.curTs.Height() - - return s.snoop.Put(blk.Cid(), epoch) + return s.hot.Put(blk) } func (s *SplitStore) PutMany(blks []blocks.Block) error { @@ -159,3 +161,107 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { return err } } + +// Compaction/GC Algorithm +func (s *SplitStore) compact() { + // Phase 1: mark all reachable CIDs with the current epoch + curTs := s.curTs + epoch := curTs.Height() + err := s.cs.WalkSnapshot(context.Background(), curTs, epoch-s.baseEpoch, false, false, + func(cid cid.Cid) error { + return s.sweep.Put(cid, epoch) + }) + + if err != nil { + // TODO do something better here + panic(err) + } + + // Phase 2: sweep cold objects, moving reachable ones to the coldstore and deleting the others + coldEpoch := s.baseEpoch + build.Finality + + ch, err := s.snoop.Keys() + if err != nil { + // TODO do something better here + panic(err) + } + + for cid := range ch { + wrEpoch, err := s.snoop.Get(cid) + if err != nil { + // TODO do something better here + panic(err) + } + + // is the object stil hot? 
+ if wrEpoch >= coldEpoch { + // yes, just clear the mark and continue + err := s.sweep.Delete(cid) + if err != nil { + // TODO do something better here + panic(err) + } + continue + } + + // the object is cold -- check whether it is reachable + mark, err := s.sweep.Has(cid) + if err != nil { + // TODO do something better here + panic(err) + } + + if mark { + // the object is reachable, move it to the cold store and delete the mark + blk, err := s.hot.Get(cid) + if err != nil { + // TODO do something better here + panic(err) + } + + err = s.cold.Put(blk) + if err != nil { + // TODO do something better here + panic(err) + } + + err = s.sweep.Delete(cid) + if err != nil { + // TODO do something better here + panic(err) + } + } + + // delete the object from the hotstore + err = s.hot.DeleteBlock(cid) + if err != nil { + // TODO do something better here + panic(err) + } + + // remove the snoop tracking + err = s.snoop.Delete(cid) + if err != nil { + // TODO do something better here + panic(err) + } + } + + // clear all remaining marks for cold objects that may have been reachable + ch, err = s.sweep.Keys() + if err != nil { + // TODO do something better here + panic(err) + } + + for cid := range ch { + err = s.sweep.Delete(cid) + if err != nil { + // TODO do something better here + panic(err) + } + } + + // TODO persist base epoch to metadata ds + s.baseEpoch = coldEpoch +} From b192adfd2e6412a918be8e0cac3c26fc5d4bd34d Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 24 Nov 2020 19:26:28 +0200 Subject: [PATCH 003/148] trigger compaction from head changes --- chain/store/splitstore.go | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/chain/store/splitstore.go b/chain/store/splitstore.go index eef2781ce..a8f31f7b1 100644 --- a/chain/store/splitstore.go +++ b/chain/store/splitstore.go @@ -14,6 +14,8 @@ import ( bstore2 "github.com/filecoin-project/lotus/lib/blockstore" ) +const CompactionThreshold = 5 * build.Finality + type SplitStore struct { 
baseEpoch abi.ChainEpoch curTs *types.TipSet @@ -25,6 +27,8 @@ type SplitStore struct { snoop TrackingStore sweep TrackingStore + + compacting bool } type TrackingStore interface { @@ -162,6 +166,28 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { } } +// State tracking +func (s *SplitStore) Start(cs *ChainStore) { + // TODO load base epoch from metadata ds -- if none, then use current epoch + s.cs = cs + s.curTs = cs.GetHeaviestTipSet() + cs.SubscribeHeadChanges(s.HeadChange) +} + +func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { + s.curTs = apply[len(apply)-1] + epoch := s.curTs.Height() + if epoch-s.baseEpoch > CompactionThreshold && !s.compacting { + s.compacting = true + go func() { + defer func() { s.compacting = false }() + s.compact() + }() + } + + return nil +} + // Compaction/GC Algorithm func (s *SplitStore) compact() { // Phase 1: mark all reachable CIDs with the current epoch From fd08786048a0e5b26a5a4bc109188bb9fc5cdf9a Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 24 Nov 2020 19:41:07 +0200 Subject: [PATCH 004/148] track base epoch in metadata ds --- chain/store/splitstore.go | 49 +++++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/chain/store/splitstore.go b/chain/store/splitstore.go index a8f31f7b1..0c6196ff7 100644 --- a/chain/store/splitstore.go +++ b/chain/store/splitstore.go @@ -2,10 +2,12 @@ package store import ( "context" + "encoding/binary" "errors" blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" + dstore "github.com/ipfs/go-datastore" bstore "github.com/ipfs/go-ipfs-blockstore" "github.com/filecoin-project/go-state-types/abi" @@ -16,11 +18,14 @@ import ( const CompactionThreshold = 5 * build.Finality +var baseEpochKey = dstore.NewKey("baseEpoch") + type SplitStore struct { baseEpoch abi.ChainEpoch curTs *types.TipSet cs *ChainStore + ds dstore.Datastore hot bstore2.Blockstore cold bstore2.Blockstore @@ -167,11 +172,35 @@ 
func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { } // State tracking -func (s *SplitStore) Start(cs *ChainStore) { - // TODO load base epoch from metadata ds -- if none, then use current epoch +func (s *SplitStore) Start(cs *ChainStore) error { s.cs = cs s.curTs = cs.GetHeaviestTipSet() + + // load base epoch from metadata ds + // if none, then use current epoch because it's a fresh start + bs, err := s.ds.Get(baseEpochKey) + switch err { + case nil: + epoch, n := binary.Uvarint(bs) + if n < 0 { + panic("bogus base epoch") + } + s.baseEpoch = abi.ChainEpoch(epoch) + + case dstore.ErrNotFound: + err = s.setBaseEpoch(s.curTs.Height()) + if err != nil { + return err + } + + default: + return err + } + + // watch the chain cs.SubscribeHeadChanges(s.HeadChange) + + return nil } func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { @@ -288,6 +317,18 @@ func (s *SplitStore) compact() { } } - // TODO persist base epoch to metadata ds - s.baseEpoch = coldEpoch + err = s.setBaseEpoch(coldEpoch) + if err != nil { + // TODO do something better here + panic(err) + } +} + +func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error { + s.baseEpoch = epoch + // write to datastore + bs := make([]byte, 16) + n := binary.PutUvarint(bs, uint64(epoch)) + bs = bs[:n] + return s.ds.Put(baseEpochKey, bs) } From c2cc1983160ea0920fcd422a94455f26c027fdcd Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 24 Nov 2020 23:32:43 +0200 Subject: [PATCH 005/148] fix off by 1 in marking --- chain/store/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain/store/splitstore.go b/chain/store/splitstore.go index 0c6196ff7..a069b28fd 100644 --- a/chain/store/splitstore.go +++ b/chain/store/splitstore.go @@ -222,7 +222,7 @@ func (s *SplitStore) compact() { // Phase 1: mark all reachable CIDs with the current epoch curTs := s.curTs epoch := curTs.Height() - err := s.cs.WalkSnapshot(context.Background(), curTs, epoch-s.baseEpoch, false, 
false, + err := s.cs.WalkSnapshot(context.Background(), curTs, epoch-s.baseEpoch+1, false, false, func(cid cid.Cid) error { return s.sweep.Put(cid, epoch) }) From 2bed6c94cd3bf92fe01ae13d9df32642dc34bb82 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 24 Nov 2020 23:55:57 +0200 Subject: [PATCH 006/148] use dual live set marking algorithm to keep all hotly reachable objects in the hotstore --- chain/store/splitstore.go | 104 ++++++++++++++++++++++++-------------- 1 file changed, 67 insertions(+), 37 deletions(-) diff --git a/chain/store/splitstore.go b/chain/store/splitstore.go index a069b28fd..01bb4574c 100644 --- a/chain/store/splitstore.go +++ b/chain/store/splitstore.go @@ -31,7 +31,6 @@ type SplitStore struct { cold bstore2.Blockstore snoop TrackingStore - sweep TrackingStore compacting bool } @@ -41,10 +40,15 @@ type TrackingStore interface { PutBatch([]cid.Cid, abi.ChainEpoch) error Get(cid.Cid) (abi.ChainEpoch, error) Delete(cid.Cid) error - Has(cid.Cid) (bool, error) Keys() (<-chan cid.Cid, error) } +type LiveSet interface { + Mark(cid.Cid) error + Has(cid.Cid) (bool, error) + Close() error +} + var _ bstore2.Blockstore = (*SplitStore)(nil) // Blockstore interface @@ -219,12 +223,29 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { // Compaction/GC Algorithm func (s *SplitStore) compact() { - // Phase 1: mark all reachable CIDs with the current epoch + // create two on disk live sets, one for marking the cold finality region + // and one for marking the hot region + hotSet, err := s.newLiveSet() + if err != nil { + // TODO do something better here + panic(err) + } + defer hotSet.Close() //nolint:errcheck + + coldSet, err := s.newLiveSet() + if err != nil { + // TODO do something better here + panic(err) + } + defer coldSet.Close() //nolint:errcheck + + // Phase 1a: mark all reachable CIDs in the hot range curTs := s.curTs epoch := curTs.Height() - err := s.cs.WalkSnapshot(context.Background(), curTs, epoch-s.baseEpoch+1, false, false, 
+ coldEpoch := s.baseEpoch + build.Finality + err = s.cs.WalkSnapshot(context.Background(), curTs, epoch-coldEpoch+1, false, false, func(cid cid.Cid) error { - return s.sweep.Put(cid, epoch) + return hotSet.Mark(cid) }) if err != nil { @@ -232,9 +253,27 @@ func (s *SplitStore) compact() { panic(err) } - // Phase 2: sweep cold objects, moving reachable ones to the coldstore and deleting the others - coldEpoch := s.baseEpoch + build.Finality + // Phase 1b: mark all reachable CIDs in the cold range + coldTs, err := s.cs.GetTipsetByHeight(context.Background(), coldEpoch-1, curTs, true) + if err != nil { + // TODO do something better here + panic(err) + } + err = s.cs.WalkSnapshot(context.Background(), coldTs, build.Finality, false, false, + func(cid cid.Cid) error { + return coldSet.Mark(cid) + }) + + if err != nil { + // TODO do something better here + panic(err) + } + + // Phase 2: sweep cold objects: + // - If a cold object is reachable in the hot range, it stays in the hotstore. + // - If a cold object is reachable in the cold range, it is moved to the coldstore. + // - If a cold object is unreachable, it is deleted. ch, err := s.snoop.Keys() if err != nil { // TODO do something better here @@ -250,24 +289,31 @@ func (s *SplitStore) compact() { // is the object stil hot? 
if wrEpoch >= coldEpoch { - // yes, just clear the mark and continue - err := s.sweep.Delete(cid) - if err != nil { - // TODO do something better here - panic(err) - } + // yes, stay in the hotstore continue } - // the object is cold -- check whether it is reachable - mark, err := s.sweep.Has(cid) + // the object is cold -- check whether it is reachable in the hot range + mark, err := hotSet.Has(cid) if err != nil { // TODO do something better here panic(err) } if mark { - // the object is reachable, move it to the cold store and delete the mark + // the object is reachable in the hot range, stay in the hotstore + continue + } + + // check whether it is reachable in the cold range + mark, err = coldSet.Has(cid) + if err != nil { + // TODO do something better here + panic(err) + } + + if mark { + // the object is reachable in the cold range, move it to the cold store blk, err := s.hot.Get(cid) if err != nil { // TODO do something better here @@ -279,12 +325,6 @@ func (s *SplitStore) compact() { // TODO do something better here panic(err) } - - err = s.sweep.Delete(cid) - if err != nil { - // TODO do something better here - panic(err) - } } // delete the object from the hotstore @@ -302,21 +342,6 @@ func (s *SplitStore) compact() { } } - // clear all remaining marks for cold objects that may have been reachable - ch, err = s.sweep.Keys() - if err != nil { - // TODO do something better here - panic(err) - } - - for cid := range ch { - err = s.sweep.Delete(cid) - if err != nil { - // TODO do something better here - panic(err) - } - } - err = s.setBaseEpoch(coldEpoch) if err != nil { // TODO do something better here @@ -332,3 +357,8 @@ func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error { bs = bs[:n] return s.ds.Put(baseEpochKey, bs) } + +func (s *SplitStore) newLiveSet() (LiveSet, error) { + // TODO implementation + return nil, errors.New("newLiveSet: IMPLEMENT ME!!!") +} From b945747eb26e1976b85946e93918c84f0f9cfe3b Mon Sep 17 00:00:00 2001 From: vyzo Date: 
Wed, 25 Nov 2020 00:01:10 +0200 Subject: [PATCH 007/148] satisfy linter --- chain/store/splitstore.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/chain/store/splitstore.go b/chain/store/splitstore.go index 01bb4574c..1a69abf50 100644 --- a/chain/store/splitstore.go +++ b/chain/store/splitstore.go @@ -54,7 +54,7 @@ var _ bstore2.Blockstore = (*SplitStore)(nil) // Blockstore interface func (s *SplitStore) DeleteBlock(cid cid.Cid) error { // afaict we don't seem to be using this method, so it's not implemented - return errors.New("DeleteBlock not implemented on SplitStore; don't do this Luke!") + return errors.New("DeleteBlock not implemented on SplitStore; don't do this Luke!") //nolint } func (s *SplitStore) Has(cid cid.Cid) (bool, error) { @@ -132,6 +132,7 @@ func (s *SplitStore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { chHot, err := s.hot.AllKeysChan(ctx) if err != nil { + cancel() return nil, err } @@ -360,5 +361,5 @@ func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error { func (s *SplitStore) newLiveSet() (LiveSet, error) { // TODO implementation - return nil, errors.New("newLiveSet: IMPLEMENT ME!!!") + return nil, errors.New("newLiveSet: IMPLEMENT ME!!!") //nolint } From 101e5c6540ce8265b7730b518ee82a9f99da3b09 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 25 Nov 2020 09:07:06 +0200 Subject: [PATCH 008/148] close keys channel when dome emitting keys --- chain/store/splitstore.go | 1 + 1 file changed, 1 insertion(+) diff --git a/chain/store/splitstore.go b/chain/store/splitstore.go index 1a69abf50..42e2f132e 100644 --- a/chain/store/splitstore.go +++ b/chain/store/splitstore.go @@ -145,6 +145,7 @@ func (s *SplitStore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { ch := make(chan cid.Cid) go func() { defer cancel() + defer close(ch) for _, in := range []<-chan cid.Cid{chHot, chCold} { for cid := range in { From 3083d80f5ee7574a1463443eb672e54c063e0731 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 
25 Nov 2020 09:07:48 +0200 Subject: [PATCH 009/148] no need to import go-ipfs-blockstore, lib/blockstore will do --- chain/store/splitstore.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/chain/store/splitstore.go b/chain/store/splitstore.go index 42e2f132e..f17a89dcf 100644 --- a/chain/store/splitstore.go +++ b/chain/store/splitstore.go @@ -8,12 +8,11 @@ import ( blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" dstore "github.com/ipfs/go-datastore" - bstore "github.com/ipfs/go-ipfs-blockstore" "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/types" - bstore2 "github.com/filecoin-project/lotus/lib/blockstore" + bstore "github.com/filecoin-project/lotus/lib/blockstore" ) const CompactionThreshold = 5 * build.Finality @@ -27,8 +26,8 @@ type SplitStore struct { cs *ChainStore ds dstore.Datastore - hot bstore2.Blockstore - cold bstore2.Blockstore + hot bstore.Blockstore + cold bstore.Blockstore snoop TrackingStore @@ -49,7 +48,7 @@ type LiveSet interface { Close() error } -var _ bstore2.Blockstore = (*SplitStore)(nil) +var _ bstore.Blockstore = (*SplitStore)(nil) // Blockstore interface func (s *SplitStore) DeleteBlock(cid cid.Cid) error { From c1b1a9ce2a162f793283190e3a46b54de263456a Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 25 Nov 2020 09:10:58 +0200 Subject: [PATCH 010/148] avoid race with compacting state variable --- chain/store/splitstore.go | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/chain/store/splitstore.go b/chain/store/splitstore.go index f17a89dcf..d766bc322 100644 --- a/chain/store/splitstore.go +++ b/chain/store/splitstore.go @@ -4,6 +4,7 @@ import ( "context" "encoding/binary" "errors" + "sync" blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" @@ -31,6 +32,7 @@ type SplitStore struct { snoop TrackingStore + stateMx sync.Mutex compacting bool } @@ -211,10 
+213,10 @@ func (s *SplitStore) Start(cs *ChainStore) error { func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { s.curTs = apply[len(apply)-1] epoch := s.curTs.Height() - if epoch-s.baseEpoch > CompactionThreshold && !s.compacting { - s.compacting = true + if epoch-s.baseEpoch > CompactionThreshold && !s.isCompacting() { + s.setCompacting(true) go func() { - defer func() { s.compacting = false }() + defer s.setCompacting(false) s.compact() }() } @@ -222,6 +224,18 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { return nil } +func (s *SplitStore) isCompacting() bool { + s.stateMx.Lock() + defer s.stateMx.Unlock() + return s.compacting +} + +func (s *SplitStore) setCompacting(state bool) { + s.stateMx.Lock() + defer s.stateMx.Unlock() + s.compacting = state +} + // Compaction/GC Algorithm func (s *SplitStore) compact() { // create two on disk live sets, one for marking the cold finality region From 2c9b58aaec94d244cf8925238259aff033fabdf1 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 25 Nov 2020 10:11:42 +0200 Subject: [PATCH 011/148] add some logging --- chain/store/splitstore.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/chain/store/splitstore.go b/chain/store/splitstore.go index d766bc322..3fb86bb5b 100644 --- a/chain/store/splitstore.go +++ b/chain/store/splitstore.go @@ -5,6 +5,7 @@ import ( "encoding/binary" "errors" "sync" + "time" blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" @@ -217,7 +218,13 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { s.setCompacting(true) go func() { defer s.setCompacting(false) + + log.Info("compacting splitstore") + start := time.Now() + s.compact() + + log.Infow("compaction done", "took", time.Since(start)) }() } @@ -254,6 +261,10 @@ func (s *SplitStore) compact() { } defer coldSet.Close() //nolint:errcheck + // Phase 1: marking + log.Info("marking live objects") + startMark := time.Now() + // Phase 
1a: mark all reachable CIDs in the hot range curTs := s.curTs epoch := curTs.Height() @@ -285,6 +296,8 @@ func (s *SplitStore) compact() { panic(err) } + log.Infow("marking done", "took", time.Since(startMark)) + // Phase 2: sweep cold objects: // - If a cold object is reachable in the hot range, it stays in the hotstore. // - If a cold object is reachable in the cold range, it is moved to the coldstore. @@ -295,6 +308,12 @@ func (s *SplitStore) compact() { panic(err) } + startSweep := time.Now() + log.Info("sweeping cold objects") + + // some stats for logging + var stHot, stCold, stDead int + for cid := range ch { wrEpoch, err := s.snoop.Get(cid) if err != nil { @@ -305,6 +324,7 @@ func (s *SplitStore) compact() { // is the object stil hot? if wrEpoch >= coldEpoch { // yes, stay in the hotstore + stHot++ continue } @@ -317,6 +337,7 @@ func (s *SplitStore) compact() { if mark { // the object is reachable in the hot range, stay in the hotstore + stHot++ continue } @@ -340,6 +361,10 @@ func (s *SplitStore) compact() { // TODO do something better here panic(err) } + + stCold++ + } else { + stDead++ } // delete the object from the hotstore @@ -357,6 +382,9 @@ func (s *SplitStore) compact() { } } + log.Infow("sweeping done", "took", time.Since(startSweep)) + log.Infow("compaction stats", "hot", stHot, "cold", stCold, "dead", stDead) + err = s.setBaseEpoch(coldEpoch) if err != nil { // TODO do something better here From 17bc5fcd85f4f8c6940c6233adf333362427d0b9 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 26 Nov 2020 16:51:16 +0200 Subject: [PATCH 012/148] move splitstore implementation to its own directory --- chain/store/{ => splitstore}/splitstore.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) rename chain/store/{ => splitstore}/splitstore.go (97%) diff --git a/chain/store/splitstore.go b/chain/store/splitstore/splitstore.go similarity index 97% rename from chain/store/splitstore.go rename to chain/store/splitstore/splitstore.go index 
3fb86bb5b..a5366ecd9 100644 --- a/chain/store/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -1,4 +1,4 @@ -package store +package splitstore import ( "context" @@ -10,9 +10,11 @@ import ( blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" dstore "github.com/ipfs/go-datastore" + logging "github.com/ipfs/go-log/v2" "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/build" + "github.com/filecoin-project/lotus/chain/store" "github.com/filecoin-project/lotus/chain/types" bstore "github.com/filecoin-project/lotus/lib/blockstore" ) @@ -21,11 +23,13 @@ const CompactionThreshold = 5 * build.Finality var baseEpochKey = dstore.NewKey("baseEpoch") +var log = logging.Logger("splitstore") + type SplitStore struct { baseEpoch abi.ChainEpoch curTs *types.TipSet - cs *ChainStore + cs *store.ChainStore ds dstore.Datastore hot bstore.Blockstore @@ -180,7 +184,7 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { } // State tracking -func (s *SplitStore) Start(cs *ChainStore) error { +func (s *SplitStore) Start(cs *store.ChainStore) error { s.cs = cs s.curTs = cs.GetHeaviestTipSet() From 0bf1a78b392fdcbf9c159d2cd8a3b7484c2de070 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 26 Nov 2020 16:53:16 +0200 Subject: [PATCH 013/148] stubs for tracking store and live set --- chain/store/splitstore/liveset.go | 11 +++++++++++ chain/store/splitstore/snoop.go | 15 +++++++++++++++ chain/store/splitstore/splitstore.go | 14 -------------- 3 files changed, 26 insertions(+), 14 deletions(-) create mode 100644 chain/store/splitstore/liveset.go create mode 100644 chain/store/splitstore/snoop.go diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go new file mode 100644 index 000000000..51852c3d5 --- /dev/null +++ b/chain/store/splitstore/liveset.go @@ -0,0 +1,11 @@ +package splitstore + +import ( + cid "github.com/ipfs/go-cid" +) + +type LiveSet interface { + Mark(cid.Cid) error + 
Has(cid.Cid) (bool, error) + Close() error +} diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go new file mode 100644 index 000000000..b31ea16e9 --- /dev/null +++ b/chain/store/splitstore/snoop.go @@ -0,0 +1,15 @@ +package splitstore + +import ( + cid "github.com/ipfs/go-cid" + + "github.com/filecoin-project/go-state-types/abi" +) + +type TrackingStore interface { + Put(cid.Cid, abi.ChainEpoch) error + PutBatch([]cid.Cid, abi.ChainEpoch) error + Get(cid.Cid) (abi.ChainEpoch, error) + Delete(cid.Cid) error + Keys() (<-chan cid.Cid, error) +} diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index a5366ecd9..7d82ca709 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -41,20 +41,6 @@ type SplitStore struct { compacting bool } -type TrackingStore interface { - Put(cid.Cid, abi.ChainEpoch) error - PutBatch([]cid.Cid, abi.ChainEpoch) error - Get(cid.Cid) (abi.ChainEpoch, error) - Delete(cid.Cid) error - Keys() (<-chan cid.Cid, error) -} - -type LiveSet interface { - Mark(cid.Cid) error - Has(cid.Cid) (bool, error) - Close() error -} - var _ bstore.Blockstore = (*SplitStore)(nil) // Blockstore interface From df856b73151ad851298319aef7b33962e7564a40 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 26 Nov 2020 17:43:39 +0200 Subject: [PATCH 014/148] gomod: get lmdb-go --- go.mod | 1 + go.sum | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/go.mod b/go.mod index 54d6b42ed..220779501 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/Jeffail/gabs v1.4.0 github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d + github.com/bmatsuo/lmdb-go v1.8.0 github.com/buger/goterm v0.0.0-20200322175922-2f3e71b85129 github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e github.com/cockroachdb/pebble v0.0.0-20201001221639-879f3bfeef07 diff --git a/go.sum b/go.sum 
index c1e5494ee..2d06052f5 100644 --- a/go.sum +++ b/go.sum @@ -90,6 +90,8 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= +github.com/bmatsuo/lmdb-go v1.8.0 h1:ohf3Q4xjXZBKh4AayUY4bb2CXuhRAI8BYGlJq08EfNA= +github.com/bmatsuo/lmdb-go v1.8.0/go.mod h1:wWPZmKdOAZsl4qOqkowQ1aCrFie1HU8gWloHMCeAUdM= github.com/bradfitz/go-smtpd v0.0.0-20170404230938-deb6d6237625/go.mod h1:HYsPBTaaSFSlLx/70C2HPIMNZpVV8+vt/A+FMnYP11g= github.com/briandowns/spinner v1.11.1/go.mod h1:QOuQk7x+EaDASo80FEXwlwiA+j/PPIcX3FScO+3/ZPQ= github.com/btcsuite/btcd v0.0.0-20190213025234-306aecffea32/go.mod h1:DrZx5ec/dmnfpw9KyYoQyYo7d0KEvTkk/5M/vbZjAr8= @@ -1495,6 +1497,8 @@ github.com/whyrusleeping/yamux v1.1.5/go.mod h1:E8LnQQ8HKx5KD29HZFUwM1PxCOdPRzGw github.com/x-cray/logrus-prefixed-formatter v0.5.2/go.mod h1:2duySbKsL6M18s5GU7VPsoEPHyzalCE06qoARUCeBBE= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xlab/c-for-go v0.0.0-20201112171043-ea6dce5809cb h1:/7/dQyiKnxAOj9L69FhST7uMe17U015XPzX7cy+5ykM= +github.com/xlab/c-for-go v0.0.0-20200718154222-87b0065af829 h1:wb7xrDzfkLgPHsSEBm+VSx6aDdi64VtV0xvP0E6j8bk= +github.com/xlab/c-for-go v0.0.0-20200718154222-87b0065af829/go.mod h1:h/1PEBwj7Ym/8kOuMWvO2ujZ6Lt+TMbySEXNhjjR87I= github.com/xlab/c-for-go v0.0.0-20201112171043-ea6dce5809cb/go.mod h1:pbNsDSxn1ICiNn9Ct4ZGNrwzfkkwYbx/lw8VuyutFIg= github.com/xlab/pkgconfig v0.0.0-20170226114623-cea12a0fd245 h1:Sw125DKxZhPUI4JLlWugkzsrlB50jR9v2khiD9FxuSo= github.com/xlab/pkgconfig v0.0.0-20170226114623-cea12a0fd245/go.mod h1:C+diUUz7pxhNY6KAoLgrTYARGWnt82zWTylZlxT92vk= @@ -1936,6 +1940,8 @@ modernc.org/cc v1.0.0/go.mod 
h1:1Sk4//wdnYJiUIxnW8ddKpaOJCF37yAdqYnkxUpaYxw= modernc.org/fileutil v1.0.0/go.mod h1:JHsWpkrk/CnVV1H/eGlFf85BEpfkrp56ro8nojIq9Q8= modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk= modernc.org/golex v1.0.1 h1:EYKY1a3wStt0RzHaH8mdSRNg78Ub0OHxYfCRWw35YtM= +modernc.org/golex v1.0.0 h1:wWpDlbK8ejRfSyi0frMyhilD3JBvtcx2AdGDnU+JtsE= +modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk= modernc.org/golex v1.0.1/go.mod h1:QCA53QtsT1NdGkaZZkF5ezFwk4IXh4BGNafAARTC254= modernc.org/lex v1.0.0/go.mod h1:G6rxMTy3cH2iA0iXL/HRRv4Znu8MK4higxph/lE7ypk= modernc.org/lexer v1.0.0/go.mod h1:F/Dld0YKYdZCLQ7bD0USbWL4YKCyTDRDHiDTOs0q0vk= From 3f92a000c721f4ec72e3a2c3569cd53488f58c9e Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 26 Nov 2020 17:49:47 +0200 Subject: [PATCH 015/148] implement lmdb-backed LiveSet --- chain/store/splitstore/liveset.go | 54 ++++++++++++++++++++++++++++ chain/store/splitstore/splitstore.go | 24 +++++++++---- 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index 51852c3d5..f55053fe5 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -1,6 +1,8 @@ package splitstore import ( + "github.com/bmatsuo/lmdb-go/lmdb" + cid "github.com/ipfs/go-cid" ) @@ -9,3 +11,55 @@ type LiveSet interface { Has(cid.Cid) (bool, error) Close() error } + +type liveSet struct { + env *lmdb.Env + db lmdb.DBI +} + +var markBytes = []byte{1} + +func NewLiveSet(env *lmdb.Env, name string) (LiveSet, error) { + var db lmdb.DBI + err := env.Update(func(txn *lmdb.Txn) (err error) { + db, err = txn.CreateDBI(name) + return + }) + + if err != nil { + return nil, err + } + + return &liveSet{env: env, db: db}, nil +} + +func (s *liveSet) Mark(cid cid.Cid) error { + return s.env.Update(func(txn *lmdb.Txn) error { + return txn.Put(s.db, cid.Hash(), markBytes, 0) + }) +} + +func (s *liveSet) Has(cid cid.Cid) (has bool, err error) 
{ + err = s.env.View(func(txn *lmdb.Txn) error { + _, err := txn.Get(s.db, cid.Hash()) + if err != nil { + if lmdb.IsNotFound(err) { + has = false + return nil + } + + return err + } + + has = true + return nil + }) + + return +} + +func (s *liveSet) Close() error { + return s.env.Update(func(txn *lmdb.Txn) error { + return txn.Drop(s.db, true) + }) +} diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 7d82ca709..aeb38b7fe 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -7,6 +7,8 @@ import ( "sync" "time" + "github.com/bmatsuo/lmdb-go/lmdb" + blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" dstore "github.com/ipfs/go-datastore" @@ -39,6 +41,8 @@ type SplitStore struct { stateMx sync.Mutex compacting bool + + env *lmdb.Env } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -201,6 +205,17 @@ func (s *SplitStore) Start(cs *store.ChainStore) error { return nil } +func (s *SplitStore) Close() error { + if s.isCompacting() { + log.Warn("ongoing compaction; waiting for it to finish...") + for s.isCompacting() { + time.Sleep(time.Second) + } + } + + return s.env.Close() +} + func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { s.curTs = apply[len(apply)-1] epoch := s.curTs.Height() @@ -237,14 +252,14 @@ func (s *SplitStore) setCompacting(state bool) { func (s *SplitStore) compact() { // create two on disk live sets, one for marking the cold finality region // and one for marking the hot region - hotSet, err := s.newLiveSet() + hotSet, err := NewLiveSet(s.env, "hot") if err != nil { // TODO do something better here panic(err) } defer hotSet.Close() //nolint:errcheck - coldSet, err := s.newLiveSet() + coldSet, err := NewLiveSet(s.env, "cold") if err != nil { // TODO do something better here panic(err) @@ -390,8 +405,3 @@ func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error { bs = bs[:n] return s.ds.Put(baseEpochKey, bs) } - -func (s 
*SplitStore) newLiveSet() (LiveSet, error) { - // TODO implementation - return nil, errors.New("newLiveSet: IMPLEMENT ME!!!") //nolint -} From 5043f31adf16c0433a26d917323e4883ab20608f Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 26 Nov 2020 18:58:03 +0200 Subject: [PATCH 016/148] liveset unit test --- chain/store/splitstore/liveset.go | 2 +- chain/store/splitstore/liveset_test.go | 145 +++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 chain/store/splitstore/liveset_test.go diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index f55053fe5..f8d57ea14 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -17,7 +17,7 @@ type liveSet struct { db lmdb.DBI } -var markBytes = []byte{1} +var markBytes = []byte{} func NewLiveSet(env *lmdb.Env, name string) (LiveSet, error) { var db lmdb.DBI diff --git a/chain/store/splitstore/liveset_test.go b/chain/store/splitstore/liveset_test.go new file mode 100644 index 000000000..7c3a8ca4f --- /dev/null +++ b/chain/store/splitstore/liveset_test.go @@ -0,0 +1,145 @@ +package splitstore + +import ( + "os" + "testing" + + "github.com/bmatsuo/lmdb-go/lmdb" + + cid "github.com/ipfs/go-cid" + "github.com/multiformats/go-multihash" +) + +func TestLiveSet(t *testing.T) { + env, err := lmdb.NewEnv() + if err != nil { + t.Fatal(err) + } + defer env.Close() //nolint:errcheck + + if err = env.SetMapSize(1 << 30); err != nil { + t.Fatal(err) + } + if err = env.SetMaxDBs(2); err != nil { + t.Fatal(err) + } + if err = env.SetMaxReaders(1); err != nil { + t.Fatal(err) + } + + err = os.MkdirAll("/tmp/liveset-test", 0777) + if err != nil { + t.Fatal(err) + } + + err = env.Open("/tmp/liveset-test", lmdb.NoSync|lmdb.WriteMap|lmdb.MapAsync|lmdb.NoReadahead, 0777) + if err != nil { + t.Fatal(err) + } + + hotSet, err := NewLiveSet(env, "hot") + if err != nil { + t.Fatal(err) + } + + coldSet, err := NewLiveSet(env, "cold") + if err != nil { + 
t.Fatal(err) + } + + makeCid := func(key string) cid.Cid { + h, err := multihash.Sum([]byte(key), multihash.SHA2_256, -1) + if err != nil { + t.Fatal(err) + } + + return cid.NewCidV1(cid.Raw, h) + } + + mustHave := func(s LiveSet, cid cid.Cid) { + has, err := s.Has(cid) + if err != nil { + t.Fatal(err) + } + + if !has { + t.Fatal("mark not found") + } + } + + mustNotHave := func(s LiveSet, cid cid.Cid) { + has, err := s.Has(cid) + if err != nil { + t.Fatal(err) + } + + if has { + t.Fatal("unexpected mark") + } + } + + k1 := makeCid("a") + k2 := makeCid("b") + k3 := makeCid("c") + k4 := makeCid("d") + + hotSet.Mark(k1) + hotSet.Mark(k2) + coldSet.Mark(k3) + + mustHave(hotSet, k1) + mustHave(hotSet, k2) + mustNotHave(hotSet, k3) + mustNotHave(hotSet, k4) + + mustNotHave(coldSet, k1) + mustNotHave(coldSet, k2) + mustHave(coldSet, k3) + mustNotHave(coldSet, k4) + + // close them and reopen to redo the dance + + err = hotSet.Close() + if err != nil { + t.Fatal(err) + } + + err = coldSet.Close() + if err != nil { + t.Fatal(err) + } + + hotSet, err = NewLiveSet(env, "hot") + if err != nil { + t.Fatal(err) + } + + coldSet, err = NewLiveSet(env, "cold") + if err != nil { + t.Fatal(err) + } + + hotSet.Mark(k3) + hotSet.Mark(k4) + coldSet.Mark(k1) + + mustNotHave(hotSet, k1) + mustNotHave(hotSet, k2) + mustHave(hotSet, k3) + mustHave(hotSet, k4) + + mustHave(coldSet, k1) + mustNotHave(coldSet, k2) + mustNotHave(coldSet, k3) + mustNotHave(coldSet, k4) + + err = hotSet.Close() + if err != nil { + t.Fatal(err) + } + + err = coldSet.Close() + if err != nil { + t.Fatal(err) + } +} From 83f8a0ab128ba3040d2f279c12401ccf1f54da88 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 26 Nov 2020 19:47:54 +0200 Subject: [PATCH 017/148] quiet linter --- chain/store/splitstore/liveset_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/chain/store/splitstore/liveset_test.go b/chain/store/splitstore/liveset_test.go index 7c3a8ca4f..77c8cb5e6 100644 --- 
a/chain/store/splitstore/liveset_test.go +++ b/chain/store/splitstore/liveset_test.go @@ -83,9 +83,9 @@ func TestLiveSet(t *testing.T) { k3 := makeCid("c") k4 := makeCid("d") - hotSet.Mark(k1) - hotSet.Mark(k2) - coldSet.Mark(k3) + hotSet.Mark(k1) //nolint + hotSet.Mark(k2) //nolint + coldSet.Mark(k3) //nolint mustHave(hotSet, k1) mustHave(hotSet, k2) @@ -119,9 +119,9 @@ func TestLiveSet(t *testing.T) { t.Fatal(err) } - hotSet.Mark(k3) - hotSet.Mark(k4) - coldSet.Mark(k1) + hotSet.Mark(k3) //nolint + hotSet.Mark(k4) //nolint + coldSet.Mark(k1) //nolint mustNotHave(hotSet, k1) mustNotHave(hotSet, k2) From 0d7476c5b28c7ef9dbc2265f4d446c3efe180b90 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 26 Nov 2020 20:37:02 +0200 Subject: [PATCH 018/148] implement LMDB-backed tracking store --- chain/store/splitstore/liveset.go | 2 + chain/store/splitstore/snoop.go | 148 ++++++++++++++++++++++++++- chain/store/splitstore/splitstore.go | 27 +++-- 3 files changed, 166 insertions(+), 11 deletions(-) diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index f8d57ea14..d7571aa62 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -41,6 +41,8 @@ func (s *liveSet) Mark(cid cid.Cid) error { func (s *liveSet) Has(cid cid.Cid) (has bool, err error) { err = s.env.View(func(txn *lmdb.Txn) error { + txn.RawRead = true + _, err := txn.Get(s.db, cid.Hash()) if err != nil { if lmdb.IsNotFound(err) { diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index b31ea16e9..3a452a933 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -1,15 +1,161 @@ package splitstore import ( + "context" + "fmt" + "os" + + "github.com/bmatsuo/lmdb-go/lmdb" + cid "github.com/ipfs/go-cid" "github.com/filecoin-project/go-state-types/abi" ) +var TrackingStoreMapSize int64 = 1 << 34 // 16G + type TrackingStore interface { Put(cid.Cid, abi.ChainEpoch) error PutBatch([]cid.Cid, abi.ChainEpoch) 
error Get(cid.Cid) (abi.ChainEpoch, error) Delete(cid.Cid) error - Keys() (<-chan cid.Cid, error) + Keys(context.Context) (<-chan cid.Cid, error) + Close() error +} + +type trackingStore struct { + env *lmdb.Env + db lmdb.DBI +} + +func NewTrackingStore(path string) (TrackingStore, error) { + env, err := lmdb.NewEnv() + if err != nil { + return nil, fmt.Errorf("failed to initialize LMDB env: %w", err) + } + if err = env.SetMapSize(TrackingStoreMapSize); err != nil { + return nil, fmt.Errorf("failed to set LMDB map size: %w", err) + } + if err = env.SetMaxDBs(1); err != nil { + return nil, fmt.Errorf("failed to set LMDB max dbs: %w", err) + } + if err = env.SetMaxReaders(1); err != nil { + return nil, fmt.Errorf("failed to set LMDB max readers: %w", err) + } + + if st, err := os.Stat(path); os.IsNotExist(err) { + if err := os.MkdirAll(path, 0777); err != nil { + return nil, fmt.Errorf("failed to create LMDB data directory at %s: %w", path, err) + } + } else if err != nil { + return nil, fmt.Errorf("failed to stat LMDB data dir: %w", err) + } else if !st.IsDir() { + return nil, fmt.Errorf("LMDB path is not a directory %s", path) + } + + err = env.Open(path, lmdb.NoSync|lmdb.WriteMap|lmdb.MapAsync|lmdb.NoReadahead, 0777) + if err != nil { + env.Close() //nolint:errcheck + return nil, fmt.Errorf("error opening LMDB database: %w", err) + } + + s := new(trackingStore) + s.env = env + err = env.Update(func(txn *lmdb.Txn) (err error) { + s.db, err = txn.CreateDBI("snoop") + return err + }) + + if err != nil { + return nil, err + } + + return s, nil +} + +func (s *trackingStore) Put(cid cid.Cid, epoch abi.ChainEpoch) error { + val := epochToBytes(epoch) + return s.env.Update(func(txn *lmdb.Txn) error { + return txn.Put(s.db, cid.Hash(), val, 0) + }) +} + +func (s *trackingStore) PutBatch(cids []cid.Cid, epoch abi.ChainEpoch) error { + val := epochToBytes(epoch) + return s.env.Update(func(txn *lmdb.Txn) error { + for _, cid := range cids { + err := txn.Put(s.db, cid.Hash(), 
val, 0) + if err != nil { + return err + } + } + + return nil + }) +} + +func (s *trackingStore) Get(cid cid.Cid) (epoch abi.ChainEpoch, err error) { + err = s.env.View(func(txn *lmdb.Txn) error { + txn.RawRead = true + + val, err := txn.Get(s.db, cid.Hash()) + if err != nil { + return err + } + + epoch = bytesToEpoch(val) + return nil + }) + + return +} + +func (s *trackingStore) Delete(cid cid.Cid) error { + return s.env.Update(func(txn *lmdb.Txn) error { + return txn.Del(s.db, cid.Hash(), nil) + }) +} + +func (s *trackingStore) Keys(ctx context.Context) (<-chan cid.Cid, error) { + ch := make(chan cid.Cid) + go func() { + err := s.env.View(func(txn *lmdb.Txn) error { + defer close(ch) + + txn.RawRead = true + cur, err := txn.OpenCursor(s.db) + if err != nil { + return err + } + defer cur.Close() + + for { + k, _, err := cur.Get(nil, nil, lmdb.Next) + if err != nil { + if lmdb.IsNotFound(err) { + return nil + } + + return err + } + + select { + case ch <- cid.NewCidV1(cid.Raw, k): + case <-ctx.Done(): + return nil + } + } + }) + + if err != nil { + log.Errorf("error iterating over tracking store keys: %s", err) + } + }() + + return ch, nil +} + +func (s *trackingStore) Close() error { + s.env.CloseDBI(s.db) + return s.env.Close() } diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index aeb38b7fe..4969f8265 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -183,11 +183,7 @@ func (s *SplitStore) Start(cs *store.ChainStore) error { bs, err := s.ds.Get(baseEpochKey) switch err { case nil: - epoch, n := binary.Uvarint(bs) - if n < 0 { - panic("bogus base epoch") - } - s.baseEpoch = abi.ChainEpoch(epoch) + s.baseEpoch = bytesToEpoch(bs) case dstore.ErrNotFound: err = s.setBaseEpoch(s.curTs.Height()) @@ -307,7 +303,7 @@ func (s *SplitStore) compact() { // - If a cold object is reachable in the hot range, it stays in the hotstore. 
// - If a cold object is reachable in the cold range, it is moved to the coldstore. // - If a cold object is unreachable, it is deleted. - ch, err := s.snoop.Keys() + ch, err := s.snoop.Keys(context.Background()) if err != nil { // TODO do something better here panic(err) @@ -400,8 +396,19 @@ func (s *SplitStore) compact() { func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error { s.baseEpoch = epoch // write to datastore - bs := make([]byte, 16) - n := binary.PutUvarint(bs, uint64(epoch)) - bs = bs[:n] - return s.ds.Put(baseEpochKey, bs) + return s.ds.Put(baseEpochKey, epochToBytes(epoch)) +} + +func epochToBytes(epoch abi.ChainEpoch) []byte { + buf := make([]byte, 16) + n := binary.PutUvarint(buf, uint64(epoch)) + return buf[:n] +} + +func bytesToEpoch(buf []byte) abi.ChainEpoch { + epoch, n := binary.Uvarint(buf) + if n < 0 { + panic("bogus base epoch bytes") + } + return abi.ChainEpoch(epoch) } From 47633972812b8695fd9cbd076a55d07bb575d7f7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 26 Nov 2020 20:49:50 +0200 Subject: [PATCH 019/148] add tracking store test --- chain/store/splitstore/snoop_test.go | 117 +++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 chain/store/splitstore/snoop_test.go diff --git a/chain/store/splitstore/snoop_test.go b/chain/store/splitstore/snoop_test.go new file mode 100644 index 000000000..fe1b3629c --- /dev/null +++ b/chain/store/splitstore/snoop_test.go @@ -0,0 +1,117 @@ +package splitstore + +import ( + "context" + "testing" + + "github.com/bmatsuo/lmdb-go/lmdb" + + cid "github.com/ipfs/go-cid" + "github.com/multiformats/go-multihash" + + "github.com/filecoin-project/go-state-types/abi" +) + +func TestTrackingStore(t *testing.T) { + makeCid := func(key string) cid.Cid { + h, err := multihash.Sum([]byte(key), multihash.SHA2_256, -1) + if err != nil { + t.Fatal(err) + } + + return cid.NewCidV1(cid.Raw, h) + } + + mustHave := func(s TrackingStore, cid cid.Cid, epoch abi.ChainEpoch) { + val, err := 
s.Get(cid) + if err != nil { + t.Fatal(err) + } + + if val != epoch { + t.Fatal("epoch mismatch") + } + } + + mustNotHave := func(s TrackingStore, cid cid.Cid) { + _, err := s.Get(cid) + if !lmdb.IsNotFound(err) { + t.Fatal("expected key not found") + } + } + + s, err := NewTrackingStore("/tmp/snoop-test") + if err != nil { + t.Fatal(err) + } + + k1 := makeCid("a") + k2 := makeCid("b") + k3 := makeCid("c") + k4 := makeCid("d") + + s.Put(k1, 1) //nolint + s.Put(k2, 2) //nolint + s.Put(k3, 3) //nolint + s.Put(k4, 4) //nolint + + mustHave(s, k1, 1) + mustHave(s, k2, 2) + mustHave(s, k3, 3) + mustHave(s, k4, 4) + + s.Delete(k1) // nolint + s.Delete(k2) // nolint + + mustNotHave(s, k1) + mustNotHave(s, k2) + mustHave(s, k3, 3) + mustHave(s, k4, 4) + + s.PutBatch([]cid.Cid{k1}, 1) + s.PutBatch([]cid.Cid{k2}, 2) + + mustHave(s, k1, 1) + mustHave(s, k2, 2) + mustHave(s, k3, 3) + mustHave(s, k4, 4) + + allKeys := map[string]struct{}{ + k1.String(): struct{}{}, + k2.String(): struct{}{}, + k3.String(): struct{}{}, + k4.String(): struct{}{}, + } + + ch, _ := s.Keys(context.Background()) //nolint:errcheck + for k := range ch { + _, ok := allKeys[k.String()] + if !ok { + t.Fatal("unexpected key") + } + + delete(allKeys, k.String()) + } + + if len(allKeys) != 0 { + t.Fatal("not all keys were returned") + } + + // no close and reopen and ensure the keys still exist + err = s.Close() + if err != nil { + t.Fatal(err) + } + + s, err = NewTrackingStore("/tmp/snoop-test") + if err != nil { + t.Fatal(err) + } + + mustHave(s, k1, 1) + mustHave(s, k2, 2) + mustHave(s, k3, 3) + mustHave(s, k4, 4) + + s.Close() //nolint:errcheck +} From da478832cb37beef10ec3b115d64c02b549a092a Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 26 Nov 2020 20:52:52 +0200 Subject: [PATCH 020/148] quiet linter --- chain/store/splitstore/snoop_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/chain/store/splitstore/snoop_test.go b/chain/store/splitstore/snoop_test.go index 
fe1b3629c..5fc62d3b1 100644 --- a/chain/store/splitstore/snoop_test.go +++ b/chain/store/splitstore/snoop_test.go @@ -68,8 +68,8 @@ func TestTrackingStore(t *testing.T) { mustHave(s, k3, 3) mustHave(s, k4, 4) - s.PutBatch([]cid.Cid{k1}, 1) - s.PutBatch([]cid.Cid{k2}, 2) + s.PutBatch([]cid.Cid{k1}, 1) //nolint + s.PutBatch([]cid.Cid{k2}, 2) //nolint mustHave(s, k1, 1) mustHave(s, k2, 2) @@ -77,10 +77,10 @@ func TestTrackingStore(t *testing.T) { mustHave(s, k4, 4) allKeys := map[string]struct{}{ - k1.String(): struct{}{}, - k2.String(): struct{}{}, - k3.String(): struct{}{}, - k4.String(): struct{}{}, + k1.String(): {}, + k2.String(): {}, + k3.String(): {}, + k4.String(): {}, } ch, _ := s.Keys(context.Background()) //nolint:errcheck From d20cbc0c28012acbea75a143bee6b0ffa3e0fe2c Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 29 Nov 2020 12:48:52 +0200 Subject: [PATCH 021/148] protect against potential data races overkill, but let's not have race detectors scream at us. --- chain/store/splitstore/splitstore.go | 30 ++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 4969f8265..ad1203eef 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -28,8 +28,10 @@ var baseEpochKey = dstore.NewKey("baseEpoch") var log = logging.Logger("splitstore") type SplitStore struct { - baseEpoch abi.ChainEpoch - curTs *types.TipSet + mx sync.Mutex + baseEpoch abi.ChainEpoch + curTs *types.TipSet + compacting bool cs *store.ChainStore ds dstore.Datastore @@ -39,9 +41,6 @@ type SplitStore struct { snoop TrackingStore - stateMx sync.Mutex - compacting bool - env *lmdb.Env } @@ -98,7 +97,10 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { } func (s *SplitStore) Put(blk blocks.Block) error { + s.mx.Lock() epoch := s.curTs.Height() + s.mx.Unlock() + err := s.snoop.Put(blk.Cid(), epoch) if err != nil { return err @@ -113,7 
+115,9 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { return err } + s.mx.Lock() epoch := s.curTs.Height() + s.mx.Unlock() batch := make([]cid.Cid, 0, len(blks)) for _, blk := range blks { @@ -213,9 +217,12 @@ func (s *SplitStore) Close() error { } func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { + s.mx.Lock() s.curTs = apply[len(apply)-1] epoch := s.curTs.Height() - if epoch-s.baseEpoch > CompactionThreshold && !s.isCompacting() { + s.mx.Unlock() + + if !s.isCompacting() && epoch-s.baseEpoch > CompactionThreshold { s.setCompacting(true) go func() { defer s.setCompacting(false) @@ -233,14 +240,14 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { } func (s *SplitStore) isCompacting() bool { - s.stateMx.Lock() - defer s.stateMx.Unlock() + s.mx.Lock() + defer s.mx.Unlock() return s.compacting } func (s *SplitStore) setCompacting(state bool) { - s.stateMx.Lock() - defer s.stateMx.Unlock() + s.mx.Lock() + defer s.mx.Unlock() s.compacting = state } @@ -267,7 +274,10 @@ func (s *SplitStore) compact() { startMark := time.Now() // Phase 1a: mark all reachable CIDs in the hot range + s.mx.Lock() curTs := s.curTs + s.mx.Unlock() + epoch := curTs.Height() coldEpoch := s.baseEpoch + build.Finality err = s.cs.WalkSnapshot(context.Background(), curTs, epoch-coldEpoch+1, false, false, From 5db314f422b4a6c077d6bc97ff30d0e9cab7c0e3 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 29 Nov 2020 12:50:59 +0200 Subject: [PATCH 022/148] fallback to coldstore if snooping fails. 
--- chain/store/splitstore/splitstore.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index ad1203eef..60604351a 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -103,7 +103,8 @@ func (s *SplitStore) Put(blk blocks.Block) error { err := s.snoop.Put(blk.Cid(), epoch) if err != nil { - return err + log.Errorf("error tracking CID in hotstore: %s; falling back to coldstore", err) + return s.cold.Put(blk) } return s.hot.Put(blk) @@ -112,7 +113,8 @@ func (s *SplitStore) Put(blk blocks.Block) error { func (s *SplitStore) PutMany(blks []blocks.Block) error { err := s.hot.PutMany(blks) if err != nil { - return err + log.Errorf("error tracking CIDs in hotstore: %s; falling back to coldstore", err) + return s.cold.PutMany(blks) } s.mx.Lock() From 37e391f13361a9cddc93a4781f20549e001470ab Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 29 Nov 2020 12:58:09 +0200 Subject: [PATCH 023/148] add TODO note about map size --- chain/store/splitstore/snoop.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index 3a452a933..e7dceaaef 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -12,7 +12,7 @@ import ( "github.com/filecoin-project/go-state-types/abi" ) -var TrackingStoreMapSize int64 = 1 << 34 // 16G +var TrackingStoreMapSize int64 = 1 << 34 // 16G; TODO this may be a little too big, we should figure out how to gradually grow the map. 
type TrackingStore interface { Put(cid.Cid, abi.ChainEpoch) error From 0af7b16ad501bb63fb5c3c61b8e15c7b3efd95d1 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 29 Nov 2020 13:00:36 +0200 Subject: [PATCH 024/148] simplify Has --- chain/store/splitstore/splitstore.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 60604351a..3adc589ad 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -55,12 +55,8 @@ func (s *SplitStore) DeleteBlock(cid cid.Cid) error { func (s *SplitStore) Has(cid cid.Cid) (bool, error) { has, err := s.hot.Has(cid) - if err != nil { - return false, err - } - - if has { - return true, nil + if err != nil || has { + return has, err } return s.cold.Has(cid) From b0f48b500f4bf9450fed11bf0ce34ecd33e1554c Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 29 Nov 2020 15:10:30 +0200 Subject: [PATCH 025/148] use CAS for compacting state --- chain/store/splitstore/splitstore.go | 40 +++++++++++++--------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 3adc589ad..3b89bb7c6 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -5,6 +5,7 @@ import ( "encoding/binary" "errors" "sync" + "sync/atomic" "time" "github.com/bmatsuo/lmdb-go/lmdb" @@ -28,10 +29,12 @@ var baseEpochKey = dstore.NewKey("baseEpoch") var log = logging.Logger("splitstore") type SplitStore struct { - mx sync.Mutex - baseEpoch abi.ChainEpoch - curTs *types.TipSet - compacting bool + compacting int32 + + baseEpoch abi.ChainEpoch + + mx sync.Mutex + curTs *types.TipSet cs *store.ChainStore ds dstore.Datastore @@ -204,9 +207,9 @@ func (s *SplitStore) Start(cs *store.ChainStore) error { } func (s *SplitStore) Close() error { - if s.isCompacting() { + if atomic.LoadInt32(&s.compacting) == 1 { log.Warn("ongoing 
compaction; waiting for it to finish...") - for s.isCompacting() { + for atomic.LoadInt32(&s.compacting) == 1 { time.Sleep(time.Second) } } @@ -220,10 +223,14 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { epoch := s.curTs.Height() s.mx.Unlock() - if !s.isCompacting() && epoch-s.baseEpoch > CompactionThreshold { - s.setCompacting(true) + if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { + // we are currently compacting, do nothing and wait for the next head change + return nil + } + + if epoch-s.baseEpoch > CompactionThreshold { go func() { - defer s.setCompacting(false) + defer atomic.StoreInt32(&s.compacting, 0) log.Info("compacting splitstore") start := time.Now() @@ -232,23 +239,14 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { log.Infow("compaction done", "took", time.Since(start)) }() + } else { + // no compaction necessary + atomic.StoreInt32(&s.compacting, 0) } return nil } -func (s *SplitStore) isCompacting() bool { - s.mx.Lock() - defer s.mx.Unlock() - return s.compacting -} - -func (s *SplitStore) setCompacting(state bool) { - s.mx.Lock() - defer s.mx.Unlock() - s.compacting = state -} - // Compaction/GC Algorithm func (s *SplitStore) compact() { // create two on disk live sets, one for marking the cold finality region From e87ce6cb604e2c5fdc1dc66cf6c03026cdd35266 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 1 Dec 2020 17:16:49 +0200 Subject: [PATCH 026/148] go get go-bs-lmdb --- go.mod | 1 + go.sum | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/go.mod b/go.mod index 220779501..0ba662838 100644 --- a/go.mod +++ b/go.mod @@ -29,6 +29,7 @@ require ( github.com/filecoin-project/go-address v0.0.5 github.com/filecoin-project/go-amt-ipld/v2 v2.1.1-0.20201006184820-924ee87a1349 // indirect github.com/filecoin-project/go-bitfield v0.2.4 + github.com/filecoin-project/go-bs-lmdb v0.0.2 github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 
github.com/filecoin-project/go-commp-utils v0.0.0-20201119054358-b88f7a96a434 github.com/filecoin-project/go-crypto v0.0.0-20191218222705-effae4ea9f03 diff --git a/go.sum b/go.sum index 2d06052f5..9415aac06 100644 --- a/go.sum +++ b/go.sum @@ -246,6 +246,10 @@ github.com/filecoin-project/go-bitfield v0.2.0/go.mod h1:CNl9WG8hgR5mttCnUErjcQj github.com/filecoin-project/go-bitfield v0.2.3/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= github.com/filecoin-project/go-bitfield v0.2.4 h1:uZ7MeE+XfM5lqrHJZ93OnhQKc/rveW8p9au0C68JPgk= github.com/filecoin-project/go-bitfield v0.2.4/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= +github.com/filecoin-project/go-bitfield v0.2.3-0.20201110211213-fe2c1862e816 h1:RMdzMqe3mu2Z/3N3b9UEfkbGZxukstmZgNC024ybWhA= +github.com/filecoin-project/go-bitfield v0.2.3-0.20201110211213-fe2c1862e816/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= +github.com/filecoin-project/go-bs-lmdb v0.0.2 h1:FeHjg3B0TPcVUVYyykNrfMaRujZr0aN14CxWGv3feXo= +github.com/filecoin-project/go-bs-lmdb v0.0.2/go.mod h1:Rt1cAcl80csj0aEvISab9g7r8PEqTdZGDnCaOR3tjFw= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 h1:av5fw6wmm58FYMgJeoB/lK9XXrgdugYiTqkdxjTy9k8= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2/go.mod h1:pqTiPHobNkOVM5thSRsHYjyQfq7O5QSCMhvuu9JoDlg= github.com/filecoin-project/go-commp-utils v0.0.0-20201119054358-b88f7a96a434 h1:0kHszkYP3hgApcjl5x4rpwONhN9+j7XDobf6at5XfHs= @@ -1321,6 +1325,9 @@ github.com/raulk/clock v1.1.0 h1:dpb29+UKMbLqiU/jqIJptgLR1nn23HLgMY0sTCDza5Y= github.com/raulk/clock v1.1.0/go.mod h1:3MpVxdZ/ODBQDxbN+kzshf5OSZwPjtMDx6BBXBmOeY0= github.com/raulk/go-watchdog v1.0.1 h1:qgm3DIJAeb+2byneLrQJ7kvmDLGxN2vy3apXyGaDKN4= github.com/raulk/go-watchdog v1.0.1/go.mod h1:lzSbAl5sh4rtI8tYHU01BWIDzgzqaQLj6RcA1i4mlqI= +github.com/raulk/freecache v1.2.0 h1:1HDmZsDi+zvFe1/vMLkdrX9ThWdXQsIp4btrGJ824yc= +github.com/raulk/freecache v1.2.0/go.mod 
h1:Ixh8xigQnoxRDvSTzugeiLYv35Y/q9neOs90UaPT7N8= +github.com/raulk/go-bs-tests v0.0.3/go.mod h1:ZREaOSaReTvV4nY7Qh6Lkl+QisYXNBWcPRa0gjrIaG4= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= From e07c6c71c017ed4ade07c330591ba37032257b48 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 1 Dec 2020 17:17:34 +0200 Subject: [PATCH 027/148] splitstore constructor --- chain/store/splitstore/liveset.go | 38 +++++++++++++++++++++ chain/store/splitstore/splitstore.go | 49 ++++++++++++++++++++++++---- 2 files changed, 81 insertions(+), 6 deletions(-) diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index d7571aa62..21c139a80 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -1,11 +1,16 @@ package splitstore import ( + "fmt" + "os" + "github.com/bmatsuo/lmdb-go/lmdb" cid "github.com/ipfs/go-cid" ) +var LiveSetMapSize int64 = 1 << 34 // 16G; TODO this may be a little too big, we should figure out how to gradually grow the map. 
+ type LiveSet interface { Mark(cid.Cid) error Has(cid.Cid) (bool, error) @@ -19,6 +24,39 @@ type liveSet struct { var markBytes = []byte{} +func NewLiveSetEnv(path string) (*lmdb.Env, error) { + env, err := lmdb.NewEnv() + if err != nil { + return nil, fmt.Errorf("failed to initialize LDMB env: %w", err) + } + if err = env.SetMapSize(LiveSetMapSize); err != nil { + return nil, fmt.Errorf("failed to set LMDB map size: %w", err) + } + if err = env.SetMaxDBs(2); err != nil { + return nil, fmt.Errorf("failed to set LMDB max dbs: %w", err) + } + if err = env.SetMaxReaders(1); err != nil { + return nil, fmt.Errorf("failed to set LMDB max readers: %w", err) + } + + if st, err := os.Stat(path); os.IsNotExist(err) { + if err := os.MkdirAll(path, 0777); err != nil { + return nil, fmt.Errorf("failed to create LMDB data directory at %s: %w", path, err) + } + } else if err != nil { + return nil, fmt.Errorf("failed to stat LMDB data dir: %w", err) + } else if !st.IsDir() { + return nil, fmt.Errorf("LMDB path is not a directory %s", path) + } + err = env.Open(path, lmdb.NoSync|lmdb.WriteMap|lmdb.MapAsync|lmdb.NoReadahead, 0777) + if err != nil { + env.Close() //nolint:errcheck + return nil, fmt.Errorf("error opening LMDB database: %w", err) + } + + return env, nil +} + func NewLiveSet(env *lmdb.Env, name string) (LiveSet, error) { var db lmdb.DBI err := env.Update(func(txn *lmdb.Txn) (err error) { diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 3b89bb7c6..5ae97a1ef 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -4,6 +4,7 @@ import ( "context" "encoding/binary" "errors" + "path/filepath" "sync" "sync/atomic" "time" @@ -15,6 +16,7 @@ import ( dstore "github.com/ipfs/go-datastore" logging "github.com/ipfs/go-log/v2" + "github.com/filecoin-project/go-bs-lmdb" "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/build" 
"github.com/filecoin-project/lotus/chain/store" @@ -36,12 +38,10 @@ type SplitStore struct { mx sync.Mutex curTs *types.TipSet - cs *store.ChainStore - ds dstore.Datastore - - hot bstore.Blockstore - cold bstore.Blockstore - + cs *store.ChainStore + ds dstore.Datastore + hot bstore.Blockstore + cold bstore.Blockstore snoop TrackingStore env *lmdb.Env @@ -49,6 +49,43 @@ type SplitStore struct { var _ bstore.Blockstore = (*SplitStore)(nil) +// NewSplitStore creates a new SplitStore instance, given a path for the hotstore dbs and a cold +// blockstore. The SplitStore must be attached to the ChainStore with Start in order to trigger +// compaction. +func NewSplitStore(path string, ds dstore.Datastore, cold bstore.Blockstore) (*SplitStore, error) { + // the hot store + hot, err := lmdbbs.Open(filepath.Join(path, "hot.db")) + if err != nil { + return nil, err + } + + // the tracking store + snoop, err := NewTrackingStore(filepath.Join(path, "snoop.db")) + if err != nil { + hot.Close() //nolint:errcheck + return nil, err + } + + // the liveset env + env, err := NewLiveSetEnv(filepath.Join(path, "sweep.db")) + if err != nil { + hot.Close() //nolint:errcheck + snoop.Close() //nolint:errcheck + return nil, err + } + + // and now we can make a SplitStore + ss := &SplitStore{ + ds: ds, + hot: hot, + cold: cold, + snoop: snoop, + env: env, + } + + return ss, nil +} + // Blockstore interface func (s *SplitStore) DeleteBlock(cid cid.Cid) error { // afaict we don't seem to be using this method, so it's not implemented From 622b4f7d9da5528064de82da7c945c426751ebfe Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 1 Dec 2020 17:35:58 +0200 Subject: [PATCH 028/148] hook splitstore into DI --- node/builder.go | 1 + node/modules/blockstore.go | 58 +++++++++++++++++++++++++++++++--- node/modules/chain.go | 12 ++++++- node/modules/dtypes/storage.go | 3 ++ node/repo/interface.go | 3 ++ 5 files changed, 72 insertions(+), 5 deletions(-) diff --git a/node/builder.go b/node/builder.go index 
b9f2e85bf..f4aebd429 100644 --- a/node/builder.go +++ b/node/builder.go @@ -591,6 +591,7 @@ func Repo(r repo.Repo) Option { Override(new(dtypes.MetadataDS), modules.Datastore), Override(new(dtypes.UniversalBlockstore), modules.UniversalBlockstore), + Override(new(dtypes.SplitBlockstore), modules.SplitBlockstore), Override(new(dtypes.ChainBlockstore), modules.ChainBlockstore), Override(new(dtypes.StateBlockstore), modules.StateBlockstore), Override(new(dtypes.ExposedBlockstore), From(new(dtypes.UniversalBlockstore))), diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 5b1d2ee63..160ac8fc4 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -9,6 +9,7 @@ import ( "golang.org/x/xerrors" "github.com/filecoin-project/lotus/blockstore" + "github.com/filecoin-project/lotus/chain/store/splitstore" "github.com/filecoin-project/lotus/node/modules/dtypes" "github.com/filecoin-project/lotus/node/modules/helpers" "github.com/filecoin-project/lotus/node/repo" @@ -31,18 +32,67 @@ func UniversalBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, r repo.Locked return bs, err } +func SplitBlockstore(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, bs dtypes.UniversalBlockstore) (dtypes.SplitBlockstore, error) { + path, err := r.SplitstorePath() + if err != nil { + return nil, err + } + + ss, err := splitstore.NewSplitStore(path, ds, bs) + if err != nil { + return nil, err + } + lc.Append(fx.Hook{ + OnStop: func(context.Context) error { + return ss.Close() + }, + }) + + return ss, err +} + +// StateBlockstore returns the blockstore to use to store the state tree. // StateBlockstore is a hook to overlay caches for state objects, or in the // future, to segregate the universal blockstore into different physical state // and chain stores. 
-func StateBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.UniversalBlockstore) (dtypes.StateBlockstore, error) { - return bs, nil +func StateBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.SplitBlockstore) (dtypes.StateBlockstore, error) { + sbs, err := blockstore.WrapFreecacheCache(helpers.LifecycleCtx(mctx, lc), bs, blockstore.FreecacheConfig{ + Name: "state", + BlockCapacity: 288 * 1024 * 1024, // 288MiB. + ExistsCapacity: 48 * 1024 * 1024, // 48MiB. + }) + if err != nil { + return nil, err + } + // this may end up double closing the underlying blockstore, but all + // blockstores should be lenient or idempotent on double-close. The native + // badger blockstore is (and unit tested). + if c, ok := bs.(io.Closer); ok { + lc.Append(closerStopHook(c)) + } + return sbs, nil } +// ChainBlockstore returns the blockstore to use for chain data (tipsets, blocks, messages). // ChainBlockstore is a hook to overlay caches for state objects, or in the // future, to segregate the universal blockstore into different physical state // and chain stores. -func ChainBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.UniversalBlockstore) (dtypes.ChainBlockstore, error) { - return bs, nil +func ChainBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.SplitBlockstore) (dtypes.ChainBlockstore, error) { + cbs, err := blockstore.WrapFreecacheCache(helpers.LifecycleCtx(mctx, lc), bs, blockstore.FreecacheConfig{ + Name: "chain", + BlockCapacity: 64 * 1024 * 1024, // 64MiB. + ExistsCapacity: 16 * 1024, // 16MiB. + }) + if err != nil { + return nil, err + } + // this may end up double closing the underlying blockstore, but all + // blockstores should be lenient or idempotent on double-close. The native + // badger blockstore is (and unit tested). 
+ if c, ok := bs.(io.Closer); ok { + lc.Append(closerStopHook(c)) + } + return cbs, nil } func FallbackChainBlockstore(cbs dtypes.ChainBlockstore) dtypes.ChainBlockstore { diff --git a/node/modules/chain.go b/node/modules/chain.go index 029064b97..0108a6282 100644 --- a/node/modules/chain.go +++ b/node/modules/chain.go @@ -22,6 +22,7 @@ import ( "github.com/filecoin-project/lotus/chain/messagepool" "github.com/filecoin-project/lotus/chain/stmgr" "github.com/filecoin-project/lotus/chain/store" + "github.com/filecoin-project/lotus/chain/store/splitstore" "github.com/filecoin-project/lotus/chain/vm" "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper" "github.com/filecoin-project/lotus/journal" @@ -72,13 +73,22 @@ func MessagePool(lc fx.Lifecycle, sm *stmgr.StateManager, ps *pubsub.PubSub, ds return mp, nil } -func ChainStore(lc fx.Lifecycle, cbs dtypes.ChainBlockstore, sbs dtypes.StateBlockstore, ds dtypes.MetadataDS, syscalls vm.SyscallBuilder, j journal.Journal) *store.ChainStore { +func ChainStore(lc fx.Lifecycle, cbs dtypes.ChainBlockstore, sbs dtypes.StateBlockstore, ds dtypes.MetadataDS, ss dtypes.SplitBlockstore, syscalls vm.SyscallBuilder, j journal.Journal) *store.ChainStore { chain := store.NewChainStore(cbs, sbs, ds, syscalls, j) if err := chain.Load(); err != nil { log.Warnf("loading chain state from disk: %s", err) } + if ssp, ok := ss.(*splitstore.SplitStore); ok { + err := ssp.Start(chain) + if err != nil { + log.Errorf("error starting splitstore: %s", err) + } + } else { + log.Warnf("unexpected splitstore type: %+v", ss) + } + lc.Append(fx.Hook{ OnStop: func(_ context.Context) error { return chain.Close() diff --git a/node/modules/dtypes/storage.go b/node/modules/dtypes/storage.go index c6963e1e2..4d1d957c0 100644 --- a/node/modules/dtypes/storage.go +++ b/node/modules/dtypes/storage.go @@ -27,6 +27,9 @@ type ( // UniversalBlockstore is the cold blockstore. 
UniversalBlockstore blockstore.Blockstore + // SplitBlockstore is the hot/cold blockstore that sits on top of the ColdBlockstore. + SplitBlockstore blockstore.Blockstore + // ChainBlockstore is a blockstore to store chain data (tipsets, blocks, // messages). It is physically backed by the BareMonolithBlockstore, but it // has a cache on top that is specially tuned for chain data access diff --git a/node/repo/interface.go b/node/repo/interface.go index 33979c8de..8c24caac4 100644 --- a/node/repo/interface.go +++ b/node/repo/interface.go @@ -66,6 +66,9 @@ type LockedRepo interface { // SplitstorePath returns the path for the SplitStore SplitstorePath() (string, error) + // SplitstorePath returns the path for the SplitStore + SplitstorePath() (string, error) + // Returns config in this repo Config() (interface{}, error) SetConfig(func(interface{})) error From 3912694fb75ccab14c64af2a36a0b65b6c85e70b Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 1 Dec 2020 17:47:14 +0200 Subject: [PATCH 029/148] fix lotus-shed build --- cmd/lotus-shed/balances.go | 27 +++++++++++++++++++++++++-- cmd/lotus-shed/export.go | 14 +++++++++++++- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/cmd/lotus-shed/balances.go b/cmd/lotus-shed/balances.go index 8c5bfefb8..6a8551edf 100644 --- a/cmd/lotus-shed/balances.go +++ b/cmd/lotus-shed/balances.go @@ -36,6 +36,7 @@ import ( "github.com/filecoin-project/lotus/chain/state" "github.com/filecoin-project/lotus/chain/stmgr" "github.com/filecoin-project/lotus/chain/store" + "github.com/filecoin-project/lotus/chain/store/splitstore" "github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/chain/vm" lcli "github.com/filecoin-project/lotus/cli" @@ -193,7 +194,18 @@ var chainBalanceStateCmd = &cli.Command{ return err } - cs := store.NewChainStore(bs, bs, mds, vm.Syscalls(ffiwrapper.ProofVerifier), nil) + ssPath, err := lkrepo.SplitstorePath() + if err != nil { + return err + } + + ss, err := 
splitstore.NewSplitStore(ssPath, mds, bs) + if err != nil { + return err + } + defer ss.Close() //nolint:errcheck + + cs := store.NewChainStore(ss, ss, mds, vm.Syscalls(ffiwrapper.ProofVerifier), nil) defer cs.Close() //nolint:errcheck cst := cbor.NewCborStore(bs) @@ -414,7 +426,18 @@ var chainPledgeCmd = &cli.Command{ return err } - cs := store.NewChainStore(bs, bs, mds, vm.Syscalls(ffiwrapper.ProofVerifier), nil) + ssPath, err := lkrepo.SplitstorePath() + if err != nil { + return err + } + + ss, err := splitstore.NewSplitStore(ssPath, mds, bs) + if err != nil { + return err + } + defer ss.Close() //nolint:errcheck + + cs := store.NewChainStore(ss, ss, mds, vm.Syscalls(ffiwrapper.ProofVerifier), nil) defer cs.Close() //nolint:errcheck cst := cbor.NewCborStore(bs) diff --git a/cmd/lotus-shed/export.go b/cmd/lotus-shed/export.go index e711ba2bb..7d1016e6c 100644 --- a/cmd/lotus-shed/export.go +++ b/cmd/lotus-shed/export.go @@ -12,6 +12,7 @@ import ( "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/chain/store" + "github.com/filecoin-project/lotus/chain/store/splitstore" "github.com/filecoin-project/lotus/chain/types" lcli "github.com/filecoin-project/lotus/cli" "github.com/filecoin-project/lotus/node/repo" @@ -90,7 +91,18 @@ var exportChainCmd = &cli.Command{ return err } - cs := store.NewChainStore(bs, bs, mds, nil, nil) + ssPath, err := lr.SplitstorePath() + if err != nil { + return err + } + + ss, err := splitstore.NewSplitStore(ssPath, mds, bs) + if err != nil { + return err + } + defer ss.Close() //nolint:errcheck + + cs := store.NewChainStore(ss, ss, mds, nil, nil) defer cs.Close() //nolint:errcheck if err := cs.Load(); err != nil { From facdc555b154b991515bd12499cc4b2c5f380852 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 1 Dec 2020 17:56:22 +0200 Subject: [PATCH 030/148] add nil check for curTs -- some tests don't have chain state --- chain/store/splitstore/splitstore.go | 23 +++++++++++++++++++---- 1 file changed, 19 
insertions(+), 4 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 5ae97a1ef..751ab2a56 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -134,6 +134,11 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { func (s *SplitStore) Put(blk blocks.Block) error { s.mx.Lock() + if s.curTs == nil { + s.mx.Unlock() + return s.cold.Put(blk) + } + epoch := s.curTs.Height() s.mx.Unlock() @@ -147,16 +152,21 @@ func (s *SplitStore) Put(blk blocks.Block) error { } func (s *SplitStore) PutMany(blks []blocks.Block) error { + s.mx.Lock() + if s.curTs == nil { + s.mx.Unlock() + return s.cold.PutMany(blks) + } + + epoch := s.curTs.Height() + s.mx.Unlock() + err := s.hot.PutMany(blks) if err != nil { log.Errorf("error tracking CIDs in hotstore: %s; falling back to coldstore", err) return s.cold.PutMany(blks) } - s.mx.Lock() - epoch := s.curTs.Height() - s.mx.Unlock() - batch := make([]cid.Cid, 0, len(blks)) for _, blk := range blks { batch = append(batch, blk.Cid()) @@ -228,6 +238,11 @@ func (s *SplitStore) Start(cs *store.ChainStore) error { s.baseEpoch = bytesToEpoch(bs) case dstore.ErrNotFound: + if s.curTs == nil { + // this can happen in some tests + break + } + err = s.setBaseEpoch(s.curTs.Height()) if err != nil { return err From f44cf0f2c44cce710be44db052108c4dd0d41bd2 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 1 Dec 2020 18:14:34 +0200 Subject: [PATCH 031/148] appease linter --- chain/store/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 751ab2a56..c3559f167 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -16,7 +16,7 @@ import ( dstore "github.com/ipfs/go-datastore" logging "github.com/ipfs/go-log/v2" - "github.com/filecoin-project/go-bs-lmdb" + lmdbbs "github.com/filecoin-project/go-bs-lmdb" 
"github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/store" From 843fd09a6229ccb81cf64054d7a3d8c1dce0d75d Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 1 Dec 2020 18:21:46 +0200 Subject: [PATCH 032/148] deal with MDB_KEY_EXIST errors --- chain/store/splitstore/splitstore.go | 30 +++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index c3559f167..c3df9cd0e 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -144,6 +144,11 @@ func (s *SplitStore) Put(blk blocks.Block) error { err := s.snoop.Put(blk.Cid(), epoch) if err != nil { + if lmdb.IsErrno(err, lmdb.KeyExist) { + // duplicate write, ignore + return nil + } + log.Errorf("error tracking CID in hotstore: %s; falling back to coldstore", err) return s.cold.Put(blk) } @@ -161,18 +166,29 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { epoch := s.curTs.Height() s.mx.Unlock() - err := s.hot.PutMany(blks) - if err != nil { - log.Errorf("error tracking CIDs in hotstore: %s; falling back to coldstore", err) - return s.cold.PutMany(blks) - } - batch := make([]cid.Cid, 0, len(blks)) for _, blk := range blks { batch = append(batch, blk.Cid()) } - return s.snoop.PutBatch(batch, epoch) + err := s.snoop.PutBatch(batch, epoch) + if err != nil { + if lmdb.IsErrno(err, lmdb.KeyExist) { + // a write is duplicate, but we don't know which; write each block separately + for _, blk := range blks { + err = s.Put(blk) + if err != nil { + return err + } + } + return nil + } + + log.Errorf("error tracking CIDs in hotstore: %s; falling back to coldstore", err) + return s.cold.PutMany(blks) + } + + return s.hot.PutMany(blks) } func (s *SplitStore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { From ce41e394dc1f3bbcaa4f2106e2ec5e55caa6bdf0 Mon Sep 17 00:00:00 2001 From: vyzo Date: 
Tue, 1 Dec 2020 18:34:11 +0200 Subject: [PATCH 033/148] handle MDB_KEYEXIST in liveset marking --- chain/store/splitstore/liveset.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index 21c139a80..a598e42da 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -73,7 +73,11 @@ func NewLiveSet(env *lmdb.Env, name string) (LiveSet, error) { func (s *liveSet) Mark(cid cid.Cid) error { return s.env.Update(func(txn *lmdb.Txn) error { - return txn.Put(s.db, cid.Hash(), markBytes, 0) + err := txn.Put(s.db, cid.Hash(), markBytes, 0) + if err == nil || lmdb.IsErrno(err, lmdb.KeyExist) { + return nil + } + return err }) } From 3f8da19a34706c481c342986ad4b81c51000ae19 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 1 Dec 2020 18:40:14 +0200 Subject: [PATCH 034/148] go get go-bs-lmdb@v0.0.3 --- go.mod | 2 +- go.sum | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 0ba662838..536251e17 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/filecoin-project/go-address v0.0.5 github.com/filecoin-project/go-amt-ipld/v2 v2.1.1-0.20201006184820-924ee87a1349 // indirect github.com/filecoin-project/go-bitfield v0.2.4 - github.com/filecoin-project/go-bs-lmdb v0.0.2 + github.com/filecoin-project/go-bs-lmdb v0.0.3 github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 github.com/filecoin-project/go-commp-utils v0.0.0-20201119054358-b88f7a96a434 github.com/filecoin-project/go-crypto v0.0.0-20191218222705-effae4ea9f03 diff --git a/go.sum b/go.sum index 9415aac06..ae413c088 100644 --- a/go.sum +++ b/go.sum @@ -250,6 +250,10 @@ github.com/filecoin-project/go-bitfield v0.2.3-0.20201110211213-fe2c1862e816 h1: github.com/filecoin-project/go-bitfield v0.2.3-0.20201110211213-fe2c1862e816/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= github.com/filecoin-project/go-bs-lmdb v0.0.2 
h1:FeHjg3B0TPcVUVYyykNrfMaRujZr0aN14CxWGv3feXo= github.com/filecoin-project/go-bs-lmdb v0.0.2/go.mod h1:Rt1cAcl80csj0aEvISab9g7r8PEqTdZGDnCaOR3tjFw= +github.com/filecoin-project/go-bs-lmdb v0.0.3-0.20201201162933-d7f9ef615d0f h1:PYFZWzcv0PlANuh64UopYppkulhD9O5uiJ19o359DKM= +github.com/filecoin-project/go-bs-lmdb v0.0.3-0.20201201162933-d7f9ef615d0f/go.mod h1:Oq9zP5FMx7IomcY79neGD76YsfeMY3N7BKGDkofn/Ao= +github.com/filecoin-project/go-bs-lmdb v0.0.3 h1:tSgG5S9+5zojhmnRSQxiabWFyHPxdDP9eU7sAgaECvI= +github.com/filecoin-project/go-bs-lmdb v0.0.3/go.mod h1:Oq9zP5FMx7IomcY79neGD76YsfeMY3N7BKGDkofn/Ao= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 h1:av5fw6wmm58FYMgJeoB/lK9XXrgdugYiTqkdxjTy9k8= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2/go.mod h1:pqTiPHobNkOVM5thSRsHYjyQfq7O5QSCMhvuu9JoDlg= github.com/filecoin-project/go-commp-utils v0.0.0-20201119054358-b88f7a96a434 h1:0kHszkYP3hgApcjl5x4rpwONhN9+j7XDobf6at5XfHs= @@ -1328,6 +1332,7 @@ github.com/raulk/go-watchdog v1.0.1/go.mod h1:lzSbAl5sh4rtI8tYHU01BWIDzgzqaQLj6R github.com/raulk/freecache v1.2.0 h1:1HDmZsDi+zvFe1/vMLkdrX9ThWdXQsIp4btrGJ824yc= github.com/raulk/freecache v1.2.0/go.mod h1:Ixh8xigQnoxRDvSTzugeiLYv35Y/q9neOs90UaPT7N8= github.com/raulk/go-bs-tests v0.0.3/go.mod h1:ZREaOSaReTvV4nY7Qh6Lkl+QisYXNBWcPRa0gjrIaG4= +github.com/raulk/go-bs-tests v0.0.4/go.mod h1:ZREaOSaReTvV4nY7Qh6Lkl+QisYXNBWcPRa0gjrIaG4= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= From 6e51e6db9a7f4765494ce733e4b85a3dc174fdad Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 1 Dec 2020 18:44:39 +0200 Subject: [PATCH 035/148] better handling of MDB_KEYEXIST in Put --- 
chain/store/splitstore/splitstore.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index c3df9cd0e..f1394562b 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -143,12 +143,7 @@ func (s *SplitStore) Put(blk blocks.Block) error { s.mx.Unlock() err := s.snoop.Put(blk.Cid(), epoch) - if err != nil { - if lmdb.IsErrno(err, lmdb.KeyExist) { - // duplicate write, ignore - return nil - } - + if err != nil && !lmdb.IsErrno(err, lmdb.KeyExist) { log.Errorf("error tracking CID in hotstore: %s; falling back to coldstore", err) return s.cold.Put(blk) } From 1a23b1f6afb399f3fe1462b505bf6cba869748fd Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 1 Dec 2020 18:48:43 +0200 Subject: [PATCH 036/148] make CompactionThreshold a var to fix lotus-soup build finality is not a constant there! --- chain/store/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index f1394562b..65619b44a 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -24,7 +24,7 @@ import ( bstore "github.com/filecoin-project/lotus/lib/blockstore" ) -const CompactionThreshold = 5 * build.Finality +var CompactionThreshold = 5 * build.Finality var baseEpochKey = dstore.NewKey("baseEpoch") From 76d6edbb5247c5f8e5b42d8ed4ad6cb087f6ec4d Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 1 Dec 2020 18:57:31 +0200 Subject: [PATCH 037/148] fix max readers for tracking store --- chain/store/splitstore/snoop.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index e7dceaaef..286c43be2 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -39,7 +39,7 @@ func NewTrackingStore(path string) (TrackingStore, error) { if err 
= env.SetMaxDBs(1); err != nil { return nil, fmt.Errorf("failed to set LMDB max dbs: %w", err) } - if err = env.SetMaxReaders(1); err != nil { + if err = env.SetMaxReaders(2); err != nil { return nil, fmt.Errorf("failed to set LMDB max readers: %w", err) } From 8b0087524fbd07e85dd0895b29f161da91b655e2 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 1 Dec 2020 21:48:08 +0200 Subject: [PATCH 038/148] adjust walk boundaries for marking --- chain/store/splitstore/splitstore.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 65619b44a..fac3c796c 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -339,7 +339,7 @@ func (s *SplitStore) compact() { epoch := curTs.Height() coldEpoch := s.baseEpoch + build.Finality - err = s.cs.WalkSnapshot(context.Background(), curTs, epoch-coldEpoch+1, false, false, + err = s.cs.WalkSnapshot(context.Background(), curTs, epoch-coldEpoch, false, false, func(cid cid.Cid) error { return hotSet.Mark(cid) }) @@ -350,7 +350,7 @@ func (s *SplitStore) compact() { } // Phase 1b: mark all reachable CIDs in the cold range - coldTs, err := s.cs.GetTipsetByHeight(context.Background(), coldEpoch-1, curTs, true) + coldTs, err := s.cs.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) if err != nil { // TODO do something better here panic(err) @@ -392,7 +392,7 @@ func (s *SplitStore) compact() { } // is the object stil hot? 
- if wrEpoch >= coldEpoch { + if wrEpoch > coldEpoch { // yes, stay in the hotstore stHot++ continue From 58a84348253f1a91cc5d053480ad34b082a5ec81 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 13 Jan 2021 14:21:35 +0200 Subject: [PATCH 039/148] temporary log level for splitstore to DEBUG --- chain/store/splitstore/splitstore.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index fac3c796c..e280bca15 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -30,6 +30,11 @@ var baseEpochKey = dstore.NewKey("baseEpoch") var log = logging.Logger("splitstore") +func init() { + // TODO temporary for debugging purposes; to be removed for merge. + logging.SetLogLevel("splitstore", "DEBUG") +} + type SplitStore struct { compacting int32 From 5b4e6b7b2639e3d9835ff1aabcac98290ac638f5 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 20 Jan 2021 20:52:18 +0200 Subject: [PATCH 040/148] don't set max readers for livesets --- chain/store/splitstore/liveset.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index a598e42da..433046371 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -35,9 +35,9 @@ func NewLiveSetEnv(path string) (*lmdb.Env, error) { if err = env.SetMaxDBs(2); err != nil { return nil, fmt.Errorf("failed to set LMDB max dbs: %w", err) } - if err = env.SetMaxReaders(1); err != nil { - return nil, fmt.Errorf("failed to set LMDB max readers: %w", err) - } + // if err = env.SetMaxReaders(1); err != nil { + // return nil, fmt.Errorf("failed to set LMDB max readers: %w", err) + // } if st, err := os.Stat(path); os.IsNotExist(err) { if err := os.MkdirAll(path, 0777); err != nil { From 877ecab960d955a54d94e87a2cb3cfb6f6941b9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Mon, 25 Jan 2021 19:28:38 
+0000 Subject: [PATCH 041/148] update go-bs-lmdb and migrate to ledgerwatch/lmdb-go. --- chain/store/splitstore/liveset.go | 2 +- chain/store/splitstore/liveset_test.go | 2 +- chain/store/splitstore/snoop.go | 2 +- chain/store/splitstore/snoop_test.go | 2 +- chain/store/splitstore/splitstore.go | 11 +++++++++-- go.mod | 4 ++-- go.sum | 6 ++++-- 7 files changed, 19 insertions(+), 10 deletions(-) diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index 433046371..f4d06129e 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -4,7 +4,7 @@ import ( "fmt" "os" - "github.com/bmatsuo/lmdb-go/lmdb" + "github.com/ledgerwatch/lmdb-go/lmdb" cid "github.com/ipfs/go-cid" ) diff --git a/chain/store/splitstore/liveset_test.go b/chain/store/splitstore/liveset_test.go index 77c8cb5e6..449b3c92f 100644 --- a/chain/store/splitstore/liveset_test.go +++ b/chain/store/splitstore/liveset_test.go @@ -4,7 +4,7 @@ import ( "os" "testing" - "github.com/bmatsuo/lmdb-go/lmdb" + "github.com/ledgerwatch/lmdb-go/lmdb" cid "github.com/ipfs/go-cid" "github.com/multiformats/go-multihash" diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index 286c43be2..731014fb6 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -5,7 +5,7 @@ import ( "fmt" "os" - "github.com/bmatsuo/lmdb-go/lmdb" + "github.com/ledgerwatch/lmdb-go/lmdb" cid "github.com/ipfs/go-cid" diff --git a/chain/store/splitstore/snoop_test.go b/chain/store/splitstore/snoop_test.go index 5fc62d3b1..7e60d03d9 100644 --- a/chain/store/splitstore/snoop_test.go +++ b/chain/store/splitstore/snoop_test.go @@ -4,7 +4,7 @@ import ( "context" "testing" - "github.com/bmatsuo/lmdb-go/lmdb" + "github.com/ledgerwatch/lmdb-go/lmdb" cid "github.com/ipfs/go-cid" "github.com/multiformats/go-multihash" diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index e280bca15..474ae2a50 100644 --- 
a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -9,7 +9,7 @@ import ( "sync/atomic" "time" - "github.com/bmatsuo/lmdb-go/lmdb" + "github.com/ledgerwatch/lmdb-go/lmdb" blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" @@ -59,7 +59,14 @@ var _ bstore.Blockstore = (*SplitStore)(nil) // compaction. func NewSplitStore(path string, ds dstore.Datastore, cold bstore.Blockstore) (*SplitStore, error) { // the hot store - hot, err := lmdbbs.Open(filepath.Join(path, "hot.db")) + path = filepath.Join(path, "hot.db") + hot, err := lmdbbs.Open(&lmdbbs.Options{ + Path: path, + InitialMmapSize: 256 << 20, // 256MiB. + MmapGrowthStepFactor: 1.25, // scale slower than the default of 1.5 + MmapGrowthStepMax: 512 << 20, // 512MiB. + MaxReaders: 32, + }) if err != nil { return nil, err } diff --git a/go.mod b/go.mod index 536251e17..56d193618 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,6 @@ require ( github.com/Jeffail/gabs v1.4.0 github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d - github.com/bmatsuo/lmdb-go v1.8.0 github.com/buger/goterm v0.0.0-20200322175922-2f3e71b85129 github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e github.com/cockroachdb/pebble v0.0.0-20201001221639-879f3bfeef07 @@ -29,7 +28,7 @@ require ( github.com/filecoin-project/go-address v0.0.5 github.com/filecoin-project/go-amt-ipld/v2 v2.1.1-0.20201006184820-924ee87a1349 // indirect github.com/filecoin-project/go-bitfield v0.2.4 - github.com/filecoin-project/go-bs-lmdb v0.0.3 + github.com/filecoin-project/go-bs-lmdb v1.0.1 github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 github.com/filecoin-project/go-commp-utils v0.0.0-20201119054358-b88f7a96a434 github.com/filecoin-project/go-crypto v0.0.0-20191218222705-effae4ea9f03 @@ -97,6 +96,7 @@ require ( github.com/ipld/go-car v0.1.1-0.20201119040415-11b6074b6d4d github.com/ipld/go-ipld-prime 
v0.5.1-0.20201021195245-109253e8a018 github.com/kelseyhightower/envconfig v1.4.0 + github.com/ledgerwatch/lmdb-go v1.17.4 github.com/lib/pq v1.7.0 github.com/libp2p/go-buffer-pool v0.0.2 github.com/libp2p/go-eventbus v0.2.1 diff --git a/go.sum b/go.sum index ae413c088..13155e513 100644 --- a/go.sum +++ b/go.sum @@ -90,8 +90,6 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= -github.com/bmatsuo/lmdb-go v1.8.0 h1:ohf3Q4xjXZBKh4AayUY4bb2CXuhRAI8BYGlJq08EfNA= -github.com/bmatsuo/lmdb-go v1.8.0/go.mod h1:wWPZmKdOAZsl4qOqkowQ1aCrFie1HU8gWloHMCeAUdM= github.com/bradfitz/go-smtpd v0.0.0-20170404230938-deb6d6237625/go.mod h1:HYsPBTaaSFSlLx/70C2HPIMNZpVV8+vt/A+FMnYP11g= github.com/briandowns/spinner v1.11.1/go.mod h1:QOuQk7x+EaDASo80FEXwlwiA+j/PPIcX3FScO+3/ZPQ= github.com/btcsuite/btcd v0.0.0-20190213025234-306aecffea32/go.mod h1:DrZx5ec/dmnfpw9KyYoQyYo7d0KEvTkk/5M/vbZjAr8= @@ -254,6 +252,8 @@ github.com/filecoin-project/go-bs-lmdb v0.0.3-0.20201201162933-d7f9ef615d0f h1:P github.com/filecoin-project/go-bs-lmdb v0.0.3-0.20201201162933-d7f9ef615d0f/go.mod h1:Oq9zP5FMx7IomcY79neGD76YsfeMY3N7BKGDkofn/Ao= github.com/filecoin-project/go-bs-lmdb v0.0.3 h1:tSgG5S9+5zojhmnRSQxiabWFyHPxdDP9eU7sAgaECvI= github.com/filecoin-project/go-bs-lmdb v0.0.3/go.mod h1:Oq9zP5FMx7IomcY79neGD76YsfeMY3N7BKGDkofn/Ao= +github.com/filecoin-project/go-bs-lmdb v1.0.1 h1:kAoPGgZqUQ0IowIzjdDo251X6smWuy286E6h3m03zqY= +github.com/filecoin-project/go-bs-lmdb v1.0.1/go.mod h1:peFIZ9XEE9OLFkCzi7FMlr84UexqVKj6+AyxZD5SiGs= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 h1:av5fw6wmm58FYMgJeoB/lK9XXrgdugYiTqkdxjTy9k8= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2/go.mod 
h1:pqTiPHobNkOVM5thSRsHYjyQfq7O5QSCMhvuu9JoDlg= github.com/filecoin-project/go-commp-utils v0.0.0-20201119054358-b88f7a96a434 h1:0kHszkYP3hgApcjl5x4rpwONhN9+j7XDobf6at5XfHs= @@ -762,6 +762,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/ledgerwatch/lmdb-go v1.17.4 h1:dDgPXUrzFWG/EB3RwOKZ+P3XGAlbsZxmVahjc+qWwyA= +github.com/ledgerwatch/lmdb-go v1.17.4/go.mod h1:NKRpCxksoTQPyxsUcBiVOe0135uqnJsnf6cElxmOL0o= github.com/lib/pq v1.7.0 h1:h93mCPfUSkaul3Ka/VG8uZdmW1uMHDGxzu0NWHuJmHY= github.com/lib/pq v1.7.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/libp2p/go-addr-util v0.0.1/go.mod h1:4ac6O7n9rIAKB1dnd+s8IbbMXkt+oBpzX4/+RACcnlQ= From 5872f246fff2c6a9d60f4cc8e924ed794bdf371c Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 26 Jan 2021 21:35:03 +0200 Subject: [PATCH 042/148] go get go-bs-lmdb@v1.0.2 --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 56d193618..eaa743053 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/filecoin-project/go-address v0.0.5 github.com/filecoin-project/go-amt-ipld/v2 v2.1.1-0.20201006184820-924ee87a1349 // indirect github.com/filecoin-project/go-bitfield v0.2.4 - github.com/filecoin-project/go-bs-lmdb v1.0.1 + github.com/filecoin-project/go-bs-lmdb v1.0.2 github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 github.com/filecoin-project/go-commp-utils v0.0.0-20201119054358-b88f7a96a434 github.com/filecoin-project/go-crypto v0.0.0-20191218222705-effae4ea9f03 diff --git a/go.sum b/go.sum index 13155e513..f92585251 100644 --- a/go.sum +++ b/go.sum @@ -254,6 +254,8 @@ github.com/filecoin-project/go-bs-lmdb v0.0.3 
h1:tSgG5S9+5zojhmnRSQxiabWFyHPxdDP github.com/filecoin-project/go-bs-lmdb v0.0.3/go.mod h1:Oq9zP5FMx7IomcY79neGD76YsfeMY3N7BKGDkofn/Ao= github.com/filecoin-project/go-bs-lmdb v1.0.1 h1:kAoPGgZqUQ0IowIzjdDo251X6smWuy286E6h3m03zqY= github.com/filecoin-project/go-bs-lmdb v1.0.1/go.mod h1:peFIZ9XEE9OLFkCzi7FMlr84UexqVKj6+AyxZD5SiGs= +github.com/filecoin-project/go-bs-lmdb v1.0.2 h1:cj+M3DzlcWYtNJpea8AqeU2SKz2+eTr+1N1GIUU0iBQ= +github.com/filecoin-project/go-bs-lmdb v1.0.2/go.mod h1:peFIZ9XEE9OLFkCzi7FMlr84UexqVKj6+AyxZD5SiGs= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 h1:av5fw6wmm58FYMgJeoB/lK9XXrgdugYiTqkdxjTy9k8= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2/go.mod h1:pqTiPHobNkOVM5thSRsHYjyQfq7O5QSCMhvuu9JoDlg= github.com/filecoin-project/go-commp-utils v0.0.0-20201119054358-b88f7a96a434 h1:0kHszkYP3hgApcjl5x4rpwONhN9+j7XDobf6at5XfHs= From 2080e467bad8c0a33bf94a057d049b25ef6e3c1d Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 29 Jan 2021 13:02:01 +0200 Subject: [PATCH 043/148] don't set MaxReaders for tracking store --- chain/store/splitstore/snoop.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index 731014fb6..dab1794cf 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -39,9 +39,9 @@ func NewTrackingStore(path string) (TrackingStore, error) { if err = env.SetMaxDBs(1); err != nil { return nil, fmt.Errorf("failed to set LMDB max dbs: %w", err) } - if err = env.SetMaxReaders(2); err != nil { - return nil, fmt.Errorf("failed to set LMDB max readers: %w", err) - } + // if err = env.SetMaxReaders(2); err != nil { + // return nil, fmt.Errorf("failed to set LMDB max readers: %w", err) + // } if st, err := os.Stat(path); os.IsNotExist(err) { if err := os.MkdirAll(path, 0777); err != nil { From c89ab1a99068c9b60c65dc933d1a3dd08e1fb6f9 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 1 
Feb 2021 14:27:20 +0200 Subject: [PATCH 044/148] retry on MDB_READERS_FULL errors --- chain/store/splitstore/lmdb_util.go | 20 +++++ chain/store/splitstore/snoop.go | 109 ++++++++++++++++------------ 2 files changed, 82 insertions(+), 47 deletions(-) create mode 100644 chain/store/splitstore/lmdb_util.go diff --git a/chain/store/splitstore/lmdb_util.go b/chain/store/splitstore/lmdb_util.go new file mode 100644 index 000000000..394fad9da --- /dev/null +++ b/chain/store/splitstore/lmdb_util.go @@ -0,0 +1,20 @@ +package splitstore + +import ( + "math/rand" + "time" + + "github.com/ledgerwatch/lmdb-go/lmdb" +) + +func withMaxReadersRetry(f func() error) error { +retry: + err := f() + if lmdb.IsErrno(err, lmdb.ReadersFull) { + dt := time.Microsecond + time.Duration(rand.Intn(int(time.Millisecond))) + time.Sleep(dt) + goto retry + } + + return err +} diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index dab1794cf..80b878ec1 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -75,77 +75,92 @@ func NewTrackingStore(path string) (TrackingStore, error) { func (s *trackingStore) Put(cid cid.Cid, epoch abi.ChainEpoch) error { val := epochToBytes(epoch) - return s.env.Update(func(txn *lmdb.Txn) error { - return txn.Put(s.db, cid.Hash(), val, 0) - }) + return withMaxReadersRetry( + func() error { + return s.env.Update(func(txn *lmdb.Txn) error { + return txn.Put(s.db, cid.Hash(), val, 0) + }) + }) } func (s *trackingStore) PutBatch(cids []cid.Cid, epoch abi.ChainEpoch) error { val := epochToBytes(epoch) - return s.env.Update(func(txn *lmdb.Txn) error { - for _, cid := range cids { - err := txn.Put(s.db, cid.Hash(), val, 0) - if err != nil { - return err - } - } + return withMaxReadersRetry( + func() error { + return s.env.Update(func(txn *lmdb.Txn) error { + for _, cid := range cids { + err := txn.Put(s.db, cid.Hash(), val, 0) + if err != nil { + return err + } + } - return nil - }) + return nil + }) + }) } func (s 
*trackingStore) Get(cid cid.Cid) (epoch abi.ChainEpoch, err error) { - err = s.env.View(func(txn *lmdb.Txn) error { - txn.RawRead = true + err = withMaxReadersRetry( + func() error { + return s.env.View(func(txn *lmdb.Txn) error { + txn.RawRead = true - val, err := txn.Get(s.db, cid.Hash()) - if err != nil { - return err - } + val, err := txn.Get(s.db, cid.Hash()) + if err != nil { + return err + } - epoch = bytesToEpoch(val) - return nil - }) + epoch = bytesToEpoch(val) + return nil + }) + }) return } func (s *trackingStore) Delete(cid cid.Cid) error { - return s.env.Update(func(txn *lmdb.Txn) error { - return txn.Del(s.db, cid.Hash(), nil) - }) + return withMaxReadersRetry( + func() error { + return s.env.Update(func(txn *lmdb.Txn) error { + return txn.Del(s.db, cid.Hash(), nil) + }) + }) } func (s *trackingStore) Keys(ctx context.Context) (<-chan cid.Cid, error) { ch := make(chan cid.Cid) go func() { - err := s.env.View(func(txn *lmdb.Txn) error { - defer close(ch) + err := withMaxReadersRetry( + func() error { + return s.env.View(func(txn *lmdb.Txn) error { + defer close(ch) - txn.RawRead = true - cur, err := txn.OpenCursor(s.db) - if err != nil { - return err - } - defer cur.Close() - - for { - k, _, err := cur.Get(nil, nil, lmdb.Next) - if err != nil { - if lmdb.IsNotFound(err) { - return nil + txn.RawRead = true + cur, err := txn.OpenCursor(s.db) + if err != nil { + return err } + defer cur.Close() - return err - } + for { + k, _, err := cur.Get(nil, nil, lmdb.Next) + if err != nil { + if lmdb.IsNotFound(err) { + return nil + } - select { - case ch <- cid.NewCidV1(cid.Raw, k): - case <-ctx.Done(): - return nil - } - } - }) + return err + } + + select { + case ch <- cid.NewCidV1(cid.Raw, k): + case <-ctx.Done(): + return nil + } + } + }) + }) if err != nil { log.Errorf("error iterating over tracking store keys: %s", err) From b9f8a3d587795865bc0e5dc2a106899c43d7f9b1 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 1 Feb 2021 14:31:31 +0200 Subject: [PATCH 
045/148] log MDB_READERS_FULL retries --- chain/store/splitstore/lmdb_util.go | 1 + 1 file changed, 1 insertion(+) diff --git a/chain/store/splitstore/lmdb_util.go b/chain/store/splitstore/lmdb_util.go index 394fad9da..2f0cb706c 100644 --- a/chain/store/splitstore/lmdb_util.go +++ b/chain/store/splitstore/lmdb_util.go @@ -12,6 +12,7 @@ retry: err := f() if lmdb.IsErrno(err, lmdb.ReadersFull) { dt := time.Microsecond + time.Duration(rand.Intn(int(time.Millisecond))) + log.Debugf("MDB_READERS_FULL; retrying operation in %s", dt) time.Sleep(dt) goto retry } From d91b60df91eff9e2dd533ad67517facad89feb40 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 1 Feb 2021 18:39:50 +0200 Subject: [PATCH 046/148] fix potential panic with max readers retry and cursor channel --- chain/store/splitstore/snoop.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index 80b878ec1..edf0ca63f 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -131,10 +131,11 @@ func (s *trackingStore) Delete(cid cid.Cid) error { func (s *trackingStore) Keys(ctx context.Context) (<-chan cid.Cid, error) { ch := make(chan cid.Cid) go func() { + defer close(ch) + err := withMaxReadersRetry( func() error { return s.env.View(func(txn *lmdb.Txn) error { - defer close(ch) txn.RawRead = true cur, err := txn.OpenCursor(s.db) From ea05fd9d865556a2afadd74ba694ef3f17b3d020 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 10 Feb 2021 19:14:23 +0200 Subject: [PATCH 047/148] use xerrors instead of fmt.Errorf --- chain/store/splitstore/liveset.go | 19 ++++++++++--------- chain/store/splitstore/snoop.go | 19 ++++++++++--------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index f4d06129e..3fea285d0 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -1,9 +1,10 @@ package splitstore 
import ( - "fmt" "os" + "golang.org/x/xerrors" + "github.com/ledgerwatch/lmdb-go/lmdb" cid "github.com/ipfs/go-cid" @@ -27,31 +28,31 @@ var markBytes = []byte{} func NewLiveSetEnv(path string) (*lmdb.Env, error) { env, err := lmdb.NewEnv() if err != nil { - return nil, fmt.Errorf("failed to initialize LDMB env: %w", err) + return nil, xerrors.Errorf("failed to initialize LDMB env: %w", err) } if err = env.SetMapSize(LiveSetMapSize); err != nil { - return nil, fmt.Errorf("failed to set LMDB map size: %w", err) + return nil, xerrors.Errorf("failed to set LMDB map size: %w", err) } if err = env.SetMaxDBs(2); err != nil { - return nil, fmt.Errorf("failed to set LMDB max dbs: %w", err) + return nil, xerrors.Errorf("failed to set LMDB max dbs: %w", err) } // if err = env.SetMaxReaders(1); err != nil { - // return nil, fmt.Errorf("failed to set LMDB max readers: %w", err) + // return nil, xerrors.Errorf("failed to set LMDB max readers: %w", err) // } if st, err := os.Stat(path); os.IsNotExist(err) { if err := os.MkdirAll(path, 0777); err != nil { - return nil, fmt.Errorf("failed to create LMDB data directory at %s: %w", path, err) + return nil, xerrors.Errorf("failed to create LMDB data directory at %s: %w", path, err) } } else if err != nil { - return nil, fmt.Errorf("failed to stat LMDB data dir: %w", err) + return nil, xerrors.Errorf("failed to stat LMDB data dir: %w", err) } else if !st.IsDir() { - return nil, fmt.Errorf("LMDB path is not a directory %s", path) + return nil, xerrors.Errorf("LMDB path is not a directory %s", path) } err = env.Open(path, lmdb.NoSync|lmdb.WriteMap|lmdb.MapAsync|lmdb.NoReadahead, 0777) if err != nil { env.Close() //nolint:errcheck - return nil, fmt.Errorf("error opening LMDB database: %w", err) + return nil, xerrors.Errorf("error opening LMDB database: %w", err) } return env, nil diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index edf0ca63f..7b07b7e21 100644 --- a/chain/store/splitstore/snoop.go +++ 
b/chain/store/splitstore/snoop.go @@ -2,9 +2,10 @@ package splitstore import ( "context" - "fmt" "os" + "golang.org/x/xerrors" + "github.com/ledgerwatch/lmdb-go/lmdb" cid "github.com/ipfs/go-cid" @@ -31,32 +32,32 @@ type trackingStore struct { func NewTrackingStore(path string) (TrackingStore, error) { env, err := lmdb.NewEnv() if err != nil { - return nil, fmt.Errorf("failed to initialize LMDB env: %w", err) + return nil, xerrors.Errorf("failed to initialize LMDB env: %w", err) } if err = env.SetMapSize(TrackingStoreMapSize); err != nil { - return nil, fmt.Errorf("failed to set LMDB map size: %w", err) + return nil, xerrors.Errorf("failed to set LMDB map size: %w", err) } if err = env.SetMaxDBs(1); err != nil { - return nil, fmt.Errorf("failed to set LMDB max dbs: %w", err) + return nil, xerrors.Errorf("failed to set LMDB max dbs: %w", err) } // if err = env.SetMaxReaders(2); err != nil { - // return nil, fmt.Errorf("failed to set LMDB max readers: %w", err) + // return nil, xerrors.Errorf("failed to set LMDB max readers: %w", err) // } if st, err := os.Stat(path); os.IsNotExist(err) { if err := os.MkdirAll(path, 0777); err != nil { - return nil, fmt.Errorf("failed to create LMDB data directory at %s: %w", path, err) + return nil, xerrors.Errorf("failed to create LMDB data directory at %s: %w", path, err) } } else if err != nil { - return nil, fmt.Errorf("failed to stat LMDB data dir: %w", err) + return nil, xerrors.Errorf("failed to stat LMDB data dir: %w", err) } else if !st.IsDir() { - return nil, fmt.Errorf("LMDB path is not a directory %s", path) + return nil, xerrors.Errorf("LMDB path is not a directory %s", path) } err = env.Open(path, lmdb.NoSync|lmdb.WriteMap|lmdb.MapAsync|lmdb.NoReadahead, 0777) if err != nil { env.Close() //nolint:errcheck - return nil, fmt.Errorf("error opening LMDB database: %w", err) + return nil, xerrors.Errorf("error opening LMDB database: %w", err) } s := new(trackingStore) From cdf5bd0500a4f99f35b0cfd7b10fe7cdf7599d39 Mon Sep 17 
00:00:00 2001 From: vyzo Date: Wed, 10 Feb 2021 19:21:42 +0200 Subject: [PATCH 048/148] return annotated xerrors where appropriate --- chain/store/splitstore/lmdb_util.go | 10 ++++++++-- chain/store/splitstore/snoop.go | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/chain/store/splitstore/lmdb_util.go b/chain/store/splitstore/lmdb_util.go index 2f0cb706c..91920bca3 100644 --- a/chain/store/splitstore/lmdb_util.go +++ b/chain/store/splitstore/lmdb_util.go @@ -4,18 +4,24 @@ import ( "math/rand" "time" + "golang.org/x/xerrors" + "github.com/ledgerwatch/lmdb-go/lmdb" ) func withMaxReadersRetry(f func() error) error { retry: err := f() - if lmdb.IsErrno(err, lmdb.ReadersFull) { + if err != nil && lmdb.IsErrno(err, lmdb.ReadersFull) { dt := time.Microsecond + time.Duration(rand.Intn(int(time.Millisecond))) log.Debugf("MDB_READERS_FULL; retrying operation in %s", dt) time.Sleep(dt) goto retry } - return err + if err != nil { + return xerrors.Errorf("error performing lmdb operation: %w", err) + } + + return nil } diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index 7b07b7e21..f823a45ed 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -68,7 +68,7 @@ func NewTrackingStore(path string) (TrackingStore, error) { }) if err != nil { - return nil, err + return nil, xerrors.Errorf("error creating tracking store: %w", err) } return s, nil From 69a88d41b636ae5ce3039d5adfab3c46d5dc93fc Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 10 Feb 2021 19:35:34 +0200 Subject: [PATCH 049/148] fix snoop test --- chain/store/splitstore/snoop_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/chain/store/splitstore/snoop_test.go b/chain/store/splitstore/snoop_test.go index 7e60d03d9..816fdbdb7 100644 --- a/chain/store/splitstore/snoop_test.go +++ b/chain/store/splitstore/snoop_test.go @@ -4,6 +4,8 @@ import ( "context" "testing" + "golang.org/x/xerrors" + 
"github.com/ledgerwatch/lmdb-go/lmdb" cid "github.com/ipfs/go-cid" @@ -35,7 +37,11 @@ func TestTrackingStore(t *testing.T) { mustNotHave := func(s TrackingStore, cid cid.Cid) { _, err := s.Get(cid) - if !lmdb.IsNotFound(err) { + xerr := xerrors.Unwrap(err) + if xerr == nil { + xerr = err + } + if !lmdb.IsNotFound(xerr) { t.Fatal("expected key not found") } } From ca8a673b5fd2a051faefa3a1e21c440f3554ad5f Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 11 Feb 2021 19:15:59 +0200 Subject: [PATCH 050/148] adjust hot store options --- chain/store/splitstore/splitstore.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 474ae2a50..f68fb5fed 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -62,10 +62,10 @@ func NewSplitStore(path string, ds dstore.Datastore, cold bstore.Blockstore) (*S path = filepath.Join(path, "hot.db") hot, err := lmdbbs.Open(&lmdbbs.Options{ Path: path, - InitialMmapSize: 256 << 20, // 256MiB. - MmapGrowthStepFactor: 1.25, // scale slower than the default of 1.5 - MmapGrowthStepMax: 512 << 20, // 512MiB. - MaxReaders: 32, + InitialMmapSize: 1 << 30, // 1GiB. + MmapGrowthStepFactor: 1.25, // scale slower than the default of 1.5 + MmapGrowthStepMax: 1 << 32, // 4GiB + MaxReaders: 192, }) if err != nil { return nil, err From 874ecd3573fcba3f7cb012a650a0a3e4b3d0ca45 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 11 Feb 2021 19:24:39 +0200 Subject: [PATCH 051/148] adjust hot store options, redux. 
--- chain/store/splitstore/splitstore.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index f68fb5fed..170520b56 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -62,10 +62,9 @@ func NewSplitStore(path string, ds dstore.Datastore, cold bstore.Blockstore) (*S path = filepath.Join(path, "hot.db") hot, err := lmdbbs.Open(&lmdbbs.Options{ Path: path, - InitialMmapSize: 1 << 30, // 1GiB. + InitialMmapSize: 4 << 30, // 4GiB. MmapGrowthStepFactor: 1.25, // scale slower than the default of 1.5 - MmapGrowthStepMax: 1 << 32, // 4GiB - MaxReaders: 192, + MmapGrowthStepMax: 4 << 30, // 4GiB }) if err != nil { return nil, err From 723e48b3996ddb03b03d0f22a72cef1c770930e7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 11 Feb 2021 19:33:31 +0200 Subject: [PATCH 052/148] gomod:update go-bs-lmdb to v1.0.3 --- go.mod | 2 +- go.sum | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index eaa743053..05ede3b12 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/filecoin-project/go-address v0.0.5 github.com/filecoin-project/go-amt-ipld/v2 v2.1.1-0.20201006184820-924ee87a1349 // indirect github.com/filecoin-project/go-bitfield v0.2.4 - github.com/filecoin-project/go-bs-lmdb v1.0.2 + github.com/filecoin-project/go-bs-lmdb v1.0.3 github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 github.com/filecoin-project/go-commp-utils v0.0.0-20201119054358-b88f7a96a434 github.com/filecoin-project/go-crypto v0.0.0-20191218222705-effae4ea9f03 diff --git a/go.sum b/go.sum index f92585251..1c5e6fce4 100644 --- a/go.sum +++ b/go.sum @@ -256,6 +256,10 @@ github.com/filecoin-project/go-bs-lmdb v1.0.1 h1:kAoPGgZqUQ0IowIzjdDo251X6smWuy2 github.com/filecoin-project/go-bs-lmdb v1.0.1/go.mod h1:peFIZ9XEE9OLFkCzi7FMlr84UexqVKj6+AyxZD5SiGs= github.com/filecoin-project/go-bs-lmdb v1.0.2 
h1:cj+M3DzlcWYtNJpea8AqeU2SKz2+eTr+1N1GIUU0iBQ= github.com/filecoin-project/go-bs-lmdb v1.0.2/go.mod h1:peFIZ9XEE9OLFkCzi7FMlr84UexqVKj6+AyxZD5SiGs= +github.com/filecoin-project/go-bitfield v0.2.3 h1:pedK/7maYF06Z+BYJf2OeFFqIDEh6SP6mIOlLFpYXGs= +github.com/filecoin-project/go-bitfield v0.2.3/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= +github.com/filecoin-project/go-bs-lmdb v1.0.3 h1:QRf/yMw5hFjqMIpi9mi/Hkh4qberUI++56XAdB0VgwM= +github.com/filecoin-project/go-bs-lmdb v1.0.3/go.mod h1:peFIZ9XEE9OLFkCzi7FMlr84UexqVKj6+AyxZD5SiGs= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 h1:av5fw6wmm58FYMgJeoB/lK9XXrgdugYiTqkdxjTy9k8= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2/go.mod h1:pqTiPHobNkOVM5thSRsHYjyQfq7O5QSCMhvuu9JoDlg= github.com/filecoin-project/go-commp-utils v0.0.0-20201119054358-b88f7a96a434 h1:0kHszkYP3hgApcjl5x4rpwONhN9+j7XDobf6at5XfHs= From 95befa1e415e16fc5a3e265658bd8d0e32fe1946 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 11 Feb 2021 19:35:07 +0200 Subject: [PATCH 053/148] set lmdb max readers retry delay to 1ms --- chain/store/splitstore/splitstore.go | 1 + 1 file changed, 1 insertion(+) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 170520b56..8edb8993e 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -65,6 +65,7 @@ func NewSplitStore(path string, ds dstore.Datastore, cold bstore.Blockstore) (*S InitialMmapSize: 4 << 30, // 4GiB. 
MmapGrowthStepFactor: 1.25, // scale slower than the default of 1.5 MmapGrowthStepMax: 4 << 30, // 4GiB + RetryDelay: time.Millisecond, }) if err != nil { return nil, err From f6c930d0aa53c3df36ce4f8d5d9dafb9a542917e Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 13 Feb 2021 16:54:19 +0200 Subject: [PATCH 054/148] crank up blockstore max readers to 16K, reduce retry delays to 10us --- chain/store/splitstore/lmdb_util.go | 2 +- chain/store/splitstore/splitstore.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/chain/store/splitstore/lmdb_util.go b/chain/store/splitstore/lmdb_util.go index 91920bca3..5aff2ed0d 100644 --- a/chain/store/splitstore/lmdb_util.go +++ b/chain/store/splitstore/lmdb_util.go @@ -13,7 +13,7 @@ func withMaxReadersRetry(f func() error) error { retry: err := f() if err != nil && lmdb.IsErrno(err, lmdb.ReadersFull) { - dt := time.Microsecond + time.Duration(rand.Intn(int(time.Millisecond))) + dt := time.Microsecond + time.Duration(rand.Intn(int(10*time.Microsecond))) log.Debugf("MDB_READERS_FULL; retrying operation in %s", dt) time.Sleep(dt) goto retry diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 8edb8993e..19f2c6f12 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -65,7 +65,8 @@ func NewSplitStore(path string, ds dstore.Datastore, cold bstore.Blockstore) (*S InitialMmapSize: 4 << 30, // 4GiB. 
MmapGrowthStepFactor: 1.25, // scale slower than the default of 1.5 MmapGrowthStepMax: 4 << 30, // 4GiB - RetryDelay: time.Millisecond, + RetryDelay: 10 * time.Microsecond, + MaxReaders: 16384, }) if err != nil { return nil, err From 7044e623f9c28d761e23d5323d8c213fe2533c17 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 26 Feb 2021 12:47:27 +0200 Subject: [PATCH 055/148] flag to enable GC during compaction, disabled for now --- chain/store/splitstore/splitstore.go | 34 ++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 19f2c6f12..d1edd9715 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -38,6 +38,8 @@ func init() { type SplitStore struct { compacting int32 + enableGC bool // TODO disabled for now, as it causes panics + baseEpoch abi.ChainEpoch mx sync.Mutex @@ -431,8 +433,28 @@ func (s *SplitStore) compact() { panic(err) } - if mark { - // the object is reachable in the cold range, move it to the cold store + if s.enableGC { + if mark { + // the object is reachable in the cold range, move it to the cold store + blk, err := s.hot.Get(cid) + if err != nil { + // TODO do something better here + panic(err) + } + + err = s.cold.Put(blk) + if err != nil { + // TODO do something better here + panic(err) + } + + stCold++ + } else { + // the object will be deleted + stDead++ + } + } else { + // if GC is disabled, we move both cold and dead objects to the coldstore blk, err := s.hot.Get(cid) if err != nil { // TODO do something better here @@ -445,9 +467,11 @@ func (s *SplitStore) compact() { panic(err) } - stCold++ - } else { - stDead++ + if mark { + stCold++ + } else { + stDead++ + } } // delete the object from the hotstore From a586d42c3b7396a0bc704f683d1471d66ff7497b Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 26 Feb 2021 15:45:30 +0200 Subject: [PATCH 056/148] make hot store DI injectable in the 
split store, default to badger. --- chain/store/splitstore/splitstore.go | 19 +---- node/builder.go | 40 ++++++++- node/config/def.go | 12 ++- node/modules/blockstore.go | 119 +++++++++++++++++---------- node/modules/chain.go | 8 +- node/modules/dtypes/storage.go | 6 ++ node/repo/interface.go | 1 + 7 files changed, 133 insertions(+), 72 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index d1edd9715..cd6390cae 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -16,7 +16,6 @@ import ( dstore "github.com/ipfs/go-datastore" logging "github.com/ipfs/go-log/v2" - lmdbbs "github.com/filecoin-project/go-bs-lmdb" "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/store" @@ -59,32 +58,16 @@ var _ bstore.Blockstore = (*SplitStore)(nil) // NewSplitStore creates a new SplitStore instance, given a path for the hotstore dbs and a cold // blockstore. The SplitStore must be attached to the ChainStore with Start in order to trigger // compaction. -func NewSplitStore(path string, ds dstore.Datastore, cold bstore.Blockstore) (*SplitStore, error) { - // the hot store - path = filepath.Join(path, "hot.db") - hot, err := lmdbbs.Open(&lmdbbs.Options{ - Path: path, - InitialMmapSize: 4 << 30, // 4GiB. 
- MmapGrowthStepFactor: 1.25, // scale slower than the default of 1.5 - MmapGrowthStepMax: 4 << 30, // 4GiB - RetryDelay: 10 * time.Microsecond, - MaxReaders: 16384, - }) - if err != nil { - return nil, err - } - +func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore) (*SplitStore, error) { // the tracking store snoop, err := NewTrackingStore(filepath.Join(path, "snoop.db")) if err != nil { - hot.Close() //nolint:errcheck return nil, err } // the liveset env env, err := NewLiveSetEnv(filepath.Join(path, "sweep.db")) if err != nil { - hot.Close() //nolint:errcheck snoop.Close() //nolint:errcheck return nil, err } diff --git a/node/builder.go b/node/builder.go index f4aebd429..d4569a402 100644 --- a/node/builder.go +++ b/node/builder.go @@ -586,15 +586,47 @@ func Repo(r repo.Repo) Option { return err } + var cfg *config.Blockstore + switch settings.nodeType { + case repo.FullNode: + cfgp, ok := c.(*config.FullNode) + if !ok { + return xerrors.Errorf("invalid config from repo, got: %T", c) + } + cfg = &cfgp.Blockstore + case repo.StorageMiner: + cfgp, ok := c.(*config.StorageMiner) + if !ok { + return xerrors.Errorf("invalid config from repo, got: %T", c) + } + cfg = &cfgp.Blockstore + default: + cfg = &config.Blockstore{} + } + return Options( Override(new(repo.LockedRepo), modules.LockedRepo(lr)), // module handles closing Override(new(dtypes.MetadataDS), modules.Datastore), Override(new(dtypes.UniversalBlockstore), modules.UniversalBlockstore), - Override(new(dtypes.SplitBlockstore), modules.SplitBlockstore), - Override(new(dtypes.ChainBlockstore), modules.ChainBlockstore), - Override(new(dtypes.StateBlockstore), modules.StateBlockstore), - Override(new(dtypes.ExposedBlockstore), From(new(dtypes.UniversalBlockstore))), + + If(cfg.Splitstore, + If(cfg.UseLMDB, + Override(new(dtypes.HotBlockstore), modules.LMDBHotBlockstore)), + If(!cfg.UseLMDB, + Override(new(dtypes.HotBlockstore), modules.BadgerHotBlockstore)), + 
Override(new(dtypes.SplitBlockstore), modules.SplitBlockstore), + Override(new(dtypes.ChainBlockstore), modules.ChainSplitBlockstore), + Override(new(dtypes.StateBlockstore), modules.StateSplitBlockstore), + Override(new(dtypes.BaseBlockstore), From(new(dtypes.SplitBlockstore))), + Override(new(dtypes.ExposedBlockstore), From(new(dtypes.SplitBlockstore))), + ), + If(!cfg.Splitstore, + Override(new(dtypes.ChainBlockstore), modules.ChainFlatBlockstore), + Override(new(dtypes.StateBlockstore), modules.StateFlatBlockstore), + Override(new(dtypes.BaseBlockstore), From(new(dtypes.UniversalBlockstore))), + Override(new(dtypes.ExposedBlockstore), From(new(dtypes.UniversalBlockstore))), + ), If(os.Getenv("LOTUS_ENABLE_CHAINSTORE_FALLBACK") == "1", Override(new(dtypes.ChainBlockstore), modules.FallbackChainBlockstore), diff --git a/node/config/def.go b/node/config/def.go index 579f123c8..56bba08ed 100644 --- a/node/config/def.go +++ b/node/config/def.go @@ -12,9 +12,10 @@ import ( // Common is common config between full node and miner type Common struct { - API API - Libp2p Libp2p - Pubsub Pubsub + API API + Libp2p Libp2p + Pubsub Pubsub + Blockstore Blockstore } // FullNode is a full node config @@ -119,6 +120,11 @@ type Pubsub struct { RemoteTracer string } +type Blockstore struct { + Splitstore bool + UseLMDB bool +} + // // Full Node type Metrics struct { diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 160ac8fc4..57c916865 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -3,7 +3,12 @@ package modules import ( "context" "io" + "os" + "path/filepath" + "time" + lmdbbs "github.com/filecoin-project/go-bs-lmdb" + badgerbs "github.com/filecoin-project/lotus/lib/blockstore/badger" bstore "github.com/ipfs/go-ipfs-blockstore" "go.uber.org/fx" "golang.org/x/xerrors" @@ -32,13 +37,71 @@ func UniversalBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, r repo.Locked return bs, err } -func SplitBlockstore(lc fx.Lifecycle, r 
repo.LockedRepo, ds dtypes.MetadataDS, bs dtypes.UniversalBlockstore) (dtypes.SplitBlockstore, error) { +func LMDBHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlockstore, error) { path, err := r.SplitstorePath() if err != nil { return nil, err } - ss, err := splitstore.NewSplitStore(path, ds, bs) + path = filepath.Join(path, "hot.db") + bs, err := lmdbbs.Open(&lmdbbs.Options{ + Path: path, + InitialMmapSize: 4 << 30, // 4GiB. + MmapGrowthStepFactor: 1.25, // scale slower than the default of 1.5 + MmapGrowthStepMax: 4 << 30, // 4GiB + RetryDelay: 10 * time.Microsecond, + MaxReaders: 1024, + }) + if err != nil { + return nil, err + } + + lc.Append(fx.Hook{ + OnStop: func(_ context.Context) error { + return bs.Close() + }}) + + hot := blockstore.WrapIDStore(bs) + return hot, err +} + +func BadgerHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlockstore, error) { + path, err := r.SplitstorePath() + if err != nil { + return nil, err + } + + path = filepath.Join(path, "hot.bs") + if err := os.MkdirAll(path, 0755); err != nil { + return nil, err + } + + opts, err := repo.BadgerBlockstoreOptions(repo.HotBlockstore, path, r.Readonly()) + if err != nil { + return nil, err + } + + bs, err := badgerbs.Open(opts) + if err != nil { + return nil, err + } + + lc.Append(fx.Hook{ + OnStop: func(_ context.Context) error { + return bs.Close() + }}) + + hot := blockstore.WrapIDStore(bs) + return hot, err +} + +func SplitBlockstore(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.ColdBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) { + path, err := r.SplitstorePath() + if err != nil { + return nil, err + } + + ss, err := splitstore.NewSplitStore(path, ds, cold, hot) if err != nil { return nil, err } @@ -51,48 +114,20 @@ func SplitBlockstore(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, b return ss, err } -// StateBlockstore returns the blockstore to use to store the state tree. 
-// StateBlockstore is a hook to overlay caches for state objects, or in the -// future, to segregate the universal blockstore into different physical state -// and chain stores. -func StateBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.SplitBlockstore) (dtypes.StateBlockstore, error) { - sbs, err := blockstore.WrapFreecacheCache(helpers.LifecycleCtx(mctx, lc), bs, blockstore.FreecacheConfig{ - Name: "state", - BlockCapacity: 288 * 1024 * 1024, // 288MiB. - ExistsCapacity: 48 * 1024 * 1024, // 48MiB. - }) - if err != nil { - return nil, err - } - // this may end up double closing the underlying blockstore, but all - // blockstores should be lenient or idempotent on double-close. The native - // badger blockstore is (and unit tested). - if c, ok := bs.(io.Closer); ok { - lc.Append(closerStopHook(c)) - } - return sbs, nil +func StateFlatBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.ColdBlockstore) (dtypes.StateBlockstore, error) { + return bs, nil } -// ChainBlockstore returns the blockstore to use for chain data (tipsets, blocks, messages). -// ChainBlockstore is a hook to overlay caches for state objects, or in the -// future, to segregate the universal blockstore into different physical state -// and chain stores. -func ChainBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.SplitBlockstore) (dtypes.ChainBlockstore, error) { - cbs, err := blockstore.WrapFreecacheCache(helpers.LifecycleCtx(mctx, lc), bs, blockstore.FreecacheConfig{ - Name: "chain", - BlockCapacity: 64 * 1024 * 1024, // 64MiB. - ExistsCapacity: 16 * 1024, // 16MiB. - }) - if err != nil { - return nil, err - } - // this may end up double closing the underlying blockstore, but all - // blockstores should be lenient or idempotent on double-close. The native - // badger blockstore is (and unit tested). 
- if c, ok := bs.(io.Closer); ok { - lc.Append(closerStopHook(c)) - } - return cbs, nil +func StateSplitBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.SplitBlockstore) (dtypes.StateBlockstore, error) { + return bs, nil +} + +func ChainFlatBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.ColdBlockstore) (dtypes.ChainBlockstore, error) { + return bs, nil +} + +func ChainSplitBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.SplitBlockstore) (dtypes.ChainBlockstore, error) { + return bs, nil } func FallbackChainBlockstore(cbs dtypes.ChainBlockstore) dtypes.ChainBlockstore { diff --git a/node/modules/chain.go b/node/modules/chain.go index 0108a6282..a59418688 100644 --- a/node/modules/chain.go +++ b/node/modules/chain.go @@ -73,20 +73,18 @@ func MessagePool(lc fx.Lifecycle, sm *stmgr.StateManager, ps *pubsub.PubSub, ds return mp, nil } -func ChainStore(lc fx.Lifecycle, cbs dtypes.ChainBlockstore, sbs dtypes.StateBlockstore, ds dtypes.MetadataDS, ss dtypes.SplitBlockstore, syscalls vm.SyscallBuilder, j journal.Journal) *store.ChainStore { +func ChainStore(lc fx.Lifecycle, cbs dtypes.ChainBlockstore, sbs dtypes.StateBlockstore, ds dtypes.MetadataDS, basebs dtypes.BaseBlockstore, syscalls vm.SyscallBuilder, j journal.Journal) *store.ChainStore { chain := store.NewChainStore(cbs, sbs, ds, syscalls, j) if err := chain.Load(); err != nil { log.Warnf("loading chain state from disk: %s", err) } - if ssp, ok := ss.(*splitstore.SplitStore); ok { - err := ssp.Start(chain) + if ss, ok := basebs.(*splitstore.SplitStore); ok { + err := ss.Start(chain) if err != nil { log.Errorf("error starting splitstore: %s", err) } - } else { - log.Warnf("unexpected splitstore type: %+v", ss) } lc.Append(fx.Hook{ diff --git a/node/modules/dtypes/storage.go b/node/modules/dtypes/storage.go index 4d1d957c0..216ccc1b1 100644 --- a/node/modules/dtypes/storage.go +++ b/node/modules/dtypes/storage.go @@ -27,9 +27,15 @@ type ( // UniversalBlockstore is the cold 
blockstore. UniversalBlockstore blockstore.Blockstore + // HotBlockstore is the Hot blockstore abstraction for the splitstore + HotBlockstore blockstore.Blockstore + // SplitBlockstore is the hot/cold blockstore that sits on top of the ColdBlockstore. SplitBlockstore blockstore.Blockstore + // BaseBlockstore is something, coz DI + BaseBlockstore blockstore.Blockstore + // ChainBlockstore is a blockstore to store chain data (tipsets, blocks, // messages). It is physically backed by the BareMonolithBlockstore, but it // has a cache on top that is specially tuned for chain data access diff --git a/node/repo/interface.go b/node/repo/interface.go index 8c24caac4..d4afbe2a0 100644 --- a/node/repo/interface.go +++ b/node/repo/interface.go @@ -23,6 +23,7 @@ const ( // well as state. In the future, they may get segregated into different // domains. UniversalBlockstore = BlockstoreDomain("universal") + HotBlockstore = BlockstoreDomain("hot") ) var ( From 842ec43c2f1ce239df0c0e2492a1eb66d98a4483 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 26 Feb 2021 15:59:36 +0200 Subject: [PATCH 057/148] get rid of goroutine iteration in tracking store; long live ForEach --- chain/store/splitstore/snoop.go | 66 +++++++++++----------------- chain/store/splitstore/splitstore.go | 53 +++++++++------------- 2 files changed, 47 insertions(+), 72 deletions(-) diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index f823a45ed..b260c2008 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -1,7 +1,6 @@ package splitstore import ( - "context" "os" "golang.org/x/xerrors" @@ -20,7 +19,7 @@ type TrackingStore interface { PutBatch([]cid.Cid, abi.ChainEpoch) error Get(cid.Cid) (abi.ChainEpoch, error) Delete(cid.Cid) error - Keys(context.Context) (<-chan cid.Cid, error) + ForEach(func(cid.Cid, abi.ChainEpoch) error) error Close() error } @@ -40,9 +39,6 @@ func NewTrackingStore(path string) (TrackingStore, error) { if err = env.SetMaxDBs(1); 
err != nil { return nil, xerrors.Errorf("failed to set LMDB max dbs: %w", err) } - // if err = env.SetMaxReaders(2); err != nil { - // return nil, xerrors.Errorf("failed to set LMDB max readers: %w", err) - // } if st, err := os.Stat(path); os.IsNotExist(err) { if err := os.MkdirAll(path, 0777); err != nil { @@ -129,47 +125,37 @@ func (s *trackingStore) Delete(cid cid.Cid) error { }) } -func (s *trackingStore) Keys(ctx context.Context) (<-chan cid.Cid, error) { - ch := make(chan cid.Cid) - go func() { - defer close(ch) +func (s *trackingStore) ForEach(f func(cid.Cid, abi.ChainEpoch) error) error { + return withMaxReadersRetry( + func() error { + return s.env.View(func(txn *lmdb.Txn) error { + txn.RawRead = true + cur, err := txn.OpenCursor(s.db) + if err != nil { + return err + } + defer cur.Close() - err := withMaxReadersRetry( - func() error { - return s.env.View(func(txn *lmdb.Txn) error { + for { + k, v, err := cur.Get(nil, nil, lmdb.Next) + if err != nil { + if lmdb.IsNotFound(err) { + return nil + } - txn.RawRead = true - cur, err := txn.OpenCursor(s.db) + return err + } + + cid := cid.NewCidV1(cid.Raw, k) + epoch := bytesToEpoch(v) + + err = f(cid, epoch) if err != nil { return err } - defer cur.Close() - - for { - k, _, err := cur.Get(nil, nil, lmdb.Next) - if err != nil { - if lmdb.IsNotFound(err) { - return nil - } - - return err - } - - select { - case ch <- cid.NewCidV1(cid.Raw, k): - case <-ctx.Done(): - return nil - } - } - }) + } }) - - if err != nil { - log.Errorf("error iterating over tracking store keys: %s", err) - } - }() - - return ch, nil + }) } func (s *trackingStore) Close() error { diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index cd6390cae..4ede61fe3 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -9,6 +9,8 @@ import ( "sync/atomic" "time" + "golang.org/x/xerrors" + "github.com/ledgerwatch/lmdb-go/lmdb" blocks "github.com/ipfs/go-block-format" @@ 
-369,51 +371,37 @@ func (s *SplitStore) compact() { // Phase 2: sweep cold objects: // - If a cold object is reachable in the hot range, it stays in the hotstore. // - If a cold object is reachable in the cold range, it is moved to the coldstore. - // - If a cold object is unreachable, it is deleted. - ch, err := s.snoop.Keys(context.Background()) - if err != nil { - // TODO do something better here - panic(err) - } - + // - If a cold object is unreachable, it is deleted if GC is enabled, otherwise moved to the coldstore. startSweep := time.Now() log.Info("sweeping cold objects") // some stats for logging var stHot, stCold, stDead int - for cid := range ch { - wrEpoch, err := s.snoop.Get(cid) - if err != nil { - // TODO do something better here - panic(err) - } - + err = s.snoop.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) error { // is the object stil hot? if wrEpoch > coldEpoch { // yes, stay in the hotstore stHot++ - continue + return nil } // the object is cold -- check whether it is reachable in the hot range mark, err := hotSet.Has(cid) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error checking live mark for %s: %w", cid, err) } if mark { // the object is reachable in the hot range, stay in the hotstore stHot++ - continue + return nil } // check whether it is reachable in the cold range mark, err = coldSet.Has(cid) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error checkiing cold set for %s: %w", cid, err) } if s.enableGC { @@ -421,14 +409,12 @@ func (s *SplitStore) compact() { // the object is reachable in the cold range, move it to the cold store blk, err := s.hot.Get(cid) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error retrieving tracked block %s from hotstore: %w ", cid, err) } err = s.cold.Put(blk) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error puting block %s to 
coldstore: %w", cid, err) } stCold++ @@ -440,14 +426,12 @@ func (s *SplitStore) compact() { // if GC is disabled, we move both cold and dead objects to the coldstore blk, err := s.hot.Get(cid) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error retrieving tracked block %s from hotstore: %w ", cid, err) } err = s.cold.Put(blk) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error puting block %s to coldstore: %w", cid, err) } if mark { @@ -460,16 +444,21 @@ func (s *SplitStore) compact() { // delete the object from the hotstore err = s.hot.DeleteBlock(cid) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error deleting block %s from hotstore: %w", cid, err) } // remove the snoop tracking err = s.snoop.Delete(cid) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error deleting cid %s from tracking store: %w", cid, err) } + + return nil + }) + + if err != nil { + // TODO do something better here + panic(err) } log.Infow("sweeping done", "took", time.Since(startSweep)) From d44719dbd3b0e444151fa100cfdaf8c21880f331 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 26 Feb 2021 16:40:12 +0200 Subject: [PATCH 058/148] amend confusing comment --- chain/store/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 4ede61fe3..9909b4308 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -39,7 +39,7 @@ func init() { type SplitStore struct { compacting int32 - enableGC bool // TODO disabled for now, as it causes panics + enableGC bool // TODO disabled for now, as it needs testing baseEpoch abi.ChainEpoch From 5068d51ac3742c53a96fc06e35650c932dad5d42 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 26 Feb 2021 16:59:03 +0200 Subject: [PATCH 059/148] use 
CompactionCold epochs for delineating the cold epoch cliff this allows us to change the thresholds for testing. --- chain/store/splitstore/splitstore.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 9909b4308..ee2734f49 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -25,7 +25,10 @@ import ( bstore "github.com/filecoin-project/lotus/lib/blockstore" ) -var CompactionThreshold = 5 * build.Finality +const ( + CompactionThreshold = 5 * build.Finality + CompactionCold = build.Finality +) var baseEpochKey = dstore.NewKey("baseEpoch") @@ -338,7 +341,7 @@ func (s *SplitStore) compact() { s.mx.Unlock() epoch := curTs.Height() - coldEpoch := s.baseEpoch + build.Finality + coldEpoch := s.baseEpoch + CompactionCold err = s.cs.WalkSnapshot(context.Background(), curTs, epoch-coldEpoch, false, false, func(cid cid.Cid) error { return hotSet.Mark(cid) @@ -356,7 +359,7 @@ func (s *SplitStore) compact() { panic(err) } - err = s.cs.WalkSnapshot(context.Background(), coldTs, build.Finality, false, false, + err = s.cs.WalkSnapshot(context.Background(), coldTs, CompactionCold, false, false, func(cid cid.Cid) error { return coldSet.Mark(cid) }) From 31268ba6855beb0fde9993143e45c03beff9d3df Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 26 Feb 2021 17:14:10 +0200 Subject: [PATCH 060/148] walk snapshot the same way snapshot exporting does; skip old msgs and receipts by default.
so that we don't panic with missing blocks in non-archival nodes --- chain/store/splitstore/splitstore.go | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index ee2734f49..c2c70dc28 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -42,7 +42,9 @@ func init() { type SplitStore struct { compacting int32 - enableGC bool // TODO disabled for now, as it needs testing + enableGC bool // TODO disabled for now, as it needs testing + skipOldMsgs bool // TODO this should be false for full archival nodes + skipMsgReceipts bool // TODO this should be false for full archival nodes baseEpoch abi.ChainEpoch @@ -79,11 +81,14 @@ func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore // and now we can make a SplitStore ss := &SplitStore{ - ds: ds, - hot: hot, - cold: cold, - snoop: snoop, - env: env, + ds: ds, + hot: hot, + cold: cold, + snoop: snoop, + env: env, + enableGC: false, // TODO option for this + skipOldMsgs: true, // TODO option for this + skipMsgReceipts: true, // TODO option for this } return ss, nil @@ -342,7 +347,7 @@ func (s *SplitStore) compact() { epoch := curTs.Height() coldEpoch := s.baseEpoch + CompactionCold - err = s.cs.WalkSnapshot(context.Background(), curTs, epoch-coldEpoch, false, false, + err = s.cs.WalkSnapshot(context.Background(), curTs, epoch-coldEpoch, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { return hotSet.Mark(cid) }) @@ -359,7 +364,7 @@ func (s *SplitStore) compact() { panic(err) } - err = s.cs.WalkSnapshot(context.Background(), coldTs, CompactionCold, false, false, + err = s.cs.WalkSnapshot(context.Background(), coldTs, CompactionCold, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { return coldSet.Mark(cid) }) From 8e12377e698853b984de635ee7cbe67b6362c921 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 26 Feb 2021 19:52:36 +0200 
Subject: [PATCH 061/148] handle consistency edge case --- chain/store/splitstore/splitstore.go | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index c2c70dc28..71f271b82 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -417,7 +417,17 @@ func (s *SplitStore) compact() { // the object is reachable in the cold range, move it to the cold store blk, err := s.hot.Get(cid) if err != nil { - return xerrors.Errorf("error retrieving tracked block %s from hotstore: %w ", cid, err) + if err == dstore.ErrNotFound { + // this can happen if the node is killed after we have deleted the block from the hotstore + // but before we have deleted it from the snoop; just delete the snoop. + err = s.snoop.Delete(cid) + if err != nil { + return xerrors.Errorf("error deleting cid %s from tracking store: %w", cid, err) + } + return nil + } else { + return xerrors.Errorf("error retrieving tracked block %s from hotstore: %w ", cid, err) + } } err = s.cold.Put(blk) @@ -434,7 +444,17 @@ func (s *SplitStore) compact() { // if GC is disabled, we move both cold and dead objects to the coldstore blk, err := s.hot.Get(cid) if err != nil { - return xerrors.Errorf("error retrieving tracked block %s from hotstore: %w ", cid, err) + if err == dstore.ErrNotFound { + // this can happen if the node is killed after we have deleted the block from the hotstore + // but before we delete it from the snoop; just delete the snoop. 
+ err = s.snoop.Delete(cid) + if err != nil { + return xerrors.Errorf("error deleting cid %s from tracking store: %w", cid, err) + } + return nil + } else { + return xerrors.Errorf("error retrieving tracked block %s from hotstore: %w ", cid, err) + } } err = s.cold.Put(blk) From 99c7d8e3eb8025b0e17b3ed1af5deaa5d76c549d Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 26 Feb 2021 20:53:54 +0200 Subject: [PATCH 062/148] more informative names for the hotstore directories --- node/modules/blockstore.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 57c916865..7b6100bd0 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -43,7 +43,7 @@ func LMDBHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlockstore return nil, err } - path = filepath.Join(path, "hot.db") + path = filepath.Join(path, "hot.lmdb") bs, err := lmdbbs.Open(&lmdbbs.Options{ Path: path, InitialMmapSize: 4 << 30, // 4GiB. 
@@ -71,7 +71,7 @@ func BadgerHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlocksto return nil, err } - path = filepath.Join(path, "hot.bs") + path = filepath.Join(path, "hot.badger") if err := os.MkdirAll(path, 0755); err != nil { return nil, err } From ee751f88cdee736e0ec1e89637ae1b4d1a9fa20c Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 26 Feb 2021 20:54:47 +0200 Subject: [PATCH 063/148] refactor lmdb specific snoop/liveset code into their own files paves the way for different back ends --- chain/store/splitstore/liveset.go | 97 --------------- chain/store/splitstore/liveset_lmdb.go | 104 ++++++++++++++++ chain/store/splitstore/liveset_test.go | 8 +- chain/store/splitstore/snoop.go | 151 +----------------------- chain/store/splitstore/snoop_lmdb.go | 157 +++++++++++++++++++++++++ chain/store/splitstore/snoop_test.go | 13 +- chain/store/splitstore/splitstore.go | 8 +- 7 files changed, 278 insertions(+), 260 deletions(-) create mode 100644 chain/store/splitstore/liveset_lmdb.go create mode 100644 chain/store/splitstore/snoop_lmdb.go diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index 3fea285d0..5a7c8e6f2 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -1,110 +1,13 @@ package splitstore import ( - "os" - - "golang.org/x/xerrors" - - "github.com/ledgerwatch/lmdb-go/lmdb" - cid "github.com/ipfs/go-cid" ) -var LiveSetMapSize int64 = 1 << 34 // 16G; TODO this may be a little too big, we should figure out how to gradually grow the map. 
- type LiveSet interface { Mark(cid.Cid) error Has(cid.Cid) (bool, error) Close() error } -type liveSet struct { - env *lmdb.Env - db lmdb.DBI -} - var markBytes = []byte{} - -func NewLiveSetEnv(path string) (*lmdb.Env, error) { - env, err := lmdb.NewEnv() - if err != nil { - return nil, xerrors.Errorf("failed to initialize LDMB env: %w", err) - } - if err = env.SetMapSize(LiveSetMapSize); err != nil { - return nil, xerrors.Errorf("failed to set LMDB map size: %w", err) - } - if err = env.SetMaxDBs(2); err != nil { - return nil, xerrors.Errorf("failed to set LMDB max dbs: %w", err) - } - // if err = env.SetMaxReaders(1); err != nil { - // return nil, xerrors.Errorf("failed to set LMDB max readers: %w", err) - // } - - if st, err := os.Stat(path); os.IsNotExist(err) { - if err := os.MkdirAll(path, 0777); err != nil { - return nil, xerrors.Errorf("failed to create LMDB data directory at %s: %w", path, err) - } - } else if err != nil { - return nil, xerrors.Errorf("failed to stat LMDB data dir: %w", err) - } else if !st.IsDir() { - return nil, xerrors.Errorf("LMDB path is not a directory %s", path) - } - err = env.Open(path, lmdb.NoSync|lmdb.WriteMap|lmdb.MapAsync|lmdb.NoReadahead, 0777) - if err != nil { - env.Close() //nolint:errcheck - return nil, xerrors.Errorf("error opening LMDB database: %w", err) - } - - return env, nil -} - -func NewLiveSet(env *lmdb.Env, name string) (LiveSet, error) { - var db lmdb.DBI - err := env.Update(func(txn *lmdb.Txn) (err error) { - db, err = txn.CreateDBI(name) - return - }) - - if err != nil { - return nil, err - } - - return &liveSet{env: env, db: db}, nil -} - -func (s *liveSet) Mark(cid cid.Cid) error { - return s.env.Update(func(txn *lmdb.Txn) error { - err := txn.Put(s.db, cid.Hash(), markBytes, 0) - if err == nil || lmdb.IsErrno(err, lmdb.KeyExist) { - return nil - } - return err - }) -} - -func (s *liveSet) Has(cid cid.Cid) (has bool, err error) { - err = s.env.View(func(txn *lmdb.Txn) error { - txn.RawRead = true - - _, 
err := txn.Get(s.db, cid.Hash()) - if err != nil { - if lmdb.IsNotFound(err) { - has = false - return nil - } - - return err - } - - has = true - return nil - }) - - return -} - -func (s *liveSet) Close() error { - return s.env.Update(func(txn *lmdb.Txn) error { - return txn.Drop(s.db, true) - }) -} diff --git a/chain/store/splitstore/liveset_lmdb.go b/chain/store/splitstore/liveset_lmdb.go new file mode 100644 index 000000000..3eb44dbe1 --- /dev/null +++ b/chain/store/splitstore/liveset_lmdb.go @@ -0,0 +1,104 @@ +package splitstore + +import ( + "os" + + "golang.org/x/xerrors" + + "github.com/ledgerwatch/lmdb-go/lmdb" + + cid "github.com/ipfs/go-cid" +) + +var LMDBLiveSetMapSize int64 = 1 << 34 // 16G; TODO grow the map dynamically + +type LMDBLiveSet struct { + env *lmdb.Env + db lmdb.DBI +} + +var _ LiveSet = (*LMDBLiveSet)(nil) + +func NewLMDBLiveSetEnv(path string) (*lmdb.Env, error) { + env, err := lmdb.NewEnv() + if err != nil { + return nil, xerrors.Errorf("failed to initialize LDMB env: %w", err) + } + if err = env.SetMapSize(LMDBLiveSetMapSize); err != nil { + return nil, xerrors.Errorf("failed to set LMDB map size: %w", err) + } + if err = env.SetMaxDBs(2); err != nil { + return nil, xerrors.Errorf("failed to set LMDB max dbs: %w", err) + } + // if err = env.SetMaxReaders(1); err != nil { + // return nil, xerrors.Errorf("failed to set LMDB max readers: %w", err) + // } + + if st, err := os.Stat(path); os.IsNotExist(err) { + if err := os.MkdirAll(path, 0777); err != nil { + return nil, xerrors.Errorf("failed to create LMDB data directory at %s: %w", path, err) + } + } else if err != nil { + return nil, xerrors.Errorf("failed to stat LMDB data dir: %w", err) + } else if !st.IsDir() { + return nil, xerrors.Errorf("LMDB path is not a directory %s", path) + } + err = env.Open(path, lmdb.NoSync|lmdb.WriteMap|lmdb.MapAsync|lmdb.NoReadahead, 0777) + if err != nil { + env.Close() //nolint:errcheck + return nil, xerrors.Errorf("error opening LMDB database: %w", 
err) + } + + return env, nil +} + +func NewLMDBLiveSet(env *lmdb.Env, name string) (*LMDBLiveSet, error) { + var db lmdb.DBI + err := env.Update(func(txn *lmdb.Txn) (err error) { + db, err = txn.CreateDBI(name) + return + }) + + if err != nil { + return nil, err + } + + return &LMDBLiveSet{env: env, db: db}, nil +} + +func (s *LMDBLiveSet) Mark(cid cid.Cid) error { + return s.env.Update(func(txn *lmdb.Txn) error { + err := txn.Put(s.db, cid.Hash(), markBytes, 0) + if err == nil || lmdb.IsErrno(err, lmdb.KeyExist) { + return nil + } + return err + }) +} + +func (s *LMDBLiveSet) Has(cid cid.Cid) (has bool, err error) { + err = s.env.View(func(txn *lmdb.Txn) error { + txn.RawRead = true + + _, err := txn.Get(s.db, cid.Hash()) + if err != nil { + if lmdb.IsNotFound(err) { + has = false + return nil + } + + return err + } + + has = true + return nil + }) + + return +} + +func (s *LMDBLiveSet) Close() error { + return s.env.Update(func(txn *lmdb.Txn) error { + return txn.Drop(s.db, true) + }) +} diff --git a/chain/store/splitstore/liveset_test.go b/chain/store/splitstore/liveset_test.go index 449b3c92f..7d03e873a 100644 --- a/chain/store/splitstore/liveset_test.go +++ b/chain/store/splitstore/liveset_test.go @@ -37,12 +37,12 @@ func TestLiveSet(t *testing.T) { t.Fatal(err) } - hotSet, err := NewLiveSet(env, "hot") + hotSet, err := NewLMDBLiveSet(env, "hot") if err != nil { t.Fatal(err) } - coldSet, err := NewLiveSet(env, "cold") + coldSet, err := NewLMDBLiveSet(env, "cold") if err != nil { t.Fatal(err) } @@ -109,12 +109,12 @@ func TestLiveSet(t *testing.T) { t.Fatal(err) } - hotSet, err = NewLiveSet(env, "hot") + hotSet, err = NewLMDBLiveSet(env, "hot") if err != nil { t.Fatal(err) } - coldSet, err = NewLiveSet(env, "cold") + coldSet, err = NewLMDBLiveSet(env, "cold") if err != nil { t.Fatal(err) } diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index b260c2008..a762be79c 100644 --- a/chain/store/splitstore/snoop.go +++ 
b/chain/store/splitstore/snoop.go @@ -1,19 +1,10 @@ package splitstore import ( - "os" - - "golang.org/x/xerrors" - - "github.com/ledgerwatch/lmdb-go/lmdb" - - cid "github.com/ipfs/go-cid" - "github.com/filecoin-project/go-state-types/abi" + cid "github.com/ipfs/go-cid" ) -var TrackingStoreMapSize int64 = 1 << 34 // 16G; TODO this may be a little too big, we should figure out how to gradually grow the map. - type TrackingStore interface { Put(cid.Cid, abi.ChainEpoch) error PutBatch([]cid.Cid, abi.ChainEpoch) error @@ -22,143 +13,3 @@ type TrackingStore interface { ForEach(func(cid.Cid, abi.ChainEpoch) error) error Close() error } - -type trackingStore struct { - env *lmdb.Env - db lmdb.DBI -} - -func NewTrackingStore(path string) (TrackingStore, error) { - env, err := lmdb.NewEnv() - if err != nil { - return nil, xerrors.Errorf("failed to initialize LMDB env: %w", err) - } - if err = env.SetMapSize(TrackingStoreMapSize); err != nil { - return nil, xerrors.Errorf("failed to set LMDB map size: %w", err) - } - if err = env.SetMaxDBs(1); err != nil { - return nil, xerrors.Errorf("failed to set LMDB max dbs: %w", err) - } - - if st, err := os.Stat(path); os.IsNotExist(err) { - if err := os.MkdirAll(path, 0777); err != nil { - return nil, xerrors.Errorf("failed to create LMDB data directory at %s: %w", path, err) - } - } else if err != nil { - return nil, xerrors.Errorf("failed to stat LMDB data dir: %w", err) - } else if !st.IsDir() { - return nil, xerrors.Errorf("LMDB path is not a directory %s", path) - } - - err = env.Open(path, lmdb.NoSync|lmdb.WriteMap|lmdb.MapAsync|lmdb.NoReadahead, 0777) - if err != nil { - env.Close() //nolint:errcheck - return nil, xerrors.Errorf("error opening LMDB database: %w", err) - } - - s := new(trackingStore) - s.env = env - err = env.Update(func(txn *lmdb.Txn) (err error) { - s.db, err = txn.CreateDBI("snoop") - return err - }) - - if err != nil { - return nil, xerrors.Errorf("error creating tracking store: %w", err) - } - - return s, 
nil -} - -func (s *trackingStore) Put(cid cid.Cid, epoch abi.ChainEpoch) error { - val := epochToBytes(epoch) - return withMaxReadersRetry( - func() error { - return s.env.Update(func(txn *lmdb.Txn) error { - return txn.Put(s.db, cid.Hash(), val, 0) - }) - }) -} - -func (s *trackingStore) PutBatch(cids []cid.Cid, epoch abi.ChainEpoch) error { - val := epochToBytes(epoch) - return withMaxReadersRetry( - func() error { - return s.env.Update(func(txn *lmdb.Txn) error { - for _, cid := range cids { - err := txn.Put(s.db, cid.Hash(), val, 0) - if err != nil { - return err - } - } - - return nil - }) - }) -} - -func (s *trackingStore) Get(cid cid.Cid) (epoch abi.ChainEpoch, err error) { - err = withMaxReadersRetry( - func() error { - return s.env.View(func(txn *lmdb.Txn) error { - txn.RawRead = true - - val, err := txn.Get(s.db, cid.Hash()) - if err != nil { - return err - } - - epoch = bytesToEpoch(val) - return nil - }) - }) - - return -} - -func (s *trackingStore) Delete(cid cid.Cid) error { - return withMaxReadersRetry( - func() error { - return s.env.Update(func(txn *lmdb.Txn) error { - return txn.Del(s.db, cid.Hash(), nil) - }) - }) -} - -func (s *trackingStore) ForEach(f func(cid.Cid, abi.ChainEpoch) error) error { - return withMaxReadersRetry( - func() error { - return s.env.View(func(txn *lmdb.Txn) error { - txn.RawRead = true - cur, err := txn.OpenCursor(s.db) - if err != nil { - return err - } - defer cur.Close() - - for { - k, v, err := cur.Get(nil, nil, lmdb.Next) - if err != nil { - if lmdb.IsNotFound(err) { - return nil - } - - return err - } - - cid := cid.NewCidV1(cid.Raw, k) - epoch := bytesToEpoch(v) - - err = f(cid, epoch) - if err != nil { - return err - } - } - }) - }) -} - -func (s *trackingStore) Close() error { - s.env.CloseDBI(s.db) - return s.env.Close() -} diff --git a/chain/store/splitstore/snoop_lmdb.go b/chain/store/splitstore/snoop_lmdb.go new file mode 100644 index 000000000..08c139f52 --- /dev/null +++ 
b/chain/store/splitstore/snoop_lmdb.go @@ -0,0 +1,157 @@ +package splitstore + +import ( + "os" + + "golang.org/x/xerrors" + + "github.com/ledgerwatch/lmdb-go/lmdb" + + cid "github.com/ipfs/go-cid" + + "github.com/filecoin-project/go-state-types/abi" +) + +var LMDBTrackingStoreMapSize int64 = 1 << 34 // 16G -- TODO grow the map dynamically + +type LMDBTrackingStore struct { + env *lmdb.Env + db lmdb.DBI +} + +var _ TrackingStore = (*LMDBTrackingStore)(nil) + +func NewLMDBTrackingStore(path string) (TrackingStore, error) { + env, err := lmdb.NewEnv() + if err != nil { + return nil, xerrors.Errorf("failed to initialize LMDB env: %w", err) + } + if err = env.SetMapSize(LMDBTrackingStoreMapSize); err != nil { + return nil, xerrors.Errorf("failed to set LMDB map size: %w", err) + } + if err = env.SetMaxDBs(1); err != nil { + return nil, xerrors.Errorf("failed to set LMDB max dbs: %w", err) + } + + if st, err := os.Stat(path); os.IsNotExist(err) { + if err := os.MkdirAll(path, 0777); err != nil { + return nil, xerrors.Errorf("failed to create LMDB data directory at %s: %w", path, err) + } + } else if err != nil { + return nil, xerrors.Errorf("failed to stat LMDB data dir: %w", err) + } else if !st.IsDir() { + return nil, xerrors.Errorf("LMDB path is not a directory %s", path) + } + + err = env.Open(path, lmdb.NoSync|lmdb.WriteMap|lmdb.MapAsync|lmdb.NoReadahead, 0777) + if err != nil { + env.Close() //nolint:errcheck + return nil, xerrors.Errorf("error opening LMDB database: %w", err) + } + + s := new(LMDBTrackingStore) + s.env = env + err = env.Update(func(txn *lmdb.Txn) (err error) { + s.db, err = txn.CreateDBI("snoop") + return err + }) + + if err != nil { + return nil, xerrors.Errorf("error creating tracking store: %w", err) + } + + return s, nil +} + +func (s *LMDBTrackingStore) Put(cid cid.Cid, epoch abi.ChainEpoch) error { + val := epochToBytes(epoch) + return withMaxReadersRetry( + func() error { + return s.env.Update(func(txn *lmdb.Txn) error { + return 
txn.Put(s.db, cid.Hash(), val, 0) + }) + }) +} + +func (s *LMDBTrackingStore) PutBatch(cids []cid.Cid, epoch abi.ChainEpoch) error { + val := epochToBytes(epoch) + return withMaxReadersRetry( + func() error { + return s.env.Update(func(txn *lmdb.Txn) error { + for _, cid := range cids { + err := txn.Put(s.db, cid.Hash(), val, 0) + if err != nil { + return err + } + } + + return nil + }) + }) +} + +func (s *LMDBTrackingStore) Get(cid cid.Cid) (epoch abi.ChainEpoch, err error) { + err = withMaxReadersRetry( + func() error { + return s.env.View(func(txn *lmdb.Txn) error { + txn.RawRead = true + + val, err := txn.Get(s.db, cid.Hash()) + if err != nil { + return err + } + + epoch = bytesToEpoch(val) + return nil + }) + }) + + return +} + +func (s *LMDBTrackingStore) Delete(cid cid.Cid) error { + return withMaxReadersRetry( + func() error { + return s.env.Update(func(txn *lmdb.Txn) error { + return txn.Del(s.db, cid.Hash(), nil) + }) + }) +} + +func (s *LMDBTrackingStore) ForEach(f func(cid.Cid, abi.ChainEpoch) error) error { + return withMaxReadersRetry( + func() error { + return s.env.View(func(txn *lmdb.Txn) error { + txn.RawRead = true + cur, err := txn.OpenCursor(s.db) + if err != nil { + return err + } + defer cur.Close() + + for { + k, v, err := cur.Get(nil, nil, lmdb.Next) + if err != nil { + if lmdb.IsNotFound(err) { + return nil + } + + return err + } + + cid := cid.NewCidV1(cid.Raw, k) + epoch := bytesToEpoch(v) + + err = f(cid, epoch) + if err != nil { + return err + } + } + }) + }) +} + +func (s *LMDBTrackingStore) Close() error { + s.env.CloseDBI(s.db) + return s.env.Close() +} diff --git a/chain/store/splitstore/snoop_test.go b/chain/store/splitstore/snoop_test.go index 816fdbdb7..225c5936b 100644 --- a/chain/store/splitstore/snoop_test.go +++ b/chain/store/splitstore/snoop_test.go @@ -1,7 +1,6 @@ package splitstore import ( - "context" "testing" "golang.org/x/xerrors" @@ -46,7 +45,7 @@ func TestTrackingStore(t *testing.T) { } } - s, err := 
NewTrackingStore("/tmp/snoop-test") + s, err := NewLMDBTrackingStore("/tmp/snoop-test") if err != nil { t.Fatal(err) } @@ -89,14 +88,18 @@ func TestTrackingStore(t *testing.T) { k4.String(): {}, } - ch, _ := s.Keys(context.Background()) //nolint:errcheck - for k := range ch { + err = s.ForEach(func(k cid.Cid, _ abi.ChainEpoch) error { _, ok := allKeys[k.String()] if !ok { t.Fatal("unexpected key") } delete(allKeys, k.String()) + return nil + }) + + if err != nil { + t.Fatal(err) } if len(allKeys) != 0 { @@ -109,7 +112,7 @@ func TestTrackingStore(t *testing.T) { t.Fatal(err) } - s, err = NewTrackingStore("/tmp/snoop-test") + s, err = NewLMDBTrackingStore("/tmp/snoop-test") if err != nil { t.Fatal(err) } diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 71f271b82..82ffc1feb 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -67,13 +67,13 @@ var _ bstore.Blockstore = (*SplitStore)(nil) // compaction. func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore) (*SplitStore, error) { // the tracking store - snoop, err := NewTrackingStore(filepath.Join(path, "snoop.db")) + snoop, err := NewLMDBTrackingStore(filepath.Join(path, "snoop.lmdb")) if err != nil { return nil, err } // the liveset env - env, err := NewLiveSetEnv(filepath.Join(path, "sweep.db")) + env, err := NewLMDBLiveSetEnv(filepath.Join(path, "sweep.lmdb")) if err != nil { snoop.Close() //nolint:errcheck return nil, err @@ -322,14 +322,14 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { func (s *SplitStore) compact() { // create two on disk live sets, one for marking the cold finality region // and one for marking the hot region - hotSet, err := NewLiveSet(s.env, "hot") + hotSet, err := NewLMDBLiveSet(s.env, "hot") if err != nil { // TODO do something better here panic(err) } defer hotSet.Close() //nolint:errcheck - coldSet, err := NewLiveSet(s.env, "cold") + coldSet, err := 
NewLMDBLiveSet(s.env, "cold") if err != nil { // TODO do something better here panic(err) From 9977f5c3ec8e12f3ea9de1b0f6ba80dbc3812b17 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 26 Feb 2021 21:28:16 +0200 Subject: [PATCH 064/148] rewrite sweep logic to avoid doing writes/deletes nested in a read txn --- chain/store/splitstore/splitstore.go | 129 ++++++++++++++++----------- 1 file changed, 76 insertions(+), 53 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 82ffc1feb..de6ada11f 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -386,6 +386,9 @@ func (s *SplitStore) compact() { // some stats for logging var stHot, stCold, stDead int + cold := make(map[cid.Cid]struct{}) + dead := make(map[cid.Cid]struct{}) + err = s.snoop.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) error { // is the object stil hot? if wrEpoch > coldEpoch { @@ -415,53 +418,16 @@ func (s *SplitStore) compact() { if s.enableGC { if mark { // the object is reachable in the cold range, move it to the cold store - blk, err := s.hot.Get(cid) - if err != nil { - if err == dstore.ErrNotFound { - // this can happen if the node is killed after we have deleted the block from the hotstore - // but before we have deleted it from the snoop; just delete the snoop. 
- err = s.snoop.Delete(cid) - if err != nil { - return xerrors.Errorf("error deleting cid %s from tracking store: %w", cid, err) - } - return nil - } else { - return xerrors.Errorf("error retrieving tracked block %s from hotstore: %w ", cid, err) - } - } - - err = s.cold.Put(blk) - if err != nil { - return xerrors.Errorf("error puting block %s to coldstore: %w", cid, err) - } - + cold[cid] = struct{}{} stCold++ } else { - // the object will be deleted + // the object is dead and will be deleted + dead[cid] = struct{}{} stDead++ } } else { // if GC is disabled, we move both cold and dead objects to the coldstore - blk, err := s.hot.Get(cid) - if err != nil { - if err == dstore.ErrNotFound { - // this can happen if the node is killed after we have deleted the block from the hotstore - // but before we delete it from the snoop; just delete the snoop. - err = s.snoop.Delete(cid) - if err != nil { - return xerrors.Errorf("error deleting cid %s from tracking store: %w", cid, err) - } - return nil - } else { - return xerrors.Errorf("error retrieving tracked block %s from hotstore: %w ", cid, err) - } - } - - err = s.cold.Put(blk) - if err != nil { - return xerrors.Errorf("error puting block %s to coldstore: %w", cid, err) - } - + cold[cid] = struct{}{} if mark { stCold++ } else { @@ -469,18 +435,6 @@ func (s *SplitStore) compact() { } } - // delete the object from the hotstore - err = s.hot.DeleteBlock(cid) - if err != nil { - return xerrors.Errorf("error deleting block %s from hotstore: %w", cid, err) - } - - // remove the snoop tracking - err = s.snoop.Delete(cid) - if err != nil { - return xerrors.Errorf("error deleting cid %s from tracking store: %w", cid, err) - } - return nil }) @@ -489,6 +443,75 @@ func (s *SplitStore) compact() { panic(err) } + log.Info("moving cold objects to the coldstore") + for cid := range cold { + blk, err := s.hot.Get(cid) + if err != nil { + if err == dstore.ErrNotFound { + // this can happen if the node is killed after we have deleted the 
block from the hotstore + // but before we have deleted it from the snoop; just delete the snoop. + err = s.snoop.Delete(cid) + if err != nil { + log.Errorf("error deleting cid %s from tracking store: %w", cid, err) + // TODO do something better here -- just continue? + panic(err) + + } + } else { + log.Errorf("error retrieving tracked block %s from hotstore: %w ", cid, err) + // TODO do something better here -- just continue? + panic(err) + } + + continue + } + + err = s.cold.Put(blk) + if err != nil { + log.Errorf("error puting block %s to coldstore: %w", cid, err) + // TODO do something better here -- just continue? + panic(err) + } + + // delete the object from the hotstore + err = s.hot.DeleteBlock(cid) + if err != nil { + log.Errorf("error deleting block %s from hotstore: %w", cid, err) + // TODO do something better here -- just continue? + panic(err) + } + + // remove the snoop tracking + err = s.snoop.Delete(cid) + if err != nil { + log.Errorf("error deleting cid %s from tracking store: %w", cid, err) + // TODO do something better here -- just continue? + panic(err) + } + } + + if len(dead) > 0 { + log.Info("deleting dead objects") + + for cid := range dead { + // delete the object from the hotstore + err = s.hot.DeleteBlock(cid) + if err != nil { + log.Errorf("error deleting block %s from hotstore: %w", cid, err) + // TODO do something better here -- just continue? + panic(err) + } + + // remove the snoop tracking + err = s.snoop.Delete(cid) + if err != nil { + log.Errorf("error deleting cid %s from tracking store: %w", cid, err) + // TODO do something better here -- just continue? 
+ panic(err) + } + } + } + log.Infow("sweeping done", "took", time.Since(startSweep)) log.Infow("compaction stats", "hot", stHot, "cold", stCold, "dead", stDead) From e79445123fb2815e357c60c858677e56f918a2c3 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 10:03:41 +0200 Subject: [PATCH 065/148] handle MDB_KEY_EXIST in tracking store Puts --- chain/store/splitstore/snoop_lmdb.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/chain/store/splitstore/snoop_lmdb.go b/chain/store/splitstore/snoop_lmdb.go index 08c139f52..d89c591d6 100644 --- a/chain/store/splitstore/snoop_lmdb.go +++ b/chain/store/splitstore/snoop_lmdb.go @@ -68,7 +68,11 @@ func (s *LMDBTrackingStore) Put(cid cid.Cid, epoch abi.ChainEpoch) error { return withMaxReadersRetry( func() error { return s.env.Update(func(txn *lmdb.Txn) error { - return txn.Put(s.db, cid.Hash(), val, 0) + err := txn.Put(s.db, cid.Hash(), val, 0) + if err == nil || lmdb.IsErrno(err, lmdb.KeyExist) { + return nil + } + return err }) }) } @@ -80,9 +84,10 @@ func (s *LMDBTrackingStore) PutBatch(cids []cid.Cid, epoch abi.ChainEpoch) error return s.env.Update(func(txn *lmdb.Txn) error { for _, cid := range cids { err := txn.Put(s.db, cid.Hash(), val, 0) - if err != nil { - return err + if err == nil || lmdb.IsErrno(err, lmdb.KeyExist) { + continue } + return err } return nil From 8f0ddac41a0ea5d6a4708ad12c565faf70856c72 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 10:15:43 +0200 Subject: [PATCH 066/148] add comment --- chain/store/splitstore/splitstore.go | 1 + 1 file changed, 1 insertion(+) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index de6ada11f..9c629e46e 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -466,6 +466,7 @@ func (s *SplitStore) compact() { continue } + // put the object in the coldstore err = s.cold.Put(blk) if err != nil { log.Errorf("error puting block %s to coldstore: 
%w", cid, err) From 923a3db4b0762f32d4be799612912c7a381b567c Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 12:01:55 +0200 Subject: [PATCH 067/148] abstract tracking store and live set construction --- chain/store/splitstore/liveset.go | 16 ++++++++++++++++ chain/store/splitstore/liveset_lmdb.go | 18 ++++++++++++++++-- chain/store/splitstore/snoop.go | 11 +++++++++++ chain/store/splitstore/splitstore.go | 13 +++++++------ 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index 5a7c8e6f2..17024bf37 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -1,6 +1,9 @@ package splitstore import ( + "fmt" + "path/filepath" + cid "github.com/ipfs/go-cid" ) @@ -11,3 +14,16 @@ type LiveSet interface { } var markBytes = []byte{} + +type LiveSetEnv interface { + NewLiveSet(name string) (LiveSet, error) + Close() error +} + +func NewLiveSetEnv(path string, useLMDB bool) (LiveSetEnv, error) { + if useLMDB { + return NewLMDBLiveSetEnv(filepath.Join(path, "sweep.lmdb")) + } + + return nil, fmt.Errorf("FIXME: non-lmdb livesets") +} diff --git a/chain/store/splitstore/liveset_lmdb.go b/chain/store/splitstore/liveset_lmdb.go index 3eb44dbe1..615605a75 100644 --- a/chain/store/splitstore/liveset_lmdb.go +++ b/chain/store/splitstore/liveset_lmdb.go @@ -12,6 +12,12 @@ import ( var LMDBLiveSetMapSize int64 = 1 << 34 // 16G; TODO grow the map dynamically +type LMDBLiveSetEnv struct { + env *lmdb.Env +} + +var _ LiveSetEnv = (*LMDBLiveSetEnv)(nil) + type LMDBLiveSet struct { env *lmdb.Env db lmdb.DBI @@ -19,7 +25,7 @@ type LMDBLiveSet struct { var _ LiveSet = (*LMDBLiveSet)(nil) -func NewLMDBLiveSetEnv(path string) (*lmdb.Env, error) { +func NewLMDBLiveSetEnv(path string) (*LMDBLiveSetEnv, error) { env, err := lmdb.NewEnv() if err != nil { return nil, xerrors.Errorf("failed to initialize LDMB env: %w", err) @@ -49,7 +55,15 @@ func NewLMDBLiveSetEnv(path string) 
(*lmdb.Env, error) { return nil, xerrors.Errorf("error opening LMDB database: %w", err) } - return env, nil + return &LMDBLiveSetEnv{env: env}, nil +} + +func (e *LMDBLiveSetEnv) NewLiveSet(name string) (LiveSet, error) { + return NewLMDBLiveSet(e.env, name+".lmdb") +} + +func (e *LMDBLiveSetEnv) Close() error { + return e.env.Close() } func NewLMDBLiveSet(env *lmdb.Env, name string) (*LMDBLiveSet, error) { diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index a762be79c..08faabd6c 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -1,6 +1,9 @@ package splitstore import ( + "fmt" + "path/filepath" + "github.com/filecoin-project/go-state-types/abi" cid "github.com/ipfs/go-cid" ) @@ -13,3 +16,11 @@ type TrackingStore interface { ForEach(func(cid.Cid, abi.ChainEpoch) error) error Close() error } + +func NewTrackingStore(path string, useLMDB bool) (TrackingStore, error) { + if useLMDB { + return NewLMDBTrackingStore(filepath.Join(path, "snoop.lmdb")) + } + + return nil, fmt.Errorf("TODO: non-lmdb livesets") +} diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 9c629e46e..680ec2a5b 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -4,7 +4,6 @@ import ( "context" "encoding/binary" "errors" - "path/filepath" "sync" "sync/atomic" "time" @@ -32,6 +31,8 @@ const ( var baseEpochKey = dstore.NewKey("baseEpoch") +var UseLMDB = true // TODO snake this through DI + var log = logging.Logger("splitstore") func init() { @@ -57,7 +58,7 @@ type SplitStore struct { cold bstore.Blockstore snoop TrackingStore - env *lmdb.Env + env LiveSetEnv } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -67,13 +68,13 @@ var _ bstore.Blockstore = (*SplitStore)(nil) // compaction. 
func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore) (*SplitStore, error) { // the tracking store - snoop, err := NewLMDBTrackingStore(filepath.Join(path, "snoop.lmdb")) + snoop, err := NewTrackingStore(path, UseLMDB) if err != nil { return nil, err } // the liveset env - env, err := NewLMDBLiveSetEnv(filepath.Join(path, "sweep.lmdb")) + env, err := NewLiveSetEnv(path, UseLMDB) if err != nil { snoop.Close() //nolint:errcheck return nil, err @@ -322,14 +323,14 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { func (s *SplitStore) compact() { // create two on disk live sets, one for marking the cold finality region // and one for marking the hot region - hotSet, err := NewLMDBLiveSet(s.env, "hot") + hotSet, err := s.env.NewLiveSet("hot") if err != nil { // TODO do something better here panic(err) } defer hotSet.Close() //nolint:errcheck - coldSet, err := NewLMDBLiveSet(s.env, "cold") + coldSet, err := s.env.NewLiveSet("cold") if err != nil { // TODO do something better here panic(err) From 68b6f913c7d1002b4f3b5d44d0fc5a0f2c64e6d9 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 12:10:10 +0200 Subject: [PATCH 068/148] propagate useLMDB option to splitstore through DI --- chain/store/splitstore/liveset.go | 2 +- chain/store/splitstore/splitstore.go | 8 +++---- node/builder.go | 2 +- node/modules/blockstore.go | 33 +++++++++++++++------------- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index 17024bf37..31961f4d5 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -25,5 +25,5 @@ func NewLiveSetEnv(path string, useLMDB bool) (LiveSetEnv, error) { return NewLMDBLiveSetEnv(filepath.Join(path, "sweep.lmdb")) } - return nil, fmt.Errorf("FIXME: non-lmdb livesets") + return nil, fmt.Errorf("TODO: non-lmdb livesets") } diff --git a/chain/store/splitstore/splitstore.go 
b/chain/store/splitstore/splitstore.go index 680ec2a5b..a307e995c 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -31,8 +31,6 @@ const ( var baseEpochKey = dstore.NewKey("baseEpoch") -var UseLMDB = true // TODO snake this through DI - var log = logging.Logger("splitstore") func init() { @@ -66,15 +64,15 @@ var _ bstore.Blockstore = (*SplitStore)(nil) // NewSplitStore creates a new SplitStore instance, given a path for the hotstore dbs and a cold // blockstore. The SplitStore must be attached to the ChainStore with Start in order to trigger // compaction. -func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore) (*SplitStore, error) { +func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore, useLMDB bool) (*SplitStore, error) { // the tracking store - snoop, err := NewTrackingStore(path, UseLMDB) + snoop, err := NewTrackingStore(path, useLMDB) if err != nil { return nil, err } // the liveset env - env, err := NewLiveSetEnv(path, UseLMDB) + env, err := NewLiveSetEnv(path, useLMDB) if err != nil { snoop.Close() //nolint:errcheck return nil, err diff --git a/node/builder.go b/node/builder.go index d4569a402..7f13aaee0 100644 --- a/node/builder.go +++ b/node/builder.go @@ -615,7 +615,7 @@ func Repo(r repo.Repo) Option { Override(new(dtypes.HotBlockstore), modules.LMDBHotBlockstore)), If(!cfg.UseLMDB, Override(new(dtypes.HotBlockstore), modules.BadgerHotBlockstore)), - Override(new(dtypes.SplitBlockstore), modules.SplitBlockstore), + Override(new(dtypes.SplitBlockstore), modules.SplitBlockstore(cfg)), Override(new(dtypes.ChainBlockstore), modules.ChainSplitBlockstore), Override(new(dtypes.StateBlockstore), modules.StateSplitBlockstore), Override(new(dtypes.BaseBlockstore), From(new(dtypes.SplitBlockstore))), diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 7b6100bd0..f82fe4602 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go 
@@ -15,6 +15,7 @@ import ( "github.com/filecoin-project/lotus/blockstore" "github.com/filecoin-project/lotus/chain/store/splitstore" + "github.com/filecoin-project/lotus/node/config" "github.com/filecoin-project/lotus/node/modules/dtypes" "github.com/filecoin-project/lotus/node/modules/helpers" "github.com/filecoin-project/lotus/node/repo" @@ -95,23 +96,25 @@ func BadgerHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlocksto return hot, err } -func SplitBlockstore(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.ColdBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) { - path, err := r.SplitstorePath() - if err != nil { - return nil, err - } +func SplitBlockstore(cfg *config.Blockstore) func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.ColdBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) { + return func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.ColdBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) { + path, err := r.SplitstorePath() + if err != nil { + return nil, err + } - ss, err := splitstore.NewSplitStore(path, ds, cold, hot) - if err != nil { - return nil, err - } - lc.Append(fx.Hook{ - OnStop: func(context.Context) error { - return ss.Close() - }, - }) + ss, err := splitstore.NewSplitStore(path, ds, cold, hot, cfg.UseLMDB) + if err != nil { + return nil, err + } + lc.Append(fx.Hook{ + OnStop: func(context.Context) error { + return ss.Close() + }, + }) - return ss, err + return ss, err + } } func StateFlatBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.ColdBlockstore) (dtypes.StateBlockstore, error) { From cb1789ea6ec2977cbc47e2b5db54b08dbe446fc1 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 12:42:29 +0200 Subject: [PATCH 069/148] gomod: use bolt --- go.mod | 1 + 1 file changed, 1 insertion(+) diff --git a/go.mod b/go.mod index 05ede3b12..d3142a530 100644 --- a/go.mod +++ b/go.mod @@ 
-139,6 +139,7 @@ require ( github.com/whyrusleeping/multiaddr-filter v0.0.0-20160516205228-e903e4adabd7 github.com/whyrusleeping/pubsub v0.0.0-20190708150250-92bcb0691325 github.com/xorcare/golden v0.6.1-0.20191112154924-b87f686d7542 + go.etcd.io/bbolt v1.3.4 go.opencensus.io v0.22.5 go.uber.org/dig v1.10.0 // indirect go.uber.org/fx v1.9.0 From 27a9b974db8bcc5483e37ebad494cf8a8cd7ec78 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 12:44:31 +0200 Subject: [PATCH 070/148] implement bolt-backed liveset --- chain/store/splitstore/liveset.go | 3 +- chain/store/splitstore/liveset_bolt.go | 80 ++++++++++++++++++++++++++ chain/store/splitstore/liveset_lmdb.go | 3 +- chain/store/splitstore/snoop_lmdb.go | 3 +- 4 files changed, 83 insertions(+), 6 deletions(-) create mode 100644 chain/store/splitstore/liveset_bolt.go diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index 31961f4d5..1636c6efa 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -1,7 +1,6 @@ package splitstore import ( - "fmt" "path/filepath" cid "github.com/ipfs/go-cid" @@ -25,5 +24,5 @@ func NewLiveSetEnv(path string, useLMDB bool) (LiveSetEnv, error) { return NewLMDBLiveSetEnv(filepath.Join(path, "sweep.lmdb")) } - return nil, fmt.Errorf("TODO: non-lmdb livesets") + return NewBoltLiveSetEnv(filepath.Join(path, "sweep.bolt")) } diff --git a/chain/store/splitstore/liveset_bolt.go b/chain/store/splitstore/liveset_bolt.go new file mode 100644 index 000000000..07a22b93f --- /dev/null +++ b/chain/store/splitstore/liveset_bolt.go @@ -0,0 +1,80 @@ +package splitstore + +import ( + "time" + + "golang.org/x/xerrors" + + cid "github.com/ipfs/go-cid" + bolt "go.etcd.io/bbolt" +) + +type BoltLiveSetEnv struct { + db *bolt.DB +} + +var _ LiveSetEnv = (*BoltLiveSetEnv)(nil) + +type BoltLiveSet struct { + db *bolt.DB + bucketId []byte +} + +var _ LiveSet = (*BoltLiveSet)(nil) + +func NewBoltLiveSetEnv(path string) (*BoltLiveSetEnv, error) { 
+ db, err := bolt.Open(path, 0644, + &bolt.Options{ + Timeout: 1 * time.Second, + }) + if err != nil { + return nil, err + } + + return &BoltLiveSetEnv{db: db}, nil +} + +func (e *BoltLiveSetEnv) NewLiveSet(name string) (LiveSet, error) { + bucketId := []byte(name) + err := e.db.Update(func(tx *bolt.Tx) error { + _, err := tx.CreateBucketIfNotExists(bucketId) + if err != nil { + return xerrors.Errorf("error creating bolt db bucket %s: %w", name, err) + } + return nil + }) + + if err != nil { + return nil, err + } + + return &BoltLiveSet{db: e.db, bucketId: bucketId}, nil +} + +func (e *BoltLiveSetEnv) Close() error { + return e.db.Close() +} + +func (s *BoltLiveSet) Mark(cid cid.Cid) error { + return s.db.Update(func(tx *bolt.Tx) error { + b := tx.Bucket(s.bucketId) + return b.Put(cid.Hash(), markBytes) + }) +} + +func (s *BoltLiveSet) Has(cid cid.Cid) (result bool, err error) { + err = s.db.View(func(tx *bolt.Tx) error { + b := tx.Bucket(s.bucketId) + v := b.Get(cid.Hash()) + result = v != nil + return nil + }) + + return result, err +} + +func (s *BoltLiveSet) Close() error { + return s.db.Update(func(tx *bolt.Tx) error { + return tx.DeleteBucket(s.bucketId) + }) +} diff --git a/chain/store/splitstore/liveset_lmdb.go b/chain/store/splitstore/liveset_lmdb.go index 615605a75..f41907207 100644 --- a/chain/store/splitstore/liveset_lmdb.go +++ b/chain/store/splitstore/liveset_lmdb.go @@ -5,9 +5,8 @@ import ( "golang.org/x/xerrors" - "github.com/ledgerwatch/lmdb-go/lmdb" - cid "github.com/ipfs/go-cid" + "github.com/ledgerwatch/lmdb-go/lmdb" ) var LMDBLiveSetMapSize int64 = 1 << 34 // 16G; TODO grow the map dynamically diff --git a/chain/store/splitstore/snoop_lmdb.go b/chain/store/splitstore/snoop_lmdb.go index d89c591d6..21d596bfb 100644 --- a/chain/store/splitstore/snoop_lmdb.go +++ b/chain/store/splitstore/snoop_lmdb.go @@ -5,9 +5,8 @@ import ( "golang.org/x/xerrors" - "github.com/ledgerwatch/lmdb-go/lmdb" - cid "github.com/ipfs/go-cid" + 
"github.com/ledgerwatch/lmdb-go/lmdb" "github.com/filecoin-project/go-state-types/abi" ) From 2c1a9781cf5307686321e4e143bf78bf43e1c103 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 12:50:48 +0200 Subject: [PATCH 071/148] add test for bolt liveset --- chain/store/splitstore/liveset_test.go | 51 ++++++++++++-------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/chain/store/splitstore/liveset_test.go b/chain/store/splitstore/liveset_test.go index 7d03e873a..e08bf1865 100644 --- a/chain/store/splitstore/liveset_test.go +++ b/chain/store/splitstore/liveset_test.go @@ -4,45 +4,40 @@ import ( "os" "testing" - "github.com/ledgerwatch/lmdb-go/lmdb" - cid "github.com/ipfs/go-cid" "github.com/multiformats/go-multihash" ) -func TestLiveSet(t *testing.T) { - env, err := lmdb.NewEnv() +func TestLMDBLiveSet(t *testing.T) { + testLiveSet(t, true) +} + +func TestBoltLiveSet(t *testing.T) { + testLiveSet(t, false) +} + +func testLiveSet(t *testing.T, useLMDB bool) { + t.Helper() + + path := "/tmp/liveset-test" + + err := os.MkdirAll(path, 0777) + if err != nil { + t.Fatal(err) + } + + env, err := NewLiveSetEnv(path, useLMDB) if err != nil { t.Fatal(err) } defer env.Close() //nolint:errcheck - if err = env.SetMapSize(1 << 30); err != nil { - t.Fatal(err) - } - if err = env.SetMaxDBs(2); err != nil { - t.Fatal(err) - } - if err = env.SetMaxReaders(1); err != nil { - t.Fatal(err) - } - - err = os.MkdirAll("/tmp/liveset-test", 0777) + hotSet, err := env.NewLiveSet("hot") if err != nil { t.Fatal(err) } - err = env.Open("/tmp/liveset-test", lmdb.NoSync|lmdb.WriteMap|lmdb.MapAsync|lmdb.NoReadahead, 0777) - if err != nil { - t.Fatal(err) - } - - hotSet, err := NewLMDBLiveSet(env, "hot") - if err != nil { - t.Fatal(err) - } - - coldSet, err := NewLMDBLiveSet(env, "cold") + coldSet, err := env.NewLiveSet("cold") if err != nil { t.Fatal(err) } @@ -109,12 +104,12 @@ func TestLiveSet(t *testing.T) { t.Fatal(err) } - hotSet, err = NewLMDBLiveSet(env, "hot") + 
hotSet, err = env.NewLiveSet("hot") if err != nil { t.Fatal(err) } - coldSet, err = NewLMDBLiveSet(env, "cold") + coldSet, err = env.NewLiveSet("cold") if err != nil { t.Fatal(err) } From b83994797a797104538c272c0397038c7fc7236b Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 13:19:31 +0200 Subject: [PATCH 072/148] separate LMDB options for hotstore and tracking stores --- node/builder.go | 4 ++-- node/config/def.go | 5 +++-- node/modules/blockstore.go | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/node/builder.go b/node/builder.go index 7f13aaee0..e0f7fcfa1 100644 --- a/node/builder.go +++ b/node/builder.go @@ -611,9 +611,9 @@ func Repo(r repo.Repo) Option { Override(new(dtypes.UniversalBlockstore), modules.UniversalBlockstore), If(cfg.Splitstore, - If(cfg.UseLMDB, + If(cfg.UseLMDBHotstore, Override(new(dtypes.HotBlockstore), modules.LMDBHotBlockstore)), - If(!cfg.UseLMDB, + If(!cfg.UseLMDBHotstore, Override(new(dtypes.HotBlockstore), modules.BadgerHotBlockstore)), Override(new(dtypes.SplitBlockstore), modules.SplitBlockstore(cfg)), Override(new(dtypes.ChainBlockstore), modules.ChainSplitBlockstore), diff --git a/node/config/def.go b/node/config/def.go index 56bba08ed..21141e279 100644 --- a/node/config/def.go +++ b/node/config/def.go @@ -121,8 +121,9 @@ type Pubsub struct { } type Blockstore struct { - Splitstore bool - UseLMDB bool + Splitstore bool + UseLMDBHotstore bool + UseLMDBTracking bool } // // Full Node diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index f82fe4602..7b3bce5e1 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -103,7 +103,7 @@ func SplitBlockstore(cfg *config.Blockstore) func(lc fx.Lifecycle, r repo.Locked return nil, err } - ss, err := splitstore.NewSplitStore(path, ds, cold, hot, cfg.UseLMDB) + ss, err := splitstore.NewSplitStore(path, ds, cold, hot, cfg.UseLMDBTracking) if err != nil { return nil, err } From f1c61c47538449bfa334ee2e5fed380021575d5f Mon Sep 
17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 13:35:57 +0200 Subject: [PATCH 073/148] implement bolt backed tracking store --- chain/store/splitstore/snoop.go | 3 +- chain/store/splitstore/snoop_bolt.go | 102 +++++++++++++++++++++++++++ chain/store/splitstore/snoop_lmdb.go | 2 +- 3 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 chain/store/splitstore/snoop_bolt.go diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index 08faabd6c..3a2b5f43e 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -1,7 +1,6 @@ package splitstore import ( - "fmt" "path/filepath" "github.com/filecoin-project/go-state-types/abi" @@ -22,5 +21,5 @@ func NewTrackingStore(path string, useLMDB bool) (TrackingStore, error) { return NewLMDBTrackingStore(filepath.Join(path, "snoop.lmdb")) } - return nil, fmt.Errorf("TODO: non-lmdb livesets") + return NewBoltTrackingStore(filepath.Join(path, "snoop.bolt")) } diff --git a/chain/store/splitstore/snoop_bolt.go b/chain/store/splitstore/snoop_bolt.go new file mode 100644 index 000000000..015a11838 --- /dev/null +++ b/chain/store/splitstore/snoop_bolt.go @@ -0,0 +1,102 @@ +package splitstore + +import ( + "time" + + "golang.org/x/xerrors" + + cid "github.com/ipfs/go-cid" + bolt "go.etcd.io/bbolt" + + "github.com/filecoin-project/go-state-types/abi" +) + +type BoltTrackingStore struct { + db *bolt.DB + bucketId []byte +} + +var _ TrackingStore = (*BoltTrackingStore)(nil) + +func NewBoltTrackingStore(path string) (*BoltTrackingStore, error) { + db, err := bolt.Open(path, 0644, + &bolt.Options{ + Timeout: 1 * time.Second, + }) + if err != nil { + return nil, err + } + + bucketId := []byte("snoop") + err = db.Update(func(tx *bolt.Tx) error { + _, err := tx.CreateBucketIfNotExists(bucketId) + if err != nil { + return xerrors.Errorf("error creating bolt db bucket %s: %w", string(bucketId), err) + } + return nil + }) + + if err != nil { + db.Close() //nolint:errcheck + return 
nil, err + } + + return &BoltTrackingStore{db: db, bucketId: bucketId}, nil +} + +func (s *BoltTrackingStore) Put(cid cid.Cid, epoch abi.ChainEpoch) error { + val := epochToBytes(epoch) + return s.db.Batch(func(tx *bolt.Tx) error { + b := tx.Bucket(s.bucketId) + return b.Put(cid.Hash(), val) + }) +} + +func (s *BoltTrackingStore) PutBatch(cids []cid.Cid, epoch abi.ChainEpoch) error { + val := epochToBytes(epoch) + return s.db.Batch(func(tx *bolt.Tx) error { + b := tx.Bucket(s.bucketId) + for _, cid := range cids { + err := b.Put(cid.Hash(), val) + if err != nil { + return err + } + } + return nil + }) +} + +func (s *BoltTrackingStore) Get(cid cid.Cid) (epoch abi.ChainEpoch, err error) { + err = s.db.View(func(tx *bolt.Tx) error { + b := tx.Bucket(s.bucketId) + val := b.Get(cid.Hash()) + if val == nil { + return xerrors.Errorf("missing tracking epoch for %s", cid) + } + epoch = bytesToEpoch(val) + return nil + }) + return epoch, err +} + +func (s *BoltTrackingStore) Delete(cid cid.Cid) error { + return s.db.Batch(func(tx *bolt.Tx) error { + b := tx.Bucket(s.bucketId) + return b.Delete(cid.Hash()) + }) +} + +func (s *BoltTrackingStore) ForEach(f func(cid.Cid, abi.ChainEpoch) error) error { + return s.db.View(func(tx *bolt.Tx) error { + b := tx.Bucket(s.bucketId) + return b.ForEach(func(k, v []byte) error { + cid := cid.NewCidV1(cid.Raw, k) + epoch := bytesToEpoch(v) + return f(cid, epoch) + }) + }) +} + +func (s *BoltTrackingStore) Close() error { + return s.db.Close() +} diff --git a/chain/store/splitstore/snoop_lmdb.go b/chain/store/splitstore/snoop_lmdb.go index 21d596bfb..1eca73feb 100644 --- a/chain/store/splitstore/snoop_lmdb.go +++ b/chain/store/splitstore/snoop_lmdb.go @@ -20,7 +20,7 @@ type LMDBTrackingStore struct { var _ TrackingStore = (*LMDBTrackingStore)(nil) -func NewLMDBTrackingStore(path string) (TrackingStore, error) { +func NewLMDBTrackingStore(path string) (*LMDBTrackingStore, error) { env, err := lmdb.NewEnv() if err != nil { return nil, 
xerrors.Errorf("failed to initialize LMDB env: %w", err) From 2e4d45ef0734f508cf9f8f67ef906dc601fbce61 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 13:40:26 +0200 Subject: [PATCH 074/148] test for bolt backed tracking store --- chain/store/splitstore/snoop_test.go | 36 ++++++++++++++++++---------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/chain/store/splitstore/snoop_test.go b/chain/store/splitstore/snoop_test.go index 225c5936b..96cfb667d 100644 --- a/chain/store/splitstore/snoop_test.go +++ b/chain/store/splitstore/snoop_test.go @@ -1,19 +1,18 @@ package splitstore import ( + "os" "testing" - "golang.org/x/xerrors" - - "github.com/ledgerwatch/lmdb-go/lmdb" - cid "github.com/ipfs/go-cid" "github.com/multiformats/go-multihash" "github.com/filecoin-project/go-state-types/abi" ) -func TestTrackingStore(t *testing.T) { +func testTrackingStore(t *testing.T, useLMDB bool) { + t.Helper() + makeCid := func(key string) cid.Cid { h, err := multihash.Sum([]byte(key), multihash.SHA2_256, -1) if err != nil { @@ -36,16 +35,19 @@ func TestTrackingStore(t *testing.T) { mustNotHave := func(s TrackingStore, cid cid.Cid) { _, err := s.Get(cid) - xerr := xerrors.Unwrap(err) - if xerr == nil { - xerr = err - } - if !lmdb.IsNotFound(xerr) { - t.Fatal("expected key not found") + if err == nil { + t.Fatal("expected error") } } - s, err := NewLMDBTrackingStore("/tmp/snoop-test") + path := "/tmp/liveset-test" + + err := os.MkdirAll(path, 0777) + if err != nil { + t.Fatal(err) + } + + s, err := NewTrackingStore(path, useLMDB) if err != nil { t.Fatal(err) } @@ -112,7 +114,7 @@ func TestTrackingStore(t *testing.T) { t.Fatal(err) } - s, err = NewLMDBTrackingStore("/tmp/snoop-test") + s, err = NewTrackingStore(path, useLMDB) if err != nil { t.Fatal(err) } @@ -124,3 +126,11 @@ func TestTrackingStore(t *testing.T) { s.Close() //nolint:errcheck } + +func TestLMDBTrackingStore(t *testing.T) { + testTrackingStore(t, true) +} + +func TestBoltTrackingStore(t 
*testing.T) { + testTrackingStore(t, false) +} From 73259aa35051f2c5fdf5bbe3d5df30eab55faefe Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 15:20:14 +0200 Subject: [PATCH 075/148] add configuration for splitstore and default to a simple compaction algorithm --- chain/store/splitstore/splitstore.go | 154 +++++++++++++++++++++++---- node/builder.go | 8 +- node/config/def.go | 13 ++- node/modules/blockstore.go | 8 +- 4 files changed, 157 insertions(+), 26 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index a307e995c..47398860a 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -38,12 +38,29 @@ func init() { logging.SetLogLevel("splitstore", "DEBUG") } +type Config struct { + // use LMDB for tracking store and liveset instead of BoltDB + UseLMDB bool + // perform full reachability analysis (expensive) for compaction + // You should enable this option if you plan to use the splitstore without a backing coldstore + EnableFullCompaction bool + // EXPERIMENTAL enable pruning of unreachable objects. + // This has not been sufficiently tested yet; only enable if you know what you are doing. + // Only applies if you enable full compaction. + EnableGC bool + // full archival nodes should enable this if EnableFullCompaction is enabled + // do NOT enable this if you synced from a snapshot. 
+ // Only applies if you enabled full compaction + Archival bool +} + type SplitStore struct { compacting int32 - enableGC bool // TODO disabled for now, as it needs testing - skipOldMsgs bool // TODO this should be false for full archival nodes - skipMsgReceipts bool // TODO this should be false for full archival nodes + fullCompaction bool + enableGC bool + skipOldMsgs bool + skipMsgReceipts bool baseEpoch abi.ChainEpoch @@ -64,30 +81,35 @@ var _ bstore.Blockstore = (*SplitStore)(nil) // NewSplitStore creates a new SplitStore instance, given a path for the hotstore dbs and a cold // blockstore. The SplitStore must be attached to the ChainStore with Start in order to trigger // compaction. -func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore, useLMDB bool) (*SplitStore, error) { +func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore, cfg *Config) (*SplitStore, error) { // the tracking store - snoop, err := NewTrackingStore(path, useLMDB) + snoop, err := NewTrackingStore(path, cfg.UseLMDB) if err != nil { return nil, err } // the liveset env - env, err := NewLiveSetEnv(path, useLMDB) - if err != nil { - snoop.Close() //nolint:errcheck - return nil, err + var env LiveSetEnv + if cfg.EnableFullCompaction { + env, err = NewLiveSetEnv(path, cfg.UseLMDB) + if err != nil { + snoop.Close() //nolint:errcheck + return nil, err + } } // and now we can make a SplitStore ss := &SplitStore{ - ds: ds, - hot: hot, - cold: cold, - snoop: snoop, - env: env, - enableGC: false, // TODO option for this - skipOldMsgs: true, // TODO option for this - skipMsgReceipts: true, // TODO option for this + ds: ds, + hot: hot, + cold: cold, + snoop: snoop, + env: env, + + fullCompaction: cfg.EnableFullCompaction, + enableGC: cfg.EnableGC, + skipOldMsgs: !cfg.Archival, + skipMsgReceipts: !cfg.Archival, } return ss, nil @@ -284,7 +306,11 @@ func (s *SplitStore) Close() error { } } - return s.env.Close() + if s.env != nil { + return 
s.env.Close() + } + + return nil } func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { @@ -319,6 +345,98 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { // Compaction/GC Algorithm func (s *SplitStore) compact() { + if s.fullCompaction { + s.compactFull() + } else { + s.compactSimple() + } +} + +func (s *SplitStore) compactSimple() { + // some stats for logging + var stHot, stCold int + + coldEpoch := s.baseEpoch + CompactionCold + cold := make(map[cid.Cid]struct{}) + + log.Info("collecting cold objects") + startCollect := time.Now() + + err := s.snoop.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) error { + // is the object stil hot? + if wrEpoch > coldEpoch { + // yes, stay in the hotstore + stHot++ + return nil + } + + cold[cid] = struct{}{} + stCold++ + return nil + }) + + if err != nil { + // TODO do something better here + panic(err) + } + + log.Infow("collection done", "took", time.Since(startCollect)) + + log.Info("moving cold objects to the coldstore") + startMove := time.Now() + + for cid := range cold { + blk, err := s.hot.Get(cid) + if err != nil { + if err == dstore.ErrNotFound { + // this can happen if the node is killed after we have deleted the block from the hotstore + // but before we have deleted it from the snoop; just delete the snoop. + err = s.snoop.Delete(cid) + if err != nil { + log.Errorf("error deleting cid %s from tracking store: %w", cid, err) + // TODO do something better here -- just continue? + panic(err) + + } + } else { + log.Errorf("error retrieving tracked block %s from hotstore: %w ", cid, err) + // TODO do something better here -- just continue? + panic(err) + } + + continue + } + + // put the object in the coldstore + err = s.cold.Put(blk) + if err != nil { + log.Errorf("error puting block %s to coldstore: %w", cid, err) + // TODO do something better here -- just continue? 
+ panic(err) + } + + // delete the object from the hotstore + err = s.hot.DeleteBlock(cid) + if err != nil { + log.Errorf("error deleting block %s from hotstore: %w", cid, err) + // TODO do something better here -- just continue? + panic(err) + } + + // remove the snoop tracking + err = s.snoop.Delete(cid) + if err != nil { + log.Errorf("error deleting cid %s from tracking store: %w", cid, err) + // TODO do something better here -- just continue? + panic(err) + } + } + + log.Infow("moving done", "took", time.Since(startMove)) + log.Infow("compaction stats", "hot", stHot, "cold", stCold) +} + +func (s *SplitStore) compactFull() { // create two on disk live sets, one for marking the cold finality region // and one for marking the hot region hotSet, err := s.env.NewLiveSet("hot") diff --git a/node/builder.go b/node/builder.go index e0f7fcfa1..2efa52c52 100644 --- a/node/builder.go +++ b/node/builder.go @@ -610,10 +610,10 @@ func Repo(r repo.Repo) Option { Override(new(dtypes.MetadataDS), modules.Datastore), Override(new(dtypes.UniversalBlockstore), modules.UniversalBlockstore), - If(cfg.Splitstore, - If(cfg.UseLMDBHotstore, + If(cfg.EnableSplitstore, + If(cfg.Splitstore.UseLMDBHotstore, Override(new(dtypes.HotBlockstore), modules.LMDBHotBlockstore)), - If(!cfg.UseLMDBHotstore, + If(!cfg.Splitstore.UseLMDBHotstore, Override(new(dtypes.HotBlockstore), modules.BadgerHotBlockstore)), Override(new(dtypes.SplitBlockstore), modules.SplitBlockstore(cfg)), Override(new(dtypes.ChainBlockstore), modules.ChainSplitBlockstore), @@ -621,7 +621,7 @@ func Repo(r repo.Repo) Option { Override(new(dtypes.BaseBlockstore), From(new(dtypes.SplitBlockstore))), Override(new(dtypes.ExposedBlockstore), From(new(dtypes.SplitBlockstore))), ), - If(!cfg.Splitstore, + If(!cfg.EnableSplitstore, Override(new(dtypes.ChainBlockstore), modules.ChainFlatBlockstore), Override(new(dtypes.StateBlockstore), modules.StateFlatBlockstore), Override(new(dtypes.BaseBlockstore), 
From(new(dtypes.UniversalBlockstore))), diff --git a/node/config/def.go b/node/config/def.go index 21141e279..fcbdefda5 100644 --- a/node/config/def.go +++ b/node/config/def.go @@ -121,9 +121,16 @@ type Pubsub struct { } type Blockstore struct { - Splitstore bool - UseLMDBHotstore bool - UseLMDBTracking bool + EnableSplitstore bool + Splitstore Splitstore +} + +type Splitstore struct { + UseLMDBHotstore bool + UseLMDBTracking bool + EnableFullCompaction bool + EnableGC bool // EXPERIMENTAL + Archival bool } // // Full Node diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 7b3bce5e1..fdb6d1417 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -103,7 +103,13 @@ func SplitBlockstore(cfg *config.Blockstore) func(lc fx.Lifecycle, r repo.Locked return nil, err } - ss, err := splitstore.NewSplitStore(path, ds, cold, hot, cfg.UseLMDBTracking) + ss, err := splitstore.NewSplitStore(path, ds, cold, hot, + &splitstore.Config{ + UseLMDB: cfg.Splitstore.UseLMDBTracking, + EnableFullCompaction: cfg.Splitstore.EnableFullCompaction, + EnableGC: cfg.Splitstore.EnableGC, + Archival: cfg.Splitstore.Archival, + }) if err != nil { return nil, err } From 364076c1ea5b8e54dcf37f80eadc4beddc8a39b7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 16:06:33 +0200 Subject: [PATCH 076/148] set NoSync option for bolt livesets --- chain/store/splitstore/liveset_bolt.go | 1 + 1 file changed, 1 insertion(+) diff --git a/chain/store/splitstore/liveset_bolt.go b/chain/store/splitstore/liveset_bolt.go index 07a22b93f..0d6d7cd2e 100644 --- a/chain/store/splitstore/liveset_bolt.go +++ b/chain/store/splitstore/liveset_bolt.go @@ -26,6 +26,7 @@ func NewBoltLiveSetEnv(path string) (*BoltLiveSetEnv, error) { db, err := bolt.Open(path, 0644, &bolt.Options{ Timeout: 1 * time.Second, + NoSync: true, }) if err != nil { return nil, err From 783dcda19cba29c83bd5a575d8bf13ef4a03bffb Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 18:16:09 +0200 
Subject: [PATCH 077/148] add Sync to the tracking store --- chain/store/splitstore/snoop.go | 1 + chain/store/splitstore/snoop_bolt.go | 5 +++++ chain/store/splitstore/snoop_lmdb.go | 4 ++++ chain/store/splitstore/splitstore.go | 6 ++++++ 4 files changed, 16 insertions(+) diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index 3a2b5f43e..b312cae93 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -13,6 +13,7 @@ type TrackingStore interface { Get(cid.Cid) (abi.ChainEpoch, error) Delete(cid.Cid) error ForEach(func(cid.Cid, abi.ChainEpoch) error) error + Sync() error Close() error } diff --git a/chain/store/splitstore/snoop_bolt.go b/chain/store/splitstore/snoop_bolt.go index 015a11838..5c305e91e 100644 --- a/chain/store/splitstore/snoop_bolt.go +++ b/chain/store/splitstore/snoop_bolt.go @@ -22,6 +22,7 @@ func NewBoltTrackingStore(path string) (*BoltTrackingStore, error) { db, err := bolt.Open(path, 0644, &bolt.Options{ Timeout: 1 * time.Second, + NoSync: true, }) if err != nil { return nil, err @@ -97,6 +98,10 @@ func (s *BoltTrackingStore) ForEach(f func(cid.Cid, abi.ChainEpoch) error) error }) } +func (s *BoltTrackingStore) Sync() error { + return s.db.Sync() +} + func (s *BoltTrackingStore) Close() error { return s.db.Close() } diff --git a/chain/store/splitstore/snoop_lmdb.go b/chain/store/splitstore/snoop_lmdb.go index 1eca73feb..225f5f199 100644 --- a/chain/store/splitstore/snoop_lmdb.go +++ b/chain/store/splitstore/snoop_lmdb.go @@ -155,6 +155,10 @@ func (s *LMDBTrackingStore) ForEach(f func(cid.Cid, abi.ChainEpoch) error) error }) } +func (s *LMDBTrackingStore) Sync() error { + return nil +} + func (s *LMDBTrackingStore) Close() error { s.env.CloseDBI(s.db) return s.env.Close() diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 47398860a..31a60b49e 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -633,6 
+633,12 @@ func (s *SplitStore) compactFull() { log.Infow("sweeping done", "took", time.Since(startSweep)) log.Infow("compaction stats", "hot", stHot, "cold", stCold, "dead", stDead) + err = s.snoop.Sync() + if err != nil { + // TODO do something better here + panic(err) + } + err = s.setBaseEpoch(coldEpoch) if err != nil { // TODO do something better here From 2f26026991da57475f8c77d8cabff1ba37109f39 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 18:27:58 +0200 Subject: [PATCH 078/148] compactSimple should walk the cold epoch at depth 1 So that it finds reachable objects that should stay in the hotstore --- chain/store/splitstore/splitstore.go | 77 +++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 13 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 31a60b49e..945d735fa 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -89,13 +89,10 @@ func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore } // the liveset env - var env LiveSetEnv - if cfg.EnableFullCompaction { - env, err = NewLiveSetEnv(path, cfg.UseLMDB) - if err != nil { - snoop.Close() //nolint:errcheck - return nil, err - } + env, err := NewLiveSetEnv(path, cfg.UseLMDB) + if err != nil { + snoop.Close() //nolint:errcheck + return nil, err } // and now we can make a SplitStore @@ -306,11 +303,7 @@ func (s *SplitStore) Close() error { } } - if s.env != nil { - return s.env.Close() - } - - return nil + return s.env.Close() } func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { @@ -359,10 +352,44 @@ func (s *SplitStore) compactSimple() { coldEpoch := s.baseEpoch + CompactionCold cold := make(map[cid.Cid]struct{}) + coldSet, err := s.env.NewLiveSet("cold") + if err != nil { + // TODO do something better here + panic(err) + } + defer coldSet.Close() //nolint:errcheck + + // 1. 
mark reachable cold objects by looking at the objects reachable only from the cold epoch + log.Infof("marking reachable cold objects") + startMark := time.Now() + + s.mx.Lock() + curTs := s.curTs + s.mx.Unlock() + + coldTs, err := s.cs.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) + if err != nil { + // TODO do something better here + panic(err) + } + + err = s.cs.WalkSnapshot(context.Background(), coldTs, 1, s.skipOldMsgs, s.skipMsgReceipts, + func(cid cid.Cid) error { + return coldSet.Mark(cid) + }) + + if err != nil { + // TODO do something better here + panic(err) + } + + log.Infow("marking done", "took", time.Since(startMark)) + + // 2. move cold unreachable objects to the coldstore log.Info("collecting cold objects") startCollect := time.Now() - err := s.snoop.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) error { + err = s.snoop.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) error { // is the object stil hot? if wrEpoch > coldEpoch { // yes, stay in the hotstore @@ -370,6 +397,18 @@ func (s *SplitStore) compactSimple() { return nil } + // check whether it is reachable in the cold boundary + mark, err := coldSet.Has(cid) + if err != nil { + return xerrors.Errorf("error checkiing cold set for %s: %w", cid, err) + } + + if mark { + stHot++ + return nil + } + + // it's cold, mark it for move cold[cid] = struct{}{} stCold++ return nil @@ -434,6 +473,18 @@ func (s *SplitStore) compactSimple() { log.Infow("moving done", "took", time.Since(startMove)) log.Infow("compaction stats", "hot", stHot, "cold", stCold) + + err = s.snoop.Sync() + if err != nil { + // TODO do something better here + panic(err) + } + + err = s.setBaseEpoch(coldEpoch) + if err != nil { + // TODO do something better here + panic(err) + } } func (s *SplitStore) compactFull() { From 2426ffb2771238b057cb502ae757f378da9d0b1c Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 18:47:13 +0200 Subject: [PATCH 079/148] better logging plus moving some code around --- 
chain/store/splitstore/splitstore.go | 38 +++++++++++++++++----------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 945d735fa..5fe41a4e8 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -346,11 +346,14 @@ func (s *SplitStore) compact() { } func (s *SplitStore) compactSimple() { - // some stats for logging - var stHot, stCold int + s.mx.Lock() + curTs := s.curTs + s.mx.Unlock() coldEpoch := s.baseEpoch + CompactionCold - cold := make(map[cid.Cid]struct{}) + + log.Infof("running simple compaction; currentEpoch: %d baseEpoch: %d coldEpoch: %d", + curTs.Height(), s.baseEpoch, coldEpoch) coldSet, err := s.env.NewLiveSet("cold") if err != nil { @@ -360,13 +363,9 @@ func (s *SplitStore) compactSimple() { defer coldSet.Close() //nolint:errcheck // 1. mark reachable cold objects by looking at the objects reachable only from the cold epoch - log.Infof("marking reachable cold objects") + log.Info("marking reachable cold objects") startMark := time.Now() - s.mx.Lock() - curTs := s.curTs - s.mx.Unlock() - coldTs, err := s.cs.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) if err != nil { // TODO do something better here @@ -389,6 +388,11 @@ func (s *SplitStore) compactSimple() { log.Info("collecting cold objects") startCollect := time.Now() + cold := make(map[cid.Cid]struct{}) + + // some stats for logging + var stHot, stCold int + err = s.snoop.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) error { // is the object stil hot? 
if wrEpoch > coldEpoch { @@ -488,7 +492,17 @@ func (s *SplitStore) compactSimple() { } func (s *SplitStore) compactFull() { - // create two on disk live sets, one for marking the cold finality region + s.mx.Lock() + curTs := s.curTs + s.mx.Unlock() + + epoch := curTs.Height() + coldEpoch := s.baseEpoch + CompactionCold + + log.Infof("running full compaction; currentEpoch: %d baseEpoch: %d coldEpoch: %d", + curTs.Height(), s.baseEpoch, coldEpoch) + + // create two live sets, one for marking the cold finality region // and one for marking the hot region hotSet, err := s.env.NewLiveSet("hot") if err != nil { @@ -509,12 +523,6 @@ func (s *SplitStore) compactFull() { startMark := time.Now() // Phase 1a: mark all reachable CIDs in the hot range - s.mx.Lock() - curTs := s.curTs - s.mx.Unlock() - - epoch := curTs.Height() - coldEpoch := s.baseEpoch + CompactionCold err = s.cs.WalkSnapshot(context.Background(), curTs, epoch-coldEpoch, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { return hotSet.Mark(cid) From e52c709d8ad8c43e44992e0063b834c06b638343 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 21:30:01 +0200 Subject: [PATCH 080/148] more accurate setting of skip params --- chain/store/splitstore/splitstore.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 5fe41a4e8..c2f6efc79 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -105,8 +105,8 @@ func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore fullCompaction: cfg.EnableFullCompaction, enableGC: cfg.EnableGC, - skipOldMsgs: !cfg.Archival, - skipMsgReceipts: !cfg.Archival, + skipOldMsgs: !(cfg.EnableFullCompaction && cfg.Archival), + skipMsgReceipts: !(cfg.EnableFullCompaction && cfg.Archival), } return ss, nil From 09cd1175a10ee1425daeb5fa01dd0144cbecb03a Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 21:46:40 
+0200 Subject: [PATCH 081/148] structured log for beginning of compaction --- chain/store/splitstore/splitstore.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index c2f6efc79..92d50a925 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -352,8 +352,7 @@ func (s *SplitStore) compactSimple() { coldEpoch := s.baseEpoch + CompactionCold - log.Infof("running simple compaction; currentEpoch: %d baseEpoch: %d coldEpoch: %d", - curTs.Height(), s.baseEpoch, coldEpoch) + log.Infow("running simple compaction", "currentEpoch", curTs.Height(), "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch) coldSet, err := s.env.NewLiveSet("cold") if err != nil { @@ -499,8 +498,7 @@ func (s *SplitStore) compactFull() { epoch := curTs.Height() coldEpoch := s.baseEpoch + CompactionCold - log.Infof("running full compaction; currentEpoch: %d baseEpoch: %d coldEpoch: %d", - curTs.Height(), s.baseEpoch, coldEpoch) + log.Infow("running full compaction", "currentEpoch", curTs.Height(), "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch) // create two live sets, one for marking the cold finality region // and one for marking the hot region From aba6530411b9e54ecc37269622d1b7668e108cbd Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 27 Feb 2021 23:08:23 +0200 Subject: [PATCH 082/148] batch deletion for purging the tracking store --- chain/store/splitstore/snoop.go | 1 + chain/store/splitstore/snoop_bolt.go | 13 ++++ chain/store/splitstore/snoop_lmdb.go | 15 +++++ chain/store/splitstore/splitstore.go | 89 ++++++++++++++++------------ 4 files changed, 79 insertions(+), 39 deletions(-) diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index b312cae93..02c270fc7 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -12,6 +12,7 @@ type TrackingStore interface { PutBatch([]cid.Cid, abi.ChainEpoch) error 
Get(cid.Cid) (abi.ChainEpoch, error) Delete(cid.Cid) error + DeleteBatch(map[cid.Cid]struct{}) error ForEach(func(cid.Cid, abi.ChainEpoch) error) error Sync() error Close() error diff --git a/chain/store/splitstore/snoop_bolt.go b/chain/store/splitstore/snoop_bolt.go index 5c305e91e..2fc5d4f6d 100644 --- a/chain/store/splitstore/snoop_bolt.go +++ b/chain/store/splitstore/snoop_bolt.go @@ -87,6 +87,19 @@ func (s *BoltTrackingStore) Delete(cid cid.Cid) error { }) } +func (s *BoltTrackingStore) DeleteBatch(cids map[cid.Cid]struct{}) error { + return s.db.Batch(func(tx *bolt.Tx) error { + b := tx.Bucket(s.bucketId) + for cid := range cids { + err := b.Delete(cid.Hash()) + if err != nil { + return xerrors.Errorf("error deleting %s", cid) + } + } + return nil + }) +} + func (s *BoltTrackingStore) ForEach(f func(cid.Cid, abi.ChainEpoch) error) error { return s.db.View(func(tx *bolt.Tx) error { b := tx.Bucket(s.bucketId) diff --git a/chain/store/splitstore/snoop_lmdb.go b/chain/store/splitstore/snoop_lmdb.go index 225f5f199..4222c94be 100644 --- a/chain/store/splitstore/snoop_lmdb.go +++ b/chain/store/splitstore/snoop_lmdb.go @@ -122,6 +122,21 @@ func (s *LMDBTrackingStore) Delete(cid cid.Cid) error { }) } +func (s *LMDBTrackingStore) DeleteBatch(cids map[cid.Cid]struct{}) error { + return withMaxReadersRetry( + func() error { + return s.env.Update(func(txn *lmdb.Txn) error { + for cid := range cids { + err := txn.Del(s.db, cid.Hash(), nil) + if err != nil { + return err + } + } + return nil + }) + }) +} + func (s *LMDBTrackingStore) ForEach(f func(cid.Cid, abi.ChainEpoch) error) error { return withMaxReadersRetry( func() error { diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 92d50a925..edfe09ff9 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -423,10 +423,10 @@ func (s *SplitStore) compactSimple() { } log.Infow("collection done", "took", time.Since(startCollect)) + 
log.Infow("compaction stats", "hot", stHot, "cold", stCold) log.Info("moving cold objects to the coldstore") startMove := time.Now() - for cid := range cold { blk, err := s.hot.Get(cid) if err != nil { @@ -435,13 +435,12 @@ func (s *SplitStore) compactSimple() { // but before we have deleted it from the snoop; just delete the snoop. err = s.snoop.Delete(cid) if err != nil { - log.Errorf("error deleting cid %s from tracking store: %w", cid, err) + log.Errorf("error deleting cid %s from tracking store: %s", cid, err) // TODO do something better here -- just continue? panic(err) - } } else { - log.Errorf("error retrieving tracked block %s from hotstore: %w ", cid, err) + log.Errorf("error retrieving tracked block %s from hotstore: %s", cid, err) // TODO do something better here -- just continue? panic(err) } @@ -452,7 +451,7 @@ func (s *SplitStore) compactSimple() { // put the object in the coldstore err = s.cold.Put(blk) if err != nil { - log.Errorf("error puting block %s to coldstore: %w", cid, err) + log.Errorf("error puting block %s to coldstore: %s", cid, err) // TODO do something better here -- just continue? panic(err) } @@ -460,22 +459,24 @@ func (s *SplitStore) compactSimple() { // delete the object from the hotstore err = s.hot.DeleteBlock(cid) if err != nil { - log.Errorf("error deleting block %s from hotstore: %w", cid, err) - // TODO do something better here -- just continue? - panic(err) - } - - // remove the snoop tracking - err = s.snoop.Delete(cid) - if err != nil { - log.Errorf("error deleting cid %s from tracking store: %w", cid, err) + log.Errorf("error deleting block %s from hotstore: %s", cid, err) // TODO do something better here -- just continue? 
panic(err) } } - log.Infow("moving done", "took", time.Since(startMove)) - log.Infow("compaction stats", "hot", stHot, "cold", stCold) + + // remove the snoop tracking + purgeStart := time.Now() + log.Info("purging cold objects from tracking store") + + err = s.snoop.DeleteBatch(cold) + if err != nil { + log.Errorf("error purging cold objects from tracking store: %s", err) + // TODO do something better here -- just continue? + panic(err) + } + log.Infow("purging done", "took", time.Since(purgeStart)) err = s.snoop.Sync() if err != nil { @@ -617,6 +618,8 @@ func (s *SplitStore) compactFull() { panic(err) } + log.Infow("compaction stats", "hot", stHot, "cold", stCold, "dead", stDead) + log.Info("moving cold objects to the coldstore") for cid := range cold { blk, err := s.hot.Get(cid) @@ -626,13 +629,12 @@ func (s *SplitStore) compactFull() { // but before we have deleted it from the snoop; just delete the snoop. err = s.snoop.Delete(cid) if err != nil { - log.Errorf("error deleting cid %s from tracking store: %w", cid, err) + log.Errorf("error deleting cid %s from tracking store: %s", cid, err) // TODO do something better here -- just continue? panic(err) - } } else { - log.Errorf("error retrieving tracked block %s from hotstore: %w ", cid, err) + log.Errorf("error retrieving tracked block %s from hotstore: %s", cid, err) // TODO do something better here -- just continue? panic(err) } @@ -643,7 +645,7 @@ func (s *SplitStore) compactFull() { // put the object in the coldstore err = s.cold.Put(blk) if err != nil { - log.Errorf("error puting block %s to coldstore: %w", cid, err) + log.Errorf("error puting block %s to coldstore: %s", cid, err) // TODO do something better here -- just continue? panic(err) } @@ -651,20 +653,25 @@ func (s *SplitStore) compactFull() { // delete the object from the hotstore err = s.hot.DeleteBlock(cid) if err != nil { - log.Errorf("error deleting block %s from hotstore: %w", cid, err) - // TODO do something better here -- just continue? 
- panic(err) - } - - // remove the snoop tracking - err = s.snoop.Delete(cid) - if err != nil { - log.Errorf("error deleting cid %s from tracking store: %w", cid, err) + log.Errorf("error deleting block %s from hotstore: %s", cid, err) // TODO do something better here -- just continue? panic(err) } } + // remove the snoop tracking + purgeStart := time.Now() + log.Info("purging cold objects from tracking store") + + err = s.snoop.DeleteBatch(cold) + if err != nil { + log.Errorf("error purging cold objects from tracking store: %s", err) + // TODO do something better here -- just continue? + panic(err) + } + + log.Infow("purging done", "took", time.Since(purgeStart)) + if len(dead) > 0 { log.Info("deleting dead objects") @@ -672,23 +679,27 @@ func (s *SplitStore) compactFull() { // delete the object from the hotstore err = s.hot.DeleteBlock(cid) if err != nil { - log.Errorf("error deleting block %s from hotstore: %w", cid, err) - // TODO do something better here -- just continue? - panic(err) - } - - // remove the snoop tracking - err = s.snoop.Delete(cid) - if err != nil { - log.Errorf("error deleting cid %s from tracking store: %w", cid, err) + log.Errorf("error deleting block %s from hotstore: %s", cid, err) // TODO do something better here -- just continue? panic(err) } } + + // remove the snoop tracking + purgeStart := time.Now() + log.Info("purging dead objects from tracking store") + + err = s.snoop.DeleteBatch(dead) + if err != nil { + log.Errorf("error purging dead objects from tracking store: %s", err) + // TODO do something better here -- just continue? 
+ panic(err) + } + + log.Infow("purging done", "took", time.Since(purgeStart)) } log.Infow("sweeping done", "took", time.Since(startSweep)) - log.Infow("compaction stats", "hot", stHot, "cold", stCold, "dead", stDead) err = s.snoop.Sync() if err != nil { From 97abbe1eca48896cfa15a278fc4748b6c4ba2855 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 09:59:11 +0200 Subject: [PATCH 083/148] add (salted) bloom filter liveset --- chain/store/splitstore/liveset.go | 15 ++++-- chain/store/splitstore/liveset_bloom.go | 65 +++++++++++++++++++++++++ chain/store/splitstore/snoop.go | 13 +++-- chain/store/splitstore/splitstore.go | 10 ++-- node/builder.go | 6 +-- node/config/def.go | 13 ++++- node/modules/blockstore.go | 3 +- 7 files changed, 107 insertions(+), 18 deletions(-) create mode 100644 chain/store/splitstore/liveset_bloom.go diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index 1636c6efa..6b64a3a56 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -3,6 +3,8 @@ package splitstore import ( "path/filepath" + "golang.org/x/xerrors" + cid "github.com/ipfs/go-cid" ) @@ -19,10 +21,15 @@ type LiveSetEnv interface { Close() error } -func NewLiveSetEnv(path string, useLMDB bool) (LiveSetEnv, error) { - if useLMDB { +func NewLiveSetEnv(path string, liveSetType string) (LiveSetEnv, error) { + switch liveSetType { + case "", "bloom": + return NewBloomLiveSetEnv() + case "bolt": + return NewBoltLiveSetEnv(filepath.Join(path, "sweep.bolt")) + case "lmdb": return NewLMDBLiveSetEnv(filepath.Join(path, "sweep.lmdb")) + default: + return nil, xerrors.Errorf("unknown live set type %s", liveSetType) } - - return NewBoltLiveSetEnv(filepath.Join(path, "sweep.bolt")) } diff --git a/chain/store/splitstore/liveset_bloom.go b/chain/store/splitstore/liveset_bloom.go new file mode 100644 index 000000000..d553671a9 --- /dev/null +++ b/chain/store/splitstore/liveset_bloom.go @@ -0,0 +1,65 @@ +package splitstore + 
+import ( + "math/rand" + + "golang.org/x/xerrors" + + bbloom "github.com/ipfs/bbloom" + cid "github.com/ipfs/go-cid" +) + +type BloomLiveSetEnv struct{} + +var _ LiveSetEnv = (*BloomLiveSetEnv)(nil) + +type BloomLiveSet struct { + salt []byte + bf *bbloom.Bloom +} + +var _ LiveSet = (*BloomLiveSet)(nil) + +func NewBloomLiveSetEnv() (*BloomLiveSetEnv, error) { + return &BloomLiveSetEnv{}, nil +} + +func (e *BloomLiveSetEnv) NewLiveSet(name string) (LiveSet, error) { + salt := make([]byte, 4) + _, err := rand.Read(salt) + if err != nil { + return nil, xerrors.Errorf("error reading salt: %w", err) + } + + bf, err := bbloom.New(float64(10_000_000), float64(0.01)) + if err != nil { + return nil, xerrors.Errorf("error creating bloom filter: %w", err) + } + + return &BloomLiveSet{salt: salt, bf: bf}, nil +} + +func (e *BloomLiveSetEnv) Close() error { + return nil +} + +func (s *BloomLiveSet) saltedKey(cid cid.Cid) []byte { + hash := cid.Hash() + key := make([]byte, len(s.salt)+len(hash)) + n := copy(key, s.salt) + copy(key[n:], hash) + return key +} + +func (s *BloomLiveSet) Mark(cid cid.Cid) error { + s.bf.Add(s.saltedKey(cid)) + return nil +} + +func (s *BloomLiveSet) Has(cid cid.Cid) (bool, error) { + return s.bf.Has(s.saltedKey(cid)), nil +} + +func (s *BloomLiveSet) Close() error { + return nil +} diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index 02c270fc7..eeff09e0e 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -3,6 +3,8 @@ package splitstore import ( "path/filepath" + "golang.org/x/xerrors" + "github.com/filecoin-project/go-state-types/abi" cid "github.com/ipfs/go-cid" ) @@ -18,10 +20,13 @@ type TrackingStore interface { Close() error } -func NewTrackingStore(path string, useLMDB bool) (TrackingStore, error) { - if useLMDB { +func NewTrackingStore(path string, trackingStoreType string) (TrackingStore, error) { + switch trackingStoreType { + case "", "bolt": + return 
NewBoltTrackingStore(filepath.Join(path, "snoop.bolt")) + case "lmdb": return NewLMDBTrackingStore(filepath.Join(path, "snoop.lmdb")) + default: + return nil, xerrors.Errorf("unknown tracking store type %s", trackingStoreType) } - - return NewBoltTrackingStore(filepath.Join(path, "snoop.bolt")) } diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index edfe09ff9..45ec50d33 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -39,8 +39,10 @@ func init() { } type Config struct { - // use LMDB for tracking store and liveset instead of BoltDB - UseLMDB bool + // TrackingStore type; bolt (default) or lmdb + TrackingStoreType string + // LiveSet type; bloom (default), bolt, or lmdb + LiveSetType string // perform full reachability analysis (expensive) for compaction // You should enable this option if you plan to use the splitstore without a backing coldstore EnableFullCompaction bool @@ -83,13 +85,13 @@ var _ bstore.Blockstore = (*SplitStore)(nil) // compaction. 
func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore, cfg *Config) (*SplitStore, error) { // the tracking store - snoop, err := NewTrackingStore(path, cfg.UseLMDB) + snoop, err := NewTrackingStore(path, cfg.TrackingStoreType) if err != nil { return nil, err } // the liveset env - env, err := NewLiveSetEnv(path, cfg.UseLMDB) + env, err := NewLiveSetEnv(path, cfg.LiveSetType) if err != nil { snoop.Close() //nolint:errcheck return nil, err diff --git a/node/builder.go b/node/builder.go index 2efa52c52..c92ab1248 100644 --- a/node/builder.go +++ b/node/builder.go @@ -611,10 +611,10 @@ func Repo(r repo.Repo) Option { Override(new(dtypes.UniversalBlockstore), modules.UniversalBlockstore), If(cfg.EnableSplitstore, - If(cfg.Splitstore.UseLMDBHotstore, - Override(new(dtypes.HotBlockstore), modules.LMDBHotBlockstore)), - If(!cfg.Splitstore.UseLMDBHotstore, + If(cfg.Splitstore.GetHotStoreType() == "badger", Override(new(dtypes.HotBlockstore), modules.BadgerHotBlockstore)), + If(cfg.Splitstore.GetHotStoreType() == "lmdb", + Override(new(dtypes.HotBlockstore), modules.LMDBHotBlockstore)), Override(new(dtypes.SplitBlockstore), modules.SplitBlockstore(cfg)), Override(new(dtypes.ChainBlockstore), modules.ChainSplitBlockstore), Override(new(dtypes.StateBlockstore), modules.StateSplitBlockstore), diff --git a/node/config/def.go b/node/config/def.go index fcbdefda5..f66ac2ba0 100644 --- a/node/config/def.go +++ b/node/config/def.go @@ -126,13 +126,22 @@ type Blockstore struct { } type Splitstore struct { - UseLMDBHotstore bool - UseLMDBTracking bool + HotStoreType string + TrackingStoreType string + LiveSetType string EnableFullCompaction bool EnableGC bool // EXPERIMENTAL Archival bool } +func (s *Splitstore) GetHotStoreType() string { + // default is badger + if s.HotStoreType == "" { + return "badger" + } + return s.HotStoreType +} + // // Full Node type Metrics struct { diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 
fdb6d1417..ae4f7a188 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -105,7 +105,8 @@ func SplitBlockstore(cfg *config.Blockstore) func(lc fx.Lifecycle, r repo.Locked ss, err := splitstore.NewSplitStore(path, ds, cold, hot, &splitstore.Config{ - UseLMDB: cfg.Splitstore.UseLMDBTracking, + TrackingStoreType: cfg.Splitstore.TrackingStoreType, + LiveSetType: cfg.Splitstore.LiveSetType, EnableFullCompaction: cfg.Splitstore.EnableFullCompaction, EnableGC: cfg.Splitstore.EnableGC, Archival: cfg.Splitstore.Archival, From 4cc672d0c5a764975e0e5148265b5e1a4f34bce5 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 10:21:48 +0200 Subject: [PATCH 084/148] batch move objects from coldstore to hotstore --- chain/store/splitstore/splitstore.go | 88 +++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 15 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 45ec50d33..8474cca45 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -394,6 +394,7 @@ func (s *SplitStore) compactSimple() { // some stats for logging var stHot, stCold int + // 2.1 iterate through the snoop and collect unreachable cold objects err = s.snoop.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) error { // is the object stil hot? 
if wrEpoch > coldEpoch { @@ -427,8 +428,13 @@ func (s *SplitStore) compactSimple() { log.Infow("collection done", "took", time.Since(startCollect)) log.Infow("compaction stats", "hot", stHot, "cold", stCold) + // 2.2 copy the cold objects to the coldstore log.Info("moving cold objects to the coldstore") startMove := time.Now() + + const batchSize = 1024 + batch := make([]blocks.Block, 0, batchSize) + for cid := range cold { blk, err := s.hot.Get(cid) if err != nil { @@ -450,14 +456,33 @@ func (s *SplitStore) compactSimple() { continue } - // put the object in the coldstore - err = s.cold.Put(blk) + batch = append(batch, blk) + if len(batch) == batchSize { + err = s.cold.PutMany(batch) + if err != nil { + log.Errorf("error putting cold batch to coldstore: %s", err) + // TODO do something better here -- just continue? + panic(err) + } + batch = batch[:0] + } + } + + if len(batch) > 0 { + err = s.cold.PutMany(batch) if err != nil { - log.Errorf("error puting block %s to coldstore: %s", cid, err) + log.Errorf("error putting cold batch to coldstore: %s", err) // TODO do something better here -- just continue? panic(err) } + } + log.Infow("moving done", "took", time.Since(startMove)) + // 2.3 delete cold objects from the hotstore + // TODO we really want batching for this! 
+ log.Info("purging cold objects from the hotstore") + purgeStart := time.Now() + for cid := range cold { // delete the object from the hotstore err = s.hot.DeleteBlock(cid) if err != nil { @@ -466,10 +491,10 @@ func (s *SplitStore) compactSimple() { panic(err) } } - log.Infow("moving done", "took", time.Since(startMove)) + log.Infow("purging cold from hotstore done", "took", time.Since(purgeStart)) - // remove the snoop tracking - purgeStart := time.Now() + // 2.4 remove the snoop tracking for cold objects + purgeStart = time.Now() log.Info("purging cold objects from tracking store") err = s.snoop.DeleteBatch(cold) @@ -478,8 +503,9 @@ func (s *SplitStore) compactSimple() { // TODO do something better here -- just continue? panic(err) } - log.Infow("purging done", "took", time.Since(purgeStart)) + log.Infow("purging cold from tracking store done", "took", time.Since(purgeStart)) + // we are done; do some housekeeping err = s.snoop.Sync() if err != nil { // TODO do something better here @@ -566,6 +592,7 @@ func (s *SplitStore) compactFull() { cold := make(map[cid.Cid]struct{}) dead := make(map[cid.Cid]struct{}) + // 2.1 iterate through the snoop and collect cold and dead objects err = s.snoop.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) error { // is the object stil hot? 
if wrEpoch > coldEpoch { @@ -622,7 +649,13 @@ func (s *SplitStore) compactFull() { log.Infow("compaction stats", "hot", stHot, "cold", stCold, "dead", stDead) + // 2.2 copy the cold objects to the coldstore log.Info("moving cold objects to the coldstore") + startMove := time.Now() + + const batchSize = 1024 + batch := make([]blocks.Block, 0, batchSize) + for cid := range cold { blk, err := s.hot.Get(cid) if err != nil { @@ -644,14 +677,33 @@ func (s *SplitStore) compactFull() { continue } - // put the object in the coldstore - err = s.cold.Put(blk) + batch = append(batch, blk) + if len(batch) == batchSize { + err = s.cold.PutMany(batch) + if err != nil { + log.Errorf("error putting cold batch to coldstore: %s", err) + // TODO do something better here -- just continue? + panic(err) + } + batch = batch[:0] + } + } + + if len(batch) > 0 { + err = s.cold.PutMany(batch) if err != nil { - log.Errorf("error puting block %s to coldstore: %s", cid, err) + log.Errorf("error putting cold batch to coldstore: %s", err) // TODO do something better here -- just continue? panic(err) } + } + log.Infow("moving done", "took", time.Since(startMove)) + // 2.3 delete cold objects from the hotstore + // TODO we really want batching for this! + log.Info("purging cold objects from the hotstore") + purgeStart := time.Now() + for cid := range cold { // delete the object from the hotstore err = s.hot.DeleteBlock(cid) if err != nil { @@ -660,9 +712,10 @@ func (s *SplitStore) compactFull() { panic(err) } } + log.Infow("purging cold from hotstore done", "took", time.Since(purgeStart)) - // remove the snoop tracking - purgeStart := time.Now() + // 2.4 remove the snoop tracking for cold objects + purgeStart = time.Now() log.Info("purging cold objects from tracking store") err = s.snoop.DeleteBatch(cold) @@ -671,12 +724,15 @@ func (s *SplitStore) compactFull() { // TODO do something better here -- just continue? 
panic(err) } + log.Infow("purging cold from tracking store done", "took", time.Since(purgeStart)) - log.Infow("purging done", "took", time.Since(purgeStart)) - + // 3. if we have dead objects, delete them from the hotstore and remove the tracking if len(dead) > 0 { log.Info("deleting dead objects") + purgeStart = time.Now() + log.Info("purging dead objects from the hotstore") + // TODO we really want batching for this! for cid := range dead { // delete the object from the hotstore err = s.hot.DeleteBlock(cid) @@ -686,6 +742,7 @@ func (s *SplitStore) compactFull() { panic(err) } } + log.Infow("purging dead from hotstore done", "took", time.Since(purgeStart)) // remove the snoop tracking purgeStart := time.Now() @@ -698,11 +755,12 @@ func (s *SplitStore) compactFull() { panic(err) } - log.Infow("purging done", "took", time.Since(purgeStart)) + log.Infow("purging dead from trackingstore done", "took", time.Since(purgeStart)) } log.Infow("sweeping done", "took", time.Since(startSweep)) + // we are done; do some housekeeping err = s.snoop.Sync() if err != nil { // TODO do something better here From f4c6bc6a86a435abff9de2d27ac1b6e43a25c506 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 10:25:35 +0200 Subject: [PATCH 085/148] comment nomenclature --- chain/store/splitstore/splitstore.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 8474cca45..1d9046e11 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -443,7 +443,7 @@ func (s *SplitStore) compactSimple() { // but before we have deleted it from the snoop; just delete the snoop. err = s.snoop.Delete(cid) if err != nil { - log.Errorf("error deleting cid %s from tracking store: %s", cid, err) + log.Errorf("error deleting cid %s from snoop: %s", cid, err) // TODO do something better here -- just continue? 
panic(err) } @@ -495,15 +495,15 @@ func (s *SplitStore) compactSimple() { // 2.4 remove the snoop tracking for cold objects purgeStart = time.Now() - log.Info("purging cold objects from tracking store") + log.Info("purging cold objects from snoop") err = s.snoop.DeleteBatch(cold) if err != nil { - log.Errorf("error purging cold objects from tracking store: %s", err) + log.Errorf("error purging cold objects from snoop: %s", err) // TODO do something better here -- just continue? panic(err) } - log.Infow("purging cold from tracking store done", "took", time.Since(purgeStart)) + log.Infow("purging cold from snoop done", "took", time.Since(purgeStart)) // we are done; do some housekeeping err = s.snoop.Sync() @@ -664,7 +664,7 @@ func (s *SplitStore) compactFull() { // but before we have deleted it from the snoop; just delete the snoop. err = s.snoop.Delete(cid) if err != nil { - log.Errorf("error deleting cid %s from tracking store: %s", cid, err) + log.Errorf("error deleting cid %s from snoop: %s", cid, err) // TODO do something better here -- just continue? panic(err) } @@ -716,15 +716,15 @@ func (s *SplitStore) compactFull() { // 2.4 remove the snoop tracking for cold objects purgeStart = time.Now() - log.Info("purging cold objects from tracking store") + log.Info("purging cold objects from snoop") err = s.snoop.DeleteBatch(cold) if err != nil { - log.Errorf("error purging cold objects from tracking store: %s", err) + log.Errorf("error purging cold objects from snoop: %s", err) // TODO do something better here -- just continue? panic(err) } - log.Infow("purging cold from tracking store done", "took", time.Since(purgeStart)) + log.Infow("purging cold from snoop done", "took", time.Since(purgeStart)) // 3. 
if we have dead objects, delete them from the hotstore and remove the tracking if len(dead) > 0 { @@ -746,16 +746,16 @@ func (s *SplitStore) compactFull() { // remove the snoop tracking purgeStart := time.Now() - log.Info("purging dead objects from tracking store") + log.Info("purging dead objects from snoop") err = s.snoop.DeleteBatch(dead) if err != nil { - log.Errorf("error purging dead objects from tracking store: %s", err) + log.Errorf("error purging dead objects from snoop: %s", err) // TODO do something better here -- just continue? panic(err) } - log.Infow("purging dead from trackingstore done", "took", time.Since(purgeStart)) + log.Infow("purging dead from snoop done", "took", time.Since(purgeStart)) } log.Infow("sweeping done", "took", time.Since(startSweep)) From f5ce7957f3831f01667732b2f1d76c91b6001eaf Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 10:29:00 +0200 Subject: [PATCH 086/148] size bloom filter for 50M objects --- chain/store/splitstore/liveset_bloom.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain/store/splitstore/liveset_bloom.go b/chain/store/splitstore/liveset_bloom.go index d553671a9..4af3a7969 100644 --- a/chain/store/splitstore/liveset_bloom.go +++ b/chain/store/splitstore/liveset_bloom.go @@ -31,7 +31,7 @@ func (e *BloomLiveSetEnv) NewLiveSet(name string) (LiveSet, error) { return nil, xerrors.Errorf("error reading salt: %w", err) } - bf, err := bbloom.New(float64(10_000_000), float64(0.01)) + bf, err := bbloom.New(float64(50_000_000), float64(0.01)) if err != nil { return nil, xerrors.Errorf("error creating bloom filter: %w", err) } From 88849201ffab6cebfad7ae4313d01726176352a7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 10:35:27 +0200 Subject: [PATCH 087/148] fix tests --- chain/store/splitstore/liveset_test.go | 12 ++++++++---- chain/store/splitstore/snoop_test.go | 22 +++++++++++----------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git 
a/chain/store/splitstore/liveset_test.go b/chain/store/splitstore/liveset_test.go index e08bf1865..cc0183f6a 100644 --- a/chain/store/splitstore/liveset_test.go +++ b/chain/store/splitstore/liveset_test.go @@ -9,14 +9,18 @@ import ( ) func TestLMDBLiveSet(t *testing.T) { - testLiveSet(t, true) + testLiveSet(t, "lmdb") } func TestBoltLiveSet(t *testing.T) { - testLiveSet(t, false) + testLiveSet(t, "bolt") } -func testLiveSet(t *testing.T, useLMDB bool) { +func TestBloomLiveSet(t *testing.T) { + testLiveSet(t, "bloom") +} + +func testLiveSet(t *testing.T, lsType string) { t.Helper() path := "/tmp/liveset-test" @@ -26,7 +30,7 @@ func testLiveSet(t *testing.T, useLMDB bool) { t.Fatal(err) } - env, err := NewLiveSetEnv(path, useLMDB) + env, err := NewLiveSetEnv(path, lsType) if err != nil { t.Fatal(err) } diff --git a/chain/store/splitstore/snoop_test.go b/chain/store/splitstore/snoop_test.go index 96cfb667d..b13953a4d 100644 --- a/chain/store/splitstore/snoop_test.go +++ b/chain/store/splitstore/snoop_test.go @@ -10,7 +10,15 @@ import ( "github.com/filecoin-project/go-state-types/abi" ) -func testTrackingStore(t *testing.T, useLMDB bool) { +func TestLMDBTrackingStore(t *testing.T) { + testTrackingStore(t, "lmdb") +} + +func TestBoltTrackingStore(t *testing.T) { + testTrackingStore(t, "bolt") +} + +func testTrackingStore(t *testing.T, tsType string) { t.Helper() makeCid := func(key string) cid.Cid { @@ -47,7 +55,7 @@ func testTrackingStore(t *testing.T, useLMDB bool) { t.Fatal(err) } - s, err := NewTrackingStore(path, useLMDB) + s, err := NewTrackingStore(path, tsType) if err != nil { t.Fatal(err) } @@ -114,7 +122,7 @@ func testTrackingStore(t *testing.T, useLMDB bool) { t.Fatal(err) } - s, err = NewTrackingStore(path, useLMDB) + s, err = NewTrackingStore(path, tsType) if err != nil { t.Fatal(err) } @@ -126,11 +134,3 @@ func testTrackingStore(t *testing.T, useLMDB bool) { s.Close() //nolint:errcheck } - -func TestLMDBTrackingStore(t *testing.T) { - testTrackingStore(t, 
true) -} - -func TestBoltTrackingStore(t *testing.T) { - testTrackingStore(t, false) -} From 44aadb931a0b59227592da61b7da07b56e1df0f5 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 10:39:25 +0200 Subject: [PATCH 088/148] rehash salted keys in bloom filter --- chain/store/splitstore/liveset_bloom.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/chain/store/splitstore/liveset_bloom.go b/chain/store/splitstore/liveset_bloom.go index 4af3a7969..06d0144f9 100644 --- a/chain/store/splitstore/liveset_bloom.go +++ b/chain/store/splitstore/liveset_bloom.go @@ -7,6 +7,7 @@ import ( bbloom "github.com/ipfs/bbloom" cid "github.com/ipfs/go-cid" + blake2b "github.com/minio/blake2b-simd" ) type BloomLiveSetEnv struct{} @@ -48,7 +49,8 @@ func (s *BloomLiveSet) saltedKey(cid cid.Cid) []byte { key := make([]byte, len(s.salt)+len(hash)) n := copy(key, s.salt) copy(key[n:], hash) - return key + rehash := blake2b.Sum256(key) + return rehash[:] } func (s *BloomLiveSet) Mark(cid cid.Cid) error { From f62999d2b8c1d681df9f3545fee310d2f699fdb5 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 10:57:38 +0200 Subject: [PATCH 089/148] use named constants for bloom filter parameters --- chain/store/splitstore/liveset_bloom.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/chain/store/splitstore/liveset_bloom.go b/chain/store/splitstore/liveset_bloom.go index 06d0144f9..a602ea332 100644 --- a/chain/store/splitstore/liveset_bloom.go +++ b/chain/store/splitstore/liveset_bloom.go @@ -10,6 +10,11 @@ import ( blake2b "github.com/minio/blake2b-simd" ) +const ( + BloomFilterSize = 50_000_000 + BloomFilterProbability = 0.01 +) + type BloomLiveSetEnv struct{} var _ LiveSetEnv = (*BloomLiveSetEnv)(nil) @@ -32,7 +37,7 @@ func (e *BloomLiveSetEnv) NewLiveSet(name string) (LiveSet, error) { return nil, xerrors.Errorf("error reading salt: %w", err) } - bf, err := bbloom.New(float64(50_000_000), float64(0.01)) + bf, err := 
bbloom.New(float64(BloomFilterSize), float64(BloomFilterProbability)) if err != nil { return nil, xerrors.Errorf("error creating bloom filter: %w", err) } From 05fee278405839ac932986ff46eb752822169bd2 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 13:23:08 +0200 Subject: [PATCH 090/148] remove stale references to lmdb from splitstore implementation --- chain/store/splitstore/splitstore.go | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 1d9046e11..1af411df6 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -10,8 +10,6 @@ import ( "golang.org/x/xerrors" - "github.com/ledgerwatch/lmdb-go/lmdb" - blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" dstore "github.com/ipfs/go-datastore" @@ -171,7 +169,7 @@ func (s *SplitStore) Put(blk blocks.Block) error { s.mx.Unlock() err := s.snoop.Put(blk.Cid(), epoch) - if err != nil && !lmdb.IsErrno(err, lmdb.KeyExist) { + if err != nil { log.Errorf("error tracking CID in hotstore: %s; falling back to coldstore", err) return s.cold.Put(blk) } @@ -196,17 +194,6 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { err := s.snoop.PutBatch(batch, epoch) if err != nil { - if lmdb.IsErrno(err, lmdb.KeyExist) { - // a write is duplicate, but we don't know which; write each block separately - for _, blk := range blks { - err = s.Put(blk) - if err != nil { - return err - } - } - return nil - } - log.Errorf("error tracking CIDs in hotstore: %s; falling back to coldstore", err) return s.cold.PutMany(blks) } From e582f0b7134a094a744f37d0347be9f955ac0c63 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 13:41:07 +0200 Subject: [PATCH 091/148] remove references to splitstore from lotus-shed --- cmd/lotus-shed/balances.go | 27 ++------------------------- cmd/lotus-shed/export.go | 14 +------------- 2 files changed, 3 insertions(+), 38 
deletions(-) diff --git a/cmd/lotus-shed/balances.go b/cmd/lotus-shed/balances.go index 6a8551edf..8c5bfefb8 100644 --- a/cmd/lotus-shed/balances.go +++ b/cmd/lotus-shed/balances.go @@ -36,7 +36,6 @@ import ( "github.com/filecoin-project/lotus/chain/state" "github.com/filecoin-project/lotus/chain/stmgr" "github.com/filecoin-project/lotus/chain/store" - "github.com/filecoin-project/lotus/chain/store/splitstore" "github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/chain/vm" lcli "github.com/filecoin-project/lotus/cli" @@ -194,18 +193,7 @@ var chainBalanceStateCmd = &cli.Command{ return err } - ssPath, err := lkrepo.SplitstorePath() - if err != nil { - return err - } - - ss, err := splitstore.NewSplitStore(ssPath, mds, bs) - if err != nil { - return err - } - defer ss.Close() //nolint:errcheck - - cs := store.NewChainStore(ss, ss, mds, vm.Syscalls(ffiwrapper.ProofVerifier), nil) + cs := store.NewChainStore(bs, bs, mds, vm.Syscalls(ffiwrapper.ProofVerifier), nil) defer cs.Close() //nolint:errcheck cst := cbor.NewCborStore(bs) @@ -426,18 +414,7 @@ var chainPledgeCmd = &cli.Command{ return err } - ssPath, err := lkrepo.SplitstorePath() - if err != nil { - return err - } - - ss, err := splitstore.NewSplitStore(ssPath, mds, bs) - if err != nil { - return err - } - defer ss.Close() //nolint:errcheck - - cs := store.NewChainStore(ss, ss, mds, vm.Syscalls(ffiwrapper.ProofVerifier), nil) + cs := store.NewChainStore(bs, bs, mds, vm.Syscalls(ffiwrapper.ProofVerifier), nil) defer cs.Close() //nolint:errcheck cst := cbor.NewCborStore(bs) diff --git a/cmd/lotus-shed/export.go b/cmd/lotus-shed/export.go index 7d1016e6c..e711ba2bb 100644 --- a/cmd/lotus-shed/export.go +++ b/cmd/lotus-shed/export.go @@ -12,7 +12,6 @@ import ( "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/chain/store" - "github.com/filecoin-project/lotus/chain/store/splitstore" "github.com/filecoin-project/lotus/chain/types" lcli 
"github.com/filecoin-project/lotus/cli" "github.com/filecoin-project/lotus/node/repo" @@ -91,18 +90,7 @@ var exportChainCmd = &cli.Command{ return err } - ssPath, err := lr.SplitstorePath() - if err != nil { - return err - } - - ss, err := splitstore.NewSplitStore(ssPath, mds, bs) - if err != nil { - return err - } - defer ss.Close() //nolint:errcheck - - cs := store.NewChainStore(ss, ss, mds, nil, nil) + cs := store.NewChainStore(bs, bs, mds, nil, nil) defer cs.Close() //nolint:errcheck if err := cs.Load(); err != nil { From 7587ab62340b85122299b770f79dfc23c13ab8c4 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 13:48:38 +0200 Subject: [PATCH 092/148] quiet the stupid linter --- chain/store/splitstore/liveset_bloom.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain/store/splitstore/liveset_bloom.go b/chain/store/splitstore/liveset_bloom.go index a602ea332..f4d28a5dd 100644 --- a/chain/store/splitstore/liveset_bloom.go +++ b/chain/store/splitstore/liveset_bloom.go @@ -32,7 +32,7 @@ func NewBloomLiveSetEnv() (*BloomLiveSetEnv, error) { func (e *BloomLiveSetEnv) NewLiveSet(name string) (LiveSet, error) { salt := make([]byte, 4) - _, err := rand.Read(salt) + _, err := rand.Read(salt) //nolint if err != nil { return nil, xerrors.Errorf("error reading salt: %w", err) } From 5639261e4411a2dafbded83f6614838fb4bffccb Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 13:51:42 +0200 Subject: [PATCH 093/148] make compaction parameters variable --- chain/store/splitstore/splitstore.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 1af411df6..8001f792c 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -22,7 +22,8 @@ import ( bstore "github.com/filecoin-project/lotus/lib/blockstore" ) -const ( +// these are variable so that 1) lotus-soup builds 2) we can change them in tests +var ( 
CompactionThreshold = 5 * build.Finality CompactionCold = build.Finality ) From cae5ddce88b437a8871a85e0dc214211df208a87 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 21:35:18 +0200 Subject: [PATCH 094/148] dynamically size bloom filters --- chain/store/splitstore/liveset.go | 2 +- chain/store/splitstore/liveset_bloom.go | 9 ++++- chain/store/splitstore/liveset_bolt.go | 2 +- chain/store/splitstore/liveset_lmdb.go | 2 +- chain/store/splitstore/splitstore.go | 53 +++++++++++++++++++++++-- 5 files changed, 60 insertions(+), 8 deletions(-) diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index 6b64a3a56..62a5d6913 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -17,7 +17,7 @@ type LiveSet interface { var markBytes = []byte{} type LiveSetEnv interface { - NewLiveSet(name string) (LiveSet, error) + NewLiveSet(name string, sizeHint int64) (LiveSet, error) Close() error } diff --git a/chain/store/splitstore/liveset_bloom.go b/chain/store/splitstore/liveset_bloom.go index f4d28a5dd..9fc3503b3 100644 --- a/chain/store/splitstore/liveset_bloom.go +++ b/chain/store/splitstore/liveset_bloom.go @@ -30,14 +30,19 @@ func NewBloomLiveSetEnv() (*BloomLiveSetEnv, error) { return &BloomLiveSetEnv{}, nil } -func (e *BloomLiveSetEnv) NewLiveSet(name string) (LiveSet, error) { +func (e *BloomLiveSetEnv) NewLiveSet(name string, sizeHint int64) (LiveSet, error) { + size := int64(BloomFilterSize) + for size < sizeHint { + size += BloomFilterSize + } + salt := make([]byte, 4) _, err := rand.Read(salt) //nolint if err != nil { return nil, xerrors.Errorf("error reading salt: %w", err) } - bf, err := bbloom.New(float64(BloomFilterSize), float64(BloomFilterProbability)) + bf, err := bbloom.New(float64(size), float64(BloomFilterProbability)) if err != nil { return nil, xerrors.Errorf("error creating bloom filter: %w", err) } diff --git a/chain/store/splitstore/liveset_bolt.go 
b/chain/store/splitstore/liveset_bolt.go index 0d6d7cd2e..8c68d6a4a 100644 --- a/chain/store/splitstore/liveset_bolt.go +++ b/chain/store/splitstore/liveset_bolt.go @@ -35,7 +35,7 @@ func NewBoltLiveSetEnv(path string) (*BoltLiveSetEnv, error) { return &BoltLiveSetEnv{db: db}, nil } -func (e *BoltLiveSetEnv) NewLiveSet(name string) (LiveSet, error) { +func (e *BoltLiveSetEnv) NewLiveSet(name string, hint int64) (LiveSet, error) { bucketId := []byte(name) err := e.db.Update(func(tx *bolt.Tx) error { _, err := tx.CreateBucketIfNotExists(bucketId) diff --git a/chain/store/splitstore/liveset_lmdb.go b/chain/store/splitstore/liveset_lmdb.go index f41907207..e8123f104 100644 --- a/chain/store/splitstore/liveset_lmdb.go +++ b/chain/store/splitstore/liveset_lmdb.go @@ -57,7 +57,7 @@ func NewLMDBLiveSetEnv(path string) (*LMDBLiveSetEnv, error) { return &LMDBLiveSetEnv{env: env}, nil } -func (e *LMDBLiveSetEnv) NewLiveSet(name string) (LiveSet, error) { +func (e *LMDBLiveSetEnv) NewLiveSet(name string, hint int64) (LiveSet, error) { return NewLMDBLiveSet(e.env, name+".lmdb") } diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index 8001f792c..e20615e68 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -75,6 +75,8 @@ type SplitStore struct { snoop TrackingStore env LiveSetEnv + + liveSetSize int64 } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -328,6 +330,15 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { // Compaction/GC Algorithm func (s *SplitStore) compact() { + if s.liveSetSize == 0 { + start := time.Now() + log.Info("estimating live set size") + s.estimateLiveSetSize() + log.Infow("estimating live set size done", "took", time.Since(start), "size", s.liveSetSize) + } else { + log.Infow("current live set size estimate", "size", s.liveSetSize) + } + if s.fullCompaction { s.compactFull() } else { @@ -335,6 +346,24 @@ func (s *SplitStore) compact() { } } +func (s 
*SplitStore) estimateLiveSetSize() { + s.mx.Lock() + curTs := s.curTs + s.mx.Unlock() + + s.liveSetSize = 0 + err := s.cs.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, + func(cid cid.Cid) error { + s.liveSetSize++ + return nil + }) + + if err != nil { + // TODO do something better here + panic(err) + } +} + func (s *SplitStore) compactSimple() { s.mx.Lock() curTs := s.curTs @@ -344,7 +373,7 @@ func (s *SplitStore) compactSimple() { log.Infow("running simple compaction", "currentEpoch", curTs.Height(), "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch) - coldSet, err := s.env.NewLiveSet("cold") + coldSet, err := s.env.NewLiveSet("cold", s.liveSetSize) if err != nil { // TODO do something better here panic(err) @@ -361,8 +390,10 @@ func (s *SplitStore) compactSimple() { panic(err) } + count := int64(0) err = s.cs.WalkSnapshot(context.Background(), coldTs, 1, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { + count++ return coldSet.Mark(cid) }) @@ -371,6 +402,10 @@ func (s *SplitStore) compactSimple() { panic(err) } + if count > s.liveSetSize { + s.liveSetSize = count + } + log.Infow("marking done", "took", time.Since(startMark)) // 2. 
move cold unreachable objects to the coldstore @@ -519,14 +554,14 @@ func (s *SplitStore) compactFull() { // create two live sets, one for marking the cold finality region // and one for marking the hot region - hotSet, err := s.env.NewLiveSet("hot") + hotSet, err := s.env.NewLiveSet("hot", s.liveSetSize) if err != nil { // TODO do something better here panic(err) } defer hotSet.Close() //nolint:errcheck - coldSet, err := s.env.NewLiveSet("cold") + coldSet, err := s.env.NewLiveSet("cold", s.liveSetSize) if err != nil { // TODO do something better here panic(err) @@ -538,8 +573,10 @@ func (s *SplitStore) compactFull() { startMark := time.Now() // Phase 1a: mark all reachable CIDs in the hot range + count := int64(0) err = s.cs.WalkSnapshot(context.Background(), curTs, epoch-coldEpoch, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { + count++ return hotSet.Mark(cid) }) @@ -548,6 +585,10 @@ func (s *SplitStore) compactFull() { panic(err) } + if count > s.liveSetSize { + s.liveSetSize = count + } + // Phase 1b: mark all reachable CIDs in the cold range coldTs, err := s.cs.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) if err != nil { @@ -555,8 +596,10 @@ func (s *SplitStore) compactFull() { panic(err) } + count = 0 err = s.cs.WalkSnapshot(context.Background(), coldTs, CompactionCold, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { + count++ return coldSet.Mark(cid) }) @@ -565,6 +608,10 @@ func (s *SplitStore) compactFull() { panic(err) } + if count > s.liveSetSize { + s.liveSetSize = count + } + log.Infow("marking done", "took", time.Since(startMark)) // Phase 2: sweep cold objects: From 99c6e4f48ff902c5a1aec58d234dbe7af9feb22a Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 21:40:26 +0200 Subject: [PATCH 095/148] adjust min bloom filter size --- chain/store/splitstore/liveset_bloom.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chain/store/splitstore/liveset_bloom.go 
b/chain/store/splitstore/liveset_bloom.go index 9fc3503b3..57335e624 100644 --- a/chain/store/splitstore/liveset_bloom.go +++ b/chain/store/splitstore/liveset_bloom.go @@ -11,7 +11,7 @@ import ( ) const ( - BloomFilterSize = 50_000_000 + BloomFilterMinSize = 10_000_000 BloomFilterProbability = 0.01 ) @@ -31,9 +31,9 @@ func NewBloomLiveSetEnv() (*BloomLiveSetEnv, error) { } func (e *BloomLiveSetEnv) NewLiveSet(name string, sizeHint int64) (LiveSet, error) { - size := int64(BloomFilterSize) + size := int64(BloomFilterMinSize) for size < sizeHint { - size += BloomFilterSize + size += BloomFilterMinSize } salt := make([]byte, 4) From 3282f856ecfa05eb7e8d7f3811fc0ea120de45e9 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 28 Feb 2021 21:46:27 +0200 Subject: [PATCH 096/148] fix tests --- chain/store/splitstore/liveset_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chain/store/splitstore/liveset_test.go b/chain/store/splitstore/liveset_test.go index cc0183f6a..1d094b976 100644 --- a/chain/store/splitstore/liveset_test.go +++ b/chain/store/splitstore/liveset_test.go @@ -36,12 +36,12 @@ func testLiveSet(t *testing.T, lsType string) { } defer env.Close() //nolint:errcheck - hotSet, err := env.NewLiveSet("hot") + hotSet, err := env.NewLiveSet("hot", 0) if err != nil { t.Fatal(err) } - coldSet, err := env.NewLiveSet("cold") + coldSet, err := env.NewLiveSet("cold", 0) if err != nil { t.Fatal(err) } @@ -108,12 +108,12 @@ func testLiveSet(t *testing.T, lsType string) { t.Fatal(err) } - hotSet, err = env.NewLiveSet("hot") + hotSet, err = env.NewLiveSet("hot", 0) if err != nil { t.Fatal(err) } - coldSet, err = env.NewLiveSet("cold") + coldSet, err = env.NewLiveSet("cold", 0) if err != nil { t.Fatal(err) } From 0fc2f3a26fe1af71d6631c1455ae9c8c7e500231 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 1 Mar 2021 09:25:52 +0200 Subject: [PATCH 097/148] fix post-rebase compilation errors --- chain/store/splitstore/splitstore.go | 2 +- 
node/modules/blockstore.go | 10 +++++----- node/repo/interface.go | 3 --- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/chain/store/splitstore/splitstore.go b/chain/store/splitstore/splitstore.go index e20615e68..db35357ba 100644 --- a/chain/store/splitstore/splitstore.go +++ b/chain/store/splitstore/splitstore.go @@ -16,10 +16,10 @@ import ( logging "github.com/ipfs/go-log/v2" "github.com/filecoin-project/go-state-types/abi" + bstore "github.com/filecoin-project/lotus/blockstore" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/store" "github.com/filecoin-project/lotus/chain/types" - bstore "github.com/filecoin-project/lotus/lib/blockstore" ) // these are variable so that 1) lotus-soup builds 2) we can change them in tests diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index ae4f7a188..7d8e713ba 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -8,7 +8,7 @@ import ( "time" lmdbbs "github.com/filecoin-project/go-bs-lmdb" - badgerbs "github.com/filecoin-project/lotus/lib/blockstore/badger" + badgerbs "github.com/filecoin-project/lotus/blockstore/badger" bstore "github.com/ipfs/go-ipfs-blockstore" "go.uber.org/fx" "golang.org/x/xerrors" @@ -96,8 +96,8 @@ func BadgerHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlocksto return hot, err } -func SplitBlockstore(cfg *config.Blockstore) func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.ColdBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) { - return func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.ColdBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) { +func SplitBlockstore(cfg *config.Blockstore) func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.UniversalBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) { + return func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, 
cold dtypes.UniversalBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) { path, err := r.SplitstorePath() if err != nil { return nil, err @@ -124,7 +124,7 @@ func SplitBlockstore(cfg *config.Blockstore) func(lc fx.Lifecycle, r repo.Locked } } -func StateFlatBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.ColdBlockstore) (dtypes.StateBlockstore, error) { +func StateFlatBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.UniversalBlockstore) (dtypes.StateBlockstore, error) { return bs, nil } @@ -132,7 +132,7 @@ func StateSplitBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.Sp return bs, nil } -func ChainFlatBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.ColdBlockstore) (dtypes.ChainBlockstore, error) { +func ChainFlatBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.UniversalBlockstore) (dtypes.ChainBlockstore, error) { return bs, nil } diff --git a/node/repo/interface.go b/node/repo/interface.go index d4afbe2a0..b169ee5cc 100644 --- a/node/repo/interface.go +++ b/node/repo/interface.go @@ -67,9 +67,6 @@ type LockedRepo interface { // SplitstorePath returns the path for the SplitStore SplitstorePath() (string, error) - // SplitstorePath returns the path for the SplitStore - SplitstorePath() (string, error) - // Returns config in this repo Config() (interface{}, error) SetConfig(func(interface{})) error From 3733456bca5ee7da79994a7f8cab930b5819a804 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 1 Mar 2021 09:26:08 +0200 Subject: [PATCH 098/148] go mod tidy --- go.sum | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/go.sum b/go.sum index 1c5e6fce4..a1da7a182 100644 --- a/go.sum +++ b/go.sum @@ -241,23 +241,11 @@ github.com/filecoin-project/go-amt-ipld/v2 v2.1.1-0.20201006184820-924ee87a1349/ github.com/filecoin-project/go-amt-ipld/v3 v3.0.0 h1:Ou/q82QeHGOhpkedvaxxzpBYuqTxLCcj5OChkDNx4qc= github.com/filecoin-project/go-amt-ipld/v3 
v3.0.0/go.mod h1:Qa95YNAbtoVCTSVtX38aAC1ptBnJfPma1R/zZsKmx4o= github.com/filecoin-project/go-bitfield v0.2.0/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= +github.com/filecoin-project/go-bitfield v0.2.3 h1:pedK/7maYF06Z+BYJf2OeFFqIDEh6SP6mIOlLFpYXGs= +github.com/filecoin-project/go-bitfield v0.2.3/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= github.com/filecoin-project/go-bitfield v0.2.3/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= github.com/filecoin-project/go-bitfield v0.2.4 h1:uZ7MeE+XfM5lqrHJZ93OnhQKc/rveW8p9au0C68JPgk= github.com/filecoin-project/go-bitfield v0.2.4/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= -github.com/filecoin-project/go-bitfield v0.2.3-0.20201110211213-fe2c1862e816 h1:RMdzMqe3mu2Z/3N3b9UEfkbGZxukstmZgNC024ybWhA= -github.com/filecoin-project/go-bitfield v0.2.3-0.20201110211213-fe2c1862e816/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= -github.com/filecoin-project/go-bs-lmdb v0.0.2 h1:FeHjg3B0TPcVUVYyykNrfMaRujZr0aN14CxWGv3feXo= -github.com/filecoin-project/go-bs-lmdb v0.0.2/go.mod h1:Rt1cAcl80csj0aEvISab9g7r8PEqTdZGDnCaOR3tjFw= -github.com/filecoin-project/go-bs-lmdb v0.0.3-0.20201201162933-d7f9ef615d0f h1:PYFZWzcv0PlANuh64UopYppkulhD9O5uiJ19o359DKM= -github.com/filecoin-project/go-bs-lmdb v0.0.3-0.20201201162933-d7f9ef615d0f/go.mod h1:Oq9zP5FMx7IomcY79neGD76YsfeMY3N7BKGDkofn/Ao= -github.com/filecoin-project/go-bs-lmdb v0.0.3 h1:tSgG5S9+5zojhmnRSQxiabWFyHPxdDP9eU7sAgaECvI= -github.com/filecoin-project/go-bs-lmdb v0.0.3/go.mod h1:Oq9zP5FMx7IomcY79neGD76YsfeMY3N7BKGDkofn/Ao= -github.com/filecoin-project/go-bs-lmdb v1.0.1 h1:kAoPGgZqUQ0IowIzjdDo251X6smWuy286E6h3m03zqY= -github.com/filecoin-project/go-bs-lmdb v1.0.1/go.mod h1:peFIZ9XEE9OLFkCzi7FMlr84UexqVKj6+AyxZD5SiGs= -github.com/filecoin-project/go-bs-lmdb v1.0.2 h1:cj+M3DzlcWYtNJpea8AqeU2SKz2+eTr+1N1GIUU0iBQ= -github.com/filecoin-project/go-bs-lmdb v1.0.2/go.mod h1:peFIZ9XEE9OLFkCzi7FMlr84UexqVKj6+AyxZD5SiGs= 
-github.com/filecoin-project/go-bitfield v0.2.3 h1:pedK/7maYF06Z+BYJf2OeFFqIDEh6SP6mIOlLFpYXGs= -github.com/filecoin-project/go-bitfield v0.2.3/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= github.com/filecoin-project/go-bs-lmdb v1.0.3 h1:QRf/yMw5hFjqMIpi9mi/Hkh4qberUI++56XAdB0VgwM= github.com/filecoin-project/go-bs-lmdb v1.0.3/go.mod h1:peFIZ9XEE9OLFkCzi7FMlr84UexqVKj6+AyxZD5SiGs= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 h1:av5fw6wmm58FYMgJeoB/lK9XXrgdugYiTqkdxjTy9k8= @@ -1335,12 +1323,10 @@ github.com/prometheus/procfs v0.1.0 h1:jhMy6QXfi3y2HEzFoyuCj40z4OZIIHHPtFyCMftmv github.com/prometheus/procfs v0.1.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/raulk/clock v1.1.0 h1:dpb29+UKMbLqiU/jqIJptgLR1nn23HLgMY0sTCDza5Y= github.com/raulk/clock v1.1.0/go.mod h1:3MpVxdZ/ODBQDxbN+kzshf5OSZwPjtMDx6BBXBmOeY0= +github.com/raulk/go-bs-tests v0.0.4 h1:gYUYmIFMBnp2mtZQuiP/ZGtSTSPvmDBjWBz0xTZz4X8= +github.com/raulk/go-bs-tests v0.0.4/go.mod h1:ZREaOSaReTvV4nY7Qh6Lkl+QisYXNBWcPRa0gjrIaG4= github.com/raulk/go-watchdog v1.0.1 h1:qgm3DIJAeb+2byneLrQJ7kvmDLGxN2vy3apXyGaDKN4= github.com/raulk/go-watchdog v1.0.1/go.mod h1:lzSbAl5sh4rtI8tYHU01BWIDzgzqaQLj6RcA1i4mlqI= -github.com/raulk/freecache v1.2.0 h1:1HDmZsDi+zvFe1/vMLkdrX9ThWdXQsIp4btrGJ824yc= -github.com/raulk/freecache v1.2.0/go.mod h1:Ixh8xigQnoxRDvSTzugeiLYv35Y/q9neOs90UaPT7N8= -github.com/raulk/go-bs-tests v0.0.3/go.mod h1:ZREaOSaReTvV4nY7Qh6Lkl+QisYXNBWcPRa0gjrIaG4= -github.com/raulk/go-bs-tests v0.0.4/go.mod h1:ZREaOSaReTvV4nY7Qh6Lkl+QisYXNBWcPRa0gjrIaG4= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= @@ -1517,8 +1503,6 @@ github.com/whyrusleeping/yamux 
v1.1.5/go.mod h1:E8LnQQ8HKx5KD29HZFUwM1PxCOdPRzGw github.com/x-cray/logrus-prefixed-formatter v0.5.2/go.mod h1:2duySbKsL6M18s5GU7VPsoEPHyzalCE06qoARUCeBBE= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xlab/c-for-go v0.0.0-20201112171043-ea6dce5809cb h1:/7/dQyiKnxAOj9L69FhST7uMe17U015XPzX7cy+5ykM= -github.com/xlab/c-for-go v0.0.0-20200718154222-87b0065af829 h1:wb7xrDzfkLgPHsSEBm+VSx6aDdi64VtV0xvP0E6j8bk= -github.com/xlab/c-for-go v0.0.0-20200718154222-87b0065af829/go.mod h1:h/1PEBwj7Ym/8kOuMWvO2ujZ6Lt+TMbySEXNhjjR87I= github.com/xlab/c-for-go v0.0.0-20201112171043-ea6dce5809cb/go.mod h1:pbNsDSxn1ICiNn9Ct4ZGNrwzfkkwYbx/lw8VuyutFIg= github.com/xlab/pkgconfig v0.0.0-20170226114623-cea12a0fd245 h1:Sw125DKxZhPUI4JLlWugkzsrlB50jR9v2khiD9FxuSo= github.com/xlab/pkgconfig v0.0.0-20170226114623-cea12a0fd245/go.mod h1:C+diUUz7pxhNY6KAoLgrTYARGWnt82zWTylZlxT92vk= @@ -1958,10 +1942,10 @@ howett.net/plist v0.0.0-20181124034731-591f970eefbb/go.mod h1:vMygbs4qMhSZSc4lCU modernc.org/cc v1.0.0 h1:nPibNuDEx6tvYrUAtvDTTw98rx5juGsa5zuDnKwEEQQ= modernc.org/cc v1.0.0/go.mod h1:1Sk4//wdnYJiUIxnW8ddKpaOJCF37yAdqYnkxUpaYxw= modernc.org/fileutil v1.0.0/go.mod h1:JHsWpkrk/CnVV1H/eGlFf85BEpfkrp56ro8nojIq9Q8= -modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk= -modernc.org/golex v1.0.1 h1:EYKY1a3wStt0RzHaH8mdSRNg78Ub0OHxYfCRWw35YtM= modernc.org/golex v1.0.0 h1:wWpDlbK8ejRfSyi0frMyhilD3JBvtcx2AdGDnU+JtsE= modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk= +modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk= +modernc.org/golex v1.0.1 h1:EYKY1a3wStt0RzHaH8mdSRNg78Ub0OHxYfCRWw35YtM= modernc.org/golex v1.0.1/go.mod h1:QCA53QtsT1NdGkaZZkF5ezFwk4IXh4BGNafAARTC254= modernc.org/lex v1.0.0/go.mod h1:G6rxMTy3cH2iA0iXL/HRRv4Znu8MK4higxph/lE7ypk= modernc.org/lexer v1.0.0/go.mod h1:F/Dld0YKYdZCLQ7bD0USbWL4YKCyTDRDHiDTOs0q0vk= From 
1b51c10d78396ad9423694ddeb615ae42dd88d01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Mon, 1 Mar 2021 13:01:11 +0000 Subject: [PATCH 099/148] split off lmdb support to a different branch. --- chain/store/splitstore/liveset.go | 2 - chain/store/splitstore/liveset_lmdb.go | 117 ---------------- chain/store/splitstore/liveset_test.go | 4 - chain/store/splitstore/lmdb_util.go | 27 ---- chain/store/splitstore/snoop.go | 2 - chain/store/splitstore/snoop_lmdb.go | 180 ------------------------- chain/store/splitstore/snoop_test.go | 4 - go.mod | 2 - go.sum | 6 - node/builder.go | 2 - node/modules/blockstore.go | 32 +---- 11 files changed, 1 insertion(+), 377 deletions(-) delete mode 100644 chain/store/splitstore/liveset_lmdb.go delete mode 100644 chain/store/splitstore/lmdb_util.go delete mode 100644 chain/store/splitstore/snoop_lmdb.go diff --git a/chain/store/splitstore/liveset.go b/chain/store/splitstore/liveset.go index 62a5d6913..543c41371 100644 --- a/chain/store/splitstore/liveset.go +++ b/chain/store/splitstore/liveset.go @@ -27,8 +27,6 @@ func NewLiveSetEnv(path string, liveSetType string) (LiveSetEnv, error) { return NewBloomLiveSetEnv() case "bolt": return NewBoltLiveSetEnv(filepath.Join(path, "sweep.bolt")) - case "lmdb": - return NewLMDBLiveSetEnv(filepath.Join(path, "sweep.lmdb")) default: return nil, xerrors.Errorf("unknown live set type %s", liveSetType) } diff --git a/chain/store/splitstore/liveset_lmdb.go b/chain/store/splitstore/liveset_lmdb.go deleted file mode 100644 index e8123f104..000000000 --- a/chain/store/splitstore/liveset_lmdb.go +++ /dev/null @@ -1,117 +0,0 @@ -package splitstore - -import ( - "os" - - "golang.org/x/xerrors" - - cid "github.com/ipfs/go-cid" - "github.com/ledgerwatch/lmdb-go/lmdb" -) - -var LMDBLiveSetMapSize int64 = 1 << 34 // 16G; TODO grow the map dynamically - -type LMDBLiveSetEnv struct { - env *lmdb.Env -} - -var _ LiveSetEnv = (*LMDBLiveSetEnv)(nil) - -type LMDBLiveSet struct { - env *lmdb.Env - 
db lmdb.DBI -} - -var _ LiveSet = (*LMDBLiveSet)(nil) - -func NewLMDBLiveSetEnv(path string) (*LMDBLiveSetEnv, error) { - env, err := lmdb.NewEnv() - if err != nil { - return nil, xerrors.Errorf("failed to initialize LDMB env: %w", err) - } - if err = env.SetMapSize(LMDBLiveSetMapSize); err != nil { - return nil, xerrors.Errorf("failed to set LMDB map size: %w", err) - } - if err = env.SetMaxDBs(2); err != nil { - return nil, xerrors.Errorf("failed to set LMDB max dbs: %w", err) - } - // if err = env.SetMaxReaders(1); err != nil { - // return nil, xerrors.Errorf("failed to set LMDB max readers: %w", err) - // } - - if st, err := os.Stat(path); os.IsNotExist(err) { - if err := os.MkdirAll(path, 0777); err != nil { - return nil, xerrors.Errorf("failed to create LMDB data directory at %s: %w", path, err) - } - } else if err != nil { - return nil, xerrors.Errorf("failed to stat LMDB data dir: %w", err) - } else if !st.IsDir() { - return nil, xerrors.Errorf("LMDB path is not a directory %s", path) - } - err = env.Open(path, lmdb.NoSync|lmdb.WriteMap|lmdb.MapAsync|lmdb.NoReadahead, 0777) - if err != nil { - env.Close() //nolint:errcheck - return nil, xerrors.Errorf("error opening LMDB database: %w", err) - } - - return &LMDBLiveSetEnv{env: env}, nil -} - -func (e *LMDBLiveSetEnv) NewLiveSet(name string, hint int64) (LiveSet, error) { - return NewLMDBLiveSet(e.env, name+".lmdb") -} - -func (e *LMDBLiveSetEnv) Close() error { - return e.env.Close() -} - -func NewLMDBLiveSet(env *lmdb.Env, name string) (*LMDBLiveSet, error) { - var db lmdb.DBI - err := env.Update(func(txn *lmdb.Txn) (err error) { - db, err = txn.CreateDBI(name) - return - }) - - if err != nil { - return nil, err - } - - return &LMDBLiveSet{env: env, db: db}, nil -} - -func (s *LMDBLiveSet) Mark(cid cid.Cid) error { - return s.env.Update(func(txn *lmdb.Txn) error { - err := txn.Put(s.db, cid.Hash(), markBytes, 0) - if err == nil || lmdb.IsErrno(err, lmdb.KeyExist) { - return nil - } - return err - }) -} - 
-func (s *LMDBLiveSet) Has(cid cid.Cid) (has bool, err error) { - err = s.env.View(func(txn *lmdb.Txn) error { - txn.RawRead = true - - _, err := txn.Get(s.db, cid.Hash()) - if err != nil { - if lmdb.IsNotFound(err) { - has = false - return nil - } - - return err - } - - has = true - return nil - }) - - return -} - -func (s *LMDBLiveSet) Close() error { - return s.env.Update(func(txn *lmdb.Txn) error { - return txn.Drop(s.db, true) - }) -} diff --git a/chain/store/splitstore/liveset_test.go b/chain/store/splitstore/liveset_test.go index 1d094b976..0a1dd30da 100644 --- a/chain/store/splitstore/liveset_test.go +++ b/chain/store/splitstore/liveset_test.go @@ -8,10 +8,6 @@ import ( "github.com/multiformats/go-multihash" ) -func TestLMDBLiveSet(t *testing.T) { - testLiveSet(t, "lmdb") -} - func TestBoltLiveSet(t *testing.T) { testLiveSet(t, "bolt") } diff --git a/chain/store/splitstore/lmdb_util.go b/chain/store/splitstore/lmdb_util.go deleted file mode 100644 index 5aff2ed0d..000000000 --- a/chain/store/splitstore/lmdb_util.go +++ /dev/null @@ -1,27 +0,0 @@ -package splitstore - -import ( - "math/rand" - "time" - - "golang.org/x/xerrors" - - "github.com/ledgerwatch/lmdb-go/lmdb" -) - -func withMaxReadersRetry(f func() error) error { -retry: - err := f() - if err != nil && lmdb.IsErrno(err, lmdb.ReadersFull) { - dt := time.Microsecond + time.Duration(rand.Intn(int(10*time.Microsecond))) - log.Debugf("MDB_READERS_FULL; retrying operation in %s", dt) - time.Sleep(dt) - goto retry - } - - if err != nil { - return xerrors.Errorf("error performing lmdb operation: %w", err) - } - - return nil -} diff --git a/chain/store/splitstore/snoop.go b/chain/store/splitstore/snoop.go index eeff09e0e..3fdbcf0ac 100644 --- a/chain/store/splitstore/snoop.go +++ b/chain/store/splitstore/snoop.go @@ -24,8 +24,6 @@ func NewTrackingStore(path string, trackingStoreType string) (TrackingStore, err switch trackingStoreType { case "", "bolt": return NewBoltTrackingStore(filepath.Join(path, 
"snoop.bolt")) - case "lmdb": - return NewLMDBTrackingStore(filepath.Join(path, "snoop.lmdb")) default: return nil, xerrors.Errorf("unknown tracking store type %s", trackingStoreType) } diff --git a/chain/store/splitstore/snoop_lmdb.go b/chain/store/splitstore/snoop_lmdb.go deleted file mode 100644 index 4222c94be..000000000 --- a/chain/store/splitstore/snoop_lmdb.go +++ /dev/null @@ -1,180 +0,0 @@ -package splitstore - -import ( - "os" - - "golang.org/x/xerrors" - - cid "github.com/ipfs/go-cid" - "github.com/ledgerwatch/lmdb-go/lmdb" - - "github.com/filecoin-project/go-state-types/abi" -) - -var LMDBTrackingStoreMapSize int64 = 1 << 34 // 16G -- TODO grow the map dynamically - -type LMDBTrackingStore struct { - env *lmdb.Env - db lmdb.DBI -} - -var _ TrackingStore = (*LMDBTrackingStore)(nil) - -func NewLMDBTrackingStore(path string) (*LMDBTrackingStore, error) { - env, err := lmdb.NewEnv() - if err != nil { - return nil, xerrors.Errorf("failed to initialize LMDB env: %w", err) - } - if err = env.SetMapSize(LMDBTrackingStoreMapSize); err != nil { - return nil, xerrors.Errorf("failed to set LMDB map size: %w", err) - } - if err = env.SetMaxDBs(1); err != nil { - return nil, xerrors.Errorf("failed to set LMDB max dbs: %w", err) - } - - if st, err := os.Stat(path); os.IsNotExist(err) { - if err := os.MkdirAll(path, 0777); err != nil { - return nil, xerrors.Errorf("failed to create LMDB data directory at %s: %w", path, err) - } - } else if err != nil { - return nil, xerrors.Errorf("failed to stat LMDB data dir: %w", err) - } else if !st.IsDir() { - return nil, xerrors.Errorf("LMDB path is not a directory %s", path) - } - - err = env.Open(path, lmdb.NoSync|lmdb.WriteMap|lmdb.MapAsync|lmdb.NoReadahead, 0777) - if err != nil { - env.Close() //nolint:errcheck - return nil, xerrors.Errorf("error opening LMDB database: %w", err) - } - - s := new(LMDBTrackingStore) - s.env = env - err = env.Update(func(txn *lmdb.Txn) (err error) { - s.db, err = txn.CreateDBI("snoop") - return 
err - }) - - if err != nil { - return nil, xerrors.Errorf("error creating tracking store: %w", err) - } - - return s, nil -} - -func (s *LMDBTrackingStore) Put(cid cid.Cid, epoch abi.ChainEpoch) error { - val := epochToBytes(epoch) - return withMaxReadersRetry( - func() error { - return s.env.Update(func(txn *lmdb.Txn) error { - err := txn.Put(s.db, cid.Hash(), val, 0) - if err == nil || lmdb.IsErrno(err, lmdb.KeyExist) { - return nil - } - return err - }) - }) -} - -func (s *LMDBTrackingStore) PutBatch(cids []cid.Cid, epoch abi.ChainEpoch) error { - val := epochToBytes(epoch) - return withMaxReadersRetry( - func() error { - return s.env.Update(func(txn *lmdb.Txn) error { - for _, cid := range cids { - err := txn.Put(s.db, cid.Hash(), val, 0) - if err == nil || lmdb.IsErrno(err, lmdb.KeyExist) { - continue - } - return err - } - - return nil - }) - }) -} - -func (s *LMDBTrackingStore) Get(cid cid.Cid) (epoch abi.ChainEpoch, err error) { - err = withMaxReadersRetry( - func() error { - return s.env.View(func(txn *lmdb.Txn) error { - txn.RawRead = true - - val, err := txn.Get(s.db, cid.Hash()) - if err != nil { - return err - } - - epoch = bytesToEpoch(val) - return nil - }) - }) - - return -} - -func (s *LMDBTrackingStore) Delete(cid cid.Cid) error { - return withMaxReadersRetry( - func() error { - return s.env.Update(func(txn *lmdb.Txn) error { - return txn.Del(s.db, cid.Hash(), nil) - }) - }) -} - -func (s *LMDBTrackingStore) DeleteBatch(cids map[cid.Cid]struct{}) error { - return withMaxReadersRetry( - func() error { - return s.env.Update(func(txn *lmdb.Txn) error { - for cid := range cids { - err := txn.Del(s.db, cid.Hash(), nil) - if err != nil { - return err - } - } - return nil - }) - }) -} - -func (s *LMDBTrackingStore) ForEach(f func(cid.Cid, abi.ChainEpoch) error) error { - return withMaxReadersRetry( - func() error { - return s.env.View(func(txn *lmdb.Txn) error { - txn.RawRead = true - cur, err := txn.OpenCursor(s.db) - if err != nil { - return err - } - 
defer cur.Close() - - for { - k, v, err := cur.Get(nil, nil, lmdb.Next) - if err != nil { - if lmdb.IsNotFound(err) { - return nil - } - - return err - } - - cid := cid.NewCidV1(cid.Raw, k) - epoch := bytesToEpoch(v) - - err = f(cid, epoch) - if err != nil { - return err - } - } - }) - }) -} - -func (s *LMDBTrackingStore) Sync() error { - return nil -} - -func (s *LMDBTrackingStore) Close() error { - s.env.CloseDBI(s.db) - return s.env.Close() -} diff --git a/chain/store/splitstore/snoop_test.go b/chain/store/splitstore/snoop_test.go index b13953a4d..3d5d2ff64 100644 --- a/chain/store/splitstore/snoop_test.go +++ b/chain/store/splitstore/snoop_test.go @@ -10,10 +10,6 @@ import ( "github.com/filecoin-project/go-state-types/abi" ) -func TestLMDBTrackingStore(t *testing.T) { - testTrackingStore(t, "lmdb") -} - func TestBoltTrackingStore(t *testing.T) { testTrackingStore(t, "bolt") } diff --git a/go.mod b/go.mod index d3142a530..afa241977 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,6 @@ require ( github.com/filecoin-project/go-address v0.0.5 github.com/filecoin-project/go-amt-ipld/v2 v2.1.1-0.20201006184820-924ee87a1349 // indirect github.com/filecoin-project/go-bitfield v0.2.4 - github.com/filecoin-project/go-bs-lmdb v1.0.3 github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 github.com/filecoin-project/go-commp-utils v0.0.0-20201119054358-b88f7a96a434 github.com/filecoin-project/go-crypto v0.0.0-20191218222705-effae4ea9f03 @@ -96,7 +95,6 @@ require ( github.com/ipld/go-car v0.1.1-0.20201119040415-11b6074b6d4d github.com/ipld/go-ipld-prime v0.5.1-0.20201021195245-109253e8a018 github.com/kelseyhightower/envconfig v1.4.0 - github.com/ledgerwatch/lmdb-go v1.17.4 github.com/lib/pq v1.7.0 github.com/libp2p/go-buffer-pool v0.0.2 github.com/libp2p/go-eventbus v0.2.1 diff --git a/go.sum b/go.sum index a1da7a182..b89cee2e5 100644 --- a/go.sum +++ b/go.sum @@ -246,8 +246,6 @@ github.com/filecoin-project/go-bitfield v0.2.3/go.mod 
h1:CNl9WG8hgR5mttCnUErjcQj github.com/filecoin-project/go-bitfield v0.2.3/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= github.com/filecoin-project/go-bitfield v0.2.4 h1:uZ7MeE+XfM5lqrHJZ93OnhQKc/rveW8p9au0C68JPgk= github.com/filecoin-project/go-bitfield v0.2.4/go.mod h1:CNl9WG8hgR5mttCnUErjcQjGvuiZjRqK9rHVBsQF4oM= -github.com/filecoin-project/go-bs-lmdb v1.0.3 h1:QRf/yMw5hFjqMIpi9mi/Hkh4qberUI++56XAdB0VgwM= -github.com/filecoin-project/go-bs-lmdb v1.0.3/go.mod h1:peFIZ9XEE9OLFkCzi7FMlr84UexqVKj6+AyxZD5SiGs= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2 h1:av5fw6wmm58FYMgJeoB/lK9XXrgdugYiTqkdxjTy9k8= github.com/filecoin-project/go-cbor-util v0.0.0-20191219014500-08c40a1e63a2/go.mod h1:pqTiPHobNkOVM5thSRsHYjyQfq7O5QSCMhvuu9JoDlg= github.com/filecoin-project/go-commp-utils v0.0.0-20201119054358-b88f7a96a434 h1:0kHszkYP3hgApcjl5x4rpwONhN9+j7XDobf6at5XfHs= @@ -756,8 +754,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/ledgerwatch/lmdb-go v1.17.4 h1:dDgPXUrzFWG/EB3RwOKZ+P3XGAlbsZxmVahjc+qWwyA= -github.com/ledgerwatch/lmdb-go v1.17.4/go.mod h1:NKRpCxksoTQPyxsUcBiVOe0135uqnJsnf6cElxmOL0o= github.com/lib/pq v1.7.0 h1:h93mCPfUSkaul3Ka/VG8uZdmW1uMHDGxzu0NWHuJmHY= github.com/lib/pq v1.7.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/libp2p/go-addr-util v0.0.1/go.mod h1:4ac6O7n9rIAKB1dnd+s8IbbMXkt+oBpzX4/+RACcnlQ= @@ -1323,8 +1319,6 @@ github.com/prometheus/procfs v0.1.0 h1:jhMy6QXfi3y2HEzFoyuCj40z4OZIIHHPtFyCMftmv github.com/prometheus/procfs v0.1.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/raulk/clock v1.1.0 h1:dpb29+UKMbLqiU/jqIJptgLR1nn23HLgMY0sTCDza5Y= github.com/raulk/clock v1.1.0/go.mod 
h1:3MpVxdZ/ODBQDxbN+kzshf5OSZwPjtMDx6BBXBmOeY0= -github.com/raulk/go-bs-tests v0.0.4 h1:gYUYmIFMBnp2mtZQuiP/ZGtSTSPvmDBjWBz0xTZz4X8= -github.com/raulk/go-bs-tests v0.0.4/go.mod h1:ZREaOSaReTvV4nY7Qh6Lkl+QisYXNBWcPRa0gjrIaG4= github.com/raulk/go-watchdog v1.0.1 h1:qgm3DIJAeb+2byneLrQJ7kvmDLGxN2vy3apXyGaDKN4= github.com/raulk/go-watchdog v1.0.1/go.mod h1:lzSbAl5sh4rtI8tYHU01BWIDzgzqaQLj6RcA1i4mlqI= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= diff --git a/node/builder.go b/node/builder.go index c92ab1248..5f7e872a4 100644 --- a/node/builder.go +++ b/node/builder.go @@ -613,8 +613,6 @@ func Repo(r repo.Repo) Option { If(cfg.EnableSplitstore, If(cfg.Splitstore.GetHotStoreType() == "badger", Override(new(dtypes.HotBlockstore), modules.BadgerHotBlockstore)), - If(cfg.Splitstore.GetHotStoreType() == "lmdb", - Override(new(dtypes.HotBlockstore), modules.LMDBHotBlockstore)), Override(new(dtypes.SplitBlockstore), modules.SplitBlockstore(cfg)), Override(new(dtypes.ChainBlockstore), modules.ChainSplitBlockstore), Override(new(dtypes.StateBlockstore), modules.StateSplitBlockstore), diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 7d8e713ba..0ab01a0ec 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -5,15 +5,13 @@ import ( "io" "os" "path/filepath" - "time" - lmdbbs "github.com/filecoin-project/go-bs-lmdb" - badgerbs "github.com/filecoin-project/lotus/blockstore/badger" bstore "github.com/ipfs/go-ipfs-blockstore" "go.uber.org/fx" "golang.org/x/xerrors" "github.com/filecoin-project/lotus/blockstore" + badgerbs "github.com/filecoin-project/lotus/blockstore/badger" "github.com/filecoin-project/lotus/chain/store/splitstore" "github.com/filecoin-project/lotus/node/config" "github.com/filecoin-project/lotus/node/modules/dtypes" @@ -38,34 +36,6 @@ func UniversalBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, r repo.Locked return bs, err } -func 
LMDBHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlockstore, error) { - path, err := r.SplitstorePath() - if err != nil { - return nil, err - } - - path = filepath.Join(path, "hot.lmdb") - bs, err := lmdbbs.Open(&lmdbbs.Options{ - Path: path, - InitialMmapSize: 4 << 30, // 4GiB. - MmapGrowthStepFactor: 1.25, // scale slower than the default of 1.5 - MmapGrowthStepMax: 4 << 30, // 4GiB - RetryDelay: 10 * time.Microsecond, - MaxReaders: 1024, - }) - if err != nil { - return nil, err - } - - lc.Append(fx.Hook{ - OnStop: func(_ context.Context) error { - return bs.Close() - }}) - - hot := blockstore.WrapIDStore(bs) - return hot, err -} - func BadgerHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlockstore, error) { path, err := r.SplitstorePath() if err != nil { From 1a804fbdec9abc2b2a6a24ac9dffd1b37c91dd41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Mon, 1 Mar 2021 16:15:39 +0000 Subject: [PATCH 100/148] move splitstore into blockstore package. 
--- {chain/store => blockstore}/splitstore/liveset.go | 0 {chain/store => blockstore}/splitstore/liveset_bloom.go | 0 {chain/store => blockstore}/splitstore/liveset_bolt.go | 0 {chain/store => blockstore}/splitstore/liveset_test.go | 0 {chain/store => blockstore}/splitstore/splitstore.go | 1 + .../splitstore/snoop.go => blockstore/splitstore/tracking.go | 0 .../snoop_bolt.go => blockstore/splitstore/tracking_bolt.go | 0 .../snoop_test.go => blockstore/splitstore/tracking_test.go | 0 node/modules/blockstore.go | 2 +- node/modules/chain.go | 2 +- 10 files changed, 3 insertions(+), 2 deletions(-) rename {chain/store => blockstore}/splitstore/liveset.go (100%) rename {chain/store => blockstore}/splitstore/liveset_bloom.go (100%) rename {chain/store => blockstore}/splitstore/liveset_bolt.go (100%) rename {chain/store => blockstore}/splitstore/liveset_test.go (100%) rename {chain/store => blockstore}/splitstore/splitstore.go (99%) rename chain/store/splitstore/snoop.go => blockstore/splitstore/tracking.go (100%) rename chain/store/splitstore/snoop_bolt.go => blockstore/splitstore/tracking_bolt.go (100%) rename chain/store/splitstore/snoop_test.go => blockstore/splitstore/tracking_test.go (100%) diff --git a/chain/store/splitstore/liveset.go b/blockstore/splitstore/liveset.go similarity index 100% rename from chain/store/splitstore/liveset.go rename to blockstore/splitstore/liveset.go diff --git a/chain/store/splitstore/liveset_bloom.go b/blockstore/splitstore/liveset_bloom.go similarity index 100% rename from chain/store/splitstore/liveset_bloom.go rename to blockstore/splitstore/liveset_bloom.go diff --git a/chain/store/splitstore/liveset_bolt.go b/blockstore/splitstore/liveset_bolt.go similarity index 100% rename from chain/store/splitstore/liveset_bolt.go rename to blockstore/splitstore/liveset_bolt.go diff --git a/chain/store/splitstore/liveset_test.go b/blockstore/splitstore/liveset_test.go similarity index 100% rename from chain/store/splitstore/liveset_test.go 
rename to blockstore/splitstore/liveset_test.go diff --git a/chain/store/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go similarity index 99% rename from chain/store/splitstore/splitstore.go rename to blockstore/splitstore/splitstore.go index db35357ba..56283eb92 100644 --- a/chain/store/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -16,6 +16,7 @@ import ( logging "github.com/ipfs/go-log/v2" "github.com/filecoin-project/go-state-types/abi" + bstore "github.com/filecoin-project/lotus/blockstore" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/store" diff --git a/chain/store/splitstore/snoop.go b/blockstore/splitstore/tracking.go similarity index 100% rename from chain/store/splitstore/snoop.go rename to blockstore/splitstore/tracking.go diff --git a/chain/store/splitstore/snoop_bolt.go b/blockstore/splitstore/tracking_bolt.go similarity index 100% rename from chain/store/splitstore/snoop_bolt.go rename to blockstore/splitstore/tracking_bolt.go diff --git a/chain/store/splitstore/snoop_test.go b/blockstore/splitstore/tracking_test.go similarity index 100% rename from chain/store/splitstore/snoop_test.go rename to blockstore/splitstore/tracking_test.go diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 0ab01a0ec..92abc6d6d 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -12,7 +12,7 @@ import ( "github.com/filecoin-project/lotus/blockstore" badgerbs "github.com/filecoin-project/lotus/blockstore/badger" - "github.com/filecoin-project/lotus/chain/store/splitstore" + "github.com/filecoin-project/lotus/blockstore/splitstore" "github.com/filecoin-project/lotus/node/config" "github.com/filecoin-project/lotus/node/modules/dtypes" "github.com/filecoin-project/lotus/node/modules/helpers" diff --git a/node/modules/chain.go b/node/modules/chain.go index a59418688..4ca60bbea 100644 --- a/node/modules/chain.go +++ b/node/modules/chain.go @@ -14,6 +14,7 @@ 
import ( "golang.org/x/xerrors" "github.com/filecoin-project/lotus/blockstore" + "github.com/filecoin-project/lotus/blockstore/splitstore" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain" "github.com/filecoin-project/lotus/chain/beacon" @@ -22,7 +23,6 @@ import ( "github.com/filecoin-project/lotus/chain/messagepool" "github.com/filecoin-project/lotus/chain/stmgr" "github.com/filecoin-project/lotus/chain/store" - "github.com/filecoin-project/lotus/chain/store/splitstore" "github.com/filecoin-project/lotus/chain/vm" "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper" "github.com/filecoin-project/lotus/journal" From cb36d5b6a461731b9f5e929a40209b1a3269d295 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 1 Mar 2021 18:41:51 +0200 Subject: [PATCH 101/148] warm up splitstore at first head change notification --- blockstore/splitstore/splitstore.go | 87 +++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 4 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 56283eb92..6d6e8aba9 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -29,7 +29,10 @@ var ( CompactionCold = build.Finality ) -var baseEpochKey = dstore.NewKey("baseEpoch") +var ( + baseEpochKey = dstore.NewKey("/splitstore/baseEpoch") + warmupEpochKey = dstore.NewKey("/splitstore/warmupEpoch") +) var log = logging.Logger("splitstore") @@ -64,7 +67,8 @@ type SplitStore struct { skipOldMsgs bool skipMsgReceipts bool - baseEpoch abi.ChainEpoch + baseEpoch abi.ChainEpoch + warmupEpoch abi.ChainEpoch mx sync.Mutex curTs *types.TipSet @@ -275,11 +279,25 @@ func (s *SplitStore) Start(cs *store.ChainStore) error { err = s.setBaseEpoch(s.curTs.Height()) if err != nil { - return err + return xerrors.Errorf("error saving base epoch: %w", err) } default: - return err + return xerrors.Errorf("error loading base epoch: %w", err) + } + + // load warmup epoch from metadata 
ds + // if none, then the splitstore will warm up the hotstore at first head change notif + // by walking the current tipset + bs, err = s.ds.Get(warmupEpochKey) + switch err { + case nil: + s.warmupEpoch = bytesToEpoch(bs) + + case dstore.ErrNotFound: + + default: + return xerrors.Errorf("error loading warmup epoch: %w", err) } // watch the chain @@ -310,7 +328,25 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { return nil } + if s.warmupEpoch == 0 { + // splitstore needs to warm up + s.warmupEpoch = epoch + go func() { + defer atomic.StoreInt32(&s.compacting, 0) + + log.Info("warming up hotstore") + start := time.Now() + + s.warmup() + + log.Infow("warm up done", "took", time.Since(start)) + }() + + return nil + } + if epoch-s.baseEpoch > CompactionThreshold { + // it's time to compact go func() { defer atomic.StoreInt32(&s.compacting, 0) @@ -329,6 +365,49 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { return nil } +func (s *SplitStore) warmup() { + s.mx.Lock() + curTs := s.curTs + epoch := curTs.Height() + s.mx.Unlock() + + err := s.cs.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, + func(cid cid.Cid) error { + has, err := s.hot.Has(cid) + if err != nil { + return err + } + + if has { + return nil + } + + blk, err := s.cold.Get(cid) + if err != nil { + return err + } + + err = s.snoop.Put(cid, epoch) + if err != nil { + return err + } + + return s.hot.Put(blk) + }) + + if err != nil { + log.Errorf("error warming up splitstore: %s", err) + return + } + + // save the warmup epoch + s.warmupEpoch = epoch + err = s.ds.Put(warmupEpochKey, epochToBytes(epoch)) + if err != nil { + log.Errorf("error saving warmup epoch: %s", err) + } +} + // Compaction/GC Algorithm func (s *SplitStore) compact() { if s.liveSetSize == 0 { From 748dd962d8d1969a96632916fe841130d46af7a8 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 1 Mar 2021 18:47:47 +0200 Subject: [PATCH 102/148] snake current tipset from 
head change notification --- blockstore/splitstore/splitstore.go | 41 +++++++++-------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 6d6e8aba9..7afa46e3b 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -319,8 +319,9 @@ func (s *SplitStore) Close() error { func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { s.mx.Lock() - s.curTs = apply[len(apply)-1] - epoch := s.curTs.Height() + curTs := apply[len(apply)-1] + epoch := curTs.Height() + s.curTs = curTs s.mx.Unlock() if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { @@ -330,14 +331,13 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { if s.warmupEpoch == 0 { // splitstore needs to warm up - s.warmupEpoch = epoch go func() { defer atomic.StoreInt32(&s.compacting, 0) log.Info("warming up hotstore") start := time.Now() - s.warmup() + s.warmup(curTs) log.Infow("warm up done", "took", time.Since(start)) }() @@ -353,7 +353,7 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { log.Info("compacting splitstore") start := time.Now() - s.compact() + s.compact(curTs) log.Infow("compaction done", "took", time.Since(start)) }() @@ -365,11 +365,8 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { return nil } -func (s *SplitStore) warmup() { - s.mx.Lock() - curTs := s.curTs +func (s *SplitStore) warmup(curTs *types.TipSet) { epoch := curTs.Height() - s.mx.Unlock() err := s.cs.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { @@ -409,28 +406,24 @@ func (s *SplitStore) warmup() { } // Compaction/GC Algorithm -func (s *SplitStore) compact() { +func (s *SplitStore) compact(curTs *types.TipSet) { if s.liveSetSize == 0 { start := time.Now() log.Info("estimating live set size") - s.estimateLiveSetSize() + s.estimateLiveSetSize(curTs) 
log.Infow("estimating live set size done", "took", time.Since(start), "size", s.liveSetSize) } else { log.Infow("current live set size estimate", "size", s.liveSetSize) } if s.fullCompaction { - s.compactFull() + s.compactFull(curTs) } else { - s.compactSimple() + s.compactSimple(curTs) } } -func (s *SplitStore) estimateLiveSetSize() { - s.mx.Lock() - curTs := s.curTs - s.mx.Unlock() - +func (s *SplitStore) estimateLiveSetSize(curTs *types.TipSet) { s.liveSetSize = 0 err := s.cs.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { @@ -444,11 +437,7 @@ func (s *SplitStore) estimateLiveSetSize() { } } -func (s *SplitStore) compactSimple() { - s.mx.Lock() - curTs := s.curTs - s.mx.Unlock() - +func (s *SplitStore) compactSimple(curTs *types.TipSet) { coldEpoch := s.baseEpoch + CompactionCold log.Infow("running simple compaction", "currentEpoch", curTs.Height(), "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch) @@ -622,11 +611,7 @@ func (s *SplitStore) compactSimple() { } } -func (s *SplitStore) compactFull() { - s.mx.Lock() - curTs := s.curTs - s.mx.Unlock() - +func (s *SplitStore) compactFull(curTs *types.TipSet) { epoch := curTs.Height() coldEpoch := s.baseEpoch + CompactionCold From e612fff1fed3c1ad23ac65112a20c13767d5e70d Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 1 Mar 2021 18:49:20 +0200 Subject: [PATCH 103/148] also estimate liveset size during warm up --- blockstore/splitstore/splitstore.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 7afa46e3b..f9cd12515 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -368,8 +368,11 @@ func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { func (s *SplitStore) warmup(curTs *types.TipSet) { epoch := curTs.Height() + count := int64(0) err := s.cs.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, 
s.skipMsgReceipts, func(cid cid.Cid) error { + count++ + has, err := s.hot.Has(cid) if err != nil { return err @@ -397,6 +400,10 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { return } + if count > s.liveSetSize { + s.liveSetSize = count + } + // save the warmup epoch s.warmupEpoch = epoch err = s.ds.Put(warmupEpochKey, epochToBytes(epoch)) From b9400c590f9204fc29919cce506af7bbacbb9052 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 1 Mar 2021 18:50:09 +0200 Subject: [PATCH 104/148] use crypto/rand for bloom salt --- blockstore/splitstore/liveset_bloom.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/liveset_bloom.go b/blockstore/splitstore/liveset_bloom.go index 57335e624..110dc6b81 100644 --- a/blockstore/splitstore/liveset_bloom.go +++ b/blockstore/splitstore/liveset_bloom.go @@ -1,7 +1,7 @@ package splitstore import ( - "math/rand" + "crypto/rand" "golang.org/x/xerrors" @@ -37,7 +37,7 @@ func (e *BloomLiveSetEnv) NewLiveSet(name string, sizeHint int64) (LiveSet, erro } salt := make([]byte, 4) - _, err := rand.Read(salt) //nolint + _, err := rand.Read(salt) if err != nil { return nil, xerrors.Errorf("error reading salt: %w", err) } From b1b452bc0fff6df61b43d564effc624d7aa1c0ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Mon, 1 Mar 2021 17:38:02 +0000 Subject: [PATCH 105/148] remove dependency from blockstore/splitstore => chain/store. 
--- blockstore/splitstore/splitstore.go | 34 ++++++++++++++++++----------- chain/store/store.go | 2 +- node/modules/blockstore.go | 11 +++++----- node/modules/chain.go | 11 +++++++--- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index f9cd12515..bed879aaf 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -19,7 +19,6 @@ import ( bstore "github.com/filecoin-project/lotus/blockstore" "github.com/filecoin-project/lotus/build" - "github.com/filecoin-project/lotus/chain/store" "github.com/filecoin-project/lotus/chain/types" ) @@ -59,6 +58,15 @@ type Config struct { Archival bool } +// ChainAccessor allows the Splitstore to access the chain. It will most likely +// be a ChainStore at runtime. +type ChainAccessor interface { + GetTipsetByHeight(context.Context, abi.ChainEpoch, *types.TipSet, bool) (*types.TipSet, error) + GetHeaviestTipSet() *types.TipSet + SubscribeHeadChanges(change func(revert []*types.TipSet, apply []*types.TipSet) error) + WalkSnapshot(context.Context, *types.TipSet, abi.ChainEpoch, bool, bool, func(cid.Cid) error) error +} + type SplitStore struct { compacting int32 @@ -73,7 +81,7 @@ type SplitStore struct { mx sync.Mutex curTs *types.TipSet - cs *store.ChainStore + chain ChainAccessor ds dstore.Datastore hot bstore.Blockstore cold bstore.Blockstore @@ -260,9 +268,9 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { } // State tracking -func (s *SplitStore) Start(cs *store.ChainStore) error { - s.cs = cs - s.curTs = cs.GetHeaviestTipSet() +func (s *SplitStore) Start(chain ChainAccessor) error { + s.chain = chain + s.curTs = chain.GetHeaviestTipSet() // load base epoch from metadata ds // if none, then use current epoch because it's a fresh start @@ -301,7 +309,7 @@ func (s *SplitStore) Start(cs *store.ChainStore) error { } // watch the chain - cs.SubscribeHeadChanges(s.HeadChange) + 
chain.SubscribeHeadChanges(s.HeadChange) return nil } @@ -317,7 +325,7 @@ func (s *SplitStore) Close() error { return s.env.Close() } -func (s *SplitStore) HeadChange(revert, apply []*types.TipSet) error { +func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { s.mx.Lock() curTs := apply[len(apply)-1] epoch := curTs.Height() @@ -432,7 +440,7 @@ func (s *SplitStore) compact(curTs *types.TipSet) { func (s *SplitStore) estimateLiveSetSize(curTs *types.TipSet) { s.liveSetSize = 0 - err := s.cs.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, + err := s.chain.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { s.liveSetSize++ return nil @@ -460,14 +468,14 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { log.Info("marking reachable cold objects") startMark := time.Now() - coldTs, err := s.cs.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) + coldTs, err := s.chain.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) if err != nil { // TODO do something better here panic(err) } count := int64(0) - err = s.cs.WalkSnapshot(context.Background(), coldTs, 1, s.skipOldMsgs, s.skipMsgReceipts, + err = s.chain.WalkSnapshot(context.Background(), coldTs, 1, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { count++ return coldSet.Mark(cid) @@ -646,7 +654,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { // Phase 1a: mark all reachable CIDs in the hot range count := int64(0) - err = s.cs.WalkSnapshot(context.Background(), curTs, epoch-coldEpoch, s.skipOldMsgs, s.skipMsgReceipts, + err = s.chain.WalkSnapshot(context.Background(), curTs, epoch-coldEpoch, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { count++ return hotSet.Mark(cid) @@ -662,14 +670,14 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { } // Phase 1b: mark all reachable CIDs in the cold range - coldTs, err := 
s.cs.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) + coldTs, err := s.chain.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) if err != nil { // TODO do something better here panic(err) } count = 0 - err = s.cs.WalkSnapshot(context.Background(), coldTs, CompactionCold, s.skipOldMsgs, s.skipMsgReceipts, + err = s.chain.WalkSnapshot(context.Background(), coldTs, CompactionCold, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { count++ return coldSet.Mark(cid) diff --git a/chain/store/store.go b/chain/store/store.go index e0660495e..e0d71f030 100644 --- a/chain/store/store.go +++ b/chain/store/store.go @@ -81,7 +81,7 @@ func init() { } // ReorgNotifee represents a callback that gets called upon reorgs. -type ReorgNotifee func(rev, app []*types.TipSet) error +type ReorgNotifee = func(rev, app []*types.TipSet) error // Journal event types. const ( diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 92abc6d6d..c79553490 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -20,7 +20,8 @@ import ( ) // UniversalBlockstore returns a single universal blockstore that stores both -// chain data and state data. +// chain data and state data. It can be backed by a blockstore directly +// (e.g. Badger), or by a Splitstore. 
func UniversalBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, r repo.LockedRepo) (dtypes.UniversalBlockstore, error) { bs, err := r.Blockstore(helpers.LifecycleCtx(mctx, lc), repo.UniversalBlockstore) if err != nil { @@ -94,19 +95,19 @@ func SplitBlockstore(cfg *config.Blockstore) func(lc fx.Lifecycle, r repo.Locked } } -func StateFlatBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.UniversalBlockstore) (dtypes.StateBlockstore, error) { +func StateFlatBlockstore(_ fx.Lifecycle, _ helpers.MetricsCtx, bs dtypes.UniversalBlockstore) (dtypes.StateBlockstore, error) { return bs, nil } -func StateSplitBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.SplitBlockstore) (dtypes.StateBlockstore, error) { +func StateSplitBlockstore(_ fx.Lifecycle, _ helpers.MetricsCtx, bs dtypes.SplitBlockstore) (dtypes.StateBlockstore, error) { return bs, nil } -func ChainFlatBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.UniversalBlockstore) (dtypes.ChainBlockstore, error) { +func ChainFlatBlockstore(_ fx.Lifecycle, _ helpers.MetricsCtx, bs dtypes.UniversalBlockstore) (dtypes.ChainBlockstore, error) { return bs, nil } -func ChainSplitBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, bs dtypes.SplitBlockstore) (dtypes.ChainBlockstore, error) { +func ChainSplitBlockstore(_ fx.Lifecycle, _ helpers.MetricsCtx, bs dtypes.SplitBlockstore) (dtypes.ChainBlockstore, error) { return bs, nil } diff --git a/node/modules/chain.go b/node/modules/chain.go index 4ca60bbea..ffdf3aa3a 100644 --- a/node/modules/chain.go +++ b/node/modules/chain.go @@ -80,14 +80,19 @@ func ChainStore(lc fx.Lifecycle, cbs dtypes.ChainBlockstore, sbs dtypes.StateBlo log.Warnf("loading chain state from disk: %s", err) } + var startHook func(context.Context) error if ss, ok := basebs.(*splitstore.SplitStore); ok { - err := ss.Start(chain) - if err != nil { - log.Errorf("error starting splitstore: %s", err) + startHook = func(_ context.Context) error { + err := ss.Start(chain) + if 
err != nil { + err = xerrors.Errorf("error starting splitstore: %w", err) + } + return err } } lc.Append(fx.Hook{ + OnStart: startHook, OnStop: func(_ context.Context) error { return chain.Close() }, From 8cfba5b092cddbcd91d5761a67ea7a367b366097 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Mon, 1 Mar 2021 17:39:00 +0000 Subject: [PATCH 106/148] renames and polish. --- blockstore/splitstore/liveset.go | 4 ++-- blockstore/splitstore/liveset_bloom.go | 2 +- blockstore/splitstore/liveset_test.go | 2 +- blockstore/splitstore/splitstore.go | 23 +++++++++++------------ blockstore/splitstore/tracking.go | 4 ++-- blockstore/splitstore/tracking_bolt.go | 14 +++++++------- blockstore/splitstore/tracking_test.go | 4 ++-- node/modules/blockstore.go | 16 ++++++++-------- 8 files changed, 34 insertions(+), 35 deletions(-) diff --git a/blockstore/splitstore/liveset.go b/blockstore/splitstore/liveset.go index 543c41371..2678d16d9 100644 --- a/blockstore/splitstore/liveset.go +++ b/blockstore/splitstore/liveset.go @@ -14,14 +14,14 @@ type LiveSet interface { Close() error } -var markBytes = []byte{} +var markBytes []byte type LiveSetEnv interface { NewLiveSet(name string, sizeHint int64) (LiveSet, error) Close() error } -func NewLiveSetEnv(path string, liveSetType string) (LiveSetEnv, error) { +func OpenLiveSetEnv(path string, liveSetType string) (LiveSetEnv, error) { switch liveSetType { case "", "bloom": return NewBloomLiveSetEnv() diff --git a/blockstore/splitstore/liveset_bloom.go b/blockstore/splitstore/liveset_bloom.go index 110dc6b81..06dc2e1f7 100644 --- a/blockstore/splitstore/liveset_bloom.go +++ b/blockstore/splitstore/liveset_bloom.go @@ -42,7 +42,7 @@ func (e *BloomLiveSetEnv) NewLiveSet(name string, sizeHint int64) (LiveSet, erro return nil, xerrors.Errorf("error reading salt: %w", err) } - bf, err := bbloom.New(float64(size), float64(BloomFilterProbability)) + bf, err := bbloom.New(float64(size), BloomFilterProbability) if err != nil { return 
nil, xerrors.Errorf("error creating bloom filter: %w", err) } diff --git a/blockstore/splitstore/liveset_test.go b/blockstore/splitstore/liveset_test.go index 0a1dd30da..07c37a21d 100644 --- a/blockstore/splitstore/liveset_test.go +++ b/blockstore/splitstore/liveset_test.go @@ -26,7 +26,7 @@ func testLiveSet(t *testing.T, lsType string) { t.Fatal(err) } - env, err := NewLiveSetEnv(path, lsType) + env, err := OpenLiveSetEnv(path, lsType) if err != nil { t.Fatal(err) } diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index bed879aaf..d3318b39c 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -31,13 +31,12 @@ var ( var ( baseEpochKey = dstore.NewKey("/splitstore/baseEpoch") warmupEpochKey = dstore.NewKey("/splitstore/warmupEpoch") + log = logging.Logger("splitstore") ) -var log = logging.Logger("splitstore") - func init() { // TODO temporary for debugging purposes; to be removed for merge. - logging.SetLogLevel("splitstore", "DEBUG") + _ = logging.SetLogLevel("splitstore", "DEBUG") } type Config struct { @@ -94,20 +93,20 @@ type SplitStore struct { var _ bstore.Blockstore = (*SplitStore)(nil) -// NewSplitStore creates a new SplitStore instance, given a path for the hotstore dbs and a cold -// blockstore. The SplitStore must be attached to the ChainStore with Start in order to trigger -// compaction. -func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore, cfg *Config) (*SplitStore, error) { +// Open opens an existing splistore, or creates a new splitstore. The splitstore +// is backed by the provided hot and cold stores. The returned SplitStore MUST be +// attached to the ChainStore with Start in order to trigger compaction. 
+func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Config) (*SplitStore, error) { // the tracking store - snoop, err := NewTrackingStore(path, cfg.TrackingStoreType) + snoop, err := OpenTrackingStore(path, cfg.TrackingStoreType) if err != nil { return nil, err } // the liveset env - env, err := NewLiveSetEnv(path, cfg.LiveSetType) + env, err := OpenLiveSetEnv(path, cfg.LiveSetType) if err != nil { - snoop.Close() //nolint:errcheck + _ = snoop.Close() return nil, err } @@ -129,7 +128,7 @@ func NewSplitStore(path string, ds dstore.Datastore, cold, hot bstore.Blockstore } // Blockstore interface -func (s *SplitStore) DeleteBlock(cid cid.Cid) error { +func (s *SplitStore) DeleteBlock(_ cid.Cid) error { // afaict we don't seem to be using this method, so it's not implemented return errors.New("DeleteBlock not implemented on SplitStore; don't do this Luke!") //nolint } @@ -377,7 +376,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { epoch := curTs.Height() count := int64(0) - err := s.cs.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, + err := s.chain.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { count++ diff --git a/blockstore/splitstore/tracking.go b/blockstore/splitstore/tracking.go index 3fdbcf0ac..56f9294e7 100644 --- a/blockstore/splitstore/tracking.go +++ b/blockstore/splitstore/tracking.go @@ -20,10 +20,10 @@ type TrackingStore interface { Close() error } -func NewTrackingStore(path string, trackingStoreType string) (TrackingStore, error) { +func OpenTrackingStore(path string, trackingStoreType string) (TrackingStore, error) { switch trackingStoreType { case "", "bolt": - return NewBoltTrackingStore(filepath.Join(path, "snoop.bolt")) + return OpenBoltTrackingStore(filepath.Join(path, "snoop.bolt")) default: return nil, xerrors.Errorf("unknown tracking store type %s", trackingStoreType) } diff --git a/blockstore/splitstore/tracking_bolt.go 
b/blockstore/splitstore/tracking_bolt.go index 2fc5d4f6d..5cc50e05e 100644 --- a/blockstore/splitstore/tracking_bolt.go +++ b/blockstore/splitstore/tracking_bolt.go @@ -18,12 +18,12 @@ type BoltTrackingStore struct { var _ TrackingStore = (*BoltTrackingStore)(nil) -func NewBoltTrackingStore(path string) (*BoltTrackingStore, error) { - db, err := bolt.Open(path, 0644, - &bolt.Options{ - Timeout: 1 * time.Second, - NoSync: true, - }) +func OpenBoltTrackingStore(path string) (*BoltTrackingStore, error) { + opts := &bolt.Options{ + Timeout: 1 * time.Second, + NoSync: true, + } + db, err := bolt.Open(path, 0644, opts) if err != nil { return nil, err } @@ -38,7 +38,7 @@ func NewBoltTrackingStore(path string) (*BoltTrackingStore, error) { }) if err != nil { - db.Close() //nolint:errcheck + _ = db.Close() return nil, err } diff --git a/blockstore/splitstore/tracking_test.go b/blockstore/splitstore/tracking_test.go index 3d5d2ff64..55b520133 100644 --- a/blockstore/splitstore/tracking_test.go +++ b/blockstore/splitstore/tracking_test.go @@ -51,7 +51,7 @@ func testTrackingStore(t *testing.T, tsType string) { t.Fatal(err) } - s, err := NewTrackingStore(path, tsType) + s, err := OpenTrackingStore(path, tsType) if err != nil { t.Fatal(err) } @@ -118,7 +118,7 @@ func testTrackingStore(t *testing.T, tsType string) { t.Fatal(err) } - s, err = NewTrackingStore(path, tsType) + s, err = OpenTrackingStore(path, tsType) if err != nil { t.Fatal(err) } diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index c79553490..8fffb1536 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -74,14 +74,14 @@ func SplitBlockstore(cfg *config.Blockstore) func(lc fx.Lifecycle, r repo.Locked return nil, err } - ss, err := splitstore.NewSplitStore(path, ds, cold, hot, - &splitstore.Config{ - TrackingStoreType: cfg.Splitstore.TrackingStoreType, - LiveSetType: cfg.Splitstore.LiveSetType, - EnableFullCompaction: cfg.Splitstore.EnableFullCompaction, - EnableGC: 
cfg.Splitstore.EnableGC, - Archival: cfg.Splitstore.Archival, - }) + cfg := &splitstore.Config{ + TrackingStoreType: cfg.Splitstore.TrackingStoreType, + LiveSetType: cfg.Splitstore.LiveSetType, + EnableFullCompaction: cfg.Splitstore.EnableFullCompaction, + EnableGC: cfg.Splitstore.EnableGC, + Archival: cfg.Splitstore.Archival, + } + ss, err := splitstore.Open(path, ds, hot, cold, cfg) if err != nil { return nil, err } From ce68b9b2291893559c917457d3209240bc702770 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 1 Mar 2021 20:11:35 +0200 Subject: [PATCH 107/148] batch writes during warm up --- blockstore/splitstore/splitstore.go | 42 ++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index d3318b39c..395b14031 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -375,6 +375,10 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { func (s *SplitStore) warmup(curTs *types.TipSet) { epoch := curTs.Height() + const batchSize = 4096 + batchHot := make([]blocks.Block, 0, batchSize) + batchSnoop := make([]cid.Cid, 0, batchSize) + count := int64(0) err := s.chain.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { @@ -394,12 +398,24 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { return err } - err = s.snoop.Put(cid, epoch) - if err != nil { - return err + batchHot = append(batchHot, blk) + batchSnoop = append(batchSnoop, cid) + + if len(batchHot) == batchSize { + err = s.snoop.PutBatch(batchSnoop, epoch) + if err != nil { + return err + } + batchSnoop = batchSnoop[:0] + + err = s.hot.PutMany(batchHot) + if err != nil { + return err + } + batchHot = batchHot[:0] } - return s.hot.Put(blk) + return nil }) if err != nil { @@ -407,6 +423,20 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { return } + if len(batchHot) > 0 { + err = 
s.snoop.PutBatch(batchSnoop, epoch) + if err != nil { + log.Errorf("error warming up splitstore: %s", err) + return + } + + err = s.hot.PutMany(batchHot) + if err != nil { + log.Errorf("error warming up splitstore: %s", err) + return + } + } + if count > s.liveSetSize { s.liveSetSize = count } @@ -538,7 +568,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { log.Info("moving cold objects to the coldstore") startMove := time.Now() - const batchSize = 1024 + const batchSize = 4096 batch := make([]blocks.Block, 0, batchSize) for cid := range cold { @@ -767,7 +797,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { log.Info("moving cold objects to the coldstore") startMove := time.Now() - const batchSize = 1024 + const batchSize = 4096 batch := make([]blocks.Block, 0, batchSize) for cid := range cold { From 48f253328d5617732a339a344411a5e7810d7fb6 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 1 Mar 2021 20:30:15 +0200 Subject: [PATCH 108/148] increase batch size to 16K --- blockstore/splitstore/splitstore.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 395b14031..ff4f2cae0 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -34,6 +34,8 @@ var ( log = logging.Logger("splitstore") ) +const batchSize = 16384 + func init() { // TODO temporary for debugging purposes; to be removed for merge. 
_ = logging.SetLogLevel("splitstore", "DEBUG") @@ -375,7 +377,6 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { func (s *SplitStore) warmup(curTs *types.TipSet) { epoch := curTs.Height() - const batchSize = 4096 batchHot := make([]blocks.Block, 0, batchSize) batchSnoop := make([]cid.Cid, 0, batchSize) @@ -568,7 +569,6 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { log.Info("moving cold objects to the coldstore") startMove := time.Now() - const batchSize = 4096 batch := make([]blocks.Block, 0, batchSize) for cid := range cold { @@ -797,9 +797,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { log.Info("moving cold objects to the coldstore") startMove := time.Now() - const batchSize = 4096 batch := make([]blocks.Block, 0, batchSize) - for cid := range cold { blk, err := s.hot.Get(cid) if err != nil { From 4b1e1f4b525db2fe43489a742003dbbbee3de108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Tue, 2 Mar 2021 00:47:21 +0000 Subject: [PATCH 109/148] rename liveset => markset; rename snoop => tracking store; docs. 
--- blockstore/splitstore/liveset.go | 33 --- blockstore/splitstore/markset.go | 38 ++++ .../{liveset_bloom.go => markset_bloom.go} | 26 +-- .../{liveset_bolt.go => markset_bolt.go} | 24 +-- .../{liveset_test.go => markset_test.go} | 26 +-- blockstore/splitstore/splitstore.go | 191 ++++++++++-------- blockstore/splitstore/tracking.go | 15 +- blockstore/splitstore/tracking_bolt.go | 2 +- blockstore/splitstore/tracking_test.go | 2 +- node/config/def.go | 2 +- node/modules/blockstore.go | 2 +- 11 files changed, 200 insertions(+), 161 deletions(-) delete mode 100644 blockstore/splitstore/liveset.go create mode 100644 blockstore/splitstore/markset.go rename blockstore/splitstore/{liveset_bloom.go => markset_bloom.go} (61%) rename blockstore/splitstore/{liveset_bolt.go => markset_bolt.go} (63%) rename blockstore/splitstore/{liveset_test.go => markset_test.go} (77%) diff --git a/blockstore/splitstore/liveset.go b/blockstore/splitstore/liveset.go deleted file mode 100644 index 2678d16d9..000000000 --- a/blockstore/splitstore/liveset.go +++ /dev/null @@ -1,33 +0,0 @@ -package splitstore - -import ( - "path/filepath" - - "golang.org/x/xerrors" - - cid "github.com/ipfs/go-cid" -) - -type LiveSet interface { - Mark(cid.Cid) error - Has(cid.Cid) (bool, error) - Close() error -} - -var markBytes []byte - -type LiveSetEnv interface { - NewLiveSet(name string, sizeHint int64) (LiveSet, error) - Close() error -} - -func OpenLiveSetEnv(path string, liveSetType string) (LiveSetEnv, error) { - switch liveSetType { - case "", "bloom": - return NewBloomLiveSetEnv() - case "bolt": - return NewBoltLiveSetEnv(filepath.Join(path, "sweep.bolt")) - default: - return nil, xerrors.Errorf("unknown live set type %s", liveSetType) - } -} diff --git a/blockstore/splitstore/markset.go b/blockstore/splitstore/markset.go new file mode 100644 index 000000000..ef14a2fc6 --- /dev/null +++ b/blockstore/splitstore/markset.go @@ -0,0 +1,38 @@ +package splitstore + +import ( + "path/filepath" + + 
"golang.org/x/xerrors" + + cid "github.com/ipfs/go-cid" +) + +// MarkSet is a utility to keep track of seen CID, and later query for them. +// +// * If the expected dataset is large, it can be backed by a datastore (e.g. bbolt). +// * If a probabilistic result is acceptable, it can be backed by a bloom filter (default). +type MarkSet interface { + Mark(cid.Cid) error + Has(cid.Cid) (bool, error) + Close() error +} + +// markBytes is deliberately a non-nil empty byte slice for serialization. +var markBytes = []byte{} + +type MarkSetEnv interface { + Create(name string, sizeHint int64) (MarkSet, error) + Close() error +} + +func OpenMarkSetEnv(path string, mtype string) (MarkSetEnv, error) { + switch mtype { + case "", "bloom": + return NewBloomMarkSetEnv() + case "bolt": + return NewBoltMarkSetEnv(filepath.Join(path, "markset.bolt")) + default: + return nil, xerrors.Errorf("unknown mark set type %s", mtype) + } +} diff --git a/blockstore/splitstore/liveset_bloom.go b/blockstore/splitstore/markset_bloom.go similarity index 61% rename from blockstore/splitstore/liveset_bloom.go rename to blockstore/splitstore/markset_bloom.go index 06dc2e1f7..b5c2fe176 100644 --- a/blockstore/splitstore/liveset_bloom.go +++ b/blockstore/splitstore/markset_bloom.go @@ -15,22 +15,22 @@ const ( BloomFilterProbability = 0.01 ) -type BloomLiveSetEnv struct{} +type BloomMarkSetEnv struct{} -var _ LiveSetEnv = (*BloomLiveSetEnv)(nil) +var _ MarkSetEnv = (*BloomMarkSetEnv)(nil) -type BloomLiveSet struct { +type BloomMarkSet struct { salt []byte bf *bbloom.Bloom } -var _ LiveSet = (*BloomLiveSet)(nil) +var _ MarkSet = (*BloomMarkSet)(nil) -func NewBloomLiveSetEnv() (*BloomLiveSetEnv, error) { - return &BloomLiveSetEnv{}, nil +func NewBloomMarkSetEnv() (*BloomMarkSetEnv, error) { + return &BloomMarkSetEnv{}, nil } -func (e *BloomLiveSetEnv) NewLiveSet(name string, sizeHint int64) (LiveSet, error) { +func (e *BloomMarkSetEnv) Create(name string, sizeHint int64) (MarkSet, error) { size := 
int64(BloomFilterMinSize) for size < sizeHint { size += BloomFilterMinSize @@ -47,14 +47,14 @@ func (e *BloomLiveSetEnv) NewLiveSet(name string, sizeHint int64) (LiveSet, erro return nil, xerrors.Errorf("error creating bloom filter: %w", err) } - return &BloomLiveSet{salt: salt, bf: bf}, nil + return &BloomMarkSet{salt: salt, bf: bf}, nil } -func (e *BloomLiveSetEnv) Close() error { +func (e *BloomMarkSetEnv) Close() error { return nil } -func (s *BloomLiveSet) saltedKey(cid cid.Cid) []byte { +func (s *BloomMarkSet) saltedKey(cid cid.Cid) []byte { hash := cid.Hash() key := make([]byte, len(s.salt)+len(hash)) n := copy(key, s.salt) @@ -63,15 +63,15 @@ func (s *BloomLiveSet) saltedKey(cid cid.Cid) []byte { return rehash[:] } -func (s *BloomLiveSet) Mark(cid cid.Cid) error { +func (s *BloomMarkSet) Mark(cid cid.Cid) error { s.bf.Add(s.saltedKey(cid)) return nil } -func (s *BloomLiveSet) Has(cid cid.Cid) (bool, error) { +func (s *BloomMarkSet) Has(cid cid.Cid) (bool, error) { return s.bf.Has(s.saltedKey(cid)), nil } -func (s *BloomLiveSet) Close() error { +func (s *BloomMarkSet) Close() error { return nil } diff --git a/blockstore/splitstore/liveset_bolt.go b/blockstore/splitstore/markset_bolt.go similarity index 63% rename from blockstore/splitstore/liveset_bolt.go rename to blockstore/splitstore/markset_bolt.go index 8c68d6a4a..cab0dd74a 100644 --- a/blockstore/splitstore/liveset_bolt.go +++ b/blockstore/splitstore/markset_bolt.go @@ -9,20 +9,20 @@ import ( bolt "go.etcd.io/bbolt" ) -type BoltLiveSetEnv struct { +type BoltMarkSetEnv struct { db *bolt.DB } -var _ LiveSetEnv = (*BoltLiveSetEnv)(nil) +var _ MarkSetEnv = (*BoltMarkSetEnv)(nil) -type BoltLiveSet struct { +type BoltMarkSet struct { db *bolt.DB bucketId []byte } -var _ LiveSet = (*BoltLiveSet)(nil) +var _ MarkSet = (*BoltMarkSet)(nil) -func NewBoltLiveSetEnv(path string) (*BoltLiveSetEnv, error) { +func NewBoltMarkSetEnv(path string) (*BoltMarkSetEnv, error) { db, err := bolt.Open(path, 0644, &bolt.Options{ 
Timeout: 1 * time.Second, @@ -32,10 +32,10 @@ func NewBoltLiveSetEnv(path string) (*BoltLiveSetEnv, error) { return nil, err } - return &BoltLiveSetEnv{db: db}, nil + return &BoltMarkSetEnv{db: db}, nil } -func (e *BoltLiveSetEnv) NewLiveSet(name string, hint int64) (LiveSet, error) { +func (e *BoltMarkSetEnv) Create(name string, hint int64) (MarkSet, error) { bucketId := []byte(name) err := e.db.Update(func(tx *bolt.Tx) error { _, err := tx.CreateBucketIfNotExists(bucketId) @@ -49,21 +49,21 @@ func (e *BoltLiveSetEnv) NewLiveSet(name string, hint int64) (LiveSet, error) { return nil, err } - return &BoltLiveSet{db: e.db, bucketId: bucketId}, nil + return &BoltMarkSet{db: e.db, bucketId: bucketId}, nil } -func (e *BoltLiveSetEnv) Close() error { +func (e *BoltMarkSetEnv) Close() error { return e.db.Close() } -func (s *BoltLiveSet) Mark(cid cid.Cid) error { +func (s *BoltMarkSet) Mark(cid cid.Cid) error { return s.db.Update(func(tx *bolt.Tx) error { b := tx.Bucket(s.bucketId) return b.Put(cid.Hash(), markBytes) }) } -func (s *BoltLiveSet) Has(cid cid.Cid) (result bool, err error) { +func (s *BoltMarkSet) Has(cid cid.Cid) (result bool, err error) { err = s.db.View(func(tx *bolt.Tx) error { b := tx.Bucket(s.bucketId) v := b.Get(cid.Hash()) @@ -74,7 +74,7 @@ func (s *BoltLiveSet) Has(cid cid.Cid) (result bool, err error) { return result, err } -func (s *BoltLiveSet) Close() error { +func (s *BoltMarkSet) Close() error { return s.db.Update(func(tx *bolt.Tx) error { return tx.DeleteBucket(s.bucketId) }) diff --git a/blockstore/splitstore/liveset_test.go b/blockstore/splitstore/markset_test.go similarity index 77% rename from blockstore/splitstore/liveset_test.go rename to blockstore/splitstore/markset_test.go index 07c37a21d..22f4b4ad5 100644 --- a/blockstore/splitstore/liveset_test.go +++ b/blockstore/splitstore/markset_test.go @@ -8,36 +8,36 @@ import ( "github.com/multiformats/go-multihash" ) -func TestBoltLiveSet(t *testing.T) { - testLiveSet(t, "bolt") +func 
TestBoltMarkSet(t *testing.T) { + testMarkSet(t, "bolt") } -func TestBloomLiveSet(t *testing.T) { - testLiveSet(t, "bloom") +func TestBloomMarkSet(t *testing.T) { + testMarkSet(t, "bloom") } -func testLiveSet(t *testing.T, lsType string) { +func testMarkSet(t *testing.T, lsType string) { t.Helper() - path := "/tmp/liveset-test" + path := "/tmp/markset-test" err := os.MkdirAll(path, 0777) if err != nil { t.Fatal(err) } - env, err := OpenLiveSetEnv(path, lsType) + env, err := OpenMarkSetEnv(path, lsType) if err != nil { t.Fatal(err) } defer env.Close() //nolint:errcheck - hotSet, err := env.NewLiveSet("hot", 0) + hotSet, err := env.Create("hot", 0) if err != nil { t.Fatal(err) } - coldSet, err := env.NewLiveSet("cold", 0) + coldSet, err := env.Create("cold", 0) if err != nil { t.Fatal(err) } @@ -51,7 +51,7 @@ func testLiveSet(t *testing.T, lsType string) { return cid.NewCidV1(cid.Raw, h) } - mustHave := func(s LiveSet, cid cid.Cid) { + mustHave := func(s MarkSet, cid cid.Cid) { has, err := s.Has(cid) if err != nil { t.Fatal(err) @@ -62,7 +62,7 @@ func testLiveSet(t *testing.T, lsType string) { } } - mustNotHave := func(s LiveSet, cid cid.Cid) { + mustNotHave := func(s MarkSet, cid cid.Cid) { has, err := s.Has(cid) if err != nil { t.Fatal(err) @@ -104,12 +104,12 @@ func testLiveSet(t *testing.T, lsType string) { t.Fatal(err) } - hotSet, err = env.NewLiveSet("hot", 0) + hotSet, err = env.Create("hot", 0) if err != nil { t.Fatal(err) } - coldSet, err = env.NewLiveSet("cold", 0) + coldSet, err = env.Create("cold", 0) if err != nil { t.Fatal(err) } diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index ff4f2cae0..bbc72150a 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -22,14 +22,36 @@ import ( "github.com/filecoin-project/lotus/chain/types" ) -// these are variable so that 1) lotus-soup builds 2) we can change them in tests var ( + // CompactionThreshold is the number of epochs that need 
to have elapsed + // from the previously compacted epoch to trigger a new compaction. + // + // |················· CompactionThreshold ··················| + // | | + // =======‖≡≡≡≡≡≡≡‖-------------------------------------------------» + // | | chain --> ↑__ current epoch + // |·······| + // ↑________ CompactionCold. + // + // === :: cold (already archived) + // ≡≡≡ :: to be archived in this compaction + // --- :: hot CompactionThreshold = 5 * build.Finality - CompactionCold = build.Finality + + // CompactionCold is the number of epochs that will be archived to the + // cold store on compaction. See diagram on CompactionThreshold for a + // better sense. + CompactionCold = build.Finality ) var ( - baseEpochKey = dstore.NewKey("/splitstore/baseEpoch") + // baseEpochKey stores the base epoch (last compaction epoch) in the + // metadata store. + baseEpochKey = dstore.NewKey("/splitstore/baseEpoch") + + // warmupEpochKey stores whether a hot store warmup has been performed. + // On first start, the splitstore will walk the state tree and will copy + // all active blocks into the hotstore. warmupEpochKey = dstore.NewKey("/splitstore/warmupEpoch") log = logging.Logger("splitstore") ) @@ -42,10 +64,15 @@ func init() { } type Config struct { - // TrackingStore type; bolt (default) or lmdb + // TrackingStore is the type of tracking store to use. + // + // Supported values are: "bolt". TrackingStoreType string - // LiveSet type; bloom (default), bolt, or lmdb - LiveSetType string + + // MarkSetType is the type of mark set to use. + // + // Supported values are: "bolt", "bloom". 
+ MarkSetType string // perform full reachability analysis (expensive) for compaction // You should enable this option if you plan to use the splitstore without a backing coldstore EnableFullCompaction bool @@ -82,15 +109,15 @@ type SplitStore struct { mx sync.Mutex curTs *types.TipSet - chain ChainAccessor - ds dstore.Datastore - hot bstore.Blockstore - cold bstore.Blockstore - snoop TrackingStore + chain ChainAccessor + ds dstore.Datastore + hot bstore.Blockstore + cold bstore.Blockstore + tracker TrackingStore - env LiveSetEnv + env MarkSetEnv - liveSetSize int64 + markSetSize int64 } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -100,25 +127,25 @@ var _ bstore.Blockstore = (*SplitStore)(nil) // attached to the ChainStore with Start in order to trigger compaction. func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Config) (*SplitStore, error) { // the tracking store - snoop, err := OpenTrackingStore(path, cfg.TrackingStoreType) + tracker, err := OpenTrackingStore(path, cfg.TrackingStoreType) if err != nil { return nil, err } - // the liveset env - env, err := OpenLiveSetEnv(path, cfg.LiveSetType) + // the markset env + env, err := OpenMarkSetEnv(path, cfg.MarkSetType) if err != nil { - _ = snoop.Close() + _ = tracker.Close() return nil, err } // and now we can make a SplitStore ss := &SplitStore{ - ds: ds, - hot: hot, - cold: cold, - snoop: snoop, - env: env, + ds: ds, + hot: hot, + cold: cold, + tracker: tracker, + env: env, fullCompaction: cfg.EnableFullCompaction, enableGC: cfg.EnableGC, @@ -185,7 +212,7 @@ func (s *SplitStore) Put(blk blocks.Block) error { epoch := s.curTs.Height() s.mx.Unlock() - err := s.snoop.Put(blk.Cid(), epoch) + err := s.tracker.Put(blk.Cid(), epoch) if err != nil { log.Errorf("error tracking CID in hotstore: %s; falling back to coldstore", err) return s.cold.Put(blk) @@ -209,7 +236,7 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { batch = append(batch, blk.Cid()) } - err := 
s.snoop.PutBatch(batch, epoch) + err := s.tracker.PutBatch(batch, epoch) if err != nil { log.Errorf("error tracking CIDs in hotstore: %s; falling back to coldstore", err) return s.cold.PutMany(blks) @@ -403,7 +430,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { batchSnoop = append(batchSnoop, cid) if len(batchHot) == batchSize { - err = s.snoop.PutBatch(batchSnoop, epoch) + err = s.tracker.PutBatch(batchSnoop, epoch) if err != nil { return err } @@ -425,7 +452,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { } if len(batchHot) > 0 { - err = s.snoop.PutBatch(batchSnoop, epoch) + err = s.tracker.PutBatch(batchSnoop, epoch) if err != nil { log.Errorf("error warming up splitstore: %s", err) return @@ -438,8 +465,8 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { } } - if count > s.liveSetSize { - s.liveSetSize = count + if count > s.markSetSize { + s.markSetSize = count } // save the warmup epoch @@ -452,13 +479,13 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { // Compaction/GC Algorithm func (s *SplitStore) compact(curTs *types.TipSet) { - if s.liveSetSize == 0 { + if s.markSetSize == 0 { start := time.Now() - log.Info("estimating live set size") - s.estimateLiveSetSize(curTs) - log.Infow("estimating live set size done", "took", time.Since(start), "size", s.liveSetSize) + log.Info("estimating mark set size") + s.estimateMarkSetSize(curTs) + log.Infow("estimating mark set size done", "took", time.Since(start), "size", s.markSetSize) } else { - log.Infow("current live set size estimate", "size", s.liveSetSize) + log.Infow("current mark set size estimate", "size", s.markSetSize) } if s.fullCompaction { @@ -468,11 +495,11 @@ func (s *SplitStore) compact(curTs *types.TipSet) { } } -func (s *SplitStore) estimateLiveSetSize(curTs *types.TipSet) { - s.liveSetSize = 0 +func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) { + s.markSetSize = 0 err := s.chain.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, 
func(cid cid.Cid) error { - s.liveSetSize++ + s.markSetSize++ return nil }) @@ -487,7 +514,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { log.Infow("running simple compaction", "currentEpoch", curTs.Height(), "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch) - coldSet, err := s.env.NewLiveSet("cold", s.liveSetSize) + coldSet, err := s.env.Create("cold", s.markSetSize) if err != nil { // TODO do something better here panic(err) @@ -495,7 +522,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { defer coldSet.Close() //nolint:errcheck // 1. mark reachable cold objects by looking at the objects reachable only from the cold epoch - log.Info("marking reachable cold objects") + log.Infow("marking reachable cold objects", "cold_epoch", coldEpoch) startMark := time.Now() coldTs, err := s.chain.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) @@ -504,7 +531,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { panic(err) } - count := int64(0) + var count int64 err = s.chain.WalkSnapshot(context.Background(), coldTs, 1, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { count++ @@ -516,8 +543,8 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { panic(err) } - if count > s.liveSetSize { - s.liveSetSize = count + if count > s.markSetSize { + s.markSetSize = count } log.Infow("marking done", "took", time.Since(startMark)) @@ -529,14 +556,14 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { cold := make(map[cid.Cid]struct{}) // some stats for logging - var stHot, stCold int + var hotCnt, coldCnt int - // 2.1 iterate through the snoop and collect unreachable cold objects - err = s.snoop.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) error { - // is the object stil hot? - if wrEpoch > coldEpoch { + // 2.1 iterate through the tracking store and collect unreachable cold objects + err = s.tracker.ForEach(func(cid cid.Cid, writeEpoch abi.ChainEpoch) error { + // is the object still hot? 
+ if writeEpoch > coldEpoch { // yes, stay in the hotstore - stHot++ + hotCnt++ return nil } @@ -547,13 +574,13 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { } if mark { - stHot++ + hotCnt++ return nil } // it's cold, mark it for move cold[cid] = struct{}{} - stCold++ + coldCnt++ return nil }) @@ -563,7 +590,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { } log.Infow("collection done", "took", time.Since(startCollect)) - log.Infow("compaction stats", "hot", stHot, "cold", stCold) + log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt) // 2.2 copy the cold objects to the coldstore log.Info("moving cold objects to the coldstore") @@ -576,10 +603,10 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { if err != nil { if err == dstore.ErrNotFound { // this can happen if the node is killed after we have deleted the block from the hotstore - // but before we have deleted it from the snoop; just delete the snoop. - err = s.snoop.Delete(cid) + // but before we have deleted it from the tracker; just delete the tracker. + err = s.tracker.Delete(cid) if err != nil { - log.Errorf("error deleting cid %s from snoop: %s", cid, err) + log.Errorf("error deleting cid %s from tracker: %s", cid, err) // TODO do something better here -- just continue? panic(err) } @@ -629,20 +656,20 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { } log.Infow("purging cold from hotstore done", "took", time.Since(purgeStart)) - // 2.4 remove the snoop tracking for cold objects + // 2.4 remove the tracker tracking for cold objects purgeStart = time.Now() - log.Info("purging cold objects from snoop") + log.Info("purging cold objects from tracker") - err = s.snoop.DeleteBatch(cold) + err = s.tracker.DeleteBatch(cold) if err != nil { - log.Errorf("error purging cold objects from snoop: %s", err) + log.Errorf("error purging cold objects from tracker: %s", err) // TODO do something better here -- just continue? 
panic(err) } - log.Infow("purging cold from snoop done", "took", time.Since(purgeStart)) + log.Infow("purging cold from tracker done", "took", time.Since(purgeStart)) // we are done; do some housekeeping - err = s.snoop.Sync() + err = s.tracker.Sync() if err != nil { // TODO do something better here panic(err) @@ -661,16 +688,16 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { log.Infow("running full compaction", "currentEpoch", curTs.Height(), "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch) - // create two live sets, one for marking the cold finality region + // create two mark sets, one for marking the cold finality region // and one for marking the hot region - hotSet, err := s.env.NewLiveSet("hot", s.liveSetSize) + hotSet, err := s.env.Create("hot", s.markSetSize) if err != nil { // TODO do something better here panic(err) } defer hotSet.Close() //nolint:errcheck - coldSet, err := s.env.NewLiveSet("cold", s.liveSetSize) + coldSet, err := s.env.Create("cold", s.markSetSize) if err != nil { // TODO do something better here panic(err) @@ -694,8 +721,8 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { panic(err) } - if count > s.liveSetSize { - s.liveSetSize = count + if count > s.markSetSize { + s.markSetSize = count } // Phase 1b: mark all reachable CIDs in the cold range @@ -717,8 +744,8 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { panic(err) } - if count > s.liveSetSize { - s.liveSetSize = count + if count > s.markSetSize { + s.markSetSize = count } log.Infow("marking done", "took", time.Since(startMark)) @@ -736,8 +763,8 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { cold := make(map[cid.Cid]struct{}) dead := make(map[cid.Cid]struct{}) - // 2.1 iterate through the snoop and collect cold and dead objects - err = s.snoop.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) error { + // 2.1 iterate through the tracker and collect cold and dead objects + err = s.tracker.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) 
error { // is the object stil hot? if wrEpoch > coldEpoch { // yes, stay in the hotstore @@ -803,10 +830,10 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { if err != nil { if err == dstore.ErrNotFound { // this can happen if the node is killed after we have deleted the block from the hotstore - // but before we have deleted it from the snoop; just delete the snoop. - err = s.snoop.Delete(cid) + // but before we have deleted it from the tracker; just delete the tracker. + err = s.tracker.Delete(cid) if err != nil { - log.Errorf("error deleting cid %s from snoop: %s", cid, err) + log.Errorf("error deleting cid %s from tracker: %s", cid, err) // TODO do something better here -- just continue? panic(err) } @@ -856,17 +883,17 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { } log.Infow("purging cold from hotstore done", "took", time.Since(purgeStart)) - // 2.4 remove the snoop tracking for cold objects + // 2.4 remove the tracker tracking for cold objects purgeStart = time.Now() - log.Info("purging cold objects from snoop") + log.Info("purging cold objects from tracker") - err = s.snoop.DeleteBatch(cold) + err = s.tracker.DeleteBatch(cold) if err != nil { - log.Errorf("error purging cold objects from snoop: %s", err) + log.Errorf("error purging cold objects from tracker: %s", err) // TODO do something better here -- just continue? panic(err) } - log.Infow("purging cold from snoop done", "took", time.Since(purgeStart)) + log.Infow("purging cold from tracker done", "took", time.Since(purgeStart)) // 3. 
if we have dead objects, delete them from the hotstore and remove the tracking if len(dead) > 0 { @@ -886,24 +913,24 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { } log.Infow("purging dead from hotstore done", "took", time.Since(purgeStart)) - // remove the snoop tracking + // remove the tracker tracking purgeStart := time.Now() - log.Info("purging dead objects from snoop") + log.Info("purging dead objects from tracker") - err = s.snoop.DeleteBatch(dead) + err = s.tracker.DeleteBatch(dead) if err != nil { - log.Errorf("error purging dead objects from snoop: %s", err) + log.Errorf("error purging dead objects from tracker: %s", err) // TODO do something better here -- just continue? panic(err) } - log.Infow("purging dead from snoop done", "took", time.Since(purgeStart)) + log.Infow("purging dead from tracker done", "took", time.Since(purgeStart)) } log.Infow("sweeping done", "took", time.Since(startSweep)) // we are done; do some housekeeping - err = s.snoop.Sync() + err = s.tracker.Sync() if err != nil { // TODO do something better here panic(err) diff --git a/blockstore/splitstore/tracking.go b/blockstore/splitstore/tracking.go index 56f9294e7..af944ba08 100644 --- a/blockstore/splitstore/tracking.go +++ b/blockstore/splitstore/tracking.go @@ -9,6 +9,11 @@ import ( cid "github.com/ipfs/go-cid" ) +// TrackingStore is a persistent store that tracks blocks that are added +// within the current compaction range, including the epoch at which they are +// written. +// +// On every compaction, we iterate over type TrackingStore interface { Put(cid.Cid, abi.ChainEpoch) error PutBatch([]cid.Cid, abi.ChainEpoch) error @@ -20,11 +25,13 @@ type TrackingStore interface { Close() error } -func OpenTrackingStore(path string, trackingStoreType string) (TrackingStore, error) { - switch trackingStoreType { +// OpenTrackingStore opens a tracking store of the specified type in the +// specified path. 
+func OpenTrackingStore(path string, ttype string) (TrackingStore, error) { + switch ttype { case "", "bolt": - return OpenBoltTrackingStore(filepath.Join(path, "snoop.bolt")) + return OpenBoltTrackingStore(filepath.Join(path, "tracker.bolt")) default: - return nil, xerrors.Errorf("unknown tracking store type %s", trackingStoreType) + return nil, xerrors.Errorf("unknown tracking store type %s", ttype) } } diff --git a/blockstore/splitstore/tracking_bolt.go b/blockstore/splitstore/tracking_bolt.go index 5cc50e05e..8c491043e 100644 --- a/blockstore/splitstore/tracking_bolt.go +++ b/blockstore/splitstore/tracking_bolt.go @@ -28,7 +28,7 @@ func OpenBoltTrackingStore(path string) (*BoltTrackingStore, error) { return nil, err } - bucketId := []byte("snoop") + bucketId := []byte("tracker") err = db.Update(func(tx *bolt.Tx) error { _, err := tx.CreateBucketIfNotExists(bucketId) if err != nil { diff --git a/blockstore/splitstore/tracking_test.go b/blockstore/splitstore/tracking_test.go index 55b520133..0cd47ecb8 100644 --- a/blockstore/splitstore/tracking_test.go +++ b/blockstore/splitstore/tracking_test.go @@ -44,7 +44,7 @@ func testTrackingStore(t *testing.T, tsType string) { } } - path := "/tmp/liveset-test" + path := "/tmp/markset-test" err := os.MkdirAll(path, 0777) if err != nil { diff --git a/node/config/def.go b/node/config/def.go index f66ac2ba0..3aff08468 100644 --- a/node/config/def.go +++ b/node/config/def.go @@ -128,7 +128,7 @@ type Blockstore struct { type Splitstore struct { HotStoreType string TrackingStoreType string - LiveSetType string + MarkSetType string EnableFullCompaction bool EnableGC bool // EXPERIMENTAL Archival bool diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 8fffb1536..500fcffcd 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -76,7 +76,7 @@ func SplitBlockstore(cfg *config.Blockstore) func(lc fx.Lifecycle, r repo.Locked cfg := &splitstore.Config{ TrackingStoreType: 
cfg.Splitstore.TrackingStoreType, - LiveSetType: cfg.Splitstore.LiveSetType, + MarkSetType: cfg.Splitstore.MarkSetType, EnableFullCompaction: cfg.Splitstore.EnableFullCompaction, EnableGC: cfg.Splitstore.EnableGC, Archival: cfg.Splitstore.Archival, From f651f43c5ecc5e6be8823ebc1d9a47ba8381d2ed Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 10:04:02 +0200 Subject: [PATCH 110/148] improve comment accuracy --- blockstore/splitstore/splitstore.go | 7 ++++--- blockstore/splitstore/tracking.go | 5 +---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index bbc72150a..0fb216cbd 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -53,7 +53,8 @@ var ( // On first start, the splitstore will walk the state tree and will copy // all active blocks into the hotstore. warmupEpochKey = dstore.NewKey("/splitstore/warmupEpoch") - log = logging.Logger("splitstore") + + log = logging.Logger("splitstore") ) const batchSize = 16384 @@ -66,12 +67,12 @@ func init() { type Config struct { // TrackingStore is the type of tracking store to use. // - // Supported values are: "bolt". + // Supported values are: "bolt" (default if omitted). TrackingStoreType string // MarkSetType is the type of mark set to use. // - // Supported values are: "bolt", "bloom". + // Supported values are: "bloom" (default if omitted), "bolt". 
MarkSetType string // perform full reachability analysis (expensive) for compaction // You should enable this option if you plan to use the splitstore without a backing coldstore diff --git a/blockstore/splitstore/tracking.go b/blockstore/splitstore/tracking.go index af944ba08..fc1895e49 100644 --- a/blockstore/splitstore/tracking.go +++ b/blockstore/splitstore/tracking.go @@ -10,10 +10,7 @@ import ( ) // TrackingStore is a persistent store that tracks blocks that are added -// within the current compaction range, including the epoch at which they are -// written. -// -// On every compaction, we iterate over +// to the hotstore, tracking the epoch at which they are written. type TrackingStore interface { Put(cid.Cid, abi.ChainEpoch) error PutBatch([]cid.Cid, abi.ChainEpoch) error From 35d466d847986268894c87ca783059e8623f2ef1 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 10:48:59 +0200 Subject: [PATCH 111/148] use sha256 for bloom key rehashing --- blockstore/splitstore/markset_bloom.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/markset_bloom.go b/blockstore/splitstore/markset_bloom.go index b5c2fe176..c213436c8 100644 --- a/blockstore/splitstore/markset_bloom.go +++ b/blockstore/splitstore/markset_bloom.go @@ -2,12 +2,12 @@ package splitstore import ( "crypto/rand" + "crypto/sha256" "golang.org/x/xerrors" bbloom "github.com/ipfs/bbloom" cid "github.com/ipfs/go-cid" - blake2b "github.com/minio/blake2b-simd" ) const ( @@ -59,7 +59,7 @@ func (s *BloomMarkSet) saltedKey(cid cid.Cid) []byte { key := make([]byte, len(s.salt)+len(hash)) n := copy(key, s.salt) copy(key[n:], hash) - rehash := blake2b.Sum256(key) + rehash := sha256.Sum256(key) return rehash[:] } From 68213a92cb9e26a09e1e2e1db5f5ab132ed3dc52 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 10:53:24 +0200 Subject: [PATCH 112/148] use ioutil.TempDir for test directories --- blockstore/splitstore/markset_test.go | 6 ++---- 
blockstore/splitstore/tracking_test.go | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/blockstore/splitstore/markset_test.go b/blockstore/splitstore/markset_test.go index 22f4b4ad5..367ab8d06 100644 --- a/blockstore/splitstore/markset_test.go +++ b/blockstore/splitstore/markset_test.go @@ -1,7 +1,7 @@ package splitstore import ( - "os" + "io/ioutil" "testing" cid "github.com/ipfs/go-cid" @@ -19,9 +19,7 @@ func TestBloomMarkSet(t *testing.T) { func testMarkSet(t *testing.T, lsType string) { t.Helper() - path := "/tmp/markset-test" - - err := os.MkdirAll(path, 0777) + path, err := ioutil.TempDir("", "sweep-test.*") if err != nil { t.Fatal(err) } diff --git a/blockstore/splitstore/tracking_test.go b/blockstore/splitstore/tracking_test.go index 0cd47ecb8..afd475da5 100644 --- a/blockstore/splitstore/tracking_test.go +++ b/blockstore/splitstore/tracking_test.go @@ -1,7 +1,7 @@ package splitstore import ( - "os" + "io/ioutil" "testing" cid "github.com/ipfs/go-cid" @@ -44,9 +44,7 @@ func testTrackingStore(t *testing.T, tsType string) { } } - path := "/tmp/markset-test" - - err := os.MkdirAll(path, 0777) + path, err := ioutil.TempDir("", "snoop-test.*") if err != nil { t.Fatal(err) } From 5184bc5c405309d97e2f84ec003d26b0cc044778 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 11:02:19 +0200 Subject: [PATCH 113/148] log consistency for full compaction --- blockstore/splitstore/splitstore.go | 31 ++++++++++++++--------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 0fb216cbd..208701d25 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -645,7 +645,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { // 2.3 delete cold objects from the hotstore // TODO we really want batching for this! 
log.Info("purging cold objects from the hotstore") - purgeStart := time.Now() + startPurge := time.Now() for cid := range cold { // delete the object from the hotstore err = s.hot.DeleteBlock(cid) @@ -655,10 +655,10 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { panic(err) } } - log.Infow("purging cold from hotstore done", "took", time.Since(purgeStart)) + log.Infow("purging cold from hotstore done", "took", time.Since(startPurge)) // 2.4 remove the tracker tracking for cold objects - purgeStart = time.Now() + startPurge = time.Now() log.Info("purging cold objects from tracker") err = s.tracker.DeleteBatch(cold) @@ -667,7 +667,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { // TODO do something better here -- just continue? panic(err) } - log.Infow("purging cold from tracker done", "took", time.Since(purgeStart)) + log.Infow("purging cold from tracker done", "took", time.Since(startPurge)) // we are done; do some housekeeping err = s.tracker.Sync() @@ -755,8 +755,8 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { // - If a cold object is reachable in the hot range, it stays in the hotstore. // - If a cold object is reachable in the cold range, it is moved to the coldstore. // - If a cold object is unreachable, it is deleted if GC is enabled, otherwise moved to the coldstore. - startSweep := time.Now() - log.Info("sweeping cold objects") + log.Info("collecting cold objects") + startCollect := time.Now() // some stats for logging var stHot, stCold, stDead int @@ -819,6 +819,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { panic(err) } + log.Infow("collection done", "took", time.Since(startCollect)) log.Infow("compaction stats", "hot", stHot, "cold", stCold, "dead", stDead) // 2.2 copy the cold objects to the coldstore @@ -872,7 +873,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { // 2.3 delete cold objects from the hotstore // TODO we really want batching for this! 
log.Info("purging cold objects from the hotstore") - purgeStart := time.Now() + startPurge := time.Now() for cid := range cold { // delete the object from the hotstore err = s.hot.DeleteBlock(cid) @@ -882,10 +883,10 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { panic(err) } } - log.Infow("purging cold from hotstore done", "took", time.Since(purgeStart)) + log.Infow("purging cold from hotstore done", "took", time.Since(startPurge)) // 2.4 remove the tracker tracking for cold objects - purgeStart = time.Now() + startPurge = time.Now() log.Info("purging cold objects from tracker") err = s.tracker.DeleteBatch(cold) @@ -894,13 +895,13 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { // TODO do something better here -- just continue? panic(err) } - log.Infow("purging cold from tracker done", "took", time.Since(purgeStart)) + log.Infow("purging cold from tracker done", "took", time.Since(startPurge)) // 3. if we have dead objects, delete them from the hotstore and remove the tracking if len(dead) > 0 { log.Info("deleting dead objects") - purgeStart = time.Now() + startPurge = time.Now() log.Info("purging dead objects from the hotstore") // TODO we really want batching for this! 
for cid := range dead { @@ -912,10 +913,10 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { panic(err) } } - log.Infow("purging dead from hotstore done", "took", time.Since(purgeStart)) + log.Infow("purging dead from hotstore done", "took", time.Since(startPurge)) // remove the tracker tracking - purgeStart := time.Now() + startPurge := time.Now() log.Info("purging dead objects from tracker") err = s.tracker.DeleteBatch(dead) @@ -925,11 +926,9 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { panic(err) } - log.Infow("purging dead from tracker done", "took", time.Since(purgeStart)) + log.Infow("purging dead from tracker done", "took", time.Since(startPurge)) } - log.Infow("sweeping done", "took", time.Since(startSweep)) - // we are done; do some housekeeping err = s.tracker.Sync() if err != nil { From c762536dcbcb67dd7aa6929d66ac501c5174fb12 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 11:20:39 +0200 Subject: [PATCH 114/148] deduplicate code --- blockstore/splitstore/splitstore.go | 217 +++++++++++----------------- 1 file changed, 88 insertions(+), 129 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 208701d25..9764f508d 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -596,75 +596,29 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { // 2.2 copy the cold objects to the coldstore log.Info("moving cold objects to the coldstore") startMove := time.Now() - - batch := make([]blocks.Block, 0, batchSize) - - for cid := range cold { - blk, err := s.hot.Get(cid) - if err != nil { - if err == dstore.ErrNotFound { - // this can happen if the node is killed after we have deleted the block from the hotstore - // but before we have deleted it from the tracker; just delete the tracker. 
- err = s.tracker.Delete(cid) - if err != nil { - log.Errorf("error deleting cid %s from tracker: %s", cid, err) - // TODO do something better here -- just continue? - panic(err) - } - } else { - log.Errorf("error retrieving tracked block %s from hotstore: %s", cid, err) - // TODO do something better here -- just continue? - panic(err) - } - - continue - } - - batch = append(batch, blk) - if len(batch) == batchSize { - err = s.cold.PutMany(batch) - if err != nil { - log.Errorf("error putting cold batch to coldstore: %s", err) - // TODO do something better here -- just continue? - panic(err) - } - batch = batch[:0] - } - } - - if len(batch) > 0 { - err = s.cold.PutMany(batch) - if err != nil { - log.Errorf("error putting cold batch to coldstore: %s", err) - // TODO do something better here -- just continue? - panic(err) - } + err = s.moveColdBlocks(cold) + if err != nil { + // TODO do something better here + panic(err) } log.Infow("moving done", "took", time.Since(startMove)) // 2.3 delete cold objects from the hotstore - // TODO we really want batching for this! log.Info("purging cold objects from the hotstore") startPurge := time.Now() - for cid := range cold { - // delete the object from the hotstore - err = s.hot.DeleteBlock(cid) - if err != nil { - log.Errorf("error deleting block %s from hotstore: %s", cid, err) - // TODO do something better here -- just continue? - panic(err) - } + err = s.purgeBlocks(cold) + if err != nil { + // TODO do something better here + panic(err) } log.Infow("purging cold from hotstore done", "took", time.Since(startPurge)) // 2.4 remove the tracker tracking for cold objects startPurge = time.Now() log.Info("purging cold objects from tracker") - - err = s.tracker.DeleteBatch(cold) + err = s.purgeTracking(cold) if err != nil { - log.Errorf("error purging cold objects from tracker: %s", err) - // TODO do something better here -- just continue? 
+ // TODO do something better here panic(err) } log.Infow("purging cold from tracker done", "took", time.Since(startPurge)) @@ -683,6 +637,68 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { } } +func (s *SplitStore) moveColdBlocks(cold map[cid.Cid]struct{}) error { + batch := make([]blocks.Block, 0, batchSize) + + for cid := range cold { + blk, err := s.hot.Get(cid) + if err != nil { + if err == dstore.ErrNotFound { + // this can happen if the node is killed after we have deleted the block from the hotstore + // but before we have deleted it from the tracker; just delete the tracker. + err = s.tracker.Delete(cid) + if err != nil { + return xerrors.Errorf("error deleting unreachable cid %s from tracker: %w", cid, err) + } + } else { + return xerrors.Errorf("error retrieving tracked block %s from hotstore: %w", cid, err) + } + + continue + } + + batch = append(batch, blk) + if len(batch) == batchSize { + err = s.cold.PutMany(batch) + if err != nil { + return xerrors.Errorf("error putting batch to coldstore: %w", err) + } + batch = batch[:0] + } + } + + if len(batch) > 0 { + err := s.cold.PutMany(batch) + if err != nil { + return xerrors.Errorf("error putting cold to coldstore: %w", err) + } + } + + return nil +} + +func (s *SplitStore) purgeBlocks(cids map[cid.Cid]struct{}) error { + // TODO batch deletion -- this is very slow with many objects, but we need + // a DeleteBatch method in the blockstore interface + for cid := range cids { + err := s.hot.DeleteBlock(cid) + if err != nil { + return xerrors.Errorf("error deleting block %s from hotstore: %e", cid, err) + } + } + + return nil +} + +func (s *SplitStore) purgeTracking(cids map[cid.Cid]struct{}) error { + err := s.tracker.DeleteBatch(cids) + if err != nil { + return xerrors.Errorf("error deleting batch from tracker: %w", err) + } + + return nil +} + func (s *SplitStore) compactFull(curTs *types.TipSet) { epoch := curTs.Height() coldEpoch := s.baseEpoch + CompactionCold @@ -825,74 +841,29 @@ func (s 
*SplitStore) compactFull(curTs *types.TipSet) { // 2.2 copy the cold objects to the coldstore log.Info("moving cold objects to the coldstore") startMove := time.Now() - - batch := make([]blocks.Block, 0, batchSize) - for cid := range cold { - blk, err := s.hot.Get(cid) - if err != nil { - if err == dstore.ErrNotFound { - // this can happen if the node is killed after we have deleted the block from the hotstore - // but before we have deleted it from the tracker; just delete the tracker. - err = s.tracker.Delete(cid) - if err != nil { - log.Errorf("error deleting cid %s from tracker: %s", cid, err) - // TODO do something better here -- just continue? - panic(err) - } - } else { - log.Errorf("error retrieving tracked block %s from hotstore: %s", cid, err) - // TODO do something better here -- just continue? - panic(err) - } - - continue - } - - batch = append(batch, blk) - if len(batch) == batchSize { - err = s.cold.PutMany(batch) - if err != nil { - log.Errorf("error putting cold batch to coldstore: %s", err) - // TODO do something better here -- just continue? - panic(err) - } - batch = batch[:0] - } - } - - if len(batch) > 0 { - err = s.cold.PutMany(batch) - if err != nil { - log.Errorf("error putting cold batch to coldstore: %s", err) - // TODO do something better here -- just continue? - panic(err) - } + err = s.moveColdBlocks(cold) + if err != nil { + // TODO do something better here + panic(err) } log.Infow("moving done", "took", time.Since(startMove)) // 2.3 delete cold objects from the hotstore - // TODO we really want batching for this! log.Info("purging cold objects from the hotstore") startPurge := time.Now() - for cid := range cold { - // delete the object from the hotstore - err = s.hot.DeleteBlock(cid) - if err != nil { - log.Errorf("error deleting block %s from hotstore: %s", cid, err) - // TODO do something better here -- just continue? 
- panic(err) - } + err = s.purgeBlocks(cold) + if err != nil { + // TODO do something better here + panic(err) } log.Infow("purging cold from hotstore done", "took", time.Since(startPurge)) // 2.4 remove the tracker tracking for cold objects startPurge = time.Now() log.Info("purging cold objects from tracker") - - err = s.tracker.DeleteBatch(cold) + err = s.purgeTracking(cold) if err != nil { - log.Errorf("error purging cold objects from tracker: %s", err) - // TODO do something better here -- just continue? + // TODO do something better here panic(err) } log.Infow("purging cold from tracker done", "took", time.Since(startPurge)) @@ -900,32 +871,20 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { // 3. if we have dead objects, delete them from the hotstore and remove the tracking if len(dead) > 0 { log.Info("deleting dead objects") - - startPurge = time.Now() - log.Info("purging dead objects from the hotstore") - // TODO we really want batching for this! - for cid := range dead { - // delete the object from the hotstore - err = s.hot.DeleteBlock(cid) - if err != nil { - log.Errorf("error deleting block %s from hotstore: %s", cid, err) - // TODO do something better here -- just continue? - panic(err) - } + err = s.purgeBlocks(dead) + if err != nil { + // TODO do something better here + panic(err) } - log.Infow("purging dead from hotstore done", "took", time.Since(startPurge)) // remove the tracker tracking startPurge := time.Now() log.Info("purging dead objects from tracker") - - err = s.tracker.DeleteBatch(dead) + err = s.purgeTracking(dead) if err != nil { - log.Errorf("error purging dead objects from tracker: %s", err) - // TODO do something better here -- just continue? 
+ // TODO do something better here panic(err) } - log.Infow("purging dead from tracker done", "took", time.Since(startPurge)) } From 6014273e69180244339ae97cfd7c8419628eb8de Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 11:27:25 +0200 Subject: [PATCH 115/148] storage miner doesn't need a splitstore --- node/builder.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/node/builder.go b/node/builder.go index 5f7e872a4..dfc6eab29 100644 --- a/node/builder.go +++ b/node/builder.go @@ -594,12 +594,6 @@ func Repo(r repo.Repo) Option { return xerrors.Errorf("invalid config from repo, got: %T", c) } cfg = &cfgp.Blockstore - case repo.StorageMiner: - cfgp, ok := c.(*config.StorageMiner) - if !ok { - return xerrors.Errorf("invalid config from repo, got: %T", c) - } - cfg = &cfgp.Blockstore default: cfg = &config.Blockstore{} } From dd0c308427d978eddbe1bcdb90e267955f538d77 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 15:45:36 +0200 Subject: [PATCH 116/148] move Blockstore config to FullNode, rename to Chainstore and add default for HotStoreType --- node/builder.go | 8 ++++---- node/config/def.go | 32 +++++++++++++++----------------- node/modules/blockstore.go | 2 +- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/node/builder.go b/node/builder.go index dfc6eab29..47e685543 100644 --- a/node/builder.go +++ b/node/builder.go @@ -586,16 +586,16 @@ func Repo(r repo.Repo) Option { return err } - var cfg *config.Blockstore + var cfg *config.Chainstore switch settings.nodeType { case repo.FullNode: cfgp, ok := c.(*config.FullNode) if !ok { return xerrors.Errorf("invalid config from repo, got: %T", c) } - cfg = &cfgp.Blockstore + cfg = &cfgp.Chainstore default: - cfg = &config.Blockstore{} + cfg = &config.Chainstore{} } return Options( @@ -605,7 +605,7 @@ func Repo(r repo.Repo) Option { Override(new(dtypes.UniversalBlockstore), modules.UniversalBlockstore), If(cfg.EnableSplitstore, - If(cfg.Splitstore.GetHotStoreType() == "badger", + 
If(cfg.Splitstore.HotStoreType == "badger", Override(new(dtypes.HotBlockstore), modules.BadgerHotBlockstore)), Override(new(dtypes.SplitBlockstore), modules.SplitBlockstore(cfg)), Override(new(dtypes.ChainBlockstore), modules.ChainSplitBlockstore), diff --git a/node/config/def.go b/node/config/def.go index 3aff08468..5e1b14128 100644 --- a/node/config/def.go +++ b/node/config/def.go @@ -12,19 +12,19 @@ import ( // Common is common config between full node and miner type Common struct { - API API - Libp2p Libp2p - Pubsub Pubsub - Blockstore Blockstore + API API + Libp2p Libp2p + Pubsub Pubsub } // FullNode is a full node config type FullNode struct { Common - Client Client - Metrics Metrics - Wallet Wallet - Fees FeeConfig + Client Client + Metrics Metrics + Wallet Wallet + Fees FeeConfig + Chainstore Chainstore } // // Common @@ -120,7 +120,7 @@ type Pubsub struct { RemoteTracer string } -type Blockstore struct { +type Chainstore struct { EnableSplitstore bool Splitstore Splitstore } @@ -134,14 +134,6 @@ type Splitstore struct { Archival bool } -func (s *Splitstore) GetHotStoreType() string { - // default is badger - if s.HotStoreType == "" { - return "badger" - } - return s.HotStoreType -} - // // Full Node type Metrics struct { @@ -207,6 +199,12 @@ func DefaultFullNode() *FullNode { Client: Client{ SimultaneousTransfers: DefaultSimultaneousTransfers, }, + Chainstore: Chainstore{ + EnableSplitstore: false, + Splitstore: Splitstore{ + HotStoreType: "badger", + }, + }, } } diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 500fcffcd..c85010f40 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -67,7 +67,7 @@ func BadgerHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlocksto return hot, err } -func SplitBlockstore(cfg *config.Blockstore) func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.UniversalBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) { +func 
SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.UniversalBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) { return func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.UniversalBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) { path, err := r.SplitstorePath() if err != nil { From 86b73d651e1c28442b9861ad8ba6578e9c350bf2 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 16:45:45 +0200 Subject: [PATCH 117/148] add DeleteMany to Blockstore interface --- blockstore/badger/blockstore.go | 38 +++++++++++++++++++++++++++++ blockstore/blockstore.go | 16 ++++++++++++ blockstore/buffered.go | 8 ++++++ blockstore/mem.go | 7 ++++++ blockstore/splitstore/splitstore.go | 5 ++++ blockstore/sync.go | 6 +++++ blockstore/timed.go | 6 +++++ 7 files changed, 86 insertions(+) diff --git a/blockstore/badger/blockstore.go b/blockstore/badger/blockstore.go index 22f9036e3..cd740e650 100644 --- a/blockstore/badger/blockstore.go +++ b/blockstore/badger/blockstore.go @@ -318,6 +318,44 @@ func (b *Blockstore) DeleteBlock(cid cid.Cid) error { }) } +func (b *Blockstore) DeleteMany(cids []cid.Cid) error { + if atomic.LoadInt64(&b.state) != stateOpen { + return ErrBlockstoreClosed + } + + batch := b.DB.NewWriteBatch() + defer batch.Cancel() + + // toReturn tracks the byte slices to return to the pool, if we're using key + // prefixing. we can't return each slice to the pool after each Set, because + // badger holds on to the slice. 
+ var toReturn [][]byte + if b.prefixing { + toReturn = make([][]byte, 0, len(cids)) + defer func() { + for _, b := range toReturn { + KeyPool.Put(b) + } + }() + } + + for _, cid := range cids { + k, pooled := b.PooledStorageKey(cid) + if pooled { + toReturn = append(toReturn, k) + } + if err := batch.Delete(k); err != nil { + return err + } + } + + err := batch.Flush() + if err != nil { + err = fmt.Errorf("failed to delete blocks from badger blockstore: %w", err) + } + return err +} + // AllKeysChan implements Blockstore.AllKeysChan. func (b *Blockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { if atomic.LoadInt64(&b.state) != stateOpen { diff --git a/blockstore/blockstore.go b/blockstore/blockstore.go index 5d4578777..2414dbad0 100644 --- a/blockstore/blockstore.go +++ b/blockstore/blockstore.go @@ -18,6 +18,7 @@ var ErrNotFound = blockstore.ErrNotFound type Blockstore interface { blockstore.Blockstore blockstore.Viewer + BatchDeleter } // BasicBlockstore is an alias to the original IPFS Blockstore. @@ -25,6 +26,10 @@ type BasicBlockstore = blockstore.Blockstore type Viewer = blockstore.Viewer +type BatchDeleter interface { + DeleteMany(cids []cid.Cid) error +} + // WrapIDStore wraps the underlying blockstore in an "identity" blockstore. // The ID store filters out all puts for blocks with CIDs using the "identity" // hash function. It also extracts inlined blocks from CIDs using the identity @@ -53,6 +58,17 @@ func (a *adaptedBlockstore) View(cid cid.Cid, callback func([]byte) error) error return callback(blk.RawData()) } +func (a *adaptedBlockstore) DeleteMany(cids []cid.Cid) error { + for _, cid := range cids { + err := a.DeleteBlock(cid) + if err != nil { + return err + } + } + + return nil +} + // Adapt adapts a standard blockstore to a Lotus blockstore by // enriching it with the extra methods that Lotus requires (e.g. View, Sync). 
// diff --git a/blockstore/buffered.go b/blockstore/buffered.go index 200e9b995..5d3d38f78 100644 --- a/blockstore/buffered.go +++ b/blockstore/buffered.go @@ -96,6 +96,14 @@ func (bs *BufferedBlockstore) DeleteBlock(c cid.Cid) error { return bs.write.DeleteBlock(c) } +func (bs *BufferedBlockstore) DeleteMany(cids []cid.Cid) error { + if err := bs.read.DeleteMany(cids); err != nil { + return err + } + + return bs.write.DeleteMany(cids) +} + func (bs *BufferedBlockstore) View(c cid.Cid, callback func([]byte) error) error { // both stores are viewable. if err := bs.write.View(c, callback); err == ErrNotFound { diff --git a/blockstore/mem.go b/blockstore/mem.go index c8de3e3e8..8ea69d46a 100644 --- a/blockstore/mem.go +++ b/blockstore/mem.go @@ -20,6 +20,13 @@ func (m MemBlockstore) DeleteBlock(k cid.Cid) error { return nil } +func (m MemBlockstore) DeleteMany(ks []cid.Cid) error { + for _, k := range ks { + delete(m, k) + } + return nil +} + func (m MemBlockstore) Has(k cid.Cid) (bool, error) { _, ok := m[k] return ok, nil diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 9764f508d..1431b0496 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -163,6 +163,11 @@ func (s *SplitStore) DeleteBlock(_ cid.Cid) error { return errors.New("DeleteBlock not implemented on SplitStore; don't do this Luke!") //nolint } +func (s *SplitStore) DeleteMany(_ []cid.Cid) error { + // afaict we don't seem to be using this method, so it's not implemented + return errors.New("DeleteMany not implemented on SplitStore; don't do this Luke!") //nolint +} + func (s *SplitStore) Has(cid cid.Cid) (bool, error) { has, err := s.hot.Has(cid) diff --git a/blockstore/sync.go b/blockstore/sync.go index 2da71a898..848ccd19d 100644 --- a/blockstore/sync.go +++ b/blockstore/sync.go @@ -26,6 +26,12 @@ func (m *SyncBlockstore) DeleteBlock(k cid.Cid) error { return m.bs.DeleteBlock(k) } +func (m *SyncBlockstore) DeleteMany(ks 
[]cid.Cid) error { + m.mu.Lock() + defer m.mu.Unlock() + return m.bs.DeleteMany(ks) +} + func (m *SyncBlockstore) Has(k cid.Cid) (bool, error) { m.mu.RLock() defer m.mu.RUnlock() diff --git a/blockstore/timed.go b/blockstore/timed.go index 138375028..ce25bb5bc 100644 --- a/blockstore/timed.go +++ b/blockstore/timed.go @@ -153,6 +153,12 @@ func (t *TimedCacheBlockstore) DeleteBlock(k cid.Cid) error { return multierr.Combine(t.active.DeleteBlock(k), t.inactive.DeleteBlock(k)) } +func (t *TimedCacheBlockstore) DeleteMany(ks []cid.Cid) error { + t.mu.Lock() + defer t.mu.Unlock() + return multierr.Combine(t.active.DeleteMany(ks), t.inactive.DeleteMany(ks)) +} + func (t *TimedCacheBlockstore) AllKeysChan(_ context.Context) (<-chan cid.Cid, error) { t.mu.RLock() defer t.mu.RUnlock() From 8a55b73146a92a62ada5ddc856c455d93ccf29bd Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 18:06:19 +0200 Subject: [PATCH 118/148] fix the situation with WrapIDStore --- blockstore/blockstore.go | 16 +++- blockstore/idstore.go | 170 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 183 insertions(+), 3 deletions(-) create mode 100644 blockstore/idstore.go diff --git a/blockstore/blockstore.go b/blockstore/blockstore.go index 2414dbad0..7e8ff454b 100644 --- a/blockstore/blockstore.go +++ b/blockstore/blockstore.go @@ -1,7 +1,7 @@ package blockstore import ( - "github.com/ipfs/go-cid" + cid "github.com/ipfs/go-cid" ds "github.com/ipfs/go-datastore" logging "github.com/ipfs/go-log/v2" @@ -36,12 +36,22 @@ type BatchDeleter interface { // hash function and returns them on get/has, ignoring the contents of the // blockstore. 
func WrapIDStore(bstore blockstore.Blockstore) Blockstore { - return blockstore.NewIdStore(bstore).(Blockstore) + if is, ok := bstore.(*idstore); ok { + // already wrapped + return is + } + + if bs, ok := bstore.(Blockstore); ok { + // we need to wrap our own becase we don't want to neuter the DeleteMany method + return NewIDStore(bs) + } + + return NewIDStore(Adapt(bstore)) } // FromDatastore creates a new blockstore backed by the given datastore. func FromDatastore(dstore ds.Batching) Blockstore { - return WrapIDStore(blockstore.NewBlockstore(dstore)) + return WrapIDStore(Adapt(blockstore.NewBlockstore(dstore))) } type adaptedBlockstore struct { diff --git a/blockstore/idstore.go b/blockstore/idstore.go new file mode 100644 index 000000000..ab6d8e105 --- /dev/null +++ b/blockstore/idstore.go @@ -0,0 +1,170 @@ +package blockstore + +import ( + "context" + "io" + + "golang.org/x/xerrors" + + blocks "github.com/ipfs/go-block-format" + cid "github.com/ipfs/go-cid" + mh "github.com/multiformats/go-multihash" +) + +var _ Blockstore = (*idstore)(nil) + +type idstore struct { + bs Blockstore +} + +func NewIDStore(bs Blockstore) Blockstore { + return &idstore{bs: bs} +} + +func decodeCid(cid cid.Cid) (inline bool, data []byte, err error) { + dmh, err := mh.Decode(cid.Hash()) + if err != nil { + return false, nil, err + } + + if dmh.Code == mh.ID { + return true, dmh.Digest, nil + } + + return false, nil, err +} + +func (b *idstore) Has(cid cid.Cid) (bool, error) { + inline, _, err := decodeCid(cid) + if err != nil { + return false, xerrors.Errorf("error decoding Cid: %w", err) + } + + if inline { + return true, nil + } + + return b.bs.Has(cid) +} + +func (b *idstore) Get(cid cid.Cid) (blocks.Block, error) { + inline, data, err := decodeCid(cid) + if err != nil { + return nil, xerrors.Errorf("error decoding Cid: %w", err) + } + + if inline { + return blocks.NewBlockWithCid(data, cid) + } + + return b.bs.Get(cid) +} + +func (b *idstore) GetSize(cid cid.Cid) (int, error) { + 
inline, data, err := decodeCid(cid) + if err != nil { + return 0, xerrors.Errorf("error decoding Cid: %w", err) + } + + if inline { + return len(data), err + } + + return b.bs.GetSize(cid) +} + +func (b *idstore) View(cid cid.Cid, cb func([]byte) error) error { + inline, data, err := decodeCid(cid) + if err != nil { + return xerrors.Errorf("error decoding Cid: %w", err) + } + + if inline { + return cb(data) + } + + return b.bs.View(cid, cb) +} + +func (b *idstore) Put(blk blocks.Block) error { + inline, _, err := decodeCid(blk.Cid()) + if err != nil { + return xerrors.Errorf("error decoding Cid: %w", err) + } + + if inline { + return nil + } + + return b.bs.Put(blk) +} + +func (b *idstore) PutMany(blks []blocks.Block) error { + toPut := make([]blocks.Block, 0, len(blks)) + for _, blk := range blks { + inline, _, err := decodeCid(blk.Cid()) + if err != nil { + return xerrors.Errorf("error decoding Cid: %w", err) + } + + if inline { + continue + } + toPut = append(toPut, blk) + } + + if len(toPut) > 0 { + return b.bs.PutMany(toPut) + } + + return nil +} + +func (b *idstore) DeleteBlock(cid cid.Cid) error { + inline, _, err := decodeCid(cid) + if err != nil { + return xerrors.Errorf("error decoding Cid: %w", err) + } + + if inline { + return nil + } + + return b.bs.DeleteBlock(cid) +} + +func (b *idstore) DeleteMany(cids []cid.Cid) error { + toDelete := make([]cid.Cid, 0, len(cids)) + for _, cid := range cids { + inline, _, err := decodeCid(cid) + if err != nil { + return xerrors.Errorf("error decoding Cid: %w", err) + } + + if inline { + continue + } + toDelete = append(toDelete, cid) + } + + if len(toDelete) > 0 { + return b.bs.DeleteMany(toDelete) + } + + return nil +} + +func (b *idstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { + return b.bs.AllKeysChan(ctx) +} + +func (b *idstore) HashOnRead(enabled bool) { + b.bs.HashOnRead(enabled) +} + +func (b *idstore) Close() error { + if c, ok := b.bs.(io.Closer); ok { + return c.Close() + } + return nil 
+} From 2ff5aec80ea4083840bcc6c1a90be39fc260423c Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 18:15:05 +0200 Subject: [PATCH 119/148] satisfy linter, use Prefix for common path of non inline CIDs --- blockstore/idstore.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/blockstore/idstore.go b/blockstore/idstore.go index ab6d8e105..e6148ff04 100644 --- a/blockstore/idstore.go +++ b/blockstore/idstore.go @@ -22,12 +22,16 @@ func NewIDStore(bs Blockstore) Blockstore { } func decodeCid(cid cid.Cid) (inline bool, data []byte, err error) { + if cid.Prefix().MhType != mh.IDENTITY { + return false, nil, nil + } + dmh, err := mh.Decode(cid.Hash()) if err != nil { return false, nil, err } - if dmh.Code == mh.ID { + if dmh.Code == mh.IDENTITY { return true, dmh.Digest, nil } From 86fdad2e310ce4d5dc3996245d21dd4ddbbbfa90 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 18:19:20 +0200 Subject: [PATCH 120/148] fix typo Co-authored-by: raulk --- blockstore/blockstore.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/blockstore/blockstore.go b/blockstore/blockstore.go index 7e8ff454b..4c3b8ad5a 100644 --- a/blockstore/blockstore.go +++ b/blockstore/blockstore.go @@ -42,7 +42,8 @@ func WrapIDStore(bstore blockstore.Blockstore) Blockstore { } if bs, ok := bstore.(Blockstore); ok { - // we need to wrap our own becase we don't want to neuter the DeleteMany method + // we need to wrap our own because we don't want to neuter the DeleteMany method + // the underlying blockstore has implemented an (efficient) DeleteMany return NewIDStore(bs) } From ab52e34e6a48020f6af5ab62631f4e7f0bff5967 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 18:19:47 +0200 Subject: [PATCH 121/148] add comment Co-authored-by: raulk --- blockstore/blockstore.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/blockstore/blockstore.go b/blockstore/blockstore.go index 4c3b8ad5a..2624a7a9d 100644 --- a/blockstore/blockstore.go +++ 
b/blockstore/blockstore.go @@ -47,6 +47,8 @@ func WrapIDStore(bstore blockstore.Blockstore) Blockstore { return NewIDStore(bs) } + // The underlying blockstore does not implement DeleteMany, so we need to shim it. + // This is less efficient as it'll iterate and perform single deletes. return NewIDStore(Adapt(bstore)) } From 4c05ec28ba45e6947118c5a0bc836f312442d0e9 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 18:21:17 +0200 Subject: [PATCH 122/148] fix FromDatastore to not do double adapting --- blockstore/blockstore.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/blockstore/blockstore.go b/blockstore/blockstore.go index 2624a7a9d..23f0bd754 100644 --- a/blockstore/blockstore.go +++ b/blockstore/blockstore.go @@ -47,14 +47,14 @@ func WrapIDStore(bstore blockstore.Blockstore) Blockstore { return NewIDStore(bs) } - // The underlying blockstore does not implement DeleteMany, so we need to shim it. - // This is less efficient as it'll iterate and perform single deletes. + // The underlying blockstore does not implement DeleteMany, so we need to shim it. + // This is less efficient as it'll iterate and perform single deletes. return NewIDStore(Adapt(bstore)) } // FromDatastore creates a new blockstore backed by the given datastore. 
func FromDatastore(dstore ds.Batching) Blockstore { - return WrapIDStore(Adapt(blockstore.NewBlockstore(dstore))) + return WrapIDStore(blockstore.NewBlockstore(dstore)) } type adaptedBlockstore struct { From 06d8ea10b1051341fb2d3759433aa995f839f5e2 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 18:59:00 +0200 Subject: [PATCH 123/148] batch delete during the cold purge --- blockstore/splitstore/splitstore.go | 69 ++++++++++++++++---------- blockstore/splitstore/tracking.go | 2 +- blockstore/splitstore/tracking_bolt.go | 4 +- 3 files changed, 45 insertions(+), 30 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 1431b0496..375ec22d4 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -57,7 +57,12 @@ var ( log = logging.Logger("splitstore") ) -const batchSize = 16384 +const ( + batchSize = 16384 + + defaultColdPurgeSize = 7_000_000 + defaultDeadPurgeSize = 1_000_000 +) func init() { // TODO temporary for debugging purposes; to be removed for merge. 
@@ -107,6 +112,9 @@ type SplitStore struct { baseEpoch abi.ChainEpoch warmupEpoch abi.ChainEpoch + coldPurgeSize int + deadPurgeSize int + mx sync.Mutex curTs *types.TipSet @@ -152,6 +160,12 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co enableGC: cfg.EnableGC, skipOldMsgs: !(cfg.EnableFullCompaction && cfg.Archival), skipMsgReceipts: !(cfg.EnableFullCompaction && cfg.Archival), + + coldPurgeSize: defaultColdPurgeSize, + } + + if cfg.EnableGC { + ss.deadPurgeSize = defaultDeadPurgeSize } return ss, nil @@ -559,7 +573,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { log.Info("collecting cold objects") startCollect := time.Now() - cold := make(map[cid.Cid]struct{}) + cold := make([]cid.Cid, 0, s.coldPurgeSize) // some stats for logging var hotCnt, coldCnt int @@ -585,7 +599,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { } // it's cold, mark it for move - cold[cid] = struct{}{} + cold = append(cold, cid) coldCnt++ return nil }) @@ -595,6 +609,8 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { panic(err) } + s.coldPurgeSize = coldCnt + log.Infow("collection done", "took", time.Since(startCollect)) log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt) @@ -642,10 +658,10 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { } } -func (s *SplitStore) moveColdBlocks(cold map[cid.Cid]struct{}) error { +func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { batch := make([]blocks.Block, 0, batchSize) - for cid := range cold { + for _, cid := range cold { blk, err := s.hot.Get(cid) if err != nil { if err == dstore.ErrNotFound { @@ -682,20 +698,16 @@ func (s *SplitStore) moveColdBlocks(cold map[cid.Cid]struct{}) error { return nil } -func (s *SplitStore) purgeBlocks(cids map[cid.Cid]struct{}) error { - // TODO batch deletion -- this is very slow with many objects, but we need - // a DeleteBatch method in the blockstore interface - for cid := range cids { - err := 
s.hot.DeleteBlock(cid) - if err != nil { - return xerrors.Errorf("error deleting block %s from hotstore: %e", cid, err) - } +func (s *SplitStore) purgeBlocks(cids []cid.Cid) error { + err := s.hot.DeleteMany(cids) + if err != nil { + return xerrors.Errorf("error deleting batch from hotstore: %e", err) } return nil } -func (s *SplitStore) purgeTracking(cids map[cid.Cid]struct{}) error { +func (s *SplitStore) purgeTracking(cids []cid.Cid) error { err := s.tracker.DeleteBatch(cids) if err != nil { return xerrors.Errorf("error deleting batch from tracker: %w", err) @@ -780,17 +792,17 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { startCollect := time.Now() // some stats for logging - var stHot, stCold, stDead int + var hotCnt, coldCnt, deadCnt int - cold := make(map[cid.Cid]struct{}) - dead := make(map[cid.Cid]struct{}) + cold := make([]cid.Cid, 0, s.coldPurgeSize) + dead := make([]cid.Cid, 0, s.deadPurgeSize) // 2.1 iterate through the tracker and collect cold and dead objects err = s.tracker.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) error { // is the object stil hot? 
if wrEpoch > coldEpoch { // yes, stay in the hotstore - stHot++ + hotCnt++ return nil } @@ -802,7 +814,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { if mark { // the object is reachable in the hot range, stay in the hotstore - stHot++ + hotCnt++ return nil } @@ -815,20 +827,20 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { if s.enableGC { if mark { // the object is reachable in the cold range, move it to the cold store - cold[cid] = struct{}{} - stCold++ + cold = append(cold, cid) + coldCnt++ } else { // the object is dead and will be deleted - dead[cid] = struct{}{} - stDead++ + dead = append(dead, cid) + deadCnt++ } } else { // if GC is disabled, we move both cold and dead objects to the coldstore - cold[cid] = struct{}{} + cold = append(cold, cid) if mark { - stCold++ + coldCnt++ } else { - stDead++ + deadCnt++ } } @@ -840,8 +852,11 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { panic(err) } + s.coldPurgeSize = coldCnt + coldCnt>>2 // overestimate a bit + s.deadPurgeSize = deadCnt + deadCnt>>2 // overestimate a bit + log.Infow("collection done", "took", time.Since(startCollect)) - log.Infow("compaction stats", "hot", stHot, "cold", stCold, "dead", stDead) + log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "dead", deadCnt) // 2.2 copy the cold objects to the coldstore log.Info("moving cold objects to the coldstore") diff --git a/blockstore/splitstore/tracking.go b/blockstore/splitstore/tracking.go index fc1895e49..1772a4305 100644 --- a/blockstore/splitstore/tracking.go +++ b/blockstore/splitstore/tracking.go @@ -16,7 +16,7 @@ type TrackingStore interface { PutBatch([]cid.Cid, abi.ChainEpoch) error Get(cid.Cid) (abi.ChainEpoch, error) Delete(cid.Cid) error - DeleteBatch(map[cid.Cid]struct{}) error + DeleteBatch([]cid.Cid) error ForEach(func(cid.Cid, abi.ChainEpoch) error) error Sync() error Close() error diff --git a/blockstore/splitstore/tracking_bolt.go b/blockstore/splitstore/tracking_bolt.go index 
8c491043e..c5c451e15 100644 --- a/blockstore/splitstore/tracking_bolt.go +++ b/blockstore/splitstore/tracking_bolt.go @@ -87,10 +87,10 @@ func (s *BoltTrackingStore) Delete(cid cid.Cid) error { }) } -func (s *BoltTrackingStore) DeleteBatch(cids map[cid.Cid]struct{}) error { +func (s *BoltTrackingStore) DeleteBatch(cids []cid.Cid) error { return s.db.Batch(func(tx *bolt.Tx) error { b := tx.Bucket(s.bucketId) - for cid := range cids { + for _, cid := range cids { err := b.Delete(cid.Hash()) if err != nil { return xerrors.Errorf("error deleting %s", cid) From 006c55a7c9c7a74ea07c4cea0e5bee7a573f539d Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 19:07:54 +0200 Subject: [PATCH 124/148] add startup log --- blockstore/splitstore/splitstore.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 375ec22d4..e64c33075 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -356,6 +356,8 @@ func (s *SplitStore) Start(chain ChainAccessor) error { return xerrors.Errorf("error loading warmup epoch: %w", err) } + log.Infof("starting splitstore at base epoch %d", s.baseEpoch) + // watch the chain chain.SubscribeHeadChanges(s.HeadChange) From 70ebb2ad8d5d3487c5a36e289a2b862778f9e9bd Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 19:28:53 +0200 Subject: [PATCH 125/148] improve startup log --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index e64c33075..15d16afb7 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -356,7 +356,7 @@ func (s *SplitStore) Start(chain ChainAccessor) error { return xerrors.Errorf("error loading warmup epoch: %w", err) } - log.Infof("starting splitstore at base epoch %d", s.baseEpoch) + log.Infow("starting splitstore", "baseEpoch", s.baseEpoch, 
"warmupEpoch", s.warmupEpoch) // watch the chain chain.SubscribeHeadChanges(s.HeadChange) From d2d0980532c9bb12043ef561b802ac6e69999ee5 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 20:20:07 +0200 Subject: [PATCH 126/148] don't delete in one giant batch, use smaller chunks of batchSize --- blockstore/splitstore/splitstore.go | 33 +++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 15d16afb7..0e4c3e150 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -611,7 +611,9 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { panic(err) } - s.coldPurgeSize = coldCnt + if coldCnt > 0 { + s.coldPurgeSize = coldCnt + coldCnt>>2 // overestimate a bit + } log.Infow("collection done", "took", time.Since(startCollect)) log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt) @@ -701,9 +703,24 @@ func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { } func (s *SplitStore) purgeBlocks(cids []cid.Cid) error { - err := s.hot.DeleteMany(cids) - if err != nil { - return xerrors.Errorf("error deleting batch from hotstore: %e", err) + if len(cids) == 0 { + return nil + } + + // don't delete one giant batch of 7M objects, but rather do smaller batches + done := false + for i := 0; done; i++ { + start := i * batchSize + end := start + batchSize + if end >= len(cids) { + end = len(cids) + done = true + } + + err := s.hot.DeleteMany(cids[start:end]) + if err != nil { + return xerrors.Errorf("error deleting batch from hotstore: %e", err) + } } return nil @@ -854,8 +871,12 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { panic(err) } - s.coldPurgeSize = coldCnt + coldCnt>>2 // overestimate a bit - s.deadPurgeSize = deadCnt + deadCnt>>2 // overestimate a bit + if coldCnt > 0 { + s.coldPurgeSize = coldCnt + coldCnt>>2 // overestimate a bit + } + if deadCnt > 0 { + s.deadPurgeSize = deadCnt + 
deadCnt>>2 // overestimate a bit + } log.Infow("collection done", "took", time.Since(startCollect)) log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "dead", deadCnt) From 6b8c60a65971cc8d53bdad136d2ae2d2f338ede3 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 20:36:17 +0200 Subject: [PATCH 127/148] don't ID wrap the hotstore --- node/modules/blockstore.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index c85010f40..c1c52fafe 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -63,8 +63,7 @@ func BadgerHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlocksto return bs.Close() }}) - hot := blockstore.WrapIDStore(bs) - return hot, err + return bs, nil } func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.UniversalBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) { From 6b680d112b1fb1652d3dc69abd4be9486d5ea257 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 2 Mar 2021 22:48:35 +0200 Subject: [PATCH 128/148] do tracker purge in smaller batches --- blockstore/splitstore/splitstore.go | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 0e4c3e150..67588fe9c 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -727,9 +727,24 @@ func (s *SplitStore) purgeBlocks(cids []cid.Cid) error { } func (s *SplitStore) purgeTracking(cids []cid.Cid) error { - err := s.tracker.DeleteBatch(cids) - if err != nil { - return xerrors.Errorf("error deleting batch from tracker: %w", err) + if len(cids) == 0 { + return nil + } + + // don't delete one giant batch of 7M objects, but rather do smaller batches + done := false + for i := 0; done; i++ { + start := i * batchSize + end := start + batchSize + if end >= 
len(cids) { + end = len(cids) + done = true + } + + err := s.tracker.DeleteBatch(cids[start:end]) + if err != nil { + return xerrors.Errorf("error deleting batch from tracker: %w", err) + } } return nil From 11b2f41804e4a2094c4c271fe1c1f0ebe451fde7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 3 Mar 2021 09:46:12 +0200 Subject: [PATCH 129/148] overestimate markSetSize a bit --- blockstore/splitstore/splitstore.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 67588fe9c..8489e5d8b 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -518,10 +518,10 @@ func (s *SplitStore) compact(curTs *types.TipSet) { } func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) { - s.markSetSize = 0 + var count int64 err := s.chain.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { - s.markSetSize++ + count++ return nil }) @@ -529,6 +529,8 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) { // TODO do something better here panic(err) } + + s.markSetSize = count + count>>2 } func (s *SplitStore) compactSimple(curTs *types.TipSet) { @@ -566,7 +568,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { } if count > s.markSetSize { - s.markSetSize = count + s.markSetSize = count + count>>2 } log.Infow("marking done", "took", time.Since(startMark)) @@ -790,7 +792,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { } if count > s.markSetSize { - s.markSetSize = count + s.markSetSize = count + count>>2 } // Phase 1b: mark all reachable CIDs in the cold range @@ -813,7 +815,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { } if count > s.markSetSize { - s.markSetSize = count + s.markSetSize = count + count>>2 } log.Infow("marking done", "took", time.Since(startMark)) From 47d8c874862e7cc6cc123a1db0ddcc9bb31c2e32 Mon Sep 17 00:00:00 2001 
From: vyzo Date: Wed, 3 Mar 2021 09:47:04 +0200 Subject: [PATCH 130/148] fix log --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 8489e5d8b..e342b28a7 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -546,7 +546,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { defer coldSet.Close() //nolint:errcheck // 1. mark reachable cold objects by looking at the objects reachable only from the cold epoch - log.Infow("marking reachable cold objects", "cold_epoch", coldEpoch) + log.Infow("marking reachable cold objects", "coldEpoch", coldEpoch) startMark := time.Now() coldTs, err := s.chain.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) From 508fcb9d266cb60b8d174ca1311a3db01d12cdcf Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 3 Mar 2021 10:56:41 +0200 Subject: [PATCH 131/148] properly close snoop at shutdown --- blockstore/splitstore/splitstore.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index e342b28a7..92e765261 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -8,6 +8,7 @@ import ( "sync/atomic" "time" + "go.uber.org/multierr" "golang.org/x/xerrors" blocks "github.com/ipfs/go-block-format" @@ -372,7 +373,7 @@ func (s *SplitStore) Close() error { } } - return s.env.Close() + return multierr.Combine(s.tracker.Close(), s.env.Close()) } func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { From fdd877534f6b55f2f84c3302ab4a2b72549014a9 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 3 Mar 2021 11:15:26 +0200 Subject: [PATCH 132/148] walk at boundary epoch, 2 finalities from current epoch, to find live objects objects written after that are retained anyway. 
--- blockstore/splitstore/splitstore.go | 35 ++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 92e765261..787c8cd46 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -29,10 +29,10 @@ var ( // // |················· CompactionThreshold ··················| // | | - // =======‖≡≡≡≡≡≡≡‖-------------------------------------------------» - // | | chain --> ↑__ current epoch - // |·······| - // ↑________ CompactionCold. + // =======‖≡≡≡≡≡≡≡‖-----------------------|------------------------» + // | | | chain --> ↑__ current epoch + // |·······| | + // ↑________ CompactionCold ↑________ CompactionBoundary // // === :: cold (already archived) // ≡≡≡ :: to be archived in this compaction @@ -43,6 +43,10 @@ var ( // cold store on compaction. See diagram on CompactionThreshold for a // better sense. CompactionCold = build.Finality + + // CompactionBoundary is the number of epochs from the current epoch at which + // we will walk the chain for live objects + CompactionBoundary = 2 * build.Finality ) var ( @@ -536,8 +540,10 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) { func (s *SplitStore) compactSimple(curTs *types.TipSet) { coldEpoch := s.baseEpoch + CompactionCold + currentEpoch := curTs.Height() + boundaryEpoch := currentEpoch - CompactionBoundary - log.Infow("running simple compaction", "currentEpoch", curTs.Height(), "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch) + log.Infow("running simple compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch, "boundaryEpoch", boundaryEpoch) coldSet, err := s.env.Create("cold", s.markSetSize) if err != nil { @@ -547,17 +553,17 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { defer coldSet.Close() //nolint:errcheck // 1. 
mark reachable cold objects by looking at the objects reachable only from the cold epoch - log.Infow("marking reachable cold objects", "coldEpoch", coldEpoch) + log.Infow("marking reachable cold objects", "boundaryEpoch", boundaryEpoch) startMark := time.Now() - coldTs, err := s.chain.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) + boundaryTs, err := s.chain.GetTipsetByHeight(context.Background(), boundaryEpoch, curTs, true) if err != nil { // TODO do something better here panic(err) } var count int64 - err = s.chain.WalkSnapshot(context.Background(), coldTs, 1, s.skipOldMsgs, s.skipMsgReceipts, + err = s.chain.WalkSnapshot(context.Background(), boundaryTs, 1, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { count++ return coldSet.Mark(cid) @@ -754,10 +760,11 @@ func (s *SplitStore) purgeTracking(cids []cid.Cid) error { } func (s *SplitStore) compactFull(curTs *types.TipSet) { - epoch := curTs.Height() + currentEpoch := curTs.Height() coldEpoch := s.baseEpoch + CompactionCold + boundaryEpoch := currentEpoch - CompactionBoundary - log.Infow("running full compaction", "currentEpoch", curTs.Height(), "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch) + log.Infow("running full compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch, "boundaryEpoch", boundaryEpoch) // create two mark sets, one for marking the cold finality region // and one for marking the hot region @@ -780,8 +787,14 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { startMark := time.Now() // Phase 1a: mark all reachable CIDs in the hot range + boundaryTs, err := s.chain.GetTipsetByHeight(context.Background(), boundaryEpoch, curTs, true) + if err != nil { + // TODO do something better here + panic(err) + } + count := int64(0) - err = s.chain.WalkSnapshot(context.Background(), curTs, epoch-coldEpoch, s.skipOldMsgs, s.skipMsgReceipts, + err = s.chain.WalkSnapshot(context.Background(), boundaryTs, boundaryEpoch-coldEpoch, 
s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { count++ return hotSet.Mark(cid) From 98a7b884fee37e395711361439ea15d02b5217fa Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 3 Mar 2021 12:26:10 +0200 Subject: [PATCH 133/148] implement DeleteMany in union blockstore --- blockstore/union.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/blockstore/union.go b/blockstore/union.go index dfe5ea70c..a99ba2591 100644 --- a/blockstore/union.go +++ b/blockstore/union.go @@ -82,6 +82,15 @@ func (m unionBlockstore) DeleteBlock(cid cid.Cid) (err error) { return err } +func (m unionBlockstore) DeleteMany(cids []cid.Cid) (err error) { + for _, bs := range m { + if err = bs.DeleteMany(cids); err != nil { + break + } + } + return err +} + func (m unionBlockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { // this does not deduplicate; this interface needs to be revisited. outCh := make(chan cid.Cid) From 5fb6a907cb6361fb52d5d0829c963952a12d15c4 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 3 Mar 2021 19:36:13 +0200 Subject: [PATCH 134/148] fix loop condition in batch deletion --- blockstore/splitstore/splitstore.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 787c8cd46..0fe3d0341 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -718,7 +718,7 @@ func (s *SplitStore) purgeBlocks(cids []cid.Cid) error { // don't delete one giant batch of 7M objects, but rather do smaller batches done := false - for i := 0; done; i++ { + for i := 0; !done; i++ { start := i * batchSize end := start + batchSize if end >= len(cids) { @@ -742,7 +742,7 @@ func (s *SplitStore) purgeTracking(cids []cid.Cid) error { // don't delete one giant batch of 7M objects, but rather do smaller batches done := false - for i := 0; done; i++ { + for i := 0; !done; i++ { start := i * batchSize end := start + batchSize if end >= len(cids) 
{ From aff0f1ed4cd4f938898a3f34d908be60a69daa28 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 3 Mar 2021 20:04:29 +0200 Subject: [PATCH 135/148] deduplicate code for batch deletion --- blockstore/splitstore/splitstore.go | 32 ++++++++--------------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 0fe3d0341..919cb0c10 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -711,7 +711,7 @@ func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { return nil } -func (s *SplitStore) purgeBlocks(cids []cid.Cid) error { +func (s *SplitStore) purgeBatch(cids []cid.Cid, deleteBatch func([]cid.Cid) error) error { if len(cids) == 0 { return nil } @@ -726,37 +726,21 @@ func (s *SplitStore) purgeBlocks(cids []cid.Cid) error { done = true } - err := s.hot.DeleteMany(cids[start:end]) + err := deleteBatch(cids[start:end]) if err != nil { - return xerrors.Errorf("error deleting batch from hotstore: %e", err) + return xerrors.Errorf("error deleting batch: %w", err) } } return nil } +func (s *SplitStore) purgeBlocks(cids []cid.Cid) error { + return s.purgeBatch(cids, s.hot.DeleteMany) +} + func (s *SplitStore) purgeTracking(cids []cid.Cid) error { - if len(cids) == 0 { - return nil - } - - // don't delete one giant batch of 7M objects, but rather do smaller batches - done := false - for i := 0; !done; i++ { - start := i * batchSize - end := start + batchSize - if end >= len(cids) { - end = len(cids) - done = true - } - - err := s.tracker.DeleteBatch(cids[start:end]) - if err != nil { - return xerrors.Errorf("error deleting batch from tracker: %w", err) - } - } - - return nil + return s.purgeBatch(cids, s.tracker.DeleteBatch) } func (s *SplitStore) compactFull(curTs *types.TipSet) { From 17be7d3919597d0e5692989543aac74ebd77e27e Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 5 Mar 2021 10:00:17 +0200 Subject: [PATCH 136/148] save markSetSize --- 
blockstore/splitstore/splitstore.go | 71 +++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 14 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 919cb0c10..79c43b2d8 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -59,6 +59,10 @@ var ( // all active blocks into the hotstore. warmupEpochKey = dstore.NewKey("/splitstore/warmupEpoch") + // markSetSizeKey stores the current estimate for the mark set size. + // this is first computed at warmup and updated in every compaction + markSetSizeKey = dstore.NewKey("/splitstore/markSetSize") + log = logging.Logger("splitstore") ) @@ -356,11 +360,22 @@ func (s *SplitStore) Start(chain ChainAccessor) error { s.warmupEpoch = bytesToEpoch(bs) case dstore.ErrNotFound: - default: return xerrors.Errorf("error loading warmup epoch: %w", err) } + // load markSetSize from metadata ds + // if none, the splitstore will compute it during warmup and update in every compaction + bs, err = s.ds.Get(markSetSizeKey) + switch err { + case nil: + s.markSetSize = bytesToInt64(bs) + + case dstore.ErrNotFound: + default: + return xerrors.Errorf("error loading mark set size: %w", err) + } + log.Infow("starting splitstore", "baseEpoch", s.baseEpoch, "warmupEpoch", s.warmupEpoch) // watch the chain @@ -493,7 +508,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { } if count > s.markSetSize { - s.markSetSize = count + s.markSetSize = count + count>>2 // overestimate a bit } // save the warmup epoch @@ -502,6 +517,11 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { if err != nil { log.Errorf("error saving warmup epoch: %s", err) } + + err = s.ds.Put(markSetSizeKey, int64ToBytes(s.markSetSize)) + if err != nil { + log.Errorf("error saving mark set size: %s", err) + } } // Compaction/GC Algorithm @@ -535,7 +555,7 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) { panic(err) } - s.markSetSize = count + count>>2 + 
s.markSetSize = count + count>>2 // overestimate a bit } func (s *SplitStore) compactSimple(curTs *types.TipSet) { @@ -575,7 +595,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { } if count > s.markSetSize { - s.markSetSize = count + count>>2 + s.markSetSize = count + count>>2 // overestimate a bit } log.Infow("marking done", "took", time.Since(startMark)) @@ -669,6 +689,11 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { // TODO do something better here panic(err) } + + err = s.ds.Put(markSetSizeKey, int64ToBytes(s.markSetSize)) + if err != nil { + log.Errorf("error saving mark set size: %s", err) + } } func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { @@ -790,7 +815,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { } if count > s.markSetSize { - s.markSetSize = count + count>>2 + s.markSetSize = count + count>>2 // overestimate a bit } // Phase 1b: mark all reachable CIDs in the cold range @@ -813,7 +838,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { } if count > s.markSetSize { - s.markSetSize = count + count>>2 + s.markSetSize = count + count>>2 // overestimate a bit } log.Infow("marking done", "took", time.Since(startMark)) @@ -958,6 +983,11 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { // TODO do something better here panic(err) } + + err = s.ds.Put(markSetSizeKey, int64ToBytes(s.markSetSize)) + if err != nil { + log.Errorf("error saving mark set size: %s", err) + } } func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error { @@ -967,15 +997,28 @@ func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error { } func epochToBytes(epoch abi.ChainEpoch) []byte { - buf := make([]byte, 16) - n := binary.PutUvarint(buf, uint64(epoch)) - return buf[:n] + return uint64ToBytes(uint64(epoch)) } func bytesToEpoch(buf []byte) abi.ChainEpoch { - epoch, n := binary.Uvarint(buf) - if n < 0 { - panic("bogus base epoch bytes") - } - return abi.ChainEpoch(epoch) + return 
abi.ChainEpoch(bytesToUint64(buf)) +} + +func int64ToBytes(i int64) []byte { + return uint64ToBytes(uint64(i)) +} + +func bytesToInt64(buf []byte) int64 { + return int64(bytesToUint64(buf)) +} + +func uint64ToBytes(i uint64) []byte { + buf := make([]byte, 16) + n := binary.PutUvarint(buf, i) + return buf[:n] +} + +func bytesToUint64(buf []byte) uint64 { + i, _ := binary.Uvarint(buf) + return i } From 9bd009d7957603083d9c9081d24c6600f9f988f4 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 5 Mar 2021 10:11:54 +0200 Subject: [PATCH 137/148] use atomics to demarkate critical section and limit close delay --- blockstore/splitstore/splitstore.go | 32 +++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 79c43b2d8..7477fb20d 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -111,7 +111,9 @@ type ChainAccessor interface { } type SplitStore struct { - compacting int32 + compacting int32 // compaction (or warmp up) in progress + critsection int32 // compaction critical section + closing int32 // the split store is closing fullCompaction bool enableGC bool @@ -385,9 +387,11 @@ func (s *SplitStore) Start(chain ChainAccessor) error { } func (s *SplitStore) Close() error { - if atomic.LoadInt32(&s.compacting) == 1 { - log.Warn("ongoing compaction; waiting for it to finish...") - for atomic.LoadInt32(&s.compacting) == 1 { + atomic.StoreInt32(&s.closing, 1) + + if atomic.LoadInt32(&s.critsection) == 1 { + log.Warn("ongoing compaction in critical section; waiting for it to finish...") + for atomic.LoadInt32(&s.critsection) == 1 { time.Sleep(time.Second) } } @@ -647,6 +651,16 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { log.Infow("collection done", "took", time.Since(startCollect)) log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt) + // Enter critical section + atomic.StoreInt32(&s.critsection, 1) 
+ defer atomic.StoreInt32(&s.critsection, 0) + + // check to see if we are closing first; if that's the case just return + if atomic.LoadInt32(&s.closing) == 1 { + log.Info("splitstore is closing; aborting compaction") + return + } + // 2.2 copy the cold objects to the coldstore log.Info("moving cold objects to the coldstore") startMove := time.Now() @@ -921,6 +935,16 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { log.Infow("collection done", "took", time.Since(startCollect)) log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "dead", deadCnt) + // Enter critical section + atomic.StoreInt32(&s.critsection, 1) + defer atomic.StoreInt32(&s.critsection, 0) + + // check to see if we are closing first; if that's the case just return + if atomic.LoadInt32(&s.closing) == 1 { + log.Info("splitstore is closing; aborting compaction") + return + } + // 2.2 copy the cold objects to the coldstore log.Info("moving cold objects to the coldstore") startMove := time.Now() From c58df3f079eee28a3b77a1a83afc118307d53487 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 5 Mar 2021 10:29:49 +0200 Subject: [PATCH 138/148] don't panic on compaction errors --- blockstore/splitstore/splitstore.go | 112 +++++++++++++--------------- 1 file changed, 51 insertions(+), 61 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 7477fb20d..25b906750 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -530,23 +530,32 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { // Compaction/GC Algorithm func (s *SplitStore) compact(curTs *types.TipSet) { + var err error if s.markSetSize == 0 { start := time.Now() log.Info("estimating mark set size") - s.estimateMarkSetSize(curTs) + err = s.estimateMarkSetSize(curTs) + if err != nil { + log.Errorf("error estimating mark set size: %s; aborting compaction", err) + return + } log.Infow("estimating mark set size done", "took", time.Since(start), 
"size", s.markSetSize) } else { log.Infow("current mark set size estimate", "size", s.markSetSize) } if s.fullCompaction { - s.compactFull(curTs) + err = s.compactFull(curTs) } else { - s.compactSimple(curTs) + err = s.compactSimple(curTs) + } + + if err != nil { + log.Errorf("COMPACTION ERROR: %s", err) } } -func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) { +func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error { var count int64 err := s.chain.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, func(cid cid.Cid) error { @@ -555,14 +564,14 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) { }) if err != nil { - // TODO do something better here - panic(err) + return err } s.markSetSize = count + count>>2 // overestimate a bit + return nil } -func (s *SplitStore) compactSimple(curTs *types.TipSet) { +func (s *SplitStore) compactSimple(curTs *types.TipSet) error { coldEpoch := s.baseEpoch + CompactionCold currentEpoch := curTs.Height() boundaryEpoch := currentEpoch - CompactionBoundary @@ -571,19 +580,17 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { coldSet, err := s.env.Create("cold", s.markSetSize) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error creating mark set: %w", err) } defer coldSet.Close() //nolint:errcheck // 1. 
mark reachable cold objects by looking at the objects reachable only from the cold epoch - log.Infow("marking reachable cold objects", "boundaryEpoch", boundaryEpoch) + log.Infow("marking reachable cold blocks", "boundaryEpoch", boundaryEpoch) startMark := time.Now() boundaryTs, err := s.chain.GetTipsetByHeight(context.Background(), boundaryEpoch, curTs, true) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error getting tipset at boundary epoch: %w", err) } var count int64 @@ -594,8 +601,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { }) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error marking cold blocks: %w", err) } if count > s.markSetSize { @@ -640,8 +646,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { }) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error collecting cold objects: %w", err) } if coldCnt > 0 { @@ -658,16 +663,15 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { // check to see if we are closing first; if that's the case just return if atomic.LoadInt32(&s.closing) == 1 { log.Info("splitstore is closing; aborting compaction") - return + return xerrors.Errorf("compaction aborted") } // 2.2 copy the cold objects to the coldstore - log.Info("moving cold objects to the coldstore") + log.Info("moving cold blocks to the coldstore") startMove := time.Now() err = s.moveColdBlocks(cold) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error moving cold blocks: %w", err) } log.Infow("moving done", "took", time.Since(startMove)) @@ -676,8 +680,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { startPurge := time.Now() err = s.purgeBlocks(cold) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error purging cold blocks: %w", err) } log.Infow("purging cold from hotstore done", "took", 
time.Since(startPurge)) @@ -686,28 +689,27 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) { log.Info("purging cold objects from tracker") err = s.purgeTracking(cold) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error purging tracking for cold blocks: %w", err) } log.Infow("purging cold from tracker done", "took", time.Since(startPurge)) // we are done; do some housekeeping err = s.tracker.Sync() if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error syncing tracker: %w", err) } err = s.setBaseEpoch(coldEpoch) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error saving base epoch: %w", err) } err = s.ds.Put(markSetSizeKey, int64ToBytes(s.markSetSize)) if err != nil { - log.Errorf("error saving mark set size: %s", err) + return xerrors.Errorf("error saving mark set size: %w", err) } + + return nil } func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { @@ -782,7 +784,7 @@ func (s *SplitStore) purgeTracking(cids []cid.Cid) error { return s.purgeBatch(cids, s.tracker.DeleteBatch) } -func (s *SplitStore) compactFull(curTs *types.TipSet) { +func (s *SplitStore) compactFull(curTs *types.TipSet) error { currentEpoch := curTs.Height() coldEpoch := s.baseEpoch + CompactionCold boundaryEpoch := currentEpoch - CompactionBoundary @@ -793,27 +795,24 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { // and one for marking the hot region hotSet, err := s.env.Create("hot", s.markSetSize) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error creating hot mark set: %w", err) } defer hotSet.Close() //nolint:errcheck coldSet, err := s.env.Create("cold", s.markSetSize) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error creating cold mark set: %w", err) } defer coldSet.Close() //nolint:errcheck // Phase 1: marking - log.Info("marking live objects") + 
log.Info("marking live blocks") startMark := time.Now() // Phase 1a: mark all reachable CIDs in the hot range boundaryTs, err := s.chain.GetTipsetByHeight(context.Background(), boundaryEpoch, curTs, true) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error getting tipset at boundary epoch: %w", err) } count := int64(0) @@ -824,8 +823,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { }) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error marking hot blocks: %w", err) } if count > s.markSetSize { @@ -835,8 +833,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { // Phase 1b: mark all reachable CIDs in the cold range coldTs, err := s.chain.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error getting tipset at cold epoch: %w", err) } count = 0 @@ -847,8 +844,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { }) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error marking cold blocks: %w", err) } if count > s.markSetSize { @@ -921,8 +917,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { }) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error collecting cold objects: %w", err) } if coldCnt > 0 { @@ -942,7 +937,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { // check to see if we are closing first; if that's the case just return if atomic.LoadInt32(&s.closing) == 1 { log.Info("splitstore is closing; aborting compaction") - return + return xerrors.Errorf("compaction aborted") } // 2.2 copy the cold objects to the coldstore @@ -950,8 +945,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { startMove := time.Now() err = s.moveColdBlocks(cold) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error moving 
cold blocks: %w", err) } log.Infow("moving done", "took", time.Since(startMove)) @@ -960,8 +954,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { startPurge := time.Now() err = s.purgeBlocks(cold) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error purging cold blocks: %w", err) } log.Infow("purging cold from hotstore done", "took", time.Since(startPurge)) @@ -970,8 +963,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { log.Info("purging cold objects from tracker") err = s.purgeTracking(cold) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error purging tracking for cold blocks: %w", err) } log.Infow("purging cold from tracker done", "took", time.Since(startPurge)) @@ -980,8 +972,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { log.Info("deleting dead objects") err = s.purgeBlocks(dead) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error purging dead blocks: %w", err) } // remove the tracker tracking @@ -989,8 +980,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { log.Info("purging dead objects from tracker") err = s.purgeTracking(dead) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error purging tracking for dead blocks: %w", err) } log.Infow("purging dead from tracker done", "took", time.Since(startPurge)) } @@ -998,20 +988,20 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) { // we are done; do some housekeeping err = s.tracker.Sync() if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error syncing tracker: %w", err) } err = s.setBaseEpoch(coldEpoch) if err != nil { - // TODO do something better here - panic(err) + return xerrors.Errorf("error saving base epoch: %w", err) } err = s.ds.Put(markSetSizeKey, int64ToBytes(s.markSetSize)) if err != nil { - log.Errorf("error saving mark set size: %s", err) + 
return xerrors.Errorf("error saving mark set size: %w", err) } + + return nil } func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error { From 99d21573dad619c1619706fcf28c330168ca1c1a Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 5 Mar 2021 10:54:13 +0200 Subject: [PATCH 139/148] remove DEBUG log spam --- blockstore/splitstore/splitstore.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 25b906750..ac2711fa4 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -73,11 +73,6 @@ const ( defaultDeadPurgeSize = 1_000_000 ) -func init() { - // TODO temporary for debugging purposes; to be removed for merge. - _ = logging.SetLogLevel("splitstore", "DEBUG") -} - type Config struct { // TrackingStore is the type of tracking store to use. // From 2b32c2e597a01412eb9a56503373590be7b10e48 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 5 Mar 2021 11:54:06 +0200 Subject: [PATCH 140/148] add some metrics --- blockstore/splitstore/splitstore.go | 23 +++++++++++++++++-- metrics/metrics.go | 34 +++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index ac2711fa4..6a18e898e 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -21,6 +21,9 @@ import ( bstore "github.com/filecoin-project/lotus/blockstore" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/types" + "github.com/filecoin-project/lotus/metrics" + + "go.opencensus.io/stats" ) var ( @@ -206,7 +209,11 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { return blk, nil case bstore.ErrNotFound: - return s.cold.Get(cid) + blk, err = s.cold.Get(cid) + if err != nil { + stats.Record(context.Background(), metrics.SplitstoreMiss.M(1)) + } + return blk, err default: return nil, err @@ -221,7 +228,11 @@ func 
(s *SplitStore) GetSize(cid cid.Cid) (int, error) { return size, nil case bstore.ErrNotFound: - return s.cold.GetSize(cid) + size, err = s.cold.GetSize(cid) + if err != nil { + stats.Record(context.Background(), metrics.SplitstoreMiss.M(1)) + } + return size, err default: return 0, err @@ -539,11 +550,14 @@ func (s *SplitStore) compact(curTs *types.TipSet) { log.Infow("current mark set size estimate", "size", s.markSetSize) } + start := time.Now() if s.fullCompaction { err = s.compactFull(curTs) } else { err = s.compactSimple(curTs) } + took := time.Since(start).Milliseconds() + stats.Record(context.Background(), metrics.SplitstoreCompactionTimeSeconds.M(float64(took)/1e3)) if err != nil { log.Errorf("COMPACTION ERROR: %s", err) @@ -650,6 +664,8 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) error { log.Infow("collection done", "took", time.Since(startCollect)) log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt) + stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt))) + stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt))) // Enter critical section atomic.StoreInt32(&s.critsection, 1) @@ -924,6 +940,9 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) error { log.Infow("collection done", "took", time.Since(startCollect)) log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "dead", deadCnt) + stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt))) + stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt))) + stats.Record(context.Background(), metrics.SplitstoreCompactionDead.M(int64(deadCnt))) // Enter critical section atomic.StoreInt32(&s.critsection, 1) diff --git a/metrics/metrics.go b/metrics/metrics.go index 45869ea91..5428a81bc 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -82,6 +82,13 @@ var ( WorkerCallsReturnedCount = stats.Int64("sealing/worker_calls_returned_count", "Counter of returned 
worker tasks", stats.UnitDimensionless) WorkerCallsReturnedDuration = stats.Float64("sealing/worker_calls_returned_ms", "Counter of returned worker tasks", stats.UnitMilliseconds) WorkerUntrackedCallsReturned = stats.Int64("sealing/worker_untracked_calls_returned", "Counter of returned untracked worker tasks", stats.UnitDimensionless) + + // splitstore + SplitstoreMiss = stats.Int64("splitstore/miss", "Number of misses in hotstore access", stats.UnitDimensionless) + SplitstoreCompactionTimeSeconds = stats.Float64("splitstore/compaction_time", "Compaction time in seconds", stats.UnitSeconds) + SplitstoreCompactionHot = stats.Int64("splitstore/hot", "Number of hot blocks in last compaction", stats.UnitDimensionless) + SplitstoreCompactionCold = stats.Int64("splitstore/cold", "Number of cold blocks in last compaction", stats.UnitDimensionless) + SplitstoreCompactionDead = stats.Int64("splitstore/dead", "Number of dead blocks in last compaction", stats.UnitDimensionless) ) var ( @@ -222,6 +229,28 @@ var ( Aggregation: workMillisecondsDistribution, TagKeys: []tag.Key{TaskType, WorkerHostname}, } + + // splitstore + SplitstoreMissView = &view.View{ + Measure: SplitstoreMiss, + Aggregation: view.Count(), + } + SplitstoreCompactionTimeSecondsView = &view.View{ + Measure: SplitstoreCompactionTimeSeconds, + Aggregation: view.LastValue(), + } + SplitstoreCompactionHotView = &view.View{ + Measure: SplitstoreCompactionHot, + Aggregation: view.LastValue(), + } + SplitstoreCompactionColdView = &view.View{ + Measure: SplitstoreCompactionCold, + Aggregation: view.Sum(), + } + SplitstoreCompactionDeadView = &view.View{ + Measure: SplitstoreCompactionDead, + Aggregation: view.Sum(), + } ) // DefaultViews is an array of OpenCensus views for metric gathering purposes @@ -258,6 +287,11 @@ var ChainNodeViews = append([]*view.View{ PubsubDropRPCView, VMFlushCopyCountView, VMFlushCopyDurationView, + SplitstoreMissView, + SplitstoreCompactionTimeSecondsView, + SplitstoreCompactionHotView, + 
SplitstoreCompactionColdView, + SplitstoreCompactionDeadView, }, DefaultViews...) var MinerNodeViews = append([]*view.View{ From 0a2f2cf00df7b812c66603535e1410a22766af7b Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 5 Mar 2021 12:04:40 +0200 Subject: [PATCH 141/148] use the right condition for triggering the miss metric --- blockstore/splitstore/splitstore.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 6a18e898e..559c26264 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -210,7 +210,7 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { case bstore.ErrNotFound: blk, err = s.cold.Get(cid) - if err != nil { + if err == nil { stats.Record(context.Background(), metrics.SplitstoreMiss.M(1)) } return blk, err @@ -229,7 +229,7 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { case bstore.ErrNotFound: size, err = s.cold.GetSize(cid) - if err != nil { + if err == nil { stats.Record(context.Background(), metrics.SplitstoreMiss.M(1)) } return size, err From 09f5ba177a936636bd94013d4cfef249829f2056 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 5 Mar 2021 19:55:32 +0200 Subject: [PATCH 142/148] add splitstore unit test --- blockstore/splitstore/splitstore.go | 2 +- blockstore/splitstore/splitstore_test.go | 248 +++++++++++++++++++++++ blockstore/splitstore/tracking.go | 75 +++++++ 3 files changed, 324 insertions(+), 1 deletion(-) create mode 100644 blockstore/splitstore/splitstore_test.go diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 559c26264..5ed64b54e 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -79,7 +79,7 @@ const ( type Config struct { // TrackingStore is the type of tracking store to use. // - // Supported values are: "bolt" (default if omitted). 
+ // Supported values are: "bolt" (default if omitted), "mem" (for tests and readonly access). TrackingStoreType string // MarkSetType is the type of mark set to use. diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go new file mode 100644 index 000000000..69d308503 --- /dev/null +++ b/blockstore/splitstore/splitstore_test.go @@ -0,0 +1,248 @@ +package splitstore + +import ( + "context" + "fmt" + "sync" + "testing" + "time" + + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/lotus/blockstore" + "github.com/filecoin-project/lotus/chain/types" + "github.com/filecoin-project/lotus/chain/types/mock" + + cid "github.com/ipfs/go-cid" + datastore "github.com/ipfs/go-datastore" + logging "github.com/ipfs/go-log/v2" +) + +func init() { + CompactionThreshold = 5 + CompactionCold = 1 + CompactionBoundary = 2 + logging.SetLogLevel("splitstore", "DEBUG") +} + +func testSplitStore(t *testing.T, cfg *Config) { + t.Helper() + + chain := &mockChain{} + // genesis + genBlock := mock.MkBlock(nil, 0, 0) + genTs := mock.TipSet(genBlock) + chain.push(genTs) + + // the myriads of stores + ds := datastore.NewMapDatastore() + hot := blockstore.NewMemorySync() + cold := blockstore.NewMemorySync() + + // put the genesis block to cold store + blk, err := genBlock.ToStorageBlock() + if err != nil { + t.Fatal(err) + } + + err = cold.Put(blk) + if err != nil { + t.Fatal(err) + } + + // open the splitstore + ss, err := Open("", ds, hot, cold, cfg) + if err != nil { + t.Fatal(err) + } + defer ss.Close() + + err = ss.Start(chain) + if err != nil { + t.Fatal(err) + } + + // make some tipsets, but not enough to cause compaction + mkBlock := func(curTs *types.TipSet, i int) *types.TipSet { + blk := mock.MkBlock(curTs, uint64(i), uint64(i)) + sblk, err := blk.ToStorageBlock() + if err != nil { + t.Fatal(err) + } + err = ss.Put(sblk) + if err != nil { + t.Fatal(err) + } + ts := mock.TipSet(blk) + chain.push(ts) + + return ts + 
} + + mkGarbageBlock := func(curTs *types.TipSet, i int) { + blk := mock.MkBlock(curTs, uint64(i), uint64(i)) + sblk, err := blk.ToStorageBlock() + if err != nil { + t.Fatal(err) + } + err = ss.Put(sblk) + if err != nil { + t.Fatal(err) + } + } + + curTs := genTs + for i := 1; i < 5; i++ { + curTs = mkBlock(curTs, i) + } + + mkGarbageBlock(genTs, 1) + + // count objects in the cold and hot stores + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + countBlocks := func(bs blockstore.Blockstore) int { + count := 0 + ch, err := bs.AllKeysChan(ctx) + if err != nil { + t.Fatal(err) + } + for _ = range ch { + count++ + } + return count + } + + coldCnt := countBlocks(cold) + hotCnt := countBlocks(hot) + + if coldCnt != 1 { + t.Fatalf("expected %d blocks, but got %d", 1, coldCnt) + } + + if hotCnt != 4 { + t.Fatalf("expected %d blocks, but got %d", 4, hotCnt) + } + + // trigger a compaction + for i := 5; i < 10; i++ { + curTs = mkBlock(curTs, i) + time.Sleep(time.Second) + } + + coldCnt = countBlocks(cold) + hotCnt = countBlocks(hot) + + if !cfg.EnableFullCompaction { + if coldCnt != 5 { + t.Fatalf("expected %d cold blocks, but got %d", 5, coldCnt) + } + + if hotCnt != 5 { + t.Fatalf("expected %d hot blocks, but got %d", 5, hotCnt) + } + } + + if cfg.EnableFullCompaction && !cfg.EnableGC { + if coldCnt != 3 { + t.Fatalf("expected %d cold blocks, but got %d", 3, coldCnt) + } + + if hotCnt != 7 { + t.Fatalf("expected %d hot blocks, but got %d", 7, hotCnt) + } + } + + if cfg.EnableFullCompaction && cfg.EnableGC { + if coldCnt != 2 { + t.Fatalf("expected %d cold blocks, but got %d", 2, coldCnt) + } + + if hotCnt != 7 { + t.Fatalf("expected %d hot blocks, but got %d", 7, hotCnt) + } + } +} + +func TestSplitStoreSimpleCompaction(t *testing.T) { + testSplitStore(t, &Config{TrackingStoreType: "mem"}) +} + +func TestSplitStoreFullCompactionWithoutGC(t *testing.T) { + testSplitStore(t, &Config{ + TrackingStoreType: "mem", + EnableFullCompaction: true, + }) 
+} + +func TestSplitStoreFullCompactionWithGC(t *testing.T) { + testSplitStore(t, &Config{ + TrackingStoreType: "mem", + EnableFullCompaction: true, + EnableGC: true, + }) +} + +type mockChain struct { + sync.Mutex + tipsets []*types.TipSet + listener func(revert []*types.TipSet, apply []*types.TipSet) error +} + +func (c *mockChain) push(ts *types.TipSet) { + c.Lock() + c.tipsets = append(c.tipsets, ts) + c.Unlock() + + if c.listener != nil { + err := c.listener(nil, []*types.TipSet{ts}) + if err != nil { + log.Errorf("mockchain: error dispatching listener: %s", err) + } + } +} + +func (c *mockChain) GetTipsetByHeight(_ context.Context, epoch abi.ChainEpoch, _ *types.TipSet, _ bool) (*types.TipSet, error) { + c.Lock() + defer c.Unlock() + + iEpoch := int(epoch) + if iEpoch > len(c.tipsets) { + return nil, fmt.Errorf("bad epoch %d", epoch) + } + + return c.tipsets[iEpoch-1], nil +} + +func (c *mockChain) GetHeaviestTipSet() *types.TipSet { + c.Lock() + defer c.Unlock() + + return c.tipsets[len(c.tipsets)-1] +} + +func (c *mockChain) SubscribeHeadChanges(change func(revert []*types.TipSet, apply []*types.TipSet) error) { + c.listener = change +} + +func (c *mockChain) WalkSnapshot(_ context.Context, ts *types.TipSet, epochs abi.ChainEpoch, _ bool, _ bool, f func(cid.Cid) error) error { + c.Lock() + defer c.Unlock() + + start := int(ts.Height()) - 1 + end := start - int(epochs) + if end < 0 { + end = -1 + } + for i := start; i > end; i-- { + ts := c.tipsets[i] + for _, cid := range ts.Cids() { + err := f(cid) + if err != nil { + return err + } + } + } + + return nil +} diff --git a/blockstore/splitstore/tracking.go b/blockstore/splitstore/tracking.go index 1772a4305..d57fd45ef 100644 --- a/blockstore/splitstore/tracking.go +++ b/blockstore/splitstore/tracking.go @@ -2,6 +2,7 @@ package splitstore import ( "path/filepath" + "sync" "golang.org/x/xerrors" @@ -28,7 +29,81 @@ func OpenTrackingStore(path string, ttype string) (TrackingStore, error) { switch ttype { case 
"", "bolt": return OpenBoltTrackingStore(filepath.Join(path, "tracker.bolt")) + case "mem": + return NewMemTrackingStore(), nil default: return nil, xerrors.Errorf("unknown tracking store type %s", ttype) } } + +// NewMemTrackingStore creates an in-memory tracking store. +// This is only useful for test or situations where you don't want to open the +// real tracking store (eg concurrent read only access on a node's datastore) +func NewMemTrackingStore() *MemTrackingStore { + return &MemTrackingStore{tab: make(map[cid.Cid]abi.ChainEpoch)} +} + +// MemTrackingStore is a simple in-memory tracking store +type MemTrackingStore struct { + sync.Mutex + tab map[cid.Cid]abi.ChainEpoch +} + +var _ TrackingStore = (*MemTrackingStore)(nil) + +func (s *MemTrackingStore) Put(cid cid.Cid, epoch abi.ChainEpoch) error { + s.Lock() + defer s.Unlock() + s.tab[cid] = epoch + return nil +} + +func (s *MemTrackingStore) PutBatch(cids []cid.Cid, epoch abi.ChainEpoch) error { + s.Lock() + defer s.Unlock() + for _, cid := range cids { + s.tab[cid] = epoch + } + return nil +} + +func (s *MemTrackingStore) Get(cid cid.Cid) (abi.ChainEpoch, error) { + s.Lock() + defer s.Unlock() + epoch, ok := s.tab[cid] + if ok { + return epoch, nil + } + return 0, xerrors.Errorf("missing tracking epoch for %s", cid) +} + +func (s *MemTrackingStore) Delete(cid cid.Cid) error { + s.Lock() + defer s.Unlock() + delete(s.tab, cid) + return nil +} + +func (s *MemTrackingStore) DeleteBatch(cids []cid.Cid) error { + s.Lock() + defer s.Unlock() + for _, cid := range cids { + delete(s.tab, cid) + } + return nil +} + +func (s *MemTrackingStore) ForEach(f func(cid.Cid, abi.ChainEpoch) error) error { + s.Lock() + defer s.Unlock() + for cid, epoch := range s.tab { + err := f(cid, epoch) + if err != nil { + return err + } + } + return nil +} + +func (s *MemTrackingStore) Sync() error { return nil } +func (s *MemTrackingStore) Close() error { return nil } From e85391b46c7e29b27cc9be0c0d66f29977a85b6f Mon Sep 17 00:00:00 
2001 From: vyzo Date: Fri, 5 Mar 2021 20:05:32 +0200 Subject: [PATCH 143/148] quiet stupid linter --- blockstore/splitstore/splitstore_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index 69d308503..db5144039 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -54,7 +54,7 @@ func testSplitStore(t *testing.T, cfg *Config) { if err != nil { t.Fatal(err) } - defer ss.Close() + defer ss.Close() //nolint err = ss.Start(chain) if err != nil { @@ -107,7 +107,7 @@ func testSplitStore(t *testing.T, cfg *Config) { if err != nil { t.Fatal(err) } - for _ = range ch { + for range ch { count++ } return count From 8562a9bb8208b70fcdf151852d1e0dde5e1f99e8 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 8 Mar 2021 18:12:09 +0200 Subject: [PATCH 144/148] garbage collect hotstore after compaction --- blockstore/badger/blockstore.go | 9 +++++++++ blockstore/splitstore/splitstore.go | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/blockstore/badger/blockstore.go b/blockstore/badger/blockstore.go index cd740e650..7c249880c 100644 --- a/blockstore/badger/blockstore.go +++ b/blockstore/badger/blockstore.go @@ -131,6 +131,15 @@ func (b *Blockstore) Close() error { return b.DB.Close() } +// GC runs garbage collection on the value log +func (b *Blockstore) GC() error { + if atomic.LoadInt64(&b.state) != stateOpen { + return ErrBlockstoreClosed + } + + return b.DB.RunValueLogGC(0.125) +} + // View implements blockstore.Viewer, which leverages zero-copy read-only // access to values. 
func (b *Blockstore) View(cid cid.Cid, fn func([]byte) error) error { diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 5ed64b54e..c69e26cc2 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -710,6 +710,17 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) error { return xerrors.Errorf("error syncing tracker: %w", err) } + if gc, ok := s.hot.(interface{ GC() error }); ok { + log.Infof("garbage collecting hotstore") + startGC := time.Now() + err = gc.GC() + if err != nil { + log.Warnf("error garbage collecting hotstore: %s", err) + } else { + log.Infow("garbage collecting done", "took", time.Since(startGC)) + } + } + err = s.setBaseEpoch(coldEpoch) if err != nil { return xerrors.Errorf("error saving base epoch: %w", err) From 52de95d3443841d0b51594544cf744317042c139 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 8 Mar 2021 18:30:09 +0200 Subject: [PATCH 145/148] also gc in compactFull, not just compactSimple --- blockstore/splitstore/splitstore.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index c69e26cc2..6551b6d51 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1016,6 +1016,17 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) error { return xerrors.Errorf("error syncing tracker: %w", err) } + if gc, ok := s.hot.(interface{ GC() error }); ok { + log.Infof("garbage collecting hotstore") + startGC := time.Now() + err = gc.GC() + if err != nil { + log.Warnf("error garbage collecting hotstore: %s", err) + } else { + log.Infow("garbage collecting done", "took", time.Since(startGC)) + } + } + err = s.setBaseEpoch(coldEpoch) if err != nil { return xerrors.Errorf("error saving base epoch: %w", err) From 3d1b855f20149c4e5ff6d7493d0a1c45e49706ef Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 8 Mar 2021 19:22:53 +0200 Subject: [PATCH 
146/148] rename GC to CollectGarbage, ignore badger.ErrNoRewrite --- blockstore/badger/blockstore.go | 12 +++++++++--- blockstore/splitstore/splitstore.go | 12 ++++++------ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/blockstore/badger/blockstore.go b/blockstore/badger/blockstore.go index 7c249880c..16f9331f2 100644 --- a/blockstore/badger/blockstore.go +++ b/blockstore/badger/blockstore.go @@ -131,13 +131,19 @@ func (b *Blockstore) Close() error { return b.DB.Close() } -// GC runs garbage collection on the value log -func (b *Blockstore) GC() error { +// CollectGarbage runs garbage collection on the value log +func (b *Blockstore) CollectGarbage() error { if atomic.LoadInt64(&b.state) != stateOpen { return ErrBlockstoreClosed } - return b.DB.RunValueLogGC(0.125) + err := b.DB.RunValueLogGC(0.125) + if err == badger.ErrNoRewrite { + // not really an error in this case + return nil + } + + return err } // View implements blockstore.Viewer, which leverages zero-copy read-only diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 6551b6d51..4e20dd7d2 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -710,14 +710,14 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) error { return xerrors.Errorf("error syncing tracker: %w", err) } - if gc, ok := s.hot.(interface{ GC() error }); ok { + if gc, ok := s.hot.(interface{ CollectGarbage() error }); ok { log.Infof("garbage collecting hotstore") startGC := time.Now() - err = gc.GC() + err = gc.CollectGarbage() if err != nil { log.Warnf("error garbage collecting hotstore: %s", err) } else { - log.Infow("garbage collecting done", "took", time.Since(startGC)) + log.Infow("garbage collection done", "took", time.Since(startGC)) } } @@ -1016,14 +1016,14 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) error { return xerrors.Errorf("error syncing tracker: %w", err) } - if gc, ok := s.hot.(interface{ GC() error }); ok { 
+ if gc, ok := s.hot.(interface{ CollectGarbage() error }); ok { log.Infof("garbage collecting hotstore") startGC := time.Now() - err = gc.GC() + err = gc.CollectGarbage() if err != nil { log.Warnf("error garbage collecting hotstore: %s", err) } else { - log.Infow("garbage collecting done", "took", time.Since(startGC)) + log.Infow("garbage collection done", "took", time.Since(startGC)) } } From 3bd77701d8acdb3f0eb304107452bcecbb71db50 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 8 Mar 2021 19:46:21 +0200 Subject: [PATCH 147/148] deduplicate code --- blockstore/splitstore/splitstore.go | 35 +++++++++++++---------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 4e20dd7d2..fb3e28803 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -710,16 +710,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) error { return xerrors.Errorf("error syncing tracker: %w", err) } - if gc, ok := s.hot.(interface{ CollectGarbage() error }); ok { - log.Infof("garbage collecting hotstore") - startGC := time.Now() - err = gc.CollectGarbage() - if err != nil { - log.Warnf("error garbage collecting hotstore: %s", err) - } else { - log.Infow("garbage collection done", "took", time.Since(startGC)) - } - } + s.gcHotstore() err = s.setBaseEpoch(coldEpoch) if err != nil { @@ -806,6 +797,19 @@ func (s *SplitStore) purgeTracking(cids []cid.Cid) error { return s.purgeBatch(cids, s.tracker.DeleteBatch) } +func (s *SplitStore) gcHotstore() { + if gc, ok := s.hot.(interface{ CollectGarbage() error }); ok { + log.Infof("garbage collecting hotstore") + startGC := time.Now() + err := gc.CollectGarbage() + if err != nil { + log.Warnf("error garbage collecting hotstore: %s", err) + } else { + log.Infow("garbage collection done", "took", time.Since(startGC)) + } + } +} + func (s *SplitStore) compactFull(curTs *types.TipSet) error { currentEpoch := 
curTs.Height() coldEpoch := s.baseEpoch + CompactionCold @@ -1016,16 +1020,7 @@ func (s *SplitStore) compactFull(curTs *types.TipSet) error { return xerrors.Errorf("error syncing tracker: %w", err) } - if gc, ok := s.hot.(interface{ CollectGarbage() error }); ok { - log.Infof("garbage collecting hotstore") - startGC := time.Now() - err = gc.CollectGarbage() - if err != nil { - log.Warnf("error garbage collecting hotstore: %s", err) - } else { - log.Infow("garbage collection done", "took", time.Since(startGC)) - } - } + s.gcHotstore() err = s.setBaseEpoch(coldEpoch) if err != nil { From 90741da019bba15b97952b3952ca5326aa2914e2 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 8 Mar 2021 21:46:44 +0200 Subject: [PATCH 148/148] tune badger gc to repeatedly gc the value log until there is no rewrite --- blockstore/badger/blockstore.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/blockstore/badger/blockstore.go b/blockstore/badger/blockstore.go index 16f9331f2..2c00f4240 100644 --- a/blockstore/badger/blockstore.go +++ b/blockstore/badger/blockstore.go @@ -137,7 +137,11 @@ func (b *Blockstore) CollectGarbage() error { return ErrBlockstoreClosed } - err := b.DB.RunValueLogGC(0.125) + var err error + for err == nil { + err = b.DB.RunValueLogGC(0.125) + } + if err == badger.ErrNoRewrite { // not really an error in this case return nil