From d8d85373f56e24b75cf751b81d32da870ff31a5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Sun, 1 Nov 2020 13:50:41 +0000 Subject: [PATCH] fix lotus-shed datastore commands. --- cmd/lotus-shed/datastore.go | 31 ++++++++++++++------- node/repo/blockstore_opts.go | 52 ++++++++++++++++++++++++++++++++++++ node/repo/fsrepo.go | 42 +++++------------------------ 3 files changed, 79 insertions(+), 46 deletions(-) create mode 100644 node/repo/blockstore_opts.go diff --git a/cmd/lotus-shed/datastore.go b/cmd/lotus-shed/datastore.go index 83422e77b..8cdc1630c 100644 --- a/cmd/lotus-shed/datastore.go +++ b/cmd/lotus-shed/datastore.go @@ -8,10 +8,10 @@ import ( "os" "strings" + "github.com/dgraph-io/badger/v2" "github.com/docker/go-units" "github.com/ipfs/go-datastore" dsq "github.com/ipfs/go-datastore/query" - badgerds "github.com/ipfs/go-ds-badger2" logging "github.com/ipfs/go-log" "github.com/mitchellh/go-homedir" "github.com/polydawn/refmt/cbor" @@ -312,30 +312,41 @@ var datastoreRewriteCmd = &cli.Command{ return xerrors.Errorf("cannot get toPath: %w", err) } - opts := repo.ChainBadgerOptions() - opts.Options = opts.Options.WithSyncWrites(false) - to, err := badgerds.NewDatastore(toPath, &opts) + var ( + from *badger.DB + to *badger.DB + ) + + // open the destination (to) store. + opts, err := repo.BadgerBlockstoreOptions(repo.BlockstoreChain, toPath, false) if err != nil { - return xerrors.Errorf("opennig 'to' datastore: %w", err) + return xerrors.Errorf("failed to get badger options: %w", err) + } + opts.SyncWrites = false + if to, err = badger.Open(opts.Options); err != nil { + return xerrors.Errorf("opening 'to' badger store: %w", err) } - opts.Options = opts.Options.WithReadOnly(false) - from, err := badgerds.NewDatastore(fromPath, &opts) + // open the source (from) store. + opts, err = repo.BadgerBlockstoreOptions(repo.BlockstoreChain, fromPath, true) if err != nil { - return xerrors.Errorf("opennig 'from' datastore: %w", err) + return xerrors.Errorf("failed to get badger options: %w", err) + } + if from, err = badger.Open(opts.Options); err != nil { + return xerrors.Errorf("opening 'from' datastore: %w", err) } pr, pw := io.Pipe() errCh := make(chan error) go func() { bw := bufio.NewWriterSize(pw, 64<<20) - _, err := from.DB.Backup(bw, 0) + _, err := from.Backup(bw, 0) _ = bw.Flush() _ = pw.CloseWithError(err) errCh <- err }() go func() { - err := to.DB.Load(pr, 256) + err := to.Load(pr, 256) errCh <- err }() diff --git a/node/repo/blockstore_opts.go b/node/repo/blockstore_opts.go new file mode 100644 index 000000000..3744d7647 --- /dev/null +++ b/node/repo/blockstore_opts.go @@ -0,0 +1,52 @@ +package repo + +import badgerbs "github.com/filecoin-project/lotus/lib/blockstore/badger" + +// BadgerBlockstoreOptions returns the badger options to apply for the provided +// domain. +func BadgerBlockstoreOptions(domain BlockstoreDomain, path string, readonly bool) (badgerbs.Options, error) { + if domain != BlockstoreChain { + return badgerbs.Options{}, ErrInvalidBlockstoreDomain + } + + opts := badgerbs.DefaultOptions(path) + + // Due to legacy usage of blockstore.Blockstore, over a datastore, all + // blocks are prefixed with this namespace. In the future, this can go away, + // in order to shorten keys, but it'll require a migration. + opts.Prefix = "/blocks/" + + // Blockstore values are immutable; therefore we do not expect any + // conflicts to emerge. 
+ opts.DetectConflicts = false + + // This is to optimize the database on close so it can be opened + // read-only and efficiently queried. We don't do that and hanging on + // stop isn't nice. + opts.CompactL0OnClose = false + + // The alternative is "crash on start and tell the user to fix it". This + // will truncate corrupt and unsynced data, which we don't guarantee to + // persist anyways. + opts.Truncate = true + + // We mmap the index into memory, and access values from disk. + // Ideally the table loading mode would be settable by LSM level. + opts.ValueLogLoadingMode = badgerbs.FileIO + opts.TableLoadingMode = badgerbs.MemoryMap + + // Embed only values < 128 bytes in the LSM; larger values in value logs. + opts.ValueThreshold = 128 + + // Reduce this from 64MiB to 16MiB. That means badger will hold on to + // 20MiB by default instead of 80MiB. This does not appear to have a + // significant performance hit. + opts.MaxTableSize = 16 << 20 + + // NOTE: The chain blockstore doesn't require any GC (blocks are never + // deleted). This will change if we move to a tiered blockstore. + + opts.ReadOnly = readonly + + return opts, nil +} diff --git a/node/repo/fsrepo.go b/node/repo/fsrepo.go index 2883e57d6..39495d613 100644 --- a/node/repo/fsrepo.go +++ b/node/repo/fsrepo.go @@ -286,49 +286,19 @@ func (fsr *fsLockedRepo) Close() error { return err } +// Blockstore returns a blockstore for the provided data domain. func (fsr *fsLockedRepo) Blockstore(domain BlockstoreDomain) (blockstore.Blockstore, error) { if domain != BlockstoreChain { return nil, ErrInvalidBlockstoreDomain } path := fsr.join(filepath.Join(fsDatastore, "chain")) - opts := badgerbs.DefaultOptions(path) - - // Due to legacy usage of blockstore.Blockstore, over a datastore, all - // blocks are prefixed with this namespace. In the future, this can go away, - // in order to shorten keys, but it'll require a migration. - opts.Prefix = "/blocks/" - - // Blockstore values are immutable; therefore we do not expect any - // conflicts to emerge. - opts.DetectConflicts = false - - // This is to optimize the database on close so it can be opened - // read-only and efficiently queried. We don't do that and hanging on - // stop isn't nice. - opts.CompactL0OnClose = false - - // The alternative is "crash on start and tell the user to fix it". This - // will truncate corrupt and unsynced data, which we don't guarantee to - // persist anyways. - opts.Truncate = true - - // We mmap the index into memory, and access values from disk. - // Ideally the table loading mode would be settable by LSM level. - opts.ValueLogLoadingMode = badgerbs.FileIO - opts.TableLoadingMode = badgerbs.MemoryMap - - // Embed only values < 128 bytes in the LSM; larger values in value logs. - opts.ValueThreshold = 128 - - // Reduce this from 64MiB to 16MiB. That means badger will hold on to - // 20MiB by default instead of 80MiB. This does not appear to have a - // significant performance hit. - opts.MaxTableSize = 16 << 20 - - // NOTE: The chain blockstore doesn't require any GC (blocks are never - // deleted). This will change if we move to a tiered blockstore. + readonly := fsr.readonly + opts, err := BadgerBlockstoreOptions(domain, path, readonly) + if err != nil { + return nil, err + } return badgerbs.Open(opts) }
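
Below is a minimal, self-contained sketch of the Backup/Load pipeline that the rewritten `datastore rewrite` command relies on. It is not the exact lotus-shed code: it opens the two stores with plain badger v2 defaults instead of the new repo.BadgerBlockstoreOptions helper, and the paths, buffered error channel, and error handling are illustrative only.

// rewrite_sketch.go: standalone sketch of streaming one badger v2 store into
// another via Backup/Load, mirroring the pattern in the patched
// `lotus-shed datastore rewrite` command. Paths are placeholders.
package main

import (
	"bufio"
	"io"
	"log"

	"github.com/dgraph-io/badger/v2"
)

func rewrite(fromPath, toPath string) error {
	// Open the source store read-only, as the patch does for 'from'.
	from, err := badger.Open(badger.DefaultOptions(fromPath).WithReadOnly(true))
	if err != nil {
		return err
	}
	defer from.Close()

	// Open the destination store with sync writes disabled, as the patch
	// does for 'to'; a crash mid-rewrite just means starting over.
	to, err := badger.Open(badger.DefaultOptions(toPath).WithSyncWrites(false))
	if err != nil {
		return err
	}
	defer to.Close()

	// Stream a full backup of 'from' into 'to' through an in-memory pipe,
	// buffering writes so Backup isn't blocked on every small chunk.
	pr, pw := io.Pipe()
	errCh := make(chan error, 2)

	go func() {
		bw := bufio.NewWriterSize(pw, 64<<20)
		_, err := from.Backup(bw, 0) // since=0: back up everything
		_ = bw.Flush()
		_ = pw.CloseWithError(err)
		errCh <- err
	}()

	go func() {
		// 256 is the maxPendingWrites value the patch passes to Load.
		errCh <- to.Load(pr, 256)
	}()

	// Wait for both sides of the pipe and surface the first failure.
	for i := 0; i < 2; i++ {
		if err := <-errCh; err != nil {
			return err
		}
	}
	return nil
}

func main() {
	if err := rewrite("/tmp/chain-old", "/tmp/chain-new"); err != nil {
		log.Fatal(err)
	}
}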