Merge pull request #6905 from filecoin-project/fix/moving-gc-followup

Moving GC Followup
This commit is contained in:
vyzo 2021-07-29 08:54:57 +03:00 committed by GitHub
commit 2fd12daf9f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 74 additions and 30 deletions

View File

@ -258,16 +258,16 @@ func (b *Blockstore) movingGC() error {
b.moveCond.Broadcast() b.moveCond.Broadcast()
b.moveMx.Unlock() b.moveMx.Unlock()
var path string var newPath string
defer func() { defer func() {
b.lockMove() b.lockMove()
db2 := b.dbNext dbNext := b.dbNext
b.dbNext = nil b.dbNext = nil
var state bsMoveState var state bsMoveState
if db2 != nil { if dbNext != nil {
state = moveStateCleanup state = moveStateCleanup
} else { } else {
state = moveStateNone state = moveStateNone
@ -275,12 +275,13 @@ func (b *Blockstore) movingGC() error {
b.unlockMove(state) b.unlockMove(state)
if db2 != nil { if dbNext != nil {
err := db2.Close() // the move failed and we have a left-over db; delete it.
err := dbNext.Close()
if err != nil { if err != nil {
log.Warnf("error closing badger db: %s", err) log.Warnf("error closing badger db: %s", err)
} }
b.deleteDB(path) b.deleteDB(newPath)
b.lockMove() b.lockMove()
b.unlockMove(moveStateNone) b.unlockMove(moveStateNone)
@ -296,68 +297,96 @@ func (b *Blockstore) movingGC() error {
} }
if basePath == linkPath { if basePath == linkPath {
path = basePath newPath = basePath
} else { } else {
// we do this dance to create a name adjacent to the current one, while avoiding clown
// shoes with multiple moves (i.e. we can't just take the basename of the linkPath, as it
// could have been created in a previous move and have the timestamp suffix, which would then
// perpetuate itself).
name := filepath.Base(basePath) name := filepath.Base(basePath)
dir := filepath.Dir(linkPath) dir := filepath.Dir(linkPath)
path = filepath.Join(dir, name) newPath = filepath.Join(dir, name)
} }
path = fmt.Sprintf("%s.%d", path, time.Now().UnixNano()) newPath = fmt.Sprintf("%s.%d", newPath, time.Now().UnixNano())
log.Infof("moving blockstore from %s to %s", b.opts.Dir, path) log.Infof("moving blockstore from %s to %s", b.opts.Dir, newPath)
opts := b.opts opts := b.opts
opts.Dir = path opts.Dir = newPath
opts.ValueDir = path opts.ValueDir = newPath
db2, err := badger.Open(opts.Options) dbNew, err := badger.Open(opts.Options)
if err != nil { if err != nil {
return fmt.Errorf("failed to open badger blockstore in %s: %w", path, err) return fmt.Errorf("failed to open badger blockstore in %s: %w", newPath, err)
} }
b.lockMove() b.lockMove()
b.dbNext = db2 b.dbNext = dbNew
b.unlockMove(moveStateMoving) b.unlockMove(moveStateMoving)
log.Info("copying blockstore") log.Info("copying blockstore")
err = b.doCopy(b.db, b.dbNext) err = b.doCopy(b.db, b.dbNext)
if err != nil { if err != nil {
return fmt.Errorf("error moving badger blockstore to %s: %w", path, err) return fmt.Errorf("error moving badger blockstore to %s: %w", newPath, err)
} }
b.lockMove() b.lockMove()
db1 := b.db dbOld := b.db
b.db = b.dbNext b.db = b.dbNext
b.dbNext = nil b.dbNext = nil
b.unlockMove(moveStateCleanup) b.unlockMove(moveStateCleanup)
err = db1.Close() err = dbOld.Close()
if err != nil { if err != nil {
log.Warnf("error closing old badger db: %s", err) log.Warnf("error closing old badger db: %s", err)
} }
dbpath := b.opts.Dir // this is the canonical db path; this is where our db lives.
oldpath := fmt.Sprintf("%s.old.%d", dbpath, time.Now().Unix()) dbPath := b.opts.Dir
if err = os.Rename(dbpath, oldpath); err != nil { // we first move the existing db out of the way, and only delete it after we have symlinked the
// new db to the canonical path
backupPath := fmt.Sprintf("%s.old.%d", dbPath, time.Now().Unix())
if err = os.Rename(dbPath, backupPath); err != nil {
// this is not catastrophic in the sense that we have not lost any data. // this is not catastrophic in the sense that we have not lost any data.
// but it is pretty bad, as the db path points to the old db, while we are now using to the new // but it is pretty bad, as the db path points to the old db, while we are now using to the new
// db; we can't continue and leave a ticking bomb for the next restart. // db; we can't continue and leave a ticking bomb for the next restart.
// so a panic is appropriate and user can fix. // so a panic is appropriate and user can fix.
panic(fmt.Errorf("error renaming old badger db dir from %s to %s: %w; USER ACTION REQUIRED", dbpath, oldpath, err)) //nolint panic(fmt.Errorf("error renaming old badger db dir from %s to %s: %w; USER ACTION REQUIRED", dbPath, backupPath, err)) //nolint
} }
if err = os.Symlink(path, dbpath); err != nil { if err = symlink(newPath, dbPath); err != nil {
// same here; the db path is pointing to the void. panic and let the user fix. // same here; the db path is pointing to the void. panic and let the user fix.
panic(fmt.Errorf("error symlinking new badger db dir from %s to %s: %w; USER ACTION REQUIRED", path, dbpath, err)) //nolint panic(fmt.Errorf("error symlinking new badger db dir from %s to %s: %w; USER ACTION REQUIRED", newPath, dbPath, err)) //nolint
} }
b.deleteDB(oldpath) // the delete follows symlinks
b.deleteDB(backupPath)
log.Info("moving blockstore done") log.Info("moving blockstore done")
return nil return nil
} }
// symlink creates a symbolic link at linkTo that points at path.
//
// The parent directories of both arguments are resolved through any existing
// symlinks first. When they resolve to the same directory, the link target is
// written as the bare base name of path (a relative link); otherwise path is
// used as the target exactly as given.
//
// An error is returned if either parent directory cannot be resolved, or if
// os.Symlink itself fails.
func symlink(path, linkTo string) error {
	// Resolve the parent of the target first and the parent of the link second,
	// so that a failure is reported against the argument that caused it.
	var parents [2]string
	for i, p := range [2]string{path, linkTo} {
		dir, err := filepath.EvalSymlinks(filepath.Dir(p))
		if err != nil {
			return fmt.Errorf("error resolving links in %s: %w", p, err)
		}
		parents[i] = dir
	}

	target := path
	if parents[0] == parents[1] {
		// Same directory: the base name alone is enough to reach the target
		// from the link's location.
		target = filepath.Base(path)
	}

	return os.Symlink(target, linkTo)
}
// doCopy copies a badger blockstore to another, with an optional filter; if the filter // doCopy copies a badger blockstore to another, with an optional filter; if the filter
// is not nil, then only cids that satisfy the filter will be copied. // is not nil, then only cids that satisfy the filter will be copied.
func (b *Blockstore) doCopy(from, to *badger.DB) error { func (b *Blockstore) doCopy(from, to *badger.DB) error {
@ -390,19 +419,19 @@ func (b *Blockstore) doCopy(from, to *badger.DB) error {
func (b *Blockstore) deleteDB(path string) { func (b *Blockstore) deleteDB(path string) {
// follow symbolic links, otherwise the data wil be left behind // follow symbolic links, otherwise the data wil be left behind
lpath, err := filepath.EvalSymlinks(path) linkPath, err := filepath.EvalSymlinks(path)
if err != nil { if err != nil {
log.Warnf("error resolving symlinks in %s", path) log.Warnf("error resolving symlinks in %s", path)
return return
} }
log.Infof("removing data directory %s", lpath) log.Infof("removing data directory %s", linkPath)
if err := os.RemoveAll(lpath); err != nil { if err := os.RemoveAll(linkPath); err != nil {
log.Warnf("error deleting db at %s: %s", lpath, err) log.Warnf("error deleting db at %s: %s", linkPath, err)
return return
} }
if path != lpath { if path != linkPath {
log.Infof("removing link %s", path) log.Infof("removing link %s", path)
if err := os.Remove(path); err != nil { if err := os.Remove(path); err != nil {
log.Warnf("error removing symbolic link %s", err) log.Warnf("error removing symbolic link %s", err)

View File

@ -245,6 +245,21 @@ func testMove(t *testing.T, optsF func(string) Options) {
checkBlocks() checkBlocks()
checkPath() checkPath()
// reopen the db to make sure our relative link works:
err = db.Close()
if err != nil {
t.Fatal(err)
}
db, err = Open(optsF(dbPath))
if err != nil {
t.Fatal(err)
}
// db.Close() is already deferred
checkBlocks()
} }
func TestMoveNoPrefix(t *testing.T) { func TestMoveNoPrefix(t *testing.T) {