From 0b53b29078d55fb69ae347d6944bf6521e00bdb4 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 16 Jan 2023 03:57:27 -0500 Subject: [PATCH] core/rawdb: fix cornercase shutdown behaviour in freezer (#26485) This PR does a few things. It fixes a shutdown-order flaw in the chainfreezer. Previously, the chain-freezer would shut down the freezer backend first, and then signal for the loop to exit. This can lead to a scenario where the freezer tries to fsync closed files, which is an error-condition that could lead to exit via log.Crit. It also makes the printout more detailed when truncating 'dangling' items, by showing the exact number instead of approximate MB. This PR also adds calls to fsync files before closing them, and also makes the `db inspect` command slightly more robust. --- cmd/geth/dbcmd.go | 12 ++------ core/rawdb/chain_freezer.go | 3 +- core/rawdb/freezer_table.go | 57 ++++++++++++++++++++++++------------- core/rawdb/freezer_test.go | 22 ++++++++++++++ 4 files changed, 63 insertions(+), 31 deletions(-) diff --git a/cmd/geth/dbcmd.go b/cmd/geth/dbcmd.go index 5231ed116..4deb081ed 100644 --- a/cmd/geth/dbcmd.go +++ b/cmd/geth/dbcmd.go @@ -551,16 +551,8 @@ func freezerInspect(ctx *cli.Context) error { return err } stack, _ := makeConfigNode(ctx) - defer stack.Close() - - db := utils.MakeChainDatabase(ctx, stack, true) - defer db.Close() - - ancient, err := db.AncientDatadir() - if err != nil { - log.Info("Failed to retrieve ancient root", "err", err) - return err - } + ancient := stack.ResolveAncient("chaindata", ctx.String(utils.AncientFlag.Name)) + stack.Close() return rawdb.InspectFreezerTable(ancient, freezer, table, start, end) } diff --git a/core/rawdb/chain_freezer.go b/core/rawdb/chain_freezer.go index 4348fb5d7..738295cfb 100644 --- a/core/rawdb/chain_freezer.go +++ b/core/rawdb/chain_freezer.go @@ -70,14 +70,13 @@ func newChainFreezer(datadir string, namespace string, readonly bool, maxTableSi // Close closes the chain freezer instance 
and terminates the background thread. func (f *chainFreezer) Close() error { - err := f.Freezer.Close() select { case <-f.quit: default: close(f.quit) } f.wg.Wait() - return err + return f.Freezer.Close() } // freeze is a background thread that periodically checks the blockchain for any diff --git a/core/rawdb/freezer_table.go b/core/rawdb/freezer_table.go index 394bd5761..b111797d5 100644 --- a/core/rawdb/freezer_table.go +++ b/core/rawdb/freezer_table.go @@ -229,6 +229,7 @@ func (t *freezerTable) repair() error { lastIndex indexEntry contentSize int64 contentExp int64 + verbose bool ) // Read index zero, determine what file is the earliest // and what item offset to use @@ -272,9 +273,10 @@ func (t *freezerTable) repair() error { // Keep truncating both files until they come in sync contentExp = int64(lastIndex.offset) for contentExp != contentSize { + verbose = true // Truncate the head file to the last offset pointer if contentExp < contentSize { - t.logger.Warn("Truncating dangling head", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize)) + t.logger.Warn("Truncating dangling head", "indexed", contentExp, "stored", contentSize) if err := truncateFreezerFile(t.head, contentExp); err != nil { return err } @@ -282,7 +284,7 @@ func (t *freezerTable) repair() error { } // Truncate the index to point within the head file if contentExp > contentSize { - t.logger.Warn("Truncating dangling indexes", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize)) + t.logger.Warn("Truncating dangling indexes", "indexes", offsetsSize/indexEntrySize, "indexed", contentExp, "stored", contentSize) if err := truncateFreezerFile(t.index, offsetsSize-indexEntrySize); err != nil { return err } @@ -343,7 +345,11 @@ func (t *freezerTable) repair() error { if err := t.preopen(); err != nil { return err } - t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes)) + if verbose { + 
t.logger.Info("Chain freezer table opened", "items", t.items, "size", t.headBytes) + } else { + t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes)) + } return nil } @@ -553,21 +559,31 @@ func (t *freezerTable) Close() error { defer t.lock.Unlock() var errs []error - if err := t.index.Close(); err != nil { - errs = append(errs, err) - } - t.index = nil - - if err := t.meta.Close(); err != nil { - errs = append(errs, err) - } - t.meta = nil - - for _, f := range t.files { - if err := f.Close(); err != nil { - errs = append(errs, err) + doClose := func(f *os.File, sync bool, close bool) { + if sync && !t.readonly { + if err := f.Sync(); err != nil { + errs = append(errs, err) + } + } + if close { + if err := f.Close(); err != nil { + errs = append(errs, err) + } } } + // Trying to fsync a file opened in rdonly causes "Access denied" + // error on Windows. + doClose(t.index, true, true) + doClose(t.meta, true, true) + // The preopened non-head data-files are all opened in readonly. + // The head is opened in rw-mode, so we sync it here - but since it's also + // part of t.files, it will be closed in the loop below. 
+ doClose(t.head, true, false) // sync but do not close + for _, f := range t.files { + doClose(f, false, true) // close but do not sync + } + t.index = nil + t.meta = nil t.head = nil if errs != nil { @@ -724,7 +740,7 @@ func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []i defer t.lock.RUnlock() // Ensure the table and the item are accessible - if t.index == nil || t.head == nil { + if t.index == nil || t.head == nil || t.meta == nil { return nil, nil, errClosed } var ( @@ -872,7 +888,9 @@ func (t *freezerTable) advanceHead() error { func (t *freezerTable) Sync() error { t.lock.Lock() defer t.lock.Unlock() - + if t.index == nil || t.head == nil || t.meta == nil { + return errClosed + } var err error trackError := func(e error) { if e != nil && err == nil { @@ -903,7 +921,8 @@ func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) { fmt.Fprintf(w, "Failed to decode freezer table %v\n", err) return } - fmt.Fprintf(w, "Version %d deleted %d, hidden %d\n", meta.Version, atomic.LoadUint64(&t.itemOffset), atomic.LoadUint64(&t.itemHidden)) + fmt.Fprintf(w, "Version %d count %d, deleted %d, hidden %d\n", meta.Version, + atomic.LoadUint64(&t.items), atomic.LoadUint64(&t.itemOffset), atomic.LoadUint64(&t.itemHidden)) buf := make([]byte, indexEntrySize) diff --git a/core/rawdb/freezer_test.go b/core/rawdb/freezer_test.go index 8deb04a79..5896e43ce 100644 --- a/core/rawdb/freezer_test.go +++ b/core/rawdb/freezer_test.go @@ -407,3 +407,25 @@ func TestRenameWindows(t *testing.T) { t.Errorf("unexpected file contents. Got %v\n", buf) } } + +func TestFreezerCloseSync(t *testing.T) { + t.Parallel() + f, _ := newFreezerForTesting(t, map[string]bool{"a": true, "b": true}) + defer f.Close() + + // Now, close and sync. This mimics the behaviour if the node is shut down, + // just as the chain freezer is writing. 
+ // 1: thread-1: chain freezer writes, via freezeRange (holds lock) + // 2: thread-2: Close called, waits for write to finish + // 3: thread-1: finishes writing, releases lock + // 4: thread-2: obtains lock, completes Close() + // 5: thread-1: calls f.Sync() + if err := f.Close(); err != nil { + t.Fatal(err) + } + if err := f.Sync(); err == nil { + t.Fatalf("want error, have nil") + } else if have, want := err.Error(), "[closed closed]"; have != want { + t.Fatalf("want %v, have %v", have, want) + } +}