core/rawdb, cmd, ethdb, eth: implement freezer tail deletion (#23954)

* core/rawdb, cmd, ethdb, eth: implement freezer tail deletion

* core/rawdb: address comments from martin and sina

* core/rawdb: fixes cornercase in tail deletion

* core/rawdb: separate metadata into a standalone file

* core/rawdb: remove unused code

* core/rawdb: add random test

* core/rawdb: polish code

* core/rawdb: fsync meta file before manipulating the index

* core/rawdb: fix typo

* core/rawdb: address comments
This commit is contained in:
rjl493456442 2022-03-10 16:37:23 +08:00 committed by GitHub
parent 8c8a9e5ca1
commit 538a868384
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 1102 additions and 132 deletions

View File

@ -592,7 +592,7 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, root common.Hash, repair bo
if num+1 <= frozen { if num+1 <= frozen {
// Truncate all relative data(header, total difficulty, body, receipt // Truncate all relative data(header, total difficulty, body, receipt
// and canonical hash) from ancient store. // and canonical hash) from ancient store.
if err := bc.db.TruncateAncients(num); err != nil { if err := bc.db.TruncateHead(num); err != nil {
log.Crit("Failed to truncate ancient data", "number", num, "err", err) log.Crit("Failed to truncate ancient data", "number", num, "err", err)
} }
// Remove the hash <-> number mapping from the active store. // Remove the hash <-> number mapping from the active store.
@ -991,7 +991,7 @@ func (bc *BlockChain) InsertReceiptChain(blockChain types.Blocks, receiptChain [
size += int64(batch.ValueSize()) size += int64(batch.ValueSize())
if err = batch.Write(); err != nil { if err = batch.Write(); err != nil {
fastBlock := bc.CurrentFastBlock().NumberU64() fastBlock := bc.CurrentFastBlock().NumberU64()
if err := bc.db.TruncateAncients(fastBlock + 1); err != nil { if err := bc.db.TruncateHead(fastBlock + 1); err != nil {
log.Error("Can't truncate ancient store after failed insert", "err", err) log.Error("Can't truncate ancient store after failed insert", "err", err)
} }
return 0, err return 0, err
@ -1009,7 +1009,7 @@ func (bc *BlockChain) InsertReceiptChain(blockChain types.Blocks, receiptChain [
if !updateHead(blockChain[len(blockChain)-1]) { if !updateHead(blockChain[len(blockChain)-1]) {
// We end up here if the header chain has reorg'ed, and the blocks/receipts // We end up here if the header chain has reorg'ed, and the blocks/receipts
// don't match the canonical chain. // don't match the canonical chain.
if err := bc.db.TruncateAncients(previousFastBlock + 1); err != nil { if err := bc.db.TruncateHead(previousFastBlock + 1); err != nil {
log.Error("Can't truncate ancient store after failed insert", "err", err) log.Error("Can't truncate ancient store after failed insert", "err", err)
} }
return 0, errSideChainReceipts return 0, errSideChainReceipts

View File

@ -83,8 +83,8 @@ type NumberHash struct {
Hash common.Hash Hash common.Hash
} }
// ReadAllHashes retrieves all the hashes assigned to blocks at a certain heights, // ReadAllHashesInRange retrieves all the hashes assigned to blocks at certain
// both canonical and reorged forks included. // heights, both canonical and reorged forks included.
// This method considers both limits to be _inclusive_. // This method considers both limits to be _inclusive_.
func ReadAllHashesInRange(db ethdb.Iteratee, first, last uint64) []*NumberHash { func ReadAllHashesInRange(db ethdb.Iteratee, first, last uint64) []*NumberHash {
var ( var (
@ -776,7 +776,7 @@ func WriteBlock(db ethdb.KeyValueWriter, block *types.Block) {
WriteHeader(db, block.Header()) WriteHeader(db, block.Header())
} }
// WriteAncientBlock writes entire block data into ancient store and returns the total written size. // WriteAncientBlocks writes entire block data into ancient store and returns the total written size.
func WriteAncientBlocks(db ethdb.AncientWriter, blocks []*types.Block, receipts []types.Receipts, td *big.Int) (int64, error) { func WriteAncientBlocks(db ethdb.AncientWriter, blocks []*types.Block, receipts []types.Receipts, td *big.Int) (int64, error) {
var ( var (
tdSum = new(big.Int).Set(td) tdSum = new(big.Int).Set(td)

View File

@ -99,6 +99,11 @@ func (db *nofreezedb) Ancients() (uint64, error) {
return 0, errNotSupported return 0, errNotSupported
} }
// Tail returns an error as we don't have a backing chain freezer.
func (db *nofreezedb) Tail() (uint64, error) {
return 0, errNotSupported
}
// AncientSize returns an error as we don't have a backing chain freezer. // AncientSize returns an error as we don't have a backing chain freezer.
func (db *nofreezedb) AncientSize(kind string) (uint64, error) { func (db *nofreezedb) AncientSize(kind string) (uint64, error) {
return 0, errNotSupported return 0, errNotSupported
@ -109,8 +114,13 @@ func (db *nofreezedb) ModifyAncients(func(ethdb.AncientWriteOp) error) (int64, e
return 0, errNotSupported return 0, errNotSupported
} }
// TruncateAncients returns an error as we don't have a backing chain freezer. // TruncateHead returns an error as we don't have a backing chain freezer.
func (db *nofreezedb) TruncateAncients(items uint64) error { func (db *nofreezedb) TruncateHead(items uint64) error {
return errNotSupported
}
// TruncateTail returns an error as we don't have a backing chain freezer.
func (db *nofreezedb) TruncateTail(items uint64) error {
return errNotSupported return errNotSupported
} }
@ -211,7 +221,7 @@ func NewDatabaseWithFreezer(db ethdb.KeyValueStore, freezer string, namespace st
// Block #1 is still in the database, we're allowed to init a new feezer // Block #1 is still in the database, we're allowed to init a new feezer
} }
// Otherwise, the head header is still the genesis, we're allowed to init a new // Otherwise, the head header is still the genesis, we're allowed to init a new
// feezer. // freezer.
} }
} }
// Freezer is consistent with the key-value database, permit combining the two // Freezer is consistent with the key-value database, permit combining the two

View File

@ -66,7 +66,7 @@ const (
freezerTableSize = 2 * 1000 * 1000 * 1000 freezerTableSize = 2 * 1000 * 1000 * 1000
) )
// freezer is an memory mapped append-only database to store immutable chain data // freezer is a memory mapped append-only database to store immutable chain data
// into flat files: // into flat files:
// //
// - The append only nature ensures that disk writes are minimized. // - The append only nature ensures that disk writes are minimized.
@ -78,6 +78,7 @@ type freezer struct {
// 64-bit aligned fields can be atomic. The struct is guaranteed to be so aligned, // 64-bit aligned fields can be atomic. The struct is guaranteed to be so aligned,
// so take advantage of that (https://golang.org/pkg/sync/atomic/#pkg-note-BUG). // so take advantage of that (https://golang.org/pkg/sync/atomic/#pkg-note-BUG).
frozen uint64 // Number of blocks already frozen frozen uint64 // Number of blocks already frozen
tail uint64 // Number of the first stored item in the freezer
threshold uint64 // Number of recent blocks not to freeze (params.FullImmutabilityThreshold apart from tests) threshold uint64 // Number of recent blocks not to freeze (params.FullImmutabilityThreshold apart from tests)
// This lock synchronizes writers and the truncate operation, as well as // This lock synchronizes writers and the truncate operation, as well as
@ -226,6 +227,11 @@ func (f *freezer) Ancients() (uint64, error) {
return atomic.LoadUint64(&f.frozen), nil return atomic.LoadUint64(&f.frozen), nil
} }
// Tail returns the number of first stored item in the freezer.
func (f *freezer) Tail() (uint64, error) {
return atomic.LoadUint64(&f.tail), nil
}
// AncientSize returns the ancient size of the specified category. // AncientSize returns the ancient size of the specified category.
func (f *freezer) AncientSize(kind string) (uint64, error) { func (f *freezer) AncientSize(kind string) (uint64, error) {
// This needs the write lock to avoid data races on table fields. // This needs the write lock to avoid data races on table fields.
@ -261,7 +267,7 @@ func (f *freezer) ModifyAncients(fn func(ethdb.AncientWriteOp) error) (writeSize
if err != nil { if err != nil {
// The write operation has failed. Go back to the previous item position. // The write operation has failed. Go back to the previous item position.
for name, table := range f.tables { for name, table := range f.tables {
err := table.truncate(prevItem) err := table.truncateHead(prevItem)
if err != nil { if err != nil {
log.Error("Freezer table roll-back failed", "table", name, "index", prevItem, "err", err) log.Error("Freezer table roll-back failed", "table", name, "index", prevItem, "err", err)
} }
@ -281,8 +287,8 @@ func (f *freezer) ModifyAncients(fn func(ethdb.AncientWriteOp) error) (writeSize
return writeSize, nil return writeSize, nil
} }
// TruncateAncients discards any recent data above the provided threshold number. // TruncateHead discards any recent data above the provided threshold number.
func (f *freezer) TruncateAncients(items uint64) error { func (f *freezer) TruncateHead(items uint64) error {
if f.readonly { if f.readonly {
return errReadOnly return errReadOnly
} }
@ -293,7 +299,7 @@ func (f *freezer) TruncateAncients(items uint64) error {
return nil return nil
} }
for _, table := range f.tables { for _, table := range f.tables {
if err := table.truncate(items); err != nil { if err := table.truncateHead(items); err != nil {
return err return err
} }
} }
@ -301,6 +307,26 @@ func (f *freezer) TruncateAncients(items uint64) error {
return nil return nil
} }
// TruncateTail discards any recent data below the provided threshold number.
func (f *freezer) TruncateTail(tail uint64) error {
if f.readonly {
return errReadOnly
}
f.writeLock.Lock()
defer f.writeLock.Unlock()
if atomic.LoadUint64(&f.tail) >= tail {
return nil
}
for _, table := range f.tables {
if err := table.truncateTail(tail); err != nil {
return err
}
}
atomic.StoreUint64(&f.tail, tail)
return nil
}
// Sync flushes all data tables to disk. // Sync flushes all data tables to disk.
func (f *freezer) Sync() error { func (f *freezer) Sync() error {
var errs []error var errs []error
@ -344,19 +370,30 @@ func (f *freezer) validate() error {
// repair truncates all data tables to the same length. // repair truncates all data tables to the same length.
func (f *freezer) repair() error { func (f *freezer) repair() error {
min := uint64(math.MaxUint64) var (
head = uint64(math.MaxUint64)
tail = uint64(0)
)
for _, table := range f.tables { for _, table := range f.tables {
items := atomic.LoadUint64(&table.items) items := atomic.LoadUint64(&table.items)
if min > items { if head > items {
min = items head = items
}
hidden := atomic.LoadUint64(&table.itemHidden)
if hidden > tail {
tail = hidden
} }
} }
for _, table := range f.tables { for _, table := range f.tables {
if err := table.truncate(min); err != nil { if err := table.truncateHead(head); err != nil {
return err
}
if err := table.truncateTail(tail); err != nil {
return err return err
} }
} }
atomic.StoreUint64(&f.frozen, min) atomic.StoreUint64(&f.frozen, head)
atomic.StoreUint64(&f.tail, tail)
return nil return nil
} }

View File

@ -191,7 +191,7 @@ func (batch *freezerTableBatch) commit() error {
dataSize := int64(len(batch.dataBuffer)) dataSize := int64(len(batch.dataBuffer))
batch.dataBuffer = batch.dataBuffer[:0] batch.dataBuffer = batch.dataBuffer[:0]
// Write index. // Write indices.
_, err = batch.t.index.Write(batch.indexBuffer) _, err = batch.t.index.Write(batch.indexBuffer)
if err != nil { if err != nil {
return err return err

109
core/rawdb/freezer_meta.go Normal file
View File

@ -0,0 +1,109 @@
// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>
package rawdb
import (
"io"
"os"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
)
const freezerVersion = 1 // The initial version tag of freezer table metadata
// freezerTableMeta wraps all the metadata of the freezer table.
type freezerTableMeta struct {
// Version is the versioning descriptor of the freezer table.
Version uint16
// VirtualTail indicates how many items have been marked as deleted.
// Its value is equal to the number of items removed from the table
// plus the number of items hidden in the table, so it should never
// be lower than the "actual tail".
VirtualTail uint64
}
// newMetadata initializes the metadata object with the given virtual tail.
func newMetadata(tail uint64) *freezerTableMeta {
return &freezerTableMeta{
Version: freezerVersion,
VirtualTail: tail,
}
}
// readMetadata reads the metadata of the freezer table from the
// given metadata file.
func readMetadata(file *os.File) (*freezerTableMeta, error) {
_, err := file.Seek(0, io.SeekStart)
if err != nil {
return nil, err
}
var meta freezerTableMeta
if err := rlp.Decode(file, &meta); err != nil {
return nil, err
}
return &meta, nil
}
// writeMetadata writes the metadata of the freezer table into the
// given metadata file.
func writeMetadata(file *os.File, meta *freezerTableMeta) error {
_, err := file.Seek(0, io.SeekStart)
if err != nil {
return err
}
return rlp.Encode(file, meta)
}
// loadMetadata loads the metadata from the given metadata file.
// Initializes the metadata file with the given "actual tail" if
// it's empty.
func loadMetadata(file *os.File, tail uint64) (*freezerTableMeta, error) {
stat, err := file.Stat()
if err != nil {
return nil, err
}
// Write the metadata with the given actual tail into metadata file
// if it's non-existent. There are two possible scenarios here:
// - the freezer table is empty
// - the freezer table is legacy
// In both cases, write the meta into the file with the actual tail
// as the virtual tail.
if stat.Size() == 0 {
m := newMetadata(tail)
if err := writeMetadata(file, m); err != nil {
return nil, err
}
return m, nil
}
m, err := readMetadata(file)
if err != nil {
return nil, err
}
// Update the virtual tail with the given actual tail if it's even
// lower than it. Theoretically it shouldn't happen at all, print
// a warning here.
if m.VirtualTail < tail {
log.Warn("Updated virtual tail", "have", m.VirtualTail, "now", tail)
m.VirtualTail = tail
if err := writeMetadata(file, m); err != nil {
return nil, err
}
}
return m, nil
}

View File

@ -0,0 +1,61 @@
// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>
package rawdb
import (
"io/ioutil"
"os"
"testing"
)
func TestReadWriteFreezerTableMeta(t *testing.T) {
f, err := ioutil.TempFile(os.TempDir(), "*")
if err != nil {
t.Fatalf("Failed to create file %v", err)
}
err = writeMetadata(f, newMetadata(100))
if err != nil {
t.Fatalf("Failed to write metadata %v", err)
}
meta, err := readMetadata(f)
if err != nil {
t.Fatalf("Failed to read metadata %v", err)
}
if meta.Version != freezerVersion {
t.Fatalf("Unexpected version field")
}
if meta.VirtualTail != uint64(100) {
t.Fatalf("Unexpected virtual tail field")
}
}
func TestInitializeFreezerTableMeta(t *testing.T) {
f, err := ioutil.TempFile(os.TempDir(), "*")
if err != nil {
t.Fatalf("Failed to create file %v", err)
}
meta, err := loadMetadata(f, uint64(100))
if err != nil {
t.Fatalf("Failed to read metadata %v", err)
}
if meta.Version != freezerVersion {
t.Fatalf("Unexpected version field")
}
if meta.VirtualTail != uint64(100) {
t.Fatalf("Unexpected virtual tail field")
}
}

View File

@ -47,20 +47,19 @@ var (
) )
// indexEntry contains the number/id of the file that the data resides in, aswell as the // indexEntry contains the number/id of the file that the data resides in, aswell as the
// offset within the file to the end of the data // offset within the file to the end of the data.
// In serialized form, the filenum is stored as uint16. // In serialized form, the filenum is stored as uint16.
type indexEntry struct { type indexEntry struct {
filenum uint32 // stored as uint16 ( 2 bytes) filenum uint32 // stored as uint16 ( 2 bytes )
offset uint32 // stored as uint32 ( 4 bytes) offset uint32 // stored as uint32 ( 4 bytes )
} }
const indexEntrySize = 6 const indexEntrySize = 6
// unmarshalBinary deserializes binary b into the rawIndex entry. // unmarshalBinary deserializes binary b into the rawIndex entry.
func (i *indexEntry) unmarshalBinary(b []byte) error { func (i *indexEntry) unmarshalBinary(b []byte) {
i.filenum = uint32(binary.BigEndian.Uint16(b[:2])) i.filenum = uint32(binary.BigEndian.Uint16(b[:2]))
i.offset = binary.BigEndian.Uint32(b[2:6]) i.offset = binary.BigEndian.Uint32(b[2:6])
return nil
} }
// append adds the encoded entry to the end of b. // append adds the encoded entry to the end of b.
@ -75,14 +74,14 @@ func (i *indexEntry) append(b []byte) []byte {
// bounds returns the start- and end- offsets, and the file number of where to // bounds returns the start- and end- offsets, and the file number of where to
// read there data item marked by the two index entries. The two entries are // read there data item marked by the two index entries. The two entries are
// assumed to be sequential. // assumed to be sequential.
func (start *indexEntry) bounds(end *indexEntry) (startOffset, endOffset, fileId uint32) { func (i *indexEntry) bounds(end *indexEntry) (startOffset, endOffset, fileId uint32) {
if start.filenum != end.filenum { if i.filenum != end.filenum {
// If a piece of data 'crosses' a data-file, // If a piece of data 'crosses' a data-file,
// it's actually in one piece on the second data-file. // it's actually in one piece on the second data-file.
// We return a zero-indexEntry for the second file as start // We return a zero-indexEntry for the second file as start
return 0, end.offset, end.filenum return 0, end.offset, end.filenum
} }
return start.offset, end.offset, end.filenum return i.offset, end.offset, end.filenum
} }
// freezerTable represents a single chained data table within the freezer (e.g. blocks). // freezerTable represents a single chained data table within the freezer (e.g. blocks).
@ -92,7 +91,15 @@ type freezerTable struct {
// WARNING: The `items` field is accessed atomically. On 32 bit platforms, only // WARNING: The `items` field is accessed atomically. On 32 bit platforms, only
// 64-bit aligned fields can be atomic. The struct is guaranteed to be so aligned, // 64-bit aligned fields can be atomic. The struct is guaranteed to be so aligned,
// so take advantage of that (https://golang.org/pkg/sync/atomic/#pkg-note-BUG). // so take advantage of that (https://golang.org/pkg/sync/atomic/#pkg-note-BUG).
items uint64 // Number of items stored in the table (including items removed from tail) items uint64 // Number of items stored in the table (including items removed from tail)
itemOffset uint64 // Number of items removed from the table
// itemHidden is the number of items marked as deleted. Tail deletion is
// only supported at file level which means the actual deletion will be
// delayed until the entire data file is marked as deleted. Before that
// these items will be hidden to prevent being visited again. The value
// should never be lower than itemOffset.
itemHidden uint64
noCompression bool // if true, disables snappy compression. Note: does not work retroactively noCompression bool // if true, disables snappy compression. Note: does not work retroactively
readonly bool readonly bool
@ -101,14 +108,11 @@ type freezerTable struct {
path string path string
head *os.File // File descriptor for the data head of the table head *os.File // File descriptor for the data head of the table
index *os.File // File descriptor for the indexEntry file of the table
meta *os.File // File descriptor for metadata of the table
files map[uint32]*os.File // open files files map[uint32]*os.File // open files
headId uint32 // number of the currently active head file headId uint32 // number of the currently active head file
tailId uint32 // number of the earliest file tailId uint32 // number of the earliest file
index *os.File // File descriptor for the indexEntry file of the table
// In the case that old items are deleted (from the tail), we use itemOffset
// to count how many historic items have gone missing.
itemOffset uint32 // Offset (number of discarded items)
headBytes int64 // Number of bytes written to the head file headBytes int64 // Number of bytes written to the head file
readMeter metrics.Meter // Meter for measuring the effective amount of data read readMeter metrics.Meter // Meter for measuring the effective amount of data read
@ -124,46 +128,8 @@ func NewFreezerTable(path, name string, disableSnappy, readonly bool) (*freezerT
return newTable(path, name, metrics.NilMeter{}, metrics.NilMeter{}, metrics.NilGauge{}, freezerTableSize, disableSnappy, readonly) return newTable(path, name, metrics.NilMeter{}, metrics.NilMeter{}, metrics.NilGauge{}, freezerTableSize, disableSnappy, readonly)
} }
// openFreezerFileForAppend opens a freezer table file and seeks to the end
func openFreezerFileForAppend(filename string) (*os.File, error) {
// Open the file without the O_APPEND flag
// because it has differing behaviour during Truncate operations
// on different OS's
file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644)
if err != nil {
return nil, err
}
// Seek to end for append
if _, err = file.Seek(0, io.SeekEnd); err != nil {
return nil, err
}
return file, nil
}
// openFreezerFileForReadOnly opens a freezer table file for read only access
func openFreezerFileForReadOnly(filename string) (*os.File, error) {
return os.OpenFile(filename, os.O_RDONLY, 0644)
}
// openFreezerFileTruncated opens a freezer table making sure it is truncated
func openFreezerFileTruncated(filename string) (*os.File, error) {
return os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
}
// truncateFreezerFile resizes a freezer table file and seeks to the end
func truncateFreezerFile(file *os.File, size int64) error {
if err := file.Truncate(size); err != nil {
return err
}
// Seek to end for append
if _, err := file.Seek(0, io.SeekEnd); err != nil {
return err
}
return nil
}
// newTable opens a freezer table, creating the data and index files if they are // newTable opens a freezer table, creating the data and index files if they are
// non existent. Both files are truncated to the shortest common length to ensure // non-existent. Both files are truncated to the shortest common length to ensure
// they don't go out of sync. // they don't go out of sync.
func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression, readonly bool) (*freezerTable, error) { func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression, readonly bool) (*freezerTable, error) {
// Ensure the containing directory exists and open the indexEntry file // Ensure the containing directory exists and open the indexEntry file
@ -172,28 +138,40 @@ func newTable(path string, name string, readMeter metrics.Meter, writeMeter metr
} }
var idxName string var idxName string
if noCompression { if noCompression {
// Raw idx idxName = fmt.Sprintf("%s.ridx", name) // raw index file
idxName = fmt.Sprintf("%s.ridx", name)
} else { } else {
// Compressed idx idxName = fmt.Sprintf("%s.cidx", name) // compressed index file
idxName = fmt.Sprintf("%s.cidx", name)
} }
var ( var (
err error err error
offsets *os.File index *os.File
meta *os.File
) )
if readonly { if readonly {
// Will fail if table doesn't exist // Will fail if table doesn't exist
offsets, err = openFreezerFileForReadOnly(filepath.Join(path, idxName)) index, err = openFreezerFileForReadOnly(filepath.Join(path, idxName))
if err != nil {
return nil, err
}
// Will fail if the table is legacy(no metadata)
meta, err = openFreezerFileForReadOnly(filepath.Join(path, fmt.Sprintf("%s.meta", name)))
if err != nil {
return nil, err
}
} else { } else {
offsets, err = openFreezerFileForAppend(filepath.Join(path, idxName)) index, err = openFreezerFileForAppend(filepath.Join(path, idxName))
} if err != nil {
if err != nil { return nil, err
return nil, err }
meta, err = openFreezerFileForAppend(filepath.Join(path, fmt.Sprintf("%s.meta", name)))
if err != nil {
return nil, err
}
} }
// Create the table and repair any past inconsistency // Create the table and repair any past inconsistency
tab := &freezerTable{ tab := &freezerTable{
index: offsets, index: index,
meta: meta,
files: make(map[uint32]*os.File), files: make(map[uint32]*os.File),
readMeter: readMeter, readMeter: readMeter,
writeMeter: writeMeter, writeMeter: writeMeter,
@ -220,7 +198,7 @@ func newTable(path string, name string, readMeter metrics.Meter, writeMeter metr
return tab, nil return tab, nil
} }
// repair cross checks the head and the index file and truncates them to // repair cross-checks the head and the index file and truncates them to
// be in sync with each other after a potential crash / data loss. // be in sync with each other after a potential crash / data loss.
func (t *freezerTable) repair() error { func (t *freezerTable) repair() error {
// Create a temporary offset buffer to init files with and read indexEntry into // Create a temporary offset buffer to init files with and read indexEntry into
@ -258,11 +236,27 @@ func (t *freezerTable) repair() error {
t.index.ReadAt(buffer, 0) t.index.ReadAt(buffer, 0)
firstIndex.unmarshalBinary(buffer) firstIndex.unmarshalBinary(buffer)
// Assign the tail fields with the first stored index.
// The total removed items is represented with an uint32,
// which is not enough in theory but enough in practice.
// TODO: use uint64 to represent total removed items.
t.tailId = firstIndex.filenum t.tailId = firstIndex.filenum
t.itemOffset = firstIndex.offset t.itemOffset = uint64(firstIndex.offset)
t.index.ReadAt(buffer, offsetsSize-indexEntrySize) // Load metadata from the file
lastIndex.unmarshalBinary(buffer) meta, err := loadMetadata(t.meta, t.itemOffset)
if err != nil {
return err
}
t.itemHidden = meta.VirtualTail
// Read the last index, use the default value in case the freezer is empty
if offsetsSize == indexEntrySize {
lastIndex = indexEntry{filenum: t.tailId, offset: 0}
} else {
t.index.ReadAt(buffer, offsetsSize-indexEntrySize)
lastIndex.unmarshalBinary(buffer)
}
if t.readonly { if t.readonly {
t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForReadOnly) t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForReadOnly)
} else { } else {
@ -278,7 +272,6 @@ func (t *freezerTable) repair() error {
// Keep truncating both files until they come in sync // Keep truncating both files until they come in sync
contentExp = int64(lastIndex.offset) contentExp = int64(lastIndex.offset)
for contentExp != contentSize { for contentExp != contentSize {
// Truncate the head file to the last offset pointer // Truncate the head file to the last offset pointer
if contentExp < contentSize { if contentExp < contentSize {
@ -295,9 +288,16 @@ func (t *freezerTable) repair() error {
return err return err
} }
offsetsSize -= indexEntrySize offsetsSize -= indexEntrySize
t.index.ReadAt(buffer, offsetsSize-indexEntrySize)
// Read the new head index, use the default value in case
// the freezer is already empty.
var newLastIndex indexEntry var newLastIndex indexEntry
newLastIndex.unmarshalBinary(buffer) if offsetsSize == indexEntrySize {
newLastIndex = indexEntry{filenum: t.tailId, offset: 0}
} else {
t.index.ReadAt(buffer, offsetsSize-indexEntrySize)
newLastIndex.unmarshalBinary(buffer)
}
// We might have slipped back into an earlier head-file here // We might have slipped back into an earlier head-file here
if newLastIndex.filenum != lastIndex.filenum { if newLastIndex.filenum != lastIndex.filenum {
// Release earlier opened file // Release earlier opened file
@ -325,12 +325,21 @@ func (t *freezerTable) repair() error {
if err := t.head.Sync(); err != nil { if err := t.head.Sync(); err != nil {
return err return err
} }
if err := t.meta.Sync(); err != nil {
return err
}
} }
// Update the item and byte counters and return // Update the item and byte counters and return
t.items = uint64(t.itemOffset) + uint64(offsetsSize/indexEntrySize-1) // last indexEntry points to the end of the data file t.items = t.itemOffset + uint64(offsetsSize/indexEntrySize-1) // last indexEntry points to the end of the data file
t.headBytes = contentSize t.headBytes = contentSize
t.headId = lastIndex.filenum t.headId = lastIndex.filenum
// Delete the leftover files because of head deletion
t.releaseFilesAfter(t.headId, true)
// Delete the leftover files because of tail deletion
t.releaseFilesBefore(t.tailId, true)
// Close opened files and preopen all files // Close opened files and preopen all files
if err := t.preopen(); err != nil { if err := t.preopen(); err != nil {
return err return err
@ -346,6 +355,7 @@ func (t *freezerTable) repair() error {
func (t *freezerTable) preopen() (err error) { func (t *freezerTable) preopen() (err error) {
// The repair might have already opened (some) files // The repair might have already opened (some) files
t.releaseFilesAfter(0, false) t.releaseFilesAfter(0, false)
// Open all except head in RDONLY // Open all except head in RDONLY
for i := t.tailId; i < t.headId; i++ { for i := t.tailId; i < t.headId; i++ {
if _, err = t.openFile(i, openFreezerFileForReadOnly); err != nil { if _, err = t.openFile(i, openFreezerFileForReadOnly); err != nil {
@ -361,16 +371,19 @@ func (t *freezerTable) preopen() (err error) {
return err return err
} }
// truncate discards any recent data above the provided threshold number. // truncateHead discards any recent data above the provided threshold number.
func (t *freezerTable) truncate(items uint64) error { func (t *freezerTable) truncateHead(items uint64) error {
t.lock.Lock() t.lock.Lock()
defer t.lock.Unlock() defer t.lock.Unlock()
// If our item count is correct, don't do anything // Ensure the given truncate target falls in the correct range
existing := atomic.LoadUint64(&t.items) existing := atomic.LoadUint64(&t.items)
if existing <= items { if existing <= items {
return nil return nil
} }
if items < atomic.LoadUint64(&t.itemHidden) {
return errors.New("truncation below tail")
}
// We need to truncate, save the old size for metrics tracking // We need to truncate, save the old size for metrics tracking
oldSize, err := t.sizeNolock() oldSize, err := t.sizeNolock()
if err != nil { if err != nil {
@ -382,17 +395,24 @@ func (t *freezerTable) truncate(items uint64) error {
log = t.logger.Warn // Only loud warn if we delete multiple items log = t.logger.Warn // Only loud warn if we delete multiple items
} }
log("Truncating freezer table", "items", existing, "limit", items) log("Truncating freezer table", "items", existing, "limit", items)
if err := truncateFreezerFile(t.index, int64(items+1)*indexEntrySize); err != nil {
// Truncate the index file first, the tail position is also considered
// when calculating the new freezer table length.
length := items - atomic.LoadUint64(&t.itemOffset)
if err := truncateFreezerFile(t.index, int64(length+1)*indexEntrySize); err != nil {
return err return err
} }
// Calculate the new expected size of the data file and truncate it // Calculate the new expected size of the data file and truncate it
buffer := make([]byte, indexEntrySize)
if _, err := t.index.ReadAt(buffer, int64(items*indexEntrySize)); err != nil {
return err
}
var expected indexEntry var expected indexEntry
expected.unmarshalBinary(buffer) if length == 0 {
expected = indexEntry{filenum: t.tailId, offset: 0}
} else {
buffer := make([]byte, indexEntrySize)
if _, err := t.index.ReadAt(buffer, int64(length*indexEntrySize)); err != nil {
return err
}
expected.unmarshalBinary(buffer)
}
// We might need to truncate back to older files // We might need to truncate back to older files
if expected.filenum != t.headId { if expected.filenum != t.headId {
// If already open for reading, force-reopen for writing // If already open for reading, force-reopen for writing
@ -421,7 +441,110 @@ func (t *freezerTable) truncate(items uint64) error {
return err return err
} }
t.sizeGauge.Dec(int64(oldSize - newSize)) t.sizeGauge.Dec(int64(oldSize - newSize))
return nil
}
// truncateTail discards any recent data before the provided threshold number.
func (t *freezerTable) truncateTail(items uint64) error {
t.lock.Lock()
defer t.lock.Unlock()
// Ensure the given truncate target falls in the correct range
if atomic.LoadUint64(&t.itemHidden) >= items {
return nil
}
if atomic.LoadUint64(&t.items) < items {
return errors.New("truncation above head")
}
// Load the new tail index by the given new tail position
var (
newTailId uint32
buffer = make([]byte, indexEntrySize)
)
if atomic.LoadUint64(&t.items) == items {
newTailId = t.headId
} else {
offset := items - atomic.LoadUint64(&t.itemOffset)
if _, err := t.index.ReadAt(buffer, int64((offset+1)*indexEntrySize)); err != nil {
return err
}
var newTail indexEntry
newTail.unmarshalBinary(buffer)
newTailId = newTail.filenum
}
// Update the virtual tail marker and hidden these entries in table.
atomic.StoreUint64(&t.itemHidden, items)
if err := writeMetadata(t.meta, newMetadata(items)); err != nil {
return err
}
// Hidden items still fall in the current tail file, no data file
// can be dropped.
if t.tailId == newTailId {
return nil
}
// Hidden items fall in the incorrect range, returns the error.
if t.tailId > newTailId {
return fmt.Errorf("invalid index, tail-file %d, item-file %d", t.tailId, newTailId)
}
// Hidden items exceed the current tail file, drop the relevant
// data files. We need to truncate, save the old size for metrics
// tracking.
oldSize, err := t.sizeNolock()
if err != nil {
return err
}
// Count how many items can be deleted from the file.
var (
newDeleted = items
deleted = atomic.LoadUint64(&t.itemOffset)
)
for current := items - 1; current >= deleted; current -= 1 {
if _, err := t.index.ReadAt(buffer, int64((current-deleted+1)*indexEntrySize)); err != nil {
return err
}
var pre indexEntry
pre.unmarshalBinary(buffer)
if pre.filenum != newTailId {
break
}
newDeleted = current
}
// Commit the changes of metadata file first before manipulating
// the indexes file.
if err := t.meta.Sync(); err != nil {
return err
}
// Truncate the deleted index entries from the index file.
err = copyFrom(t.index.Name(), t.index.Name(), indexEntrySize*(newDeleted-deleted+1), func(f *os.File) error {
tailIndex := indexEntry{
filenum: newTailId,
offset: uint32(newDeleted),
}
_, err := f.Write(tailIndex.append(nil))
return err
})
if err != nil {
return err
}
// Reopen the modified index file to load the changes
if err := t.index.Close(); err != nil {
return err
}
t.index, err = openFreezerFileForAppend(t.index.Name())
if err != nil {
return err
}
// Release any files before the current tail
t.tailId = newTailId
atomic.StoreUint64(&t.itemOffset, newDeleted)
t.releaseFilesBefore(t.tailId, true)
// Retrieve the new size and update the total size counter
newSize, err := t.sizeNolock()
if err != nil {
return err
}
t.sizeGauge.Dec(int64(oldSize - newSize))
return nil return nil
} }
@ -436,6 +559,11 @@ func (t *freezerTable) Close() error {
} }
t.index = nil t.index = nil
if err := t.meta.Close(); err != nil {
errs = append(errs, err)
}
t.meta = nil
for _, f := range t.files { for _, f := range t.files {
if err := f.Close(); err != nil { if err := f.Close(); err != nil {
errs = append(errs, err) errs = append(errs, err)
@ -490,6 +618,19 @@ func (t *freezerTable) releaseFilesAfter(num uint32, remove bool) {
} }
} }
// releaseFilesBefore closes all open files with a lower number, and optionally also deletes the files
func (t *freezerTable) releaseFilesBefore(num uint32, remove bool) {
for fnum, f := range t.files {
if fnum < num {
delete(t.files, fnum)
f.Close()
if remove {
os.Remove(f.Name())
}
}
}
}
// getIndices returns the index entries for the given from-item, covering 'count' items. // getIndices returns the index entries for the given from-item, covering 'count' items.
// N.B: The actual number of returned indices for N items will always be N+1 (unless an // N.B: The actual number of returned indices for N items will always be N+1 (unless an
// error is returned). // error is returned).
@ -498,7 +639,7 @@ func (t *freezerTable) releaseFilesAfter(num uint32, remove bool) {
// it will return error. // it will return error.
func (t *freezerTable) getIndices(from, count uint64) ([]*indexEntry, error) { func (t *freezerTable) getIndices(from, count uint64) ([]*indexEntry, error) {
// Apply the table-offset // Apply the table-offset
from = from - uint64(t.itemOffset) from = from - t.itemOffset
// For reading N items, we need N+1 indices. // For reading N items, we need N+1 indices.
buffer := make([]byte, (count+1)*indexEntrySize) buffer := make([]byte, (count+1)*indexEntrySize)
if _, err := t.index.ReadAt(buffer, int64(from*indexEntrySize)); err != nil { if _, err := t.index.ReadAt(buffer, int64(from*indexEntrySize)); err != nil {
@ -583,18 +724,21 @@ func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []i
t.lock.RLock() t.lock.RLock()
defer t.lock.RUnlock() defer t.lock.RUnlock()
// Ensure the table and the item is accessible // Ensure the table and the item are accessible
if t.index == nil || t.head == nil { if t.index == nil || t.head == nil {
return nil, nil, errClosed return nil, nil, errClosed
} }
itemCount := atomic.LoadUint64(&t.items) // max number var (
items = atomic.LoadUint64(&t.items) // the total items(head + 1)
hidden = atomic.LoadUint64(&t.itemHidden) // the number of hidden items
)
// Ensure the start is written, not deleted from the tail, and that the // Ensure the start is written, not deleted from the tail, and that the
// caller actually wants something // caller actually wants something
if itemCount <= start || uint64(t.itemOffset) > start || count == 0 { if items <= start || hidden > start || count == 0 {
return nil, nil, errOutOfBounds return nil, nil, errOutOfBounds
} }
if start+count > itemCount { if start+count > items {
count = itemCount - start count = items - start
} }
var ( var (
output = make([]byte, maxBytes) // Buffer to read data into output = make([]byte, maxBytes) // Buffer to read data into
@ -670,10 +814,10 @@ func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []i
return output[:outputSize], sizes, nil return output[:outputSize], sizes, nil
} }
// has returns an indicator whether the specified number data // has returns an indicator whether the specified number data is still accessible
// exists in the freezer table. // in the freezer table.
func (t *freezerTable) has(number uint64) bool { func (t *freezerTable) has(number uint64) bool {
return atomic.LoadUint64(&t.items) > number return atomic.LoadUint64(&t.items) > number && atomic.LoadUint64(&t.itemHidden) <= number
} }
// size returns the total data size in the freezer table. // size returns the total data size in the freezer table.
@ -727,6 +871,9 @@ func (t *freezerTable) Sync() error {
if err := t.index.Sync(); err != nil { if err := t.index.Sync(); err != nil {
return err return err
} }
if err := t.meta.Sync(); err != nil {
return err
}
return t.head.Sync() return t.head.Sync()
} }
@ -744,13 +891,20 @@ func (t *freezerTable) dumpIndexString(start, stop int64) string {
} }
func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) { func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) {
meta, err := readMetadata(t.meta)
if err != nil {
fmt.Fprintf(w, "Failed to decode freezer table %v\n", err)
return
}
fmt.Fprintf(w, "Version %d deleted %d, hidden %d\n", meta.Version, atomic.LoadUint64(&t.itemOffset), atomic.LoadUint64(&t.itemHidden))
buf := make([]byte, indexEntrySize) buf := make([]byte, indexEntrySize)
fmt.Fprintf(w, "| number | fileno | offset |\n") fmt.Fprintf(w, "| number | fileno | offset |\n")
fmt.Fprintf(w, "|--------|--------|--------|\n") fmt.Fprintf(w, "|--------|--------|--------|\n")
for i := uint64(start); ; i++ { for i := uint64(start); ; i++ {
if _, err := t.index.ReadAt(buf, int64(i*indexEntrySize)); err != nil { if _, err := t.index.ReadAt(buf, int64((i+1)*indexEntrySize)); err != nil {
break break
} }
var entry indexEntry var entry indexEntry

View File

@ -18,13 +18,18 @@ package rawdb
import ( import (
"bytes" "bytes"
"encoding/binary"
"fmt" "fmt"
"math/rand" "math/rand"
"os" "os"
"path/filepath" "path/filepath"
"reflect"
"sync/atomic"
"testing" "testing"
"testing/quick"
"time" "time"
"github.com/davecgh/go-spew/spew"
"github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/metrics"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
@ -204,7 +209,7 @@ func TestFreezerRepairDanglingHeadLarge(t *testing.T) {
} }
// Remove everything but the first item, and leave data unaligned // Remove everything but the first item, and leave data unaligned
// 0-indexEntry, 1-indexEntry, corrupt-indexEntry // 0-indexEntry, 1-indexEntry, corrupt-indexEntry
idxFile.Truncate(indexEntrySize + indexEntrySize + indexEntrySize/2) idxFile.Truncate(2*indexEntrySize + indexEntrySize/2)
idxFile.Close() idxFile.Close()
// Now open it again // Now open it again
@ -387,7 +392,7 @@ func TestFreezerTruncate(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
defer f.Close() defer f.Close()
f.truncate(10) // 150 bytes f.truncateHead(10) // 150 bytes
if f.items != 10 { if f.items != 10 {
t.Fatalf("expected %d items, got %d", 10, f.items) t.Fatalf("expected %d items, got %d", 10, f.items)
} }
@ -504,7 +509,7 @@ func TestFreezerReadAndTruncate(t *testing.T) {
} }
// Now, truncate back to zero // Now, truncate back to zero
f.truncate(0) f.truncateHead(0)
// Write the data again // Write the data again
batch := f.newBatch() batch := f.newBatch()
@ -565,18 +570,19 @@ func TestFreezerOffset(t *testing.T) {
// Update the index file, so that we store // Update the index file, so that we store
// [ file = 2, offset = 4 ] at index zero // [ file = 2, offset = 4 ] at index zero
tailId := uint32(2) // First file is 2
itemOffset := uint32(4) // We have removed four items
zeroIndex := indexEntry{ zeroIndex := indexEntry{
filenum: tailId, filenum: uint32(2), // First file is 2
offset: itemOffset, offset: uint32(4), // We have removed four items
} }
buf := zeroIndex.append(nil) buf := zeroIndex.append(nil)
// Overwrite index zero // Overwrite index zero
copy(indexBuf, buf) copy(indexBuf, buf)
// Remove the four next indices by overwriting // Remove the four next indices by overwriting
copy(indexBuf[indexEntrySize:], indexBuf[indexEntrySize*5:]) copy(indexBuf[indexEntrySize:], indexBuf[indexEntrySize*5:])
indexFile.WriteAt(indexBuf, 0) indexFile.WriteAt(indexBuf, 0)
// Need to truncate the moved index items // Need to truncate the moved index items
indexFile.Truncate(indexEntrySize * (1 + 2)) indexFile.Truncate(indexEntrySize * (1 + 2))
indexFile.Close() indexFile.Close()
@ -623,13 +629,12 @@ func TestFreezerOffset(t *testing.T) {
// Update the index file, so that we store // Update the index file, so that we store
// [ file = 2, offset = 1M ] at index zero // [ file = 2, offset = 1M ] at index zero
tailId := uint32(2) // First file is 2
itemOffset := uint32(1000000) // We have removed 1M items
zeroIndex := indexEntry{ zeroIndex := indexEntry{
offset: itemOffset, offset: uint32(1000000), // We have removed 1M items
filenum: tailId, filenum: uint32(2), // First file is 2
} }
buf := zeroIndex.append(nil) buf := zeroIndex.append(nil)
// Overwrite index zero // Overwrite index zero
copy(indexBuf, buf) copy(indexBuf, buf)
indexFile.WriteAt(indexBuf, 0) indexFile.WriteAt(indexBuf, 0)
@ -659,6 +664,171 @@ func TestFreezerOffset(t *testing.T) {
} }
} }
func TestTruncateTail(t *testing.T) {
t.Parallel()
rm, wm, sg := metrics.NewMeter(), metrics.NewMeter(), metrics.NewGauge()
fname := fmt.Sprintf("truncate-tail-%d", rand.Uint64())
// Fill table
f, err := newTable(os.TempDir(), fname, rm, wm, sg, 40, true, false)
if err != nil {
t.Fatal(err)
}
// Write 7 x 20 bytes, splitting out into four files
batch := f.newBatch()
require.NoError(t, batch.AppendRaw(0, getChunk(20, 0xFF)))
require.NoError(t, batch.AppendRaw(1, getChunk(20, 0xEE)))
require.NoError(t, batch.AppendRaw(2, getChunk(20, 0xdd)))
require.NoError(t, batch.AppendRaw(3, getChunk(20, 0xcc)))
require.NoError(t, batch.AppendRaw(4, getChunk(20, 0xbb)))
require.NoError(t, batch.AppendRaw(5, getChunk(20, 0xaa)))
require.NoError(t, batch.AppendRaw(6, getChunk(20, 0x11)))
require.NoError(t, batch.commit())
// nothing to do, all the items should still be there.
f.truncateTail(0)
fmt.Println(f.dumpIndexString(0, 1000))
checkRetrieve(t, f, map[uint64][]byte{
0: getChunk(20, 0xFF),
1: getChunk(20, 0xEE),
2: getChunk(20, 0xdd),
3: getChunk(20, 0xcc),
4: getChunk(20, 0xbb),
5: getChunk(20, 0xaa),
6: getChunk(20, 0x11),
})
// truncate single element( item 0 ), deletion is only supported at file level
f.truncateTail(1)
fmt.Println(f.dumpIndexString(0, 1000))
checkRetrieveError(t, f, map[uint64]error{
0: errOutOfBounds,
})
checkRetrieve(t, f, map[uint64][]byte{
1: getChunk(20, 0xEE),
2: getChunk(20, 0xdd),
3: getChunk(20, 0xcc),
4: getChunk(20, 0xbb),
5: getChunk(20, 0xaa),
6: getChunk(20, 0x11),
})
// Reopen the table, the deletion information should be persisted as well
f.Close()
f, err = newTable(os.TempDir(), fname, rm, wm, sg, 40, true, false)
if err != nil {
t.Fatal(err)
}
checkRetrieveError(t, f, map[uint64]error{
0: errOutOfBounds,
})
checkRetrieve(t, f, map[uint64][]byte{
1: getChunk(20, 0xEE),
2: getChunk(20, 0xdd),
3: getChunk(20, 0xcc),
4: getChunk(20, 0xbb),
5: getChunk(20, 0xaa),
6: getChunk(20, 0x11),
})
// truncate two elements( item 0, item 1 ), the file 0 should be deleted
f.truncateTail(2)
checkRetrieveError(t, f, map[uint64]error{
0: errOutOfBounds,
1: errOutOfBounds,
})
checkRetrieve(t, f, map[uint64][]byte{
2: getChunk(20, 0xdd),
3: getChunk(20, 0xcc),
4: getChunk(20, 0xbb),
5: getChunk(20, 0xaa),
6: getChunk(20, 0x11),
})
// Reopen the table, the above testing should still pass
f.Close()
f, err = newTable(os.TempDir(), fname, rm, wm, sg, 40, true, false)
if err != nil {
t.Fatal(err)
}
defer f.Close()
checkRetrieveError(t, f, map[uint64]error{
0: errOutOfBounds,
1: errOutOfBounds,
})
checkRetrieve(t, f, map[uint64][]byte{
2: getChunk(20, 0xdd),
3: getChunk(20, 0xcc),
4: getChunk(20, 0xbb),
5: getChunk(20, 0xaa),
6: getChunk(20, 0x11),
})
// truncate all, the entire freezer should be deleted
f.truncateTail(7)
checkRetrieveError(t, f, map[uint64]error{
0: errOutOfBounds,
1: errOutOfBounds,
2: errOutOfBounds,
3: errOutOfBounds,
4: errOutOfBounds,
5: errOutOfBounds,
6: errOutOfBounds,
})
}
func TestTruncateHead(t *testing.T) {
t.Parallel()
rm, wm, sg := metrics.NewMeter(), metrics.NewMeter(), metrics.NewGauge()
fname := fmt.Sprintf("truncate-head-blow-tail-%d", rand.Uint64())
// Fill table
f, err := newTable(os.TempDir(), fname, rm, wm, sg, 40, true, false)
if err != nil {
t.Fatal(err)
}
// Write 7 x 20 bytes, splitting out into four files
batch := f.newBatch()
require.NoError(t, batch.AppendRaw(0, getChunk(20, 0xFF)))
require.NoError(t, batch.AppendRaw(1, getChunk(20, 0xEE)))
require.NoError(t, batch.AppendRaw(2, getChunk(20, 0xdd)))
require.NoError(t, batch.AppendRaw(3, getChunk(20, 0xcc)))
require.NoError(t, batch.AppendRaw(4, getChunk(20, 0xbb)))
require.NoError(t, batch.AppendRaw(5, getChunk(20, 0xaa)))
require.NoError(t, batch.AppendRaw(6, getChunk(20, 0x11)))
require.NoError(t, batch.commit())
f.truncateTail(4) // Tail = 4
// NewHead is required to be 3, the entire table should be truncated
f.truncateHead(4)
checkRetrieveError(t, f, map[uint64]error{
0: errOutOfBounds, // Deleted by tail
1: errOutOfBounds, // Deleted by tail
2: errOutOfBounds, // Deleted by tail
3: errOutOfBounds, // Deleted by tail
4: errOutOfBounds, // Deleted by Head
5: errOutOfBounds, // Deleted by Head
6: errOutOfBounds, // Deleted by Head
})
// Append new items
batch = f.newBatch()
require.NoError(t, batch.AppendRaw(4, getChunk(20, 0xbb)))
require.NoError(t, batch.AppendRaw(5, getChunk(20, 0xaa)))
require.NoError(t, batch.AppendRaw(6, getChunk(20, 0x11)))
require.NoError(t, batch.commit())
checkRetrieve(t, f, map[uint64][]byte{
4: getChunk(20, 0xbb),
5: getChunk(20, 0xaa),
6: getChunk(20, 0x11),
})
}
func checkRetrieve(t *testing.T, f *freezerTable, items map[uint64][]byte) { func checkRetrieve(t *testing.T, f *freezerTable, items map[uint64][]byte) {
t.Helper() t.Helper()
@ -915,3 +1085,212 @@ func TestFreezerReadonly(t *testing.T) {
t.Fatalf("Writing to readonly table should fail") t.Fatalf("Writing to readonly table should fail")
} }
} }
// randTest performs random freezer table operations.
// Instances of this test are created by Generate.
type randTest []randTestStep
type randTestStep struct {
op int
items []uint64 // for append and retrieve
blobs [][]byte // for append
target uint64 // for truncate(head/tail)
err error // for debugging
}
const (
opReload = iota
opAppend
opRetrieve
opTruncateHead
opTruncateHeadAll
opTruncateTail
opTruncateTailAll
opCheckAll
opMax // boundary value, not an actual op
)
func getVals(first uint64, n int) [][]byte {
var ret [][]byte
for i := 0; i < n; i++ {
val := make([]byte, 8)
binary.BigEndian.PutUint64(val, first+uint64(i))
ret = append(ret, val)
}
return ret
}
func (randTest) Generate(r *rand.Rand, size int) reflect.Value {
var (
deleted uint64 // The number of deleted items from tail
items []uint64 // The index of entries in table
// getItems retrieves the indexes for items in table.
getItems = func(n int) []uint64 {
length := len(items)
if length == 0 {
return nil
}
var ret []uint64
index := rand.Intn(length)
for i := index; len(ret) < n && i < length; i++ {
ret = append(ret, items[i])
}
return ret
}
// addItems appends the given length items into the table.
addItems = func(n int) []uint64 {
var first = deleted
if len(items) != 0 {
first = items[len(items)-1] + 1
}
var ret []uint64
for i := 0; i < n; i++ {
ret = append(ret, first+uint64(i))
}
items = append(items, ret...)
return ret
}
)
var steps randTest
for i := 0; i < size; i++ {
step := randTestStep{op: r.Intn(opMax)}
switch step.op {
case opReload, opCheckAll:
case opAppend:
num := r.Intn(3)
step.items = addItems(num)
if len(step.items) == 0 {
step.blobs = nil
} else {
step.blobs = getVals(step.items[0], num)
}
case opRetrieve:
step.items = getItems(r.Intn(3))
case opTruncateHead:
if len(items) == 0 {
step.target = deleted
} else {
index := r.Intn(len(items))
items = items[:index]
step.target = deleted + uint64(index)
}
case opTruncateHeadAll:
step.target = deleted
items = items[:0]
case opTruncateTail:
if len(items) == 0 {
step.target = deleted
} else {
index := r.Intn(len(items))
items = items[index:]
deleted += uint64(index)
step.target = deleted
}
case opTruncateTailAll:
step.target = deleted + uint64(len(items))
items = items[:0]
deleted = step.target
}
steps = append(steps, step)
}
return reflect.ValueOf(steps)
}
func runRandTest(rt randTest) bool {
fname := fmt.Sprintf("randtest-%d", rand.Uint64())
f, err := newTable(os.TempDir(), fname, metrics.NewMeter(), metrics.NewMeter(), metrics.NewGauge(), 50, true, false)
if err != nil {
panic("failed to initialize table")
}
var values [][]byte
for i, step := range rt {
switch step.op {
case opReload:
f.Close()
f, err = newTable(os.TempDir(), fname, metrics.NewMeter(), metrics.NewMeter(), metrics.NewGauge(), 50, true, false)
if err != nil {
rt[i].err = fmt.Errorf("failed to reload table %v", err)
}
case opCheckAll:
tail := atomic.LoadUint64(&f.itemHidden)
head := atomic.LoadUint64(&f.items)
if tail == head {
continue
}
got, err := f.RetrieveItems(atomic.LoadUint64(&f.itemHidden), head-tail, 100000)
if err != nil {
rt[i].err = err
} else {
if !reflect.DeepEqual(got, values) {
rt[i].err = fmt.Errorf("mismatch on retrieved values %v %v", got, values)
}
}
case opAppend:
batch := f.newBatch()
for i := 0; i < len(step.items); i++ {
batch.AppendRaw(step.items[i], step.blobs[i])
}
batch.commit()
values = append(values, step.blobs...)
case opRetrieve:
var blobs [][]byte
if len(step.items) == 0 {
continue
}
tail := atomic.LoadUint64(&f.itemHidden)
for i := 0; i < len(step.items); i++ {
blobs = append(blobs, values[step.items[i]-tail])
}
got, err := f.RetrieveItems(step.items[0], uint64(len(step.items)), 100000)
if err != nil {
rt[i].err = err
} else {
if !reflect.DeepEqual(got, blobs) {
rt[i].err = fmt.Errorf("mismatch on retrieved values %v %v %v", got, blobs, step.items)
}
}
case opTruncateHead:
f.truncateHead(step.target)
length := atomic.LoadUint64(&f.items) - atomic.LoadUint64(&f.itemHidden)
values = values[:length]
case opTruncateHeadAll:
f.truncateHead(step.target)
values = nil
case opTruncateTail:
prev := atomic.LoadUint64(&f.itemHidden)
f.truncateTail(step.target)
truncated := atomic.LoadUint64(&f.itemHidden) - prev
values = values[truncated:]
case opTruncateTailAll:
f.truncateTail(step.target)
values = nil
}
// Abort the test on error.
if rt[i].err != nil {
return false
}
}
f.Close()
return true
}
func TestRandom(t *testing.T) {
if err := quick.Check(runRandTest, nil); err != nil {
if cerr, ok := err.(*quick.CheckError); ok {
t.Fatalf("random test iteration %d failed: %s", cerr.Count, spew.Sdump(cerr.In))
}
t.Fatal(err)
}
}

View File

@ -186,7 +186,7 @@ func TestFreezerConcurrentModifyRetrieve(t *testing.T) {
wg.Wait() wg.Wait()
} }
// This test runs ModifyAncients and TruncateAncients concurrently with each other. // This test runs ModifyAncients and TruncateHead concurrently with each other.
func TestFreezerConcurrentModifyTruncate(t *testing.T) { func TestFreezerConcurrentModifyTruncate(t *testing.T) {
f, dir := newFreezerForTesting(t, freezerTestTableDef) f, dir := newFreezerForTesting(t, freezerTestTableDef)
defer os.RemoveAll(dir) defer os.RemoveAll(dir)
@ -196,7 +196,7 @@ func TestFreezerConcurrentModifyTruncate(t *testing.T) {
for i := 0; i < 1000; i++ { for i := 0; i < 1000; i++ {
// First reset and write 100 items. // First reset and write 100 items.
if err := f.TruncateAncients(0); err != nil { if err := f.TruncateHead(0); err != nil {
t.Fatal("truncate failed:", err) t.Fatal("truncate failed:", err)
} }
_, err := f.ModifyAncients(func(op ethdb.AncientWriteOp) error { _, err := f.ModifyAncients(func(op ethdb.AncientWriteOp) error {
@ -231,7 +231,7 @@ func TestFreezerConcurrentModifyTruncate(t *testing.T) {
wg.Done() wg.Done()
}() }()
go func() { go func() {
truncateErr = f.TruncateAncients(10) truncateErr = f.TruncateHead(10)
wg.Done() wg.Done()
}() }()
go func() { go func() {

120
core/rawdb/freezer_utils.go Normal file
View File

@ -0,0 +1,120 @@
// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package rawdb
import (
"io"
"io/ioutil"
"os"
"path/filepath"
)
// copyFrom copies data from 'srcPath' at offset 'offset' into 'destPath'.
// The 'destPath' is created if it doesn't exist, otherwise it is overwritten.
// Before the copy is executed, there is a callback can be registered to
// manipulate the dest file.
// It is perfectly valid to have destPath == srcPath.
func copyFrom(srcPath, destPath string, offset uint64, before func(f *os.File) error) error {
// Create a temp file in the same dir where we want it to wind up
f, err := ioutil.TempFile(filepath.Dir(destPath), "*")
if err != nil {
return err
}
fname := f.Name()
// Clean up the leftover file
defer func() {
if f != nil {
f.Close()
}
os.Remove(fname)
}()
// Apply the given function if it's not nil before we copy
// the content from the src.
if before != nil {
if err := before(f); err != nil {
return err
}
}
// Open the source file
src, err := os.Open(srcPath)
if err != nil {
return err
}
if _, err = src.Seek(int64(offset), 0); err != nil {
src.Close()
return err
}
// io.Copy uses 32K buffer internally.
_, err = io.Copy(f, src)
if err != nil {
src.Close()
return err
}
// Rename the temporary file to the specified dest name.
// src may be same as dest, so needs to be closed before
// we do the final move.
src.Close()
if err := f.Close(); err != nil {
return err
}
f = nil
if err := os.Rename(fname, destPath); err != nil {
return err
}
return nil
}
// openFreezerFileForAppend opens a freezer table file and seeks to the end
func openFreezerFileForAppend(filename string) (*os.File, error) {
// Open the file without the O_APPEND flag
// because it has differing behaviour during Truncate operations
// on different OS's
file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644)
if err != nil {
return nil, err
}
// Seek to end for append
if _, err = file.Seek(0, io.SeekEnd); err != nil {
return nil, err
}
return file, nil
}
// openFreezerFileForReadOnly opens a freezer table file for read only access
func openFreezerFileForReadOnly(filename string) (*os.File, error) {
return os.OpenFile(filename, os.O_RDONLY, 0644)
}
// openFreezerFileTruncated opens a freezer table making sure it is truncated
func openFreezerFileTruncated(filename string) (*os.File, error) {
return os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
}
// truncateFreezerFile resizes a freezer table file and seeks to the end
func truncateFreezerFile(file *os.File, size int64) error {
if err := file.Truncate(size); err != nil {
return err
}
// Seek to end for append
if _, err := file.Seek(0, io.SeekEnd); err != nil {
return err
}
return nil
}

View File

@ -0,0 +1,76 @@
// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package rawdb
import (
"bytes"
"io/ioutil"
"os"
"testing"
)
func TestCopyFrom(t *testing.T) {
var (
content = []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8}
prefix = []byte{0x9, 0xa, 0xb, 0xc, 0xd, 0xf}
)
var cases = []struct {
src, dest string
offset uint64
writePrefix bool
}{
{"foo", "bar", 0, false},
{"foo", "bar", 1, false},
{"foo", "bar", 8, false},
{"foo", "foo", 0, false},
{"foo", "foo", 1, false},
{"foo", "foo", 8, false},
{"foo", "bar", 0, true},
{"foo", "bar", 1, true},
{"foo", "bar", 8, true},
}
for _, c := range cases {
ioutil.WriteFile(c.src, content, 0644)
if err := copyFrom(c.src, c.dest, c.offset, func(f *os.File) error {
if !c.writePrefix {
return nil
}
f.Write(prefix)
return nil
}); err != nil {
os.Remove(c.src)
t.Fatalf("Failed to copy %v", err)
}
blob, err := ioutil.ReadFile(c.dest)
if err != nil {
os.Remove(c.src)
os.Remove(c.dest)
t.Fatalf("Failed to read %v", err)
}
want := content[c.offset:]
if c.writePrefix {
want = append(prefix, want...)
}
if !bytes.Equal(blob, want) {
t.Fatal("Unexpected value")
}
os.Remove(c.src)
os.Remove(c.dest)
}
}

View File

@ -74,6 +74,12 @@ func (t *table) Ancients() (uint64, error) {
return t.db.Ancients() return t.db.Ancients()
} }
// Tail is a noop passthrough that just forwards the request to the underlying
// database.
func (t *table) Tail() (uint64, error) {
return t.db.Tail()
}
// AncientSize is a noop passthrough that just forwards the request to the underlying // AncientSize is a noop passthrough that just forwards the request to the underlying
// database. // database.
func (t *table) AncientSize(kind string) (uint64, error) { func (t *table) AncientSize(kind string) (uint64, error) {
@ -89,10 +95,16 @@ func (t *table) ReadAncients(fn func(reader ethdb.AncientReader) error) (err err
return t.db.ReadAncients(fn) return t.db.ReadAncients(fn)
} }
// TruncateAncients is a noop passthrough that just forwards the request to the underlying // TruncateHead is a noop passthrough that just forwards the request to the underlying
// database. // database.
func (t *table) TruncateAncients(items uint64) error { func (t *table) TruncateHead(items uint64) error {
return t.db.TruncateAncients(items) return t.db.TruncateHead(items)
}
// TruncateTail is a noop passthrough that just forwards the request to the underlying
// database.
func (t *table) TruncateTail(items uint64) error {
return t.db.TruncateTail(items)
} }
// Sync is a noop passthrough that just forwards the request to the underlying // Sync is a noop passthrough that just forwards the request to the underlying

View File

@ -87,6 +87,10 @@ type AncientReader interface {
// Ancients returns the ancient item numbers in the ancient store. // Ancients returns the ancient item numbers in the ancient store.
Ancients() (uint64, error) Ancients() (uint64, error)
// Tail returns the number of first stored item in the freezer.
// This number can also be interpreted as the total deleted item numbers.
Tail() (uint64, error)
// AncientSize returns the ancient size of the specified category. // AncientSize returns the ancient size of the specified category.
AncientSize(kind string) (uint64, error) AncientSize(kind string) (uint64, error)
} }
@ -107,8 +111,16 @@ type AncientWriter interface {
// The integer return value is the total size of the written data. // The integer return value is the total size of the written data.
ModifyAncients(func(AncientWriteOp) error) (int64, error) ModifyAncients(func(AncientWriteOp) error) (int64, error)
// TruncateAncients discards all but the first n ancient data from the ancient store. // TruncateHead discards all but the first n ancient data from the ancient store.
TruncateAncients(n uint64) error // After the truncation, the latest item can be accessed it item_n-1(start from 0).
TruncateHead(n uint64) error
// TruncateTail discards the first n ancient data from the ancient store. The already
// deleted items are ignored. After the truncation, the earliest item can be accessed
// is item_n(start from 0). The deleted items may not be removed from the ancient store
// immediately, but only when the accumulated deleted data reach the threshold then
// will be removed all together.
TruncateTail(n uint64) error
// Sync flushes all in-memory ancient store data to disk. // Sync flushes all in-memory ancient store data to disk.
Sync() error Sync() error