f566dd305e
* cmd, core, tests: initial state pruner core: fix db inspector cmd/geth: add verify-state cmd/geth: add verification tool core/rawdb: implement flatdb cmd, core: fix rebase core/state: use new contract code layout core/state/pruner: avoid deleting genesis state cmd/geth: add helper function core, cmd: fix extract genesis core: minor fixes contracts: remove useless core/state/snapshot: plugin stacktrie core: polish core/state/snapshot: iterate storage concurrently core/state/snapshot: fix iteration core: add comments core/state/snapshot: polish code core/state: polish core/state/snapshot: rebase core/rawdb: add comments core/rawdb: fix tests core/rawdb: improve tests core/state/snapshot: fix concurrent iteration core/state: run pruning during the recovery core, trie: implement martin's idea core, eth: delete flatdb and polish pruner trie: fix import core/state/pruner: add log core/state/pruner: fix issues core/state/pruner: don't read back core/state/pruner: fix contract code write core/state/pruner: check root node presence cmd, core: polish log core/state: use HEAD-127 as the target core/state/snapshot: improve tests cmd/geth: fix verification tool cmd/geth: use HEAD as the verification default target all: replace the bloomfilter with martin's fork cmd, core: polish code core, cmd: forcibly delete state root core/state/pruner: add hash64 core/state/pruner: fix blacklist core/state: remove blacklist cmd, core: delete trie clean cache before pruning cmd, core: fix lint cmd, core: fix rebase core/state: fix the special case for clique networks core/state/snapshot: remove useless code core/state/pruner: capping the snapshot after pruning cmd, core, eth: fixes core/rawdb: update db inspector cmd/geth: polish code core/state/pruner: fsync bloom filter cmd, core: print warning log core/state/pruner: adjust the parameters for bloom filter cmd, core: create the bloom filter by size core: polish core/state/pruner: sanitize invalid bloomfilter size cmd: address comments 
cmd/geth: address comments cmd/geth: address comment core/state/pruner: address comments core/state/pruner: rename homedir to datadir cmd, core: address comments core/state/pruner: address comment core/state: address comments core, cmd, tests: address comments core: address comments core/state/pruner: release the iterator after each commit core/state/pruner: improve pruner cmd, core: adjust bloom paramters core/state/pruner: fix lint core/state/pruner: fix tests core: fix rebase core/state/pruner: remove atomic rename core/state/pruner: address comments all: run go mod tidy core/state/pruner: avoid false-positive for the middle state roots core/state/pruner: add checks for middle roots cmd/geth: replace crit with error * core/state/pruner: fix lint * core: drop legacy bloom filter * core/state/snapshot: improve pruner * core/state/snapshot: polish concurrent logs to report ETA vs. hashes * core/state/pruner: add progress report for pruning and compaction too * core: fix snapshot test API * core/state: fix some pruning logs * core/state/pruner: support recovering from bloom flush fail Co-authored-by: Péter Szilágyi <peterke@gmail.com>
133 lines
5.2 KiB
Go
133 lines
5.2 KiB
Go
// Copyright 2020 The go-ethereum Authors
|
|
// This file is part of the go-ethereum library.
|
|
//
|
|
// The go-ethereum library is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// The go-ethereum library is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package pruner
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"errors"
|
|
"os"
|
|
|
|
"github.com/ethereum/go-ethereum/common"
|
|
"github.com/ethereum/go-ethereum/core/rawdb"
|
|
"github.com/ethereum/go-ethereum/log"
|
|
bloomfilter "github.com/holiman/bloomfilter/v2"
|
|
)
|
|
|
|
// stateBloomHasher adapts a raw byte blob (a trie node hash or a contract code
// hash) to the 64-bit hasher interface expected by the bloom filter library.
// Only Size and Sum64 are functional; the remaining methods exist purely to
// satisfy the interface and panic when invoked.
type stateBloomHasher []byte

// Write is not supported and always panics.
func (h stateBloomHasher) Write(p []byte) (n int, err error) {
	panic("not implemented")
}

// Sum is not supported and always panics.
func (h stateBloomHasher) Sum(b []byte) []byte {
	panic("not implemented")
}

// Reset is not supported and always panics.
func (h stateBloomHasher) Reset() {
	panic("not implemented")
}

// BlockSize is not supported and always panics.
func (h stateBloomHasher) BlockSize() int {
	panic("not implemented")
}

// Size reports the width of the mini hash in bytes.
func (h stateBloomHasher) Size() int {
	return 8
}

// Sum64 interprets the first eight bytes of the blob as a big-endian unsigned
// integer. The blob must be at least eight bytes long, otherwise the
// underlying decode panics.
func (h stateBloomHasher) Sum64() uint64 {
	return binary.BigEndian.Uint64(h)
}
|
|
|
|
// stateBloom is a bloom filter used during the state convesion(snapshot->state).
|
|
// The keys of all generated entries will be recorded here so that in the pruning
|
|
// stage the entries belong to the specific version can be avoided for deletion.
|
|
//
|
|
// The false-positive is allowed here. The "false-positive" entries means they
|
|
// actually don't belong to the specific version but they are not deleted in the
|
|
// pruning. The downside of the false-positive allowance is we may leave some "dangling"
|
|
// nodes in the disk. But in practice the it's very unlike the dangling node is
|
|
// state root. So in theory this pruned state shouldn't be visited anymore. Another
|
|
// potential issue is for fast sync. If we do another fast sync upon the pruned
|
|
// database, it's problematic which will stop the expansion during the syncing.
|
|
// TODO address it @rjl493456442 @holiman @karalabe.
|
|
//
|
|
// After the entire state is generated, the bloom filter should be persisted into
|
|
// the disk. It indicates the whole generation procedure is finished.
|
|
type stateBloom struct {
|
|
bloom *bloomfilter.Filter
|
|
}
|
|
|
|
// newStateBloomWithSize creates a brand new state bloom for state generation.
|
|
// The bloom filter will be created by the passing bloom filter size. According
|
|
// to the https://hur.st/bloomfilter/?n=600000000&p=&m=2048MB&k=4, the parameters
|
|
// are picked so that the false-positive rate for mainnet is low enough.
|
|
func newStateBloomWithSize(size uint64) (*stateBloom, error) {
|
|
bloom, err := bloomfilter.New(size*1024*1024*8, 4)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
log.Info("Initialized state bloom", "size", common.StorageSize(float64(bloom.M()/8)))
|
|
return &stateBloom{bloom: bloom}, nil
|
|
}
|
|
|
|
// NewStateBloomFromDisk loads the state bloom from the given file.
|
|
// In this case the assumption is held the bloom filter is complete.
|
|
func NewStateBloomFromDisk(filename string) (*stateBloom, error) {
|
|
bloom, _, err := bloomfilter.ReadFile(filename)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &stateBloom{bloom: bloom}, nil
|
|
}
|
|
|
|
// Commit flushes the bloom filter content into the disk and marks the bloom
|
|
// as complete.
|
|
func (bloom *stateBloom) Commit(filename, tempname string) error {
|
|
// Write the bloom out into a temporary file
|
|
_, err := bloom.bloom.WriteFile(tempname)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// Ensure the file is synced to disk
|
|
f, err := os.Open(tempname)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := f.Sync(); err != nil {
|
|
f.Close()
|
|
return err
|
|
}
|
|
f.Close()
|
|
|
|
// Move the teporary file into it's final location
|
|
return os.Rename(tempname, filename)
|
|
}
|
|
|
|
// Put implements the KeyValueWriter interface. But here only the key is needed.
|
|
func (bloom *stateBloom) Put(key []byte, value []byte) error {
|
|
// If the key length is not 32bytes, ensure it's contract code
|
|
// entry with new scheme.
|
|
if len(key) != common.HashLength {
|
|
isCode, codeKey := rawdb.IsCodeKey(key)
|
|
if !isCode {
|
|
return errors.New("invalid entry")
|
|
}
|
|
bloom.bloom.Add(stateBloomHasher(codeKey))
|
|
return nil
|
|
}
|
|
bloom.bloom.Add(stateBloomHasher(key))
|
|
return nil
|
|
}
|
|
|
|
// Delete removes the key from the key-value data store.
|
|
func (bloom *stateBloom) Delete(key []byte) error { panic("not supported") }
|
|
|
|
// Contain is the wrapper of the underlying contains function which
|
|
// reports whether the key is contained.
|
|
// - If it says yes, the key may be contained
|
|
// - If it says no, the key is definitely not contained.
|
|
func (bloom *stateBloom) Contain(key []byte) (bool, error) {
|
|
return bloom.bloom.Contains(stateBloomHasher(key)), nil
|
|
}
|