* cmd, core, tests: initial state pruner

  core: fix db inspector
  cmd/geth: add verify-state
  cmd/geth: add verification tool
  core/rawdb: implement flatdb
  cmd, core: fix rebase
  core/state: use new contract code layout
  core/state/pruner: avoid deleting genesis state
  cmd/geth: add helper function
  core, cmd: fix extract genesis
  core: minor fixes
  contracts: remove useless
  core/state/snapshot: plugin stacktrie
  core: polish
  core/state/snapshot: iterate storage concurrently
  core/state/snapshot: fix iteration
  core: add comments
  core/state/snapshot: polish code
  core/state: polish
  core/state/snapshot: rebase
  core/rawdb: add comments
  core/rawdb: fix tests
  core/rawdb: improve tests
  core/state/snapshot: fix concurrent iteration
  core/state: run pruning during the recovery
  core, trie: implement martin's idea
  core, eth: delete flatdb and polish pruner
  trie: fix import
  core/state/pruner: add log
  core/state/pruner: fix issues
  core/state/pruner: don't read back
  core/state/pruner: fix contract code write
  core/state/pruner: check root node presence
  cmd, core: polish log
  core/state: use HEAD-127 as the target
  core/state/snapshot: improve tests
  cmd/geth: fix verification tool
  cmd/geth: use HEAD as the verification default target
  all: replace the bloomfilter with martin's fork
  cmd, core: polish code
  core, cmd: forcibly delete state root
  core/state/pruner: add hash64
  core/state/pruner: fix blacklist
  core/state: remove blacklist
  cmd, core: delete trie clean cache before pruning
  cmd, core: fix lint
  cmd, core: fix rebase
  core/state: fix the special case for clique networks
  core/state/snapshot: remove useless code
  core/state/pruner: capping the snapshot after pruning
  cmd, core, eth: fixes
  core/rawdb: update db inspector
  cmd/geth: polish code
  core/state/pruner: fsync bloom filter
  cmd, core: print warning log
  core/state/pruner: adjust the parameters for bloom filter
  cmd, core: create the bloom filter by size
  core: polish
  core/state/pruner: sanitize invalid bloomfilter size
  cmd: address comments
  cmd/geth: address comments
  cmd/geth: address comment
  core/state/pruner: address comments
  core/state/pruner: rename homedir to datadir
  cmd, core: address comments
  core/state/pruner: address comment
  core/state: address comments
  core, cmd, tests: address comments
  core: address comments
  core/state/pruner: release the iterator after each commit
  core/state/pruner: improve pruner
  cmd, core: adjust bloom paramters
  core/state/pruner: fix lint
  core/state/pruner: fix tests
  core: fix rebase
  core/state/pruner: remove atomic rename
  core/state/pruner: address comments
  all: run go mod tidy
  core/state/pruner: avoid false-positive for the middle state roots
  core/state/pruner: add checks for middle roots
  cmd/geth: replace crit with error

* core/state/pruner: fix lint
* core: drop legacy bloom filter
* core/state/snapshot: improve pruner
* core/state/snapshot: polish concurrent logs to report ETA vs. hashes
* core/state/pruner: add progress report for pruning and compaction too
* core: fix snapshot test API
* core/state: fix some pruning logs
* core/state/pruner: support recovering from bloom flush fail

Co-authored-by: Péter Szilágyi <peterke@gmail.com>

// Copyright 2020 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package snapshot

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"math"
	"runtime"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/rlp"
	"github.com/ethereum/go-ethereum/trie"
)

// trieKV represents a trie key-value pair
type trieKV struct {
	key   common.Hash
	value []byte
}

type (
	// trieGeneratorFn is the interface of trie generation which can
	// be implemented by different trie algorithms.
	trieGeneratorFn func(db ethdb.KeyValueWriter, in chan (trieKV), out chan (common.Hash))

	// leafCallbackFn is the callback invoked at the leaves of the trie,
	// returns the subtrie root with the specified subtrie identifier.
	leafCallbackFn func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error)
)

// GenerateAccountTrieRoot takes an account iterator and reproduces the root hash.
func GenerateAccountTrieRoot(it AccountIterator) (common.Hash, error) {
	return generateTrieRoot(nil, it, common.Hash{}, stackTrieGenerate, nil, newGenerateStats(), true)
}

// GenerateStorageTrieRoot takes a storage iterator and reproduces the root hash.
func GenerateStorageTrieRoot(account common.Hash, it StorageIterator) (common.Hash, error) {
	return generateTrieRoot(nil, it, account, stackTrieGenerate, nil, newGenerateStats(), true)
}
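
// An illustrative usage sketch for GenerateAccountTrieRoot (an assumption for
// illustration, not part of the snapshot package itself): a caller holding a
// snapshot Tree named snaptree and a state root named root could cross-check
// the snapshot content against that root roughly like this:
//
//	it, err := snaptree.AccountIterator(root, common.Hash{})
//	if err != nil {
//		return err // the requested snapshot layer may not exist
//	}
//	defer it.Release()
//	if got, err := GenerateAccountTrieRoot(it); err != nil {
//		return err
//	} else if got != root {
//		return fmt.Errorf("account trie root mismatch: have %x, want %x", got, root)
//	}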

// GenerateTrie takes the whole snapshot tree as the input, traverses all the
// accounts as well as the corresponding storages and regenerates the whole state
// (account trie + all storage tries).
func GenerateTrie(snaptree *Tree, root common.Hash, src ethdb.Database, dst ethdb.KeyValueWriter) error {
	// Traverse all state by snapshot, re-generate the whole state trie
	acctIt, err := snaptree.AccountIterator(root, common.Hash{})
	if err != nil {
		return err // The required snapshot might not exist.
	}
	defer acctIt.Release()

	got, err := generateTrieRoot(dst, acctIt, common.Hash{}, stackTrieGenerate, func(dst ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) {
		// Migrate the code first, commit the contract code into the tmp db.
		if codeHash != emptyCode {
			code := rawdb.ReadCode(src, codeHash)
			if len(code) == 0 {
				return common.Hash{}, errors.New("failed to read contract code")
			}
			rawdb.WriteCode(dst, codeHash, code)
		}
		// Then migrate all storage trie nodes into the tmp db.
		storageIt, err := snaptree.StorageIterator(root, accountHash, common.Hash{})
		if err != nil {
			return common.Hash{}, err
		}
		defer storageIt.Release()

		hash, err := generateTrieRoot(dst, storageIt, accountHash, stackTrieGenerate, nil, stat, false)
		if err != nil {
			return common.Hash{}, err
		}
		return hash, nil
	}, newGenerateStats(), true)

	if err != nil {
		return err
	}
	if got != root {
		return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root)
	}
	return nil
}
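
// A minimal usage sketch for GenerateTrie (assumptions for illustration only:
// chaindb is the live database backing snaptree, headRoot is the snapshot root
// to regenerate, and the scratch target is just an in-memory database):
//
//	scratch := rawdb.NewMemoryDatabase()
//	if err := GenerateTrie(snaptree, headRoot, chaindb, scratch); err != nil {
//		log.Error("State regeneration failed", "root", headRoot, "err", err)
//	}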

// generateStats is a collection of statistics gathered by the trie generator
// for logging purposes.
type generateStats struct {
	head  common.Hash
	start time.Time

	accounts uint64 // Number of accounts done (including those being crawled)
	slots    uint64 // Number of storage slots done (including those being crawled)

	slotsStart map[common.Hash]time.Time   // Start time for account slot crawling
	slotsHead  map[common.Hash]common.Hash // Slot head for accounts being crawled

	lock sync.RWMutex
}

// newGenerateStats creates a new generator stats.
func newGenerateStats() *generateStats {
	return &generateStats{
		slotsStart: make(map[common.Hash]time.Time),
		slotsHead:  make(map[common.Hash]common.Hash),
		start:      time.Now(),
	}
}

// progressAccounts updates the generator stats for the account range.
func (stat *generateStats) progressAccounts(account common.Hash, done uint64) {
	stat.lock.Lock()
	defer stat.lock.Unlock()

	stat.accounts += done
	stat.head = account
}

// finishAccounts updates the generator stats for the finished account range.
func (stat *generateStats) finishAccounts(done uint64) {
	stat.lock.Lock()
	defer stat.lock.Unlock()

	stat.accounts += done
}

// progressContract updates the generator stats for a specific in-progress contract.
func (stat *generateStats) progressContract(account common.Hash, slot common.Hash, done uint64) {
	stat.lock.Lock()
	defer stat.lock.Unlock()

	stat.slots += done
	stat.slotsHead[account] = slot
	if _, ok := stat.slotsStart[account]; !ok {
		stat.slotsStart[account] = time.Now()
	}
}

// finishContract updates the generator stats for a specific just-finished contract.
func (stat *generateStats) finishContract(account common.Hash, done uint64) {
	stat.lock.Lock()
	defer stat.lock.Unlock()

	stat.slots += done
	delete(stat.slotsHead, account)
	delete(stat.slotsStart, account)
}

// report prints the cumulative progress statistics smartly.
func (stat *generateStats) report() {
	stat.lock.RLock()
	defer stat.lock.RUnlock()

	ctx := []interface{}{
		"accounts", stat.accounts,
		"slots", stat.slots,
		"elapsed", common.PrettyDuration(time.Since(stat.start)),
	}
	if stat.accounts > 0 {
		// If there's progress on the account trie, estimate the time to finish crawling it
		if done := binary.BigEndian.Uint64(stat.head[:8]) / stat.accounts; done > 0 {
			var (
				left  = (math.MaxUint64 - binary.BigEndian.Uint64(stat.head[:8])) / stat.accounts
				speed = done/uint64(time.Since(stat.start)/time.Millisecond+1) + 1 // +1 to avoid division by zero
				eta   = time.Duration(left/speed) * time.Millisecond
			)
			// If there are large contract crawls in progress, estimate their finish time
			for acc, head := range stat.slotsHead {
				start := stat.slotsStart[acc]
				if done := binary.BigEndian.Uint64(head[:8]); done > 0 {
					var (
						left  = math.MaxUint64 - binary.BigEndian.Uint64(head[:8])
						speed = done/uint64(time.Since(start)/time.Millisecond+1) + 1 // +1 to avoid division by zero
					)
					// Override the ETA if larger than the largest until now
					if slotETA := time.Duration(left/speed) * time.Millisecond; eta < slotETA {
						eta = slotETA
					}
				}
			}
			ctx = append(ctx, []interface{}{
				"eta", common.PrettyDuration(eta),
			}...)
		}
	}
	log.Info("Iterating state snapshot", ctx...)
}
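
// The ETA above relies on account and slot hashes being uniformly distributed:
// the first 8 bytes of the current head, read as a big-endian uint64,
// approximate the fraction of the 2^64 keyspace already covered. A rough worked
// example (numbers are illustrative only): a head starting with 0x4000... means
// about 25% of the keyspace is done; if reaching it took 10 minutes, the
// remaining ~75% is reported as an ETA of roughly 30 minutes, assuming the
// crawl speed stays constant.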

// reportDone prints the last log when the whole generation is finished.
func (stat *generateStats) reportDone() {
	stat.lock.RLock()
	defer stat.lock.RUnlock()

	var ctx []interface{}
	ctx = append(ctx, []interface{}{"accounts", stat.accounts}...)
	if stat.slots != 0 {
		ctx = append(ctx, []interface{}{"slots", stat.slots}...)
	}
	ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...)
	log.Info("Iterated snapshot", ctx...)
}

// runReport periodically prints the progress information.
func runReport(stats *generateStats, stop chan bool) {
	timer := time.NewTimer(0)
	defer timer.Stop()

	for {
		select {
		case <-timer.C:
			stats.report()
			timer.Reset(time.Second * 8)
		case success := <-stop:
			if success {
				stats.reportDone()
			}
			return
		}
	}
}

// generateTrieRoot generates the trie hash based on the snapshot iterator.
// It can be used for generating the account trie, a storage trie, or even the
// whole state, which connects the accounts and their corresponding storages.
func generateTrieRoot(db ethdb.KeyValueWriter, it Iterator, account common.Hash, generatorFn trieGeneratorFn, leafCallback leafCallbackFn, stats *generateStats, report bool) (common.Hash, error) {
	var (
		in      = make(chan trieKV)         // chan to pass leaves
		out     = make(chan common.Hash, 1) // chan to collect result
		stoplog = make(chan bool, 1)        // 1-size buffer, works when logging is not enabled
		wg      sync.WaitGroup
	)
	// Spin up a go-routine for trie hash re-generation
	wg.Add(1)
	go func() {
		defer wg.Done()
		generatorFn(db, in, out)
	}()
	// Spin up a go-routine for progress logging
	if report && stats != nil {
		wg.Add(1)
		go func() {
			defer wg.Done()
			runReport(stats, stoplog)
		}()
	}
	// Create a semaphore to assign tasks and collect results through. We'll pre-
	// fill it with nils, thus using the same channel for both limiting concurrent
	// processing and gathering results.
	threads := runtime.NumCPU()
	results := make(chan error, threads)
	for i := 0; i < threads; i++ {
		results <- nil // fill the semaphore
	}
	// stop is a helper function to shut down the background threads
	// and return the re-generated trie hash.
	stop := func(fail error) (common.Hash, error) {
		close(in)
		result := <-out
		for i := 0; i < threads; i++ {
			if err := <-results; err != nil && fail == nil {
				fail = err
			}
		}
		stoplog <- fail == nil

		wg.Wait()
		return result, fail
	}
	var (
		logged    = time.Now()
		processed = uint64(0)
		leaf      trieKV
	)
	// Start to feed leaves
	for it.Next() {
		if account == (common.Hash{}) {
			var (
				err      error
				fullData []byte
			)
			if leafCallback == nil {
				fullData, err = FullAccountRLP(it.(AccountIterator).Account())
				if err != nil {
					return stop(err)
				}
			} else {
				// Wait until the semaphore allows us to continue, aborting if
				// a sub-task failed
				if err := <-results; err != nil {
					results <- nil // stop will drain the results, add a noop back for this error we just consumed
					return stop(err)
				}
				// Fetch the next account and process it concurrently
				account, err := FullAccount(it.(AccountIterator).Account())
				if err != nil {
					return stop(err)
				}
				go func(hash common.Hash) {
					subroot, err := leafCallback(db, hash, common.BytesToHash(account.CodeHash), stats)
					if err != nil {
						results <- err
						return
					}
					if !bytes.Equal(account.Root, subroot.Bytes()) {
						results <- fmt.Errorf("invalid subroot(%x), want %x, got %x", it.Hash(), account.Root, subroot)
						return
					}
					results <- nil
				}(it.Hash())
				fullData, err = rlp.EncodeToBytes(account)
				if err != nil {
					return stop(err)
				}
			}
			leaf = trieKV{it.Hash(), fullData}
		} else {
			leaf = trieKV{it.Hash(), common.CopyBytes(it.(StorageIterator).Slot())}
		}
		in <- leaf

		// Accumulate the generation statistic if it's required.
		processed++
		if time.Since(logged) > 3*time.Second && stats != nil {
			if account == (common.Hash{}) {
				stats.progressAccounts(it.Hash(), processed)
			} else {
				stats.progressContract(account, it.Hash(), processed)
			}
			logged, processed = time.Now(), 0
		}
	}
	// Commit the last part statistic.
	if processed > 0 && stats != nil {
		if account == (common.Hash{}) {
			stats.finishAccounts(processed)
		} else {
			stats.finishContract(account, processed)
		}
	}
	return stop(nil)
}

// stackTrieGenerate is a trieGeneratorFn which streams the leaves received on
// the in channel into a stack trie, committing the resulting nodes into db if
// it is non-nil (hash-only otherwise), and reports the root hash on out.
func stackTrieGenerate(db ethdb.KeyValueWriter, in chan trieKV, out chan common.Hash) {
	t := trie.NewStackTrie(db)
	for leaf := range in {
		t.TryUpdate(leaf.key[:], leaf.value)
	}
	var root common.Hash
	if db == nil {
		root = t.Hash()
	} else {
		root, _ = t.Commit()
	}
	out <- root
}
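
// Any function matching the trieGeneratorFn signature can be plugged into
// generateTrieRoot. As an illustrative sketch (not used in-tree), a hash-only
// variant could skip persistence entirely by handing the stack trie a nil
// writer, regardless of the db argument:
//
//	func hashOnlyGenerate(_ ethdb.KeyValueWriter, in chan trieKV, out chan common.Hash) {
//		t := trie.NewStackTrie(nil) // nil writer: hash only, nothing is persisted
//		for leaf := range in {
//			t.TryUpdate(leaf.key[:], leaf.value)
//		}
//		out <- t.Hash()
//	}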