ipld-eth-state-snapshot/pkg/snapshot/service.go

341 lines
9.0 KiB
Go
Raw Normal View History

2020-07-01 18:44:59 +00:00
// Copyright © 2020 Vulcanize, Inc
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package snapshot
import (
"bytes"
2020-07-01 23:07:56 +00:00
"errors"
2020-07-01 18:44:59 +00:00
"fmt"
2020-08-20 10:23:36 +00:00
"sync"
2020-07-01 18:44:59 +00:00
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/state"
2022-01-08 03:32:45 +00:00
"github.com/ethereum/go-ethereum/core/types"
2020-07-01 18:44:59 +00:00
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
"github.com/sirupsen/logrus"
2020-07-01 18:44:59 +00:00
. "github.com/vulcanize/eth-pg-ipfs-state-snapshot/pkg/types"
2022-01-08 03:32:45 +00:00
iter "github.com/vulcanize/go-eth-state-node-iterator"
2020-07-01 18:44:59 +00:00
)
var (
emptyNode, _ = rlp.EncodeToBytes([]byte{})
emptyCodeHash = crypto.Keccak256([]byte{})
2020-07-01 18:44:59 +00:00
emptyContractRoot = crypto.Keccak256Hash(emptyNode)
defaultBatchSize = uint(100)
2020-07-01 18:44:59 +00:00
)
2021-12-14 06:50:19 +00:00
// Service holds ethDB and stateDB to read data from lvldb and Publisher
// to publish trie in postgres DB.
2020-07-01 18:44:59 +00:00
type Service struct {
ethDB ethdb.Database
stateDB state.Database
ipfsPublisher Publisher
maxBatchSize uint
tracker iteratorTracker
recoveryFile string
2020-07-01 18:44:59 +00:00
}
2022-01-11 05:37:27 +00:00
// NewLevelDB opens the LevelDB database at con.LevelDBPath together with the
// ancient ("freezer") store at con.AncientDBPath and returns the combined
// ethdb.Database.
func NewLevelDB(con *EthConfig) (ethdb.Database, error) {
	const (
		cacheSize = 1024 // second positional argument of the rawdb constructor
		handles   = 256  // third positional argument of the rawdb constructor
		namespace = "eth-pg-ipfs-state-snapshot"
		readOnly  = false
	)
	return rawdb.NewLevelDBDatabaseWithFreezer(
		con.LevelDBPath,
		cacheSize,
		handles,
		con.AncientDBPath,
		namespace,
		readOnly,
	)
}
2022-01-11 05:37:27 +00:00
// NewSnapshotService creates Service.
func NewSnapshotService(edb ethdb.Database, pub Publisher, recoveryFile string) (*Service, error) {
2020-07-01 18:44:59 +00:00
return &Service{
ethDB: edb,
stateDB: state.NewDatabase(edb),
2022-01-11 00:59:26 +00:00
ipfsPublisher: pub,
maxBatchSize: defaultBatchSize,
recoveryFile: recoveryFile,
2020-07-01 18:44:59 +00:00
}, nil
}
2020-08-20 10:23:36 +00:00
// SnapshotParams carries the inputs of Service.CreateSnapshot.
type SnapshotParams struct {
	// Height is the block number whose canonical header and state are
	// snapshotted.
	Height uint64
	// Workers is the requested number of concurrent trie iterators; values of
	// 0 or 1 result in a single iterator over the whole trie.
	Workers uint
}
2020-08-23 04:38:31 +00:00
func (s *Service) CreateSnapshot(params SnapshotParams) error {
// extract header from lvldb and publish to PG-IPFS
// hold onto the headerID so that we can link the state nodes to this header
2020-08-23 04:38:31 +00:00
logrus.Infof("Creating snapshot at height %d", params.Height)
hash := rawdb.ReadCanonicalHash(s.ethDB, params.Height)
header := rawdb.ReadHeader(s.ethDB, hash, params.Height)
if header == nil {
2020-08-23 04:38:31 +00:00
return fmt.Errorf("unable to read canonical header at height %d", params.Height)
}
logrus.Infof("head hash: %s head height: %d", hash.Hex(), params.Height)
err := s.ipfsPublisher.PublishHeader(header)
if err != nil {
return err
}
2020-08-23 04:38:31 +00:00
tree, err := s.stateDB.OpenTrie(header.Root)
if err != nil {
return err
}
headerID := header.Hash().String()
s.tracker = newTracker(int(params.Workers))
go s.tracker.run()
var iters []trie.NodeIterator
// attempt to restore from recovery file if it exists
iters, err = s.tracker.restore(tree, s.recoveryFile)
if err != nil {
return err
}
if iters != nil {
if params.Workers < uint(len(iters)) {
return fmt.Errorf(
"number of recovered workers (%d) is greater than number configured (%d)",
len(iters), params.Workers,
)
}
} else { // nothing to restore
if params.Workers > 1 {
iters = iter.SubtrieIterators(tree, params.Workers)
} else {
iters = []trie.NodeIterator{tree.NodeIterator(nil)}
}
for i, it := range iters {
iters[i] = s.tracker.tracked(it)
}
}
defer func() {
err := s.tracker.haltAndDump(s.recoveryFile)
if err != nil {
logrus.Error("failed to write recovery file: ", err)
}
}()
if len(iters) > 0 {
return s.createSnapshotAsync(iters, headerID)
2020-08-20 10:23:36 +00:00
} else {
return s.createSnapshot(iters[0], headerID)
2020-08-20 10:23:36 +00:00
}
return nil
}
2020-08-20 10:23:36 +00:00
// Create snapshot up to head (ignores height param)
func (s *Service) CreateLatestSnapshot(workers uint) error {
2020-08-20 10:23:36 +00:00
logrus.Info("Creating snapshot at head")
hash := rawdb.ReadHeadHeaderHash(s.ethDB)
height := rawdb.ReadHeaderNumber(s.ethDB, hash)
if height == nil {
return fmt.Errorf("unable to read header height for header hash %s", hash.String())
2020-07-16 15:02:16 +00:00
}
return s.CreateSnapshot(SnapshotParams{Height: *height, Workers: workers})
2020-08-20 10:23:36 +00:00
}
type nodeResult struct {
node Node
2020-08-20 10:23:36 +00:00
elements []interface{}
}
2020-09-06 07:36:36 +00:00
func resolveNode(it trie.NodeIterator, trieDB *trie.Database) (*nodeResult, error) {
// "leaf" nodes are actually "value" nodes, whose parents are the actual leaves
if it.Leaf() {
return nil, nil
}
if IsNullHash(it.Hash()) {
return nil, nil
}
2022-01-11 00:06:29 +00:00
path := make([]byte, len(it.Path()))
copy(path, it.Path())
n, err := trieDB.Node(it.Hash())
2020-07-01 18:44:59 +00:00
if err != nil {
2020-08-20 10:23:36 +00:00
return nil, err
2020-07-01 18:44:59 +00:00
}
2022-01-11 00:06:29 +00:00
var elements []interface{}
if err := rlp.DecodeBytes(n, &elements); err != nil {
2020-08-20 10:23:36 +00:00
return nil, err
}
2022-01-11 00:06:29 +00:00
ty, err := CheckKeyType(elements)
2020-07-01 18:44:59 +00:00
if err != nil {
2020-08-20 10:23:36 +00:00
return nil, err
2020-07-01 18:44:59 +00:00
}
2020-08-20 10:23:36 +00:00
return &nodeResult{
node: Node{
NodeType: ty,
Path: path,
Value: n,
2020-08-20 10:23:36 +00:00
},
2022-01-11 00:06:29 +00:00
elements: elements,
2020-08-20 10:23:36 +00:00
}, nil
2020-07-01 18:44:59 +00:00
}
func (s *Service) createSnapshot(it trie.NodeIterator, headerID string) error {
tx, err := s.ipfsPublisher.BeginTx()
2022-01-11 00:06:29 +00:00
if err != nil {
return err
}
defer func() { err = CommitOrRollback(tx, err) }()
2022-01-11 00:06:29 +00:00
2022-01-08 03:32:45 +00:00
for it.Next(true) {
res, err := resolveNode(it, s.stateDB.TrieDB())
if err != nil {
return err
}
if res == nil {
continue
}
tx, err = s.ipfsPublisher.PrepareTxForBatch(tx, s.maxBatchSize)
2020-07-01 18:44:59 +00:00
if err != nil {
return err
}
switch res.node.NodeType {
case Leaf:
// if the node is a leaf, decode the account and publish the associated storage trie
// nodes if there are any
2022-01-08 03:32:45 +00:00
var account types.StateAccount
if err := rlp.DecodeBytes(res.elements[1].([]byte), &account); err != nil {
return fmt.Errorf(
"error decoding account for leaf node at path %x nerror: %v", res.node.Path, err)
2022-01-08 03:32:45 +00:00
}
partialPath := trie.CompactToHex(res.elements[0].([]byte))
valueNodePath := append(res.node.Path, partialPath...)
2022-01-08 03:32:45 +00:00
encodedPath := trie.HexToCompact(valueNodePath)
leafKey := encodedPath[1:]
res.node.Key = common.BytesToHash(leafKey)
err := s.ipfsPublisher.PublishStateNode(&res.node, headerID, tx)
2020-07-01 23:07:56 +00:00
if err != nil {
return err
}
2022-01-08 03:32:45 +00:00
// publish any non-nil code referenced by codehash
if !bytes.Equal(account.CodeHash, emptyCodeHash) {
2021-12-15 07:23:18 +00:00
codeHash := common.BytesToHash(account.CodeHash)
codeBytes := rawdb.ReadCode(s.ethDB, codeHash)
if len(codeBytes) == 0 {
logrus.Error("Code is missing", "account", common.BytesToHash(it.LeafKey()))
return errors.New("missing code")
2022-01-08 03:32:45 +00:00
}
2021-12-15 07:23:18 +00:00
if err = s.ipfsPublisher.PublishCode(codeHash, codeBytes, tx); err != nil {
2022-01-08 03:32:45 +00:00
return err
}
}
if tx, err = s.storageSnapshot(account.Root, headerID, res.node.Path, tx); err != nil {
2021-12-14 06:50:19 +00:00
return fmt.Errorf("failed building storage snapshot for account %+v\r\nerror: %w", account, err)
2022-01-08 03:32:45 +00:00
}
case Extension, Branch:
res.node.Key = common.BytesToHash([]byte{})
if err := s.ipfsPublisher.PublishStateNode(&res.node, headerID, tx); err != nil {
2020-07-01 23:07:56 +00:00
return err
}
2022-01-08 03:32:45 +00:00
default:
return errors.New("unexpected node type")
2020-08-20 10:23:36 +00:00
}
2020-07-01 18:44:59 +00:00
}
2020-08-03 15:46:35 +00:00
return it.Error()
2020-07-01 18:44:59 +00:00
}
2022-01-08 03:32:45 +00:00
// Full-trie concurrent snapshot
func (s *Service) createSnapshotAsync(iters []trie.NodeIterator, headerID string) error {
2020-08-23 04:38:31 +00:00
errors := make(chan error)
var wg sync.WaitGroup
for _, it := range iters {
2020-08-23 04:38:31 +00:00
wg.Add(1)
go func(it trie.NodeIterator) {
2020-08-23 04:38:31 +00:00
defer wg.Done()
if err := s.createSnapshot(it, headerID); err != nil {
errors <- err
}
}(it)
2020-09-06 07:36:36 +00:00
}
done := make(chan struct{})
2020-08-23 04:38:31 +00:00
go func() {
wg.Wait()
done <- struct{}{}
2020-08-23 04:38:31 +00:00
}()
2020-08-20 10:23:36 +00:00
var err error
2020-08-23 04:38:31 +00:00
select {
case err = <-errors:
case <-done:
close(errors)
2020-08-20 10:23:36 +00:00
}
return err
2020-08-20 10:23:36 +00:00
}
func (s *Service) storageSnapshot(sr common.Hash, headerID string, statePath []byte, tx Tx) (Tx, error) {
2020-07-01 18:44:59 +00:00
if bytes.Equal(sr.Bytes(), emptyContractRoot.Bytes()) {
return tx, nil
2020-07-01 18:44:59 +00:00
}
2020-07-01 18:44:59 +00:00
sTrie, err := s.stateDB.OpenTrie(sr)
if err != nil {
return nil, err
2020-07-01 18:44:59 +00:00
}
2020-07-01 18:44:59 +00:00
it := sTrie.NodeIterator(make([]byte, 0))
for it.Next(true) {
2020-08-20 10:23:36 +00:00
res, err := resolveNode(it, s.stateDB.TrieDB())
2020-07-01 18:44:59 +00:00
if err != nil {
return nil, err
}
if res == nil {
continue
}
tx, err = s.ipfsPublisher.PrepareTxForBatch(tx, s.maxBatchSize)
2020-07-01 18:44:59 +00:00
if err != nil {
return nil, err
2020-07-01 18:44:59 +00:00
}
2022-01-11 00:06:29 +00:00
var nodeData []byte
nodeData, err = s.stateDB.TrieDB().Node(it.Hash())
2020-07-01 18:44:59 +00:00
if err != nil {
return nil, err
2020-07-01 18:44:59 +00:00
}
res.node.Value = nodeData
2022-01-11 00:06:29 +00:00
switch res.node.NodeType {
case Leaf:
2020-08-20 10:23:36 +00:00
partialPath := trie.CompactToHex(res.elements[0].([]byte))
valueNodePath := append(res.node.Path, partialPath...)
2020-07-01 18:44:59 +00:00
encodedPath := trie.HexToCompact(valueNodePath)
leafKey := encodedPath[1:]
res.node.Key = common.BytesToHash(leafKey)
case Extension, Branch:
res.node.Key = common.BytesToHash([]byte{})
2020-07-01 18:44:59 +00:00
default:
return nil, errors.New("unexpected node type")
2020-07-01 23:07:56 +00:00
}
if err = s.ipfsPublisher.PublishStorageNode(&res.node, headerID, statePath, tx); err != nil {
2022-01-11 00:06:29 +00:00
return nil, err
2020-07-01 18:44:59 +00:00
}
}
return tx, it.Error()
2020-07-01 18:44:59 +00:00
}