diff --git a/README.md b/README.md index bda0b6b..77f0d94 100644 --- a/README.md +++ b/README.md @@ -3,3 +3,23 @@ > Tool for extracting the entire Ethereum state at a particular block height from leveldb into Postgres-backed IPFS [![Go Report Card](https://goreportcard.com/badge/github.com/vulcanize/eth-pg-ipfs-state-snapshot)](https://goreportcard.com/report/github.com/vulcanize/eth-pg-ipfs-state-snapshot) + +## Usage + +./eth-pg-ipfs-state-snapshot stateSnapshot --config={path to toml config file} + +Config format: + +```toml +[database] + name = "vulcanize_public" + hostname = "localhost" + port = 5432 + user = "postgres" + +[leveldb] + path = "/Users/user/Library/Ethereum/geth/chaindata" + +[snapshot] + blockHeight = 0 +``` \ No newline at end of file diff --git a/cmd/stateSnapshot.go b/cmd/stateSnapshot.go index 1438414..0df439b 100644 --- a/cmd/stateSnapshot.go +++ b/cmd/stateSnapshot.go @@ -16,7 +16,6 @@ package cmd import ( - "github.com/ethereum/go-ethereum/common" "github.com/sirupsen/logrus" "github.com/spf13/cobra" "github.com/spf13/viper" @@ -27,12 +26,9 @@ import ( var stateSnapshotCmd = &cobra.Command{ Use: "stateSnapshot", Short: "Extract the entire Ethereum state from leveldb and publish into PG-IPFS", - Long: `A longer description that spans multiple lines and likely contains examples -and usage of using your command. For example: + Long: `Usage -Cobra is a CLI library for Go that empowers applications. 
-This application is a tool to generate the needed files -to quickly create a Cobra application.`, +./eth-pg-ipfs-state-snapshot stateSnapshot --config={path to toml config file}`, Run: func(cmd *cobra.Command, args []string) { subCommand = cmd.CalledAs() logWithCommand = *logrus.WithField("SubCommand", subCommand) @@ -47,17 +43,11 @@ func stateSnapshot() { if err != nil { logWithCommand.Fatal(err) } - height := viper.Get("snapshot.blockHeight") - uHeight, ok := height.(uint64) - if !ok { - logWithCommand.Fatal("snapshot.blockHeight needs to be a uint") - } - hashStr := viper.GetString("snapshot.blockHash") - hash := common.HexToHash(hashStr) - if err := snapshotService.CreateSnapshot(uHeight, hash); err != nil { + height := uint64(viper.GetInt64("snapshot.blockHeight")) + if err := snapshotService.CreateSnapshot(height); err != nil { logWithCommand.Fatal(err) } - logWithCommand.Infof("state snapshot for height %d and hash %s is complete", uHeight, hashStr) + logWithCommand.Infof("state snapshot at height %d is complete", height) } func init() { @@ -65,9 +55,7 @@ func init() { stateSnapshotCmd.PersistentFlags().String("leveldb-path", "", "path to leveldb") stateSnapshotCmd.PersistentFlags().String("block-height", "", "blockheight to extract state at") - stateSnapshotCmd.PersistentFlags().String("block-hash", "", "blockhash to extract state at") viper.BindPFlag("leveldb.path", stateSnapshotCmd.PersistentFlags().Lookup("leveldb-path")) viper.BindPFlag("snapshot.blockHeight", stateSnapshotCmd.PersistentFlags().Lookup("block-height")) - viper.BindPFlag("snapshot.blockHash", stateSnapshotCmd.PersistentFlags().Lookup("block-hash")) } diff --git a/environments/example.toml b/environments/example.toml new file mode 100644 index 0000000..2e80743 --- /dev/null +++ b/environments/example.toml @@ -0,0 +1,11 @@ +[database] + name = "vulcanize_public" + hostname = "localhost" + port = 5432 + user = "postgres" + +[leveldb] + path = "/Users/user/Library/Ethereum/geth/chaindata" + 
+[snapshot] + blockHeight = 0 \ No newline at end of file diff --git a/pkg/snapshot/publisher.go b/pkg/snapshot/publisher.go index eeb58ee..0a6f76f 100644 --- a/pkg/snapshot/publisher.go +++ b/pkg/snapshot/publisher.go @@ -58,13 +58,14 @@ func (p *Publisher) PublishHeader(header *types.Header) (int64, error) { if err := shared.PublishIPLD(tx, headerNode); err != nil { return 0, err } + mhKey, _ := shared.MultihashKeyFromCIDString(headerNode.Cid().String()) var headerID int64 - err = tx.QueryRowx(`INSERT INTO eth.header_cids (block_number, block_hash, parent_hash, cid, td, node_id, reward, state_root, tx_root, receipt_root, uncle_root, bloom, timestamp, times_validated) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) + err = tx.QueryRowx(`INSERT INTO eth.header_cids (block_number, block_hash, parent_hash, cid, td, node_id, reward, state_root, tx_root, receipt_root, uncle_root, bloom, timestamp, mh_key, times_validated) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15) ON CONFLICT (block_number, block_hash) DO UPDATE SET block_number = header_cids.block_number RETURNING id`, - header.Number.Uint64(), header.Hash().Hex(), header.ParentHash.Hex(), headerNode.Cid(), "0", p.db.NodeID, "0", header.Root.Hex(), header.TxHash.Hex(), - header.ReceiptHash.Hex(), header.UncleHash.Hex(), header.Bloom.Bytes(), header.Time, 0).Scan(&headerID) + header.Number.Uint64(), header.Hash().Hex(), header.ParentHash.Hex(), headerNode.Cid().String(), "0", p.db.NodeID, "0", header.Root.Hex(), header.TxHash.Hex(), + header.ReceiptHash.Hex(), header.UncleHash.Hex(), header.Bloom.Bytes(), header.Time, mhKey, 0).Scan(&headerID) return headerID, err } @@ -92,10 +93,11 @@ func (p *Publisher) PublishStateNode(node Node, headerID int64) (int64, error) { if err != nil { return 0, err } - err = tx.QueryRowx(`INSERT INTO eth.state_cids (header_id, state_leaf_key, cid, state_path, node_type, diff) VALUES ($1, $2, $3, $4, $5, $6) - ON CONFLICT (header_id, 
state_path, diff) DO UPDATE SET (state_leaf_key, cid, node_type) = ($2, $3, $5, $6) + mhKey, _ := shared.MultihashKeyFromCIDString(stateCIDStr) + err = tx.QueryRowx(`INSERT INTO eth.state_cids (header_id, state_leaf_key, cid, state_path, node_type, diff, mh_key) VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (header_id, state_path, diff) DO UPDATE SET (state_leaf_key, cid, node_type, mh_key) = ($2, $3, $5, $7) RETURNING id`, - headerID, stateKey, stateCIDStr, node.Path, node.NodeType, false).Scan(&stateID) + headerID, stateKey, stateCIDStr, node.Path, node.NodeType, false, mhKey).Scan(&stateID) return stateID, err } @@ -122,8 +124,9 @@ func (p *Publisher) PublishStorageNode(node Node, stateID int64) error { if err != nil { return err } - _, err = tx.Exec(`INSERT INTO eth.storage_cids (state_id, storage_leaf_key, cid, storage_path, node_type, diff) VALUES ($1, $2, $3, $4, $5, $6) - ON CONFLICT (state_id, storage_path) DO UPDATE SET (storage_leaf_key, cid, node_type, diff) = ($2, $3, $5, $6)`, - stateID, storageKey, storageCIDStr, node.Path, node.NodeType, false) + mhKey, _ := shared.MultihashKeyFromCIDString(storageCIDStr) + _, err = tx.Exec(`INSERT INTO eth.storage_cids (state_id, storage_leaf_key, cid, storage_path, node_type, diff, mh_key) VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (state_id, storage_path) DO UPDATE SET (storage_leaf_key, cid, node_type, diff, mh_key) = ($2, $3, $5, $6, $7)`, + stateID, storageKey, storageCIDStr, node.Path, node.NodeType, false, mhKey) return err } diff --git a/pkg/snapshot/service.go b/pkg/snapshot/service.go index 8ada3f3..9619249 100644 --- a/pkg/snapshot/service.go +++ b/pkg/snapshot/service.go @@ -20,12 +20,13 @@ import ( "errors" "fmt" + "github.com/sirupsen/logrus" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" - 
"github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" @@ -49,7 +50,7 @@ func NewSnapshotService(con Config) (*Service, error) { if err != nil { return nil, err } - edb, err := rawdb.NewLevelDBDatabase(con.LevelDBPath, 256, 0, "") + edb, err := rawdb.NewLevelDBDatabase(con.LevelDBPath, 256, 1024, "eth-pg-ipfs-state-snapshot") if err != nil { return nil, err } @@ -60,9 +61,11 @@ func NewSnapshotService(con Config) (*Service, error) { }, nil } -func (s *Service) CreateSnapshot(height uint64, hash common.Hash) error { +func (s *Service) CreateSnapshot(height uint64) error { // extract header from lvldb and publish to PG-IPFS // hold onto the headerID so that we can link the state nodes to this header + logrus.Infof("Creating snapshot at height %d", height) + hash := rawdb.ReadCanonicalHash(s.ethDB, height) header := rawdb.ReadHeader(s.ethDB, hash, height) headerID, err := s.ipfsPublisher.PublishHeader(header) if err != nil { @@ -105,6 +108,7 @@ func (s *Service) createSnapshot(it trie.NodeIterator, trieDB *trie.Database, he } switch ty { case Leaf: + // if the node is a leaf, decode the account and publish the associated storage trie nodes if there are any var account state.Account if err := rlp.DecodeBytes(nodeElements[1].([]byte), &account); err != nil { return fmt.Errorf("error decoding account for leaf node at path %x nerror: %v", nodePath, err) @@ -119,7 +123,7 @@ func (s *Service) createSnapshot(it trie.NodeIterator, trieDB *trie.Database, he return err } if err := s.storageSnapshot(account.Root, stateID); err != nil { - return fmt.Errorf("failed building eventual storage diffs for account %+v\r\nerror: %v", account, err) + return fmt.Errorf("failed building storage snapshot for account %+v\r\nerror: %v", account, err) } case Extension, Branch: stateNode.Key = common.BytesToHash([]byte{}) @@ -133,16 +137,12 @@ func (s *Service) createSnapshot(it trie.NodeIterator, trieDB *trie.Database, he return nil 
} -// buildStorageNodesEventual builds the storage diff node objects for a created account -// i.e. it returns all the storage nodes at this state, since there is no previous state func (s *Service) storageSnapshot(sr common.Hash, stateID int64) error { if bytes.Equal(sr.Bytes(), emptyContractRoot.Bytes()) { return nil } - log.Debug("Storage Root For Eventual Diff", "root", sr.Hex()) sTrie, err := s.stateDB.OpenTrie(sr) if err != nil { - log.Info("error in build storage diff eventual", "error", err) return err } it := sTrie.NodeIterator(make([]byte, 0))