Add instructions to import snapshot data into database (#52)

* Add instructions to import snapshot data into database
* Add monitoring and update data processing in README
* Update instructions to import snapshot
[![Go Report Card](https://goreportcard.com/badge/github.com/vulcanize/ipld-eth-state-snapshot)](https://goreportcard.com/report/github.com/vulcanize/ipld-eth-state-snapshot)

## Setup

* Build the binary:

  ```bash
  make build
  ```
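  If the build succeeds, a quick smoke test is to ask the binary for its help text (this assumes `make build` drops `ipld-eth-state-snapshot` in the project root, as the usage examples below do):

  ```bash
  # a working binary should list the stateSnapshot and inPlaceStateSnapshot
  # subcommands in its help output
  ./ipld-eth-state-snapshot --help
  ```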

## Configuration

Config format:

```toml
[snapshot]
mode = "file" # indicates output mode <postgres | file>
workers = 4 # degree of concurrency; the state trie is subdivided into sections that are traversed and processed concurrently
blockHeight = -1 # block height to perform the snapshot at (-1 indicates to use the latest block height found in leveldb)
recoveryFile = "recovery_file" # specifies a file to output recovery information to on error or premature closure

[leveldb]
# path to geth leveldb
path = "/Users/user/Library/Ethereum/geth/chaindata" # LVL_DB_PATH
# path to geth ancient database
ancient = "/Users/user/Library/Ethereum/geth/chaindata/ancient" # ANCIENT_DB_PATH

[database]
# db credentials, used when operating in 'postgres' output mode
name = "vulcanize_public" # DATABASE_NAME
hostname = "localhost" # DATABASE_HOSTNAME
port = 5432 # DATABASE_PORT
user = "postgres" # DATABASE_USER
password = "" # DATABASE_PASSWORD

[file]
# directory the CSV files are written to, used when operating in 'file' output mode
outputDir = "output_dir/" # FILE_OUTPUT_DIR

[log]
level = "info" # log level (trace, debug, info, warn, error, fatal, panic) (default: info)
file = "log_file" # file path for logging; leave unset to log to stdout

[prom]
# prometheus metrics
metrics = true # enable prometheus metrics (default: false)
http = true # enable prometheus http service (default: false)
httpAddr = "0.0.0.0" # prometheus http host (default: 127.0.0.1)
httpPort = 9101 # prometheus http port (default: 8086)
dbStats = true # enable prometheus db stats (default: false)

[ethereum]
# node info
clientName = "Geth" # ETH_CLIENT_NAME
nodeID = "arch1" # ETH_NODE_ID
networkID = "1" # ETH_NETWORK_ID
chainID = "1" # ETH_CHAIN_ID
genesisBlock = "0xd4e56740f876aef8c010b86a40d5f56745a118d0906a34e69aec8c0db1cb8fa3" # ETH_GENESIS_BLOCK
```
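The upper-case names in the comments (e.g. `DATABASE_USER`, `LVL_DB_PATH`) appear to be the environment variables corresponding to each setting. Assuming they override the TOML values (an assumption based on those comments, not verified here), a postgres-mode run could be configured like so:

```bash
# hypothetical overrides; variable names taken from the comments in the sample config
export DATABASE_HOSTNAME=localhost
export DATABASE_PORT=5432
export DATABASE_NAME=vulcanize_public
export DATABASE_USER=postgres
export DATABASE_PASSWORD=""
export LVL_DB_PATH="$HOME/Library/Ethereum/geth/chaindata"
export ANCIENT_DB_PATH="$HOME/Library/Ethereum/geth/chaindata/ancient"
```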

## Usage

* For a state snapshot from LevelDB:

  ```bash
  ./ipld-eth-state-snapshot stateSnapshot --config={path to toml config file}
  ```

* For an in-place snapshot in the database:

  ```bash
  ./ipld-eth-state-snapshot inPlaceStateSnapshot --config={path to toml config file}
  ```
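For example, in file mode (the sample config above saved at a hypothetical `./config.toml`, with `outputDir = "output_dir/"`):

```bash
./ipld-eth-state-snapshot stateSnapshot --config=./config.toml

# in file mode the snapshot is written as CSV files under the configured
# output directory (see the import section below)
ls output_dir/
```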

## Monitoring

* Enable metrics using the config parameters `prom.metrics` and `prom.http`.
* `ipld-eth-state-snapshot` exposes the following prometheus metrics at the `/metrics` endpoint:
  * `state_node_count`: number of state nodes processed
  * `storage_node_count`: number of storage nodes processed
  * `code_node_count`: number of code nodes processed
  * DB stats, if operating in `postgres` mode
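With the sample `prom` settings above (`httpAddr = "0.0.0.0"`, `httpPort = 9101`), progress can be spot-checked by scraping the endpoint directly:

```bash
# the node counters increase as the snapshot progresses
curl -s http://localhost:9101/metrics | grep -E 'state_node_count|storage_node_count|code_node_count'
```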

## Tests

* Run unit tests:

  ```bash
  # set up the db
  docker-compose up -d

  # run tests after the db migrations have run
  make dbtest

  # tear down the db
  docker-compose down -v --remove-orphans
  ```
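If `make dbtest` runs before the database container is accepting connections, the tests can fail spuriously; one way to wait for readiness (assuming the compose file exposes Postgres on `localhost:5432`, which is an assumption about this particular setup):

```bash
# block until Postgres accepts connections
until pg_isready -h localhost -p 5432; do sleep 1; done
```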

## Import output data in file mode into a database

* When `ipld-eth-state-snapshot stateSnapshot` is run in file mode (`snapshot.mode = "file"`), the output is in the form of CSV files.

* The following assumes the output files are located in the host's `./output_dir` directory.

* Data post-processing:

  * Create a directory to store the post-processed output:

    ```bash
    mkdir -p output_dir/processed_output
    ```

  * Combine the output from multiple workers and copy it to the post-processed output directory:

    ```bash
    # public.blocks
    cat {output_dir,output_dir/*}/public.blocks.csv > output_dir/processed_output/combined-public.blocks.csv

    # eth.state_cids
    cat output_dir/*/eth.state_cids.csv > output_dir/processed_output/combined-eth.state_cids.csv

    # eth.storage_cids
    cat output_dir/*/eth.storage_cids.csv > output_dir/processed_output/combined-eth.storage_cids.csv

    # public.nodes
    cp output_dir/public.nodes.csv output_dir/processed_output/public.nodes.csv

    # eth.header_cids
    cp output_dir/eth.header_cids.csv output_dir/processed_output/eth.header_cids.csv
    ```
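    The `cat`/`cp` commands above imply a layout in which per-worker subdirectories hold the state, storage and block CSVs, while the node info and header CSVs sit at the top level. To inspect the actual layout before combining (the worker directory names here are illustrative):

    ```bash
    # expected shape, inferred from the combine commands:
    #   output_dir/public.nodes.csv
    #   output_dir/eth.header_cids.csv
    #   output_dir/public.blocks.csv
    #   output_dir/<worker>/public.blocks.csv
    #   output_dir/<worker>/eth.state_cids.csv
    #   output_dir/<worker>/eth.storage_cids.csv
    ls -R output_dir
    ```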

  * De-duplicate the data:

    ```bash
    # public.blocks
    sort -u output_dir/processed_output/combined-public.blocks.csv -o output_dir/processed_output/deduped-combined-public.blocks.csv

    # eth.header_cids
    sort -u output_dir/processed_output/eth.header_cids.csv -o output_dir/processed_output/deduped-eth.header_cids.csv

    # eth.state_cids
    sort -u output_dir/processed_output/combined-eth.state_cids.csv -o output_dir/processed_output/deduped-combined-eth.state_cids.csv

    # eth.storage_cids
    sort -u output_dir/processed_output/combined-eth.storage_cids.csv -o output_dir/processed_output/deduped-combined-eth.storage_cids.csv
    ```
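    As an optional sanity check, compare row counts before and after de-duplication (the deduplicated file should be no longer than its combined input):

    ```bash
    wc -l output_dir/processed_output/combined-public.blocks.csv \
          output_dir/processed_output/deduped-combined-public.blocks.csv
    ```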

* Copy the post-processed output files over to the DB server (say, to `/output_dir`).

* Start `psql` to run the import commands:

  ```bash
  psql -U <DATABASE_USER> -h <DATABASE_HOSTNAME> -p <DATABASE_PORT> <DATABASE_NAME>
  ```
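  Alternatively, the `COPY` statements from the next step can be saved to a file and run non-interactively:

  ```bash
  # assuming the statements below are saved to import.sql (hypothetical file name)
  psql -U <DATABASE_USER> -h <DATABASE_HOSTNAME> -p <DATABASE_PORT> <DATABASE_NAME> -f import.sql
  ```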

* Run the following to import the data:

  ```sql
  -- public.nodes
  COPY public.nodes FROM '/output_dir/processed_output/public.nodes.csv' CSV;

  -- public.blocks
  COPY public.blocks FROM '/output_dir/processed_output/deduped-combined-public.blocks.csv' CSV;

  -- eth.header_cids
  COPY eth.header_cids FROM '/output_dir/processed_output/deduped-eth.header_cids.csv' CSV;

  -- eth.state_cids
  COPY eth.state_cids FROM '/output_dir/processed_output/deduped-combined-eth.state_cids.csv' CSV FORCE NOT NULL state_leaf_key;

  -- eth.storage_cids
  COPY eth.storage_cids FROM '/output_dir/processed_output/deduped-combined-eth.storage_cids.csv' CSV FORCE NOT NULL storage_leaf_key;
  ```

* NOTE: By default, `COPY` imports empty CSV fields as `NULL`. Passing `FORCE NOT NULL <column_name>` forces them to be inserted as empty strings instead; this is required to keep the imported snapshot data compatible with the data generated by statediffing. Reference: https://www.postgresql.org/docs/14/sql-copy.html
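After the import, a quick verification is to compare table row counts against the line counts of the deduplicated CSVs (a sketch using `psql -c`):

```bash
# each count should match `wc -l` of the corresponding deduplicated CSV
psql -U <DATABASE_USER> -h <DATABASE_HOSTNAME> -p <DATABASE_PORT> <DATABASE_NAME> \
  -c "SELECT count(*) FROM public.blocks;" \
  -c "SELECT count(*) FROM eth.state_cids;" \
  -c "SELECT count(*) FROM eth.storage_cids;"
```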