Refactor to use statediff plugin #1

Merged
roysc merged 27 commits from refactor-use-plugin into v5 2023-09-29 18:43:28 +00:00
2 changed files with 16 additions and 16 deletions
Showing only changes of commit e0f6b93460 - Show all commits

View File

@@ -125,8 +125,8 @@ Config format:
* Combine output from multiple workers and copy to post-processed output directory: * Combine output from multiple workers and copy to post-processed output directory:
```bash ```bash
# public.blocks # ipld.blocks
cat {output_dir,output_dir/*}/public.blocks.csv > output_dir/processed_output/combined-public.blocks.csv cat {output_dir,output_dir/*}/ipld.blocks.csv > output_dir/processed_output/combined-ipld.blocks.csv
# eth.state_cids # eth.state_cids
cat output_dir/*/eth.state_cids.csv > output_dir/processed_output/combined-eth.state_cids.csv cat output_dir/*/eth.state_cids.csv > output_dir/processed_output/combined-eth.state_cids.csv
@@ -144,8 +144,8 @@ Config format:
* De-duplicate data: * De-duplicate data:
```bash ```bash
# public.blocks # ipld.blocks
sort -u output_dir/processed_output/combined-public.blocks.csv -o output_dir/processed_output/deduped-combined-public.blocks.csv sort -u output_dir/processed_output/combined-ipld.blocks.csv -o output_dir/processed_output/deduped-combined-ipld.blocks.csv
# eth.header_cids # eth.header_cids
sort -u output_dir/processed_output/eth.header_cids.csv -o output_dir/processed_output/deduped-eth.header_cids.csv sort -u output_dir/processed_output/eth.header_cids.csv -o output_dir/processed_output/deduped-eth.header_cids.csv
@@ -171,8 +171,8 @@ Config format:
# public.nodes # public.nodes
COPY public.nodes FROM '/output_dir/processed_output/public.nodes.csv' CSV; COPY public.nodes FROM '/output_dir/processed_output/public.nodes.csv' CSV;
# public.blocks # ipld.blocks
COPY public.blocks FROM '/output_dir/processed_output/deduped-combined-public.blocks.csv' CSV; COPY ipld.blocks FROM '/output_dir/processed_output/deduped-combined-ipld.blocks.csv' CSV;
# eth.header_cids # eth.header_cids
COPY eth.header_cids FROM '/output_dir/processed_output/deduped-eth.header_cids.csv' CSV; COPY eth.header_cids FROM '/output_dir/processed_output/deduped-eth.header_cids.csv' CSV;

View File

@@ -2,13 +2,13 @@
* For a given table in the `ipld-eth-db` schema, we know the number of columns to be expected in each row in the data dump: * For a given table in the `ipld-eth-db` schema, we know the number of columns to be expected in each row in the data dump:
| Table | Expected columns | | Table | Expected columns |
| ----------------- |:----------------:| |--------------------|:----------------:|
| public.nodes | 5 | | `public.nodes` | 5 |
| public.blocks | 3 | | `ipld.blocks` | 3 |
| eth.header_cids | 16 | | `eth.header_cids` | 16 |
| eth.state_cids | 8 | | `eth.state_cids` | 8 |
| eth.storage_cids | 9 | | `eth.storage_cids` | 9 |
### Find Bad Data ### Find Bad Data
@@ -40,7 +40,7 @@
```bash ```bash
./scripts/find-bad-rows.sh -i public.nodes.csv -c 5 -o res.txt -d true ./scripts/find-bad-rows.sh -i public.nodes.csv -c 5 -o res.txt -d true
./scripts/find-bad-rows.sh -i public.blocks.csv -c 3 -o res.txt -d true ./scripts/find-bad-rows.sh -i ipld.blocks.csv -c 3 -o res.txt -d true
./scripts/find-bad-rows.sh -i eth.header_cids.csv -c 16 -o res.txt -d true ./scripts/find-bad-rows.sh -i eth.header_cids.csv -c 16 -o res.txt -d true
./scripts/find-bad-rows.sh -i eth.state_cids.csv -c 8 -o res.txt -d true ./scripts/find-bad-rows.sh -i eth.state_cids.csv -c 8 -o res.txt -d true
./scripts/find-bad-rows.sh -i eth.storage_cids.csv -c 9 -o res.txt -d true ./scripts/find-bad-rows.sh -i eth.storage_cids.csv -c 9 -o res.txt -d true
@@ -66,7 +66,7 @@
```bash ```bash
./scripts/filter-bad-rows.sh -i public.nodes.csv -c 5 -o cleaned-public.nodes.csv ./scripts/filter-bad-rows.sh -i public.nodes.csv -c 5 -o cleaned-public.nodes.csv
./scripts/filter-bad-rows.sh -i public.blocks.csv -c 3 -o cleaned-public.blocks.csv ./scripts/filter-bad-rows.sh -i ipld.blocks.csv -c 3 -o cleaned-ipld.blocks.csv
./scripts/filter-bad-rows.sh -i eth.header_cids.csv -c 16 -o cleaned-eth.header_cids.csv ./scripts/filter-bad-rows.sh -i eth.header_cids.csv -c 16 -o cleaned-eth.header_cids.csv
./scripts/filter-bad-rows.sh -i eth.state_cids.csv -c 8 -o cleaned-eth.state_cids.csv ./scripts/filter-bad-rows.sh -i eth.state_cids.csv -c 8 -o cleaned-eth.state_cids.csv
./scripts/filter-bad-rows.sh -i eth.storage_cids.csv -c 9 -o cleaned-eth.storage_cids.csv ./scripts/filter-bad-rows.sh -i eth.storage_cids.csv -c 9 -o cleaned-eth.storage_cids.csv