2021-04-09 02:32:04 +00:00
18 changed files with 1912 additions and 58 deletions
--- a/statediff/builder_test.go
+++ b/statediff/builder_test.go
@ -32,7 +32,6 @@ import (
 	sdtypes "github.com/ethereum/go-ethereum/statediff/types"
 )
 // TODO: add test that filters on address
 var (
 	contractLeafKey                                        []byte
 	emptyDiffs                                             = make([]sdtypes.StateNode, 0)
--- a/statediff/db/migrations/00001_create_ipfs_blocks_table.sql
+++ b/statediff/db/migrations/00001_create_ipfs_blocks_table.sql
@ -0,0 +1,8 @@
 -- +goose Up
 CREATE TABLE IF NOT EXISTS public.blocks (
  key TEXT UNIQUE NOT NULL,
  data BYTEA NOT NULL
 );
 -- +goose Down
 DROP TABLE public.blocks;
--- a/statediff/db/migrations/00002_create_nodes_table.sql
+++ b/statediff/db/migrations/00002_create_nodes_table.sql
@ -0,0 +1,13 @@
 -- +goose Up
 CREATE TABLE nodes (
  id            SERIAL PRIMARY KEY,
  client_name   VARCHAR,
  genesis_block VARCHAR(66),
  network_id    VARCHAR,
  node_id       VARCHAR(128),
  chain_id      INTEGER DEFAULT 1,
  CONSTRAINT node_uc UNIQUE (genesis_block, network_id, node_id, chain_id)
 );
 -- +goose Down
 DROP TABLE nodes;
--- a/statediff/db/migrations/00003_create_eth_schema.sql
+++ b/statediff/db/migrations/00003_create_eth_schema.sql
@ -0,0 +1,5 @@
 -- +goose Up
 CREATE SCHEMA eth;
 -- +goose Down
 DROP SCHEMA eth;
--- a/statediff/db/migrations/00004_create_eth_header_cids_table.sql
+++ b/statediff/db/migrations/00004_create_eth_header_cids_table.sql
@ -0,0 +1,23 @@
 -- +goose Up
 CREATE TABLE eth.header_cids (
  id                    SERIAL PRIMARY KEY,
  block_number          BIGINT NOT NULL,
  block_hash            VARCHAR(66) NOT NULL,
  parent_hash           VARCHAR(66) NOT NULL,
  cid                   TEXT NOT NULL,
  mh_key                TEXT NOT NULL REFERENCES public.blocks (key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
  td                    NUMERIC NOT NULL,
  node_id               INTEGER NOT NULL REFERENCES nodes (id) ON DELETE CASCADE,
  reward                NUMERIC NOT NULL,
  state_root            VARCHAR(66) NOT NULL,
  tx_root               VARCHAR(66) NOT NULL,
  receipt_root          VARCHAR(66) NOT NULL,
  uncle_root            VARCHAR(66) NOT NULL,
  bloom                 BYTEA NOT NULL,
  timestamp             NUMERIC NOT NULL,
  times_validated       INTEGER NOT NULL DEFAULT 1,
  UNIQUE (block_number, block_hash)
 );
 -- +goose Down
 DROP TABLE eth.header_cids;
--- a/statediff/db/migrations/00005_create_eth_uncle_cids_table.sql
+++ b/statediff/db/migrations/00005_create_eth_uncle_cids_table.sql
@ -0,0 +1,14 @@
 -- +goose Up
 CREATE TABLE eth.uncle_cids (
  id                    SERIAL PRIMARY KEY,
  header_id             INTEGER NOT NULL REFERENCES eth.header_cids (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
  block_hash            VARCHAR(66) NOT NULL,
  parent_hash           VARCHAR(66) NOT NULL,
  cid                   TEXT NOT NULL,
  mh_key                TEXT NOT NULL REFERENCES public.blocks (key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
  reward                NUMERIC NOT NULL,
  UNIQUE (header_id, block_hash)
 );
 -- +goose Down
 DROP TABLE eth.uncle_cids;
--- a/statediff/db/migrations/00006_create_eth_transaction_cids_table.sql
+++ b/statediff/db/migrations/00006_create_eth_transaction_cids_table.sql
@ -0,0 +1,16 @@
 -- +goose Up
 CREATE TABLE eth.transaction_cids (
  id                    SERIAL PRIMARY KEY,
  header_id             INTEGER NOT NULL REFERENCES eth.header_cids (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
  tx_hash               VARCHAR(66) NOT NULL,
  index                 INTEGER NOT NULL,
  cid                   TEXT NOT NULL,
  mh_key                TEXT NOT NULL REFERENCES public.blocks (key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
  dst                   VARCHAR(66) NOT NULL,
  src                   VARCHAR(66) NOT NULL,
  tx_data               BYTEA,
  UNIQUE (header_id, tx_hash)
 );
 -- +goose Down
 DROP TABLE eth.transaction_cids;
--- a/statediff/db/migrations/00007_create_eth_receipt_cids_table.sql
+++ b/statediff/db/migrations/00007_create_eth_receipt_cids_table.sql
@ -0,0 +1,20 @@
 -- +goose Up
 CREATE TABLE eth.receipt_cids (
  id                    SERIAL PRIMARY KEY,
  tx_id                 INTEGER NOT NULL REFERENCES eth.transaction_cids (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
  cid                   TEXT NOT NULL,
  mh_key                TEXT NOT NULL REFERENCES public.blocks (key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
  contract              VARCHAR(66),
  contract_hash         VARCHAR(66),
  topic0s               VARCHAR(66)[],
  topic1s               VARCHAR(66)[],
  topic2s               VARCHAR(66)[],
  topic3s               VARCHAR(66)[],
  log_contracts         VARCHAR(66)[],
  post_state            VARCHAR(66),
  post_status           INTEGER,
  UNIQUE (tx_id)
 );
 -- +goose Down
 DROP TABLE eth.receipt_cids;
--- a/statediff/db/migrations/00008_create_eth_state_cids_table.sql
+++ b/statediff/db/migrations/00008_create_eth_state_cids_table.sql
@ -0,0 +1,15 @@
 -- +goose Up
 CREATE TABLE eth.state_cids (
  id                    BIGSERIAL PRIMARY KEY,
  header_id             INTEGER NOT NULL REFERENCES eth.header_cids (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
  state_leaf_key        VARCHAR(66),
  cid                   TEXT NOT NULL,
  mh_key                TEXT NOT NULL REFERENCES public.blocks (key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
  state_path            BYTEA,
  node_type             INTEGER NOT NULL,
  diff                  BOOLEAN NOT NULL DEFAULT FALSE,
  UNIQUE (header_id, state_path)
 );
 -- +goose Down
 DROP TABLE eth.state_cids;
--- a/statediff/db/migrations/00009_create_eth_storage_cids_table.sql
+++ b/statediff/db/migrations/00009_create_eth_storage_cids_table.sql
@ -0,0 +1,15 @@
 -- +goose Up
 CREATE TABLE eth.storage_cids (
  id                    BIGSERIAL PRIMARY KEY,
  state_id              BIGINT NOT NULL REFERENCES eth.state_cids (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
  storage_leaf_key      VARCHAR(66),
  cid                   TEXT NOT NULL,
  mh_key                TEXT NOT NULL REFERENCES public.blocks (key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
  storage_path          BYTEA,
  node_type             INTEGER NOT NULL,
  diff                  BOOLEAN NOT NULL DEFAULT FALSE,
  UNIQUE (state_id, storage_path)
 );
 -- +goose Down
 DROP TABLE eth.storage_cids;
--- a/statediff/db/migrations/00010_create_eth_state_accouts_table.sql
+++ b/statediff/db/migrations/00010_create_eth_state_accouts_table.sql
@ -0,0 +1,13 @@
 -- +goose Up
 CREATE TABLE eth.state_accounts (
  id                    SERIAL PRIMARY KEY,
  state_id              BIGINT NOT NULL REFERENCES eth.state_cids (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
  balance               NUMERIC NOT NULL,
  nonce                 INTEGER NOT NULL,
  code_hash             BYTEA NOT NULL,
  storage_root          VARCHAR(66) NOT NULL,
  UNIQUE (state_id)
 );
 -- +goose Down
 DROP TABLE eth.state_accounts;
--- a/statediff/db/migrations/00011_create_postgraphile_comments.sql
+++ b/statediff/db/migrations/00011_create_postgraphile_comments.sql
@ -0,0 +1,6 @@
 -- +goose Up
 COMMENT ON TABLE public.nodes IS E'@name NodeInfo';
 COMMENT ON TABLE eth.transaction_cids IS E'@name EthTransactionCids';
 COMMENT ON TABLE eth.header_cids IS E'@name EthHeaderCids';
 COMMENT ON COLUMN public.nodes.node_id IS E'@name ChainNodeID';
 COMMENT ON COLUMN eth.header_cids.node_id IS E'@name EthNodeID';
--- a/statediff/db/migrations/00012_potgraphile_triggers.sql
+++ b/statediff/db/migrations/00012_potgraphile_triggers.sql
@ -0,0 +1,69 @@
 -- +goose Up
 -- +goose StatementBegin
 CREATE FUNCTION eth.graphql_subscription() returns TRIGGER as $$
 declare
    table_name text = TG_ARGV[0];
    attribute text = TG_ARGV[1];
    id text;
 begin
    execute 'select $1.' || quote_ident(attribute)
        using new
        into id;
    perform pg_notify('postgraphile:' || table_name,
                      json_build_object(
                              '__node__', json_build_array(
                              table_name,
                              id
                          )
                          )::text
        );
    return new;
 end;
 $$ language plpgsql;
 -- +goose StatementEnd
 CREATE TRIGGER header_cids_ai
    after INSERT ON eth.header_cids
    for each row
    execute procedure eth.graphql_subscription('header_cids', 'id');
 CREATE TRIGGER receipt_cids_ai
    after INSERT ON eth.receipt_cids
    for each row
    execute procedure eth.graphql_subscription('receipt_cids', 'id');
 CREATE TRIGGER state_accounts_ai
    after INSERT ON eth.state_accounts
    for each row
    execute procedure eth.graphql_subscription('state_accounts', 'id');
 CREATE TRIGGER state_cids_ai
    after INSERT ON eth.state_cids
    for each row
    execute procedure eth.graphql_subscription('state_cids', 'id');
 CREATE TRIGGER storage_cids_ai
    after INSERT ON eth.storage_cids
    for each row
    execute procedure eth.graphql_subscription('storage_cids', 'id');
 CREATE TRIGGER transaction_cids_ai
    after INSERT ON eth.transaction_cids
    for each row
    execute procedure eth.graphql_subscription('transaction_cids', 'id');
 CREATE TRIGGER uncle_cids_ai
    after INSERT ON eth.uncle_cids
    for each row
    execute procedure eth.graphql_subscription('uncle_cids', 'id');
 -- +goose Down
 DROP TRIGGER uncle_cids_ai ON eth.uncle_cids;
 DROP TRIGGER transaction_cids_ai ON eth.transaction_cids;
 DROP TRIGGER storage_cids_ai ON eth.storage_cids;
 DROP TRIGGER state_cids_ai ON eth.state_cids;
 DROP TRIGGER state_accounts_ai ON eth.state_accounts;
 DROP TRIGGER receipt_cids_ai ON eth.receipt_cids;
 DROP TRIGGER header_cids_ai ON eth.header_cids;
 DROP FUNCTION eth.graphql_subscription();
--- a/statediff/db/migrations/00013_create_cid_indexes.sql
+++ b/statediff/db/migrations/00013_create_cid_indexes.sql
@ -0,0 +1,121 @@
 -- +goose Up
 -- header indexes
 CREATE INDEX block_number_index ON eth.header_cids USING brin (block_number);
 CREATE INDEX block_hash_index ON eth.header_cids USING btree (block_hash);
 CREATE INDEX header_cid_index ON eth.header_cids USING btree (cid);
 CREATE INDEX header_mh_index ON eth.header_cids USING btree (mh_key);
 CREATE INDEX state_root_index ON eth.header_cids USING btree (state_root);
 CREATE INDEX timestamp_index ON eth.header_cids USING brin (timestamp);
 -- transaction indexes
 CREATE INDEX tx_header_id_index ON eth.transaction_cids USING btree (header_id);
 CREATE INDEX tx_hash_index ON eth.transaction_cids USING btree (tx_hash);
 CREATE INDEX tx_cid_index ON eth.transaction_cids USING btree (cid);
 CREATE INDEX tx_mh_index ON eth.transaction_cids USING btree (mh_key);
 CREATE INDEX tx_dst_index ON eth.transaction_cids USING btree (dst);
 CREATE INDEX tx_src_index ON eth.transaction_cids USING btree (src);
 -- receipt indexes
 CREATE INDEX rct_tx_id_index ON eth.receipt_cids USING btree (tx_id);
 CREATE INDEX rct_cid_index ON eth.receipt_cids USING btree (cid);
 CREATE INDEX rct_mh_index ON eth.receipt_cids USING btree (mh_key);
 CREATE INDEX rct_contract_index ON eth.receipt_cids USING btree (contract);
 CREATE INDEX rct_contract_hash_index ON eth.receipt_cids USING btree (contract_hash);
 CREATE INDEX rct_topic0_index ON eth.receipt_cids USING gin (topic0s);
 CREATE INDEX rct_topic1_index ON eth.receipt_cids USING gin (topic1s);
 CREATE INDEX rct_topic2_index ON eth.receipt_cids USING gin (topic2s);
 CREATE INDEX rct_topic3_index ON eth.receipt_cids USING gin (topic3s);
 CREATE INDEX rct_log_contract_index ON eth.receipt_cids USING gin (log_contracts);
 -- state node indexes
 CREATE INDEX state_header_id_index ON eth.state_cids USING btree (header_id);
 CREATE INDEX state_leaf_key_index ON eth.state_cids USING btree (state_leaf_key);
 CREATE INDEX state_cid_index ON eth.state_cids USING btree (cid);
 CREATE INDEX state_mh_index ON eth.state_cids USING btree (mh_key);
 CREATE INDEX state_path_index ON eth.state_cids USING btree (state_path);
 -- storage node indexes
 CREATE INDEX storage_state_id_index ON eth.storage_cids USING btree (state_id);
 CREATE INDEX storage_leaf_key_index ON eth.storage_cids USING btree (storage_leaf_key);
 CREATE INDEX storage_cid_index ON eth.storage_cids USING btree (cid);
 CREATE INDEX storage_mh_index ON eth.storage_cids USING btree (mh_key);
 CREATE INDEX storage_path_index ON eth.storage_cids USING btree (storage_path);
 -- state accounts indexes
 CREATE INDEX account_state_id_index ON eth.state_accounts USING btree (state_id);
 CREATE INDEX storage_root_index ON eth.state_accounts USING btree (storage_root);
 -- +goose Down
 -- state account indexes
 DROP INDEX eth.storage_root_index;
 DROP INDEX eth.account_state_id_index;
 -- storage node indexes
 DROP INDEX eth.storage_path_index;
 DROP INDEX eth.storage_mh_index;
 DROP INDEX eth.storage_cid_index;
 DROP INDEX eth.storage_leaf_key_index;
 DROP INDEX eth.storage_state_id_index;
 -- state node indexes
 DROP INDEX eth.state_path_index;
 DROP INDEX eth.state_mh_index;
 DROP INDEX eth.state_cid_index;
 DROP INDEX eth.state_leaf_key_index;
 DROP INDEX eth.state_header_id_index;
 -- receipt indexes
 DROP INDEX eth.rct_log_contract_index;
 DROP INDEX eth.rct_topic3_index;
 DROP INDEX eth.rct_topic2_index;
 DROP INDEX eth.rct_topic1_index;
 DROP INDEX eth.rct_topic0_index;
 DROP INDEX eth.rct_contract_hash_index;
 DROP INDEX eth.rct_contract_index;
 DROP INDEX eth.rct_mh_index;
 DROP INDEX eth.rct_cid_index;
 DROP INDEX eth.rct_tx_id_index;
 -- transaction indexes
 DROP INDEX eth.tx_src_index;
 DROP INDEX eth.tx_dst_index;
 DROP INDEX eth.tx_mh_index;
 DROP INDEX eth.tx_cid_index;
 DROP INDEX eth.tx_hash_index;
 DROP INDEX eth.tx_header_id_index;
 -- header indexes
 DROP INDEX eth.timestamp_index;
 DROP INDEX eth.state_root_index;
 DROP INDEX eth.header_mh_index;
 DROP INDEX eth.header_cid_index;
 DROP INDEX eth.block_hash_index;
 DROP INDEX eth.block_number_index;
--- a/statediff/db/migrations/00014_create_stored_functions.sql
+++ b/statediff/db/migrations/00014_create_stored_functions.sql
@ -0,0 +1,158 @@
 -- +goose Up
 -- +goose StatementBegin
 -- returns if a storage node at the provided path was removed in the range > the provided height and <= the provided block hash
 CREATE OR REPLACE FUNCTION was_storage_removed(path BYTEA, height BIGINT, hash VARCHAR(66)) RETURNS BOOLEAN
 AS $$
 SELECT exists(SELECT 1
              FROM eth.storage_cids
                INNER JOIN eth.state_cids ON (storage_cids.state_id = state_cids.id)
                INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.id)
              WHERE storage_path = path
                AND block_number > height
                AND block_number <= (SELECT block_number
                                     FROM eth.header_cids
                                     WHERE block_hash = hash)
                AND storage_cids.node_type = 3
              LIMIT 1);
 $$ LANGUAGE SQL;
 -- +goose StatementEnd
 -- +goose StatementBegin
 -- returns if a state node at the provided path was removed in the range > the provided height and <= the provided block hash
 CREATE OR REPLACE FUNCTION was_state_removed(path BYTEA, height BIGINT, hash VARCHAR(66)) RETURNS BOOLEAN
 AS $$
 SELECT exists(SELECT 1
              FROM eth.state_cids
                INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.id)
              WHERE state_path = path
                AND block_number > height
                AND block_number <= (SELECT block_number
                                     FROM eth.header_cids
                                     WHERE block_hash = hash)
                AND state_cids.node_type = 3
              LIMIT 1);
 $$ LANGUAGE SQL;
 -- +goose StatementEnd
 -- +goose StatementBegin
 CREATE TYPE child_result AS (
    has_child BOOLEAN,
    children eth.header_cids[]
 );
 CREATE OR REPLACE FUNCTION has_child(hash VARCHAR(66), height BIGINT) RETURNS child_result AS
 $BODY$
 DECLARE
  child_height INT;
  temp_child eth.header_cids;
  new_child_result child_result;
 BEGIN
  child_height = height + 1;
  -- short circuit if there are no children
  SELECT exists(SELECT 1
              FROM eth.header_cids
              WHERE parent_hash = hash
                AND block_number = child_height
              LIMIT 1)
  INTO new_child_result.has_child;
  -- collect all the children for this header
  IF new_child_result.has_child THEN
    FOR temp_child IN
    SELECT * FROM eth.header_cids WHERE parent_hash = hash AND block_number = child_height
    LOOP
      new_child_result.children = array_append(new_child_result.children, temp_child);
    END LOOP;
  END IF;
 RETURN new_child_result;
 END
 $BODY$
 LANGUAGE 'plpgsql';
 -- +goose StatementEnd
 -- +goose StatementBegin
 CREATE OR REPLACE FUNCTION canonical_header_from_array(headers eth.header_cids[]) RETURNS eth.header_cids AS
 $BODY$
 DECLARE
  canonical_header eth.header_cids;
  canonical_child eth.header_cids;
  header eth.header_cids;
  current_child_result child_result;
  child_headers eth.header_cids[];
  current_header_with_child eth.header_cids;
  has_children_count INT DEFAULT 0;
 BEGIN
  -- for each header in the provided set
  FOREACH header IN ARRAY headers
  LOOP
    -- check if it has any children
    current_child_result = has_child(header.block_hash, header.block_number);
    IF current_child_result.has_child THEN
      -- if it does, take note
      has_children_count = has_children_count + 1;
      current_header_with_child = header;
      -- and add the children to the growing set of child headers
      child_headers = array_cat(child_headers, current_child_result.children);
    END IF;
  END LOOP;
  -- if none of the headers had children, none is more canonical than the other
  IF has_children_count = 0 THEN
    -- return the first one selected
    SELECT * INTO canonical_header FROM unnest(headers) LIMIT 1;
  -- if only one header had children, it can be considered the heaviest/canonical header of the set
  ELSIF has_children_count = 1 THEN
    -- return the only header with a child
    canonical_header = current_header_with_child;
  -- if there are multiple headers with children
  ELSE
    -- find the canonical header from the child set
    canonical_child = canonical_header_from_array(child_headers);
    -- the header that is parent to this header, is the canonical header at this level
    SELECT * INTO canonical_header FROM unnest(headers)
    WHERE block_hash = canonical_child.parent_hash;
  END IF;
  RETURN canonical_header;
 END
 $BODY$
 LANGUAGE 'plpgsql';
 -- +goose StatementEnd
 -- +goose StatementBegin
 CREATE OR REPLACE FUNCTION canonical_header_id(height BIGINT) RETURNS INTEGER AS
 $BODY$
 DECLARE
  canonical_header eth.header_cids;
  headers eth.header_cids[];
  header_count INT;
  temp_header eth.header_cids;
 BEGIN
  -- collect all headers at this height
  FOR temp_header IN
  SELECT * FROM eth.header_cids WHERE block_number = height
  LOOP
    headers = array_append(headers, temp_header);
  END LOOP;
  -- count the number of headers collected
  header_count = array_length(headers, 1);
  -- if we have less than 1 header, return NULL
  IF header_count IS NULL OR header_count < 1 THEN
    RETURN NULL;
  -- if we have one header, return its id
  ELSIF header_count = 1 THEN
    RETURN headers[1].id;
  -- if we have multiple headers we need to determine which one is canonical
  ELSE
    canonical_header = canonical_header_from_array(headers);
    RETURN canonical_header.id;
  END IF;
 END;
 $BODY$
 LANGUAGE 'plpgsql';
 -- +goose StatementEnd
 -- +goose Down
 DROP FUNCTION was_storage_removed;
 DROP FUNCTION was_state_removed;
 DROP FUNCTION canonical_header_id;
 DROP FUNCTION canonical_header_from_array;
 DROP FUNCTION has_child;
 DROP TYPE child_result;
--- a/statediff/db/schema.sql
+++ b/statediff/db/schema.sql
--- a/statediff/doc.go
+++ b/statediff/doc.go
@ -1,57 +0,0 @@
 // Copyright 2019 The go-ethereum Authors
 // This file is part of the go-ethereum library.
 //
 // The go-ethereum library is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Lesser General Public License as published by
 // the Free Software Foundation, either version 3 of the License, or
 // (at your option) any later version.
 //
 // The go-ethereum library is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 // GNU Lesser General Public License for more details.
 //
 // You should have received a copy of the GNU Lesser General Public License
 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 /*
 Package statediff provides an auxiliary service that processes state diff objects from incoming chain events,
 relaying the objects to any rpc subscriptions.
 This work is adapted from work by Charles Crain at https://github.com/jpmorganchase/quorum/blob/9b7fd9af8082795eeeb6863d9746f12b82dd5078/statediff/statediff.go
 The service is spun up using the below CLI flags
 --statediff: boolean flag, turns on the service
 --statediff.streamblock: boolean flag, configures the service to associate and stream out the rest of the block data with the state diffs.
 --statediff.intermediatenodes: boolean flag, tells service to include intermediate (branch and extension) nodes; default (false) processes leaf nodes only.
 --statediff.watchedaddresses: string slice flag, used to limit the state diffing process to the given addresses. Usage: --statediff.watchedaddresses=addr1 --statediff.watchedaddresses=addr2 --statediff.watchedaddresses=addr3
 If you wish to use the websocket endpoint to subscribe to the statediff service, be sure to open up the Websocket RPC server with the `--ws` flag. The IPC-RPC server is turned on by default.
 The statediffing services works only with `--syncmode="full", but -importantly- does not require garbage collection to be turned off (does not require an archival node).
 e.g.
 $ ./geth --statediff --statediff.streamblock --ws --syncmode "full"
 This starts up the geth node in full sync mode, starts up the statediffing service, and opens up the websocket endpoint to subscribe to the service.
 Because the "streamblock" flag has been turned on, the service will strean out block data (headers, transactions, and receipts) along with the diffed state and storage leafs.
 Rpc subscriptions to the service can be created using the rpc.Client.Subscribe() method,
 with the "statediff" namespace, a statediff.Payload channel, and the name of the statediff api's rpc method- "stream".
 e.g.
 cli, _ := rpc.Dial("ipcPathOrWsURL")
 stateDiffPayloadChan := make(chan statediff.Payload, 20000)
 rpcSub, err := cli.Subscribe(context.Background(), "statediff", stateDiffPayloadChan, "stream"})
 for {
 	select {
 	case stateDiffPayload := <- stateDiffPayloadChan:
 		processPayload(stateDiffPayload)
 	case err := <- rpcSub.Err():
 		log.Error(err)
 	}
 }
 */
 package statediff
--- a/statediff/doc.md
+++ b/statediff/doc.md
@ -0,0 +1,215 @@
 # Statediff
 This package provides an auxiliary service that asynchronously processes state diff objects from chain events,
 either relaying the state objects to RPC subscribers or writing them directly to Postgres as IPLD objects.
 It also exposes RPC endpoints for fetching or writing to Postgres the state diff at a specific block height
 or for a specific block hash, this operates on historical block and state data and so depends on a complete state archive.
 Data is emitted in this differential format in order to make it feasible to IPLD-ize and index the *entire* Ethereum state
 (including intermediate state and storage trie nodes). If this state diff process is ran continuously from genesis,
 the entire state at any block can be materialized from the cumulative differentials up to that point.
 ## Statediff object
 A state diff `StateObject` is the collection of all the state and storage trie nodes that have been updated in a given block.
 For convenience, we also associate these nodes with the block number and hash, and optionally the set of code hashes and code for any
 contracts deployed in this block.
 A complete state diff `StateObject` will include all state and storage intermediate nodes, which is necessary for generating proofs and for
 traversing the tries.
 ```go
 // StateObject is a collection of state (and linked storage nodes) as well as the associated block number, block hash,
 // and a set of code hashes and their code
 type StateObject struct {
 	BlockNumber       *big.Int                `json:"blockNumber"     gencodec:"required"`
 	BlockHash         common.Hash             `json:"blockHash"       gencodec:"required"`
 	Nodes             []StateNode             `json:"nodes"           gencodec:"required"`
 	CodeAndCodeHashes []CodeAndCodeHash       `json:"codeMapping"`
 }
 // StateNode holds the data for a single state diff node
 type StateNode struct {
 	NodeType     NodeType      `json:"nodeType"        gencodec:"required"`
 	Path         []byte        `json:"path"            gencodec:"required"`
 	NodeValue    []byte        `json:"value"           gencodec:"required"`
 	StorageNodes []StorageNode `json:"storage"`
 	LeafKey      []byte        `json:"leafKey"`
 }
 // StorageNode holds the data for a single storage diff node
 type StorageNode struct {
 	NodeType  NodeType `json:"nodeType"        gencodec:"required"`
 	Path      []byte   `json:"path"            gencodec:"required"`
 	NodeValue []byte   `json:"value"           gencodec:"required"`
 	LeafKey   []byte   `json:"leafKey"`
 }
 // CodeAndCodeHash struct for holding codehash => code mappings
 // we can't use an actual map because they are not rlp serializable
 type CodeAndCodeHash struct {
 	Hash common.Hash `json:"codeHash"`
 	Code []byte      `json:"code"`
 }
 ```
 These objects are packed into a `Payload` structure which can additionally associate the `StateObject`
 with the block (header, uncles, and transactions), receipts, and total difficulty.
 This `Payload` encapsulates all of the differential data at a given block, and allows us to index the entire Ethereum data structure
 as hash-linked IPLD objects.
 ```go
 // Payload packages the data to send to state diff subscriptions
 type Payload struct {
 	BlockRlp        []byte   `json:"blockRlp"`
 	TotalDifficulty *big.Int `json:"totalDifficulty"`
 	ReceiptsRlp     []byte   `json:"receiptsRlp"`
 	StateObjectRlp  []byte   `json:"stateObjectRlp"    gencodec:"required"`
 	encoded []byte
 	err     error
 }
 ```
 ## Usage
 This state diffing service runs as an auxiliary service concurrent to the regular syncing process of the geth node.
 ### CLI configuration
 This service introduces a CLI flag namespace `statediff`
 `--statediff` flag is used to turn on the service
 `--statediff.writing` is used to tell the service to write state diff objects it produces from synced ChainEvents directly to a configured Postgres database
 `--statediff.db` is the connection string for the Postgres database to write to
 `--statediff.dbnodeid` is the node id to use in the Postgres database
 `--statediff.dbclientname` is the client name to use in the Postgres database
 The service can only operate in full sync mode (`--syncmode=full`), but only the historical RPC endpoints require an archive node (`--gcmode=archive`)
 e.g.
 `
 ./build/bin/geth --syncmode=full --gcmode=archive --statediff --statediff.writing --statediff.db=postgres://localhost:5432/vulcanize_testing?sslmode=disable --statediff.dbnodeid={nodeId} --statediff.dbclientname={dbClientName}
 `
 ### RPC endpoints
 The state diffing service exposes both a WS subscription endpoint, and a number of HTTP unary endpoints.
 Each of these endpoints requires a set of parameters provided by the caller
 ```go
 // Params is used to carry in parameters from subscribing/requesting clients configuration
 type Params struct {
 	IntermediateStateNodes   bool
 	IntermediateStorageNodes bool
 	IncludeBlock             bool
 	IncludeReceipts          bool
 	IncludeTD                bool
 	IncludeCode              bool
 	WatchedAddresses         []common.Address
 	WatchedStorageSlots      []common.Hash
 }
 ```
 Using these params we can tell the service whether to include state and/or storage intermediate nodes; whether
 to include the associated block (header, uncles, and transactions); whether to include the associated receipts;
 whether to include the total difficulty for this block; whether to include the set of code hashes and code for
 contracts deployed in this block; whether to limit the diffing process to a list of specific addresses; and/or
 whether to limit the diffing process to a list of specific storage slot keys.
 #### Subscription endpoint
 A websocket supporting RPC endpoint is exposed for subscribing to state diff `StateObjects` that come off the head of the chain while the geth node syncs.
 ```go
 // Stream is a subscription endpoint that fires off state diff payloads as they are created
 Stream(ctx context.Context, params Params) (*rpc.Subscription, error)
 ```
 To expose this endpoint the node needs to have the websocket server turned on (`--ws`),
 and the `statediff` namespace exposed (`--ws.api=statediff`).
 Go code subscriptions to this endpoint can be created using the `rpc.Client.Subscribe()` method,
 with the "statediff" namespace, a `statediff.Payload` channel, and the name of the statediff api's rpc method: "stream".
 e.g.
 ```go
 cli, err := rpc.Dial("ipcPathOrWsURL")
 if err != nil {
 	// handle error
 }
 stateDiffPayloadChan := make(chan statediff.Payload, 20000)
 methodName := "stream"
 params := statediff.Params{
    IncludeBlock:             true,
    IncludeTD:                true,
    IncludeReceipts:          true,
    IntermediateStorageNodes: true,
    IntermediateStateNodes:   true,
 }
 rpcSub, err := cli.Subscribe(context.Background(), statediff.APIName, stateDiffPayloadChan, methodName, params)
 if err != nil {
 	// handle error
 }
 for {
 	select {
 	case stateDiffPayload := <- stateDiffPayloadChan:
            // process the payload
        case err := <- rpcSub.Err():
    	    // handle rpc subscription error
        }
 }
 ```
 #### Unary endpoints
 The service also exposes unary RPC endpoints for retrieving the state diff `StateObject` for a specific block height/hash.
 ```go
 // StateDiffAt returns a state diff payload at the specific blockheight
 StateDiffAt(ctx context.Context, blockNumber uint64, params Params) (*Payload, error)
 // StateDiffFor returns a state diff payload for the specific blockhash
 StateDiffFor(ctx context.Context, blockHash common.Hash, params Params) (*Payload, error)
 ```
 To expose this endpoint the node needs to have the HTTP server turned on (`--http`),
 and the `statediff` namespace exposed (`--http.api=statediff`).
 ### Direct indexing into Postgres
 If `--statediff.writing` is set, the service will convert the state diff `StateObject` data into IPLD objects, persist them directly to Postgres,
 and generate secondary indexes around the IPLD data.
 The schema and migrations for this Postgres database are provided in `statediff/db/`.
 #### Postgres setup
 We use [pressly/goose](https://github.com/pressly/goose) as our Postgres migration manager.
 You can also load the Postgres schema directly into a database using
 `psql database_name < schema.sql`
 This will only work on a version 12.4 Postgres database.
 #### Schema overview
 Our Postgres schemas are built around a single IPFS backing Postgres IPLD blockstore table (`public.blocks`) that conforms with [go-ds-sql](https://github.com/ipfs/go-ds-sql/blob/master/postgres/postgres.go).
 All IPLD objects are stored in this table, where `key` is the blockstore-prefixed multihash key for the IPLD object and `data` contains
 the bytes for the IPLD block (in the case of all Ethereum IPLDs, this is the RLP byte encoding of the Ethereum object).
 The IPLD objects in this table can be traversed using an IPLD DAG interface, but since this table only maps multihash to raw IPLD object
 it is not particularly useful for searching through the data by looking up Ethereum objects by their constituent fields
 (e.g. by block number, tx source/recipient, state/storage trie node path). To improve the accessibility of these objects
 we create an Ethereum [advanced data layout](https://github.com/ipld/specs#schemas-and-advanced-data-layouts) (ADL) by generating secondary
 indexes on top of the raw IPLDs in other Postgres tables.
 These secondary index tables fall under the `eth` schema and follow an `{objectType}_cids` naming convention.
 These tables provide a view into individual fields of the underlying Ethereum IPLD objects, allowing lookups on these fields, and reference the raw IPLD objects stored in `public.blocks`
 by foreign keys to their multihash keys.
 Additionally, these tables maintain the hash-linked nature of Ethereum objects to one another. E.g. a storage trie node entry in the `storage_cids`
 table contains a `state_id` foreign key which references the `id` for the `state_cids` entry that contains the state leaf node for the contract that storage node belongs to,
 and in turn that `state_cids` entry contains a `header_id` foreign key which references the `id` of the `header_cids` entry that contains the header for the block these state and storage nodes were updated (diffed).
 ### Optimization
 On mainnet this process is extremely IO intensive and requires significant resources to allow it to keep up with the head of the chain.
 The state diff processing time for a specific block is dependent on the number and complexity of the state changes that occur in a block and
 the number of updated state nodes that are available in the in-memory cache vs must be retrieved from disc.
 If memory permits, one means of improving the efficiency of this process is to increase the in-memory trie cache allocation.
 This can be done by increasing the overall `--cache` allocation and/or by increasing the % of the cache allocated to trie
 usage with `--cache.trie`.