From d9d05695e62bf833a0d5e3b51e99b18f2eea6e72 Mon Sep 17 00:00:00 2001
From: Ian Norden <iannordenn@gmail.com>
Date: Mon, 5 Apr 2021 12:47:28 -0500
Subject: [PATCH 1/3] include Postgres migrations and schema

---
 .../00001_create_ipfs_blocks_table.sql        |    8 +
 .../migrations/00002_create_nodes_table.sql   |   13 +
 .../db/migrations/00003_create_eth_schema.sql |    5 +
 .../00004_create_eth_header_cids_table.sql    |   23 +
 .../00005_create_eth_uncle_cids_table.sql     |   14 +
 ...0006_create_eth_transaction_cids_table.sql |   16 +
 .../00007_create_eth_receipt_cids_table.sql   |   20 +
 .../00008_create_eth_state_cids_table.sql     |   15 +
 .../00009_create_eth_storage_cids_table.sql   |   15 +
 .../00010_create_eth_state_accouts_table.sql  |   13 +
 .../00011_create_postgraphile_comments.sql    |    6 +
 .../migrations/00012_potgraphile_triggers.sql |   69 +
 .../migrations/00013_create_cid_indexes.sql   |  121 ++
 .../00014_create_stored_functions.sql         |  158 +++
 statediff/db/schema.sql                       | 1201 +++++++++++++++++
 15 files changed, 1697 insertions(+)
 create mode 100644 statediff/db/migrations/00001_create_ipfs_blocks_table.sql
 create mode 100644 statediff/db/migrations/00002_create_nodes_table.sql
 create mode 100644 statediff/db/migrations/00003_create_eth_schema.sql
 create mode 100644 statediff/db/migrations/00004_create_eth_header_cids_table.sql
 create mode 100644 statediff/db/migrations/00005_create_eth_uncle_cids_table.sql
 create mode 100644 statediff/db/migrations/00006_create_eth_transaction_cids_table.sql
 create mode 100644 statediff/db/migrations/00007_create_eth_receipt_cids_table.sql
 create mode 100644 statediff/db/migrations/00008_create_eth_state_cids_table.sql
 create mode 100644 statediff/db/migrations/00009_create_eth_storage_cids_table.sql
 create mode 100644 statediff/db/migrations/00010_create_eth_state_accouts_table.sql
 create mode 100644 statediff/db/migrations/00011_create_postgraphile_comments.sql
 create mode 100644 statediff/db/migrations/00012_potgraphile_triggers.sql
 create mode 100644 statediff/db/migrations/00013_create_cid_indexes.sql
 create mode 100644 statediff/db/migrations/00014_create_stored_functions.sql
 create mode 100644 statediff/db/schema.sql

diff --git a/statediff/db/migrations/00001_create_ipfs_blocks_table.sql b/statediff/db/migrations/00001_create_ipfs_blocks_table.sql
new file mode 100644
index 000000000..6e3941e04
--- /dev/null
+++ b/statediff/db/migrations/00001_create_ipfs_blocks_table.sql
@@ -0,0 +1,8 @@
+-- +goose Up
+CREATE TABLE IF NOT EXISTS public.blocks (
+  key TEXT UNIQUE NOT NULL,
+  data BYTEA NOT NULL
+);
+
+-- +goose Down
+DROP TABLE public.blocks;
diff --git a/statediff/db/migrations/00002_create_nodes_table.sql b/statediff/db/migrations/00002_create_nodes_table.sql
new file mode 100644
index 000000000..e70c144bb
--- /dev/null
+++ b/statediff/db/migrations/00002_create_nodes_table.sql
@@ -0,0 +1,13 @@
+-- +goose Up
+CREATE TABLE nodes (
+  id            SERIAL PRIMARY KEY,
+  client_name   VARCHAR,
+  genesis_block VARCHAR(66),
+  network_id    VARCHAR,
+  node_id       VARCHAR(128),
+  chain_id      INTEGER DEFAULT 1,
+  CONSTRAINT node_uc UNIQUE (genesis_block, network_id, node_id, chain_id)
+);
+
+-- +goose Down
+DROP TABLE nodes;
diff --git a/statediff/db/migrations/00003_create_eth_schema.sql b/statediff/db/migrations/00003_create_eth_schema.sql
new file mode 100644
index 000000000..84d6f4b68
--- /dev/null
+++ b/statediff/db/migrations/00003_create_eth_schema.sql
@@ -0,0 +1,5 @@
+-- +goose Up
+CREATE SCHEMA eth;
+
+-- +goose Down
+DROP SCHEMA eth;
\ No newline at end of file
diff --git a/statediff/db/migrations/00004_create_eth_header_cids_table.sql b/statediff/db/migrations/00004_create_eth_header_cids_table.sql
new file mode 100644
index 000000000..339eb427b
--- /dev/null
+++ b/statediff/db/migrations/00004_create_eth_header_cids_table.sql
@@ -0,0 +1,23 @@
+-- +goose Up
+CREATE TABLE eth.header_cids (
+  id                    SERIAL PRIMARY KEY,
+  block_number          BIGINT NOT NULL,
+  block_hash            VARCHAR(66) NOT NULL,
+  parent_hash           VARCHAR(66) NOT NULL,
+  cid                   TEXT NOT NULL,
+  mh_key                TEXT NOT NULL REFERENCES public.blocks (key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+  td                    NUMERIC NOT NULL,
+  node_id               INTEGER NOT NULL REFERENCES nodes (id) ON DELETE CASCADE,
+  reward                NUMERIC NOT NULL,
+  state_root            VARCHAR(66) NOT NULL,
+  tx_root               VARCHAR(66) NOT NULL,
+  receipt_root          VARCHAR(66) NOT NULL,
+  uncle_root            VARCHAR(66) NOT NULL,
+  bloom                 BYTEA NOT NULL,
+  timestamp             NUMERIC NOT NULL,
+  times_validated       INTEGER NOT NULL DEFAULT 1,
+  UNIQUE (block_number, block_hash)
+);
+
+-- +goose Down
+DROP TABLE eth.header_cids;
\ No newline at end of file
diff --git a/statediff/db/migrations/00005_create_eth_uncle_cids_table.sql b/statediff/db/migrations/00005_create_eth_uncle_cids_table.sql
new file mode 100644
index 000000000..c46cafb9c
--- /dev/null
+++ b/statediff/db/migrations/00005_create_eth_uncle_cids_table.sql
@@ -0,0 +1,14 @@
+-- +goose Up
+CREATE TABLE eth.uncle_cids (
+  id                    SERIAL PRIMARY KEY,
+  header_id             INTEGER NOT NULL REFERENCES eth.header_cids (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+  block_hash            VARCHAR(66) NOT NULL,
+  parent_hash           VARCHAR(66) NOT NULL,
+  cid                   TEXT NOT NULL,
+  mh_key                TEXT NOT NULL REFERENCES public.blocks (key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+  reward                NUMERIC NOT NULL,
+  UNIQUE (header_id, block_hash)
+);
+
+-- +goose Down
+DROP TABLE eth.uncle_cids;
\ No newline at end of file
diff --git a/statediff/db/migrations/00006_create_eth_transaction_cids_table.sql b/statediff/db/migrations/00006_create_eth_transaction_cids_table.sql
new file mode 100644
index 000000000..8be504f37
--- /dev/null
+++ b/statediff/db/migrations/00006_create_eth_transaction_cids_table.sql
@@ -0,0 +1,16 @@
+-- +goose Up
+CREATE TABLE eth.transaction_cids (
+  id                    SERIAL PRIMARY KEY,
+  header_id             INTEGER NOT NULL REFERENCES eth.header_cids (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+  tx_hash               VARCHAR(66) NOT NULL,
+  index                 INTEGER NOT NULL,
+  cid                   TEXT NOT NULL,
+  mh_key                TEXT NOT NULL REFERENCES public.blocks (key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+  dst                   VARCHAR(66) NOT NULL,
+  src                   VARCHAR(66) NOT NULL,
+  tx_data               BYTEA,
+  UNIQUE (header_id, tx_hash)
+);
+
+-- +goose Down
+DROP TABLE eth.transaction_cids;
diff --git a/statediff/db/migrations/00007_create_eth_receipt_cids_table.sql b/statediff/db/migrations/00007_create_eth_receipt_cids_table.sql
new file mode 100644
index 000000000..e8d0e27d6
--- /dev/null
+++ b/statediff/db/migrations/00007_create_eth_receipt_cids_table.sql
@@ -0,0 +1,20 @@
+-- +goose Up
+CREATE TABLE eth.receipt_cids (
+  id                    SERIAL PRIMARY KEY,
+  tx_id                 INTEGER NOT NULL REFERENCES eth.transaction_cids (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+  cid                   TEXT NOT NULL,
+  mh_key                TEXT NOT NULL REFERENCES public.blocks (key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+  contract              VARCHAR(66),
+  contract_hash         VARCHAR(66),
+  topic0s               VARCHAR(66)[],
+  topic1s               VARCHAR(66)[],
+  topic2s               VARCHAR(66)[],
+  topic3s               VARCHAR(66)[],
+  log_contracts         VARCHAR(66)[],
+  post_state            VARCHAR(66),
+  post_status           INTEGER,
+  UNIQUE (tx_id)
+);
+
+-- +goose Down
+DROP TABLE eth.receipt_cids;
\ No newline at end of file
diff --git a/statediff/db/migrations/00008_create_eth_state_cids_table.sql b/statediff/db/migrations/00008_create_eth_state_cids_table.sql
new file mode 100644
index 000000000..ccece9641
--- /dev/null
+++ b/statediff/db/migrations/00008_create_eth_state_cids_table.sql
@@ -0,0 +1,15 @@
+-- +goose Up
+CREATE TABLE eth.state_cids (
+  id                    BIGSERIAL PRIMARY KEY,
+  header_id             INTEGER NOT NULL REFERENCES eth.header_cids (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+  state_leaf_key        VARCHAR(66),
+  cid                   TEXT NOT NULL,
+  mh_key                TEXT NOT NULL REFERENCES public.blocks (key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+  state_path            BYTEA,
+  node_type             INTEGER NOT NULL,
+  diff                  BOOLEAN NOT NULL DEFAULT FALSE,
+  UNIQUE (header_id, state_path)
+);
+
+-- +goose Down
+DROP TABLE eth.state_cids;
\ No newline at end of file
diff --git a/statediff/db/migrations/00009_create_eth_storage_cids_table.sql b/statediff/db/migrations/00009_create_eth_storage_cids_table.sql
new file mode 100644
index 000000000..954fb465d
--- /dev/null
+++ b/statediff/db/migrations/00009_create_eth_storage_cids_table.sql
@@ -0,0 +1,15 @@
+-- +goose Up
+CREATE TABLE eth.storage_cids (
+  id                    BIGSERIAL PRIMARY KEY,
+  state_id              BIGINT NOT NULL REFERENCES eth.state_cids (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+  storage_leaf_key      VARCHAR(66),
+  cid                   TEXT NOT NULL,
+  mh_key                TEXT NOT NULL REFERENCES public.blocks (key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+  storage_path          BYTEA,
+  node_type             INTEGER NOT NULL,
+  diff                  BOOLEAN NOT NULL DEFAULT FALSE,
+  UNIQUE (state_id, storage_path)
+);
+
+-- +goose Down
+DROP TABLE eth.storage_cids;
\ No newline at end of file
diff --git a/statediff/db/migrations/00010_create_eth_state_accouts_table.sql b/statediff/db/migrations/00010_create_eth_state_accouts_table.sql
new file mode 100644
index 000000000..8a7e87083
--- /dev/null
+++ b/statediff/db/migrations/00010_create_eth_state_accouts_table.sql
@@ -0,0 +1,13 @@
+-- +goose Up
+CREATE TABLE eth.state_accounts (
+  id                    SERIAL PRIMARY KEY,
+  state_id              BIGINT NOT NULL REFERENCES eth.state_cids (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
+  balance               NUMERIC NOT NULL,
+  nonce                 INTEGER NOT NULL,
+  code_hash             BYTEA NOT NULL,
+  storage_root          VARCHAR(66) NOT NULL,
+  UNIQUE (state_id)
+);
+
+-- +goose Down
+DROP TABLE eth.state_accounts;
\ No newline at end of file
diff --git a/statediff/db/migrations/00011_create_postgraphile_comments.sql b/statediff/db/migrations/00011_create_postgraphile_comments.sql
new file mode 100644
index 000000000..16a051f40
--- /dev/null
+++ b/statediff/db/migrations/00011_create_postgraphile_comments.sql
@@ -0,0 +1,6 @@
+-- +goose Up
+COMMENT ON TABLE public.nodes IS E'@name NodeInfo';
+COMMENT ON TABLE eth.transaction_cids IS E'@name EthTransactionCids';
+COMMENT ON TABLE eth.header_cids IS E'@name EthHeaderCids';
+COMMENT ON COLUMN public.nodes.node_id IS E'@name ChainNodeID';
+COMMENT ON COLUMN eth.header_cids.node_id IS E'@name EthNodeID';
diff --git a/statediff/db/migrations/00012_potgraphile_triggers.sql b/statediff/db/migrations/00012_potgraphile_triggers.sql
new file mode 100644
index 000000000..34705337e
--- /dev/null
+++ b/statediff/db/migrations/00012_potgraphile_triggers.sql
@@ -0,0 +1,69 @@
+-- +goose Up
+-- +goose StatementBegin
+CREATE FUNCTION eth.graphql_subscription() returns TRIGGER as $$
+declare
+    table_name text = TG_ARGV[0];
+    attribute text = TG_ARGV[1];
+    id text;
+begin
+    execute 'select $1.' || quote_ident(attribute)
+        using new
+        into id;
+    perform pg_notify('postgraphile:' || table_name,
+                      json_build_object(
+                              '__node__', json_build_array(
+                              table_name,
+                              id
+                          )
+                          )::text
+        );
+    return new;
+end;
+$$ language plpgsql;
+-- +goose StatementEnd
+
+CREATE TRIGGER header_cids_ai
+    after INSERT ON eth.header_cids
+    for each row
+    execute procedure eth.graphql_subscription('header_cids', 'id');
+
+CREATE TRIGGER receipt_cids_ai
+    after INSERT ON eth.receipt_cids
+    for each row
+    execute procedure eth.graphql_subscription('receipt_cids', 'id');
+
+CREATE TRIGGER state_accounts_ai
+    after INSERT ON eth.state_accounts
+    for each row
+    execute procedure eth.graphql_subscription('state_accounts', 'id');
+
+CREATE TRIGGER state_cids_ai
+    after INSERT ON eth.state_cids
+    for each row
+    execute procedure eth.graphql_subscription('state_cids', 'id');
+
+CREATE TRIGGER storage_cids_ai
+    after INSERT ON eth.storage_cids
+    for each row
+    execute procedure eth.graphql_subscription('storage_cids', 'id');
+
+CREATE TRIGGER transaction_cids_ai
+    after INSERT ON eth.transaction_cids
+    for each row
+    execute procedure eth.graphql_subscription('transaction_cids', 'id');
+
+CREATE TRIGGER uncle_cids_ai
+    after INSERT ON eth.uncle_cids
+    for each row
+    execute procedure eth.graphql_subscription('uncle_cids', 'id');
+
+-- +goose Down
+DROP TRIGGER uncle_cids_ai ON eth.uncle_cids;
+DROP TRIGGER transaction_cids_ai ON eth.transaction_cids;
+DROP TRIGGER storage_cids_ai ON eth.storage_cids;
+DROP TRIGGER state_cids_ai ON eth.state_cids;
+DROP TRIGGER state_accounts_ai ON eth.state_accounts;
+DROP TRIGGER receipt_cids_ai ON eth.receipt_cids;
+DROP TRIGGER header_cids_ai ON eth.header_cids;
+
+DROP FUNCTION eth.graphql_subscription();
diff --git a/statediff/db/migrations/00013_create_cid_indexes.sql b/statediff/db/migrations/00013_create_cid_indexes.sql
new file mode 100644
index 000000000..bc38c5a26
--- /dev/null
+++ b/statediff/db/migrations/00013_create_cid_indexes.sql
@@ -0,0 +1,121 @@
+-- +goose Up
+-- header indexes
+CREATE INDEX block_number_index ON eth.header_cids USING brin (block_number);
+
+CREATE INDEX block_hash_index ON eth.header_cids USING btree (block_hash);
+
+CREATE INDEX header_cid_index ON eth.header_cids USING btree (cid);
+
+CREATE INDEX header_mh_index ON eth.header_cids USING btree (mh_key);
+
+CREATE INDEX state_root_index ON eth.header_cids USING btree (state_root);
+
+CREATE INDEX timestamp_index ON eth.header_cids USING brin (timestamp);
+
+-- transaction indexes
+CREATE INDEX tx_header_id_index ON eth.transaction_cids USING btree (header_id);
+
+CREATE INDEX tx_hash_index ON eth.transaction_cids USING btree (tx_hash);
+
+CREATE INDEX tx_cid_index ON eth.transaction_cids USING btree (cid);
+
+CREATE INDEX tx_mh_index ON eth.transaction_cids USING btree (mh_key);
+
+CREATE INDEX tx_dst_index ON eth.transaction_cids USING btree (dst);
+
+CREATE INDEX tx_src_index ON eth.transaction_cids USING btree (src);
+
+-- receipt indexes
+CREATE INDEX rct_tx_id_index ON eth.receipt_cids USING btree (tx_id);
+
+CREATE INDEX rct_cid_index ON eth.receipt_cids USING btree (cid);
+
+CREATE INDEX rct_mh_index ON eth.receipt_cids USING btree (mh_key);
+
+CREATE INDEX rct_contract_index ON eth.receipt_cids USING btree (contract);
+
+CREATE INDEX rct_contract_hash_index ON eth.receipt_cids USING btree (contract_hash);
+
+CREATE INDEX rct_topic0_index ON eth.receipt_cids USING gin (topic0s);
+
+CREATE INDEX rct_topic1_index ON eth.receipt_cids USING gin (topic1s);
+
+CREATE INDEX rct_topic2_index ON eth.receipt_cids USING gin (topic2s);
+
+CREATE INDEX rct_topic3_index ON eth.receipt_cids USING gin (topic3s);
+
+CREATE INDEX rct_log_contract_index ON eth.receipt_cids USING gin (log_contracts);
+
+-- state node indexes
+CREATE INDEX state_header_id_index ON eth.state_cids USING btree (header_id);
+
+CREATE INDEX state_leaf_key_index ON eth.state_cids USING btree (state_leaf_key);
+
+CREATE INDEX state_cid_index ON eth.state_cids USING btree (cid);
+
+CREATE INDEX state_mh_index ON eth.state_cids USING btree (mh_key);
+
+CREATE INDEX state_path_index ON eth.state_cids USING btree (state_path);
+
+-- storage node indexes
+CREATE INDEX storage_state_id_index ON eth.storage_cids USING btree (state_id);
+
+CREATE INDEX storage_leaf_key_index ON eth.storage_cids USING btree (storage_leaf_key);
+
+CREATE INDEX storage_cid_index ON eth.storage_cids USING btree (cid);
+
+CREATE INDEX storage_mh_index ON eth.storage_cids USING btree (mh_key);
+
+CREATE INDEX storage_path_index ON eth.storage_cids USING btree (storage_path);
+
+-- state accounts indexes
+CREATE INDEX account_state_id_index ON eth.state_accounts USING btree (state_id);
+
+CREATE INDEX storage_root_index ON eth.state_accounts USING btree (storage_root);
+
+-- +goose Down
+-- state account indexes
+DROP INDEX eth.storage_root_index;
+DROP INDEX eth.account_state_id_index;
+
+-- storage node indexes
+DROP INDEX eth.storage_path_index;
+DROP INDEX eth.storage_mh_index;
+DROP INDEX eth.storage_cid_index;
+DROP INDEX eth.storage_leaf_key_index;
+DROP INDEX eth.storage_state_id_index;
+
+-- state node indexes
+DROP INDEX eth.state_path_index;
+DROP INDEX eth.state_mh_index;
+DROP INDEX eth.state_cid_index;
+DROP INDEX eth.state_leaf_key_index;
+DROP INDEX eth.state_header_id_index;
+
+-- receipt indexes
+DROP INDEX eth.rct_log_contract_index;
+DROP INDEX eth.rct_topic3_index;
+DROP INDEX eth.rct_topic2_index;
+DROP INDEX eth.rct_topic1_index;
+DROP INDEX eth.rct_topic0_index;
+DROP INDEX eth.rct_contract_hash_index;
+DROP INDEX eth.rct_contract_index;
+DROP INDEX eth.rct_mh_index;
+DROP INDEX eth.rct_cid_index;
+DROP INDEX eth.rct_tx_id_index;
+
+-- transaction indexes
+DROP INDEX eth.tx_src_index;
+DROP INDEX eth.tx_dst_index;
+DROP INDEX eth.tx_mh_index;
+DROP INDEX eth.tx_cid_index;
+DROP INDEX eth.tx_hash_index;
+DROP INDEX eth.tx_header_id_index;
+
+-- header indexes
+DROP INDEX eth.timestamp_index;
+DROP INDEX eth.state_root_index;
+DROP INDEX eth.header_mh_index;
+DROP INDEX eth.header_cid_index;
+DROP INDEX eth.block_hash_index;
+DROP INDEX eth.block_number_index;
\ No newline at end of file
diff --git a/statediff/db/migrations/00014_create_stored_functions.sql b/statediff/db/migrations/00014_create_stored_functions.sql
new file mode 100644
index 000000000..8db60a301
--- /dev/null
+++ b/statediff/db/migrations/00014_create_stored_functions.sql
@@ -0,0 +1,158 @@
+-- +goose Up
+-- +goose StatementBegin
+-- returns if a storage node at the provided path was removed in the range > the provided height and <= the provided block hash
+CREATE OR REPLACE FUNCTION was_storage_removed(path BYTEA, height BIGINT, hash VARCHAR(66)) RETURNS BOOLEAN
+AS $$
+SELECT exists(SELECT 1
+              FROM eth.storage_cids
+                INNER JOIN eth.state_cids ON (storage_cids.state_id = state_cids.id)
+                INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.id)
+              WHERE storage_path = path
+                AND block_number > height
+                AND block_number <= (SELECT block_number
+                                     FROM eth.header_cids
+                                     WHERE block_hash = hash)
+                AND storage_cids.node_type = 3
+              LIMIT 1);
+$$ LANGUAGE SQL;
+-- +goose StatementEnd
+
+-- +goose StatementBegin
+-- returns if a state node at the provided path was removed in the range > the provided height and <= the provided block hash
+CREATE OR REPLACE FUNCTION was_state_removed(path BYTEA, height BIGINT, hash VARCHAR(66)) RETURNS BOOLEAN
+AS $$
+SELECT exists(SELECT 1
+              FROM eth.state_cids
+                INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.id)
+              WHERE state_path = path
+                AND block_number > height
+                AND block_number <= (SELECT block_number
+                                     FROM eth.header_cids
+                                     WHERE block_hash = hash)
+                AND state_cids.node_type = 3
+              LIMIT 1);
+$$ LANGUAGE SQL;
+-- +goose StatementEnd
+
+-- +goose StatementBegin
+CREATE TYPE child_result AS (
+    has_child BOOLEAN,
+    children eth.header_cids[]
+);
+
+CREATE OR REPLACE FUNCTION has_child(hash VARCHAR(66), height BIGINT) RETURNS child_result AS
+$BODY$
+DECLARE
+  child_height INT;
+  temp_child eth.header_cids;
+  new_child_result child_result;
+BEGIN
+  child_height = height + 1;
+  -- short circuit if there are no children
+  SELECT exists(SELECT 1
+              FROM eth.header_cids
+              WHERE parent_hash = hash
+                AND block_number = child_height
+              LIMIT 1)
+  INTO new_child_result.has_child;
+  -- collect all the children for this header
+  IF new_child_result.has_child THEN
+    FOR temp_child IN
+    SELECT * FROM eth.header_cids WHERE parent_hash = hash AND block_number = child_height
+    LOOP
+      new_child_result.children = array_append(new_child_result.children, temp_child);
+    END LOOP;
+  END IF;
+RETURN new_child_result;
+END
+$BODY$
+LANGUAGE 'plpgsql';
+-- +goose StatementEnd
+
+-- +goose StatementBegin
+CREATE OR REPLACE FUNCTION canonical_header_from_array(headers eth.header_cids[]) RETURNS eth.header_cids AS
+$BODY$
+DECLARE
+  canonical_header eth.header_cids;
+  canonical_child eth.header_cids;
+  header eth.header_cids;
+  current_child_result child_result;
+  child_headers eth.header_cids[];
+  current_header_with_child eth.header_cids;
+  has_children_count INT DEFAULT 0;
+BEGIN
+  -- for each header in the provided set
+  FOREACH header IN ARRAY headers
+  LOOP
+    -- check if it has any children
+    current_child_result = has_child(header.block_hash, header.block_number);
+    IF current_child_result.has_child THEN
+      -- if it does, take note
+      has_children_count = has_children_count + 1;
+      current_header_with_child = header;
+      -- and add the children to the growing set of child headers
+      child_headers = array_cat(child_headers, current_child_result.children);
+    END IF;
+  END LOOP;
+  -- if none of the headers had children, none is more canonical than the other
+  IF has_children_count = 0 THEN
+    -- return the first one selected
+    SELECT * INTO canonical_header FROM unnest(headers) LIMIT 1;
+  -- if only one header had children, it can be considered the heaviest/canonical header of the set
+  ELSIF has_children_count = 1 THEN
+    -- return the only header with a child
+    canonical_header = current_header_with_child;
+  -- if there are multiple headers with children
+  ELSE
+    -- find the canonical header from the child set
+    canonical_child = canonical_header_from_array(child_headers);
+    -- the header that is parent to this header, is the canonical header at this level
+    SELECT * INTO canonical_header FROM unnest(headers)
+    WHERE block_hash = canonical_child.parent_hash;
+  END IF;
+  RETURN canonical_header;
+END
+$BODY$
+LANGUAGE 'plpgsql';
+-- +goose StatementEnd
+
+-- +goose StatementBegin
+CREATE OR REPLACE FUNCTION canonical_header_id(height BIGINT) RETURNS INTEGER AS
+$BODY$
+DECLARE
+  canonical_header eth.header_cids;
+  headers eth.header_cids[];
+  header_count INT;
+  temp_header eth.header_cids;
+BEGIN
+  -- collect all headers at this height
+  FOR temp_header IN
+  SELECT * FROM eth.header_cids WHERE block_number = height
+  LOOP
+    headers = array_append(headers, temp_header);
+  END LOOP;
+  -- count the number of headers collected
+  header_count = array_length(headers, 1);
+  -- if we have less than 1 header, return NULL
+  IF header_count IS NULL OR header_count < 1 THEN
+    RETURN NULL;
+  -- if we have one header, return its id
+  ELSIF header_count = 1 THEN
+    RETURN headers[1].id;
+  -- if we have multiple headers we need to determine which one is canonical
+  ELSE
+    canonical_header = canonical_header_from_array(headers);
+    RETURN canonical_header.id;
+  END IF;
+END;
+$BODY$
+LANGUAGE 'plpgsql';
+-- +goose StatementEnd
+
+-- +goose Down
+DROP FUNCTION was_storage_removed;
+DROP FUNCTION was_state_removed;
+DROP FUNCTION canonical_header_id;
+DROP FUNCTION canonical_header_from_array;
+DROP FUNCTION has_child;
+DROP TYPE child_result;
\ No newline at end of file
diff --git a/statediff/db/schema.sql b/statediff/db/schema.sql
new file mode 100644
index 000000000..f80542a59
--- /dev/null
+++ b/statediff/db/schema.sql
@@ -0,0 +1,1201 @@
+--
+-- PostgreSQL database dump
+--
+
+-- Dumped from database version 12.4
+-- Dumped by pg_dump version 12.4
+
+SET statement_timeout = 0;
+SET lock_timeout = 0;
+SET idle_in_transaction_session_timeout = 0;
+SET client_encoding = 'UTF8';
+SET standard_conforming_strings = on;
+SELECT pg_catalog.set_config('search_path', '', false);
+SET check_function_bodies = false;
+SET xmloption = content;
+SET client_min_messages = warning;
+SET row_security = off;
+
+--
+-- Name: eth; Type: SCHEMA; Schema: -; Owner: -
+--
+
+CREATE SCHEMA eth;
+
+
+SET default_tablespace = '';
+
+SET default_table_access_method = heap;
+
+--
+-- Name: header_cids; Type: TABLE; Schema: eth; Owner: -
+--
+
+CREATE TABLE eth.header_cids (
+    id integer NOT NULL,
+    block_number bigint NOT NULL,
+    block_hash character varying(66) NOT NULL,
+    parent_hash character varying(66) NOT NULL,
+    cid text NOT NULL,
+    mh_key text NOT NULL,
+    td numeric NOT NULL,
+    node_id integer NOT NULL,
+    reward numeric NOT NULL,
+    state_root character varying(66) NOT NULL,
+    tx_root character varying(66) NOT NULL,
+    receipt_root character varying(66) NOT NULL,
+    uncle_root character varying(66) NOT NULL,
+    bloom bytea NOT NULL,
+    "timestamp" numeric NOT NULL,
+    times_validated integer DEFAULT 1 NOT NULL
+);
+
+
+--
+-- Name: TABLE header_cids; Type: COMMENT; Schema: eth; Owner: -
+--
+
+COMMENT ON TABLE eth.header_cids IS '@name EthHeaderCids';
+
+
+--
+-- Name: COLUMN header_cids.node_id; Type: COMMENT; Schema: eth; Owner: -
+--
+
+COMMENT ON COLUMN eth.header_cids.node_id IS '@name EthNodeID';
+
+
+--
+-- Name: child_result; Type: TYPE; Schema: public; Owner: -
+--
+
+CREATE TYPE public.child_result AS (
+	has_child boolean,
+	children eth.header_cids[]
+);
+
+
+--
+-- Name: graphql_subscription(); Type: FUNCTION; Schema: eth; Owner: -
+--
+
+CREATE FUNCTION eth.graphql_subscription() RETURNS trigger
+    LANGUAGE plpgsql
+    AS $_$
+declare
+    table_name text = TG_ARGV[0];
+    attribute text = TG_ARGV[1];
+    id text;
+begin
+    execute 'select $1.' || quote_ident(attribute)
+        using new
+        into id;
+    perform pg_notify('postgraphile:' || table_name,
+                      json_build_object(
+                              '__node__', json_build_array(
+                              table_name,
+                              id
+                          )
+                          )::text
+        );
+    return new;
+end;
+$_$;
+
+
+--
+-- Name: canonical_header_from_array(eth.header_cids[]); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.canonical_header_from_array(headers eth.header_cids[]) RETURNS eth.header_cids
+    LANGUAGE plpgsql
+    AS $$
+DECLARE
+canonical_header eth.header_cids;
+  canonical_child eth.header_cids;
+  header eth.header_cids;
+  current_child_result child_result;
+  child_headers eth.header_cids[];
+  current_header_with_child eth.header_cids;
+  has_children_count INT DEFAULT 0;
+BEGIN
+  -- for each header in the provided set
+  FOREACH header IN ARRAY headers
+  LOOP
+    -- check if it has any children
+    current_child_result = has_child(header.block_hash, header.block_number);
+    IF current_child_result.has_child THEN
+      -- if it does, take note
+      has_children_count = has_children_count + 1;
+      current_header_with_child = header;
+      -- and add the children to the growing set of child headers
+      child_headers = array_cat(child_headers, current_child_result.children);
+END IF;
+END LOOP;
+  -- if none of the headers had children, none is more canonical than the other
+  IF has_children_count = 0 THEN
+    -- return the first one selected
+SELECT * INTO canonical_header FROM unnest(headers) LIMIT 1;
+-- if only one header had children, it can be considered the heaviest/canonical header of the set
+ELSIF has_children_count = 1 THEN
+    -- return the only header with a child
+    canonical_header = current_header_with_child;
+  -- if there are multiple headers with children
+ELSE
+    -- find the canonical header from the child set
+    canonical_child = canonical_header_from_array(child_headers);
+    -- the header that is parent to this header, is the canonical header at this level
+SELECT * INTO canonical_header FROM unnest(headers)
+WHERE block_hash = canonical_child.parent_hash;
+END IF;
+RETURN canonical_header;
+END
+$$;
+
+
+--
+-- Name: canonical_header_id(bigint); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.canonical_header_id(height bigint) RETURNS integer
+    LANGUAGE plpgsql
+    AS $$
+DECLARE
+canonical_header eth.header_cids;
+  headers eth.header_cids[];
+  header_count INT;
+  temp_header eth.header_cids;
+BEGIN
+  -- collect all headers at this height
+FOR temp_header IN
+SELECT * FROM eth.header_cids WHERE block_number = height
+    LOOP
+    headers = array_append(headers, temp_header);
+END LOOP;
+  -- count the number of headers collected
+  header_count = array_length(headers, 1);
+  -- if we have less than 1 header, return NULL
+  IF header_count IS NULL OR header_count < 1 THEN
+    RETURN NULL;
+  -- if we have one header, return its id
+  ELSIF header_count = 1 THEN
+    RETURN headers[1].id;
+  -- if we have multiple headers we need to determine which one is canonical
+ELSE
+    canonical_header = canonical_header_from_array(headers);
+RETURN canonical_header.id;
+END IF;
+END;
+$$;
+
+
+--
+-- Name: has_child(character varying, bigint); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.has_child(hash character varying, height bigint) RETURNS public.child_result
+    LANGUAGE plpgsql
+    AS $$
+DECLARE
+child_height INT;
+  temp_child eth.header_cids;
+  new_child_result child_result;
+BEGIN
+  child_height = height + 1;
+  -- short circuit if there are no children
+SELECT exists(SELECT 1
+              FROM eth.header_cids
+              WHERE parent_hash = hash
+                AND block_number = child_height
+              LIMIT 1)
+INTO new_child_result.has_child;
+-- collect all the children for this header
+IF new_child_result.has_child THEN
+    FOR temp_child IN
+SELECT * FROM eth.header_cids WHERE parent_hash = hash AND block_number = child_height
+    LOOP
+      new_child_result.children = array_append(new_child_result.children, temp_child);
+END LOOP;
+END IF;
+RETURN new_child_result;
+END
+$$;
+
+
+--
+-- Name: was_state_removed(bytea, bigint, character varying); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.was_state_removed(path bytea, height bigint, hash character varying) RETURNS boolean
+    LANGUAGE sql
+    AS $$
+SELECT exists(SELECT 1
+              FROM eth.state_cids
+                       INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.id)
+              WHERE state_path = path
+                AND block_number > height
+                AND block_number <= (SELECT block_number
+                                     FROM eth.header_cids
+                                     WHERE block_hash = hash)
+                AND state_cids.node_type = 3
+              LIMIT 1);
+$$;
+
+
+--
+-- Name: was_storage_removed(bytea, bigint, character varying); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.was_storage_removed(path bytea, height bigint, hash character varying) RETURNS boolean
+    LANGUAGE sql
+    AS $$
+SELECT exists(SELECT 1
+              FROM eth.storage_cids
+                       INNER JOIN eth.state_cids ON (storage_cids.state_id = state_cids.id)
+                       INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.id)
+              WHERE storage_path = path
+                AND block_number > height
+                AND block_number <= (SELECT block_number
+                                     FROM eth.header_cids
+                                     WHERE block_hash = hash)
+                AND storage_cids.node_type = 3
+              LIMIT 1);
+$$;
+
+
+--
+-- Name: header_cids_id_seq; Type: SEQUENCE; Schema: eth; Owner: -
+--
+
+CREATE SEQUENCE eth.header_cids_id_seq
+    AS integer
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+
+--
+-- Name: header_cids_id_seq; Type: SEQUENCE OWNED BY; Schema: eth; Owner: -
+--
+
+ALTER SEQUENCE eth.header_cids_id_seq OWNED BY eth.header_cids.id;
+
+
+--
+-- Name: receipt_cids; Type: TABLE; Schema: eth; Owner: -
+--
+
+CREATE TABLE eth.receipt_cids (
+    id integer NOT NULL,
+    tx_id integer NOT NULL,
+    cid text NOT NULL,
+    mh_key text NOT NULL,
+    contract character varying(66),
+    contract_hash character varying(66),
+    topic0s character varying(66)[],
+    topic1s character varying(66)[],
+    topic2s character varying(66)[],
+    topic3s character varying(66)[],
+    log_contracts character varying(66)[],
+    post_state character varying(66),
+    post_status integer
+);
+
+
+--
+-- Name: receipt_cids_id_seq; Type: SEQUENCE; Schema: eth; Owner: -
+--
+
+CREATE SEQUENCE eth.receipt_cids_id_seq
+    AS integer
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+
+--
+-- Name: receipt_cids_id_seq; Type: SEQUENCE OWNED BY; Schema: eth; Owner: -
+--
+
+ALTER SEQUENCE eth.receipt_cids_id_seq OWNED BY eth.receipt_cids.id;
+
+
+--
+-- Name: state_accounts; Type: TABLE; Schema: eth; Owner: -
+--
+
+CREATE TABLE eth.state_accounts (
+    id integer NOT NULL,
+    state_id bigint NOT NULL,
+    balance numeric NOT NULL,
+    nonce integer NOT NULL,
+    code_hash bytea NOT NULL,
+    storage_root character varying(66) NOT NULL
+);
+
+
+--
+-- Name: state_accounts_id_seq; Type: SEQUENCE; Schema: eth; Owner: -
+--
+
+CREATE SEQUENCE eth.state_accounts_id_seq
+    AS integer
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+
+--
+-- Name: state_accounts_id_seq; Type: SEQUENCE OWNED BY; Schema: eth; Owner: -
+--
+
+ALTER SEQUENCE eth.state_accounts_id_seq OWNED BY eth.state_accounts.id;
+
+
+--
+-- Name: state_cids; Type: TABLE; Schema: eth; Owner: -
+--
+
+CREATE TABLE eth.state_cids (
+    id bigint NOT NULL,
+    header_id integer NOT NULL,
+    state_leaf_key character varying(66),
+    cid text NOT NULL,
+    mh_key text NOT NULL,
+    state_path bytea,
+    node_type integer NOT NULL,
+    diff boolean DEFAULT false NOT NULL
+);
+
+
+--
+-- Name: state_cids_id_seq; Type: SEQUENCE; Schema: eth; Owner: -
+--
+
+CREATE SEQUENCE eth.state_cids_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+
+--
+-- Name: state_cids_id_seq; Type: SEQUENCE OWNED BY; Schema: eth; Owner: -
+--
+
+ALTER SEQUENCE eth.state_cids_id_seq OWNED BY eth.state_cids.id;
+
+
+--
+-- Name: storage_cids; Type: TABLE; Schema: eth; Owner: -
+--
+
+CREATE TABLE eth.storage_cids (
+    id bigint NOT NULL,
+    state_id bigint NOT NULL,
+    storage_leaf_key character varying(66),
+    cid text NOT NULL,
+    mh_key text NOT NULL,
+    storage_path bytea,
+    node_type integer NOT NULL,
+    diff boolean DEFAULT false NOT NULL
+);
+
+
+--
+-- Name: storage_cids_id_seq; Type: SEQUENCE; Schema: eth; Owner: -
+--
+
+CREATE SEQUENCE eth.storage_cids_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+
+--
+-- Name: storage_cids_id_seq; Type: SEQUENCE OWNED BY; Schema: eth; Owner: -
+--
+
+ALTER SEQUENCE eth.storage_cids_id_seq OWNED BY eth.storage_cids.id;
+
+
+--
+-- Name: transaction_cids; Type: TABLE; Schema: eth; Owner: -
+--
+
+CREATE TABLE eth.transaction_cids (
+    id integer NOT NULL,
+    header_id integer NOT NULL,
+    tx_hash character varying(66) NOT NULL,
+    index integer NOT NULL,
+    cid text NOT NULL,
+    mh_key text NOT NULL,
+    dst character varying(66) NOT NULL,
+    src character varying(66) NOT NULL,
+    tx_data bytea
+);
+
+
+--
+-- Name: TABLE transaction_cids; Type: COMMENT; Schema: eth; Owner: -
+--
+
+COMMENT ON TABLE eth.transaction_cids IS '@name EthTransactionCids';
+
+
+--
+-- Name: transaction_cids_id_seq; Type: SEQUENCE; Schema: eth; Owner: -
+--
+
+CREATE SEQUENCE eth.transaction_cids_id_seq
+    AS integer
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+
+--
+-- Name: transaction_cids_id_seq; Type: SEQUENCE OWNED BY; Schema: eth; Owner: -
+--
+
+ALTER SEQUENCE eth.transaction_cids_id_seq OWNED BY eth.transaction_cids.id;
+
+
+--
+-- Name: uncle_cids; Type: TABLE; Schema: eth; Owner: -
+--
+
+CREATE TABLE eth.uncle_cids (
+    id integer NOT NULL,
+    header_id integer NOT NULL,
+    block_hash character varying(66) NOT NULL,
+    parent_hash character varying(66) NOT NULL,
+    cid text NOT NULL,
+    mh_key text NOT NULL,
+    reward numeric NOT NULL
+);
+
+
+--
+-- Name: uncle_cids_id_seq; Type: SEQUENCE; Schema: eth; Owner: -
+--
+
+CREATE SEQUENCE eth.uncle_cids_id_seq
+    AS integer
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+
+--
+-- Name: uncle_cids_id_seq; Type: SEQUENCE OWNED BY; Schema: eth; Owner: -
+--
+
+ALTER SEQUENCE eth.uncle_cids_id_seq OWNED BY eth.uncle_cids.id;
+
+
+--
+-- Name: blocks; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.blocks (
+    key text NOT NULL,
+    data bytea NOT NULL
+);
+
+
+--
+-- Name: goose_db_version; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.goose_db_version (
+    id integer NOT NULL,
+    version_id bigint NOT NULL,
+    is_applied boolean NOT NULL,
+    tstamp timestamp without time zone DEFAULT now()
+);
+
+
+--
+-- Name: goose_db_version_id_seq; Type: SEQUENCE; Schema: public; Owner: -
+--
+
+CREATE SEQUENCE public.goose_db_version_id_seq
+    AS integer
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+
+--
+-- Name: goose_db_version_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
+--
+
+ALTER SEQUENCE public.goose_db_version_id_seq OWNED BY public.goose_db_version.id;
+
+
+--
+-- Name: nodes; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.nodes (
+    id integer NOT NULL,
+    client_name character varying,
+    genesis_block character varying(66),
+    network_id character varying,
+    node_id character varying(128),
+    chain_id integer DEFAULT 1
+);
+
+
+--
+-- Name: TABLE nodes; Type: COMMENT; Schema: public; Owner: -
+--
+
+COMMENT ON TABLE public.nodes IS '@name NodeInfo';
+
+
+--
+-- Name: COLUMN nodes.node_id; Type: COMMENT; Schema: public; Owner: -
+--
+
+COMMENT ON COLUMN public.nodes.node_id IS '@name ChainNodeID';
+
+
+--
+-- Name: nodes_id_seq; Type: SEQUENCE; Schema: public; Owner: -
+--
+
+CREATE SEQUENCE public.nodes_id_seq
+    AS integer
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+
+--
+-- Name: nodes_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
+--
+
+ALTER SEQUENCE public.nodes_id_seq OWNED BY public.nodes.id;
+
+
+--
+-- Name: header_cids id; Type: DEFAULT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.header_cids ALTER COLUMN id SET DEFAULT nextval('eth.header_cids_id_seq'::regclass);
+
+
+--
+-- Name: receipt_cids id; Type: DEFAULT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.receipt_cids ALTER COLUMN id SET DEFAULT nextval('eth.receipt_cids_id_seq'::regclass);
+
+
+--
+-- Name: state_accounts id; Type: DEFAULT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.state_accounts ALTER COLUMN id SET DEFAULT nextval('eth.state_accounts_id_seq'::regclass);
+
+
+--
+-- Name: state_cids id; Type: DEFAULT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.state_cids ALTER COLUMN id SET DEFAULT nextval('eth.state_cids_id_seq'::regclass);
+
+
+--
+-- Name: storage_cids id; Type: DEFAULT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.storage_cids ALTER COLUMN id SET DEFAULT nextval('eth.storage_cids_id_seq'::regclass);
+
+
+--
+-- Name: transaction_cids id; Type: DEFAULT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.transaction_cids ALTER COLUMN id SET DEFAULT nextval('eth.transaction_cids_id_seq'::regclass);
+
+
+--
+-- Name: uncle_cids id; Type: DEFAULT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.uncle_cids ALTER COLUMN id SET DEFAULT nextval('eth.uncle_cids_id_seq'::regclass);
+
+
+--
+-- Name: goose_db_version id; Type: DEFAULT; Schema: public; Owner: -
+--
+
+ALTER TABLE ONLY public.goose_db_version ALTER COLUMN id SET DEFAULT nextval('public.goose_db_version_id_seq'::regclass);
+
+
+--
+-- Name: nodes id; Type: DEFAULT; Schema: public; Owner: -
+--
+
+ALTER TABLE ONLY public.nodes ALTER COLUMN id SET DEFAULT nextval('public.nodes_id_seq'::regclass);
+
+
+--
+-- Name: header_cids header_cids_block_number_block_hash_key; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.header_cids
+    ADD CONSTRAINT header_cids_block_number_block_hash_key UNIQUE (block_number, block_hash);
+
+
+--
+-- Name: header_cids header_cids_pkey; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.header_cids
+    ADD CONSTRAINT header_cids_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: receipt_cids receipt_cids_pkey; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.receipt_cids
+    ADD CONSTRAINT receipt_cids_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: receipt_cids receipt_cids_tx_id_key; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.receipt_cids
+    ADD CONSTRAINT receipt_cids_tx_id_key UNIQUE (tx_id);
+
+
+--
+-- Name: state_accounts state_accounts_pkey; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.state_accounts
+    ADD CONSTRAINT state_accounts_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: state_accounts state_accounts_state_id_key; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.state_accounts
+    ADD CONSTRAINT state_accounts_state_id_key UNIQUE (state_id);
+
+
+--
+-- Name: state_cids state_cids_header_id_state_path_key; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.state_cids
+    ADD CONSTRAINT state_cids_header_id_state_path_key UNIQUE (header_id, state_path);
+
+
+--
+-- Name: state_cids state_cids_pkey; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.state_cids
+    ADD CONSTRAINT state_cids_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: storage_cids storage_cids_pkey; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.storage_cids
+    ADD CONSTRAINT storage_cids_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: storage_cids storage_cids_state_id_storage_path_key; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.storage_cids
+    ADD CONSTRAINT storage_cids_state_id_storage_path_key UNIQUE (state_id, storage_path);
+
+
+--
+-- Name: transaction_cids transaction_cids_header_id_tx_hash_key; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.transaction_cids
+    ADD CONSTRAINT transaction_cids_header_id_tx_hash_key UNIQUE (header_id, tx_hash);
+
+
+--
+-- Name: transaction_cids transaction_cids_pkey; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.transaction_cids
+    ADD CONSTRAINT transaction_cids_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: uncle_cids uncle_cids_header_id_block_hash_key; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.uncle_cids
+    ADD CONSTRAINT uncle_cids_header_id_block_hash_key UNIQUE (header_id, block_hash);
+
+
+--
+-- Name: uncle_cids uncle_cids_pkey; Type: CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.uncle_cids
+    ADD CONSTRAINT uncle_cids_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: blocks blocks_key_key; Type: CONSTRAINT; Schema: public; Owner: -
+--
+
+ALTER TABLE ONLY public.blocks
+    ADD CONSTRAINT blocks_key_key UNIQUE (key);
+
+
+--
+-- Name: goose_db_version goose_db_version_pkey; Type: CONSTRAINT; Schema: public; Owner: -
+--
+
+ALTER TABLE ONLY public.goose_db_version
+    ADD CONSTRAINT goose_db_version_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: nodes node_uc; Type: CONSTRAINT; Schema: public; Owner: -
+--
+
+ALTER TABLE ONLY public.nodes
+    ADD CONSTRAINT node_uc UNIQUE (genesis_block, network_id, node_id, chain_id);
+
+
+--
+-- Name: nodes nodes_pkey; Type: CONSTRAINT; Schema: public; Owner: -
+--
+
+ALTER TABLE ONLY public.nodes
+    ADD CONSTRAINT nodes_pkey PRIMARY KEY (id);
+
+
+--
+-- Name: account_state_id_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX account_state_id_index ON eth.state_accounts USING btree (state_id);
+
+
+--
+-- Name: block_hash_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX block_hash_index ON eth.header_cids USING btree (block_hash);
+
+
+--
+-- Name: block_number_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX block_number_index ON eth.header_cids USING brin (block_number);
+
+
+--
+-- Name: header_cid_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX header_cid_index ON eth.header_cids USING btree (cid);
+
+
+--
+-- Name: header_mh_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX header_mh_index ON eth.header_cids USING btree (mh_key);
+
+
+--
+-- Name: rct_cid_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX rct_cid_index ON eth.receipt_cids USING btree (cid);
+
+
+--
+-- Name: rct_contract_hash_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX rct_contract_hash_index ON eth.receipt_cids USING btree (contract_hash);
+
+
+--
+-- Name: rct_contract_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX rct_contract_index ON eth.receipt_cids USING btree (contract);
+
+
+--
+-- Name: rct_log_contract_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX rct_log_contract_index ON eth.receipt_cids USING gin (log_contracts);
+
+
+--
+-- Name: rct_mh_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX rct_mh_index ON eth.receipt_cids USING btree (mh_key);
+
+
+--
+-- Name: rct_topic0_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX rct_topic0_index ON eth.receipt_cids USING gin (topic0s);
+
+
+--
+-- Name: rct_topic1_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX rct_topic1_index ON eth.receipt_cids USING gin (topic1s);
+
+
+--
+-- Name: rct_topic2_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX rct_topic2_index ON eth.receipt_cids USING gin (topic2s);
+
+
+--
+-- Name: rct_topic3_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX rct_topic3_index ON eth.receipt_cids USING gin (topic3s);
+
+
+--
+-- Name: rct_tx_id_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX rct_tx_id_index ON eth.receipt_cids USING btree (tx_id);
+
+
+--
+-- Name: state_cid_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX state_cid_index ON eth.state_cids USING btree (cid);
+
+
+--
+-- Name: state_header_id_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX state_header_id_index ON eth.state_cids USING btree (header_id);
+
+
+--
+-- Name: state_leaf_key_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX state_leaf_key_index ON eth.state_cids USING btree (state_leaf_key);
+
+
+--
+-- Name: state_mh_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX state_mh_index ON eth.state_cids USING btree (mh_key);
+
+
+--
+-- Name: state_path_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX state_path_index ON eth.state_cids USING btree (state_path);
+
+
+--
+-- Name: state_root_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX state_root_index ON eth.header_cids USING btree (state_root);
+
+
+--
+-- Name: storage_cid_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX storage_cid_index ON eth.storage_cids USING btree (cid);
+
+
+--
+-- Name: storage_leaf_key_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX storage_leaf_key_index ON eth.storage_cids USING btree (storage_leaf_key);
+
+
+--
+-- Name: storage_mh_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX storage_mh_index ON eth.storage_cids USING btree (mh_key);
+
+
+--
+-- Name: storage_path_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX storage_path_index ON eth.storage_cids USING btree (storage_path);
+
+
+--
+-- Name: storage_root_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX storage_root_index ON eth.state_accounts USING btree (storage_root);
+
+
+--
+-- Name: storage_state_id_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX storage_state_id_index ON eth.storage_cids USING btree (state_id);
+
+
+--
+-- Name: timestamp_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX timestamp_index ON eth.header_cids USING brin ("timestamp");
+
+
+--
+-- Name: tx_cid_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX tx_cid_index ON eth.transaction_cids USING btree (cid);
+
+
+--
+-- Name: tx_dst_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX tx_dst_index ON eth.transaction_cids USING btree (dst);
+
+
+--
+-- Name: tx_hash_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX tx_hash_index ON eth.transaction_cids USING btree (tx_hash);
+
+
+--
+-- Name: tx_header_id_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX tx_header_id_index ON eth.transaction_cids USING btree (header_id);
+
+
+--
+-- Name: tx_mh_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX tx_mh_index ON eth.transaction_cids USING btree (mh_key);
+
+
+--
+-- Name: tx_src_index; Type: INDEX; Schema: eth; Owner: -
+--
+
+CREATE INDEX tx_src_index ON eth.transaction_cids USING btree (src);
+
+
+--
+-- Name: header_cids header_cids_ai; Type: TRIGGER; Schema: eth; Owner: -
+--
+
+CREATE TRIGGER header_cids_ai AFTER INSERT ON eth.header_cids FOR EACH ROW EXECUTE FUNCTION eth.graphql_subscription('header_cids', 'id');
+
+
+--
+-- Name: receipt_cids receipt_cids_ai; Type: TRIGGER; Schema: eth; Owner: -
+--
+
+CREATE TRIGGER receipt_cids_ai AFTER INSERT ON eth.receipt_cids FOR EACH ROW EXECUTE FUNCTION eth.graphql_subscription('receipt_cids', 'id');
+
+
+--
+-- Name: state_accounts state_accounts_ai; Type: TRIGGER; Schema: eth; Owner: -
+--
+
+CREATE TRIGGER state_accounts_ai AFTER INSERT ON eth.state_accounts FOR EACH ROW EXECUTE FUNCTION eth.graphql_subscription('state_accounts', 'id');
+
+
+--
+-- Name: state_cids state_cids_ai; Type: TRIGGER; Schema: eth; Owner: -
+--
+
+CREATE TRIGGER state_cids_ai AFTER INSERT ON eth.state_cids FOR EACH ROW EXECUTE FUNCTION eth.graphql_subscription('state_cids', 'id');
+
+
+--
+-- Name: storage_cids storage_cids_ai; Type: TRIGGER; Schema: eth; Owner: -
+--
+
+CREATE TRIGGER storage_cids_ai AFTER INSERT ON eth.storage_cids FOR EACH ROW EXECUTE FUNCTION eth.graphql_subscription('storage_cids', 'id');
+
+
+--
+-- Name: transaction_cids transaction_cids_ai; Type: TRIGGER; Schema: eth; Owner: -
+--
+
+CREATE TRIGGER transaction_cids_ai AFTER INSERT ON eth.transaction_cids FOR EACH ROW EXECUTE FUNCTION eth.graphql_subscription('transaction_cids', 'id');
+
+
+--
+-- Name: uncle_cids uncle_cids_ai; Type: TRIGGER; Schema: eth; Owner: -
+--
+
+CREATE TRIGGER uncle_cids_ai AFTER INSERT ON eth.uncle_cids FOR EACH ROW EXECUTE FUNCTION eth.graphql_subscription('uncle_cids', 'id');
+
+
+--
+-- Name: header_cids header_cids_mh_key_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.header_cids
+    ADD CONSTRAINT header_cids_mh_key_fkey FOREIGN KEY (mh_key) REFERENCES public.blocks(key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
+
+
+--
+-- Name: header_cids header_cids_node_id_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.header_cids
+    ADD CONSTRAINT header_cids_node_id_fkey FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE;
+
+
+--
+-- Name: receipt_cids receipt_cids_mh_key_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.receipt_cids
+    ADD CONSTRAINT receipt_cids_mh_key_fkey FOREIGN KEY (mh_key) REFERENCES public.blocks(key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
+
+
+--
+-- Name: receipt_cids receipt_cids_tx_id_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.receipt_cids
+    ADD CONSTRAINT receipt_cids_tx_id_fkey FOREIGN KEY (tx_id) REFERENCES eth.transaction_cids(id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
+
+
+--
+-- Name: state_accounts state_accounts_state_id_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.state_accounts
+    ADD CONSTRAINT state_accounts_state_id_fkey FOREIGN KEY (state_id) REFERENCES eth.state_cids(id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
+
+
+--
+-- Name: state_cids state_cids_header_id_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.state_cids
+    ADD CONSTRAINT state_cids_header_id_fkey FOREIGN KEY (header_id) REFERENCES eth.header_cids(id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
+
+
+--
+-- Name: state_cids state_cids_mh_key_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.state_cids
+    ADD CONSTRAINT state_cids_mh_key_fkey FOREIGN KEY (mh_key) REFERENCES public.blocks(key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
+
+
+--
+-- Name: storage_cids storage_cids_mh_key_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.storage_cids
+    ADD CONSTRAINT storage_cids_mh_key_fkey FOREIGN KEY (mh_key) REFERENCES public.blocks(key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
+
+
+--
+-- Name: storage_cids storage_cids_state_id_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.storage_cids
+    ADD CONSTRAINT storage_cids_state_id_fkey FOREIGN KEY (state_id) REFERENCES eth.state_cids(id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
+
+
+--
+-- Name: transaction_cids transaction_cids_header_id_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.transaction_cids
+    ADD CONSTRAINT transaction_cids_header_id_fkey FOREIGN KEY (header_id) REFERENCES eth.header_cids(id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
+
+
+--
+-- Name: transaction_cids transaction_cids_mh_key_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.transaction_cids
+    ADD CONSTRAINT transaction_cids_mh_key_fkey FOREIGN KEY (mh_key) REFERENCES public.blocks(key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
+
+
+--
+-- Name: uncle_cids uncle_cids_header_id_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.uncle_cids
+    ADD CONSTRAINT uncle_cids_header_id_fkey FOREIGN KEY (header_id) REFERENCES eth.header_cids(id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
+
+
+--
+-- Name: uncle_cids uncle_cids_mh_key_fkey; Type: FK CONSTRAINT; Schema: eth; Owner: -
+--
+
+ALTER TABLE ONLY eth.uncle_cids
+    ADD CONSTRAINT uncle_cids_mh_key_fkey FOREIGN KEY (mh_key) REFERENCES public.blocks(key) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
+
+
+--
+-- PostgreSQL database dump complete
+--
+
-- 
2.45.2


From 1328dad81d49b57a3afc4d8e8e49bf39b5a3b15c Mon Sep 17 00:00:00 2001
From: Ian Norden <iannordenn@gmail.com>
Date: Mon, 5 Apr 2021 12:47:39 -0500
Subject: [PATCH 2/3] service documentation

---
 statediff/builder_test.go |   1 -
 statediff/doc.go          |  57 -----------
 statediff/doc.md          | 207 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 207 insertions(+), 58 deletions(-)
 delete mode 100644 statediff/doc.go
 create mode 100644 statediff/doc.md

diff --git a/statediff/builder_test.go b/statediff/builder_test.go
index d62bdc766..b2a9f65e9 100644
--- a/statediff/builder_test.go
+++ b/statediff/builder_test.go
@@ -32,7 +32,6 @@ import (
 	sdtypes "github.com/ethereum/go-ethereum/statediff/types"
 )
 
-// TODO: add test that filters on address
 var (
 	contractLeafKey                                        []byte
 	emptyDiffs                                             = make([]sdtypes.StateNode, 0)
diff --git a/statediff/doc.go b/statediff/doc.go
deleted file mode 100644
index 184bc242c..000000000
--- a/statediff/doc.go
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2019 The go-ethereum Authors
-// This file is part of the go-ethereum library.
-//
-// The go-ethereum library is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Lesser General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// The go-ethereum library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public License
-// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
-
-/*
-Package statediff provides an auxiliary service that processes state diff objects from incoming chain events,
-relaying the objects to any rpc subscriptions.
-
-This work is adapted from work by Charles Crain at https://github.com/jpmorganchase/quorum/blob/9b7fd9af8082795eeeb6863d9746f12b82dd5078/statediff/statediff.go
-
-The service is spun up using the below CLI flags
---statediff: boolean flag, turns on the service
---statediff.streamblock: boolean flag, configures the service to associate and stream out the rest of the block data with the state diffs.
---statediff.intermediatenodes: boolean flag, tells service to include intermediate (branch and extension) nodes; default (false) processes leaf nodes only.
---statediff.watchedaddresses: string slice flag, used to limit the state diffing process to the given addresses. Usage: --statediff.watchedaddresses=addr1 --statediff.watchedaddresses=addr2 --statediff.watchedaddresses=addr3
-
-If you wish to use the websocket endpoint to subscribe to the statediff service, be sure to open up the Websocket RPC server with the `--ws` flag. The IPC-RPC server is turned on by default.
-
-The statediffing services works only with `--syncmode="full", but -importantly- does not require garbage collection to be turned off (does not require an archival node).
-
-e.g.
-
-$ ./geth --statediff --statediff.streamblock --ws --syncmode "full"
-
-This starts up the geth node in full sync mode, starts up the statediffing service, and opens up the websocket endpoint to subscribe to the service.
-Because the "streamblock" flag has been turned on, the service will strean out block data (headers, transactions, and receipts) along with the diffed state and storage leafs.
-
-Rpc subscriptions to the service can be created using the rpc.Client.Subscribe() method,
-with the "statediff" namespace, a statediff.Payload channel, and the name of the statediff api's rpc method- "stream".
-
-e.g.
-
-cli, _ := rpc.Dial("ipcPathOrWsURL")
-stateDiffPayloadChan := make(chan statediff.Payload, 20000)
-rpcSub, err := cli.Subscribe(context.Background(), "statediff", stateDiffPayloadChan, "stream"})
-for {
-	select {
-	case stateDiffPayload := <- stateDiffPayloadChan:
-		processPayload(stateDiffPayload)
-	case err := <- rpcSub.Err():
-		log.Error(err)
-	}
-}
-*/
-package statediff
diff --git a/statediff/doc.md b/statediff/doc.md
new file mode 100644
index 000000000..622229c0a
--- /dev/null
+++ b/statediff/doc.md
@@ -0,0 +1,207 @@
+This package provides an auxiliary service that asynchronously processes state diff objects from chain events,
+either relaying the state objects to rpc subscribers or writing them directly to Postgres.
+
+It also exposes RPC endpoints for fetching or writing to Postgres the state diff `StateObject` at a specific block height
+or for a specific block hash, this operates on historic block and state data and so is dependent on having a complete state archive.
+
+# Statediff Object
+A state diff `StateObject` is the collection of all the state and storage trie nodes that have been updated in a given block.
+For convenience, we also associate these nodes with the block number and hash, and optionally the set of code hashes and code for any
+contracts deployed in this block.
+
+A complete state diff `StateObject` will include all state and storage intermediate nodes, which is necessary for generating proofs and for
+traversing the tries.
+
+```go
+// StateObject is a collection of state (and linked storage nodes) as well as the associated block number, block hash,
+// and a set of code hashes and their code
+type StateObject struct {
+	BlockNumber       *big.Int                `json:"blockNumber"     gencodec:"required"`
+	BlockHash         common.Hash             `json:"blockHash"       gencodec:"required"`
+	Nodes             []StateNode             `json:"nodes"           gencodec:"required"`
+	CodeAndCodeHashes []CodeAndCodeHash       `json:"codeMapping"`
+}
+
+// StateNode holds the data for a single state diff node
+type StateNode struct {
+    NodeType     NodeType      `json:"nodeType"        gencodec:"required"`
+    Path         []byte        `json:"path"            gencodec:"required"`
+    NodeValue    []byte        `json:"value"           gencodec:"required"`
+    StorageNodes []StorageNode `json:"storage"`
+    LeafKey      []byte        `json:"leafKey"`
+}
+
+// StorageNode holds the data for a single storage diff node
+type StorageNode struct {
+    NodeType  NodeType `json:"nodeType"        gencodec:"required"`
+    Path      []byte   `json:"path"            gencodec:"required"`
+    NodeValue []byte   `json:"value"           gencodec:"required"`
+    LeafKey   []byte   `json:"leafKey"`
+}
+
+// CodeAndCodeHash struct for holding codehash => code mappings
+// we can't use an actual map because they are not rlp serializable
+type CodeAndCodeHash struct {
+    Hash common.Hash `json:"codeHash"`
+    Code []byte      `json:"code"`
+}
+```
+These objects are packed into a `Payload` structure which additionally associates the StateObject
+with the block (header, uncles, and transactions), receipts, and total difficulty.
+This `Payload` encapsulates all the block and state data at a given block, and allows us to index the entire Ethereum data structure
+as hash-linked IPLD objects.
+
+```go
+// Payload packages the data to send to state diff subscriptions
+type Payload struct {
+	BlockRlp        []byte   `json:"blockRlp"`
+	TotalDifficulty *big.Int `json:"totalDifficulty"`
+	ReceiptsRlp     []byte   `json:"receiptsRlp"`
+	StateObjectRlp  []byte   `json:"stateObjectRlp"    gencodec:"required"`
+
+	encoded []byte
+	err     error
+}
+```
+
+# Usage
+This state diffing service runs as an auxiliary service concurrent to the regular syncing process of the geth node.
+
+
+## CLI configuration
+This service introduces a CLI flag namespace `statediff`
+
+`--statediff` flag is used to turn on the service
+`--statediff.writing` is used to tell the service to write state diff objects it produces from synced ChainEvents directly to a configured Postgres database
+`--statediff.db` is the connection string for the Postgres database to write to
+`--statediff.dbnodeid` is the node id to use in the Postgres database
+`--statediff.dbclientname` is the client name to use in the Postgres database
+
+The service can only operate in full sync mode (`--syncmode=full`), but only the historical RPC endpoints require an archive node (`--gcmode=archive`)
+
+e.g.
+`
+./build/bin/geth --syncmode=full --gcmode=archive --statediff --statediff.writing --statediff.db=postgres://localhost:5432/vulcanize_testing?sslmode=disable --statediff.dbnodeid={nodeId} --statediff.dbclientname={dbClientName}
+`
+
+## RPC endpoints
+The state diffing service exposes both a WS subscription endpoint, and a number of HTTP unary endpoints.
+
+Each of these endpoints requires a set of parameters provided by the caller
+
+```go
+// Params is used to carry in parameters from subscribing/requesting clients configuration
+type Params struct {
+	IntermediateStateNodes   bool
+	IntermediateStorageNodes bool
+	IncludeBlock             bool
+	IncludeReceipts          bool
+	IncludeTD                bool
+	IncludeCode              bool
+	WatchedAddresses         []common.Address
+	WatchedStorageSlots      []common.Hash
+}
+```
+
+Using these params we can tell the service whether to include state and/or storage intermediate nodes; whether
+to include the associated block (header, uncles, and transactions); whether to include the associated receipts;
+whether to include the total difficult for this block; whether to include the set of code hashes and code for
+contracts deployed in this block; whether to limit the diffing process to a list of specific addresses; and/or
+whether to limit the diffing process to a list of specific storage slot keys.
+
+### Subscription endpoint
+A websocket supporting RPC endpoint is exposed for subscribing to state diff `StateObjects` that come off the head of the chain while the geth node syncs.
+
+```go
+// Stream is a subscription endpoint that fires off state diff payloads as they are created
+Stream(ctx context.Context, params Params) (*rpc.Subscription, error)
+```
+
+To expose this endpoint the node needs to have the websocket server turned on (`--ws`),
+and the `statediff` namespace exposed (`--ws.api=statediff`).
+
+Go code subscriptions to this endpoint can be created using the `rpc.Client.Subscribe()` method,
+with the "statediff" namespace, a `statediff.Payload` channel, and the name of the statediff api's rpc method: "stream".
+
+e.g.
+
+```go
+
+cli, err := rpc.Dial("ipcPathOrWsURL")
+if err != nil {
+	// handle error
+}
+stateDiffPayloadChan := make(chan statediff.Payload, 20000)
+methodName := "stream"
+params := statediff.Params{
+    IncludeBlock:             true,
+    IncludeTD:                true,
+    IncludeReceipts:          true,
+    IntermediateStorageNodes: true,
+    IntermediateStateNodes:   true,
+}
+rpcSub, err := cli.Subscribe(context.Background(), statediff.APIName, stateDiffPayloadChan, methodName, params)
+if err != nil {
+	// handle error
+}
+for {
+	select {
+	case stateDiffPayload := <- stateDiffPayloadChan:
+            // process the payload
+        case err := <- rpcSub.Err():
+    	    // handle rpc subscription error
+        }
+}
+```
+
+### Unary endpoints
+The service also exposes unary RPC endpoints for retrieving the state diff `StateObject` for a specific block height/hash.
+```go
+// StateDiffAt returns a state diff payload at the specific blockheight
+StateDiffAt(ctx context.Context, blockNumber uint64, params Params) (*Payload, error)
+
+// StateDiffFor returns a state diff payload for the specific blockhash
+StateDiffFor(ctx context.Context, blockHash common.Hash, params Params) (*Payload, error)
+```
+
+To expose this endpoint the node needs to have the HTTP server turned on (`--http`),
+and the `statediff` namespace exposed (`--http.api=statediff`).
+
+## Direct indexing into Postgres
+If `--statediff.writing` is set, the service will convert the state diff `StateObject` data into IPLD objects, persist them directly to Postgres,
+and generate secondary indexes around the IPLD data.
+
+The schema and migrations for this Postgres database are provided in `statediff/db/`.
+
+### Postgres setup
+We use [pressly/goose](https://github.com/pressly/goose) as our Postgres migration manager.
+You can also load the Postgres schema directly into a database using
+
+`psql database_name < schema.sql`
+
+This will only work on a version 12.4 Postgres database.
+
+### Schema overview 
+Our Postgres schemas are built around a single IPFS backing Postgres IPLD blockstore table (`public.blocks`) that conforms with [go-ds-sql](https://github.com/ipfs/go-ds-sql/blob/master/postgres/postgres.go).
+All IPLD objects are stored in this table, where `key` is blockstore-prefixed multihash key for the IPLD object and `data` contains
+the bytes for the IPLD block (in the case of all Ethereum IPLDs, this is the RLP byte encoding of the Ethereum object).
+
+The IPLD objects in this table can be traversed using an IPLD DAG interface, but since this table only maps multihash to raw IPLD object
+it is not particularly useful for searching through the data or and does not allow us to look up Ethereum objects by their constituent fields
+(e.g. by block number, tx source/recipient, state/storage trie node path). To improve the accessibility of these Ethereum IPLD objects
+we generate secondary indexes on top of the raw IPLDs in other Postgres tables. This collection of tables encapsulates an Ethereum [advanced data layout](https://github.com/ipld/specs#schemas-and-advanced-data-layouts) (ADL).
+
+These secondary index tables fall under the `eth` schema and follow an `{objectType}_cids` naming convention.
+Each of these tables provides a view into the individual fields of the underlying Ethereum IPLD object and references the raw IPLD object stored in `public.blocks` by multihash foreign key.
+Additionally, these tables link up to their parent object tables. E.g. the `storage_cids` table contains a `state_id` foreign key which references the `id`
+for the `state_cids` entry that contains the state leaf node for the contract the storage node belongs to, and in turn that `state_cids` entry contains a `header_id`
+foreign key which references the `id` of the `header_cids` entry that contains the header for the block these state and storage nodes were updated (diffed).
+
+## Optimization
+On mainnet this process is extremely IO intensive and requires significant resources to allow it to keep up with the head of the chain.
+The state diff processing time for a specific block is dependent on the number and complexity of the state changes that occur in a block and
+the number of updated state nodes that are available in the in-memory cache vs must be retrieved from disc.
+
+If memory permits, one means of improving the efficiency of this process is to increase the trie cache allocation.
+This can be done by increasing the overall `--cache` allocation and/or by increasing the % of the cache allocated to trie
+usage with `--cache.trie`.
\ No newline at end of file
-- 
2.45.2


From d65fbe4c62f85064f2f77e4cce9cf8669f38ced0 Mon Sep 17 00:00:00 2001
From: Ian Norden <iannordenn@gmail.com>
Date: Thu, 8 Apr 2021 21:28:24 -0500
Subject: [PATCH 3/3] touching up

---
 statediff/doc.md | 80 ++++++++++++++++++++++++++----------------------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/statediff/doc.md b/statediff/doc.md
index 622229c0a..678d13172 100644
--- a/statediff/doc.md
+++ b/statediff/doc.md
@@ -1,10 +1,16 @@
+# Statediff
+
 This package provides an auxiliary service that asynchronously processes state diff objects from chain events,
-either relaying the state objects to rpc subscribers or writing them directly to Postgres.
+either relaying the state objects to RPC subscribers or writing them directly to Postgres as IPLD objects.
 
-It also exposes RPC endpoints for fetching or writing to Postgres the state diff `StateObject` at a specific block height
-or for a specific block hash, this operates on historic block and state data and so is dependent on having a complete state archive.
+It also exposes RPC endpoints for fetching or writing to Postgres the state diff at a specific block height
+or for a specific block hash, this operates on historical block and state data and so depends on a complete state archive.
 
-# Statediff Object
+Data is emitted in this differential format in order to make it feasible to IPLD-ize and index the *entire* Ethereum state
+(including intermediate state and storage trie nodes). If this state diff process is ran continuously from genesis,
+the entire state at any block can be materialized from the cumulative differentials up to that point.
+
+## Statediff object
 A state diff `StateObject` is the collection of all the state and storage trie nodes that have been updated in a given block.
 For convenience, we also associate these nodes with the block number and hash, and optionally the set of code hashes and code for any
 contracts deployed in this block.
@@ -24,31 +30,31 @@ type StateObject struct {
 
 // StateNode holds the data for a single state diff node
 type StateNode struct {
-    NodeType     NodeType      `json:"nodeType"        gencodec:"required"`
-    Path         []byte        `json:"path"            gencodec:"required"`
-    NodeValue    []byte        `json:"value"           gencodec:"required"`
-    StorageNodes []StorageNode `json:"storage"`
-    LeafKey      []byte        `json:"leafKey"`
+	NodeType     NodeType      `json:"nodeType"        gencodec:"required"`
+	Path         []byte        `json:"path"            gencodec:"required"`
+	NodeValue    []byte        `json:"value"           gencodec:"required"`
+	StorageNodes []StorageNode `json:"storage"`
+	LeafKey      []byte        `json:"leafKey"`
 }
 
 // StorageNode holds the data for a single storage diff node
 type StorageNode struct {
-    NodeType  NodeType `json:"nodeType"        gencodec:"required"`
-    Path      []byte   `json:"path"            gencodec:"required"`
-    NodeValue []byte   `json:"value"           gencodec:"required"`
-    LeafKey   []byte   `json:"leafKey"`
+	NodeType  NodeType `json:"nodeType"        gencodec:"required"`
+	Path      []byte   `json:"path"            gencodec:"required"`
+	NodeValue []byte   `json:"value"           gencodec:"required"`
+	LeafKey   []byte   `json:"leafKey"`
 }
 
 // CodeAndCodeHash struct for holding codehash => code mappings
 // we can't use an actual map because they are not rlp serializable
 type CodeAndCodeHash struct {
-    Hash common.Hash `json:"codeHash"`
-    Code []byte      `json:"code"`
+	Hash common.Hash `json:"codeHash"`
+	Code []byte      `json:"code"`
 }
 ```
-These objects are packed into a `Payload` structure which additionally associates the StateObject
+These objects are packed into a `Payload` structure which can additionally associate the `StateObject`
 with the block (header, uncles, and transactions), receipts, and total difficulty.
-This `Payload` encapsulates all the block and state data at a given block, and allows us to index the entire Ethereum data structure
+This `Payload` encapsulates all of the differential data at a given block, and allows us to index the entire Ethereum data structure
 as hash-linked IPLD objects.
 
 ```go
@@ -64,11 +70,11 @@ type Payload struct {
 }
 ```
 
-# Usage
+## Usage
 This state diffing service runs as an auxiliary service concurrent to the regular syncing process of the geth node.
 
 
-## CLI configuration
+### CLI configuration
 This service introduces a CLI flag namespace `statediff`
 
 `--statediff` flag is used to turn on the service
@@ -84,7 +90,7 @@ e.g.
 ./build/bin/geth --syncmode=full --gcmode=archive --statediff --statediff.writing --statediff.db=postgres://localhost:5432/vulcanize_testing?sslmode=disable --statediff.dbnodeid={nodeId} --statediff.dbclientname={dbClientName}
 `
 
-## RPC endpoints
+### RPC endpoints
 The state diffing service exposes both a WS subscription endpoint, and a number of HTTP unary endpoints.
 
 Each of these endpoints requires a set of parameters provided by the caller
@@ -105,11 +111,11 @@ type Params struct {
 
 Using these params we can tell the service whether to include state and/or storage intermediate nodes; whether
 to include the associated block (header, uncles, and transactions); whether to include the associated receipts;
-whether to include the total difficult for this block; whether to include the set of code hashes and code for
+whether to include the total difficulty for this block; whether to include the set of code hashes and code for
 contracts deployed in this block; whether to limit the diffing process to a list of specific addresses; and/or
 whether to limit the diffing process to a list of specific storage slot keys.
 
-### Subscription endpoint
+#### Subscription endpoint
 A websocket supporting RPC endpoint is exposed for subscribing to state diff `StateObjects` that come off the head of the chain while the geth node syncs.
 
 ```go
@@ -154,7 +160,7 @@ for {
 }
 ```
 
-### Unary endpoints
+#### Unary endpoints
 The service also exposes unary RPC endpoints for retrieving the state diff `StateObject` for a specific block height/hash.
 ```go
 // StateDiffAt returns a state diff payload at the specific blockheight
@@ -167,13 +173,13 @@ StateDiffFor(ctx context.Context, blockHash common.Hash, params Params) (*Payloa
 To expose this endpoint the node needs to have the HTTP server turned on (`--http`),
 and the `statediff` namespace exposed (`--http.api=statediff`).
 
-## Direct indexing into Postgres
+### Direct indexing into Postgres
 If `--statediff.writing` is set, the service will convert the state diff `StateObject` data into IPLD objects, persist them directly to Postgres,
 and generate secondary indexes around the IPLD data.
 
 The schema and migrations for this Postgres database are provided in `statediff/db/`.
 
-### Postgres setup
+#### Postgres setup
 We use [pressly/goose](https://github.com/pressly/goose) as our Postgres migration manager.
 You can also load the Postgres schema directly into a database using
 
@@ -181,27 +187,29 @@ You can also load the Postgres schema directly into a database using
 
 This will only work on a version 12.4 Postgres database.
 
-### Schema overview 
+#### Schema overview
 Our Postgres schemas are built around a single IPFS backing Postgres IPLD blockstore table (`public.blocks`) that conforms with [go-ds-sql](https://github.com/ipfs/go-ds-sql/blob/master/postgres/postgres.go).
-All IPLD objects are stored in this table, where `key` is blockstore-prefixed multihash key for the IPLD object and `data` contains
+All IPLD objects are stored in this table, where `key` is the blockstore-prefixed multihash key for the IPLD object and `data` contains
 the bytes for the IPLD block (in the case of all Ethereum IPLDs, this is the RLP byte encoding of the Ethereum object).
 
 The IPLD objects in this table can be traversed using an IPLD DAG interface, but since this table only maps multihash to raw IPLD object
-it is not particularly useful for searching through the data or and does not allow us to look up Ethereum objects by their constituent fields
-(e.g. by block number, tx source/recipient, state/storage trie node path). To improve the accessibility of these Ethereum IPLD objects
-we generate secondary indexes on top of the raw IPLDs in other Postgres tables. This collection of tables encapsulates an Ethereum [advanced data layout](https://github.com/ipld/specs#schemas-and-advanced-data-layouts) (ADL).
+it is not particularly useful for searching through the data by looking up Ethereum objects by their constituent fields
+(e.g. by block number, tx source/recipient, state/storage trie node path). To improve the accessibility of these objects
+we create an Ethereum [advanced data layout](https://github.com/ipld/specs#schemas-and-advanced-data-layouts) (ADL) by generating secondary
+indexes on top of the raw IPLDs in other Postgres tables.
 
 These secondary index tables fall under the `eth` schema and follow an `{objectType}_cids` naming convention.
-Each of these tables provides a view into the individual fields of the underlying Ethereum IPLD object and references the raw IPLD object stored in `public.blocks` by multihash foreign key.
-Additionally, these tables link up to their parent object tables. E.g. the `storage_cids` table contains a `state_id` foreign key which references the `id`
-for the `state_cids` entry that contains the state leaf node for the contract the storage node belongs to, and in turn that `state_cids` entry contains a `header_id`
-foreign key which references the `id` of the `header_cids` entry that contains the header for the block these state and storage nodes were updated (diffed).
+These tables provide a view into individual fields of the underlying Ethereum IPLD objects, allowing lookups on these fields, and reference the raw IPLD objects stored in `public.blocks`
+by foreign keys to their multihash keys.
+Additionally, these tables maintain the hash-linked nature of Ethereum objects to one another. E.g. a storage trie node entry in the `storage_cids`
+table contains a `state_id` foreign key which references the `id` for the `state_cids` entry that contains the state leaf node for the contract that storage node belongs to,
+and in turn that `state_cids` entry contains a `header_id` foreign key which references the `id` of the `header_cids` entry that contains the header for the block these state and storage nodes were updated (diffed).
 
-## Optimization
+### Optimization
 On mainnet this process is extremely IO intensive and requires significant resources to allow it to keep up with the head of the chain.
 The state diff processing time for a specific block is dependent on the number and complexity of the state changes that occur in a block and
 the number of updated state nodes that are available in the in-memory cache vs must be retrieved from disc.
 
-If memory permits, one means of improving the efficiency of this process is to increase the trie cache allocation.
+If memory permits, one means of improving the efficiency of this process is to increase the in-memory trie cache allocation.
 This can be done by increasing the overall `--cache` allocation and/or by increasing the % of the cache allocated to trie
 usage with `--cache.trie`.
\ No newline at end of file
-- 
2.45.2