split pre and post batch processing migrations apart; create make targets for running each

This commit is contained in:
i-norden 2021-11-18 19:03:37 -06:00
parent cb4d0a1275
commit 1b4a6fc2e3
18 changed files with 228 additions and 50 deletions

View File

@ -56,10 +56,16 @@ migrate: $(GOOSE) checkdbvars
$(GOOSE) -dir db/migrations postgres "$(CONNECT_STRING)" up
pg_dump -O -s $(CONNECT_STRING) > schema.sql
## Apply all migrations not already run, for the batch processing set
.PHONY: migrate_batch_set
migrate_batch_set: $(GOOSE) checkdbvars
$(GOOSE) -dir db/batch_process_migrations postgres "$(CONNECT_STRING)" up
## Apply migrations to be ran before a batch processing
.PHONY: migrate_pre_batch_set
migrate_pre_batch_set: $(GOOSE) checkdbvars
$(GOOSE) -dir db/pre_batch_processing_migrations postgres "$(CONNECT_STRING)" up
pg_dump -O -s $(CONNECT_STRING) > schema.sql
## Apply migrations to be ran after a batch processing
.PHONY: migrate_post_batch_set
migrate_post_batch_set: $(GOOSE) checkdbvars
$(GOOSE) -dir db/post_batch_processing_migrations postgres "$(CONNECT_STRING)" up
pg_dump -O -s $(CONNECT_STRING) > schema.sql
## Create a new migration file

View File

@ -0,0 +1,14 @@
-- +goose Up
COMMENT ON TABLE public.nodes IS E'@name NodeInfo';
COMMENT ON TABLE eth.transaction_cids IS E'@name EthTransactionCids';
COMMENT ON TABLE eth.header_cids IS E'@name EthHeaderCids';
COMMENT ON COLUMN public.nodes.node_id IS E'@name ChainNodeID';
COMMENT ON COLUMN eth.header_cids.node_id IS E'@name EthNodeID';
-- +goose Down
COMMENT ON TABLE public.nodes IS NULL;
COMMENT ON TABLE eth.transaction_cids IS NULL;
COMMENT ON TABLE eth.header_cids IS NULL;
COMMENT ON COLUMN public.nodes.node_id IS NULL;
COMMENT ON COLUMN eth.header_cids.node_id IS NULL;

View File

@ -1,69 +1,31 @@
-- +goose Up
-- log indexes
DROP INDEX eth.log_topic3_index;
DROP INDEX eth.log_topic2_index;
DROP INDEX eth.log_topic1_index;
DROP INDEX eth.log_topic0_index;
DROP INDEX eth.log_address_index;
DROP INDEX eth.log_cid_index;
-- access list indexes
DROP INDEX eth.access_list_storage_keys_index;
DROP INDEX eth.access_list_element_address_index;
-- state account indexes
DROP INDEX eth.storage_root_index;
DROP index eth.account_state_path_index;
-- storage node indexes
DROP INDEX eth.storage_node_type_index;
DROP INDEX eth.storage_path_index;
DROP INDEX eth.storage_cid_index;
DROP INDEX eth.storage_leaf_key_index;
DROP INDEX eth.storage_state_path_index;
-- state node indexes
DROP INDEX eth.state_node_type_index;
DROP INDEX eth.state_path_index;
DROP INDEX eth.state_cid_index;
DROP INDEX eth.state_leaf_key_index;
-- receipt indexes
DROP INDEX eth.rct_contract_hash_index;
DROP INDEX eth.rct_contract_index;
DROP INDEX eth.rct_leaf_cid_index;
-- transaction indexes
DROP INDEX eth.tx_src_index;
DROP INDEX eth.tx_dst_index;
DROP INDEX eth.tx_cid_index;
-- header indexes
DROP INDEX eth.timestamp_index;
DROP INDEX eth.state_root_index;
DROP INDEX eth.header_cid_index;
DROP INDEX eth.block_number_index;
-- +goose Down
-- header indexes
CREATE INDEX block_number_index ON eth.header_cids USING brin (block_number);
CREATE INDEX header_cid_index ON eth.header_cids USING btree (cid);
CREATE INDEX header_mh_index ON eth.header_cids USING btree (mh_key);
CREATE INDEX state_root_index ON eth.header_cids USING btree (state_root);
CREATE INDEX timestamp_index ON eth.header_cids USING brin (timestamp);
-- uncle indexes
CREATE INDEX uncle_header_id_index ON eth.uncle_cids USING btree (header_id);
-- transaction indexes
CREATE INDEX tx_header_id_index ON eth.transaction_cids USING btree (header_id);
CREATE INDEX tx_cid_index ON eth.transaction_cids USING btree (cid);
CREATE INDEX tx_mh_index ON eth.transaction_cids USING btree (mh_key);
CREATE INDEX tx_dst_index ON eth.transaction_cids USING btree (dst);
CREATE INDEX tx_src_index ON eth.transaction_cids USING btree (src);
-- receipt indexes
CREATE INDEX rct_leaf_cid_index ON eth.receipt_cids USING btree (leaf_cid);
CREATE INDEX rct_leaf_mh_index ON eth.receipt_cids USING btree (leaf_mh_key);
CREATE INDEX rct_contract_index ON eth.receipt_cids USING btree (contract);
CREATE INDEX rct_contract_hash_index ON eth.receipt_cids USING btree (contract_hash);
-- state node indexes
CREATE INDEX state_leaf_key_index ON eth.state_cids USING btree (state_leaf_key);
CREATE INDEX state_cid_index ON eth.state_cids USING btree (cid);
CREATE INDEX state_mh_index ON eth.state_cids USING btree (mh_key);
CREATE INDEX state_path_index ON eth.state_cids USING btree (state_path);
CREATE INDEX state_node_type_index ON eth.state_cids USING btree (node_type);
@ -71,6 +33,7 @@ CREATE INDEX state_node_type_index ON eth.state_cids USING btree (node_type);
CREATE INDEX storage_state_path_index ON eth.storage_cids USING btree (state_path);
CREATE INDEX storage_leaf_key_index ON eth.storage_cids USING btree (storage_leaf_key);
CREATE INDEX storage_cid_index ON eth.storage_cids USING btree (cid);
CREATE INDEX storage_mh_index ON eth.storage_cids USING btree (mh_key);
CREATE INDEX storage_path_index ON eth.storage_cids USING btree (storage_path);
CREATE INDEX storage_node_type_index ON eth.storage_cids USING btree (node_type);
@ -83,9 +46,66 @@ CREATE INDEX access_list_element_address_index ON eth.access_list_elements USING
CREATE INDEX access_list_storage_keys_index ON eth.access_list_elements USING gin (storage_keys);
-- log indexes
CREATE INDEX log_mh_index ON eth.log_cids USING btree (leaf_mh_key);
CREATE INDEX log_cid_index ON eth.log_cids USING btree (leaf_cid);
CREATE INDEX log_address_index ON eth.log_cids USING btree (address);
CREATE INDEX log_topic0_index ON eth.log_cids USING btree (topic0);
CREATE INDEX log_topic1_index ON eth.log_cids USING btree (topic1);
CREATE INDEX log_topic2_index ON eth.log_cids USING btree (topic2);
CREATE INDEX log_topic3_index ON eth.log_cids USING btree (topic3);
-- +goose Down
-- log indexes
DROP INDEX eth.log_topic3_index;
DROP INDEX eth.log_topic2_index;
DROP INDEX eth.log_topic1_index;
DROP INDEX eth.log_topic0_index;
DROP INDEX eth.log_address_index;
DROP INDEX eth.log_cid_index;
DROP INDEX eth.log_mh_index;
-- access list indexes
DROP INDEX eth.access_list_storage_keys_index;
DROP INDEX eth.access_list_element_address_index;
-- state account indexes
DROP INDEX eth.storage_root_index;
DROP index eth.account_state_path_index;
-- storage node indexes
DROP INDEX eth.storage_node_type_index;
DROP INDEX eth.storage_path_index;
DROP INDEX eth.storage_mh_index;
DROP INDEX eth.storage_cid_index;
DROP INDEX eth.storage_leaf_key_index;
DROP INDEX eth.storage_state_path_index;
-- state node indexes
DROP INDEX eth.state_node_type_index;
DROP INDEX eth.state_path_index;
DROP INDEX eth.state_mh_index;
DROP INDEX eth.state_cid_index;
DROP INDEX eth.state_leaf_key_index;
-- receipt indexes
DROP INDEX eth.rct_contract_hash_index;
DROP INDEX eth.rct_contract_index;
DROP INDEX eth.rct_leaf_mh_index;
DROP INDEX eth.rct_leaf_cid_index;
-- transaction indexes
DROP INDEX eth.tx_src_index;
DROP INDEX eth.tx_dst_index;
DROP INDEX eth.tx_mh_index;
DROP INDEX eth.tx_cid_index;
DROP INDEX eth.tx_header_id_index;
-- uncle indexes
DROP INDEX eth.uncle_header_id_index;
-- header indexes
DROP INDEX eth.timestamp_index;
DROP INDEX eth.state_root_index;
DROP INDEX eth.header_mh_index;
DROP INDEX eth.header_cid_index;
DROP INDEX eth.block_number_index;

View File

@ -0,0 +1,138 @@
-- +goose Up
-- +goose StatementBegin
-- returns if a state leaf node was removed within the provided block number
CREATE OR REPLACE FUNCTION was_state_leaf_removed(key character varying, hash character varying)
RETURNS boolean AS $$
SELECT state_cids.node_type = 3
FROM eth.state_cids
INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.block_hash)
WHERE state_leaf_key = key
AND block_number <= (SELECT block_number
FROM eth.header_cids
WHERE block_hash = hash)
ORDER BY block_number DESC LIMIT 1;
$$
language sql;
-- +goose StatementEnd
-- +goose StatementBegin
CREATE TYPE child_result AS (
has_child BOOLEAN,
children eth.header_cids[]
);
CREATE OR REPLACE FUNCTION has_child(hash VARCHAR(66), height BIGINT) RETURNS child_result AS
$BODY$
DECLARE
child_height INT;
temp_child eth.header_cids;
new_child_result child_result;
BEGIN
child_height = height + 1;
-- short circuit if there are no children
SELECT exists(SELECT 1
FROM eth.header_cids
WHERE parent_hash = hash
AND block_number = child_height
LIMIT 1)
INTO new_child_result.has_child;
-- collect all the children for this header
IF new_child_result.has_child THEN
FOR temp_child IN
SELECT * FROM eth.header_cids WHERE parent_hash = hash AND block_number = child_height
LOOP
new_child_result.children = array_append(new_child_result.children, temp_child);
END LOOP;
END IF;
RETURN new_child_result;
END
$BODY$
LANGUAGE 'plpgsql';
-- +goose StatementEnd
-- +goose StatementBegin
CREATE OR REPLACE FUNCTION canonical_header_from_array(headers eth.header_cids[]) RETURNS eth.header_cids AS
$BODY$
DECLARE
canonical_header eth.header_cids;
canonical_child eth.header_cids;
header eth.header_cids;
current_child_result child_result;
child_headers eth.header_cids[];
current_header_with_child eth.header_cids;
has_children_count INT DEFAULT 0;
BEGIN
-- for each header in the provided set
FOREACH header IN ARRAY headers
LOOP
-- check if it has any children
current_child_result = has_child(header.block_hash, header.block_number);
IF current_child_result.has_child THEN
-- if it does, take note
has_children_count = has_children_count + 1;
current_header_with_child = header;
-- and add the children to the growing set of child headers
child_headers = array_cat(child_headers, current_child_result.children);
END IF;
END LOOP;
-- if none of the headers had children, none is more canonical than the other
IF has_children_count = 0 THEN
-- return the first one selected
SELECT * INTO canonical_header FROM unnest(headers) LIMIT 1;
-- if only one header had children, it can be considered the heaviest/canonical header of the set
ELSIF has_children_count = 1 THEN
-- return the only header with a child
canonical_header = current_header_with_child;
-- if there are multiple headers with children
ELSE
-- find the canonical header from the child set
canonical_child = canonical_header_from_array(child_headers);
-- the header that is parent to this header, is the canonical header at this level
SELECT * INTO canonical_header FROM unnest(headers)
WHERE block_hash = canonical_child.parent_hash;
END IF;
RETURN canonical_header;
END
$BODY$
LANGUAGE 'plpgsql';
-- +goose StatementEnd
-- +goose StatementBegin
CREATE OR REPLACE FUNCTION canonical_header_id(height BIGINT) RETURNS INTEGER AS
$BODY$
DECLARE
canonical_header eth.header_cids;
headers eth.header_cids[];
header_count INT;
temp_header eth.header_cids;
BEGIN
-- collect all headers at this height
FOR temp_header IN
SELECT * FROM eth.header_cids WHERE block_number = height
LOOP
headers = array_append(headers, temp_header);
END LOOP;
-- count the number of headers collected
header_count = array_length(headers, 1);
-- if we have less than 1 header, return NULL
IF header_count IS NULL OR header_count < 1 THEN
RETURN NULL;
-- if we have one header, return its id
ELSIF header_count = 1 THEN
RETURN headers[1].id;
-- if we have multiple headers we need to determine which one is canonical
ELSE
canonical_header = canonical_header_from_array(headers);
RETURN canonical_header.id;
END IF;
END;
$BODY$
LANGUAGE 'plpgsql';
-- +goose StatementEnd
-- +goose Down
DROP FUNCTION was_state_leaf_removed;
DROP FUNCTION canonical_header_id;
DROP FUNCTION canonical_header_from_array;
DROP FUNCTION has_child;
DROP TYPE child_result;