From 2695d9e353136e26aa54f5bd29cf20c0824fc6f4 Mon Sep 17 00:00:00 2001 From: i-norden Date: Tue, 7 Mar 2023 18:17:35 -0600 Subject: [PATCH] updated get_storage_at procedures for v5 --- ...sql => 00016_create_stored_procedures.sql} | 23 ++- .../00023_create_get_storage_at_functions.sql | 112 +++++++++++++ .../00023_get_storage_at_functions.sql | 147 ------------------ schema.sql | 116 +++++++++++++- 4 files changed, 244 insertions(+), 154 deletions(-) rename db/migrations/{00016_create_stored_functions.sql => 00016_create_stored_procedures.sql} (82%) create mode 100644 db/migrations/00023_create_get_storage_at_functions.sql delete mode 100644 db/migrations/00023_get_storage_at_functions.sql diff --git a/db/migrations/00016_create_stored_functions.sql b/db/migrations/00016_create_stored_procedures.sql similarity index 82% rename from db/migrations/00016_create_stored_functions.sql rename to db/migrations/00016_create_stored_procedures.sql index b72c77a..7792ca3 100644 --- a/db/migrations/00016_create_stored_functions.sql +++ b/db/migrations/00016_create_stored_procedures.sql @@ -1,15 +1,29 @@ -- +goose Up -- +goose StatementBegin --- returns if a state leaf node was removed within the provided block number -CREATE OR REPLACE FUNCTION was_state_leaf_removed(key character varying, hash character varying) +-- returns if the state leaf key is vacated (previously existed but now is empty) at the provided block hash +CREATE OR REPLACE FUNCTION was_state_leaf_removed(v_key VARCHAR(66), v_hash VARCHAR) RETURNS boolean AS $$ SELECT state_cids.removed = true FROM eth.state_cids INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.block_hash) - WHERE state_leaf_key = key + WHERE state_leaf_key = v_key AND state_cids.block_number <= (SELECT block_number FROM eth.header_cids - WHERE block_hash = hash) + WHERE block_hash = v_hash) + ORDER BY state_cids.block_number DESC LIMIT 1; +$$ +language sql; +-- +goose StatementEnd + +-- +goose StatementBegin +-- returns if the state leaf key is vacated (previously existed but now is empty) at the provided block height +CREATE OR REPLACE FUNCTION public.was_state_leaf_removed_by_number(v_key VARCHAR(66), v_block_no BIGINT) + RETURNS BOOLEAN AS $$ + SELECT state_cids.removed = true + FROM eth.state_cids + INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.block_hash) + WHERE state_leaf_key = v_key + AND state_cids.block_number <= v_block_no ORDER BY state_cids.block_number DESC LIMIT 1; $$ language sql; @@ -132,6 +146,7 @@ LANGUAGE 'plpgsql'; -- +goose Down DROP FUNCTION was_state_leaf_removed; +DROP FUNCTION was_state_leaf_removed_by_number; DROP FUNCTION canonical_header_hash; DROP FUNCTION canonical_header_from_array; DROP FUNCTION has_child; diff --git a/db/migrations/00023_create_get_storage_at_functions.sql b/db/migrations/00023_create_get_storage_at_functions.sql new file mode 100644 index 0000000..6140966 --- /dev/null +++ b/db/migrations/00023_create_get_storage_at_functions.sql @@ -0,0 +1,112 @@ +-- +goose Up +-- +goose StatementBegin +CREATE OR REPLACE FUNCTION public.get_storage_at_by_number(v_state_leaf_key text, v_storage_leaf_key text, v_block_no bigint) + RETURNS TABLE + ( + cid TEXT, + block_number BIGINT, + removed BOOL, + state_leaf_removed BOOL + ) +AS +$BODY$ +DECLARE + v_state_path BYTEA; + v_header TEXT; + v_canonical_header TEXT; +BEGIN + CREATE TEMP TABLE tmp_tt_stg2 + ( + header_id TEXT, + cid TEXT, + block_number BIGINT, + removed BOOL, + state_leaf_removed BOOL + ) ON COMMIT DROP; + + -- in best case scenario, the latest record we find for the provided keys is for a canonical block + INSERT INTO tmp_tt_stg2 + SELECT storage_cids.header_id, + storage_cids.cid, + storage_cids.block_number, + storage_cids.removed, + was_state_leaf_removed_by_number(v_state_leaf_key, v_block_no) AS state_leaf_removed + FROM eth.storage_cids + WHERE storage_leaf_key = v_storage_leaf_key + AND storage_cids.state_leaf_key = v_state_leaf_key -- can lookup directly on the leaf key in v5 + AND storage_cids.block_number <= v_block_no + ORDER BY storage_cids.block_number DESC LIMIT 1; + + -- check if result is from canonical state + SELECT header_id, canonical_header_hash(tmp_tt_stg2.block_number), tmp_tt_stg2.block_number + INTO v_header, v_canonical_header, v_block_no + FROM tmp_tt_stg2 LIMIT 1; + + IF v_header IS NULL OR v_header != v_canonical_header THEN + RAISE NOTICE 'get_storage_at_by_number: chosen header NULL OR % != canonical header % for block number %, trying again.', v_header, v_canonical_header, v_block_no; + TRUNCATE tmp_tt_stg2; + -- If we hit on a non-canonical block, we need to go back and do a comprehensive check. + -- We try to avoid this to avoid joining between storage_cids, state_cids, and header_cids + INSERT INTO tmp_tt_stg2 + SELECT storage_cids.header_id, + storage_cids.cid, + storage_cids.block_number, + storage_cids.removed, + was_state_leaf_removed_by_number( + v_state_leaf_key, + v_block_no + ) AS state_leaf_removed + FROM eth.storage_cids + INNER JOIN eth.state_cids ON ( + storage_cids.header_id = state_cids.header_id + AND storage_cids.block_number = state_cids.block_number + AND storage_cids.state_leaf_key = state_cids.state_leaf_key + ) + INNER JOIN eth.header_cids ON ( + state_cids.header_id = header_cids.block_hash + AND state_cids.block_number = header_cids.block_number + ) + WHERE state_leaf_key = v_state_leaf_key + AND storage_leaf_key = v_storage_leaf_key + AND state_cids.block_number <= v_block_no + AND storage_cids.block_number <= v_block_no + AND header_cids.block_number <= v_block_no + AND header_cids.block_hash = (SELECT canonical_header_hash(header_cids.block_number)) + ORDER BY header_cids.block_number DESC LIMIT 1; + END IF; + + RETURN QUERY SELECT t.cid, t.block_number, t.removed, t.state_leaf_removed + FROM tmp_tt_stg2 AS t + LIMIT 1; +END +$BODY$ +language 'plpgsql'; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE OR REPLACE FUNCTION public.get_storage_at_by_hash(v_state_leaf_key TEXT, v_storage_leaf_key text, v_block_hash text) + RETURNS TABLE + ( + cid TEXT, + block_number BIGINT, + node_type INTEGER, + state_leaf_removed BOOL + ) +AS +$BODY$ +DECLARE + v_block_no BIGINT; +BEGIN + SELECT h.block_number INTO v_block_no FROM eth.header_cids AS h WHERE block_hash = v_block_hash LIMIT 1; + IF v_block_no IS NULL THEN + RETURN; + END IF; + RETURN QUERY SELECT * FROM get_storage_at_by_number(v_state_leaf_key, v_storage_leaf_key, v_block_no); +END +$BODY$ +LANGUAGE 'plpgsql'; +-- +goose StatementEnd + +-- +goose Down +DROP FUNCTION get_storage_at_by_hash; +DROP FUNCTION get_storage_at_by_number; diff --git a/db/migrations/00023_get_storage_at_functions.sql b/db/migrations/00023_get_storage_at_functions.sql deleted file mode 100644 index f4090ae..0000000 --- a/db/migrations/00023_get_storage_at_functions.sql +++ /dev/null @@ -1,147 +0,0 @@ --- +goose Up --- +goose StatementBegin -CREATE OR REPLACE FUNCTION public.was_state_leaf_removed_by_number(v_key character varying, v_block_no bigint) - RETURNS BOOLEAN AS -$$ -SELECT STATE_CIDS.NODE_TYPE = 3 -FROM ETH.STATE_CIDS - INNER JOIN ETH.HEADER_CIDS ON (STATE_CIDS.HEADER_ID = HEADER_CIDS.BLOCK_HASH) -WHERE STATE_LEAF_KEY = v_key - AND STATE_CIDS.BLOCK_NUMBER <= v_block_no -ORDER BY STATE_CIDS.BLOCK_NUMBER DESC -LIMIT 1; -$$ -language sql; - -CREATE OR REPLACE FUNCTION public.get_storage_at_by_number(v_state_leaf_key text, v_storage_leaf_key text, v_block_no bigint) - RETURNS TABLE - ( - cid TEXT, - mh_key TEXT, - block_number BIGINT, - node_type INTEGER, - state_leaf_removed BOOL - ) - LANGUAGE plpgsql -AS -$$ -DECLARE - v_state_path BYTEA; - v_header TEXT; - v_canonical_header TEXT; -BEGIN - CREATE TEMP TABLE tmp_tt_stg2 - ( - header_id TEXT, - cid TEXT, - mh_key TEXT, - block_number BIGINT, - node_type INTEGER, - state_leaf_removed BOOL - ) ON COMMIT DROP; - - -- In the best case scenario, the state_path is stable, and we can cheaply look that up via the state_leaf_key - -- and then use it (plus storage_leaf_key and block_number) to zero in on the storage_cid. - SELECT STATE_PATH, STATE_CIDS.BLOCK_NUMBER - INTO v_state_path, v_block_no - FROM ETH.STATE_CIDS - WHERE STATE_LEAF_KEY = v_state_leaf_key - AND STATE_CIDS.BLOCK_NUMBER <= v_block_no - ORDER BY STATE_CIDS.BLOCK_NUMBER DESC - LIMIT 1; - - INSERT INTO tmp_tt_stg2 - SELECT STORAGE_CIDS.HEADER_ID, - STORAGE_CIDS.CID, - STORAGE_CIDS.MH_KEY, - STORAGE_CIDS.BLOCK_NUMBER, - STORAGE_CIDS.NODE_TYPE, - WAS_STATE_LEAF_REMOVED_BY_NUMBER( - v_state_leaf_key, - v_block_no - ) AS STATE_LEAF_REMOVED - FROM ETH.STORAGE_CIDS - WHERE STORAGE_LEAF_KEY = v_storage_leaf_key - AND STORAGE_CIDS.BLOCK_NUMBER <= v_block_no - AND STORAGE_CIDS.STATE_PATH = v_state_path - AND STORAGE_CIDS.BLOCK_NUMBER <= v_block_no - ORDER BY STORAGE_CIDS.BLOCK_NUMBER DESC - LIMIT 1; - - SELECT header_id, CANONICAL_HEADER_HASH(tmp_tt_stg2.block_number), tmp_tt_stg2.block_number - INTO v_header, v_canonical_header, v_block_no - FROM tmp_tt_stg2 - LIMIT 1; - IF v_header IS NULL OR v_header != v_canonical_header THEN - RAISE NOTICE 'get_storage_at_by_number: chosen header NULL OR % != canonical header % for block number %, trying again.', v_header, v_canonical_header, v_block_no; - TRUNCATE tmp_tt_stg2; - -- If we missed on the above, or hit on a non-canonical block, we need to go back and do a comprehensive check. - -- We try to avoid this because joining on STORAGE_CIDS.STATE_PATH = STATE_CIDS.STATE_PATH is quite - -- expensive whenever there are a lot of candidate rows, as is often the case when the state_path only - -- changes very infrequently (if ever). - INSERT INTO tmp_tt_stg2 - SELECT STORAGE_CIDS.HEADER_ID, - STORAGE_CIDS.CID, - STORAGE_CIDS.MH_KEY, - STORAGE_CIDS.BLOCK_NUMBER, - STORAGE_CIDS.NODE_TYPE, - WAS_STATE_LEAF_REMOVED_BY_NUMBER( - v_state_leaf_key, - v_block_no - ) AS STATE_LEAF_REMOVED - FROM ETH.STORAGE_CIDS - INNER JOIN ETH.STATE_CIDS ON ( - STORAGE_CIDS.HEADER_ID = STATE_CIDS.HEADER_ID - AND STORAGE_CIDS.BLOCK_NUMBER = STATE_CIDS.BLOCK_NUMBER - AND STORAGE_CIDS.STATE_PATH = STATE_CIDS.STATE_PATH - ) - INNER JOIN ETH.HEADER_CIDS ON ( - STATE_CIDS.HEADER_ID = HEADER_CIDS.BLOCK_HASH - AND STATE_CIDS.BLOCK_NUMBER = HEADER_CIDS.BLOCK_NUMBER - ) - WHERE STATE_LEAF_KEY = v_state_leaf_key - AND STATE_CIDS.BLOCK_NUMBER <= v_block_no - AND STORAGE_LEAF_KEY = v_storage_leaf_key - AND STORAGE_CIDS.BLOCK_NUMBER <= v_block_no - AND HEADER_CIDS.BLOCK_NUMBER <= v_block_no - AND HEADER_CIDS.BLOCK_HASH = (SELECT CANONICAL_HEADER_HASH(HEADER_CIDS.BLOCK_NUMBER)) - ORDER BY HEADER_CIDS.BLOCK_NUMBER DESC - LIMIT 1; - END IF; - - RETURN QUERY SELECT t.cid, t.mh_key, t.block_number, t.node_type, t.state_leaf_removed - fROM tmp_tt_stg2 AS t - LIMIT 1; -END -$$; - -CREATE OR REPLACE FUNCTION public.get_storage_at_by_hash(v_state_leaf_key TEXT, v_storage_leaf_key text, v_block_hash text) - RETURNS TABLE - ( - cid TEXT, - mh_key TEXT, - block_number BIGINT, - node_type INTEGER, - state_leaf_removed BOOL - ) - LANGUAGE plpgsql -AS -$$ -DECLARE - v_block_no BIGINT; -BEGIN - SELECT h.BLOCK_NUMBER INTO v_block_no FROM ETH.HEADER_CIDS AS h WHERE BLOCK_HASH = v_block_hash LIMIT 1; - IF v_block_no IS NULL THEN - RETURN; - END IF; - RETURN QUERY SELECT * FROM get_storage_at_by_number(v_state_leaf_key, v_storage_leaf_key, v_block_no); -END -$$; --- +goose StatementEnd - --- +goose Down --- +goose StatementBegin -DROP FUNCTION public.was_state_leaf_removed_by_number; -DROP FUNCTION public.get_storage_at_by_number; -DROP FUNCTION public.get_storage_at_by_hash; --- +goose StatementEnd diff --git a/schema.sql b/schema.sql index 0e800b0..6c3610b 100644 --- a/schema.sql +++ b/schema.sql @@ -187,6 +187,100 @@ END $$; +-- +-- Name: get_storage_at_by_hash(text, text, text); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.get_storage_at_by_hash(v_state_leaf_key text, v_storage_leaf_key text, v_block_hash text) RETURNS TABLE(cid text, block_number bigint, node_type integer, state_leaf_removed boolean) + LANGUAGE plpgsql + AS $$ +DECLARE + v_block_no BIGINT; +BEGIN + SELECT h.block_number INTO v_block_no FROM eth.header_cids AS h WHERE block_hash = v_block_hash LIMIT 1; + IF v_block_no IS NULL THEN + RETURN; + END IF; + RETURN QUERY SELECT * FROM get_storage_at_by_number(v_state_leaf_key, v_storage_leaf_key, v_block_no); +END +$$; + + +-- +-- Name: get_storage_at_by_number(text, text, bigint); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.get_storage_at_by_number(v_state_leaf_key text, v_storage_leaf_key text, v_block_no bigint) RETURNS TABLE(cid text, block_number bigint, removed boolean, state_leaf_removed boolean) + LANGUAGE plpgsql + AS $$ +DECLARE + v_state_path BYTEA; + v_header TEXT; + v_canonical_header TEXT; +BEGIN + CREATE TEMP TABLE tmp_tt_stg2 + ( + header_id TEXT, + cid TEXT, + block_number BIGINT, + removed BOOL, + state_leaf_removed BOOL + ) ON COMMIT DROP; + -- in best case scenario, the latest record we find for the provided keys is for a canonical block + INSERT INTO tmp_tt_stg2 + SELECT storage_cids.header_id, + storage_cids.cid, + storage_cids.block_number, + storage_cids.removed, + was_state_leaf_removed_by_number(v_state_leaf_key, v_block_no) AS state_leaf_removed + FROM eth.storage_cids + WHERE storage_leaf_key = v_storage_leaf_key + AND storage_cids.state_leaf_key = v_state_leaf_key -- can lookup directly on the leaf key in v5 + AND storage_cids.block_number <= v_block_no + ORDER BY storage_cids.block_number DESC LIMIT 1; + -- check if result is from canonical state + SELECT header_id, canonical_header_hash(tmp_tt_stg2.block_number), tmp_tt_stg2.block_number + INTO v_header, v_canonical_header, v_block_no + FROM tmp_tt_stg2 LIMIT 1; + IF v_header IS NULL OR v_header != v_canonical_header THEN + RAISE NOTICE 'get_storage_at_by_number: chosen header NULL OR % != canonical header % for block number %, trying again.', v_header, v_canonical_header, v_block_no; + TRUNCATE tmp_tt_stg2; + -- If we hit on a non-canonical block, we need to go back and do a comprehensive check. + -- We try to avoid this to avoid joining between storage_cids, state_cids, and header_cids + INSERT INTO tmp_tt_stg2 + SELECT storage_cids.header_id, + storage_cids.cid, + storage_cids.block_number, + storage_cids.removed, + was_state_leaf_removed_by_number( + v_state_leaf_key, + v_block_no + ) AS state_leaf_removed + FROM eth.storage_cids + INNER JOIN eth.state_cids ON ( + storage_cids.header_id = state_cids.header_id + AND storage_cids.block_number = state_cids.block_number + AND storage_cids.state_leaf_key = state_cids.state_leaf_key + ) + INNER JOIN eth.header_cids ON ( + state_cids.header_id = header_cids.block_hash + AND state_cids.block_number = header_cids.block_number + ) + WHERE state_leaf_key = v_state_leaf_key + AND storage_leaf_key = v_storage_leaf_key + AND state_cids.block_number <= v_block_no + AND storage_cids.block_number <= v_block_no + AND header_cids.block_number <= v_block_no + AND header_cids.block_hash = (SELECT canonical_header_hash(header_cids.block_number)) + ORDER BY header_cids.block_number DESC LIMIT 1; + END IF; + RETURN QUERY SELECT t.cid, t.block_number, t.removed, t.state_leaf_removed + FROM tmp_tt_stg2 AS t + LIMIT 1; +END +$$; + + -- -- Name: has_child(character varying, bigint); Type: FUNCTION; Schema: public; Owner: - -- @@ -224,16 +318,32 @@ $$; -- Name: was_state_leaf_removed(character varying, character varying); Type: FUNCTION; Schema: public; Owner: - -- -CREATE FUNCTION public.was_state_leaf_removed(key character varying, hash character varying) RETURNS boolean +CREATE FUNCTION public.was_state_leaf_removed(v_key character varying, v_hash character varying) RETURNS boolean LANGUAGE sql AS $$ SELECT state_cids.removed = true FROM eth.state_cids INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.block_hash) - WHERE state_leaf_key = key + WHERE state_leaf_key = v_key AND state_cids.block_number <= (SELECT block_number FROM eth.header_cids - WHERE block_hash = hash) + WHERE block_hash = v_hash) + ORDER BY state_cids.block_number DESC LIMIT 1; +$$; + + +-- +-- Name: was_state_leaf_removed_by_number(character varying, bigint); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.was_state_leaf_removed_by_number(v_key character varying, v_block_no bigint) RETURNS boolean + LANGUAGE sql + AS $$ + SELECT state_cids.removed = true + FROM eth.state_cids + INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.block_hash) + WHERE state_leaf_key = v_key + AND state_cids.block_number <= v_block_no ORDER BY state_cids.block_number DESC LIMIT 1; $$;