From 986ce1ead80c845da325c495c07de04296c821b9 Mon Sep 17 00:00:00 2001 From: Thomas E Lackey Date: Mon, 21 Nov 2022 11:56:08 -0600 Subject: [PATCH] Update behavior back to a comprehensive JOIN on state_path if we came up empty in our optimized check. (#118) --- .../00023_get_storage_at_functions.sql | 145 ++++++++++-------- 1 file changed, 78 insertions(+), 67 deletions(-) diff --git a/db/migrations/00023_get_storage_at_functions.sql b/db/migrations/00023_get_storage_at_functions.sql index 9ff5249..f4090ae 100644 --- a/db/migrations/00023_get_storage_at_functions.sql +++ b/db/migrations/00023_get_storage_at_functions.sql @@ -1,48 +1,52 @@ -- +goose Up -- +goose StatementBegin -CREATE OR REPLACE FUNCTION public.was_state_leaf_removed_by_number(key character varying, blockNo bigint) - RETURNS boolean AS $$ -SELECT state_cids.node_type = 3 -FROM eth.state_cids - INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.block_hash) -WHERE state_leaf_key = key - AND state_cids.block_number <= blockNo -ORDER BY state_cids.block_number DESC LIMIT 1; +CREATE OR REPLACE FUNCTION public.was_state_leaf_removed_by_number(v_key character varying, v_block_no bigint) + RETURNS BOOLEAN AS +$$ +SELECT STATE_CIDS.NODE_TYPE = 3 +FROM ETH.STATE_CIDS + INNER JOIN ETH.HEADER_CIDS ON (STATE_CIDS.HEADER_ID = HEADER_CIDS.BLOCK_HASH) +WHERE STATE_LEAF_KEY = v_key + AND STATE_CIDS.BLOCK_NUMBER <= v_block_no +ORDER BY STATE_CIDS.BLOCK_NUMBER DESC +LIMIT 1; $$ language sql; -CREATE OR REPLACE FUNCTION public.get_storage_at_by_number(stateLeafKey text, storageLeafKey text, blockNo bigint) +CREATE OR REPLACE FUNCTION public.get_storage_at_by_number(v_state_leaf_key text, v_storage_leaf_key text, v_block_no bigint) RETURNS TABLE ( - cid text, - mh_key text, - block_number bigint, - node_type integer, - state_leaf_removed bool + cid TEXT, + mh_key TEXT, + block_number BIGINT, + node_type INTEGER, + state_leaf_removed BOOL ) LANGUAGE plpgsql AS $$ DECLARE - statePath bytea; - temp_header text; - temp_canonical_header text; + v_state_path BYTEA; + v_header TEXT; + v_canonical_header TEXT; BEGIN CREATE TEMP TABLE tmp_tt_stg2 ( - header_id text, - cid text, - mh_key text, - block_number bigint, - node_type integer, - state_leaf_removed bool + header_id TEXT, + cid TEXT, + mh_key TEXT, + block_number BIGINT, + node_type INTEGER, + state_leaf_removed BOOL ) ON COMMIT DROP; - SELECT state_path, STATE_CIDS.block_number - INTO statePath, blockNo + -- In the best case scenario, the state_path is stable, and we can cheaply look that up via the state_leaf_key + -- and then use it (plus storage_leaf_key and block_number) to zero in on the storage_cid. + SELECT STATE_PATH, STATE_CIDS.BLOCK_NUMBER + INTO v_state_path, v_block_no FROM ETH.STATE_CIDS - WHERE STATE_LEAF_KEY = stateLeafKey - AND STATE_CIDS.BLOCK_NUMBER <= blockNo + WHERE STATE_LEAF_KEY = v_state_leaf_key + AND STATE_CIDS.BLOCK_NUMBER <= v_block_no ORDER BY STATE_CIDS.BLOCK_NUMBER DESC LIMIT 1; @@ -52,42 +56,38 @@ BEGIN STORAGE_CIDS.MH_KEY, STORAGE_CIDS.BLOCK_NUMBER, STORAGE_CIDS.NODE_TYPE, - was_state_leaf_removed_by_number( - stateLeafKey, - blockNo + WAS_STATE_LEAF_REMOVED_BY_NUMBER( + v_state_leaf_key, + v_block_no ) AS STATE_LEAF_REMOVED - FROM eth.storage_cids - INNER JOIN ETH.STATE_CIDS ON ( - STORAGE_CIDS.HEADER_ID = STATE_CIDS.HEADER_ID - AND STORAGE_CIDS.BLOCK_NUMBER = STATE_CIDS.BLOCK_NUMBER - ) - WHERE storage_leaf_key = storageLeafKey - AND storage_cids.block_number <= blockNo - AND storage_cids.state_path = statePath - AND state_leaf_key = stateLeafKey - AND storage_cids.block_number <= blockNo - ORDER BY state_cids.block_number DESC + FROM ETH.STORAGE_CIDS + WHERE STORAGE_LEAF_KEY = v_storage_leaf_key + AND STORAGE_CIDS.BLOCK_NUMBER <= v_block_no + AND STORAGE_CIDS.STATE_PATH = v_state_path + AND STORAGE_CIDS.BLOCK_NUMBER <= v_block_no + ORDER BY STORAGE_CIDS.BLOCK_NUMBER DESC LIMIT 1; - SELECT header_id, canonical_header_hash(tmp_tt_stg2.block_number), tmp_tt_stg2.block_number - into temp_header, temp_canonical_header, blockNo - from tmp_tt_stg2; - IF temp_header IS NOT NULL AND temp_header != temp_canonical_header THEN - raise notice 'get_storage_at_by_number: chosen header % != canonical header % for block number %, trying again.', temp_header, temp_canonical_header, blockNo; + SELECT header_id, CANONICAL_HEADER_HASH(tmp_tt_stg2.block_number), tmp_tt_stg2.block_number + INTO v_header, v_canonical_header, v_block_no + FROM tmp_tt_stg2 + LIMIT 1; + IF v_header IS NULL OR v_header != v_canonical_header THEN + RAISE NOTICE 'get_storage_at_by_number: chosen header NULL OR % != canonical header % for block number %, trying again.', v_header, v_canonical_header, v_block_no; TRUNCATE tmp_tt_stg2; - -- If we chose a non-canonical block, we need to go back and get the right one. While we could do this in one - -- query if we wanted, the disadvantage would be that it is very uncommon, while the join on - -- STORAGE_CIDS.STATE_PATH = STATE_CIDS.STATE_PATH is quite expensive whenever there are a lot of candidate rows, - -- so we wish to avoid that more expensive check when possible. + -- If we missed on the above, or hit on a non-canonical block, we need to go back and do a comprehensive check. + -- We try to avoid this because joining on STORAGE_CIDS.STATE_PATH = STATE_CIDS.STATE_PATH is quite + -- expensive whenever there are a lot of candidate rows, as is often the case when the state_path only + -- changes very infrequently (if ever). INSERT INTO tmp_tt_stg2 SELECT STORAGE_CIDS.HEADER_ID, STORAGE_CIDS.CID, STORAGE_CIDS.MH_KEY, STORAGE_CIDS.BLOCK_NUMBER, STORAGE_CIDS.NODE_TYPE, - was_state_leaf_removed_by_number( - stateLeafKey, - blockNo + WAS_STATE_LEAF_REMOVED_BY_NUMBER( + v_state_leaf_key, + v_block_no ) AS STATE_LEAF_REMOVED FROM ETH.STORAGE_CIDS INNER JOIN ETH.STATE_CIDS ON ( @@ -99,31 +99,42 @@ BEGIN STATE_CIDS.HEADER_ID = HEADER_CIDS.BLOCK_HASH AND STATE_CIDS.BLOCK_NUMBER = HEADER_CIDS.BLOCK_NUMBER ) - WHERE STATE_LEAF_KEY = stateLeafKey - AND STATE_CIDS.BLOCK_NUMBER = blockNo - AND STORAGE_LEAF_KEY = storageLeafKey - AND STORAGE_CIDS.BLOCK_NUMBER = blockNo - AND HEADER_CIDS.BLOCK_NUMBER = blockNo - AND HEADER_CIDS.BLOCK_HASH = temp_canonical_header + WHERE STATE_LEAF_KEY = v_state_leaf_key + AND STATE_CIDS.BLOCK_NUMBER <= v_block_no + AND STORAGE_LEAF_KEY = v_storage_leaf_key + AND STORAGE_CIDS.BLOCK_NUMBER <= v_block_no + AND HEADER_CIDS.BLOCK_NUMBER <= v_block_no + AND HEADER_CIDS.BLOCK_HASH = (SELECT CANONICAL_HEADER_HASH(HEADER_CIDS.BLOCK_NUMBER)) ORDER BY HEADER_CIDS.BLOCK_NUMBER DESC LIMIT 1; END IF; - RETURN QUERY SELECT t.cid, t.mh_key, t.block_number, t.node_type, t.state_leaf_removed from tmp_tt_stg2 as t; + RETURN QUERY SELECT t.cid, t.mh_key, t.block_number, t.node_type, t.state_leaf_removed + fROM tmp_tt_stg2 AS t + LIMIT 1; END $$; -CREATE OR REPLACE FUNCTION public.get_storage_at_by_hash(stateLeafKey text, storageLeafKey text, blockHash text) - RETURNS TABLE(cid text, mh_key text, block_number bigint, node_type integer, state_leaf_removed bool) LANGUAGE plpgsql -AS $$ +CREATE OR REPLACE FUNCTION public.get_storage_at_by_hash(v_state_leaf_key TEXT, v_storage_leaf_key text, v_block_hash text) + RETURNS TABLE + ( + cid TEXT, + mh_key TEXT, + block_number BIGINT, + node_type INTEGER, + state_leaf_removed BOOL + ) + LANGUAGE plpgsql +AS +$$ DECLARE - blockNo bigint; + v_block_no BIGINT; BEGIN - SELECT h.BLOCK_NUMBER INTO blockNo FROM ETH.HEADER_CIDS as h WHERE BLOCK_HASH = blockHash limit 1; - IF blockNo IS NULL THEN + SELECT h.BLOCK_NUMBER INTO v_block_no FROM ETH.HEADER_CIDS AS h WHERE BLOCK_HASH = v_block_hash LIMIT 1; + IF v_block_no IS NULL THEN RETURN; END IF; - RETURN QUERY SELECT * FROM get_storage_at_by_number(stateLeafKey, storageLeafKey, blockNo); + RETURN QUERY SELECT * FROM get_storage_at_by_number(v_state_leaf_key, v_storage_leaf_key, v_block_no); END $$; -- +goose StatementEnd