From 268a282eaca3c6c625d65bb418fa9a582bb7425f Mon Sep 17 00:00:00 2001 From: Thomas E Lackey Date: Fri, 18 Nov 2022 12:32:35 -0600 Subject: [PATCH] Draft: Split get_storage_at into two paths: one optimistic and simple, the other more comprehensive but slower. (#117) * Split get_storage_at into two paths: one optimisic and simple, the other more exhaustive but slower. * Remove is null check * Fix name * Update comment --- .../00023_get_storage_at_functions.sql | 127 +++++++++++++----- 1 file changed, 97 insertions(+), 30 deletions(-) diff --git a/db/migrations/00023_get_storage_at_functions.sql b/db/migrations/00023_get_storage_at_functions.sql index 581dd4f..9ff5249 100644 --- a/db/migrations/00023_get_storage_at_functions.sql +++ b/db/migrations/00023_get_storage_at_functions.sql @@ -12,37 +12,104 @@ $$ language sql; CREATE OR REPLACE FUNCTION public.get_storage_at_by_number(stateLeafKey text, storageLeafKey text, blockNo bigint) - RETURNS TABLE(cid text, mh_key text, block_number bigint, node_type integer, state_leaf_removed bool) LANGUAGE plpgsql -AS $$ + RETURNS TABLE + ( + cid text, + mh_key text, + block_number bigint, + node_type integer, + state_leaf_removed bool + ) + LANGUAGE plpgsql +AS +$$ +DECLARE + statePath bytea; + temp_header text; + temp_canonical_header text; BEGIN - RETURN QUERY SELECT STORAGE_CIDS.CID, - STORAGE_CIDS.MH_KEY, - STORAGE_CIDS.BLOCK_NUMBER, - STORAGE_CIDS.NODE_TYPE, - was_state_leaf_removed_by_number( - stateLeafKey, - blockNo - ) AS STATE_LEAF_REMOVED - FROM ETH.STORAGE_CIDS - INNER JOIN ETH.STATE_CIDS ON ( - STORAGE_CIDS.HEADER_ID = STATE_CIDS.HEADER_ID - AND STORAGE_CIDS.BLOCK_NUMBER = STATE_CIDS.BLOCK_NUMBER - AND STORAGE_CIDS.STATE_PATH = STATE_CIDS.STATE_PATH - AND STORAGE_CIDS.BLOCK_NUMBER <= blockNo - ) - INNER JOIN ETH.HEADER_CIDS ON ( - STATE_CIDS.HEADER_ID = HEADER_CIDS.BLOCK_HASH - AND STATE_CIDS.BLOCK_NUMBER = HEADER_CIDS.BLOCK_NUMBER - AND STATE_CIDS.BLOCK_NUMBER <= blockNo - ) - WHERE STATE_LEAF_KEY = stateLeafKey - AND STATE_CIDS.BLOCK_NUMBER <= blockNo - AND STORAGE_LEAF_KEY = storageLeafKey - AND STORAGE_CIDS.BLOCK_NUMBER <= blockNo - AND HEADER_CIDS.BLOCK_NUMBER <= blockNo - AND HEADER_CIDS.BLOCK_HASH = (SELECT CANONICAL_HEADER_HASH(HEADER_CIDS.BLOCK_NUMBER)) - ORDER BY HEADER_CIDS.BLOCK_NUMBER DESC - LIMIT 1; + CREATE TEMP TABLE tmp_tt_stg2 + ( + header_id text, + cid text, + mh_key text, + block_number bigint, + node_type integer, + state_leaf_removed bool + ) ON COMMIT DROP; + + SELECT state_path, STATE_CIDS.block_number + INTO statePath, blockNo + FROM ETH.STATE_CIDS + WHERE STATE_LEAF_KEY = stateLeafKey + AND STATE_CIDS.BLOCK_NUMBER <= blockNo + ORDER BY STATE_CIDS.BLOCK_NUMBER DESC + LIMIT 1; + + INSERT INTO tmp_tt_stg2 + SELECT STORAGE_CIDS.HEADER_ID, + STORAGE_CIDS.CID, + STORAGE_CIDS.MH_KEY, + STORAGE_CIDS.BLOCK_NUMBER, + STORAGE_CIDS.NODE_TYPE, + was_state_leaf_removed_by_number( + stateLeafKey, + blockNo + ) AS STATE_LEAF_REMOVED + FROM eth.storage_cids + INNER JOIN ETH.STATE_CIDS ON ( + STORAGE_CIDS.HEADER_ID = STATE_CIDS.HEADER_ID + AND STORAGE_CIDS.BLOCK_NUMBER = STATE_CIDS.BLOCK_NUMBER + ) + WHERE storage_leaf_key = storageLeafKey + AND storage_cids.block_number <= blockNo + AND storage_cids.state_path = statePath + AND state_leaf_key = stateLeafKey + AND storage_cids.block_number <= blockNo + ORDER BY state_cids.block_number DESC + LIMIT 1; + + SELECT header_id, canonical_header_hash(tmp_tt_stg2.block_number), tmp_tt_stg2.block_number + into temp_header, temp_canonical_header, blockNo + from tmp_tt_stg2; + IF temp_header IS NOT NULL AND temp_header != temp_canonical_header THEN + raise notice 'get_storage_at_by_number: chosen header % != canonical header % for block number %, trying again.', temp_header, temp_canonical_header, blockNo; + TRUNCATE tmp_tt_stg2; + -- If we chose a non-canonical block, we need to go back and get the right one. While we could do this in one + -- query if we wanted, the disadvantage would be that it is very uncommon, while the join on + -- STORAGE_CIDS.STATE_PATH = STATE_CIDS.STATE_PATH is quite expensive whenever there are a lot of candidate rows, + -- so we wish to avoid that more expensive check when possible. + INSERT INTO tmp_tt_stg2 + SELECT STORAGE_CIDS.HEADER_ID, + STORAGE_CIDS.CID, + STORAGE_CIDS.MH_KEY, + STORAGE_CIDS.BLOCK_NUMBER, + STORAGE_CIDS.NODE_TYPE, + was_state_leaf_removed_by_number( + stateLeafKey, + blockNo + ) AS STATE_LEAF_REMOVED + FROM ETH.STORAGE_CIDS + INNER JOIN ETH.STATE_CIDS ON ( + STORAGE_CIDS.HEADER_ID = STATE_CIDS.HEADER_ID + AND STORAGE_CIDS.BLOCK_NUMBER = STATE_CIDS.BLOCK_NUMBER + AND STORAGE_CIDS.STATE_PATH = STATE_CIDS.STATE_PATH + ) + INNER JOIN ETH.HEADER_CIDS ON ( + STATE_CIDS.HEADER_ID = HEADER_CIDS.BLOCK_HASH + AND STATE_CIDS.BLOCK_NUMBER = HEADER_CIDS.BLOCK_NUMBER + ) + WHERE STATE_LEAF_KEY = stateLeafKey + AND STATE_CIDS.BLOCK_NUMBER = blockNo + AND STORAGE_LEAF_KEY = storageLeafKey + AND STORAGE_CIDS.BLOCK_NUMBER = blockNo + AND HEADER_CIDS.BLOCK_NUMBER = blockNo + AND HEADER_CIDS.BLOCK_HASH = temp_canonical_header + ORDER BY HEADER_CIDS.BLOCK_NUMBER DESC + LIMIT 1; + END IF; + + RETURN QUERY SELECT t.cid, t.mh_key, t.block_number, t.node_type, t.state_leaf_removed from tmp_tt_stg2 as t; END $$;