Update behavior back to a comprehensive JOIN on state_path if we came up empty in our optimized check. (#118)

This commit is contained in:
Thomas E Lackey 2022-11-21 11:56:08 -06:00 committed by i-norden
parent 268a282eac
commit 986ce1ead8

View File

@ -1,48 +1,52 @@
-- +goose Up -- +goose Up
-- +goose StatementBegin -- +goose StatementBegin
CREATE OR REPLACE FUNCTION public.was_state_leaf_removed_by_number(key character varying, blockNo bigint) CREATE OR REPLACE FUNCTION public.was_state_leaf_removed_by_number(v_key character varying, v_block_no bigint)
RETURNS boolean AS $$ RETURNS BOOLEAN AS
SELECT state_cids.node_type = 3 $$
FROM eth.state_cids SELECT STATE_CIDS.NODE_TYPE = 3
INNER JOIN eth.header_cids ON (state_cids.header_id = header_cids.block_hash) FROM ETH.STATE_CIDS
WHERE state_leaf_key = key INNER JOIN ETH.HEADER_CIDS ON (STATE_CIDS.HEADER_ID = HEADER_CIDS.BLOCK_HASH)
AND state_cids.block_number <= blockNo WHERE STATE_LEAF_KEY = v_key
ORDER BY state_cids.block_number DESC LIMIT 1; AND STATE_CIDS.BLOCK_NUMBER <= v_block_no
ORDER BY STATE_CIDS.BLOCK_NUMBER DESC
LIMIT 1;
$$ $$
language sql; language sql;
CREATE OR REPLACE FUNCTION public.get_storage_at_by_number(stateLeafKey text, storageLeafKey text, blockNo bigint) CREATE OR REPLACE FUNCTION public.get_storage_at_by_number(v_state_leaf_key text, v_storage_leaf_key text, v_block_no bigint)
RETURNS TABLE RETURNS TABLE
( (
cid text, cid TEXT,
mh_key text, mh_key TEXT,
block_number bigint, block_number BIGINT,
node_type integer, node_type INTEGER,
state_leaf_removed bool state_leaf_removed BOOL
) )
LANGUAGE plpgsql LANGUAGE plpgsql
AS AS
$$ $$
DECLARE DECLARE
statePath bytea; v_state_path BYTEA;
temp_header text; v_header TEXT;
temp_canonical_header text; v_canonical_header TEXT;
BEGIN BEGIN
CREATE TEMP TABLE tmp_tt_stg2 CREATE TEMP TABLE tmp_tt_stg2
( (
header_id text, header_id TEXT,
cid text, cid TEXT,
mh_key text, mh_key TEXT,
block_number bigint, block_number BIGINT,
node_type integer, node_type INTEGER,
state_leaf_removed bool state_leaf_removed BOOL
) ON COMMIT DROP; ) ON COMMIT DROP;
SELECT state_path, STATE_CIDS.block_number -- In the best case scenario, the state_path is stable, and we can cheaply look that up via the state_leaf_key
INTO statePath, blockNo -- and then use it (plus storage_leaf_key and block_number) to zero in on the storage_cid.
SELECT STATE_PATH, STATE_CIDS.BLOCK_NUMBER
INTO v_state_path, v_block_no
FROM ETH.STATE_CIDS FROM ETH.STATE_CIDS
WHERE STATE_LEAF_KEY = stateLeafKey WHERE STATE_LEAF_KEY = v_state_leaf_key
AND STATE_CIDS.BLOCK_NUMBER <= blockNo AND STATE_CIDS.BLOCK_NUMBER <= v_block_no
ORDER BY STATE_CIDS.BLOCK_NUMBER DESC ORDER BY STATE_CIDS.BLOCK_NUMBER DESC
LIMIT 1; LIMIT 1;
@ -52,42 +56,38 @@ BEGIN
STORAGE_CIDS.MH_KEY, STORAGE_CIDS.MH_KEY,
STORAGE_CIDS.BLOCK_NUMBER, STORAGE_CIDS.BLOCK_NUMBER,
STORAGE_CIDS.NODE_TYPE, STORAGE_CIDS.NODE_TYPE,
was_state_leaf_removed_by_number( WAS_STATE_LEAF_REMOVED_BY_NUMBER(
stateLeafKey, v_state_leaf_key,
blockNo v_block_no
) AS STATE_LEAF_REMOVED ) AS STATE_LEAF_REMOVED
FROM eth.storage_cids FROM ETH.STORAGE_CIDS
INNER JOIN ETH.STATE_CIDS ON ( WHERE STORAGE_LEAF_KEY = v_storage_leaf_key
STORAGE_CIDS.HEADER_ID = STATE_CIDS.HEADER_ID AND STORAGE_CIDS.BLOCK_NUMBER <= v_block_no
AND STORAGE_CIDS.BLOCK_NUMBER = STATE_CIDS.BLOCK_NUMBER AND STORAGE_CIDS.STATE_PATH = v_state_path
) AND STORAGE_CIDS.BLOCK_NUMBER <= v_block_no
WHERE storage_leaf_key = storageLeafKey ORDER BY STORAGE_CIDS.BLOCK_NUMBER DESC
AND storage_cids.block_number <= blockNo
AND storage_cids.state_path = statePath
AND state_leaf_key = stateLeafKey
AND storage_cids.block_number <= blockNo
ORDER BY state_cids.block_number DESC
LIMIT 1; LIMIT 1;
SELECT header_id, canonical_header_hash(tmp_tt_stg2.block_number), tmp_tt_stg2.block_number SELECT header_id, CANONICAL_HEADER_HASH(tmp_tt_stg2.block_number), tmp_tt_stg2.block_number
into temp_header, temp_canonical_header, blockNo INTO v_header, v_canonical_header, v_block_no
from tmp_tt_stg2; FROM tmp_tt_stg2
IF temp_header IS NOT NULL AND temp_header != temp_canonical_header THEN LIMIT 1;
raise notice 'get_storage_at_by_number: chosen header % != canonical header % for block number %, trying again.', temp_header, temp_canonical_header, blockNo; IF v_header IS NULL OR v_header != v_canonical_header THEN
RAISE NOTICE 'get_storage_at_by_number: chosen header NULL OR % != canonical header % for block number %, trying again.', v_header, v_canonical_header, v_block_no;
TRUNCATE tmp_tt_stg2; TRUNCATE tmp_tt_stg2;
-- If we chose a non-canonical block, we need to go back and get the right one. While we could do this in one -- If we missed on the above, or hit on a non-canonical block, we need to go back and do a comprehensive check.
-- query if we wanted, the disadvantage would be that it is very uncommon, while the join on -- We try to avoid this because joining on STORAGE_CIDS.STATE_PATH = STATE_CIDS.STATE_PATH is quite
-- STORAGE_CIDS.STATE_PATH = STATE_CIDS.STATE_PATH is quite expensive whenever there are a lot of candidate rows, -- expensive whenever there are a lot of candidate rows, as is often the case when the state_path only
-- so we wish to avoid that more expensive check when possible. -- changes very infrequently (if ever).
INSERT INTO tmp_tt_stg2 INSERT INTO tmp_tt_stg2
SELECT STORAGE_CIDS.HEADER_ID, SELECT STORAGE_CIDS.HEADER_ID,
STORAGE_CIDS.CID, STORAGE_CIDS.CID,
STORAGE_CIDS.MH_KEY, STORAGE_CIDS.MH_KEY,
STORAGE_CIDS.BLOCK_NUMBER, STORAGE_CIDS.BLOCK_NUMBER,
STORAGE_CIDS.NODE_TYPE, STORAGE_CIDS.NODE_TYPE,
was_state_leaf_removed_by_number( WAS_STATE_LEAF_REMOVED_BY_NUMBER(
stateLeafKey, v_state_leaf_key,
blockNo v_block_no
) AS STATE_LEAF_REMOVED ) AS STATE_LEAF_REMOVED
FROM ETH.STORAGE_CIDS FROM ETH.STORAGE_CIDS
INNER JOIN ETH.STATE_CIDS ON ( INNER JOIN ETH.STATE_CIDS ON (
@ -99,31 +99,42 @@ BEGIN
STATE_CIDS.HEADER_ID = HEADER_CIDS.BLOCK_HASH STATE_CIDS.HEADER_ID = HEADER_CIDS.BLOCK_HASH
AND STATE_CIDS.BLOCK_NUMBER = HEADER_CIDS.BLOCK_NUMBER AND STATE_CIDS.BLOCK_NUMBER = HEADER_CIDS.BLOCK_NUMBER
) )
WHERE STATE_LEAF_KEY = stateLeafKey WHERE STATE_LEAF_KEY = v_state_leaf_key
AND STATE_CIDS.BLOCK_NUMBER = blockNo AND STATE_CIDS.BLOCK_NUMBER <= v_block_no
AND STORAGE_LEAF_KEY = storageLeafKey AND STORAGE_LEAF_KEY = v_storage_leaf_key
AND STORAGE_CIDS.BLOCK_NUMBER = blockNo AND STORAGE_CIDS.BLOCK_NUMBER <= v_block_no
AND HEADER_CIDS.BLOCK_NUMBER = blockNo AND HEADER_CIDS.BLOCK_NUMBER <= v_block_no
AND HEADER_CIDS.BLOCK_HASH = temp_canonical_header AND HEADER_CIDS.BLOCK_HASH = (SELECT CANONICAL_HEADER_HASH(HEADER_CIDS.BLOCK_NUMBER))
ORDER BY HEADER_CIDS.BLOCK_NUMBER DESC ORDER BY HEADER_CIDS.BLOCK_NUMBER DESC
LIMIT 1; LIMIT 1;
END IF; END IF;
RETURN QUERY SELECT t.cid, t.mh_key, t.block_number, t.node_type, t.state_leaf_removed from tmp_tt_stg2 as t; RETURN QUERY SELECT t.cid, t.mh_key, t.block_number, t.node_type, t.state_leaf_removed
fROM tmp_tt_stg2 AS t
LIMIT 1;
END END
$$; $$;
CREATE OR REPLACE FUNCTION public.get_storage_at_by_hash(stateLeafKey text, storageLeafKey text, blockHash text) CREATE OR REPLACE FUNCTION public.get_storage_at_by_hash(v_state_leaf_key TEXT, v_storage_leaf_key text, v_block_hash text)
RETURNS TABLE(cid text, mh_key text, block_number bigint, node_type integer, state_leaf_removed bool) LANGUAGE plpgsql RETURNS TABLE
AS $$ (
cid TEXT,
mh_key TEXT,
block_number BIGINT,
node_type INTEGER,
state_leaf_removed BOOL
)
LANGUAGE plpgsql
AS
$$
DECLARE DECLARE
blockNo bigint; v_block_no BIGINT;
BEGIN BEGIN
SELECT h.BLOCK_NUMBER INTO blockNo FROM ETH.HEADER_CIDS as h WHERE BLOCK_HASH = blockHash limit 1; SELECT h.BLOCK_NUMBER INTO v_block_no FROM ETH.HEADER_CIDS AS h WHERE BLOCK_HASH = v_block_hash LIMIT 1;
IF blockNo IS NULL THEN IF v_block_no IS NULL THEN
RETURN; RETURN;
END IF; END IF;
RETURN QUERY SELECT * FROM get_storage_at_by_number(stateLeafKey, storageLeafKey, blockNo); RETURN QUERY SELECT * FROM get_storage_at_by_number(v_state_leaf_key, v_storage_leaf_key, v_block_no);
END END
$$; $$;
-- +goose StatementEnd -- +goose StatementEnd