Draft: Split get_storage_at into two paths: one optimistic and simple, the other more comprehensive but slower. (#117)

* Split get_storage_at into two paths: one optimisic and simple, the other more exhaustive but slower. * Remove is null check * Fix name * Update comment
2022-11-18 12:32:35 -06:00 · 2022-11-18 12:32:35 -06:00 · 268a282eac
commit 268a282eac
parent 53461a0996
1 changed files with 97 additions and 30 deletions
--- a/db/migrations/00023_get_storage_at_functions.sql
+++ b/db/migrations/00023_get_storage_at_functions.sql
@ -12,10 +12,76 @@ $$
 language sql;

 CREATE OR REPLACE FUNCTION public.get_storage_at_by_number(stateLeafKey text, storageLeafKey text, blockNo bigint)
-    RETURNS TABLE(cid text, mh_key text, block_number bigint, node_type integer, state_leaf_removed bool) LANGUAGE plpgsql
-AS $$
+    RETURNS TABLE
+            (
+                cid                text,
+                mh_key             text,
+                block_number       bigint,
+                node_type          integer,
+                state_leaf_removed bool
+            )
+    LANGUAGE plpgsql
+AS
+$$
+DECLARE
+    statePath             bytea;
+    temp_header           text;
+    temp_canonical_header text;
 BEGIN
-    RETURN QUERY SELECT STORAGE_CIDS.CID,
+    CREATE TEMP TABLE tmp_tt_stg2
+    (
+        header_id          text,
+        cid                text,
+        mh_key             text,
+        block_number       bigint,
+        node_type          integer,
+        state_leaf_removed bool
+    ) ON COMMIT DROP;
+
+    SELECT state_path, STATE_CIDS.block_number
+    INTO statePath, blockNo
+    FROM ETH.STATE_CIDS
+    WHERE STATE_LEAF_KEY = stateLeafKey
+      AND STATE_CIDS.BLOCK_NUMBER <= blockNo
+    ORDER BY STATE_CIDS.BLOCK_NUMBER DESC
+    LIMIT 1;
+
+    INSERT INTO tmp_tt_stg2
+    SELECT STORAGE_CIDS.HEADER_ID,
+           STORAGE_CIDS.CID,
+           STORAGE_CIDS.MH_KEY,
+           STORAGE_CIDS.BLOCK_NUMBER,
+           STORAGE_CIDS.NODE_TYPE,
+           was_state_leaf_removed_by_number(
+                   stateLeafKey,
+                   blockNo
+               ) AS STATE_LEAF_REMOVED
+    FROM eth.storage_cids
+             INNER JOIN ETH.STATE_CIDS ON (
+                STORAGE_CIDS.HEADER_ID = STATE_CIDS.HEADER_ID
+            AND STORAGE_CIDS.BLOCK_NUMBER = STATE_CIDS.BLOCK_NUMBER
+        )
+    WHERE storage_leaf_key = storageLeafKey
+      AND storage_cids.block_number <= blockNo
+      AND storage_cids.state_path = statePath
+      AND state_leaf_key = stateLeafKey
+      AND storage_cids.block_number <= blockNo
+    ORDER BY state_cids.block_number DESC
+    LIMIT 1;
+
+    SELECT header_id, canonical_header_hash(tmp_tt_stg2.block_number), tmp_tt_stg2.block_number
+    into temp_header, temp_canonical_header, blockNo
+    from tmp_tt_stg2;
+    IF temp_header IS NOT NULL AND temp_header != temp_canonical_header THEN
+        raise notice 'get_storage_at_by_number: chosen header % != canonical header % for block number %, trying again.', temp_header, temp_canonical_header, blockNo;
+        TRUNCATE tmp_tt_stg2;
+        -- If we chose a non-canonical block, we need to go back and get the right one.  While we could do this in one
+        -- query if we wanted, the disadvantage would be that it is very uncommon, while the join on
+        -- STORAGE_CIDS.STATE_PATH = STATE_CIDS.STATE_PATH is quite expensive whenever there are a lot of candidate rows,
+        -- so we wish to avoid that more expensive check when possible.
+        INSERT INTO tmp_tt_stg2
+        SELECT STORAGE_CIDS.HEADER_ID,
+               STORAGE_CIDS.CID,
               STORAGE_CIDS.MH_KEY,
               STORAGE_CIDS.BLOCK_NUMBER,
               STORAGE_CIDS.NODE_TYPE,
@ -28,21 +94,22 @@ BEGIN
                    STORAGE_CIDS.HEADER_ID = STATE_CIDS.HEADER_ID
                AND STORAGE_CIDS.BLOCK_NUMBER = STATE_CIDS.BLOCK_NUMBER
                AND STORAGE_CIDS.STATE_PATH = STATE_CIDS.STATE_PATH
-                         AND STORAGE_CIDS.BLOCK_NUMBER <= blockNo
            )
                 INNER JOIN ETH.HEADER_CIDS ON (
                    STATE_CIDS.HEADER_ID = HEADER_CIDS.BLOCK_HASH
                AND STATE_CIDS.BLOCK_NUMBER = HEADER_CIDS.BLOCK_NUMBER
-                         AND STATE_CIDS.BLOCK_NUMBER <= blockNo
            )
        WHERE STATE_LEAF_KEY = stateLeafKey
-                   AND STATE_CIDS.BLOCK_NUMBER <= blockNo
+          AND STATE_CIDS.BLOCK_NUMBER = blockNo
          AND STORAGE_LEAF_KEY = storageLeafKey
-                   AND STORAGE_CIDS.BLOCK_NUMBER <= blockNo
-                   AND HEADER_CIDS.BLOCK_NUMBER <= blockNo
-                   AND HEADER_CIDS.BLOCK_HASH = (SELECT CANONICAL_HEADER_HASH(HEADER_CIDS.BLOCK_NUMBER))
+          AND STORAGE_CIDS.BLOCK_NUMBER = blockNo
+          AND HEADER_CIDS.BLOCK_NUMBER = blockNo
+          AND HEADER_CIDS.BLOCK_HASH = temp_canonical_header
        ORDER BY HEADER_CIDS.BLOCK_NUMBER DESC
        LIMIT 1;
+    END IF;
+
+    RETURN QUERY SELECT t.cid, t.mh_key, t.block_number, t.node_type, t.state_leaf_removed from tmp_tt_stg2 as t;
 END
 $$;