From c0a91b9d9f6dbecaa718a95b2aeb5facbbfdd96f Mon Sep 17 00:00:00 2001 From: Ian Norden Date: Mon, 9 Nov 2020 10:29:04 -0600 Subject: [PATCH] updated canonical header finder functions --- ...pdated_canonical_hash_finder_functions.sql | 110 ++++++++++ db/schema.sql | 205 ++++++++++++++---- pkg/eth/backend.go | 6 +- pkg/eth/ipld_retriever.go | 4 +- pkg/rpc/check.go | 16 ++ pkg/rpc/http.go | 16 ++ pkg/rpc/ipc.go | 16 ++ pkg/rpc/ws.go | 16 ++ 8 files changed, 339 insertions(+), 50 deletions(-) create mode 100644 db/migrations/00019_updated_canonical_hash_finder_functions.sql diff --git a/db/migrations/00019_updated_canonical_hash_finder_functions.sql b/db/migrations/00019_updated_canonical_hash_finder_functions.sql new file mode 100644 index 00000000..fe7e2459 --- /dev/null +++ b/db/migrations/00019_updated_canonical_hash_finder_functions.sql @@ -0,0 +1,110 @@ +-- +goose Up +ALTER FUNCTION canonical_header RENAME TO canonical_header_id; + +-- +goose StatementBegin +CREATE TYPE child_result AS ( + has_child BOOLEAN, + children eth.header_cids[] +); + +CREATE OR REPLACE FUNCTION has_child(hash VARCHAR(66)) RETURNS child_result AS +$BODY$ +DECLARE + new_child_result child_result; +BEGIN + -- short circuit if there are no children + SELECT exists(SELECT 1 + FROM eth.header_cids + WHERE parent_hash = hash + LIMIT 1) + INTO new_child_result.has_child; + -- collect all the children for this header + IF new_child_result.has_child THEN + SELECT * + INTO new_child_result.children + FROM eth.header_cids + WHERE parent_hash = hash; + ELSE + END IF; + RETURN new_child_result; +END +$BODY$ +LANGUAGE 'plpgsql'; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE OR REPLACE FUNCTION canonical_header_from_set(headers eth.header_cids[]) RETURNS eth.header_cids AS +$BODY$ +DECLARE + canonical_header eth.header_cids; + canonical_child eth.header_cids; + header eth.header_cids; + current_child_result child_result; + child_headers eth.header_cids[]; + current_header_with_child eth.header_cids; + has_children_count INT DEFAULT 0; +BEGIN + -- for each header in the provided set + FOR header IN SELECT * FROM headers + LOOP + -- check if it has any children + SELECT * INTO current_child_result FROM has_child(header.block_hash); + IF current_child_result.has_child THEN + -- if it does, take note + has_children_count = has_children_count + 1; + current_header_with_child = header; + -- and add the children to the growing set of child headers + child_headers = array_cat(child_headers, current_child_result.children); + END IF; + END LOOP; + -- if none of the headers had children, none is more canonical than the other + IF has_children_count = 0 THEN + -- return the first one selected + SELECT * INTO canonical_header FROM headers LIMIT 1; + -- if only one header had children, it can be considered the heaviest/canonical header of the set + ELSIF has_children_count = 1 THEN + -- return the only header with a child + canonical_header = current_header_with_child; + -- if there are multiple headers with children + ELSE + -- find the canonical the canonical header from the child set + canonical_child = canonical_header_from_set(child_headers); + -- the header that is parent to this header, is the canonical header at this level + SELECT * INTO canonical_header FROM headers + WHERE block_hash = canonical_child.parent_hash; + END IF; + RETURN canonical_header; +END +$BODY$ +LANGUAGE 'plpgsql'; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE OR REPLACE FUNCTION canonical_header(height BIGINT) RETURNS eth.header_cids AS +$BODY$ +DECLARE + header_count INT; + headers eth.header_cids[]; +BEGIN + -- collect every header at this height, noting how many are collected + SELECT *, count(*) + INTO headers, header_count + FROM eth.header_cids + WHERE block_number = height; + -- if only one header is present, it can be considered canonical (if no header is present we will throw an error) + IF header_count = 1 THEN + RETURN headers[0]; + END IF; + -- otherwise, if there are multiple headers at this height, we need to determine which is canonical + RETURN canonical_header_from_set(headers); +END +$BODY$ +LANGUAGE 'plpgsql'; +-- +goose StatementEnd + +-- +goose Down +DROP FUNCTION canonical_header; +DROP FUNCTION canonical_header_from_set; +DROP FUNCTION has_child; +DROP TYPE child_result; +ALTER FUNCTION canonical_header_id RENAME TO canonical_header; diff --git a/db/schema.sql b/db/schema.sql index 91be03f0..54b6d0a2 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -2,8 +2,8 @@ -- PostgreSQL database dump -- --- Dumped from database version 12.1 --- Dumped by pg_dump version 12.1 +-- Dumped from database version 12.4 +-- Dumped by pg_dump version 12.4 SET statement_timeout = 0; SET lock_timeout = 0; @@ -23,6 +23,58 @@ SET row_security = off; CREATE SCHEMA eth; +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: header_cids; Type: TABLE; Schema: eth; Owner: - +-- + +CREATE TABLE eth.header_cids ( + id integer NOT NULL, + block_number bigint NOT NULL, + block_hash character varying(66) NOT NULL, + parent_hash character varying(66) NOT NULL, + cid text NOT NULL, + mh_key text NOT NULL, + td numeric NOT NULL, + node_id integer NOT NULL, + reward numeric NOT NULL, + state_root character varying(66) NOT NULL, + tx_root character varying(66) NOT NULL, + receipt_root character varying(66) NOT NULL, + uncle_root character varying(66) NOT NULL, + bloom bytea NOT NULL, + "timestamp" numeric NOT NULL, + times_validated integer DEFAULT 1 NOT NULL +); + + +-- +-- Name: TABLE header_cids; Type: COMMENT; Schema: eth; Owner: - +-- + +COMMENT ON TABLE eth.header_cids IS '@name EthHeaderCids'; + + +-- +-- Name: COLUMN header_cids.node_id; Type: COMMENT; Schema: eth; Owner: - +-- + +COMMENT ON COLUMN eth.header_cids.node_id IS '@name EthNodeID'; + + +-- +-- Name: child_result; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.child_result AS ( + has_child boolean, + children eth.header_cids[] +); + + -- -- Name: graphql_subscription(); Type: FUNCTION; Schema: eth; Owner: - -- @@ -55,7 +107,83 @@ $_$; -- Name: canonical_header(bigint); Type: FUNCTION; Schema: public; Owner: - -- -CREATE FUNCTION public.canonical_header(height bigint) RETURNS integer +CREATE FUNCTION public.canonical_header(height bigint) RETURNS eth.header_cids + LANGUAGE plpgsql + AS $$ +DECLARE + header_count INT; + headers eth.header_cids[]; +BEGIN + -- collect every header at this height, noting how many are collected + SELECT *, count(*) + INTO headers, header_count + FROM eth.header_cids + WHERE block_number = height; + -- if only one header is present, it can be considered canonical (if no header is present we will throw an error) + IF header_count = 1 THEN + RETURN headers[0]; + END IF; + -- otherwise, if there are multiple headers at this height, we need to determine which is canonical + RETURN canonical_header_from_set(headers); +END +$$; + + +-- +-- Name: canonical_header_from_set(eth.header_cids[]); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.canonical_header_from_set(headers eth.header_cids[]) RETURNS eth.header_cids + LANGUAGE plpgsql + AS $$ +DECLARE + canonical_header eth.header_cids; + canonical_child eth.header_cids; + header eth.header_cids; + current_child_result child_result; + child_headers eth.header_cids[]; + current_header_with_child eth.header_cids; + has_children_count INT DEFAULT 0; +BEGIN + -- for each header in the provided set + FOR header IN SELECT * FROM headers + LOOP + -- check if it has any children + SELECT * INTO current_child_result FROM has_child(header.block_hash); + IF current_child_result.has_child THEN + -- if it does, take note + has_children_count = has_children_count + 1; + current_header_with_child = header; + -- and add the children to the growing set of child headers + child_headers = array_cat(child_headers, current_child_result.children); + END IF; + END LOOP; + -- if none of the headers had children, none is more canonical than the other + IF has_children_count = 0 THEN + -- return the first one selected + SELECT * INTO canonical_header FROM headers LIMIT 1; + -- if only one header had children, it can be considered the heaviest/canonical header of the set + ELSIF has_children_count = 1 THEN + -- return the only header with a child + canonical_header = current_header_with_child; + -- if there are multiple headers with children + ELSE + -- find the canonical the canonical header from the child set + canonical_child = canonical_header_from_set(child_headers); + -- the header that is parent to this header, is the canonical header at this level + SELECT * INTO canonical_header FROM headers + WHERE block_hash = canonical_child.parent_hash; + END IF; + RETURN canonical_header; +END +$$; + + +-- +-- Name: canonical_header_id(bigint); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.canonical_header_id(height bigint) RETURNS integer LANGUAGE plpgsql AS $$ DECLARE @@ -78,6 +206,35 @@ END $$; +-- +-- Name: has_child(character varying); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.has_child(hash character varying) RETURNS public.child_result + LANGUAGE plpgsql + AS $$ +DECLARE + new_child_result child_result; +BEGIN + -- short circuit if there are no children + SELECT exists(SELECT 1 + FROM eth.header_cids + WHERE parent_hash = hash + LIMIT 1) + INTO new_child_result.has_child; + -- collect all the children for this header + IF new_child_result.has_child THEN + SELECT * + INTO new_child_result.children + FROM eth.header_cids + WHERE parent_hash = hash; + ELSE + END IF; + RETURN new_child_result; +END +$$; + + -- -- Name: header_weight(character varying); Type: FUNCTION; Schema: public; Owner: - -- @@ -139,48 +296,6 @@ CREATE FUNCTION public.was_storage_removed(path bytea, height bigint, hash chara $$; -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: header_cids; Type: TABLE; Schema: eth; Owner: - --- - -CREATE TABLE eth.header_cids ( - id integer NOT NULL, - block_number bigint NOT NULL, - block_hash character varying(66) NOT NULL, - parent_hash character varying(66) NOT NULL, - cid text NOT NULL, - mh_key text NOT NULL, - td numeric NOT NULL, - node_id integer NOT NULL, - reward numeric NOT NULL, - state_root character varying(66) NOT NULL, - tx_root character varying(66) NOT NULL, - receipt_root character varying(66) NOT NULL, - uncle_root character varying(66) NOT NULL, - bloom bytea NOT NULL, - "timestamp" numeric NOT NULL, - times_validated integer DEFAULT 1 NOT NULL -); - - --- --- Name: TABLE header_cids; Type: COMMENT; Schema: eth; Owner: - --- - -COMMENT ON TABLE eth.header_cids IS '@name EthHeaderCids'; - - --- --- Name: COLUMN header_cids.node_id; Type: COMMENT; Schema: eth; Owner: - --- - -COMMENT ON COLUMN eth.header_cids.node_id IS '@name EthNodeID'; - - -- -- Name: header_cids_id_seq; Type: SEQUENCE; Schema: eth; Owner: - -- diff --git a/pkg/eth/backend.go b/pkg/eth/backend.go index 0cb4c52c..b5d9f797 100644 --- a/pkg/eth/backend.go +++ b/pkg/eth/backend.go @@ -55,10 +55,10 @@ var ( const ( RetrieveCanonicalBlockHashByNumber = `SELECT block_hash FROM eth.header_cids INNER JOIN public.blocks ON (header_cids.mh_key = blocks.key) - WHERE id = (SELECT canonical_header($1))` + WHERE id = (SELECT canonical_header_id($1))` RetrieveCanonicalHeaderByNumber = `SELECT cid, data FROM eth.header_cids INNER JOIN public.blocks ON (header_cids.mh_key = blocks.key) - WHERE id = (SELECT canonical_header($1))` + WHERE id = (SELECT canonical_header_id($1))` RetrieveTD = `SELECT td FROM eth.header_cids WHERE header_cids.block_hash = $1` RetrieveRPCTransaction = `SELECT blocks.data, block_hash, block_number, index FROM public.blocks, eth.transaction_cids, eth.header_cids @@ -72,7 +72,7 @@ const ( AND block_number <= (SELECT block_number FROM eth.header_cids WHERE block_hash = $2) - AND header_cids.id = (SELECT canonical_header(block_number)) + AND header_cids.id = (SELECT canonical_header_id(block_number)) ORDER BY block_number DESC LIMIT 1` RetrieveCodeByMhKey = `SELECT data FROM public.blocks WHERE key = $1` diff --git a/pkg/eth/ipld_retriever.go b/pkg/eth/ipld_retriever.go index 8418bfe6..caf5dad5 100644 --- a/pkg/eth/ipld_retriever.go +++ b/pkg/eth/ipld_retriever.go @@ -108,7 +108,7 @@ const ( AND block_number <= (SELECT block_number FROM eth.header_cids WHERE block_hash = $2) - AND header_cids.id = (SELECT canonical_header(block_number)) + AND header_cids.id = (SELECT canonical_header_id(block_number)) ORDER BY block_number DESC LIMIT 1` RetrieveAccountByLeafKeyAndBlockNumberPgStr = `SELECT state_cids.cid, @@ -151,7 +151,7 @@ const ( AND block_number <= (SELECT block_number FROM eth.header_cids WHERE block_hash = $3) - AND header_cids.id = (SELECT canonical_header(block_number)) + AND header_cids.id = (SELECT canonical_header_id(block_number)) ORDER BY block_number DESC LIMIT 1` ) diff --git a/pkg/rpc/check.go b/pkg/rpc/check.go index 754f752e..0921568a 100644 --- a/pkg/rpc/check.go +++ b/pkg/rpc/check.go @@ -1,3 +1,19 @@ +// VulcanizeDB +// Copyright © 2020 Vulcanize + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + package rpc import "github.com/ethereum/go-ethereum/rpc" diff --git a/pkg/rpc/http.go b/pkg/rpc/http.go index ae1ab03e..5cb85ca0 100644 --- a/pkg/rpc/http.go +++ b/pkg/rpc/http.go @@ -1,3 +1,19 @@ +// VulcanizeDB +// Copyright © 2020 Vulcanize + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + package rpc import ( diff --git a/pkg/rpc/ipc.go b/pkg/rpc/ipc.go index 4e170ebe..7b8e1c28 100644 --- a/pkg/rpc/ipc.go +++ b/pkg/rpc/ipc.go @@ -1,3 +1,19 @@ +// VulcanizeDB +// Copyright © 2020 Vulcanize + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + package rpc import ( diff --git a/pkg/rpc/ws.go b/pkg/rpc/ws.go index cff19ab9..585cab62 100644 --- a/pkg/rpc/ws.go +++ b/pkg/rpc/ws.go @@ -1,3 +1,19 @@ +// VulcanizeDB +// Copyright © 2020 Vulcanize + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + package rpc import (