Merge pull request #72 from deep-stack/pm-v4-multi-node

Multi-node setup to run the migrations
Ashwin Phatak 2022-05-04 16:04:10 +05:30 committed by GitHub
commit 045120ce20
13 changed files with 254 additions and 50 deletions

View File

@@ -2,4 +2,44 @@
Schemas and utils for IPLD ETH Postgres database
## Database UML
![](vulcanize_db.png)
![](vulcanize_db.png)
## Run
* Remove any existing containers / volumes:
```bash
docker-compose down -v --remove-orphans
```
* Spin up an access node and three data nodes using [docker-compose.test.yml](./docker-compose.test.yml):
```bash
docker-compose -f docker-compose.test.yml up timescale-test-db pg_data_node_1 pg_data_node_2 pg_data_node_3
```
The following final output should appear on all three nodes:
```
LOG: TimescaleDB background worker launcher connected to shared catalogs
```
* Edit [startup_script.sh](./scripts/startup_script.sh) to change the number of migrations to be run:
```bash
./goose -dir migrations/vulcanizedb postgres "$VDB_PG_CONNECT" up-to 23
```
* In another terminal window (at the `ipld-eth-db` root), build a `migrations-test` image using [Dockerfile](./db/Dockerfile):
```bash
docker build -t migrations-test -f ./db/Dockerfile .
```
* After the access and data nodes have spun up, start a container from the `migrations-test` image to run the db migrations:
```bash
# The container runs on the host network, so it can reach the
# access node at 127.0.0.1:8066.
docker run --rm --network host -e DATABASE_USER=postgres -e DATABASE_PASSWORD=password -e DATABASE_HOSTNAME=127.0.0.1 -e DATABASE_PORT=8066 -e DATABASE_NAME=vulcanize_testing_v4 migrations-test
```
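* Once the migrations container exits successfully, the applied versions can be checked against the access node. A minimal sketch, assuming `psql` is installed on the host and goose's default version table (`goose_db_version`) is in use:
```bash
# List the most recently applied goose migrations on the access node.
psql "postgresql://postgres:password@127.0.0.1:8066/vulcanize_testing_v4" \
  -c "SELECT version_id, is_applied FROM goose_db_version ORDER BY id DESC LIMIT 5;"
```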

View File

@@ -55,13 +55,13 @@ DROP TABLE eth.header_cids;
DROP TABLE public.blocks;
-- rename new tables
ALTER TABLE eth.log_cids_i RENAME TO eth.log_cids;
ALTER TABLE eth.access_list_elements_i RENAME TO eth.access_list_elements;
ALTER TABLE eth.state_accounts_i RENAME TO eth.state_accounts;
ALTER TABLE eth.storage_cids_i RENAME TO eth.storage_cids;
ALTER TABLE eth.state_cids_i RENAME TO eth.state_cids;
ALTER TABLE eth.receipt_cids_i RENAME TO eth.receipt_cids;
ALTER TABLE eth.transaction_cids_i RENAME TO eth.transaction_cids;
ALTER TABLE eth.uncle_cids_i RENAME TO eth.uncle_cids;
ALTER TABLE eth.header_cids_i RENAME TO eth.header_cids;
ALTER TABLE public.blocks_i RENAME TO public.blocks;
ALTER TABLE eth.log_cids_i RENAME TO log_cids;
ALTER TABLE eth.access_list_elements_i RENAME TO access_list_elements;
ALTER TABLE eth.state_accounts_i RENAME TO state_accounts;
ALTER TABLE eth.storage_cids_i RENAME TO storage_cids;
ALTER TABLE eth.state_cids_i RENAME TO state_cids;
ALTER TABLE eth.receipt_cids_i RENAME TO receipt_cids;
ALTER TABLE eth.transaction_cids_i RENAME TO transaction_cids;
ALTER TABLE eth.uncle_cids_i RENAME TO uncle_cids;
ALTER TABLE eth.header_cids_i RENAME TO header_cids;
ALTER TABLE public.blocks_i RENAME TO blocks;

View File

@@ -0,0 +1,16 @@
-- +goose NO TRANSACTION
-- +goose Up
-- this is generated by a script
SELECT add_data_node('dn3', host => 'pg_data_node_3', port => 5432, database => 'vulcanize_testing_v4', password => 'password');
SELECT add_data_node('dn2', host => 'pg_data_node_2', port => 5432, database => 'vulcanize_testing_v4', password => 'password');
SELECT add_data_node('dn1', host => 'pg_data_node_1', port => 5432, database => 'vulcanize_testing_v4', password => 'password');
CALL distributed_exec($$ CREATE SCHEMA eth $$);
-- +goose Down
-- this is generated by a script
CALL distributed_exec($$ DROP SCHEMA eth $$);
SELECT delete_data_node('dn1', force => true, if_attached => true);
SELECT delete_data_node('dn2', force => true, if_attached => true);
SELECT delete_data_node('dn3', force => true, if_attached => true);
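Note: to confirm the `Up` migration registered all three data nodes, they can be listed from the access node. A sketch against the TimescaleDB 2.x information views, using the port mapping from the compose file:
```bash
psql "postgresql://postgres:password@127.0.0.1:8066/vulcanize_testing_v4" \
  -c "SELECT node_name, owner, options FROM timescaledb_information.data_nodes;"
```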

View File

@@ -1,5 +0,0 @@
-- +goose Up
-- this is generated by a script
-- +goose Down
-- this is generated by a script

View File

@@ -12,16 +12,16 @@ CREATE TABLE eth.header_cids_i (LIKE eth.header_cids INCLUDING ALL);
CREATE TABLE public.blocks_i (LIKE public.blocks INCLUDING ALL);
-- turn them into distributed hypertables
SELECT create_distributed_hypertable('public.blocks_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.header_cids_i', 'block_number' chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.uncle_cids_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.transaction_cids_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.receipt_cids_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.state_cids_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.storage_cids_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.state_accounts_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.access_list_elements_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.log_cids_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('public.blocks_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.header_cids_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.uncle_cids_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.transaction_cids_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.receipt_cids_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.state_cids_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.storage_cids_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.state_accounts_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.access_list_elements_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.log_cids_i', 'block_number', chunk_time_interval => 32768);
-- migrate data
INSERT INTO eth.log_cids_i (SELECT * FROM eth.log_cids);
@@ -48,16 +48,26 @@ DROP TABLE eth.header_cids;
DROP TABLE public.blocks;
-- rename distributed hypertables
ALTER TABLE eth.log_cids_i RENAME TO eth.log_cids;
ALTER TABLE eth.access_list_elements_i RENAME TO eth.access_list_elements;
ALTER TABLE eth.state_accounts_i RENAME TO eth.state_accounts;
ALTER TABLE eth.storage_cids_i RENAME TO eth.storage_cids;
ALTER TABLE eth.state_cids_i RENAME TO eth.state_cids;
ALTER TABLE eth.receipt_cids_i RENAME TO eth.receipt_cids;
ALTER TABLE eth.transaction_cids_i RENAME TO eth.transaction_cids;
ALTER TABLE eth.uncle_cids_i RENAME TO eth.uncle_cids;
ALTER TABLE eth.header_cids_i RENAME TO eth.header_cids;
ALTER TABLE public.blocks_i RENAME TO public.blocks;
ALTER TABLE eth.log_cids_i RENAME TO log_cids;
ALTER TABLE eth.access_list_elements_i RENAME TO access_list_elements;
ALTER TABLE eth.state_accounts_i RENAME TO state_accounts;
ALTER TABLE eth.storage_cids_i RENAME TO storage_cids;
ALTER TABLE eth.state_cids_i RENAME TO state_cids;
ALTER TABLE eth.receipt_cids_i RENAME TO receipt_cids;
ALTER TABLE eth.transaction_cids_i RENAME TO transaction_cids;
ALTER TABLE eth.uncle_cids_i RENAME TO uncle_cids;
ALTER TABLE eth.header_cids_i RENAME TO header_cids;
ALTER TABLE public.blocks_i RENAME TO blocks;
CALL distributed_exec('ALTER TABLE eth.log_cids_i RENAME TO log_cids');
CALL distributed_exec('ALTER TABLE eth.access_list_elements_i RENAME TO access_list_elements');
CALL distributed_exec('ALTER TABLE eth.state_accounts_i RENAME TO state_accounts');
CALL distributed_exec('ALTER TABLE eth.storage_cids_i RENAME TO storage_cids');
CALL distributed_exec('ALTER TABLE eth.state_cids_i RENAME TO state_cids');
CALL distributed_exec('ALTER TABLE eth.receipt_cids_i RENAME TO receipt_cids');
CALL distributed_exec('ALTER TABLE eth.transaction_cids_i RENAME TO transaction_cids');
CALL distributed_exec('ALTER TABLE eth.uncle_cids_i RENAME TO uncle_cids');
CALL distributed_exec('ALTER TABLE eth.header_cids_i RENAME TO header_cids');
CALL distributed_exec('ALTER TABLE public.blocks_i RENAME TO blocks');
-- update version
INSERT INTO public.db_version (singleton, version) VALUES (true, 'v4.0.00-dh')
@@ -81,7 +91,7 @@ CREATE TABLE public.blocks_i (LIKE public.blocks INCLUDING ALL);
-- turn them into hypertables
SELECT create_hypertable('public.blocks_i', 'block_number', migrate_data => true, chunk_time_interval => 32768);
SELECT create_hypertable('eth.header_cids_i', 'block_number' migrate_data => true, chunk_time_interval => 32768);
SELECT create_hypertable('eth.header_cids_i', 'block_number', migrate_data => true, chunk_time_interval => 32768);
SELECT create_hypertable('eth.uncle_cids_i', 'block_number', migrate_data => true, chunk_time_interval => 32768);
SELECT create_hypertable('eth.transaction_cids_i', 'block_number', migrate_data => true, chunk_time_interval => 32768);
SELECT create_hypertable('eth.receipt_cids_i', 'block_number', migrate_data => true, chunk_time_interval => 32768);
@@ -116,13 +126,13 @@ DROP TABLE eth.header_cids;
DROP TABLE public.blocks;
-- rename hypertable tables
ALTER TABLE eth.log_cids_i RENAME TO eth.log_cids;
ALTER TABLE eth.access_list_elements_i RENAME TO eth.access_list_elements;
ALTER TABLE eth.state_accounts_i RENAME TO eth.state_accounts;
ALTER TABLE eth.storage_cids_i RENAME TO eth.storage_cids;
ALTER TABLE eth.state_cids_i RENAME TO eth.state_cids;
ALTER TABLE eth.receipt_cids_i RENAME TO eth.receipt_cids;
ALTER TABLE eth.transaction_cids_i RENAME TO eth.transaction_cids;
ALTER TABLE eth.uncle_cids_i RENAME TO eth.uncle_cids;
ALTER TABLE eth.header_cids_i RENAME TO eth.header_cids;
ALTER TABLE public.blocks_i RENAME TO public.blocks;
ALTER TABLE eth.log_cids_i RENAME TO log_cids;
ALTER TABLE eth.access_list_elements_i RENAME TO access_list_elements;
ALTER TABLE eth.state_accounts_i RENAME TO state_accounts;
ALTER TABLE eth.storage_cids_i RENAME TO storage_cids;
ALTER TABLE eth.state_cids_i RENAME TO state_cids;
ALTER TABLE eth.receipt_cids_i RENAME TO receipt_cids;
ALTER TABLE eth.transaction_cids_i RENAME TO transaction_cids;
ALTER TABLE eth.uncle_cids_i RENAME TO uncle_cids;
ALTER TABLE eth.header_cids_i RENAME TO header_cids;
ALTER TABLE public.blocks_i RENAME TO blocks;
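Note: after this migration runs, each table should report as a distributed hypertable. A quick sanity check from the access node (a sketch; view and column names per TimescaleDB 2.x):
```bash
psql "postgresql://postgres:password@127.0.0.1:8066/vulcanize_testing_v4" \
  -c "SELECT hypertable_schema, hypertable_name, is_distributed, replication_factor FROM timescaledb_information.hypertables;"
```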

View File

@@ -0,0 +1,5 @@
-- +goose Up
-- to be generated by a script
-- +goose Down
-- to be generated by a script

View File

@@ -0,0 +1,27 @@
-- +goose Up
-- to be generated by a script
SELECT set_replication_factor('public.blocks', 3);
SELECT set_replication_factor('eth.header_cids', 3);
SELECT set_replication_factor('eth.uncle_cids', 3);
SELECT set_replication_factor('eth.transaction_cids', 3);
SELECT set_replication_factor('eth.receipt_cids', 3);
SELECT set_replication_factor('eth.state_cids', 3);
SELECT set_replication_factor('eth.storage_cids', 3);
SELECT set_replication_factor('eth.state_accounts', 3);
SELECT set_replication_factor('eth.access_list_elements', 3);
SELECT set_replication_factor('eth.log_cids', 3);
-- +goose Down
-- to be generated by a script
SELECT set_replication_factor('public.blocks', 1);
SELECT set_replication_factor('eth.header_cids', 1);
SELECT set_replication_factor('eth.uncle_cids', 1);
SELECT set_replication_factor('eth.transaction_cids', 1);
SELECT set_replication_factor('eth.receipt_cids', 1);
SELECT set_replication_factor('eth.state_cids', 1);
SELECT set_replication_factor('eth.storage_cids', 1);
SELECT set_replication_factor('eth.state_accounts', 1);
SELECT set_replication_factor('eth.access_list_elements', 1);
SELECT set_replication_factor('eth.log_cids', 1);
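Note: `set_replication_factor` only affects chunks created after the call; existing chunks keep their current replica count. Chunk placement can be inspected from the access node (a sketch; the `data_nodes` column is populated only for distributed hypertables):
```bash
psql "postgresql://postgres:password@127.0.0.1:8066/vulcanize_testing_v4" \
  -c "SELECT hypertable_name, chunk_name, data_nodes FROM timescaledb_information.chunks;"
```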

View File

@@ -16,4 +16,59 @@ services:
POSTGRES_DB: "vulcanize_testing"
POSTGRES_PASSWORD: "password"
ports:
- "127.0.0.1:8066:5432"
- "127.0.0.1:8066:5432"
timescale-test-db:
restart: always
image: timescale/timescaledb:latest-pg14
command: ["postgres", "-c", "log_statement=all"]
environment:
POSTGRES_USER: "postgres"
POSTGRES_DB: "vulcanize_testing_v4"
POSTGRES_PASSWORD: "password"
ports:
- "127.0.0.1:8066:5432"
volumes:
- ./scripts/init-access-node.sh:/docker-entrypoint-initdb.d/init-access-node.sh
pg_data_node_1:
image: timescale/timescaledb:latest-pg14
container_name: pg_data_node_1
restart: unless-stopped
command: ["postgres", "-c", "log_statement=all"]
environment:
POSTGRES_USER: "postgres"
POSTGRES_DB: "vulcanize_testing_v4"
POSTGRES_PASSWORD: "password"
ports:
- "127.0.0.1:8067:5432"
volumes:
- ./scripts/init-data-node.sh:/docker-entrypoint-initdb.d/init-data-node.sh
pg_data_node_2:
image: timescale/timescaledb:latest-pg14
container_name: pg_data_node_2
restart: unless-stopped
command: ["postgres", "-c", "log_statement=all"]
environment:
POSTGRES_USER: "postgres"
POSTGRES_DB: "vulcanize_testing_v4"
POSTGRES_PASSWORD: "password"
ports:
- "127.0.0.1:8068:5432"
volumes:
- ./scripts/init-data-node.sh:/docker-entrypoint-initdb.d/init-data-node.sh
pg_data_node_3:
image: timescale/timescaledb:latest-pg14
container_name: pg_data_node_3
restart: unless-stopped
command: ["postgres", "-c", "log_statement=all"]
environment:
POSTGRES_USER: "postgres"
POSTGRES_DB: "vulcanize_testing_v4"
POSTGRES_PASSWORD: "password"
ports:
- "127.0.0.1:8069:5432"
volumes:
- ./scripts/init-data-node.sh:/docker-entrypoint-initdb.d/init-data-node.sh
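Note: each data node is also published on the host (ports 8067-8069), so an individual node can be inspected directly. A sketch for data node 1:
```bash
# List the eth tables that the access node created on this data node.
psql "postgresql://postgres:password@127.0.0.1:8067/vulcanize_testing_v4" -c "\dt eth.*"
```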

View File

@@ -3,7 +3,7 @@
if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ] || [ -z "$5" ]
then
echo "Env variables not provided"
echo "Usage: ./ipfs_postgres.sh <MIGRATION_FILE_PATH:string> <NODE_NAME:string> <NODE_HOST:string> <NODE_PORT:numeric/string> <NODE_DATABASE:string> <EXECUTE_SQ:bool>"
echo "Usage: ./add_nodes.sh <MIGRATION_FILE_PATH:string> <NODE_NAME:string> <NODE_HOST:string> <NODE_PORT:numeric/string> <NODE_DATABASE:string> <EXECUTE_SQL:bool>"
echo "Only <EXECUTE_SQL> is optional"
exit 1
fi
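Note: a hypothetical invocation matching the usage string above; the migration file path and node details are illustrative only, not taken from the repo:
```bash
./add_nodes.sh ./add_nodes.sql dn1 pg_data_node_1 5432 vulcanize_testing_v4 true
```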

scripts/init-access-node.sh (executable file, 29 lines)
View File

@@ -0,0 +1,29 @@
#!/bin/sh
set -e
# https://docs.timescale.com/timescaledb/latest/how-to-guides/multinode-timescaledb/multinode-config/
# To achieve good query performance you need to enable partition-wise aggregation on the access node. This pushes down aggregation queries to the data nodes.
# https://www.postgresql.org/docs/12/runtime-config-query.html#enable_partitionwise_aggregate
sed -ri "s!^#?(enable_partitionwise_aggregate)\s*=.*!\1 = on!" /var/lib/postgresql/data/postgresql.conf
grep "enable_partitionwise_aggregate" /var/lib/postgresql/data/postgresql.conf
# JIT should be set to off on the access node as JIT currently doesn't work well with distributed queries.
# https://www.postgresql.org/docs/12/runtime-config-query.html#jit
sed -ri "s!^#?(jit)\s*=.*!\1 = off!" /var/lib/postgresql/data/postgresql.conf
grep "jit" /var/lib/postgresql/data/postgresql.conf
# https://docs.timescale.com/timescaledb/latest/how-to-guides/multinode-timescaledb/multinode-auth/
# https://docs.timescale.com/timescaledb/latest/how-to-guides/multinode-timescaledb/multinode-auth/#password-authentication
# Set password_encryption = 'scram-sha-256' in postgresql.conf on the access node.
sed -ri "s!^#?(password_encryption)\s*=.*!\1 = 'scram-sha-256'!" /var/lib/postgresql/data/postgresql.conf
grep "password_encryption" /var/lib/postgresql/data/postgresql.conf
# Append entries of the form *:*:*:ROLE:ROLE_PASSWORD to data/passfile.
# This file stores the passwords for each role that the access node connects to on the data nodes.
echo "*:*:*:postgres:password">>/var/lib/postgresql/data/passfile
chmod 0600 /var/lib/postgresql/data/passfile
# Add "host all all ACCESS_NODE_IP scram-sha-256" pg_hba.conf on the data nodes.
# Skipped. Using default "host all all all scram-sha-256" for now.
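Note: once the container has initialized, the edited settings can be confirmed from the host. A sketch (the access node is mapped to 127.0.0.1:8066 in docker-compose.test.yml):
```bash
psql "postgresql://postgres:password@127.0.0.1:8066/vulcanize_testing_v4" \
  -c "SHOW enable_partitionwise_aggregate;" -c "SHOW jit;" -c "SHOW password_encryption;"
```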

scripts/init-data-node.sh (executable file, 27 lines)
View File

@@ -0,0 +1,27 @@
#!/bin/sh
set -e
# https://docs.timescale.com/timescaledb/latest/how-to-guides/configuration/timescaledb-config/#timescaledb-last-tuned-string
# https://docs.timescale.com/timescaledb/latest/how-to-guides/multi-node-setup/required-configuration/
# It is necessary to change the parameter max_prepared_transactions to a non-zero value ('150' is recommended).
# https://www.postgresql.org/docs/12/runtime-config-resource.html#max_prepared_transactions
sed -ri "s!^#?(max_prepared_transactions)\s*=.*!\1 = 150!" /var/lib/postgresql/data/postgresql.conf
grep "max_prepared_transactions" /var/lib/postgresql/data/postgresql.conf
# Statement timeout should be disabled on the data nodes and managed through the access node configuration if desired.
# https://www.postgresql.org/docs/12/runtime-config-client.html#statement_timeout
sed -ri "s!^#?(statement_timeout)\s*=.*!\1 = 0!" /var/lib/postgresql/data/postgresql.conf
grep "statement_timeout" /var/lib/postgresql/data/postgresql.conf
# On the data nodes, set the wal_level to logical or higher to move or copy chunks between data nodes.
# https://www.postgresql.org/docs/14/runtime-config-wal.html
sed -ri "s!^#?(wal_level)\s*=.*!\1 = 'logical'!" /var/lib/postgresql/data/postgresql.conf
grep "wal_level" /var/lib/postgresql/data/postgresql.conf
# https://docs.timescale.com/timescaledb/latest/how-to-guides/multinode-timescaledb/multinode-auth/
# https://docs.timescale.com/timescaledb/latest/how-to-guides/multinode-timescaledb/multinode-auth/#password-authentication
# Set password_encryption = 'scram-sha-256' in postgresql.conf on the data node.
sed -ri "s!^#?(password_encryption)\s*=.*!\1 = 'scram-sha-256'!" /var/lib/postgresql/data/postgresql.conf
grep "password_encryption" /var/lib/postgresql/data/postgresql.conf

View File

@@ -8,7 +8,7 @@ VDB_PG_CONNECT=postgresql://$DATABASE_USER:$DATABASE_PASSWORD@$DATABASE_HOSTNAME
# Run the DB migrations
echo "Connecting with: $VDB_PG_CONNECT"
echo "Running database migrations"
./goose -dir migrations/vulcanizedb postgres "$VDB_PG_CONNECT" up
./goose -dir migrations/vulcanizedb postgres "$VDB_PG_CONNECT" up-to 23
# If the db migrations ran without error
if [[ $? -eq 0 ]]; then