Multi-node setup to run the migrations

This commit is contained in:
Prathamesh Musale 2022-04-29 08:33:16 +05:30
parent 96bd0b5d9a
commit 06d9ef96e7
10 changed files with 186 additions and 20 deletions

View File

@ -3,3 +3,43 @@ Schemas and utils for IPLD ETH Postgres database
## Database UML
![](vulcanize_db.png)
## Run
* Remove any existing containers / volumes:
```bash
docker-compose down -v --remove-orphans
```
* Spin up an access node and two data nodes using [docker-compose.test.yml](./docker-compose.test.yml):
```bash
docker-compose -f docker-compose.test.yml up timescale-test-db pg_data_node_1 pg_data_node_2
```
Following final output should be seen on each node:
```
LOG: TimescaleDB background worker launcher connected to shared catalogs
```
* Edit [startup_script.sh](./scripts/startup_script.sh) to change the number of migrations to be run:
```bash
./goose -dir migrations/vulcanizedb postgres "$VDB_PG_CONNECT" up-to 21
```
* In another `ipld-eth-db` terminal window, build an image `migrations-test` using [Dockerfile](./db/Dockerfile):
```bash
docker build -t migrations-test -f ./db/Dockerfile .
```
* After the access and data nodes have spun up, start a container using `migrations-test` image to run the db migrations:
```bash
# Here, we are running the container using host network.
# So connect to access node on 127.0.0.1:8066
docker run --rm --network host -e DATABASE_USER=postgres -e DATABASE_PASSWORD=password -e DATABASE_HOSTNAME=127.0.0.1 -e DATABASE_PORT=8066 -e DATABASE_NAME=vulcanize_testing_v4 migrations-test
```

View File

@ -0,0 +1,34 @@
-- +goose NO TRANSACTION
-- +goose Up
-- this is generated by a script
SELECT add_data_node('dn2', host => 'pg_data_node_2', port => 5432, database => 'vulcanize_testing_v4', password => 'password');
-- SELECT attach_data_node('dn2', 'eth.log_cids', if_not_attached => true);
-- SELECT attach_data_node('dn2', 'eth.access_list_elements', if_not_attached => true);
-- SELECT attach_data_node('dn2', 'eth.state_accounts', if_not_attached => true);
-- SELECT attach_data_node('dn2', 'eth.storage_cids', if_not_attached => true);
-- SELECT attach_data_node('dn2', 'eth.state_cid', if_not_attached => true);
-- SELECT attach_data_node('dn2', 'eth.receipt_cids', if_not_attached => true);
-- SELECT attach_data_node('dn2', 'eth.transaction_cids', if_not_attached => true);
-- SELECT attach_data_node('dn2', 'eth.uncle_cids', if_not_attached => true);
-- SELECT attach_data_node('dn2', 'eth.header_cid', if_not_attached => true);
-- SELECT attach_data_node('dn2', 'public.blocks', if_not_attached => true);
SELECT add_data_node('dn1', host => 'pg_data_node_1', port => 5432, database => 'vulcanize_testing_v4', password => 'password');
-- SELECT attach_data_node('dn1', 'eth.log_cids', if_not_attached => true);
-- SELECT attach_data_node('dn1', 'eth.access_list_elements', if_not_attached => true);
-- SELECT attach_data_node('dn1', 'eth.state_accounts', if_not_attached => true);
-- SELECT attach_data_node('dn1', 'eth.storage_cids', if_not_attached => true);
-- SELECT attach_data_node('dn1', 'eth.state_cid', if_not_attached => true);
-- SELECT attach_data_node('dn1', 'eth.receipt_cids', if_not_attached => true);
-- SELECT attach_data_node('dn1', 'eth.transaction_cids', if_not_attached => true);
-- SELECT attach_data_node('dn1', 'eth.uncle_cids', if_not_attached => true);
-- SELECT attach_data_node('dn1', 'eth.header_cid', if_not_attached => true);
-- SELECT attach_data_node('dn1', 'public.blocks', if_not_attached => true);
CALL distributed_exec($$ CREATE SCHEMA eth $$);
-- +goose Down
-- this is generated by a script
CALL distributed_exec($$ DROP SCHEMA eth $$);
-- SELECT detach_data_node('dn1', force => true, if_attached = true);
SELECT delete_data_node('dn1', force => true, if_attached => true);
-- SELECT detach_data_node('dn2', force => true, if_attached = true);
SELECT delete_data_node('dn2', force => true, if_attached => true);

View File

@ -1,5 +0,0 @@
-- +goose Up
-- this is generated by a script
-- +goose Down
-- this is generated by a script

View File

@ -12,16 +12,16 @@ CREATE TABLE eth.header_cids_i (LIKE eth.header_cids INCLUDING ALL);
CREATE TABLE public.blocks_i (LIKE public.blocks INCLUDING ALL);
-- turn them into distributed hypertables
SELECT create_distributed_hypertable('public.blocks_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.header_cids_i', 'block_number' chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.uncle_cids_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.transaction_cids_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.receipt_cids_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.state_cids_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.storage_cids_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.state_accounts_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.access_list_elements_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('eth.log_cids_i', 'block_number', chunk_time_interval => 32768, replication_factor => 3);
SELECT create_distributed_hypertable('public.blocks_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.header_cids_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.uncle_cids_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.transaction_cids_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.receipt_cids_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.state_cids_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.storage_cids_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.state_accounts_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.access_list_elements_i', 'block_number', chunk_time_interval => 32768);
SELECT create_distributed_hypertable('eth.log_cids_i', 'block_number', chunk_time_interval => 32768);
-- migrate data
INSERT INTO eth.log_cids_i (SELECT * FROM eth.log_cids);
@ -81,7 +81,7 @@ CREATE TABLE public.blocks_i (LIKE public.blocks INCLUDING ALL);
-- turn them into hypertables
SELECT create_hypertable('public.blocks_i', 'block_number', migrate_data => true, chunk_time_interval => 32768);
SELECT create_hypertable('eth.header_cids_i', 'block_number' migrate_data => true, chunk_time_interval => 32768);
SELECT create_hypertable('eth.header_cids_i', 'block_number', migrate_data => true, chunk_time_interval => 32768);
SELECT create_hypertable('eth.uncle_cids_i', 'block_number', migrate_data => true, chunk_time_interval => 32768);
SELECT create_hypertable('eth.transaction_cids_i', 'block_number', migrate_data => true, chunk_time_interval => 32768);
SELECT create_hypertable('eth.receipt_cids_i', 'block_number', migrate_data => true, chunk_time_interval => 32768);

View File

@ -0,0 +1,5 @@
-- +goose Up
-- to be generated by a script
-- +goose Down
-- to be generated by a script

View File

@ -17,3 +17,44 @@ services:
POSTGRES_PASSWORD: "password"
ports:
- "127.0.0.1:8066:5432"
timescale-test-db:
restart: always
image: timescale/timescaledb:latest-pg14
command: ["postgres", "-c", "log_statement=all"]
environment:
POSTGRES_USER: "postgres"
POSTGRES_DB: "vulcanize_testing_v4"
POSTGRES_PASSWORD: "password"
ports:
- "127.0.0.1:8066:5432"
volumes:
- ./scripts/init-access-node.sh:/docker-entrypoint-initdb.d/init-access-node.sh
pg_data_node_1:
image: timescale/timescaledb:latest-pg14
container_name: pg_data_node_1
restart: unless-stopped
command: ["postgres", "-c", "log_statement=all"]
environment:
POSTGRES_USER: "postgres"
POSTGRES_DB: "vulcanize_testing_v4"
POSTGRES_PASSWORD: "password"
ports:
- "127.0.0.1:8067:5432"
volumes:
- ./scripts/init-data-node.sh:/docker-entrypoint-initdb.d/init-data-node.sh
pg_data_node_2:
image: timescale/timescaledb:latest-pg14
container_name: pg_data_node_2
restart: unless-stopped
command: ["postgres", "-c", "log_statement=all"]
environment:
POSTGRES_USER: "postgres"
POSTGRES_DB: "vulcanize_testing_v4"
POSTGRES_PASSWORD: "password"
ports:
- "127.0.0.1:8068:5432"
volumes:
- ./scripts/init-data-node.sh:/docker-entrypoint-initdb.d/init-data-node.sh

View File

@ -3,7 +3,7 @@
if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ] || [ -z "$5" ]
then
echo "Env variables not provided"
echo "Usage: ./ipfs_postgres.sh <MIGRATION_FILE_PATH:string> <NODE_NAME:string> <NODE_HOST:string> <NODE_PORT:numeric/string> <NODE_DATABASE:string> <EXECUTE_SQ:bool>"
echo "Usage: ./add_nodes.sh <MIGRATION_FILE_PATH:string> <NODE_NAME:string> <NODE_HOST:string> <NODE_PORT:numeric/string> <NODE_DATABASE:string> <EXECUTE_SQL:bool>"
echo "Only <EXECUTE_SQL> is optional"
exit 1
fi

View File

@ -0,0 +1,29 @@
#!/bin/sh
set -e
# https://docs.timescale.com/timescaledb/latest/how-to-guides/multinode-timescaledb/multinode-config/
# To achieve good query performance you need to enable partition-wise aggregation on the access node. This pushes down aggregation queries to the data nodes.
# https://www.postgresql.org/docs/12/runtime-config-query.html#enable_partitionwise_aggregate
sed -ri "s!^#?(enable_partitionwise_aggregate)\s*=.*!\1 = on!" /var/lib/postgresql/data/postgresql.conf
grep "enable_partitionwise_aggregate" /var/lib/postgresql/data/postgresql.conf
# JIT should be set to off on the access node as JIT currently doesn't work well with distributed queries.
# https://www.postgresql.org/docs/12/runtime-config-query.html#jit
sed -ri "s!^#?(jit)\s*=.*!\1 = off!" /var/lib/postgresql/data/postgresql.conf
grep "jit" /var/lib/postgresql/data/postgresql.conf
# https://docs.timescale.com/timescaledb/latest/how-to-guides/multinode-timescaledb/multinode-auth/
# https://docs.timescale.com/timescaledb/latest/how-to-guides/multinode-timescaledb/multinode-auth/#password-authentication
# Set password_encryption = 'scram-sha-256' in postgresql.conf on the access node.
sed -ri "s!^#?(password_encryption)\s*=.*!\1 = 'scram-sha-256'!" /var/lib/postgresql/data/postgresql.conf
grep "password_encryption" /var/lib/postgresql/data/postgresql.conf
# Append to data/passfile *:*:*:ROLE:ROLE_PASSWORD
# This file stores the passwords for each role that the access node connects to on the data nodes.
echo "*:*:*:postgres:password">>/var/lib/postgresql/data/passfile
chmod 0600 /var/lib/postgresql/data/passfile
# Add "host all all ACCESS_NODE_IP scram-sha-256" pg_hba.conf on the data nodes.
# Skipped. Using default "host all all all scram-sha-256" for now.

22
scripts/init-data-node.sh Executable file
View File

@ -0,0 +1,22 @@
#!/bin/sh
set -e
# https://docs.timescale.com/timescaledb/latest/how-to-guides/configuration/timescaledb-config/#timescaledb-last-tuned-string
# https://docs.timescale.com/timescaledb/latest/how-to-guides/multi-node-setup/required-configuration/
# It is necessary to change the parameter max_prepared_transactions to a non-zero value ('150' is recommended).
# https://www.postgresql.org/docs/12/runtime-config-resource.html#max_prepared_transactions
sed -ri "s!^#?(max_prepared_transactions)\s*=.*!\1 = 150!" /var/lib/postgresql/data/postgresql.conf
grep "max_prepared_transactions" /var/lib/postgresql/data/postgresql.conf
# Statement timeout should be disabled on the data nodes and managed through the access node configuration if desired.
# https://www.postgresql.org/docs/12/runtime-config-client.html#statement_timeout
sed -ri "s!^#?(statement_timeout)\s*=.*!\1 = 0!" /var/lib/postgresql/data/postgresql.conf
grep "statement_timeout" /var/lib/postgresql/data/postgresql.conf
# https://docs.timescale.com/timescaledb/latest/how-to-guides/multinode-timescaledb/multinode-auth/
# https://docs.timescale.com/timescaledb/latest/how-to-guides/multinode-timescaledb/multinode-auth/#password-authentication
# Set password_encryption = 'scram-sha-256' in postgresql.conf on the data node.
sed -ri "s!^#?(password_encryption)\s*=.*!\1 = 'scram-sha-256'!" /var/lib/postgresql/data/postgresql.conf
grep "password_encryption" /var/lib/postgresql/data/postgresql.conf

View File

@ -8,7 +8,7 @@ VDB_PG_CONNECT=postgresql://$DATABASE_USER:$DATABASE_PASSWORD@$DATABASE_HOSTNAME
# Run the DB migrations
echo "Connecting with: $VDB_PG_CONNECT"
echo "Running database migrations"
./goose -dir migrations/vulcanizedb postgres "$VDB_PG_CONNECT" up
./goose -dir migrations/vulcanizedb postgres "$VDB_PG_CONNECT" up-to 21
# If the db migrations ran without err
if [[ $? -eq 0 ]]; then