[WIP] Handle restarts in fixturenet-eth stack (#324)

* Use mounted volumes for data in geth nodes

* Use mounted volumes for data in lighthouse nodes

* Avoid resetting genesis time in a lighthouse node on restart

* Mount parent datadir for lighthouse nodes

* Trap signals on shutdown and clean up in lighthouse nodes

* Allow stalled sync in lighthouse beacon nodes

* Gracefully shutdown geth nodes

* Add clean up instructions

* Gracefully shutdown lighthouse boot node
This commit is contained in:
prathamesh0 2023-04-19 12:22:13 +05:30 committed by GitHub
parent cf79f0de0a
commit 3130af1615
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 113 additions and 24 deletions

View File

@ -8,6 +8,8 @@ services:
environment: environment:
RUN_BOOTNODE: "true" RUN_BOOTNODE: "true"
image: cerc/fixturenet-eth-geth:local image: cerc/fixturenet-eth-geth:local
volumes:
- fixturenet_eth_bootnode_geth_data:/root/ethdata
ports: ports:
- "9898" - "9898"
- "30303" - "30303"
@ -26,6 +28,7 @@ services:
image: cerc/fixturenet-eth-geth:local image: cerc/fixturenet-eth-geth:local
volumes: volumes:
- fixturenet_geth_accounts:/opt/testnet/build/el - fixturenet_geth_accounts:/opt/testnet/build/el
- fixturenet_eth_geth_1_data:/root/ethdata
healthcheck: healthcheck:
test: ["CMD", "nc", "-v", "localhost", "8545"] test: ["CMD", "nc", "-v", "localhost", "8545"]
interval: 30s interval: 30s
@ -52,12 +55,16 @@ services:
image: cerc/fixturenet-eth-geth:local image: cerc/fixturenet-eth-geth:local
depends_on: depends_on:
- fixturenet-eth-bootnode-geth - fixturenet-eth-bootnode-geth
volumes:
- fixturenet_eth_geth_2_data:/root/ethdata
fixturenet-eth-bootnode-lighthouse: fixturenet-eth-bootnode-lighthouse:
hostname: fixturenet-eth-bootnode-lighthouse hostname: fixturenet-eth-bootnode-lighthouse
environment: environment:
RUN_BOOTNODE: "true" RUN_BOOTNODE: "true"
image: cerc/fixturenet-eth-lighthouse:local image: cerc/fixturenet-eth-lighthouse:local
volumes:
- fixturenet_eth_bootnode_lighthouse_data:/opt/testnet/build/cl
fixturenet-eth-lighthouse-1: fixturenet-eth-lighthouse-1:
hostname: fixturenet-eth-lighthouse-1 hostname: fixturenet-eth-lighthouse-1
@ -74,6 +81,8 @@ services:
ETH1_ENDPOINT: "http://fixturenet-eth-geth-1:8545" ETH1_ENDPOINT: "http://fixturenet-eth-geth-1:8545"
EXECUTION_ENDPOINT: "http://fixturenet-eth-geth-1:8551" EXECUTION_ENDPOINT: "http://fixturenet-eth-geth-1:8551"
image: cerc/fixturenet-eth-lighthouse:local image: cerc/fixturenet-eth-lighthouse:local
volumes:
- fixturenet_eth_lighthouse_1_data:/opt/testnet/build/cl
depends_on: depends_on:
fixturenet-eth-bootnode-lighthouse: fixturenet-eth-bootnode-lighthouse:
condition: service_started condition: service_started
@ -98,6 +107,8 @@ services:
EXECUTION_ENDPOINT: "http://fixturenet-eth-geth-2:8551" EXECUTION_ENDPOINT: "http://fixturenet-eth-geth-2:8551"
LIGHTHOUSE_GENESIS_STATE_URL: "http://fixturenet-eth-lighthouse-1:8001/eth/v2/debug/beacon/states/0" LIGHTHOUSE_GENESIS_STATE_URL: "http://fixturenet-eth-lighthouse-1:8001/eth/v2/debug/beacon/states/0"
image: cerc/fixturenet-eth-lighthouse:local image: cerc/fixturenet-eth-lighthouse:local
volumes:
- fixturenet_eth_lighthouse_2_data:/opt/testnet/build/cl
depends_on: depends_on:
fixturenet-eth-bootnode-lighthouse: fixturenet-eth-bootnode-lighthouse:
condition: service_started condition: service_started
@ -106,3 +117,9 @@ services:
volumes: volumes:
fixturenet_geth_accounts: fixturenet_geth_accounts:
fixturenet_eth_bootnode_geth_data:
fixturenet_eth_geth_1_data:
fixturenet_eth_geth_2_data:
fixturenet_eth_bootnode_lighthouse_data:
fixturenet_eth_lighthouse_1_data:
fixturenet_eth_lighthouse_2_data:

View File

@ -22,6 +22,6 @@ COPY run-el.sh /opt/testnet/run.sh
RUN cd /opt/testnet && make genesis-el RUN cd /opt/testnet && make genesis-el
COPY --from=geth /usr/local/bin/geth /usr/local/bin/ COPY --from=geth /usr/local/bin/geth /usr/local/bin/
RUN geth init /opt/testnet/build/el/geth.json && rm -f ~/.ethereum/geth/nodekey RUN geth --datadir ~/ethdata init /opt/testnet/build/el/geth.json && rm -f ~/ethdata/geth/nodekey
ENTRYPOINT ["/opt/testnet/run.sh"] ENTRYPOINT ["/opt/testnet/run.sh"]

View File

@ -12,6 +12,6 @@ for line in `cat ../build/el/accounts.csv`; do
echo "" echo ""
echo "$ADDRESS" echo "$ADDRESS"
geth account import --password .pw.$$ .key.$$ geth account import --datadir=~/ethdata --password .pw.$$ .key.$$
rm -f .pw.$$ .key.$$ rm -f .pw.$$ .key.$$
done done

View File

@ -18,13 +18,31 @@ if [ "true" == "$CERC_REMOTE_DEBUG" ] && [ -x "/usr/local/bin/dlv" ]; then
START_CMD="/usr/local/bin/dlv --listen=:40000 --headless=true --api-version=2 --accept-multiclient exec /usr/local/bin/geth --continue --" START_CMD="/usr/local/bin/dlv --listen=:40000 --headless=true --api-version=2 --accept-multiclient exec /usr/local/bin/geth --continue --"
fi fi
# Signal handler for SIGINT/SIGTERM: shuts down the backgrounded geth process
# (and any other background jobs of this script) before the script exits.
# Globals:
#   geth_pid (read) - PID of the backgrounded $START_CMD. May still be unset if
#                     the signal arrives before geth has been launched.
# Outputs:
#   Progress messages on stdout.
# See https://linuxconfig.org/how-to-propagate-a-signal-to-child-processes-from-a-bash-script
cleanup() {
  echo "Signal received, cleaning up..."

  # Only signal children (and give them time to stop) once geth has actually
  # been started; otherwise pkill would be invoked without a parent PID.
  if [ -n "${geth_pid:-}" ]; then
    # Kill the child process first (CERC_REMOTE_DEBUG=true uses dlv which starts geth as a child process)
    pkill -P "${geth_pid}"
    sleep 2
  fi

  # `kill` with no arguments is a usage error, so skip it when nothing is
  # running in the background.
  local job_pids
  job_pids=$(jobs -p)
  if [ -n "${job_pids}" ]; then
    # Intentionally unquoted: `jobs -p` yields one PID per line.
    kill ${job_pids}
  fi

  # Reap whatever was signalled so the processes get to finish shutting down.
  wait
  echo "Done"
}
trap 'cleanup' SIGINT SIGTERM
if [ "true" == "$RUN_BOOTNODE" ]; then if [ "true" == "$RUN_BOOTNODE" ]; then
$START_CMD \ $START_CMD \
--datadir=~/ethdata \
--nodekeyhex="${BOOTNODE_KEY}" \ --nodekeyhex="${BOOTNODE_KEY}" \
--nodiscover \ --nodiscover \
--ipcdisable \ --ipcdisable \
--networkid=${NETWORK_ID} \ --networkid=${NETWORK_ID} \
--netrestrict="${NETRESTRICT}" --netrestrict="${NETRESTRICT}" \
&
geth_pid=$!
else else
cd /opt/testnet/accounts cd /opt/testnet/accounts
./import_keys.sh ./import_keys.sh
@ -74,6 +92,7 @@ else
fi fi
$START_CMD \ $START_CMD \
--datadir=~/ethdata \
--bootnodes="${ENODE}" \ --bootnodes="${ENODE}" \
--allow-insecure-unlock \ --allow-insecure-unlock \
--http \ --http \
@ -101,5 +120,10 @@ else
--metrics.addr="0.0.0.0" \ --metrics.addr="0.0.0.0" \
--verbosity=${CERC_GETH_VERBOSITY:-3} \ --verbosity=${CERC_GETH_VERBOSITY:-3} \
--vmodule="${CERC_GETH_VMODULE:-statediff/*=5}" \ --vmodule="${CERC_GETH_VMODULE:-statediff/*=5}" \
--miner.etherbase="${ETHERBASE}" ${STATEDIFF_OPTS} --miner.etherbase="${ETHERBASE}" ${STATEDIFF_OPTS} \
&
geth_pid=$!
fi fi
wait $geth_pid

View File

@ -19,9 +19,9 @@ http_port=8001
authrpc_port=8551 authrpc_port=8551
exec lighthouse \ exec lighthouse \
--debug-level $DEBUG_LEVEL \
bn \ bn \
$SUBSCRIBE_ALL_SUBNETS \ $SUBSCRIBE_ALL_SUBNETS \
--debug-level $DEBUG_LEVEL \
--boot-nodes "$ENR" \ --boot-nodes "$ENR" \
--datadir $data_dir \ --datadir $data_dir \
--testnet-dir $TESTNET_DIR \ --testnet-dir $TESTNET_DIR \
@ -38,4 +38,5 @@ exec lighthouse \
--execution-jwt $JWTSECRET \ --execution-jwt $JWTSECRET \
--terminal-total-difficulty-override $ETH1_TTD \ --terminal-total-difficulty-override $ETH1_TTD \
--suggested-fee-recipient $SUGGESTED_FEE_RECIPIENT \ --suggested-fee-recipient $SUGGESTED_FEE_RECIPIENT \
--target-peers $((BN_COUNT - 1)) --target-peers $((BN_COUNT - 1)) \
--http-allow-sync-stalled \

View File

@ -21,13 +21,21 @@ if [ ! -f "$DATADIR/bootnode/enr.dat" ]; then
--udp-port $BOOTNODE_PORT \ --udp-port $BOOTNODE_PORT \
--tcp-port $BOOTNODE_PORT \ --tcp-port $BOOTNODE_PORT \
--genesis-fork-version $GENESIS_FORK_VERSION \ --genesis-fork-version $GENESIS_FORK_VERSION \
--output-dir $DATADIR/bootnode --output-dir $DATADIR/bootnode-temp
# Output ENR to a temp dir and mv as "lcli generate-bootnode-enr" will not overwrite an empty dir (mounted volume)
mkdir -p $DATADIR/bootnode
mv $DATADIR/bootnode-temp/* $DATADIR/bootnode
rm -r $DATADIR/bootnode-temp
echo "Generated bootnode enr"
else
echo "Found existing bootnode enr"
fi
bootnode_enr=`cat $DATADIR/bootnode/enr.dat` bootnode_enr=`cat $DATADIR/bootnode/enr.dat`
echo "- $bootnode_enr" > $TESTNET_DIR/boot_enr.yaml echo "- $bootnode_enr" > $TESTNET_DIR/boot_enr.yaml
echo "Written bootnode enr to $TESTNET_DIR/boot_enr.yaml"
echo "Generated bootnode enr and written to $TESTNET_DIR/boot_enr.yaml"
fi
exec lighthouse boot_node \ exec lighthouse boot_node \
--testnet-dir $TESTNET_DIR \ --testnet-dir $TESTNET_DIR \

View File

@ -16,4 +16,4 @@ lcli \
$TESTNET_DIR/genesis.ssz \ $TESTNET_DIR/genesis.ssz \
$NOW $NOW
echo "Reset genesis time to now ($NOW)" echo "Reset genesis time to ($NOW)"

View File

@ -21,9 +21,9 @@ while getopts "pd:" flag; do
done done
exec lighthouse \ exec lighthouse \
--debug-level $DEBUG_LEVEL \
vc \ vc \
$BUILDER_PROPOSALS \ $BUILDER_PROPOSALS \
--debug-level $DEBUG_LEVEL \
--validators-dir $DATADIR/node_$NODE_NUMBER/validators \ --validators-dir $DATADIR/node_$NODE_NUMBER/validators \
--secrets-dir $DATADIR/node_$NODE_NUMBER/secrets \ --secrets-dir $DATADIR/node_$NODE_NUMBER/secrets \
--testnet-dir $TESTNET_DIR \ --testnet-dir $TESTNET_DIR \

View File

@ -1,12 +1,25 @@
#!/bin/bash #!/bin/bash
# Signal handler for SIGINT/SIGTERM: terminates every background job started
# by this script and waits for them to exit.
# Outputs:
#   Progress messages on stdout.
# See https://linuxconfig.org/how-to-propagate-a-signal-to-child-processes-from-a-bash-script
cleanup() {
  echo "Signal received, cleaning up..."

  # `kill` with no arguments is a usage error, so skip it when the signal
  # arrives before any background job has been started.
  local job_pids
  job_pids=$(jobs -p)
  if [ -n "${job_pids}" ]; then
    # Intentionally unquoted: `jobs -p` yields one PID per line.
    kill ${job_pids}
  fi

  # Reap the signalled jobs so they get to finish shutting down.
  wait
  echo "Done"
}
trap 'cleanup' SIGINT SIGTERM
if [ "true" == "$RUN_BOOTNODE" ]; then if [ "true" == "$RUN_BOOTNODE" ]; then
cd /opt/testnet/build/cl cd /opt/testnet/build/cl
python3 -m http.server 3000 & python3 -m http.server 3000 &
cd /opt/testnet/cl cd /opt/testnet/cl
./bootnode.sh 2>&1 | tee /var/log/lighthouse_bootnode.log ./bootnode.sh 2>&1 | tee /var/log/lighthouse_bootnode.log &
bootnode_pid=$!
wait $bootnode_pid
else else
while [ 1 -eq 1 ]; do while [ 1 -eq 1 ]; do
echo "Waiting on geth ..." echo "Waiting on geth ..."
@ -25,7 +38,12 @@ else
cd /opt/testnet/cl cd /opt/testnet/cl
if [ -z "$LIGHTHOUSE_GENESIS_STATE_URL" ]; then if [ -z "$LIGHTHOUSE_GENESIS_STATE_URL" ]; then
# Check if beacon node data exists to avoid resetting genesis time on a restart
if [ -d /opt/testnet/build/cl/node_"$NODE_NUMBER"/beacon ]; then
echo "Skipping genesis time reset"
else
./reset_genesis_time.sh ./reset_genesis_time.sh
fi
else else
while [ 1 -eq 1 ]; do while [ 1 -eq 1 ]; do
echo "Waiting on Genesis time ..." echo "Waiting on Genesis time ..."
@ -54,10 +72,9 @@ else
echo -n "$JWT" > $JWTSECRET echo -n "$JWT" > $JWTSECRET
./beacon_node.sh 2>&1 | tee /var/log/lighthouse_bn.log & ./beacon_node.sh 2>&1 | tee /var/log/lighthouse_bn.log &
lpid=$! beacon_pid=$!
./validator_client.sh 2>&1 | tee /var/log/lighthouse_vc.log & ./validator_client.sh 2>&1 | tee /var/log/lighthouse_vc.log &
vpid=$! validator_pid=$!
wait $lpid $vpid wait $beacon_pid $validator_pid
fi fi

View File

@ -3,14 +3,17 @@
Instructions for deploying a local geth + lighthouse blockchain "fixturenet" for development and testing purposes using laconic-stack-orchestrator (the installation of which is covered [here](https://github.com/cerc-io/stack-orchestrator#user-mode)): Instructions for deploying a local geth + lighthouse blockchain "fixturenet" for development and testing purposes using laconic-stack-orchestrator (the installation of which is covered [here](https://github.com/cerc-io/stack-orchestrator#user-mode)):
## Clone required repositories ## Clone required repositories
``` ```
$ laconic-so --stack fixturenet-eth setup-repositories $ laconic-so --stack fixturenet-eth setup-repositories
``` ```
## Build the fixturenet-eth containers ## Build the fixturenet-eth containers
``` ```
$ laconic-so --stack fixturenet-eth build-containers $ laconic-so --stack fixturenet-eth build-containers
``` ```
This should create several container images in the local image registry: This should create several container images in the local image registry:
* cerc/go-ethereum * cerc/go-ethereum
@ -19,6 +22,7 @@ This should create several container images in the local image registry:
* cerc/fixturenet-eth-lighthouse * cerc/fixturenet-eth-lighthouse
## Deploy the stack ## Deploy the stack
``` ```
$ laconic-so --stack fixturenet-eth deploy up $ laconic-so --stack fixturenet-eth deploy up
``` ```
@ -100,3 +104,21 @@ keycloak-nginx-1 0.0.0.0:55859->80/tcp
migrations-1 migrations-1
tx-spammer-1 tx-spammer-1
``` ```
## Clean up
Stop all services running in the background:
```bash
$ laconic-so --stack fixturenet-eth deploy down
```
Clear volumes created by this stack:
```bash
# List all relevant volumes
$ docker volume ls -q --filter "name=.*fixturenet_eth_bootnode_geth_data|.*fixturenet_eth_bootnode_lighthouse_data|.*fixturenet_eth_geth_1_data|.*fixturenet_eth_geth_2_data|.*fixturenet_eth_lighthouse_1_data|.*fixturenet_eth_lighthouse_2_data|.*fixturenet_geth_accounts"
# Remove all the listed volumes
$ docker volume rm $(docker volume ls -q --filter "name=.*fixturenet_eth_bootnode_geth_data|.*fixturenet_eth_bootnode_lighthouse_data|.*fixturenet_eth_geth_1_data|.*fixturenet_eth_geth_2_data|.*fixturenet_eth_lighthouse_1_data|.*fixturenet_eth_lighthouse_2_data|.*fixturenet_geth_accounts")
```