[WIP] Handle restarts in fixturenet-eth stack (#324)

* Use mounted volumes for data in geth nodes

* Use mounted volumes for data in lighthouse nodes

* Avoid resetting genesis time in a lighthouse node on restart

* Mount parent datadir for lighthouse nodes

* Trap signals on shutdown and clean up in lighthouse nodes

* Allow stalled sync in lighthouse beacon nodes

* Gracefully shutdown geth nodes

* Add clean up instructions

* Gracefully shutdown lighthouse boot node
This commit is contained in:
prathamesh0 2023-04-19 12:22:13 +05:30 committed by GitHub
parent cf79f0de0a
commit 3130af1615
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 113 additions and 24 deletions

View File

@ -8,6 +8,8 @@ services:
environment:
RUN_BOOTNODE: "true"
image: cerc/fixturenet-eth-geth:local
volumes:
- fixturenet_eth_bootnode_geth_data:/root/ethdata
ports:
- "9898"
- "30303"
@ -26,6 +28,7 @@ services:
image: cerc/fixturenet-eth-geth:local
volumes:
- fixturenet_geth_accounts:/opt/testnet/build/el
- fixturenet_eth_geth_1_data:/root/ethdata
healthcheck:
test: ["CMD", "nc", "-v", "localhost", "8545"]
interval: 30s
@ -52,12 +55,16 @@ services:
image: cerc/fixturenet-eth-geth:local
depends_on:
- fixturenet-eth-bootnode-geth
volumes:
- fixturenet_eth_geth_2_data:/root/ethdata
fixturenet-eth-bootnode-lighthouse:
hostname: fixturenet-eth-bootnode-lighthouse
environment:
RUN_BOOTNODE: "true"
image: cerc/fixturenet-eth-lighthouse:local
volumes:
- fixturenet_eth_bootnode_lighthouse_data:/opt/testnet/build/cl
fixturenet-eth-lighthouse-1:
hostname: fixturenet-eth-lighthouse-1
@ -74,6 +81,8 @@ services:
ETH1_ENDPOINT: "http://fixturenet-eth-geth-1:8545"
EXECUTION_ENDPOINT: "http://fixturenet-eth-geth-1:8551"
image: cerc/fixturenet-eth-lighthouse:local
volumes:
- fixturenet_eth_lighthouse_1_data:/opt/testnet/build/cl
depends_on:
fixturenet-eth-bootnode-lighthouse:
condition: service_started
@ -98,6 +107,8 @@ services:
EXECUTION_ENDPOINT: "http://fixturenet-eth-geth-2:8551"
LIGHTHOUSE_GENESIS_STATE_URL: "http://fixturenet-eth-lighthouse-1:8001/eth/v2/debug/beacon/states/0"
image: cerc/fixturenet-eth-lighthouse:local
volumes:
- fixturenet_eth_lighthouse_2_data:/opt/testnet/build/cl
depends_on:
fixturenet-eth-bootnode-lighthouse:
condition: service_started
@ -106,3 +117,9 @@ services:
volumes:
fixturenet_geth_accounts:
fixturenet_eth_bootnode_geth_data:
fixturenet_eth_geth_1_data:
fixturenet_eth_geth_2_data:
fixturenet_eth_bootnode_lighthouse_data:
fixturenet_eth_lighthouse_1_data:
fixturenet_eth_lighthouse_2_data:

View File

@ -22,6 +22,6 @@ COPY run-el.sh /opt/testnet/run.sh
RUN cd /opt/testnet && make genesis-el
COPY --from=geth /usr/local/bin/geth /usr/local/bin/
RUN geth init /opt/testnet/build/el/geth.json && rm -f ~/.ethereum/geth/nodekey
RUN geth --datadir ~/ethdata init /opt/testnet/build/el/geth.json && rm -f ~/ethdata/geth/nodekey
ENTRYPOINT ["/opt/testnet/run.sh"]

View File

@ -12,6 +12,6 @@ for line in `cat ../build/el/accounts.csv`; do
echo ""
echo "$ADDRESS"
geth account import --password .pw.$$ .key.$$
geth account import --datadir=~/ethdata --password .pw.$$ .key.$$
rm -f .pw.$$ .key.$$
done

View File

@ -18,13 +18,31 @@ if [ "true" == "$CERC_REMOTE_DEBUG" ] && [ -x "/usr/local/bin/dlv" ]; then
START_CMD="/usr/local/bin/dlv --listen=:40000 --headless=true --api-version=2 --accept-multiclient exec /usr/local/bin/geth --continue --"
fi
# See https://linuxconfig.org/how-to-propagate-a-signal-to-child-processes-from-a-bash-script
# Signal handler: stop the geth process tree and any remaining background
# jobs, then wait for them to exit.
# Globals:   geth_pid (read) - PID of the backgrounded start command; may be
#            unset if the signal arrives before the node has been launched.
# Outputs:   progress messages on stdout
cleanup() {
  echo "Signal received, cleaning up..."

  # Kill the child process first (CERC_REMOTE_DEBUG=true uses dlv which starts geth as a child process)
  # Skipped when no PID was recorded yet: "pkill -P" without a PID is an error.
  if [ -n "${geth_pid:-}" ]; then
    pkill -P "${geth_pid}"
    sleep 2
  fi

  # Only signal background jobs if there are any; a bare "kill" prints a usage error.
  local job_pids
  job_pids=$(jobs -p)
  if [ -n "${job_pids}" ]; then
    # Intentionally unquoted: one PID per word.
    kill ${job_pids}
  fi

  wait
  echo "Done"
}
trap 'cleanup' SIGINT SIGTERM
if [ "true" == "$RUN_BOOTNODE" ]; then
$START_CMD \
--datadir=~/ethdata \
--nodekeyhex="${BOOTNODE_KEY}" \
--nodiscover \
--ipcdisable \
--networkid=${NETWORK_ID} \
--netrestrict="${NETRESTRICT}"
--netrestrict="${NETRESTRICT}" \
&
geth_pid=$!
else
cd /opt/testnet/accounts
./import_keys.sh
@ -74,6 +92,7 @@ else
fi
$START_CMD \
--datadir=~/ethdata \
--bootnodes="${ENODE}" \
--allow-insecure-unlock \
--http \
@ -101,5 +120,10 @@ else
--metrics.addr="0.0.0.0" \
--verbosity=${CERC_GETH_VERBOSITY:-3} \
--vmodule="${CERC_GETH_VMODULE:-statediff/*=5}" \
--miner.etherbase="${ETHERBASE}" ${STATEDIFF_OPTS}
--miner.etherbase="${ETHERBASE}" ${STATEDIFF_OPTS} \
&
geth_pid=$!
fi
wait $geth_pid

View File

@ -19,9 +19,9 @@ http_port=8001
authrpc_port=8551
exec lighthouse \
--debug-level $DEBUG_LEVEL \
bn \
$SUBSCRIBE_ALL_SUBNETS \
--debug-level $DEBUG_LEVEL \
--boot-nodes "$ENR" \
--datadir $data_dir \
--testnet-dir $TESTNET_DIR \
@ -38,4 +38,5 @@ exec lighthouse \
--execution-jwt $JWTSECRET \
--terminal-total-difficulty-override $ETH1_TTD \
--suggested-fee-recipient $SUGGESTED_FEE_RECIPIENT \
--target-peers $((BN_COUNT - 1))
--target-peers $((BN_COUNT - 1)) \
--http-allow-sync-stalled \

View File

@ -21,13 +21,21 @@ if [ ! -f "$DATADIR/bootnode/enr.dat" ]; then
--udp-port $BOOTNODE_PORT \
--tcp-port $BOOTNODE_PORT \
--genesis-fork-version $GENESIS_FORK_VERSION \
--output-dir $DATADIR/bootnode
--output-dir $DATADIR/bootnode-temp
# Output ENR to a temp dir and mv as "lcli generate-bootnode-enr" will not overwrite an empty dir (mounted volume)
mkdir -p $DATADIR/bootnode
mv $DATADIR/bootnode-temp/* $DATADIR/bootnode
rm -r $DATADIR/bootnode-temp
echo "Generated bootnode enr"
else
echo "Found existing bootnode enr"
fi
bootnode_enr=`cat $DATADIR/bootnode/enr.dat`
echo "- $bootnode_enr" > $TESTNET_DIR/boot_enr.yaml
echo "Generated bootnode enr and written to $TESTNET_DIR/boot_enr.yaml"
fi
echo "Written bootnode enr to $TESTNET_DIR/boot_enr.yaml"
exec lighthouse boot_node \
--testnet-dir $TESTNET_DIR \

View File

@ -16,4 +16,4 @@ lcli \
$TESTNET_DIR/genesis.ssz \
$NOW
echo "Reset genesis time to now ($NOW)"
echo "Reset genesis time to ($NOW)"

View File

@ -21,9 +21,9 @@ while getopts "pd:" flag; do
done
exec lighthouse \
--debug-level $DEBUG_LEVEL \
vc \
$BUILDER_PROPOSALS \
--debug-level $DEBUG_LEVEL \
--validators-dir $DATADIR/node_$NODE_NUMBER/validators \
--secrets-dir $DATADIR/node_$NODE_NUMBER/secrets \
--testnet-dir $TESTNET_DIR \

View File

@ -1,12 +1,25 @@
#!/bin/bash
# See https://linuxconfig.org/how-to-propagate-a-signal-to-child-processes-from-a-bash-script
# Signal handler: terminate all background jobs started by this script and
# wait for them to exit.
# Outputs:   progress messages on stdout
cleanup() {
  echo "Signal received, cleaning up..."

  # Only signal background jobs if there are any; a bare "kill" prints a usage error.
  local job_pids
  job_pids=$(jobs -p)
  if [ -n "${job_pids}" ]; then
    # Intentionally unquoted: one PID per word.
    kill ${job_pids}
  fi

  wait
  echo "Done"
}
trap 'cleanup' SIGINT SIGTERM
if [ "true" == "$RUN_BOOTNODE" ]; then
cd /opt/testnet/build/cl
python3 -m http.server 3000 &
cd /opt/testnet/cl
./bootnode.sh 2>&1 | tee /var/log/lighthouse_bootnode.log
./bootnode.sh 2>&1 | tee /var/log/lighthouse_bootnode.log &
bootnode_pid=$!
wait $bootnode_pid
else
while [ 1 -eq 1 ]; do
echo "Waiting on geth ..."
@ -25,7 +38,12 @@ else
cd /opt/testnet/cl
if [ -z "$LIGHTHOUSE_GENESIS_STATE_URL" ]; then
# Check if beacon node data exists to avoid resetting genesis time on a restart
if [ -d /opt/testnet/build/cl/node_"$NODE_NUMBER"/beacon ]; then
echo "Skipping genesis time reset"
else
./reset_genesis_time.sh
fi
else
while [ 1 -eq 1 ]; do
echo "Waiting on Genesis time ..."
@ -54,10 +72,9 @@ else
echo -n "$JWT" > $JWTSECRET
./beacon_node.sh 2>&1 | tee /var/log/lighthouse_bn.log &
lpid=$!
beacon_pid=$!
./validator_client.sh 2>&1 | tee /var/log/lighthouse_vc.log &
vpid=$!
validator_pid=$!
wait $lpid $vpid
wait $beacon_pid $validator_pid
fi

View File

@ -3,14 +3,17 @@
Instructions for deploying a local geth + lighthouse blockchain "fixturenet" for development and testing purposes using laconic-stack-orchestrator (the installation of which is covered [here](https://github.com/cerc-io/stack-orchestrator#user-mode)):
## Clone required repositories
```
$ laconic-so --stack fixturenet-eth setup-repositories
```
## Build the fixturenet-eth containers
```
$ laconic-so --stack fixturenet-eth build-containers
```
This should create several container images in the local image registry:
* cerc/go-ethereum
@ -19,6 +22,7 @@ This should create several container images in the local image registry:
* cerc/fixturenet-eth-lighthouse
## Deploy the stack
```
$ laconic-so --stack fixturenet-eth deploy up
```
@ -100,3 +104,21 @@ keycloak-nginx-1 0.0.0.0:55859->80/tcp
migrations-1
tx-spammer-1
```
## Clean up
Stop all services running in the background:
```bash
$ laconic-so --stack fixturenet-eth deploy down
```
Clear volumes created by this stack:
```bash
# List all relevant volumes
$ docker volume ls -q --filter "name=.*fixturenet_eth_bootnode_geth_data|.*fixturenet_eth_bootnode_lighthouse_data|.*fixturenet_eth_geth_1_data|.*fixturenet_eth_geth_2_data|.*fixturenet_eth_lighthouse_1_data|.*fixturenet_eth_lighthouse_2_data|.*fixturenet_geth_accounts"
# Remove all the listed volumes
$ docker volume rm $(docker volume ls -q --filter "name=.*fixturenet_eth_bootnode_geth_data|.*fixturenet_eth_bootnode_lighthouse_data|.*fixturenet_eth_geth_1_data|.*fixturenet_eth_geth_2_data|.*fixturenet_eth_lighthouse_1_data|.*fixturenet_eth_lighthouse_2_data|.*fixturenet_geth_accounts")
```