[WIP] Handle restarts in fixturenet-eth stack (#324)
* Use mounted volumes for data in geth nodes * Use mounted volumes for data in lighthouse nodes * Avoid resetting genesis time in a lighthouse node on restart * Mount parent datadir for lighthouse nodes * Trap signals on shutdown and clean up in lighthouse nodes * Allow stalled sync in lighthouse beacon nodes * Gracefully shutdown geth nodes * Add clean up instructions * Gracefully shutdown lighthouse boot node
This commit is contained in:
parent
cf79f0de0a
commit
3130af1615
@ -8,6 +8,8 @@ services:
|
||||
environment:
|
||||
RUN_BOOTNODE: "true"
|
||||
image: cerc/fixturenet-eth-geth:local
|
||||
volumes:
|
||||
- fixturenet_eth_bootnode_geth_data:/root/ethdata
|
||||
ports:
|
||||
- "9898"
|
||||
- "30303"
|
||||
@ -26,6 +28,7 @@ services:
|
||||
image: cerc/fixturenet-eth-geth:local
|
||||
volumes:
|
||||
- fixturenet_geth_accounts:/opt/testnet/build/el
|
||||
- fixturenet_eth_geth_1_data:/root/ethdata
|
||||
healthcheck:
|
||||
test: ["CMD", "nc", "-v", "localhost", "8545"]
|
||||
interval: 30s
|
||||
@ -52,12 +55,16 @@ services:
|
||||
image: cerc/fixturenet-eth-geth:local
|
||||
depends_on:
|
||||
- fixturenet-eth-bootnode-geth
|
||||
volumes:
|
||||
- fixturenet_eth_geth_2_data:/root/ethdata
|
||||
|
||||
fixturenet-eth-bootnode-lighthouse:
|
||||
hostname: fixturenet-eth-bootnode-lighthouse
|
||||
environment:
|
||||
RUN_BOOTNODE: "true"
|
||||
image: cerc/fixturenet-eth-lighthouse:local
|
||||
volumes:
|
||||
- fixturenet_eth_bootnode_lighthouse_data:/opt/testnet/build/cl
|
||||
|
||||
fixturenet-eth-lighthouse-1:
|
||||
hostname: fixturenet-eth-lighthouse-1
|
||||
@ -74,6 +81,8 @@ services:
|
||||
ETH1_ENDPOINT: "http://fixturenet-eth-geth-1:8545"
|
||||
EXECUTION_ENDPOINT: "http://fixturenet-eth-geth-1:8551"
|
||||
image: cerc/fixturenet-eth-lighthouse:local
|
||||
volumes:
|
||||
- fixturenet_eth_lighthouse_1_data:/opt/testnet/build/cl
|
||||
depends_on:
|
||||
fixturenet-eth-bootnode-lighthouse:
|
||||
condition: service_started
|
||||
@ -98,6 +107,8 @@ services:
|
||||
EXECUTION_ENDPOINT: "http://fixturenet-eth-geth-2:8551"
|
||||
LIGHTHOUSE_GENESIS_STATE_URL: "http://fixturenet-eth-lighthouse-1:8001/eth/v2/debug/beacon/states/0"
|
||||
image: cerc/fixturenet-eth-lighthouse:local
|
||||
volumes:
|
||||
- fixturenet_eth_lighthouse_2_data:/opt/testnet/build/cl
|
||||
depends_on:
|
||||
fixturenet-eth-bootnode-lighthouse:
|
||||
condition: service_started
|
||||
@ -106,3 +117,9 @@ services:
|
||||
|
||||
volumes:
|
||||
fixturenet_geth_accounts:
|
||||
fixturenet_eth_bootnode_geth_data:
|
||||
fixturenet_eth_geth_1_data:
|
||||
fixturenet_eth_geth_2_data:
|
||||
fixturenet_eth_bootnode_lighthouse_data:
|
||||
fixturenet_eth_lighthouse_1_data:
|
||||
fixturenet_eth_lighthouse_2_data:
|
||||
|
@ -22,6 +22,6 @@ COPY run-el.sh /opt/testnet/run.sh
|
||||
RUN cd /opt/testnet && make genesis-el
|
||||
|
||||
COPY --from=geth /usr/local/bin/geth /usr/local/bin/
|
||||
RUN geth init /opt/testnet/build/el/geth.json && rm -f ~/.ethereum/geth/nodekey
|
||||
RUN geth --datadir ~/ethdata init /opt/testnet/build/el/geth.json && rm -f ~/ethdata/geth/nodekey
|
||||
|
||||
ENTRYPOINT ["/opt/testnet/run.sh"]
|
||||
|
@ -12,6 +12,6 @@ for line in `cat ../build/el/accounts.csv`; do
|
||||
|
||||
echo ""
|
||||
echo "$ADDRESS"
|
||||
geth account import --password .pw.$$ .key.$$
|
||||
geth account import --datadir=~/ethdata --password .pw.$$ .key.$$
|
||||
rm -f .pw.$$ .key.$$
|
||||
done
|
||||
|
@ -18,17 +18,35 @@ if [ "true" == "$CERC_REMOTE_DEBUG" ] && [ -x "/usr/local/bin/dlv" ]; then
|
||||
START_CMD="/usr/local/bin/dlv --listen=:40000 --headless=true --api-version=2 --accept-multiclient exec /usr/local/bin/geth --continue --"
|
||||
fi
|
||||
|
||||
# See https://linuxconfig.org/how-to-propagate-a-signal-to-child-processes-from-a-bash-script
|
||||
# Signal handler: stops the process started via START_CMD together with any
# other background jobs of this script, then waits for them to exit.
# Reads the global geth_pid, which is only set once START_CMD has been
# launched in the background; it may still be unset when a signal arrives early.
cleanup() {
  echo "Signal received, cleaning up..."

  # Kill the child process first (CERC_REMOTE_DEBUG=true uses dlv which starts geth as a child process)
  if [ -n "${geth_pid:-}" ]; then
    pkill -P "${geth_pid}"
    # Short pause between signalling the child and signalling the job itself.
    sleep 2
  fi

  # Collect the PIDs of all background jobs into the function's positional
  # parameters (these are local to the function call).
  # shellcheck disable=SC2046 # splitting into one PID per word is intended
  set -- $(jobs -p)

  # A bare `kill` without PIDs only prints a usage error, so skip it when
  # nothing is running in the background.
  if [ "$#" -gt 0 ]; then
    kill "$@"
  fi

  wait
  echo "Done"
}
|
||||
trap 'cleanup' SIGINT SIGTERM
|
||||
|
||||
if [ "true" == "$RUN_BOOTNODE" ]; then
|
||||
$START_CMD \
|
||||
--datadir=~/ethdata \
|
||||
--nodekeyhex="${BOOTNODE_KEY}" \
|
||||
--nodiscover \
|
||||
--ipcdisable \
|
||||
--networkid=${NETWORK_ID} \
|
||||
--netrestrict="${NETRESTRICT}"
|
||||
--netrestrict="${NETRESTRICT}" \
|
||||
&
|
||||
|
||||
geth_pid=$!
|
||||
else
|
||||
cd /opt/testnet/accounts
|
||||
./import_keys.sh
|
||||
|
||||
|
||||
echo -n "$JWT" > /opt/testnet/build/el/jwtsecret
|
||||
|
||||
if [ "$CERC_RUN_STATEDIFF" == "detect" ] && [ -n "$CERC_STATEDIFF_DB_HOST" ]; then
|
||||
@ -74,6 +92,7 @@ else
|
||||
fi
|
||||
|
||||
$START_CMD \
|
||||
--datadir=~/ethdata \
|
||||
--bootnodes="${ENODE}" \
|
||||
--allow-insecure-unlock \
|
||||
--http \
|
||||
@ -101,5 +120,10 @@ else
|
||||
--metrics.addr="0.0.0.0" \
|
||||
--verbosity=${CERC_GETH_VERBOSITY:-3} \
|
||||
--vmodule="${CERC_GETH_VMODULE:-statediff/*=5}" \
|
||||
--miner.etherbase="${ETHERBASE}" ${STATEDIFF_OPTS}
|
||||
--miner.etherbase="${ETHERBASE}" ${STATEDIFF_OPTS} \
|
||||
&
|
||||
|
||||
geth_pid=$!
|
||||
fi
|
||||
|
||||
wait $geth_pid
|
||||
|
@ -19,9 +19,9 @@ http_port=8001
|
||||
authrpc_port=8551
|
||||
|
||||
exec lighthouse \
|
||||
--debug-level $DEBUG_LEVEL \
|
||||
bn \
|
||||
$SUBSCRIBE_ALL_SUBNETS \
|
||||
--debug-level $DEBUG_LEVEL \
|
||||
--boot-nodes "$ENR" \
|
||||
--datadir $data_dir \
|
||||
--testnet-dir $TESTNET_DIR \
|
||||
@ -38,4 +38,5 @@ exec lighthouse \
|
||||
--execution-jwt $JWTSECRET \
|
||||
--terminal-total-difficulty-override $ETH1_TTD \
|
||||
--suggested-fee-recipient $SUGGESTED_FEE_RECIPIENT \
|
||||
--target-peers $((BN_COUNT - 1))
|
||||
--target-peers $((BN_COUNT - 1)) \
|
||||
--http-allow-sync-stalled \
|
||||
|
@ -21,14 +21,22 @@ if [ ! -f "$DATADIR/bootnode/enr.dat" ]; then
|
||||
--udp-port $BOOTNODE_PORT \
|
||||
--tcp-port $BOOTNODE_PORT \
|
||||
--genesis-fork-version $GENESIS_FORK_VERSION \
|
||||
--output-dir $DATADIR/bootnode
|
||||
--output-dir $DATADIR/bootnode-temp
|
||||
|
||||
bootnode_enr=`cat $DATADIR/bootnode/enr.dat`
|
||||
echo "- $bootnode_enr" > $TESTNET_DIR/boot_enr.yaml
|
||||
|
||||
echo "Generated bootnode enr and written to $TESTNET_DIR/boot_enr.yaml"
|
||||
# Output ENR to a temp dir and mv as "lcli generate-bootnode-enr" will not overwrite an empty dir (mounted volume)
|
||||
mkdir -p $DATADIR/bootnode
|
||||
mv $DATADIR/bootnode-temp/* $DATADIR/bootnode
|
||||
rm -r $DATADIR/bootnode-temp
|
||||
|
||||
echo "Generated bootnode enr"
|
||||
else
|
||||
echo "Found existing bootnode enr"
|
||||
fi
|
||||
|
||||
bootnode_enr=`cat $DATADIR/bootnode/enr.dat`
|
||||
echo "- $bootnode_enr" > $TESTNET_DIR/boot_enr.yaml
|
||||
echo "Written bootnode enr to $TESTNET_DIR/boot_enr.yaml"
|
||||
|
||||
exec lighthouse boot_node \
|
||||
--testnet-dir $TESTNET_DIR \
|
||||
--port $BOOTNODE_PORT \
|
||||
|
@ -16,4 +16,4 @@ lcli \
|
||||
$TESTNET_DIR/genesis.ssz \
|
||||
$NOW
|
||||
|
||||
echo "Reset genesis time to now ($NOW)"
|
||||
echo "Reset genesis time to ($NOW)"
|
||||
|
@ -21,9 +21,9 @@ while getopts "pd:" flag; do
|
||||
done
|
||||
|
||||
exec lighthouse \
|
||||
--debug-level $DEBUG_LEVEL \
|
||||
vc \
|
||||
$BUILDER_PROPOSALS \
|
||||
--debug-level $DEBUG_LEVEL \
|
||||
--validators-dir $DATADIR/node_$NODE_NUMBER/validators \
|
||||
--secrets-dir $DATADIR/node_$NODE_NUMBER/secrets \
|
||||
--testnet-dir $TESTNET_DIR \
|
||||
|
@ -1,12 +1,25 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [ "true" == "$RUN_BOOTNODE" ]; then
|
||||
# See https://linuxconfig.org/how-to-propagate-a-signal-to-child-processes-from-a-bash-script
|
||||
# Signal handler: terminates every background job started by this script
# and waits for them to exit before returning.
cleanup() {
  echo "Signal received, cleaning up..."

  local job_pids
  job_pids=$(jobs -p)

  # A bare `kill` without PIDs only prints a usage error, so skip it when
  # nothing is running in the background.
  if [[ -n "${job_pids}" ]]; then
    # shellcheck disable=SC2086 # splitting into one PID per word is intended
    kill ${job_pids}
  fi

  wait
  echo "Done"
}
|
||||
trap 'cleanup' SIGINT SIGTERM
|
||||
|
||||
if [ "true" == "$RUN_BOOTNODE" ]; then
|
||||
cd /opt/testnet/build/cl
|
||||
python3 -m http.server 3000 &
|
||||
|
||||
|
||||
cd /opt/testnet/cl
|
||||
./bootnode.sh 2>&1 | tee /var/log/lighthouse_bootnode.log
|
||||
./bootnode.sh 2>&1 | tee /var/log/lighthouse_bootnode.log &
|
||||
bootnode_pid=$!
|
||||
|
||||
wait $bootnode_pid
|
||||
else
|
||||
while [ 1 -eq 1 ]; do
|
||||
echo "Waiting on geth ..."
|
||||
@ -25,7 +38,12 @@ else
|
||||
cd /opt/testnet/cl
|
||||
|
||||
if [ -z "$LIGHTHOUSE_GENESIS_STATE_URL" ]; then
|
||||
./reset_genesis_time.sh
|
||||
# Check if beacon node data exists to avoid resetting genesis time on a restart
|
||||
if [ -d /opt/testnet/build/cl/node_"$NODE_NUMBER"/beacon ]; then
|
||||
echo "Skipping genesis time reset"
|
||||
else
|
||||
./reset_genesis_time.sh
|
||||
fi
|
||||
else
|
||||
while [ 1 -eq 1 ]; do
|
||||
echo "Waiting on Genesis time ..."
|
||||
@ -54,10 +72,9 @@ else
|
||||
echo -n "$JWT" > $JWTSECRET
|
||||
|
||||
./beacon_node.sh 2>&1 | tee /var/log/lighthouse_bn.log &
|
||||
lpid=$!
|
||||
beacon_pid=$!
|
||||
./validator_client.sh 2>&1 | tee /var/log/lighthouse_vc.log &
|
||||
vpid=$!
|
||||
validator_pid=$!
|
||||
|
||||
wait $lpid $vpid
|
||||
wait $beacon_pid $validator_pid
|
||||
fi
|
||||
|
||||
|
@ -3,15 +3,18 @@
|
||||
Instructions for deploying a local geth + lighthouse blockchain "fixturenet" for development and testing purposes using laconic-stack-orchestrator (the installation of which is covered [here](https://github.com/cerc-io/stack-orchestrator#user-mode)):
|
||||
|
||||
## Clone required repositories
|
||||
|
||||
```
|
||||
$ laconic-so --stack fixturenet-eth setup-repositories
|
||||
```
|
||||
|
||||
## Build the fixturenet-eth containers
|
||||
|
||||
```
|
||||
$ laconic-so --stack fixturenet-eth build-containers
|
||||
```
|
||||
This should create several container images in the local image registry:
|
||||
|
||||
This should create several container images in the local image registry:
|
||||
|
||||
* cerc/go-ethereum
|
||||
* cerc/lighthouse
|
||||
@ -19,6 +22,7 @@ This should create several container images in the local image registry:
|
||||
* cerc/fixturenet-eth-lighthouse
|
||||
|
||||
## Deploy the stack
|
||||
|
||||
```
|
||||
$ laconic-so --stack fixturenet-eth deploy up
|
||||
```
|
||||
@ -57,7 +61,7 @@ Several other containers can used with the basic `fixturenet-eth`:
|
||||
* `eth-probe` (captures eth1 tx gossip)
|
||||
* `keycloak` (nginx proxy with keycloak auth for API authentication)
|
||||
* `tx-spammer` (generates and sends automated transactions to the fixturenet)
|
||||
|
||||
|
||||
It is not necessary to use them all at once, but a complete example follows:
|
||||
|
||||
```
|
||||
@ -99,4 +103,22 @@ keycloak-db-1 0.0.0.0:55850->5432/tcp
|
||||
keycloak-nginx-1 0.0.0.0:55859->80/tcp
|
||||
migrations-1
|
||||
tx-spammer-1
|
||||
```
|
||||
```
|
||||
|
||||
## Clean up
|
||||
|
||||
Stop all services running in the background:
|
||||
|
||||
```bash
|
||||
$ laconic-so --stack fixturenet-eth deploy down
|
||||
```
|
||||
|
||||
Clear volumes created by this stack:
|
||||
|
||||
```bash
|
||||
# List all relevant volumes
|
||||
$ docker volume ls -q --filter "name=.*fixturenet_eth_bootnode_geth_data|.*fixturenet_eth_bootnode_lighthouse_data|.*fixturenet_eth_geth_1_data|.*fixturenet_eth_geth_2_data|.*fixturenet_eth_lighthouse_1_data|.*fixturenet_eth_lighthouse_2_data|.*fixturenet_geth_accounts"
|
||||
|
||||
# Remove all the listed volumes
|
||||
$ docker volume rm $(docker volume ls -q --filter "name=.*fixturenet_eth_bootnode_geth_data|.*fixturenet_eth_bootnode_lighthouse_data|.*fixturenet_eth_geth_1_data|.*fixturenet_eth_geth_2_data|.*fixturenet_eth_lighthouse_1_data|.*fixturenet_eth_lighthouse_2_data|.*fixturenet_geth_accounts")
|
||||
```
|
||||
|
Loading…
Reference in New Issue
Block a user