swarm/network: WIP consider all nodes for healthy iteration (#19155)

* swarm/network: WIP consider all nodes for healthy iteration

* swarm/network/simulation: extend TestWaitTillHealthy to really check kads are healthy

* cmd/swarm/swarm-snapshot: fixed bugs in snapshot creation binary

* swarm/network/simulation: addressed PR comments

* swarm/network/simulation: defer sim.Close()

* swarm/network/simulation: fixed wrong sim.Close()

* swarm/network/simulation: addressed PR comments

* cmd/swarm/swarm-snapshot: reduce default to 8 nodes and the "more nodes" test case to default + 4

* cmd/swarm/swarm-snapshot: extended timeout to 3 mins; otherwise a 256-node snapshot times out

* swarm/network/simulation: More PR comments
This commit is contained in:
lash 2019-02-28 08:12:50 +01:00 committed by Viktor Trón
parent 505a49e689
commit 62d9d63858
5 changed files with 113 additions and 29 deletions

View File

@ -59,13 +59,16 @@ func createSnapshot(filename string, nodes int, services []string) (err error) {
log.Debug("create snapshot", "filename", filename, "nodes", nodes, "services", services)
sim := simulation.New(map[string]simulation.ServiceFunc{
"bzz": func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
"bzz": func(ctx *adapters.ServiceContext, bucket *sync.Map) (node.Service, func(), error) {
addr := network.NewAddr(ctx.Config.Node())
kad := network.NewKademlia(addr.Over(), network.NewKadParams())
hp := network.NewHiveParams()
hp.KeepAliveInterval = time.Duration(200) * time.Millisecond
hp.Discovery = true // discovery must be enabled when creating a snapshot
// store the kademlia in the bucket, needed later in the WaitTillHealthy function
bucket.Store(simulation.BucketKeyKademlia, kad)
config := &network.BzzConfig{
OverlayAddr: addr.Over(),
UnderlayAddr: addr.Under(),
@ -76,17 +79,17 @@ func createSnapshot(filename string, nodes int, services []string) (err error) {
})
defer sim.Close()
_, err = sim.AddNodes(nodes)
ids, err := sim.AddNodes(nodes)
if err != nil {
return fmt.Errorf("add nodes: %v", err)
}
err = sim.Net.ConnectNodesRing(nil)
err = sim.Net.ConnectNodesRing(ids)
if err != nil {
return fmt.Errorf("connect nodes: %v", err)
}
ctx, cancelSimRun := context.WithTimeout(context.Background(), 2*time.Minute)
ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute)
defer cancelSimRun()
if _, err := sim.WaitTillHealthy(ctx); err != nil {
return fmt.Errorf("wait for healthy kademlia: %v", err)

View File

@ -48,7 +48,7 @@ func TestSnapshotCreate(t *testing.T) {
},
{
name: "more nodes",
nodes: defaultNodes + 5,
nodes: defaultNodes + 4,
},
{
name: "services",
@ -81,7 +81,7 @@ func TestSnapshotCreate(t *testing.T) {
}
testCmd := runSnapshot(t, append(args, file.Name())...)
testCmd.ExpectExit()
testCmd.WaitExit()
if code := testCmd.ExitStatus(); code != 0 {
t.Fatalf("command exit code %v, expected 0", code)
}

View File

@ -27,7 +27,7 @@ import (
var gitCommit string // Git SHA1 commit hash of the release (set via linker flags)
// default value for "create" command --nodes flag
const defaultNodes = 10
const defaultNodes = 8
func main() {
err := newApp().Run(os.Args)

View File

@ -58,7 +58,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
for k := range ill {
delete(ill, k)
}
log.Debug("kademlia health check", "addr count", len(addrs))
log.Debug("kademlia health check", "addr count", len(addrs), "kad len", len(kademlias))
for id, k := range kademlias {
//PeerPot for this node
addr := common.Bytes2Hex(k.BaseAddr())
@ -70,7 +70,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN)
log.Debug("kademlia", "health", h.ConnectNN && h.KnowNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
log.Debug("kademlia", "ill condition", !h.ConnectNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
if !h.ConnectNN {
if !h.Healthy() {
ill[id] = k
}
}
@ -85,6 +85,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
// in simulation bucket.
func (s *Simulation) kademlias() (ks map[enode.ID]*network.Kademlia) {
items := s.UpNodesItems(BucketKeyKademlia)
log.Debug("kademlia len items", "len", len(items))
ks = make(map[enode.ID]*network.Kademlia, len(items))
for id, v := range items {
k, ok := v.(*network.Kademlia)

View File

@ -22,16 +22,115 @@ import (
"testing"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/node"
"github.com/ethereum/go-ethereum/p2p/simulations/adapters"
"github.com/ethereum/go-ethereum/swarm/network"
)
/*
TestWaitTillHealthy tests that we indeed get a healthy network after we wait for it.
For this to be tested, the test has to come full circle, like a snake biting its own tail:
* First we create an initial simulation
* Run it as nodes connected in a ring
* Wait until the network is healthy
* Then we create a snapshot
* With this snapshot we create a new simulation
* This simulation is expected to have a healthy configuration, as it uses the snapshot
* Thus we just iterate all nodes and check that their kademlias are healthy
* If all kademlias are healthy, the test succeeded, otherwise it failed
*/
func TestWaitTillHealthy(t *testing.T) {
sim := New(map[string]ServiceFunc{
testNodesNum := 10
// create the first simulation
sim := New(createSimServiceMap(true))
// connect and...
nodeIDs, err := sim.AddNodesAndConnectRing(testNodesNum)
if err != nil {
t.Fatal(err)
}
// array of all overlay addresses
var addrs [][]byte
// iterate once to be able to build the peer map
for _, node := range nodeIDs {
//get the kademlia overlay address from this ID
a := node.Bytes()
//append it to the array of all overlay addresses
addrs = append(addrs, a)
}
// build a PeerPot only once
pp := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel()
// ...wait until healthy
ill, err := sim.WaitTillHealthy(ctx)
if err != nil {
for id, kad := range ill {
t.Log("Node", id)
t.Log(kad.String())
}
t.Fatal(err)
}
// now create a snapshot of this network
snap, err := sim.Net.Snapshot()
if err != nil {
t.Fatal(err)
}
// close the initial simulation
sim.Close()
// create a control simulation
controlSim := New(createSimServiceMap(false))
defer controlSim.Close()
// load the snapshot into this control simulation
err = controlSim.Net.Load(snap)
if err != nil {
t.Fatal(err)
}
_, err = controlSim.WaitTillHealthy(ctx)
if err != nil {
t.Fatal(err)
}
for _, node := range nodeIDs {
// ...get its kademlia
item, ok := controlSim.NodeItem(node, BucketKeyKademlia)
if !ok {
t.Fatal("No kademlia bucket item")
}
kad := item.(*network.Kademlia)
// get its base address
kid := common.Bytes2Hex(kad.BaseAddr())
//get the health info
info := kad.GetHealthInfo(pp[kid])
log.Trace("Health info", "info", info)
// check that it is healthy
healthy := info.Healthy()
if !healthy {
t.Fatalf("Expected node %v of control simulation to be healthy, but it is not, unhealthy kademlias: %v", node, kad.String())
}
}
}
// createSimServiceMap returns the services map
// this function will create the sim services with or without discovery enabled
// based on the flag passed
func createSimServiceMap(discovery bool) map[string]ServiceFunc {
return map[string]ServiceFunc{
"bzz": func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
addr := network.NewAddr(ctx.Config.Node())
hp := network.NewHiveParams()
hp.Discovery = discovery
config := &network.BzzConfig{
OverlayAddr: addr.Over(),
UnderlayAddr: addr.Under(),
@ -43,24 +142,5 @@ func TestWaitTillHealthy(t *testing.T) {
b.Store(BucketKeyKademlia, kad)
return network.NewBzz(config, kad, nil, nil, nil), nil, nil
},
})
defer sim.Close()
_, err := sim.AddNodesAndConnectRing(10)
if err != nil {
t.Fatal(err)
}
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel()
ill, err := sim.WaitTillHealthy(ctx)
if err != nil {
for id, kad := range ill {
t.Log("Node", id)
t.Log(kad.String())
}
if err != nil {
t.Fatal(err)
}
}
}