swarm/network: Saturation check for healthy networks (#19071)

* swarm/network: new saturation implementation

* swarm/network: re-added saturation func in Kademlia as it is used elsewhere

* swarm/network: saturation with higher MinBinSize

* swarm/network: PeersPerBin with depth check

* swarm/network: edited tests to pass new saturated check

* swarm/network: minor fix saturated check

* swarm/network/simulations/discovery: fixed renamed RPC call

* swarm/network: renamed to isSaturated and returns bool

* swarm/network: early depth check
This commit is contained in:
holisticode 2019-02-14 13:01:50 -05:00 committed by Viktor Trón
parent fab8c5a1cd
commit 2af24724dd
4 changed files with 179 additions and 29 deletions

View File

@ -628,7 +628,8 @@ func (k *Kademlia) string() string {
// used for testing only // used for testing only
// TODO move to separate testing tools file // TODO move to separate testing tools file
type PeerPot struct { type PeerPot struct {
NNSet [][]byte NNSet [][]byte
PeersPerBin []int
} }
// NewPeerPotMap creates a map of pot record of *BzzAddr with keys // NewPeerPotMap creates a map of pot record of *BzzAddr with keys
@ -654,6 +655,7 @@ func NewPeerPotMap(neighbourhoodSize int, addrs [][]byte) map[string]*PeerPot {
// all nn-peers // all nn-peers
var nns [][]byte var nns [][]byte
peersPerBin := make([]int, depth)
// iterate through the neighbours, going from the deepest to the shallowest // iterate through the neighbours, going from the deepest to the shallowest
np.EachNeighbour(a, Pof, func(val pot.Val, po int) bool { np.EachNeighbour(a, Pof, func(val pot.Val, po int) bool {
@ -667,14 +669,18 @@ func NewPeerPotMap(neighbourhoodSize int, addrs [][]byte) map[string]*PeerPot {
// a neighbor is any peer in or deeper than the depth // a neighbor is any peer in or deeper than the depth
if po >= depth { if po >= depth {
nns = append(nns, addr) nns = append(nns, addr)
return true } else {
// for peers < depth, we just count the number in each bin
// the bin is the index of the slice
peersPerBin[po]++
} }
return false return true
}) })
log.Trace(fmt.Sprintf("%x PeerPotMap NNS: %s", addrs[i][:4], LogAddrs(nns))) log.Trace(fmt.Sprintf("%x PeerPotMap NNS: %s, peersPerBin", addrs[i][:4], LogAddrs(nns)))
ppmap[common.Bytes2Hex(a)] = &PeerPot{ ppmap[common.Bytes2Hex(a)] = &PeerPot{
NNSet: nns, NNSet: nns,
PeersPerBin: peersPerBin,
} }
} }
return ppmap return ppmap
@ -698,6 +704,39 @@ func (k *Kademlia) saturation() int {
return prev return prev
} }
// isSaturated reports whether the kademlia counts as saturated when compared
// against an all-knowing view of the network.
//
// peersPerBin[po] is the number of peers that exist in bin po according to the
// PeerPot; depth is the current kademlia depth. A bin below depth is
// unsaturated when it holds fewer than k.MinBinSize connections although the
// PeerPot lists more peers in that bin than are currently connected. A bin
// whose available peers are all connected is never unsaturated, since no
// further connection could be established there.
//
// It returns true only if no visited bin below depth is unsaturated. If
// peersPerBin does not contain exactly depth entries the result is false.
func (k *Kademlia) isSaturated(peersPerBin []int, depth int) bool {
	// The caller (`GetHealthInfo()`) has already computed the depth, so it is
	// passed in rather than derived from k again.
	// Early exit: the per-bin counts must line up with the bins below depth.
	if len(peersPerBin) != depth {
		return false
	}

	// proximity orders of all bins found to be unsaturated
	lacking := []int{}
	k.conns.EachBin(k.base, Pof, 0, func(bin, connected int, _ func(func(val pot.Val) bool) bool) bool {
		// only bins shallower than depth are checked; returning false
		// presumably ends the iteration (confirm against EachBin)
		if bin >= depth {
			return false
		}
		available := peersPerBin[bin]
		log.Trace("peers per bin", "peersPerBin[po]", available, "po", bin)
		// the bin is short of MinBinSize and there are still peers
		// available that could be connected to fill it
		if connected < k.MinBinSize && connected < available {
			log.Trace("connections for po", "po", bin, "size", connected)
			lacking = append(lacking, bin)
		}
		return true
	})

	log.Trace("list of unsaturated bins", "unsaturatedBins", lacking)
	return len(lacking) == 0
}
// knowNeighbours tests if all neighbours in the peerpot // knowNeighbours tests if all neighbours in the peerpot
// are found among the peers known to the kademlia // are found among the peers known to the kademlia
// It is used in Healthy function for testing only // It is used in Healthy function for testing only
@ -780,11 +819,13 @@ type Health struct {
ConnectNN bool // whether node is connected to all its neighbours ConnectNN bool // whether node is connected to all its neighbours
CountConnectNN int // amount of neighbours connected to CountConnectNN int // amount of neighbours connected to
MissingConnectNN [][]byte // which neighbours we should have been connected to but we're not MissingConnectNN [][]byte // which neighbours we should have been connected to but we're not
Saturated bool // whether we are connected to all the peers we would have liked to // Saturated: if in all bins < depth number of connections >= MinBinsize or,
Hive string // if number of connections < MinBinSize, to the number of available peers in that bin
Saturated bool
Hive string
} }
// Healthy reports the health state of the kademlia connectivity // GetHealthInfo reports the health state of the kademlia connectivity
// //
// The PeerPot argument provides an all-knowing view of the network // The PeerPot argument provides an all-knowing view of the network
// The resulting Health object is a result of comparisons between // The resulting Health object is a result of comparisons between
@ -792,7 +833,7 @@ type Health struct {
// what SHOULD it have been when we take all we know about the network into consideration. // what SHOULD it have been when we take all we know about the network into consideration.
// //
// used for testing only // used for testing only
func (k *Kademlia) Healthy(pp *PeerPot) *Health { func (k *Kademlia) GetHealthInfo(pp *PeerPot) *Health {
k.lock.RLock() k.lock.RLock()
defer k.lock.RUnlock() defer k.lock.RUnlock()
if len(pp.NNSet) < k.NeighbourhoodSize { if len(pp.NNSet) < k.NeighbourhoodSize {
@ -801,7 +842,10 @@ func (k *Kademlia) Healthy(pp *PeerPot) *Health {
gotnn, countgotnn, culpritsgotnn := k.connectedNeighbours(pp.NNSet) gotnn, countgotnn, culpritsgotnn := k.connectedNeighbours(pp.NNSet)
knownn, countknownn, culpritsknownn := k.knowNeighbours(pp.NNSet) knownn, countknownn, culpritsknownn := k.knowNeighbours(pp.NNSet)
depth := depthForPot(k.conns, k.NeighbourhoodSize, k.base) depth := depthForPot(k.conns, k.NeighbourhoodSize, k.base)
saturated := k.saturation() < depth
// check saturation
saturated := k.isSaturated(pp.PeersPerBin, depth)
log.Trace(fmt.Sprintf("%08x: healthy: knowNNs: %v, gotNNs: %v, saturated: %v\n", k.base, knownn, gotnn, saturated)) log.Trace(fmt.Sprintf("%08x: healthy: knowNNs: %v, gotNNs: %v, saturated: %v\n", k.base, knownn, gotnn, saturated))
return &Health{ return &Health{
KnowNN: knownn, KnowNN: knownn,
@ -814,3 +858,13 @@ func (k *Kademlia) Healthy(pp *PeerPot) *Health {
Hive: k.string(), Hive: k.string(),
} }
} }
// Healthy returns the strict interpretation of health for a `Health` struct.
// Strict health requires all of the following to hold:
// - at least one peer is known
// - all neighbours are known
// - all known neighbours are connected
// - the kademlia is saturated
func (h *Health) Healthy() bool {
	// without saturation or without any known peer, health is ruled out
	if !h.Saturated || h.CountKnowNN <= 0 {
		return false
	}
	return h.KnowNN && h.ConnectNN
}

View File

@ -168,6 +168,46 @@ func TestNeighbourhoodDepth(t *testing.T) {
testNum++ testNum++
} }
// TestHighMinBinSize checks the saturation logic for MinBinSize values above 2:
// while a bin holds fewer connections than MinBinSize and more peers are
// available in it than are connected, the kademlia must be reported unhealthy.
func TestHighMinBinSize(t *testing.T) {
	// runScenario exercises one kademlia configured with the given MinBinSize
	runScenario := func(minBinSize int) {
		// fresh test kademlia with the requested MinBinSize
		tk := newTestKademlia(t, "11111111")
		tk.KadParams.MinBinSize = minBinSize

		// connect a few peers so that there are neighbours and a depth
		tk.On("00000000") // bin 0
		tk.On("11100000") // bin 3
		tk.On("11110000") // bin 4

		// one peer in bin 1 that is only registered, not yet connected
		first := "10000000"
		tk.Register(first)

		// connect one more bin 1 peer per iteration; the kademlia has to stay
		// unhealthy until the last iteration (i == minBinSize-1), in which the
		// registered-only peer gets connected as well
		for i := 1; i < minBinSize; i++ {
			tk.On(fmt.Sprintf("1000%b", 8|i))
			if i < minBinSize-1 {
				tk.checkHealth(false)
				continue
			}
			tk.On(first)
			tk.checkHealth(true)
			return
		}
	}

	// run the scenario for MinBinSize 3, 4 and 5
	for _, size := range []int{3, 4, 5} {
		runScenario(size)
	}
}
// TestHealthStrict tests the simplest definition of health // TestHealthStrict tests the simplest definition of health
// Which means whether we are connected to all neighbors we know of // Which means whether we are connected to all neighbors we know of
func TestHealthStrict(t *testing.T) { func TestHealthStrict(t *testing.T) {
@ -176,60 +216,116 @@ func TestHealthStrict(t *testing.T) {
// no peers // no peers
// unhealthy (and lonely) // unhealthy (and lonely)
tk := newTestKademlia(t, "11111111") tk := newTestKademlia(t, "11111111")
tk.checkHealth(false, false) tk.checkHealth(false)
// know one peer but not connected // know one peer but not connected
// unhealthy // unhealthy
tk.Register("11100000") tk.Register("11100000")
tk.checkHealth(false, false) tk.checkHealth(false)
// know one peer and connected // know one peer and connected
// healthy // healthy
tk.On("11100000") tk.On("11100000")
tk.checkHealth(true, false) tk.checkHealth(true)
// know two peers, only one connected // know two peers, only one connected
// unhealthy // unhealthy
tk.Register("11111100") tk.Register("11111100")
tk.checkHealth(false, false) tk.checkHealth(false)
// know two peers and connected to both // know two peers and connected to both
// healthy // healthy
tk.On("11111100") tk.On("11111100")
tk.checkHealth(true, false) tk.checkHealth(true)
// know three peers, connected to the two deepest // know three peers, connected to the two deepest
// healthy // unhealthy
tk.Register("00000000") tk.Register("00000000")
tk.checkHealth(true, false) tk.checkHealth(false)
// know three peers, connected to all three // know three peers, connected to all three
// healthy // healthy
tk.On("00000000") tk.On("00000000")
tk.checkHealth(true, false) tk.checkHealth(true)
// add fourth peer deeper than current depth // add fourth peer deeper than current depth
// unhealthy // unhealthy
tk.Register("11110000") tk.Register("11110000")
tk.checkHealth(false, false) tk.checkHealth(false)
// connected to three deepest peers // connected to three deepest peers
// healthy // healthy
tk.On("11110000") tk.On("11110000")
tk.checkHealth(true, false) tk.checkHealth(true)
// add additional peer in same bin as deepest peer // add additional peer in same bin as deepest peer
// unhealthy // unhealthy
tk.Register("11111101") tk.Register("11111101")
tk.checkHealth(false, false) tk.checkHealth(false)
// four deepest of five peers connected // four deepest of five peers connected
// healthy // healthy
tk.On("11111101") tk.On("11111101")
tk.checkHealth(true, false) tk.checkHealth(true)
// add additional peer in bin 0
// unhealthy: unsaturated bin 0, 2 known but 1 connected
tk.Register("00000001")
tk.checkHealth(false)
// Connect second in bin 0
// healthy
tk.On("00000001")
tk.checkHealth(true)
// add peer in bin 1
// unhealthy, as it is known but not connected
tk.Register("10000000")
tk.checkHealth(false)
// connect peer in bin 1
// depth change, is now 1
// healthy, 1 peer in bin 1 known and connected
tk.On("10000000")
tk.checkHealth(true)
// add second peer in bin 1
// unhealthy, as it is known but not connected
tk.Register("10000001")
tk.checkHealth(false)
// connect second peer in bin 1
// healthy,
tk.On("10000001")
tk.checkHealth(true)
// connect third peer in bin 1
// healthy,
tk.On("10000011")
tk.checkHealth(true)
// add peer in bin 2
// unhealthy, no depth change
tk.Register("11000000")
tk.checkHealth(false)
// connect peer in bin 2
// depth change - as we already have peers in bin 3 and 4,
// we have contiguous bins, no bin < po 5 is empty -> depth 5
// healthy, every bin < depth has the max available peers,
// even if they are < MinBinSize
tk.On("11000000")
tk.checkHealth(true)
// add peer in bin 2
// unhealthy, peer bin is below depth 5 but
// has more available peers (2) than connected ones (1)
// --> unsaturated
tk.Register("11000011")
tk.checkHealth(false)
} }
func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) { func (tk *testKademlia) checkHealth(expectHealthy bool) {
tk.t.Helper() tk.t.Helper()
kid := common.Bytes2Hex(tk.BaseAddr()) kid := common.Bytes2Hex(tk.BaseAddr())
addrs := [][]byte{tk.BaseAddr()} addrs := [][]byte{tk.BaseAddr()}
@ -239,13 +335,13 @@ func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) {
}) })
pp := NewPeerPotMap(tk.NeighbourhoodSize, addrs) pp := NewPeerPotMap(tk.NeighbourhoodSize, addrs)
healthParams := tk.Healthy(pp[kid]) healthParams := tk.GetHealthInfo(pp[kid])
// definition of health, all conditions must be true: // definition of health, all conditions must be true:
// - we at least know one peer // - we at least know one peer
// - we know all neighbors // - we know all neighbors
// - we are connected to all known neighbors // - we are connected to all known neighbors
health := healthParams.KnowNN && healthParams.ConnectNN && healthParams.CountKnowNN > 0 health := healthParams.Healthy()
if expectHealthy != health { if expectHealthy != health {
tk.t.Fatalf("expected kademlia health %v, is %v\n%v", expectHealthy, health, tk.String()) tk.t.Fatalf("expected kademlia health %v, is %v\n%v", expectHealthy, health, tk.String())
} }

View File

@ -64,7 +64,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
addr := common.Bytes2Hex(k.BaseAddr()) addr := common.Bytes2Hex(k.BaseAddr())
pp := ppmap[addr] pp := ppmap[addr]
//call Healthy RPC //call Healthy RPC
h := k.Healthy(pp) h := k.GetHealthInfo(pp)
//print info //print info
log.Debug(k.String()) log.Debug(k.String())
log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN) log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN)

View File

@ -267,7 +267,7 @@ func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simul
} }
healthy := &network.Health{} healthy := &network.Health{}
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
return false, fmt.Errorf("error getting node health: %s", err) return false, fmt.Errorf("error getting node health: %s", err)
} }
log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive)) log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive))
@ -352,7 +352,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt
healthy := &network.Health{} healthy := &network.Health{}
addr := id.String() addr := id.String()
ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs) ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
return fmt.Errorf("error getting node health: %s", err) return fmt.Errorf("error getting node health: %s", err)
} }
@ -422,7 +422,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt
healthy := &network.Health{} healthy := &network.Health{}
ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs) ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
return false, fmt.Errorf("error getting node health: %s", err) return false, fmt.Errorf("error getting node health: %s", err)
} }
log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN)) log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN))