589 lines
16 KiB
Go
589 lines
16 KiB
Go
package p2p
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/ecdsa"
|
|
"crypto/rand"
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/ethereum/go-ethereum/logger"
|
|
"github.com/ethereum/go-ethereum/logger/glog"
|
|
"github.com/ethereum/go-ethereum/p2p/discover"
|
|
"github.com/ethereum/go-ethereum/p2p/nat"
|
|
"github.com/ethereum/go-ethereum/rlp"
|
|
)
|
|
|
|
const (
|
|
defaultDialTimeout = 10 * time.Second
|
|
refreshPeersInterval = 30 * time.Second
|
|
staticPeerCheckInterval = 15 * time.Second
|
|
|
|
// This is the maximum number of inbound connection
|
|
// that are allowed to linger between 'accepted' and
|
|
// 'added as peer'.
|
|
maxAcceptConns = 50
|
|
|
|
// Maximum number of concurrently dialing outbound connections.
|
|
maxDialingConns = 50
|
|
|
|
// total timeout for encryption handshake and protocol
|
|
// handshake in both directions.
|
|
handshakeTimeout = 5 * time.Second
|
|
// maximum time allowed for reading a complete message.
|
|
// this is effectively the amount of time a connection can be idle.
|
|
frameReadTimeout = 1 * time.Minute
|
|
// maximum amount of time allowed for writing a complete message.
|
|
frameWriteTimeout = 5 * time.Second
|
|
)
|
|
|
|
var srvjslog = logger.NewJsonLogger()
|
|
|
|
// Server manages all peer connections.
|
|
//
|
|
// The fields of Server are used as configuration parameters.
|
|
// You should set them before starting the Server. Fields may not be
|
|
// modified while the server is running.
|
|
type Server struct {
|
|
// This field must be set to a valid secp256k1 private key.
|
|
PrivateKey *ecdsa.PrivateKey
|
|
|
|
// MaxPeers is the maximum number of peers that can be
|
|
// connected. It must be greater than zero.
|
|
MaxPeers int
|
|
|
|
// Name sets the node name of this server.
|
|
// Use common.MakeName to create a name that follows existing conventions.
|
|
Name string
|
|
|
|
// Bootstrap nodes are used to establish connectivity
|
|
// with the rest of the network.
|
|
BootstrapNodes []*discover.Node
|
|
|
|
// Static nodes are used as pre-configured connections which are always
|
|
// maintained and re-connected on disconnects.
|
|
StaticNodes []*discover.Node
|
|
|
|
// Trusted nodes are used as pre-configured connections which are always
|
|
// allowed to connect, even above the peer limit.
|
|
TrustedNodes []*discover.Node
|
|
|
|
// NodeDatabase is the path to the database containing the previously seen
|
|
// live nodes in the network.
|
|
NodeDatabase string
|
|
|
|
// Protocols should contain the protocols supported
|
|
// by the server. Matching protocols are launched for
|
|
// each peer.
|
|
Protocols []Protocol
|
|
|
|
// If ListenAddr is set to a non-nil address, the server
|
|
// will listen for incoming connections.
|
|
//
|
|
// If the port is zero, the operating system will pick a port. The
|
|
// ListenAddr field will be updated with the actual address when
|
|
// the server is started.
|
|
ListenAddr string
|
|
|
|
// If set to a non-nil value, the given NAT port mapper
|
|
// is used to make the listening port available to the
|
|
// Internet.
|
|
NAT nat.Interface
|
|
|
|
// If Dialer is set to a non-nil value, the given Dialer
|
|
// is used to dial outbound peer connections.
|
|
Dialer *net.Dialer
|
|
|
|
// If NoDial is true, the server will not dial any peers.
|
|
NoDial bool
|
|
|
|
// Hooks for testing. These are useful because we can inhibit
|
|
// the whole protocol stack.
|
|
setupFunc
|
|
newPeerHook
|
|
|
|
ourHandshake *protoHandshake
|
|
|
|
lock sync.RWMutex // protects running, peers and the trust fields
|
|
running bool
|
|
peers map[discover.NodeID]*Peer
|
|
staticNodes map[discover.NodeID]*discover.Node // Map of currently maintained static remote nodes
|
|
staticDial chan *discover.Node // Dial request channel reserved for the static nodes
|
|
staticCycle time.Duration // Overrides staticPeerCheckInterval, used for testing
|
|
trustedNodes map[discover.NodeID]bool // Set of currently trusted remote nodes
|
|
|
|
ntab *discover.Table
|
|
listener net.Listener
|
|
|
|
quit chan struct{}
|
|
loopWG sync.WaitGroup // {dial,listen,nat}Loop
|
|
peerWG sync.WaitGroup // active peer goroutines
|
|
}
|
|
|
|
type setupFunc func(net.Conn, *ecdsa.PrivateKey, *protoHandshake, *discover.Node, bool, map[discover.NodeID]bool) (*conn, error)
|
|
type newPeerHook func(*Peer)
|
|
|
|
// Peers returns all connected peers.
|
|
func (srv *Server) Peers() (peers []*Peer) {
|
|
srv.lock.RLock()
|
|
defer srv.lock.RUnlock()
|
|
for _, peer := range srv.peers {
|
|
if peer != nil {
|
|
peers = append(peers, peer)
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// PeerCount returns the number of connected peers.
|
|
func (srv *Server) PeerCount() int {
|
|
srv.lock.RLock()
|
|
n := len(srv.peers)
|
|
srv.lock.RUnlock()
|
|
return n
|
|
}
|
|
|
|
// AddPeer connects to the given node and maintains the connection until the
|
|
// server is shut down. If the connection fails for any reason, the server will
|
|
// attempt to reconnect the peer.
|
|
func (srv *Server) AddPeer(node *discover.Node) {
|
|
srv.lock.Lock()
|
|
defer srv.lock.Unlock()
|
|
|
|
srv.staticNodes[node.ID] = node
|
|
}
|
|
|
|
// Broadcast sends an RLP-encoded message to all connected peers.
|
|
// This method is deprecated and will be removed later.
|
|
func (srv *Server) Broadcast(protocol string, code uint64, data interface{}) error {
|
|
return srv.BroadcastLimited(protocol, code, func(i float64) float64 { return i }, data)
|
|
}
|
|
|
|
// BroadcastsRange an RLP-encoded message to a random set of peers using the limit function to limit the amount
|
|
// of peers.
|
|
func (srv *Server) BroadcastLimited(protocol string, code uint64, limit func(float64) float64, data interface{}) error {
|
|
var payload []byte
|
|
if data != nil {
|
|
var err error
|
|
payload, err = rlp.EncodeToBytes(data)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
srv.lock.RLock()
|
|
defer srv.lock.RUnlock()
|
|
|
|
i, max := 0, int(limit(float64(len(srv.peers))))
|
|
for _, peer := range srv.peers {
|
|
if i >= max {
|
|
break
|
|
}
|
|
|
|
if peer != nil {
|
|
var msg = Msg{Code: code}
|
|
if data != nil {
|
|
msg.Payload = bytes.NewReader(payload)
|
|
msg.Size = uint32(len(payload))
|
|
}
|
|
peer.writeProtoMsg(protocol, msg)
|
|
i++
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Start starts running the server.
|
|
// Servers can be re-used and started again after stopping.
|
|
func (srv *Server) Start() (err error) {
|
|
srv.lock.Lock()
|
|
defer srv.lock.Unlock()
|
|
if srv.running {
|
|
return errors.New("server already running")
|
|
}
|
|
glog.V(logger.Info).Infoln("Starting Server")
|
|
|
|
// static fields
|
|
if srv.PrivateKey == nil {
|
|
return fmt.Errorf("Server.PrivateKey must be set to a non-nil key")
|
|
}
|
|
if srv.MaxPeers <= 0 {
|
|
return fmt.Errorf("Server.MaxPeers must be > 0")
|
|
}
|
|
srv.quit = make(chan struct{})
|
|
srv.peers = make(map[discover.NodeID]*Peer)
|
|
|
|
// Create the current trust maps, and the associated dialing channel
|
|
srv.trustedNodes = make(map[discover.NodeID]bool)
|
|
for _, node := range srv.TrustedNodes {
|
|
srv.trustedNodes[node.ID] = true
|
|
}
|
|
srv.staticNodes = make(map[discover.NodeID]*discover.Node)
|
|
for _, node := range srv.StaticNodes {
|
|
srv.staticNodes[node.ID] = node
|
|
}
|
|
srv.staticDial = make(chan *discover.Node)
|
|
|
|
if srv.setupFunc == nil {
|
|
srv.setupFunc = setupConn
|
|
}
|
|
|
|
// node table
|
|
ntab, err := discover.ListenUDP(srv.PrivateKey, srv.ListenAddr, srv.NAT, srv.NodeDatabase)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
srv.ntab = ntab
|
|
|
|
// handshake
|
|
srv.ourHandshake = &protoHandshake{Version: baseProtocolVersion, Name: srv.Name, ID: ntab.Self().ID}
|
|
for _, p := range srv.Protocols {
|
|
srv.ourHandshake.Caps = append(srv.ourHandshake.Caps, p.cap())
|
|
}
|
|
|
|
// listen/dial
|
|
if srv.ListenAddr != "" {
|
|
if err := srv.startListening(); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if srv.Dialer == nil {
|
|
srv.Dialer = &net.Dialer{Timeout: defaultDialTimeout}
|
|
}
|
|
if !srv.NoDial {
|
|
srv.loopWG.Add(1)
|
|
go srv.dialLoop()
|
|
}
|
|
if srv.NoDial && srv.ListenAddr == "" {
|
|
glog.V(logger.Warn).Infoln("I will be kind-of useless, neither dialing nor listening.")
|
|
}
|
|
// maintain the static peers
|
|
go srv.staticNodesLoop()
|
|
|
|
srv.running = true
|
|
return nil
|
|
}
|
|
|
|
func (srv *Server) startListening() error {
|
|
listener, err := net.Listen("tcp", srv.ListenAddr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
laddr := listener.Addr().(*net.TCPAddr)
|
|
srv.ListenAddr = laddr.String()
|
|
srv.listener = listener
|
|
srv.loopWG.Add(1)
|
|
go srv.listenLoop()
|
|
if !laddr.IP.IsLoopback() && srv.NAT != nil {
|
|
srv.loopWG.Add(1)
|
|
go func() {
|
|
nat.Map(srv.NAT, srv.quit, "tcp", laddr.Port, laddr.Port, "ethereum p2p")
|
|
srv.loopWG.Done()
|
|
}()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Stop terminates the server and all active peer connections.
|
|
// It blocks until all active connections have been closed.
|
|
func (srv *Server) Stop() {
|
|
srv.lock.Lock()
|
|
if !srv.running {
|
|
srv.lock.Unlock()
|
|
return
|
|
}
|
|
srv.running = false
|
|
srv.lock.Unlock()
|
|
|
|
glog.V(logger.Info).Infoln("Stopping Server")
|
|
srv.ntab.Close()
|
|
if srv.listener != nil {
|
|
// this unblocks listener Accept
|
|
srv.listener.Close()
|
|
}
|
|
close(srv.quit)
|
|
srv.loopWG.Wait()
|
|
|
|
// No new peers can be added at this point because dialLoop and
|
|
// listenLoop are down. It is safe to call peerWG.Wait because
|
|
// peerWG.Add is not called outside of those loops.
|
|
srv.lock.Lock()
|
|
for _, peer := range srv.peers {
|
|
peer.Disconnect(DiscQuitting)
|
|
}
|
|
srv.lock.Unlock()
|
|
srv.peerWG.Wait()
|
|
}
|
|
|
|
// Self returns the local node's endpoint information.
|
|
func (srv *Server) Self() *discover.Node {
|
|
srv.lock.RLock()
|
|
defer srv.lock.RUnlock()
|
|
if !srv.running {
|
|
return &discover.Node{IP: net.ParseIP("0.0.0.0")}
|
|
}
|
|
return srv.ntab.Self()
|
|
}
|
|
|
|
// main loop for adding connections via listening
|
|
func (srv *Server) listenLoop() {
|
|
defer srv.loopWG.Done()
|
|
|
|
// This channel acts as a semaphore limiting
|
|
// active inbound connections that are lingering pre-handshake.
|
|
// If all slots are taken, no further connections are accepted.
|
|
slots := make(chan struct{}, maxAcceptConns)
|
|
for i := 0; i < maxAcceptConns; i++ {
|
|
slots <- struct{}{}
|
|
}
|
|
|
|
glog.V(logger.Info).Infoln("Listening on", srv.listener.Addr())
|
|
for {
|
|
<-slots
|
|
conn, err := srv.listener.Accept()
|
|
if err != nil {
|
|
return
|
|
}
|
|
glog.V(logger.Debug).Infof("Accepted conn %v\n", conn.RemoteAddr())
|
|
srv.peerWG.Add(1)
|
|
go func() {
|
|
srv.startPeer(conn, nil)
|
|
slots <- struct{}{}
|
|
}()
|
|
}
|
|
}
|
|
|
|
// staticNodesLoop is responsible for periodically checking that static
|
|
// connections are actually live, and requests dialing if not.
|
|
func (srv *Server) staticNodesLoop() {
|
|
// Create a default maintenance ticker, but override it requested
|
|
cycle := staticPeerCheckInterval
|
|
if srv.staticCycle != 0 {
|
|
cycle = srv.staticCycle
|
|
}
|
|
tick := time.NewTicker(cycle)
|
|
|
|
for {
|
|
select {
|
|
case <-srv.quit:
|
|
return
|
|
|
|
case <-tick.C:
|
|
// Collect all the non-connected static nodes
|
|
needed := []*discover.Node{}
|
|
srv.lock.RLock()
|
|
for id, node := range srv.staticNodes {
|
|
if _, ok := srv.peers[id]; !ok {
|
|
needed = append(needed, node)
|
|
}
|
|
}
|
|
srv.lock.RUnlock()
|
|
|
|
// Try to dial each of them (don't hang if server terminates)
|
|
for _, node := range needed {
|
|
glog.V(logger.Debug).Infof("Dialing static peer %v", node)
|
|
select {
|
|
case srv.staticDial <- node:
|
|
case <-srv.quit:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (srv *Server) dialLoop() {
|
|
var (
|
|
dialed = make(chan *discover.Node)
|
|
dialing = make(map[discover.NodeID]bool)
|
|
findresults = make(chan []*discover.Node)
|
|
refresh = time.NewTimer(0)
|
|
)
|
|
defer srv.loopWG.Done()
|
|
defer refresh.Stop()
|
|
|
|
// Limit the number of concurrent dials
|
|
slots := make(chan struct{}, maxDialingConns)
|
|
for i := 0; i < maxDialingConns; i++ {
|
|
slots <- struct{}{}
|
|
}
|
|
dial := func(dest *discover.Node) {
|
|
// Don't dial nodes that would fail the checks in addPeer.
|
|
// This is important because the connection handshake is a lot
|
|
// of work and we'd rather avoid doing that work for peers
|
|
// that can't be added.
|
|
srv.lock.RLock()
|
|
ok, _ := srv.checkPeer(dest.ID)
|
|
srv.lock.RUnlock()
|
|
if !ok || dialing[dest.ID] {
|
|
return
|
|
}
|
|
// Request a dial slot to prevent CPU exhaustion
|
|
<-slots
|
|
|
|
dialing[dest.ID] = true
|
|
srv.peerWG.Add(1)
|
|
go func() {
|
|
srv.dialNode(dest)
|
|
dialed <- dest
|
|
slots <- struct{}{}
|
|
}()
|
|
}
|
|
|
|
srv.ntab.Bootstrap(srv.BootstrapNodes)
|
|
for {
|
|
select {
|
|
case <-refresh.C:
|
|
// Grab some nodes to connect to if we're not at capacity.
|
|
srv.lock.RLock()
|
|
needpeers := len(srv.peers) < srv.MaxPeers/2
|
|
srv.lock.RUnlock()
|
|
if needpeers {
|
|
go func() {
|
|
var target discover.NodeID
|
|
rand.Read(target[:])
|
|
findresults <- srv.ntab.Lookup(target)
|
|
}()
|
|
} else {
|
|
// Make sure we check again if the peer count falls
|
|
// below MaxPeers.
|
|
refresh.Reset(refreshPeersInterval)
|
|
}
|
|
case dest := <-srv.staticDial:
|
|
dial(dest)
|
|
case dests := <-findresults:
|
|
for _, dest := range dests {
|
|
dial(dest)
|
|
}
|
|
refresh.Reset(refreshPeersInterval)
|
|
case dest := <-dialed:
|
|
delete(dialing, dest.ID)
|
|
if len(dialing) == 0 {
|
|
// Check again immediately after dialing all current candidates.
|
|
refresh.Reset(0)
|
|
}
|
|
case <-srv.quit:
|
|
// TODO: maybe wait for active dials
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (srv *Server) dialNode(dest *discover.Node) {
|
|
addr := &net.TCPAddr{IP: dest.IP, Port: int(dest.TCP)}
|
|
glog.V(logger.Debug).Infof("Dialing %v\n", dest)
|
|
conn, err := srv.Dialer.Dial("tcp", addr.String())
|
|
if err != nil {
|
|
// dialLoop adds to the wait group counter when launching
|
|
// dialNode, so we need to count it down again. startPeer also
|
|
// does that when an error occurs.
|
|
srv.peerWG.Done()
|
|
glog.V(logger.Detail).Infof("dial error: %v", err)
|
|
return
|
|
}
|
|
srv.startPeer(conn, dest)
|
|
}
|
|
|
|
func (srv *Server) startPeer(fd net.Conn, dest *discover.Node) {
|
|
// TODO: handle/store session token
|
|
|
|
// Run setupFunc, which should create an authenticated connection
|
|
// and run the capability exchange. Note that any early error
|
|
// returns during that exchange need to call peerWG.Done because
|
|
// the callers of startPeer added the peer to the wait group already.
|
|
fd.SetDeadline(time.Now().Add(handshakeTimeout))
|
|
|
|
// Check capacity, but override for static nodes
|
|
srv.lock.RLock()
|
|
atcap := len(srv.peers) == srv.MaxPeers
|
|
if dest != nil {
|
|
if _, ok := srv.staticNodes[dest.ID]; ok {
|
|
atcap = false
|
|
}
|
|
}
|
|
srv.lock.RUnlock()
|
|
|
|
conn, err := srv.setupFunc(fd, srv.PrivateKey, srv.ourHandshake, dest, atcap, srv.trustedNodes)
|
|
if err != nil {
|
|
fd.Close()
|
|
glog.V(logger.Debug).Infof("Handshake with %v failed: %v", fd.RemoteAddr(), err)
|
|
srv.peerWG.Done()
|
|
return
|
|
}
|
|
conn.MsgReadWriter = &netWrapper{
|
|
wrapped: conn.MsgReadWriter,
|
|
conn: fd, rtimeout: frameReadTimeout, wtimeout: frameWriteTimeout,
|
|
}
|
|
p := newPeer(fd, conn, srv.Protocols)
|
|
if ok, reason := srv.addPeer(conn.ID, p); !ok {
|
|
glog.V(logger.Detail).Infof("Not adding %v (%v)\n", p, reason)
|
|
p.politeDisconnect(reason)
|
|
srv.peerWG.Done()
|
|
return
|
|
}
|
|
// The handshakes are done and it passed all checks.
|
|
// Spawn the Peer loops.
|
|
go srv.runPeer(p)
|
|
}
|
|
|
|
func (srv *Server) runPeer(p *Peer) {
|
|
glog.V(logger.Debug).Infof("Added %v\n", p)
|
|
srvjslog.LogJson(&logger.P2PConnected{
|
|
RemoteId: p.ID().String(),
|
|
RemoteAddress: p.RemoteAddr().String(),
|
|
RemoteVersionString: p.Name(),
|
|
NumConnections: srv.PeerCount(),
|
|
})
|
|
if srv.newPeerHook != nil {
|
|
srv.newPeerHook(p)
|
|
}
|
|
discreason := p.run()
|
|
srv.removePeer(p)
|
|
glog.V(logger.Debug).Infof("Removed %v (%v)\n", p, discreason)
|
|
srvjslog.LogJson(&logger.P2PDisconnected{
|
|
RemoteId: p.ID().String(),
|
|
NumConnections: srv.PeerCount(),
|
|
})
|
|
}
|
|
|
|
func (srv *Server) addPeer(id discover.NodeID, p *Peer) (bool, DiscReason) {
|
|
srv.lock.Lock()
|
|
defer srv.lock.Unlock()
|
|
if ok, reason := srv.checkPeer(id); !ok {
|
|
return false, reason
|
|
}
|
|
srv.peers[id] = p
|
|
return true, 0
|
|
}
|
|
|
|
// checkPeer verifies whether a peer looks promising and should be allowed/kept
|
|
// in the pool, or if it's of no use.
|
|
func (srv *Server) checkPeer(id discover.NodeID) (bool, DiscReason) {
|
|
// First up, figure out if the peer is static or trusted
|
|
_, static := srv.staticNodes[id]
|
|
trusted := srv.trustedNodes[id]
|
|
|
|
// Make sure the peer passes all required checks
|
|
switch {
|
|
case !srv.running:
|
|
return false, DiscQuitting
|
|
case !static && !trusted && len(srv.peers) >= srv.MaxPeers:
|
|
return false, DiscTooManyPeers
|
|
case srv.peers[id] != nil:
|
|
return false, DiscAlreadyConnected
|
|
case id == srv.ntab.Self().ID:
|
|
return false, DiscSelf
|
|
default:
|
|
return true, 0
|
|
}
|
|
}
|
|
|
|
func (srv *Server) removePeer(p *Peer) {
|
|
srv.lock.Lock()
|
|
delete(srv.peers, p.ID())
|
|
srv.lock.Unlock()
|
|
srv.peerWG.Done()
|
|
}
|