p2p: track write errors and prevent writes during shutdown

As of this commit, we no longer rely on the protocol handler to report
write errors in a timely fashion. When a write fails, shutdown is
initiated immediately and no new writes can start. This will also
prevent new writes from starting after Server.Stop has been called.
This commit is contained in:
Felix Lange 2015-06-15 13:42:44 +02:00
parent 6f5c6150b7
commit 8dcbdcad0a

View File

@ -115,37 +115,54 @@ func newPeer(conn *conn, protocols []Protocol) *Peer {
} }
func (p *Peer) run() DiscReason { func (p *Peer) run() DiscReason {
readErr := make(chan error, 1) var (
writeStart = make(chan struct{}, 1)
writeErr = make(chan error, 1)
readErr = make(chan error, 1)
reason DiscReason
requested bool
)
p.wg.Add(2) p.wg.Add(2)
go p.readLoop(readErr) go p.readLoop(readErr)
go p.pingLoop() go p.pingLoop()
p.startProtocols() // Start all protocol handlers.
writeStart <- struct{}{}
p.startProtocols(writeStart, writeErr)
// Wait for an error or disconnect. // Wait for an error or disconnect.
var ( loop:
reason DiscReason for {
requested bool
)
select { select {
case err := <-writeErr:
// A write finished. Allow the next write to start if
// there was no error.
if err != nil {
glog.V(logger.Detail).Infof("%v: Write error: %v\n", p, err)
reason = DiscNetworkError
break loop
}
writeStart <- struct{}{}
case err := <-readErr: case err := <-readErr:
if r, ok := err.(DiscReason); ok { if r, ok := err.(DiscReason); ok {
reason = r reason = r
} else { } else {
// Note: We rely on protocols to abort if there is a write
// error. It might be more robust to handle them here as well.
glog.V(logger.Detail).Infof("%v: Read error: %v\n", p, err) glog.V(logger.Detail).Infof("%v: Read error: %v\n", p, err)
reason = DiscNetworkError reason = DiscNetworkError
} }
break loop
case err := <-p.protoErr: case err := <-p.protoErr:
reason = discReasonForError(err) reason = discReasonForError(err)
break loop
case reason = <-p.disc: case reason = <-p.disc:
requested = true requested = true
break loop
} }
}
close(p.closed) close(p.closed)
p.rw.close(reason) p.rw.close(reason)
p.wg.Wait() p.wg.Wait()
if requested { if requested {
reason = DiscRequested reason = DiscRequested
} }
@ -247,11 +264,13 @@ outer:
return result return result
} }
func (p *Peer) startProtocols() { func (p *Peer) startProtocols(writeStart <-chan struct{}, writeErr chan<- error) {
p.wg.Add(len(p.running)) p.wg.Add(len(p.running))
for _, proto := range p.running { for _, proto := range p.running {
proto := proto proto := proto
proto.closed = p.closed proto.closed = p.closed
proto.wstart = writeStart
proto.werr = writeErr
glog.V(logger.Detail).Infof("%v: Starting protocol %s/%d\n", p, proto.Name, proto.Version) glog.V(logger.Detail).Infof("%v: Starting protocol %s/%d\n", p, proto.Name, proto.Version)
go func() { go func() {
err := proto.Run(p, proto) err := proto.Run(p, proto)
@ -280,18 +299,31 @@ func (p *Peer) getProto(code uint64) (*protoRW, error) {
// protoRW carries the per-protocol state used for reading and writing
// messages. Peer.startProtocols fills in closed, wstart and werr before
// passing the value to the protocol's Run function.
type protoRW struct { type protoRW struct {
Protocol Protocol
in chan Msg in chan Msg // receives read messages
closed <-chan struct{} closed <-chan struct{} // receives when peer is shutting down
wstart <-chan struct{} // receives when write may start
werr chan<- error // for write results
offset uint64 offset uint64 // added to msg.Code by WriteMsg before the message is written
w MsgWriter w MsgWriter // writer that WriteMsg forwards messages to
} }
// WriteMsg writes msg through rw.w after adding rw.offset to its code.
// Messages whose code is >= rw.Length are rejected with errInvalidMsgCode.
//
// Old side of the diff: the message is written directly and the result returned.
// New side of the diff: the call first waits for either a value on rw.wstart
// (the write may proceed) or rw.closed (the write is refused with a
// "shutting down" error). The result of every attempted write is sent on
// rw.werr.
func (rw *protoRW) WriteMsg(msg Msg) error { func (rw *protoRW) WriteMsg(msg Msg) (err error) {
if msg.Code >= rw.Length { if msg.Code >= rw.Length {
return newPeerError(errInvalidMsgCode, "not handled") return newPeerError(errInvalidMsgCode, "not handled")
} }
msg.Code += rw.offset msg.Code += rw.offset
return rw.w.WriteMsg(msg) select {
// Peer.run puts a value on this channel at startup and again after each
// write that finished without error.
case <-rw.wstart:
err = rw.w.WriteMsg(msg)
// Report write status back to Peer.run. It will initiate
// shutdown if the error is non-nil and unblock the next write
// otherwise. The calling protocol code should exit for errors
// as well but we don't want to rely on that.
rw.werr <- err
// Peer.run closes this channel once it leaves its wait loop, so no new
// write starts after shutdown has begun.
case <-rw.closed:
err = fmt.Errorf("shutting down")
}
return err
} }
func (rw *protoRW) ReadMsg() (Msg, error) { func (rw *protoRW) ReadMsg() (Msg, error) {