diff --git a/dial_sync.go b/dial_sync.go index f746b9a9a30c3ac53da9b954c6142f7ce609d849..4c1230f5fc393c2993228fd1790ef681c5244387 100644 --- a/dial_sync.go +++ b/dial_sync.go @@ -2,11 +2,15 @@ package swarm import ( "context" + "errors" "sync" "github.com/libp2p/go-libp2p-core/peer" ) +// TODO: change this text when we fix the bug +var errDialCanceled = errors.New("dial was aborted internally, likely due to https://git.io/Je2wW") + // DialFunc is the type of function expected by DialSync. type DialFunc func(context.Context, peer.ID) (*Conn, error) @@ -78,6 +82,16 @@ func (ad *activeDial) decref() { func (ad *activeDial) start(ctx context.Context) { ad.conn, ad.err = ad.ds.dialFunc(ctx, ad.id) + + // This isn't the user's context so we should fix the error. + switch ad.err { + case context.Canceled: + // The dial was canceled with `CancelDial`. + ad.err = errDialCanceled + case context.DeadlineExceeded: + // We hit an internal timeout, not a context timeout. + ad.err = ErrDialTimeout + } close(ad.waitch) ad.cancel() } diff --git a/swarm.go b/swarm.go index 5366bdcc6f7ba12d45fdb9e5b9fd5c7f21a0da5f..f7c57717fa9d8c57c302bf5ada175d8e84700587 100644 --- a/swarm.go +++ b/swarm.go @@ -40,6 +40,9 @@ var ErrSwarmClosed = errors.New("swarm closed") // transport is misbehaving. var ErrAddrFiltered = errors.New("address filtered") +// ErrDialTimeout is returned when a dial times out due to the global timeout +var ErrDialTimeout = errors.New("dial timed out") + // Swarm is a connection muxer, allowing connections to other peers to // be opened and closed, while still using the same Chan for all // communication. 
The Chan sends/receives Messages, which note the diff --git a/swarm_dial.go b/swarm_dial.go index ffb7ab5e2c0b4db8f432f43b3cbbdcd1640ccaca..475a7e250eab269952705d7902ec556b9270641c 100644 --- a/swarm_dial.go +++ b/swarm_dial.go @@ -221,12 +221,23 @@ func (s *Swarm) dialPeer(ctx context.Context, p peer.ID) (*Conn, error) { defer cancel() conn, err = s.dsync.DialLock(ctx, p) - if err != nil { - return nil, err + if err == nil { + return conn, nil } log.Debugf("network for %s finished dialing %s", s.local, p) - return conn, err + + if ctx.Err() != nil { + // Context error trumps any dial errors as it was likely the ultimate cause. + return nil, ctx.Err() + } + + if s.ctx.Err() != nil { + // Ok, so the swarm is shutting down. + return nil, ErrSwarmClosed + } + + return nil, err } // doDial is an ugly shim method to retain all the logging and backoff logic