Commit 83dd28eb authored by Steven Allen's avatar Steven Allen

feat(swarm): improve dial context

1. Always return the caller's context error if relevant.
2. Don't return "context canceled" when we're just shutting down.
3. Don't claim that the context deadline has been exceeded when the dial timeout
is canceled.
parent e71f7af3
......@@ -2,11 +2,15 @@ package swarm
import (
"context"
"errors"
"sync"
"github.com/libp2p/go-libp2p-core/peer"
)
// TODO: change this text when we fix the bug
var errDialCanceled = errors.New("dial was aborted internally, likely due to https://git.io/Je2wW")
// DialFunc is the type of function expected by DialSync.
type DialFunc func(context.Context, peer.ID) (*Conn, error)
......@@ -78,6 +82,16 @@ func (ad *activeDial) decref() {
func (ad *activeDial) start(ctx context.Context) {
ad.conn, ad.err = ad.ds.dialFunc(ctx, ad.id)
// This isn't the user's context so we should fix the error.
switch ad.err {
case context.Canceled:
// The dial was canceled with `CancelDial`.
ad.err = errDialCanceled
case context.DeadlineExceeded:
// We hit an internal timeout, not a context timeout.
ad.err = ErrDialTimeout
}
close(ad.waitch)
ad.cancel()
}
......
......@@ -40,6 +40,9 @@ var ErrSwarmClosed = errors.New("swarm closed")
// transport is misbehaving.
var ErrAddrFiltered = errors.New("address filtered")
// ErrDialTimeout is returned when one a dial times out due to the global timeout
var ErrDialTimeout = errors.New("dial timed out")
// Swarm is a connection muxer, allowing connections to other peers to
// be opened and closed, while still using the same Chan for all
// communication. The Chan sends/receives Messages, which note the
......
......@@ -221,12 +221,23 @@ func (s *Swarm) dialPeer(ctx context.Context, p peer.ID) (*Conn, error) {
defer cancel()
conn, err = s.dsync.DialLock(ctx, p)
if err != nil {
return nil, err
if err == nil {
return conn, nil
}
log.Debugf("network for %s finished dialing %s", s.local, p)
return conn, err
if ctx.Err() != nil {
// Context error trumps any dial errors as it was likely the ultimate cause.
return nil, ctx.Err()
}
if s.ctx.Err() != nil {
// Ok, so the swarm is shutting down.
return nil, ErrSwarmClosed
}
return nil, err
}
// doDial is an ugly shim method to retain all the logging and backoff logic
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment