Commit 8071fe6c authored by Steven Allen

switch to a per DialPeer/NewStream timeout defined in go-libp2p-net

The global per-peer dial timeout had a significant drawback: When dialing many
peers, this timeout could cause libp2p to cancel dials while they were still
stuck in the limiter.

A better but more complicated approach is a time budget system, but we can
implement that later.

This change simply applies the limit to each `DialPeer`/`NewStream` call
independently and makes it easy to override. While the old timeout tried to
account for how much we're willing to spend dialing a single peer, this new
timeout tries to account for the amount of time a single "client" is willing to
wait for a dial to complete before they no longer care.
parent 1c07b91c
...@@ -34,7 +34,7 @@ func (dj *dialJob) cancelled() bool { ...@@ -34,7 +34,7 @@ func (dj *dialJob) cancelled() bool {
} }
func (dj *dialJob) dialTimeout() time.Duration { func (dj *dialJob) dialTimeout() time.Duration {
timeout := DialTimeout timeout := transport.DialTimeout
if lowTimeoutFilters.AddrBlocked(dj.addr) { if lowTimeoutFilters.AddrBlocked(dj.addr) {
timeout = DialTimeoutLocal timeout = DialTimeoutLocal
} }
......
...@@ -37,11 +37,6 @@ var ErrSwarmClosed = errors.New("swarm closed") ...@@ -37,11 +37,6 @@ var ErrSwarmClosed = errors.New("swarm closed")
// transport is misbehaving. // transport is misbehaving.
var ErrAddrFiltered = errors.New("address filtered") var ErrAddrFiltered = errors.New("address filtered")
// DialTimeout is the maximum duration a Dial is allowed to take.
// This includes the time between dialing the raw network connection,
// protocol selection as well the handshake, if applicable.
var DialTimeout = 60 * time.Second
// Swarm is a connection muxer, allowing connections to other peers to // Swarm is a connection muxer, allowing connections to other peers to
// be opened and closed, while still using the same Chan for all // be opened and closed, while still using the same Chan for all
// communication. The Chan sends/receives Messages, which note the // communication. The Chan sends/receives Messages, which note the
......
...@@ -202,10 +202,15 @@ func (s *Swarm) dialPeer(ctx context.Context, p peer.ID) (*Conn, error) { ...@@ -202,10 +202,15 @@ func (s *Swarm) dialPeer(ctx context.Context, p peer.ID) (*Conn, error) {
return nil, ErrDialBackoff return nil, ErrDialBackoff
} }
// apply the DialPeer timeout
ctx, cancel := context.WithTimeout(ctx, inet.GetDialPeerTimeout(ctx))
defer cancel()
conn, err := s.dsync.DialLock(ctx, p) conn, err := s.dsync.DialLock(ctx, p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
log.Debugf("network for %s finished dialing %s", s.local, p) log.Debugf("network for %s finished dialing %s", s.local, p)
return conn, err return conn, err
} }
...@@ -256,9 +261,6 @@ func (s *Swarm) canDial(addr ma.Multiaddr) bool { ...@@ -256,9 +261,6 @@ func (s *Swarm) canDial(addr ma.Multiaddr) bool {
// dial is the actual swarm's dial logic, gated by Dial. // dial is the actual swarm's dial logic, gated by Dial.
func (s *Swarm) dial(ctx context.Context, p peer.ID) (*Conn, error) { func (s *Swarm) dial(ctx context.Context, p peer.ID) (*Conn, error) {
ctx, cancel := context.WithTimeout(ctx, DialTimeout)
defer cancel()
var logdial = lgbl.Dial("swarm", s.LocalPeer(), p, nil, nil) var logdial = lgbl.Dial("swarm", s.LocalPeer(), p, nil, nil)
if p == s.local { if p == s.local {
log.Event(ctx, "swarmDialDoDialSelf", logdial) log.Event(ctx, "swarmDialDoDialSelf", logdial)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment