From 8071fe6c242ce720af1497de95b4d1b1d45bd7ee Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Thu, 14 Jun 2018 15:48:25 -0700 Subject: [PATCH] switch to a per DialPeer/NewStream timeout defined in go-libp2p-net The global per-peer dial timeout had a significant drawback: When dialing many peers, this timeout could cause libp2p to cancel dials while they were still stuck in the limiter. A better but more complicated approach is a time budget system, but we can implement that later. This change simply applies the limit to each `DialPeer`/`NewStream` call independently and makes it easy to override. While the old timeout tried to account for how much time we're willing to spend dialing a single peer, this new timeout tries to account for the amount of time a single "client" is willing to wait for a dial to complete before they no longer care. --- limiter.go | 2 +- swarm.go | 5 ----- swarm_dial.go | 8 +++++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/limiter.go b/limiter.go index 391b828..02aed50 100644 --- a/limiter.go +++ b/limiter.go @@ -34,7 +34,7 @@ func (dj *dialJob) cancelled() bool { } func (dj *dialJob) dialTimeout() time.Duration { - timeout := DialTimeout + timeout := transport.DialTimeout if lowTimeoutFilters.AddrBlocked(dj.addr) { timeout = DialTimeoutLocal } diff --git a/swarm.go b/swarm.go index 0a50bed..7d795ab 100644 --- a/swarm.go +++ b/swarm.go @@ -37,11 +37,6 @@ var ErrSwarmClosed = errors.New("swarm closed") // transport is misbehaving. var ErrAddrFiltered = errors.New("address filtered") -// DialTimeout is the maximum duration a Dial is allowed to take. -// This includes the time between dialing the raw network connection, -// protocol selection as well the handshake, if applicable. -var DialTimeout = 60 * time.Second - // Swarm is a connection muxer, allowing connections to other peers to // be opened and closed, while still using the same Chan for all // communication. 
The Chan sends/receives Messages, which note the diff --git a/swarm_dial.go b/swarm_dial.go index fe7a4cb..3e0cfaf 100644 --- a/swarm_dial.go +++ b/swarm_dial.go @@ -202,10 +202,15 @@ func (s *Swarm) dialPeer(ctx context.Context, p peer.ID) (*Conn, error) { return nil, ErrDialBackoff } + // apply the DialPeer timeout + ctx, cancel := context.WithTimeout(ctx, inet.GetDialPeerTimeout(ctx)) + defer cancel() + conn, err := s.dsync.DialLock(ctx, p) if err != nil { return nil, err } + log.Debugf("network for %s finished dialing %s", s.local, p) return conn, err } @@ -256,9 +261,6 @@ func (s *Swarm) canDial(addr ma.Multiaddr) bool { // dial is the actual swarm's dial logic, gated by Dial. func (s *Swarm) dial(ctx context.Context, p peer.ID) (*Conn, error) { - ctx, cancel := context.WithTimeout(ctx, DialTimeout) - defer cancel() - var logdial = lgbl.Dial("swarm", s.LocalPeer(), p, nil, nil) if p == s.local { log.Event(ctx, "swarmDialDoDialSelf", logdial) -- GitLab