swarm_dial.go 17.5 KB
Newer Older
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
1 2 3
package swarm

import (
4
	"context"
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
5 6 7 8 9
	"errors"
	"fmt"
	"sync"
	"time"

10 11 12
	"github.com/libp2p/go-libp2p-core/network"
	"github.com/libp2p/go-libp2p-core/peer"
	"github.com/libp2p/go-libp2p-core/transport"
Aarsh Shah's avatar
Aarsh Shah committed
13 14

	addrutil "github.com/libp2p/go-addr-util"
15 16
	lgbl "github.com/libp2p/go-libp2p-loggables"

Steven Allen's avatar
Steven Allen committed
17
	logging "github.com/ipfs/go-log"
Jeromy's avatar
Jeromy committed
18
	ma "github.com/multiformats/go-multiaddr"
19
	manet "github.com/multiformats/go-multiaddr/net"
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
20 21 22 23 24 25 26 27 28 29 30 31 32 33
)

// Diagram of dial sync:
//
//   many callers of Dial()   synched w.  dials many addrs       results to callers
//  ----------------------\    dialsync    use earliest            /--------------
//  -----------------------\              |----------\           /----------------
//  ------------------------>------------<-------     >---------<-----------------
//  -----------------------|              \----x                 \----------------
//  ----------------------|                \-----x                \---------------
//                                         any may fail          if no addr at end
//                                                             retry dialAttempt x

// Sentinel errors returned by the swarm's dial logic.
var (
	// ErrDialBackoff is returned by the backoff code when a given peer has
	// been dialed too frequently.
	ErrDialBackoff = errors.New("dial backoff")

	// ErrDialToSelf is returned if we attempt to dial our own peer.
	ErrDialToSelf = errors.New("dial to self attempted")

	// ErrNoTransport is returned when we don't know a transport for the
	// given multiaddr.
	ErrNoTransport = errors.New("no transport for protocol")

	// ErrAllDialsFailed is returned when connecting to a peer has
	// ultimately failed.
	ErrAllDialsFailed = errors.New("all dials failed")

	// ErrNoAddresses is returned when we fail to find any addresses for a
	// peer we're trying to dial.
	ErrNoAddresses = errors.New("no addresses")

	// ErrNoGoodAddresses is returned when we find addresses for a peer but
	// can't use any of them.
	ErrNoGoodAddresses = errors.New("no good addresses")

	// ErrGaterDisallowedConnection is returned when the gater prevents us
	// from forming a connection with a peer.
	ErrGaterDisallowedConnection = errors.New("gater disallows connection to peer")
)

Steven Allen's avatar
Steven Allen committed
61
// Tunables for the swarm dialer.
const (
	// DialAttempts governs how many times a goroutine will try to dial a
	// given peer.
	// Note: this is down to one, as we have _too many dials_ atm. To add
	// back in, add loop back in Dial(.)
	DialAttempts = 1

	// ConcurrentFdDials is the number of concurrent outbound dials over
	// transports that consume file descriptors.
	ConcurrentFdDials = 160

	// DefaultPerPeerRateLimit is the number of concurrent outbound dials to
	// make per peer.
	DefaultPerPeerRateLimit = 8
)
Jeromy's avatar
Jeromy committed
73

Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
// dialbackoff is a struct used to avoid over-dialing the same, dead peers.
// Whenever we totally time out on a peer (all three attempts), we add them
// to dialbackoff. Then, whenever goroutines would _wait_ (dialsync), they
// check dialbackoff. If it's there, they don't wait and exit promptly with
// an error. (the single goroutine that is actually dialing continues to
// dial). If a dial is successful, the peer is removed from backoff.
// Example:
//
//  for {
//  	if ok, wait := dialsync.Lock(p); !ok {
//  		if backoff.Backoff(p) {
//  			return errDialFailed
//  		}
//  		<-wait
//  		continue
//  	}
//  	defer dialsync.Unlock(p)
//  	c, err := actuallyDial(p)
//  	if err != nil {
//  		dialbackoff.AddBackoff(p)
//  		continue
//  	}
//  	dialbackoff.Clear(p)
//  }
//
Jeromy's avatar
Jeromy committed
99

Steven Allen's avatar
Steven Allen committed
100 101
// DialBackoff is a type for tracking peer dial backoffs.
//
102
// * It's safe to use its zero value.
Steven Allen's avatar
Steven Allen committed
103 104 105
// * It's thread-safe.
// * It's *not* safe to move this type after using.
type DialBackoff struct {
Will Scott's avatar
Will Scott committed
106
	entries map[peer.ID]map[string]*backoffAddr
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
107 108 109
	lock    sync.RWMutex
}

Will Scott's avatar
Will Scott committed
110
// backoffAddr is the backoff record kept for a single address of a peer.
type backoffAddr struct {
	// tries counts how many times backoff has been added for the address.
	tries int
	// until is the instant before which the address counts as backed off.
	until time.Time
}

Will Scott's avatar
Will Scott committed
115
func (db *DialBackoff) init(ctx context.Context) {
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
116
	if db.entries == nil {
Will Scott's avatar
Will Scott committed
117
		db.entries = make(map[peer.ID]map[string]*backoffAddr)
Will Scott's avatar
Will Scott committed
118 119 120 121 122 123
	}
	go db.background(ctx)
}

func (db *DialBackoff) background(ctx context.Context) {
	ticker := time.NewTicker(BackoffMax)
Will Scott's avatar
Will Scott committed
124
	defer ticker.Stop()
Will Scott's avatar
Will Scott committed
125 126 127 128 129 130 131
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			db.cleanup()
		}
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
132 133 134 135
	}
}

// Backoff returns whether the client should backoff from dialing
Will Scott's avatar
Will Scott committed
136 137
// peer p at address addr
func (db *DialBackoff) Backoff(p peer.ID, addr ma.Multiaddr) (backoff bool) {
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
138
	db.lock.Lock()
Jeromy's avatar
Jeromy committed
139 140
	defer db.lock.Unlock()

141 142
	ap, found := db.entries[p][string(addr.Bytes())]
	return found && time.Now().Before(ap.until)
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
143 144
}

Steven Allen's avatar
Steven Allen committed
145 146 147 148 149 150 151 152
// Parameters of the dial backoff schedule. They are variables so that they
// can be adjusted by callers.
var (
	// BackoffBase is the base amount of time to backoff (default: 5s).
	BackoffBase = 5 * time.Second

	// BackoffCoef is the backoff coefficient (default: 1s).
	BackoffCoef = 1 * time.Second

	// BackoffMax is the maximum backoff time (default: 5m).
	BackoffMax = 5 * time.Minute
)
Jeromy's avatar
Jeromy committed
153

Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
154 155 156
// AddBackoff lets other nodes know that we've entered backoff with
// peer p, so dialers should not wait unnecessarily. We still will
// attempt to dial with one goroutine, in case we get through.
Steven Allen's avatar
Steven Allen committed
157 158 159 160 161 162 163
//
// Backoff is not exponential, it's quadratic and computed according to the
// following formula:
//
//     BackoffBase + BakoffCoef * PriorBackoffs^2
//
// Where PriorBackoffs is the number of previous backoffs.
Will Scott's avatar
Will Scott committed
164
func (db *DialBackoff) AddBackoff(p peer.ID, addr ma.Multiaddr) {
Will Scott's avatar
Will Scott committed
165
	saddr := string(addr.Bytes())
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
166
	db.lock.Lock()
Jeromy's avatar
Jeromy committed
167 168 169
	defer db.lock.Unlock()
	bp, ok := db.entries[p]
	if !ok {
170 171
		bp = make(map[string]*backoffAddr, 1)
		db.entries[p] = bp
Will Scott's avatar
Will Scott committed
172
	}
Will Scott's avatar
Will Scott committed
173
	ba, ok := bp[saddr]
Will Scott's avatar
Will Scott committed
174
	if !ok {
Will Scott's avatar
Will Scott committed
175
		bp[saddr] = &backoffAddr{
Jeromy's avatar
Jeromy committed
176
			tries: 1,
Steven Allen's avatar
Steven Allen committed
177
			until: time.Now().Add(BackoffBase),
Jeromy's avatar
Jeromy committed
178 179 180 181
		}
		return
	}

Will Scott's avatar
Will Scott committed
182
	backoffTime := BackoffBase + BackoffCoef*time.Duration(ba.tries*ba.tries)
Steven Allen's avatar
Steven Allen committed
183 184
	if backoffTime > BackoffMax {
		backoffTime = BackoffMax
Jeromy's avatar
Jeromy committed
185
	}
Will Scott's avatar
Will Scott committed
186 187
	ba.until = time.Now().Add(backoffTime)
	ba.tries++
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
188 189 190 191
}

// Clear removes a backoff record. Clients should call this after a
// successful Dial.
Steven Allen's avatar
Steven Allen committed
192
func (db *DialBackoff) Clear(p peer.ID) {
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
193
	db.lock.Lock()
Jeromy's avatar
Jeromy committed
194
	defer db.lock.Unlock()
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
195 196 197
	delete(db.entries, p)
}

Will Scott's avatar
Will Scott committed
198 199 200 201 202 203 204
func (db *DialBackoff) cleanup() {
	db.lock.Lock()
	defer db.lock.Unlock()
	now := time.Now()
	for p, e := range db.entries {
		good := false
		for _, backoff := range e {
205 206 207 208 209
			backoffTime := BackoffBase + BackoffCoef*time.Duration(backoff.tries*backoff.tries)
			if backoffTime > BackoffMax {
				backoffTime = BackoffMax
			}
			if now.Before(backoff.until.Add(backoffTime)) {
Will Scott's avatar
Will Scott committed
210 211 212 213 214
				good = true
				break
			}
		}
		if !good {
Will Scott's avatar
Will Scott committed
215
			delete(db.entries, p)
Will Scott's avatar
Will Scott committed
216 217 218 219
		}
	}
}

Steven Allen's avatar
Steven Allen committed
220
// DialPeer connects to a peer.
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
221 222 223 224
//
// The idea is that the client of Swarm does not need to know what network
// the connection will happen over. Swarm can use whichever it choses.
// This allows us to use various transport protocols, do NAT traversal/relay,
Steven Allen's avatar
Steven Allen committed
225
// etc. to achieve connection.
226
func (s *Swarm) DialPeer(ctx context.Context, p peer.ID) (network.Conn, error) {
227 228 229 230 231
	if s.gater != nil && !s.gater.InterceptPeerDial(p) {
		log.Debugf("gater disallowed outbound connection to peer %s", p.Pretty())
		return nil, &DialError{Peer: p, Cause: ErrGaterDisallowedConnection}
	}

Steven Allen's avatar
Steven Allen committed
232 233 234 235 236 237 238 239 240
	return s.dialPeer(ctx, p)
}

// internal dial method that returns an unwrapped conn
//
// It is gated by the swarm's dial synchronization systems: dialsync and
// dialbackoff.
func (s *Swarm) dialPeer(ctx context.Context, p peer.ID) (*Conn, error) {
	log.Debugf("[%s] swarm dialing peer [%s]", s.local, p)
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
241
	var logdial = lgbl.Dial("swarm", s.LocalPeer(), p, nil, nil)
242 243 244 245 246
	err := p.Validate()
	if err != nil {
		return nil, err
	}

Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
247 248 249 250 251
	if p == s.local {
		log.Event(ctx, "swarmDialSelf", logdial)
		return nil, ErrDialToSelf
	}

252
	defer log.EventBegin(ctx, "swarmDialAttemptSync", p).Done()
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
253 254

	// check if we already have an open connection first
Steven Allen's avatar
Steven Allen committed
255
	conn := s.bestConnToPeer(p)
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
256 257 258 259
	if conn != nil {
		return conn, nil
	}

260
	// apply the DialPeer timeout
261
	ctx, cancel := context.WithTimeout(ctx, network.GetDialPeerTimeout(ctx))
262 263
	defer cancel()

264
	conn, err = s.dsync.DialLock(ctx, p)
265 266
	if err == nil {
		return conn, nil
Steven Allen's avatar
Steven Allen committed
267
	}
268

Steven Allen's avatar
Steven Allen committed
269
	log.Debugf("network for %s finished dialing %s", s.local, p)
270 271 272 273 274 275 276 277 278 279 280 281

	if ctx.Err() != nil {
		// Context error trumps any dial errors as it was likely the ultimate cause.
		return nil, ctx.Err()
	}

	if s.ctx.Err() != nil {
		// Ok, so the swarm is shutting down.
		return nil, ErrSwarmClosed
	}

	return nil, err
282
}
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
283

284 285 286
// doDial is an ugly shim method to retain all the logging and backoff logic
// of the old dialsync code
func (s *Swarm) doDial(ctx context.Context, p peer.ID) (*Conn, error) {
Steven Allen's avatar
Steven Allen committed
287 288 289 290 291 292 293 294
	// Short circuit.
	// By the time we take the dial lock, we may already *have* a connection
	// to the peer.
	c := s.bestConnToPeer(p)
	if c != nil {
		return c, nil
	}

Steven Allen's avatar
Steven Allen committed
295 296
	logdial := lgbl.Dial("swarm", s.LocalPeer(), p, nil, nil)

297 298 299
	// ok, we have been charged to dial! let's do it.
	// if it succeeds, dial will add the conn to the swarm itself.
	defer log.EventBegin(ctx, "swarmDialAttemptStart", logdial).Done()
Steven Allen's avatar
Steven Allen committed
300 301

	conn, err := s.dial(ctx, p)
302
	if err != nil {
Steven Allen's avatar
Steven Allen committed
303 304 305 306 307 308 309 310 311
		conn = s.bestConnToPeer(p)
		if conn != nil {
			// Hm? What error?
			// Could have canceled the dial because we received a
			// connection or some other random reason.
			// Just ignore the error and return the connection.
			log.Debugf("ignoring dial error because we have a connection: %s", err)
			return conn, nil
		}
312

Steven Allen's avatar
Steven Allen committed
313
		// ok, we failed.
314
		return nil, err
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
315
	}
316
	return conn, nil
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
317 318
}

Steven Allen's avatar
Steven Allen committed
319 320 321 322 323
// canDial reports whether the swarm has a transport for dialing addr and
// that transport says it can dial it.
func (s *Swarm) canDial(addr ma.Multiaddr) bool {
	if tpt := s.TransportForDialing(addr); tpt != nil {
		return tpt.CanDial(addr)
	}
	return false
}

Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
324 325 326 327 328 329 330 331 332 333 334
// dial is the actual swarm's dial logic, gated by Dial.
func (s *Swarm) dial(ctx context.Context, p peer.ID) (*Conn, error) {
	var logdial = lgbl.Dial("swarm", s.LocalPeer(), p, nil, nil)
	if p == s.local {
		log.Event(ctx, "swarmDialDoDialSelf", logdial)
		return nil, ErrDialToSelf
	}
	defer log.EventBegin(ctx, "swarmDialDo", logdial).Done()
	logdial["dial"] = "failure" // start off with failure. set to "success" at the end.

	sk := s.peers.PrivKey(s.local)
Matt Joiner's avatar
Matt Joiner committed
335
	logdial["encrypted"] = sk != nil // log whether this will be an encrypted dial or not.
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
336 337 338 339 340
	if sk == nil {
		// fine for sk to be nil, just log.
		log.Debug("Dial not given PrivateKey, so WILL NOT SECURE conn.")
	}

Jeromy's avatar
Jeromy committed
341
	//////
Matt Joiner's avatar
Matt Joiner committed
342 343
	peerAddrs := s.peers.Addrs(p)
	if len(peerAddrs) == 0 {
344
		return nil, &DialError{Peer: p, Cause: ErrNoAddresses}
Matt Joiner's avatar
Matt Joiner committed
345
	}
346
	goodAddrs := s.filterKnownUndialables(p, peerAddrs)
Matt Joiner's avatar
Matt Joiner committed
347
	if len(goodAddrs) == 0 {
348
		return nil, &DialError{Peer: p, Cause: ErrNoGoodAddresses}
Matt Joiner's avatar
Matt Joiner committed
349
	}
Aarsh Shah's avatar
Aarsh Shah committed
350 351 352

	/////// Check backoff andnRank addresses
	var nonBackoff bool
353
	for _, a := range goodAddrs {
Will Scott's avatar
Will Scott committed
354 355 356 357
		// skip addresses in back-off
		if !s.backf.Backoff(p, a) {
			nonBackoff = true
		}
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
358
	}
Will Scott's avatar
Will Scott committed
359 360 361
	if !nonBackoff {
		return nil, ErrDialBackoff
	}
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
362

Aarsh Shah's avatar
Aarsh Shah committed
363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403
	// ranks addresses in descending order of preference for dialing
	// Private UDP > Public UDP > Private TCP > Public TCP > UDP Relay server > TCP Relay server
	rankAddrsFnc := func(addrs []ma.Multiaddr) []ma.Multiaddr {
		var localUdpAddrs []ma.Multiaddr // private udp
		var relayUdpAddrs []ma.Multiaddr // relay udp
		var othersUdp []ma.Multiaddr     // public udp

		var localFdAddrs []ma.Multiaddr // private fd consuming
		var relayFdAddrs []ma.Multiaddr //  relay fd consuming
		var othersFd []ma.Multiaddr     // public fd consuming

		for _, a := range addrs {
			if _, err := a.ValueForProtocol(ma.P_CIRCUIT); err == nil {
				if s.IsFdConsumingAddr(a) {
					relayFdAddrs = append(relayFdAddrs, a)
					continue
				}
				relayUdpAddrs = append(relayUdpAddrs, a)
			} else if manet.IsPrivateAddr(a) {
				if s.IsFdConsumingAddr(a) {
					localFdAddrs = append(localFdAddrs, a)
					continue
				}
				localUdpAddrs = append(localUdpAddrs, a)
			} else {
				if s.IsFdConsumingAddr(a) {
					othersFd = append(othersFd, a)
					continue
				}
				othersUdp = append(othersUdp, a)
			}
		}

		relays := append(relayUdpAddrs, relayFdAddrs...)
		fds := append(localFdAddrs, othersFd...)

		return append(append(append(localUdpAddrs, othersUdp...), fds...), relays...)
	}

	connC, dialErr := s.dialAddrs(ctx, p, rankAddrsFnc(goodAddrs))

404 405
	if dialErr != nil {
		logdial["error"] = dialErr.Cause.Error()
406 407 408 409
		switch dialErr.Cause {
		case context.Canceled, context.DeadlineExceeded:
			// Always prefer the context errors as we rely on being
			// able to check them.
410 411 412
			//
			// Removing this will BREAK backoff (causing us to
			// backoff when canceling dials).
413
			return nil, dialErr.Cause
414 415
		}
		return nil, dialErr
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
416
	}
Steven Allen's avatar
Steven Allen committed
417 418 419 420
	logdial["conn"] = logging.Metadata{
		"localAddr":  connC.LocalMultiaddr(),
		"remoteAddr": connC.RemoteMultiaddr(),
	}
421
	swarmC, err := s.addConn(connC, network.DirOutbound)
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
422
	if err != nil {
Jeromy's avatar
Jeromy committed
423
		logdial["error"] = err.Error()
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
424
		connC.Close() // close the connection. didn't work out :(
425
		return nil, &DialError{Peer: p, Cause: err}
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
426 427 428 429 430 431
	}

	logdial["dial"] = "success"
	return swarmC, nil
}

432 433 434 435 436
// filterKnownUndialables takes a list of multiaddrs, and removes those
// that we definitely don't want to dial: addresses configured to be blocked,
// IPv6 link-local addresses, addresses without a dial-capable transport,
// and addresses that we know to be our own.
// This is an optimization to avoid wasting time on dials that we know are going to fail.
437
func (s *Swarm) filterKnownUndialables(p peer.ID, addrs []ma.Multiaddr) []ma.Multiaddr {
438 439 440 441 442 443 444 445 446 447 448 449 450 451 452
	lisAddrs, _ := s.InterfaceListenAddresses()
	var ourAddrs []ma.Multiaddr
	for _, addr := range lisAddrs {
		protos := addr.Protocols()
		// we're only sure about filtering out /ip4 and /ip6 addresses, so far
		if len(protos) == 2 && (protos[0].Code == ma.P_IP4 || protos[0].Code == ma.P_IP6) {
			ourAddrs = append(ourAddrs, addr)
		}
	}

	return addrutil.FilterAddrs(addrs,
		addrutil.SubtractFilter(ourAddrs...),
		s.canDial,
		// TODO: Consider allowing link-local addresses
		addrutil.AddrOverNonLocalIP,
453 454 455
		func(addr ma.Multiaddr) bool {
			return s.gater == nil || s.gater.InterceptAddrDial(p, addr)
		},
456 457 458
	)
}

Aarsh Shah's avatar
Aarsh Shah committed
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
func (s *Swarm) dialAddrs(ctx context.Context, p peer.ID, remoteAddrs []ma.Multiaddr) (transport.CapableConn, *DialError) {
	/*
		This slice-to-chan code is temporary, the peerstore can currently provide
		a channel as an interface for receiving addresses, but more thought
		needs to be put into the execution. For now, this allows us to use
		the improved rate limiter, while maintaining the outward behaviour
		that we previously had (halting a dial when we run out of addrs)
	*/
	var remoteAddrChan chan ma.Multiaddr
	if len(remoteAddrs) > 0 {
		remoteAddrChan = make(chan ma.Multiaddr, len(remoteAddrs))
		for i := range remoteAddrs {
			remoteAddrChan <- remoteAddrs[i]
		}
		close(remoteAddrChan)
	}

Jeromy's avatar
Jeromy committed
476
	log.Debugf("%s swarm dialing %s", s.local, p)
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
477 478 479 480

	ctx, cancel := context.WithCancel(ctx)
	defer cancel() // cancel work when we exit func

Jeromy's avatar
Jeromy committed
481 482
	// use a single response type instead of errs and conns, reduces complexity *a ton*
	respch := make(chan dialResult)
483
	err := &DialError{Peer: p}
Jeromy's avatar
Jeromy committed
484

485 486
	defer s.limiter.clearAllPeerDials(p)

Jeromy's avatar
Jeromy committed
487
	var active int
488
dialLoop:
Aarsh Shah's avatar
Aarsh Shah committed
489
	for remoteAddrChan != nil || active > 0 {
490 491 492
		// Check for context cancellations and/or responses first.
		select {
		case <-ctx.Done():
493
			break dialLoop
494 495 496 497
		case resp := <-respch:
			active--
			if resp.Err != nil {
				// Errors are normal, lots of dials will fail
Will Scott's avatar
Will Scott committed
498 499 500 501
				if resp.Err != context.Canceled {
					s.backf.AddBackoff(p, resp.Addr)
				}

tg's avatar
tg committed
502
				log.Infof("got error on dial: %s", resp.Err)
503
				err.recordErr(resp.Addr, resp.Err)
504 505 506 507 508 509 510 511 512 513
			} else if resp.Conn != nil {
				return resp.Conn, nil
			}

			// We got a result, try again from the top.
			continue
		default:
		}

		// Now, attempt to dial.
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
514
		select {
Aarsh Shah's avatar
Aarsh Shah committed
515
		case addr, ok := <-remoteAddrChan:
Jeromy's avatar
Jeromy committed
516
			if !ok {
Aarsh Shah's avatar
Aarsh Shah committed
517
				remoteAddrChan = nil
Jeromy's avatar
Jeromy committed
518
				continue
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
519 520
			}

Jeromy's avatar
Jeromy committed
521 522 523
			s.limitedDial(ctx, p, addr, respch)
			active++
		case <-ctx.Done():
524
			break dialLoop
Jeromy's avatar
Jeromy committed
525 526 527 528
		case resp := <-respch:
			active--
			if resp.Err != nil {
				// Errors are normal, lots of dials will fail
Will Scott's avatar
Will Scott committed
529 530 531 532
				if resp.Err != context.Canceled {
					s.backf.AddBackoff(p, resp.Addr)
				}

tg's avatar
tg committed
533
				log.Infof("got error on dial: %s", resp.Err)
534
				err.recordErr(resp.Addr, resp.Err)
Jeromy's avatar
Jeromy committed
535 536
			} else if resp.Conn != nil {
				return resp.Conn, nil
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
537 538 539
			}
		}
	}
tg's avatar
tg committed
540

541 542 543
	if ctxErr := ctx.Err(); ctxErr != nil {
		err.Cause = ctxErr
	} else if len(err.DialErrors) == 0 {
544
		err.Cause = network.ErrNoRemoteAddrs
545 546
	} else {
		err.Cause = ErrAllDialsFailed
tg's avatar
tg committed
547
	}
548
	return nil, err
Jeromy's avatar
Jeromy committed
549 550
}

Jeromy's avatar
Jeromy committed
551 552 553
// limitedDial will start a dial to the given peer when
// it is able, respecting the various different types of rate
// limiting that occur without using extra goroutines per addr
Jeromy's avatar
Jeromy committed
554 555 556 557 558 559 560
func (s *Swarm) limitedDial(ctx context.Context, p peer.ID, a ma.Multiaddr, resp chan dialResult) {
	s.limiter.AddDialJob(&dialJob{
		addr: a,
		peer: p,
		resp: resp,
		ctx:  ctx,
	})
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
561 562
}

563
func (s *Swarm) dialAddr(ctx context.Context, p peer.ID, addr ma.Multiaddr) (transport.CapableConn, error) {
Steven Allen's avatar
Steven Allen committed
564 565 566 567
	// Just to double check. Costs nothing.
	if s.local == p {
		return nil, ErrDialToSelf
	}
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
568 569
	log.Debugf("%s swarm dialing %s %s", s.local, p, addr)

570 571
	tpt := s.TransportForDialing(addr)
	if tpt == nil {
Steven Allen's avatar
Steven Allen committed
572
		return nil, ErrNoTransport
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
573 574
	}

575
	connC, err := tpt.Dial(ctx, addr, p)
Steven Allen's avatar
Steven Allen committed
576
	if err != nil {
577
		return nil, err
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
578 579
	}

Steven Allen's avatar
Steven Allen committed
580 581
	// Trust the transport? Yeah... right.
	if connC.RemotePeer() != p {
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
582
		connC.Close()
583
		err = fmt.Errorf("BUG in transport %T: tried to dial %s, dialed %s", p, connC.RemotePeer(), tpt)
Steven Allen's avatar
Steven Allen committed
584 585
		log.Error(err)
		return nil, err
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
586 587 588 589 590
	}

	// success! we got one!
	return connC, nil
}
Aarsh Shah's avatar
Aarsh Shah committed
591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611

// IsFdConsumingAddr reports whether dialing addr requires a file descriptor.
//
// Only the part of the address before the first circuit component is
// inspected, so for a circuit-relay address the decision is based on the
// address of the relay server/proxy. That part is deemed FD consuming when
// it carries the TCP or UNIX protocol. If there is no such part, the address
// is treated as FD consuming, for safety.
//
// TODO We should have a `IsFdConsuming() bool` method on the `Transport`
// interface in go-libp2p-core/transport.
func (s *Swarm) IsFdConsumingAddr(addr ma.Multiaddr) bool {
	isCircuit := func(c ma.Component) bool {
		return c.Protocol().Code == ma.P_CIRCUIT
	}
	head, _ := ma.SplitFunc(addr, isCircuit)

	// for safety
	if head == nil {
		return true
	}

	_, tcpErr := head.ValueForProtocol(ma.P_TCP)
	_, unixErr := head.ValueForProtocol(ma.P_UNIX)
	if tcpErr != nil && unixErr != nil {
		return false
	}
	return true
}