swarm_dial.go 20.6 KB
Newer Older
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
1 2 3
package swarm

import (
4
	"context"
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
5 6 7 8 9
	"errors"
	"fmt"
	"sync"
	"time"

10 11 12
	"github.com/libp2p/go-libp2p-core/network"
	"github.com/libp2p/go-libp2p-core/peer"
	"github.com/libp2p/go-libp2p-core/transport"
Aarsh Shah's avatar
Aarsh Shah committed
13 14

	addrutil "github.com/libp2p/go-addr-util"
15 16
	lgbl "github.com/libp2p/go-libp2p-loggables"

Jeromy's avatar
Jeromy committed
17
	ma "github.com/multiformats/go-multiaddr"
18
	manet "github.com/multiformats/go-multiaddr/net"
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
19 20 21 22 23 24 25 26 27 28 29 30 31 32
)

// Diagram of dial sync:
//
//   many callers of Dial()   synched w.  dials many addrs       results to callers
//  ----------------------\    dialsync    use earliest            /--------------
//  -----------------------\              |----------\           /----------------
//  ------------------------>------------<-------     >---------<-----------------
//  -----------------------|              \----x                 \----------------
//  ----------------------|                \-----x                \---------------
//                                         any may fail          if no addr at end
//                                                             retry dialAttempt x

// Sentinel errors produced by the swarm dialer.
var (
	// ErrDialBackoff is returned by the backoff code when a given peer has
	// been dialed too frequently.
	ErrDialBackoff = errors.New("dial backoff")

	// ErrDialToSelf is returned if we attempt to dial our own peer.
	ErrDialToSelf = errors.New("dial to self attempted")

	// ErrNoTransport is returned when we don't know a transport for the
	// given multiaddr.
	ErrNoTransport = errors.New("no transport for protocol")

	// ErrAllDialsFailed is returned when connecting to a peer has
	// ultimately failed.
	ErrAllDialsFailed = errors.New("all dials failed")

	// ErrNoAddresses is returned when we fail to find any addresses for a
	// peer we're trying to dial.
	ErrNoAddresses = errors.New("no addresses")

	// ErrNoGoodAddresses is returned when we find addresses for a peer but
	// can't use any of them.
	ErrNoGoodAddresses = errors.New("no good addresses")

	// ErrGaterDisallowedConnection is returned when the gater prevents us
	// from forming a connection with a peer.
	ErrGaterDisallowedConnection = errors.New("gater disallows connection to peer")
)

60
// Delays inserted between consecutive dial batches. delayForNextDial picks
// one of them based on the last address dialed in the previous batch.
var (
	// delayDialPrivateAddr is used after a batch ending in a private address.
	delayDialPrivateAddr = 1 * time.Millisecond
	// delayDialPublicAddr is used after a batch ending in a public, non-relay address.
	delayDialPublicAddr = 5 * time.Millisecond
	// delayDialRelayAddr is used after a batch ending in a circuit (relay) address.
	delayDialRelayAddr = 10 * time.Millisecond
)

Steven Allen's avatar
Steven Allen committed
66
// DialAttempts governs how many times a goroutine will try to dial a given
// peer.
//
// Note: this is down to one, as we have _too many dials_ atm. To add back in,
// add loop back in Dial(.)
const DialAttempts = 1
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
70

Steven Allen's avatar
Steven Allen committed
71 72 73
// ConcurrentFdDials is the number of concurrent outbound dials over transports
// that consume file descriptors.
const ConcurrentFdDials = 160
Jeromy's avatar
Jeromy committed
74

Steven Allen's avatar
Steven Allen committed
75 76 77
// DefaultPerPeerRateLimit is the number of concurrent outbound dials to make
// per peer.
const DefaultPerPeerRateLimit = 8
Jeromy's avatar
Jeromy committed
78

Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
// dialbackoff is a struct used to avoid over-dialing the same, dead peers.
// Whenever we totally time out on a peer (all three attempts), we add them
// to dialbackoff. Then, whenever goroutines would _wait_ (dialsync), they
// check dialbackoff. If it's there, they don't wait and exit promptly with
// an error. (the single goroutine that is actually dialing continues to
// dial). If a dial is successful, the peer is removed from backoff.
// Example:
//
//  for {
//  	if ok, wait := dialsync.Lock(p); !ok {
//  		if backoff.Backoff(p) {
//  			return errDialFailed
//  		}
//  		<-wait
//  		continue
//  	}
//  	defer dialsync.Unlock(p)
//  	c, err := actuallyDial(p)
//  	if err != nil {
//  		dialbackoff.AddBackoff(p)
//  		continue
//  	}
//  	dialbackoff.Clear(p)
//  }
//
Jeromy's avatar
Jeromy committed
104

Steven Allen's avatar
Steven Allen committed
105 106
// DialBackoff is a type for tracking peer dial backoffs.
//
107
// * It's safe to use its zero value.
Steven Allen's avatar
Steven Allen committed
108 109 110
// * It's thread-safe.
// * It's *not* safe to move this type after using.
type DialBackoff struct {
Will Scott's avatar
Will Scott committed
111
	entries map[peer.ID]map[string]*backoffAddr
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
112 113 114
	lock    sync.RWMutex
}

Will Scott's avatar
Will Scott committed
115
// backoffAddr is the backoff record kept for a single (peer, address) pair.
type backoffAddr struct {
	// tries counts how many times a backoff has been added for the address.
	tries int
	// until is the instant before which the address should not be dialed.
	until time.Time
}

Will Scott's avatar
Will Scott committed
120
func (db *DialBackoff) init(ctx context.Context) {
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
121
	if db.entries == nil {
Will Scott's avatar
Will Scott committed
122
		db.entries = make(map[peer.ID]map[string]*backoffAddr)
Will Scott's avatar
Will Scott committed
123 124 125 126 127 128
	}
	go db.background(ctx)
}

func (db *DialBackoff) background(ctx context.Context) {
	ticker := time.NewTicker(BackoffMax)
Will Scott's avatar
Will Scott committed
129
	defer ticker.Stop()
Will Scott's avatar
Will Scott committed
130 131 132 133 134 135 136
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			db.cleanup()
		}
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
137 138 139 140
	}
}

// Backoff returns whether the client should backoff from dialing
Will Scott's avatar
Will Scott committed
141 142
// peer p at address addr
func (db *DialBackoff) Backoff(p peer.ID, addr ma.Multiaddr) (backoff bool) {
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
143
	db.lock.Lock()
Jeromy's avatar
Jeromy committed
144 145
	defer db.lock.Unlock()

146 147
	ap, found := db.entries[p][string(addr.Bytes())]
	return found && time.Now().Before(ap.until)
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
148 149
}

Steven Allen's avatar
Steven Allen committed
150 151 152 153 154 155 156 157
// Tunable parameters of the dial backoff computation.
var (
	// BackoffBase is the base amount of time to backoff (default: 5s).
	BackoffBase = 5 * time.Second

	// BackoffCoef is the backoff coefficient (default: 1s).
	BackoffCoef = 1 * time.Second

	// BackoffMax is the maximum backoff time (default: 5m).
	BackoffMax = 5 * time.Minute
)
Jeromy's avatar
Jeromy committed
158

Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
159 160 161
// AddBackoff lets other nodes know that we've entered backoff with
// peer p, so dialers should not wait unnecessarily. We still will
// attempt to dial with one goroutine, in case we get through.
Steven Allen's avatar
Steven Allen committed
162 163 164 165 166 167 168
//
// Backoff is not exponential, it's quadratic and computed according to the
// following formula:
//
//     BackoffBase + BakoffCoef * PriorBackoffs^2
//
// Where PriorBackoffs is the number of previous backoffs.
Will Scott's avatar
Will Scott committed
169
func (db *DialBackoff) AddBackoff(p peer.ID, addr ma.Multiaddr) {
Will Scott's avatar
Will Scott committed
170
	saddr := string(addr.Bytes())
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
171
	db.lock.Lock()
Jeromy's avatar
Jeromy committed
172 173 174
	defer db.lock.Unlock()
	bp, ok := db.entries[p]
	if !ok {
175 176
		bp = make(map[string]*backoffAddr, 1)
		db.entries[p] = bp
Will Scott's avatar
Will Scott committed
177
	}
Will Scott's avatar
Will Scott committed
178
	ba, ok := bp[saddr]
Will Scott's avatar
Will Scott committed
179
	if !ok {
Will Scott's avatar
Will Scott committed
180
		bp[saddr] = &backoffAddr{
Jeromy's avatar
Jeromy committed
181
			tries: 1,
Steven Allen's avatar
Steven Allen committed
182
			until: time.Now().Add(BackoffBase),
Jeromy's avatar
Jeromy committed
183 184 185 186
		}
		return
	}

Will Scott's avatar
Will Scott committed
187
	backoffTime := BackoffBase + BackoffCoef*time.Duration(ba.tries*ba.tries)
Steven Allen's avatar
Steven Allen committed
188 189
	if backoffTime > BackoffMax {
		backoffTime = BackoffMax
Jeromy's avatar
Jeromy committed
190
	}
Will Scott's avatar
Will Scott committed
191 192
	ba.until = time.Now().Add(backoffTime)
	ba.tries++
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
193 194 195 196
}

// Clear removes a backoff record. Clients should call this after a
// successful Dial.
Steven Allen's avatar
Steven Allen committed
197
func (db *DialBackoff) Clear(p peer.ID) {
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
198
	db.lock.Lock()
Jeromy's avatar
Jeromy committed
199
	defer db.lock.Unlock()
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
200 201 202
	delete(db.entries, p)
}

Will Scott's avatar
Will Scott committed
203 204 205 206 207 208 209
func (db *DialBackoff) cleanup() {
	db.lock.Lock()
	defer db.lock.Unlock()
	now := time.Now()
	for p, e := range db.entries {
		good := false
		for _, backoff := range e {
210 211 212 213 214
			backoffTime := BackoffBase + BackoffCoef*time.Duration(backoff.tries*backoff.tries)
			if backoffTime > BackoffMax {
				backoffTime = BackoffMax
			}
			if now.Before(backoff.until.Add(backoffTime)) {
Will Scott's avatar
Will Scott committed
215 216 217 218 219
				good = true
				break
			}
		}
		if !good {
Will Scott's avatar
Will Scott committed
220
			delete(db.entries, p)
Will Scott's avatar
Will Scott committed
221 222 223 224
		}
	}
}

Steven Allen's avatar
Steven Allen committed
225
// DialPeer connects to a peer.
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
226 227 228 229
//
// The idea is that the client of Swarm does not need to know what network
// the connection will happen over. Swarm can use whichever it choses.
// This allows us to use various transport protocols, do NAT traversal/relay,
Steven Allen's avatar
Steven Allen committed
230
// etc. to achieve connection.
231
func (s *Swarm) DialPeer(ctx context.Context, p peer.ID) (network.Conn, error) {
232 233 234 235 236
	if s.gater != nil && !s.gater.InterceptPeerDial(p) {
		log.Debugf("gater disallowed outbound connection to peer %s", p.Pretty())
		return nil, &DialError{Peer: p, Cause: ErrGaterDisallowedConnection}
	}

Steven Allen's avatar
Steven Allen committed
237 238 239 240 241 242 243 244 245
	return s.dialPeer(ctx, p)
}

// internal dial method that returns an unwrapped conn
//
// It is gated by the swarm's dial synchronization systems: dialsync and
// dialbackoff.
func (s *Swarm) dialPeer(ctx context.Context, p peer.ID) (*Conn, error) {
	log.Debugf("[%s] swarm dialing peer [%s]", s.local, p)
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
246
	var logdial = lgbl.Dial("swarm", s.LocalPeer(), p, nil, nil)
247 248 249 250 251
	err := p.Validate()
	if err != nil {
		return nil, err
	}

Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
252 253 254 255 256
	if p == s.local {
		log.Event(ctx, "swarmDialSelf", logdial)
		return nil, ErrDialToSelf
	}

257
	defer log.EventBegin(ctx, "swarmDialAttemptSync", p).Done()
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
258

Steven Allen's avatar
Steven Allen committed
259 260 261
	// check if we already have an open (usable) connection first
	conn := s.bestAcceptableConnToPeer(ctx, p)
	if conn != nil {
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
262 263 264
		return conn, nil
	}

265
	// apply the DialPeer timeout
266
	ctx, cancel := context.WithTimeout(ctx, network.GetDialPeerTimeout(ctx))
267 268
	defer cancel()

269
	conn, err = s.dsync.DialLock(ctx, p)
270 271
	if err == nil {
		return conn, nil
Steven Allen's avatar
Steven Allen committed
272
	}
273

Steven Allen's avatar
Steven Allen committed
274
	log.Debugf("network for %s finished dialing %s", s.local, p)
275 276 277 278 279 280 281 282 283 284 285 286

	if ctx.Err() != nil {
		// Context error trumps any dial errors as it was likely the ultimate cause.
		return nil, ctx.Err()
	}

	if s.ctx.Err() != nil {
		// Ok, so the swarm is shutting down.
		return nil, ErrSwarmClosed
	}

	return nil, err
287
}
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
288

289 290 291 292
///////////////////////////////////////////////////////////////////////////////////
// lo and behold, The Dialer
// TODO explain how all this works
//////////////////////////////////////////////////////////////////////////////////
293

vyzo's avatar
vyzo committed
294 295 296
type DialRequest struct {
	Ctx   context.Context
	Resch chan DialResponse
297 298
}

vyzo's avatar
vyzo committed
299 300 301
type DialResponse struct {
	Conn *Conn
	Err  error
302 303 304
}

// dialWorker is an active dial goroutine that synchronizes and executes concurrent dials
305
func (s *Swarm) dialWorker(ctx context.Context, p peer.ID, reqch <-chan DialRequest) error {
306
	if p == s.local {
307
		return ErrDialToSelf
Steven Allen's avatar
Steven Allen committed
308 309
	}

310 311
	go s.dialWorkerLoop(ctx, p, reqch)
	return nil
312
}
Steven Allen's avatar
Steven Allen committed
313

vyzo's avatar
vyzo committed
314
func (s *Swarm) dialWorkerLoop(ctx context.Context, p peer.ID, reqch <-chan DialRequest) {
315
	defer s.limiter.clearAllPeerDials(p)
Steven Allen's avatar
Steven Allen committed
316

317
	type pendRequest struct {
vyzo's avatar
vyzo committed
318
		req   DialRequest               // the original request
319 320 321 322 323
		err   *DialError                // dial error accumulator
		addrs map[ma.Multiaddr]struct{} // pending addr dials
	}

	type addrDial struct {
324
		addr     ma.Multiaddr
325 326 327 328 329 330 331 332 333 334
		ctx      context.Context
		conn     *Conn
		err      error
		requests []int
	}

	reqno := 0
	requests := make(map[int]*pendRequest)
	pending := make(map[ma.Multiaddr]*addrDial)

335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362
	dispatchError := func(ad *addrDial, err error) {
		ad.err = err
		for _, reqno := range ad.requests {
			pr, ok := requests[reqno]
			if !ok {
				// has already been dispatched
				continue
			}

			// accumulate the error
			pr.err.recordErr(ad.addr, err)

			delete(pr.addrs, ad.addr)
			if len(pr.addrs) == 0 {
				// all addrs have erred, dispatch dial error
				pr.req.Resch <- DialResponse{Err: pr.err}
				delete(requests, reqno)
			}
		}

		ad.requests = nil

		// if it was a backoff, clear the address dial so that it doesn't inhibit new dial requests
		if err == ErrDialBackoff {
			delete(pending, ad.addr)
		}
	}

363
	var triggerDial <-chan time.Time
vyzo's avatar
vyzo committed
364
	var triggerTimer *time.Timer
365 366 367
	triggerNow := make(chan time.Time)
	close(triggerNow)

vyzo's avatar
vyzo committed
368 369 370 371 372 373
	defer func() {
		if triggerTimer != nil {
			triggerTimer.Stop()
		}
	}()

374 375 376 377
	var nextDial []ma.Multiaddr
	active := 0
	done := false

378
	resch := make(chan dialResult)
379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396

loop:
	for {
		select {
		case req, ok := <-reqch:
			if !ok {
				// request channel has been closed, wait for pending dials to complete
				if active > 0 {
					done = true
					reqch = nil
					triggerDial = nil
					continue loop
				}

				// no active dials, we are done
				return
			}

vyzo's avatar
vyzo committed
397
			c := s.bestAcceptableConnToPeer(req.Ctx, p)
398
			if c != nil {
vyzo's avatar
vyzo committed
399
				req.Resch <- DialResponse{Conn: c}
400 401 402
				continue loop
			}

vyzo's avatar
vyzo committed
403
			addrs, err := s.addrsForDial(req.Ctx, p)
404
			if err != nil {
vyzo's avatar
vyzo committed
405
				req.Resch <- DialResponse{Err: err}
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436
				continue loop
			}

			// at this point, len(addrs) > 0 or else it would be error from addrsForDial
			// ranke them to process in order
			addrs = s.rankAddrs(addrs)

			// create the pending request object
			pr := &pendRequest{
				req:   req,
				err:   &DialError{Peer: p},
				addrs: make(map[ma.Multiaddr]struct{}),
			}
			for _, a := range addrs {
				pr.addrs[a] = struct{}{}
			}

			// check if any of the addrs has been successfully dialed and accumulate
			// errors from complete dials while collecting new addrs to dial/join
			var todial []ma.Multiaddr
			var tojoin []*addrDial

			for _, a := range addrs {
				ad, ok := pending[a]
				if !ok {
					todial = append(todial, a)
					continue
				}

				if ad.conn != nil {
					// dial to this addr was successful, complete the request
vyzo's avatar
vyzo committed
437
					req.Resch <- DialResponse{Conn: ad.conn}
438 439 440 441 442 443 444
					continue loop
				}

				if ad.err != nil {
					// dial to this addr errored, accumulate the error
					pr.err.recordErr(a, ad.err)
					delete(pr.addrs, a)
445
					continue
446 447 448 449 450 451 452 453
				}

				// dial is still pending, add to the join list
				tojoin = append(tojoin, ad)
			}

			if len(todial) == 0 && len(tojoin) == 0 {
				// all request applicable addrs have been dialed, we must have errored
vyzo's avatar
vyzo committed
454
				req.Resch <- DialResponse{Err: pr.err}
455 456 457 458 459 460 461 462 463 464 465 466 467
				continue loop
			}

			// the request has some pending or new dials, track it and schedule new dials
			reqno++
			requests[reqno] = pr

			for _, ad := range tojoin {
				ad.requests = append(ad.requests, reqno)
			}

			if len(todial) > 0 {
				for _, a := range todial {
468
					pending[a] = &addrDial{addr: a, ctx: req.Ctx, requests: []int{reqno}}
469 470 471 472 473
				}

				nextDial = append(nextDial, todial...)
				nextDial = s.rankAddrs(nextDial)

474 475
				// trigger a new dial now to account for the new addrs we added
				triggerDial = triggerNow
476 477 478
			}

		case <-triggerDial:
479 480 481 482 483 484 485 486 487
			// we dial batches of addresses together, logically belonging to the same batch
			// after a batch of addresses has been dialed, we add a delay before initiating the next batch
			dialed := false
			last := 0
			next := 0
			for i, addr := range nextDial {
				if dialed && !s.sameAddrBatch(nextDial[last], addr) {
					break
				}
488

489
				next = i + 1
490

491 492 493 494 495 496 497 498 499 500 501
				// spawn the dial
				ad := pending[addr]
				err := s.dialNextAddr(ad.ctx, p, addr, resch)
				if err != nil {
					dispatchError(ad, err)
					continue
				}

				dialed = true
				last = i
				active++
502 503
			}

504 505 506 507 508 509 510
			lastDial := nextDial[last]
			nextDial = nextDial[next:]
			if !dialed || len(nextDial) == 0 {
				// we didn't dial anything because of backoff or we don't have any more addresses
				triggerDial = nil
				continue loop
			}
511

512 513
			// select an appropriate delay for the next dial batch
			delay := s.delayForNextDial(lastDial)
vyzo's avatar
vyzo committed
514 515 516 517 518 519 520 521 522
			if triggerTimer == nil {
				triggerTimer = time.NewTimer(delay)
			} else {
				if !triggerTimer.Stop() && triggerDial != triggerTimer.C {
					<-triggerTimer.C
				}
				triggerTimer.Reset(delay)
			}
			triggerDial = triggerTimer.C
523 524 525 526 527

		case res := <-resch:
			active--

			if done && active == 0 {
528 529 530 531 532 533 534 535 536
				if res.Conn != nil {
					// we got an actual connection, but the dial has been cancelled
					// Should we close it? I think not, we should just add it to the swarm
					_, err := s.addConn(res.Conn, network.DirOutbound)
					if err != nil {
						// well duh, now we have to close it
						res.Conn.Close()
					}
				}
537 538 539
				return
			}

540
			ad := pending[res.Addr]
541

542 543 544 545 546 547 548
			if res.Conn != nil {
				// we got a connection, add it to the swarm
				conn, err := s.addConn(res.Conn, network.DirOutbound)
				if err != nil {
					// oops no, we failed to add it to the swarm
					res.Conn.Close()
					dispatchError(ad, err)
549 550 551
					if active == 0 && len(nextDial) > 0 {
						triggerDial = triggerNow
					}
552 553
					continue loop
				}
554

555 556
				// dispatch to still pending requests
				for _, reqno := range ad.requests {
557 558 559 560 561 562
					pr, ok := requests[reqno]
					if !ok {
						// it has already dispatched a connection
						continue
					}

563
					pr.req.Resch <- DialResponse{Conn: conn}
564 565 566
					delete(requests, reqno)
				}

567 568 569
				ad.conn = conn
				ad.requests = nil

570 571 572
				continue loop
			}

573 574 575
			// it must be an error -- add backoff if applicable and dispatch
			if res.Err != context.Canceled {
				s.backf.AddBackoff(p, res.Addr)
576
			}
577

578
			dispatchError(ad, res.Err)
579 580 581
			if active == 0 && len(nextDial) > 0 {
				triggerDial = triggerNow
			}
Steven Allen's avatar
Steven Allen committed
582
		}
583 584
	}
}
585

586 587 588 589 590 591 592 593 594
func (s *Swarm) addrsForDial(ctx context.Context, p peer.ID) ([]ma.Multiaddr, error) {
	peerAddrs := s.peers.Addrs(p)
	if len(peerAddrs) == 0 {
		return nil, ErrNoAddresses
	}

	goodAddrs := s.filterKnownUndialables(p, peerAddrs)
	if forceDirect, _ := network.GetForceDirectDial(ctx); forceDirect {
		goodAddrs = addrutil.FilterAddrs(goodAddrs, s.nonProxyAddr)
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
595
	}
596 597 598 599 600 601 602 603

	if len(goodAddrs) == 0 {
		return nil, ErrNoGoodAddresses
	}

	return goodAddrs, nil
}

604
func (s *Swarm) dialNextAddr(ctx context.Context, p peer.ID, addr ma.Multiaddr, resch chan dialResult) error {
605 606 607
	// check the dial backoff
	if forceDirect, _ := network.GetForceDirectDial(ctx); !forceDirect {
		if s.backf.Backoff(p, addr) {
608
			return ErrDialBackoff
609 610 611 612
		}
	}

	// start the dial
613
	s.limitedDial(ctx, p, addr, resch)
614

615
	return nil
616 617
}

618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638
// sameAddrBatch reports whether b belongs to the same dial batch as a.
// The category of a is determined first (relay, then expensive, then
// private/public) and b is then checked against that category. Public
// addresses must additionally agree on whether they consume a file
// descriptor.
func (s *Swarm) sameAddrBatch(a, b ma.Multiaddr) bool {
	switch {
	case s.IsRelayAddr(a):
		// a is a relay addr
		return s.IsRelayAddr(b)
	case s.IsExpensiveAddr(a):
		// a is an expensive addr
		return s.IsExpensiveAddr(b)
	case manet.IsPrivateAddr(a):
		// a is a private addr
		return manet.IsPrivateAddr(b)
	default:
		// a is a public addr
		return !manet.IsPrivateAddr(b) &&
			s.IsFdConsumingAddr(a) == s.IsFdConsumingAddr(b)
	}
}

639 640
func (s *Swarm) delayForNextDial(addr ma.Multiaddr) time.Duration {
	if _, err := addr.ValueForProtocol(ma.P_CIRCUIT); err == nil {
641
		return delayDialRelayAddr
642 643 644
	}

	if manet.IsPrivateAddr(addr) {
645
		return delayDialPrivateAddr
646 647
	}

648
	return delayDialPublicAddr
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
649 650
}

Steven Allen's avatar
Steven Allen committed
651 652 653 654 655
// canDial reports whether the swarm has a transport for dialing addr and
// that transport says it can dial it.
func (s *Swarm) canDial(addr ma.Multiaddr) bool {
	if t := s.TransportForDialing(addr); t != nil {
		return t.CanDial(addr)
	}
	return false
}

Aarsh Shah's avatar
Aarsh Shah committed
656 657 658 659 660
// nonProxyAddr reports whether addr can be dialed through a transport that
// is not a proxy. An address with no dialing transport is reported as false
// rather than dereferencing a nil transport.
func (s *Swarm) nonProxyAddr(addr ma.Multiaddr) bool {
	t := s.TransportForDialing(addr)
	return t != nil && !t.Proxy()
}

661 662 663 664 665
// ranks addresses in descending order of preference for dialing, with the following rules:
// NonRelay > Relay
// NonWS > WS
// Private > Public
// UDP > TCP
666
func (s *Swarm) rankAddrs(addrs []ma.Multiaddr) []ma.Multiaddr {
667 668 669 670 671 672 673 674 675 676 677 678 679
	addrTier := func(a ma.Multiaddr) (tier int) {
		if s.IsRelayAddr(a) {
			tier |= 0b1000
		}
		if s.IsExpensiveAddr(a) {
			tier |= 0b0100
		}
		if !manet.IsPrivateAddr(a) {
			tier |= 0b0010
		}
		if s.IsFdConsumingAddr(a) {
			tier |= 0b0001
		}
680

681 682
		return tier
	}
683

684
	tiers := make([][]ma.Multiaddr, 16)
685
	for _, a := range addrs {
686 687
		tier := addrTier(a)
		tiers[tier] = append(tiers[tier], a)
688 689
	}

690 691 692 693
	result := make([]ma.Multiaddr, 0, len(addrs))
	for _, tier := range tiers {
		result = append(result, tier...)
	}
694

695
	return result
696 697
}

698 699 700 701 702
// filterKnownUndialables takes a list of multiaddrs, and removes those
// that we definitely don't want to dial: addresses configured to be blocked,
// IPv6 link-local addresses, addresses without a dial-capable transport,
// and addresses that we know to be our own.
// This is an optimization to avoid wasting time on dials that we know are going to fail.
703
func (s *Swarm) filterKnownUndialables(p peer.ID, addrs []ma.Multiaddr) []ma.Multiaddr {
704 705 706 707 708
	lisAddrs, _ := s.InterfaceListenAddresses()
	var ourAddrs []ma.Multiaddr
	for _, addr := range lisAddrs {
		protos := addr.Protocols()
		// we're only sure about filtering out /ip4 and /ip6 addresses, so far
709
		if protos[0].Code == ma.P_IP4 || protos[0].Code == ma.P_IP6 {
710 711 712 713 714 715 716 717 718
			ourAddrs = append(ourAddrs, addr)
		}
	}

	return addrutil.FilterAddrs(addrs,
		addrutil.SubtractFilter(ourAddrs...),
		s.canDial,
		// TODO: Consider allowing link-local addresses
		addrutil.AddrOverNonLocalIP,
719 720 721
		func(addr ma.Multiaddr) bool {
			return s.gater == nil || s.gater.InterceptAddrDial(p, addr)
		},
722 723 724
	)
}

Jeromy's avatar
Jeromy committed
725 726 727
// limitedDial will start a dial to the given peer when
// it is able, respecting the various different types of rate
// limiting that occur without using extra goroutines per addr
Jeromy's avatar
Jeromy committed
728 729 730 731 732 733 734
func (s *Swarm) limitedDial(ctx context.Context, p peer.ID, a ma.Multiaddr, resp chan dialResult) {
	s.limiter.AddDialJob(&dialJob{
		addr: a,
		peer: p,
		resp: resp,
		ctx:  ctx,
	})
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
735 736
}

737
// dialAddr is the actual dial for an addr, indirectly invoked through the limiter
738
func (s *Swarm) dialAddr(ctx context.Context, p peer.ID, addr ma.Multiaddr) (transport.CapableConn, error) {
Steven Allen's avatar
Steven Allen committed
739 740 741 742
	// Just to double check. Costs nothing.
	if s.local == p {
		return nil, ErrDialToSelf
	}
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
743 744
	log.Debugf("%s swarm dialing %s %s", s.local, p, addr)

745 746
	tpt := s.TransportForDialing(addr)
	if tpt == nil {
Steven Allen's avatar
Steven Allen committed
747
		return nil, ErrNoTransport
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
748 749
	}

750
	connC, err := tpt.Dial(ctx, addr, p)
Steven Allen's avatar
Steven Allen committed
751
	if err != nil {
752
		return nil, err
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
753 754
	}

Steven Allen's avatar
Steven Allen committed
755 756
	// Trust the transport? Yeah... right.
	if connC.RemotePeer() != p {
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
757
		connC.Close()
758
		err = fmt.Errorf("BUG in transport %T: tried to dial %s, dialed %s", p, connC.RemotePeer(), tpt)
Steven Allen's avatar
Steven Allen committed
759 760
		log.Error(err)
		return nil, err
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
761 762 763 764 765
	}

	// success! we got one!
	return connC, nil
}
Aarsh Shah's avatar
Aarsh Shah committed
766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786

// IsFdConsumingAddr checks if any of the transport protocols in the address
// requires a file descriptor.
//
// TODO We should have a `IsFdConsuming() bool` method on the `Transport` interface in go-libp2p-core/transport.
// For now:
// A Non-circuit address which has the TCP/UNIX protocol is deemed FD consuming.
// For a circuit-relay address, we look at the address of the relay server/proxy
// and use the same logic as above to decide.
func (s *Swarm) IsFdConsumingAddr(addr ma.Multiaddr) bool {
	atCircuit := func(c ma.Component) bool {
		return c.Protocol().Code == ma.P_CIRCUIT
	}

	// only the part of the address before the circuit component matters
	head, _ := ma.SplitFunc(addr, atCircuit)
	if head == nil {
		// for safety
		return true
	}

	if _, err := head.ValueForProtocol(ma.P_TCP); err == nil {
		return true
	}
	_, err := head.ValueForProtocol(ma.P_UNIX)
	return err == nil
}
787 788 789 790 791 792 793 794 795 796 797

// IsExpensiveAddr reports whether addr contains the WS or the WSS protocol.
func (s *Swarm) IsExpensiveAddr(addr ma.Multiaddr) bool {
	if _, err := addr.ValueForProtocol(ma.P_WS); err == nil {
		return true
	}
	_, err := addr.ValueForProtocol(ma.P_WSS)
	return err == nil
}

// IsRelayAddr reports whether addr contains the circuit (relay) protocol.
func (s *Swarm) IsRelayAddr(addr ma.Multiaddr) bool {
	if _, err := addr.ValueForProtocol(ma.P_CIRCUIT); err != nil {
		return false
	}
	return true
}