messagequeue.go 17.7 KB
Newer Older
1 2 3 4
package messagequeue

import (
	"context"
dirkmc's avatar
dirkmc committed
5
	"math"
6
	"sync"
7 8 9
	"time"

	bsmsg "github.com/ipfs/go-bitswap/message"
dirkmc's avatar
dirkmc committed
10
	pb "github.com/ipfs/go-bitswap/message/pb"
11
	bsnet "github.com/ipfs/go-bitswap/network"
dirkmc's avatar
dirkmc committed
12 13
	bswl "github.com/ipfs/go-bitswap/wantlist"
	cid "github.com/ipfs/go-cid"
14
	logging "github.com/ipfs/go-log"
Raúl Kripalani's avatar
Raúl Kripalani committed
15
	peer "github.com/libp2p/go-libp2p-core/peer"
16
	"github.com/libp2p/go-libp2p/p2p/protocol/ping"
Dirk McCormick's avatar
Dirk McCormick committed
17
	"go.uber.org/zap"
18 19 20
)

var log = logging.Logger("bitswap")
Dirk McCormick's avatar
Dirk McCormick committed
21
var sflog = log.Desugar()
22

23 24
const (
	// defaultRebroadcastInterval is the rebroadcast interval a newly created
	// MessageQueue starts with.
	defaultRebroadcastInterval = 30 * time.Second
	// maxRetries is how many times sending a message is attempted before
	// giving up.
	maxRetries = 10
	// maxMessageSize is the maximum size of a message, in bytes.
	maxMessageSize = 2 * 1024 * 1024
	// sendErrorBackoff is how long to wait after a send error before trying
	// to connect again.
	sendErrorBackoff = 100 * time.Millisecond
	// maxPriority is the max priority as defined by the bitswap protocol.
	maxPriority = math.MaxInt32
	// sendMessageDebounce is the debounce duration applied before calling
	// sendMessage().
	sendMessageDebounce = 1 * time.Millisecond
	// sendMessageCutoff is the number of pending wants/cancels above which
	// the message is sent immediately instead of being debounced.
	sendMessageCutoff = 256
	// sendMessageMaxDelay is the longest time work may be debounced; once it
	// is reached the message is sent immediately.
	sendMessageMaxDelay = 20 * time.Millisecond
)
43

44
// MessageNetwork is any network that can connect peers and generate a message
45
// sender.
46 47 48
type MessageNetwork interface {
	ConnectTo(context.Context, peer.ID) error
	NewMessageSender(context.Context, peer.ID) (bsnet.MessageSender, error)
49 50
	Latency(peer.ID) time.Duration
	Ping(context.Context, peer.ID) ping.Result
51
	Self() peer.ID
52 53
}

54
// MessageQueue implements queue of want messages to send to peers.
55
type MessageQueue struct {
dirkmc's avatar
dirkmc committed
56 57 58
	ctx              context.Context
	p                peer.ID
	network          MessageNetwork
59
	dhTimeoutMgr     DontHaveTimeoutManager
dirkmc's avatar
dirkmc committed
60 61 62
	maxMessageSize   int
	sendErrorBackoff time.Duration

Steven Allen's avatar
Steven Allen committed
63 64
	outgoingWork chan time.Time
	done         chan struct{}
dirkmc's avatar
dirkmc committed
65 66 67 68 69 70

	// Take lock whenever any of these variables are modified
	wllock    sync.Mutex
	bcstWants recallWantlist
	peerWants recallWantlist
	cancels   *cid.Set
71
	priority  int32
dirkmc's avatar
dirkmc committed
72 73

	// Dont touch any of these variables outside of run loop
74 75 76 77
	sender                bsnet.MessageSender
	rebroadcastIntervalLk sync.RWMutex
	rebroadcastInterval   time.Duration
	rebroadcastTimer      *time.Timer
78 79 80
	// For performance reasons we just clear out the fields of the message
	// instead of creating a new one every time.
	msg bsmsg.BitSwapMessage
81 82
}

dirkmc's avatar
dirkmc committed
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
// recallWantlist tracks two want lists side by side: every want that has been
// requested, and the subset of wants that still has to be sent.
type recallWantlist struct {
	// allWants holds all requested wants, whether or not they have been
	// sent yet.
	allWants *bswl.Wantlist
	// pending holds only the wants that have not been sent yet.
	pending *bswl.Wantlist
}

// newRecallWantList returns a recallWantlist whose two want lists are freshly
// created.
func newRecallWantList() recallWantlist {
	var r recallWantlist
	r.allWants = bswl.New()
	r.pending = bswl.New()
	return r
}

// Add want to both the pending list and the list of all wants
101
func (r *recallWantlist) Add(c cid.Cid, priority int32, wtype pb.Message_Wantlist_WantType) {
dirkmc's avatar
dirkmc committed
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
	r.allWants.Add(c, priority, wtype)
	r.pending.Add(c, priority, wtype)
}

// Remove drops the want for c from the list of all wants and from the pending
// list.
func (r *recallWantlist) Remove(c cid.Cid) {
	all, pending := r.allWants, r.pending
	all.Remove(c)
	pending.Remove(c)
}

// RemoveType drops the want for c with the given want type from the list of
// all wants and from the pending list.
func (r *recallWantlist) RemoveType(c cid.Cid, wtype pb.Message_Wantlist_WantType) {
	all, pending := r.allWants, r.pending
	all.RemoveType(c, wtype)
	pending.RemoveType(c, wtype)
}

118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
// peerConn binds a peer to a network so that Ping and Latency can be called
// without naming the peer.
type peerConn struct {
	// p is the peer that Ping and Latency are issued for.
	p peer.ID
	// network performs the actual Ping and Latency calls.
	network MessageNetwork
}

// newPeerConnection returns a peerConn for peer p on the given network.
func newPeerConnection(p peer.ID, network MessageNetwork) *peerConn {
	return &peerConn{
		p:       p,
		network: network,
	}
}

// Ping pings the connection's peer over the network and returns the result.
func (pc *peerConn) Ping(ctx context.Context) ping.Result {
	res := pc.network.Ping(ctx, pc.p)
	return res
}

// Latency returns the latency the network reports for the connection's peer.
func (pc *peerConn) Latency() time.Duration {
	latency := pc.network.Latency(pc.p)
	return latency
}

// OnDontHaveTimeout is the callback that fires when a timeout occurs while
// waiting for a response from a peer running an older version of Bitswap that
// doesn't support DONT_HAVE messages. It receives the peer and the CIDs that
// timed out.
type OnDontHaveTimeout func(peer.ID, []cid.Cid)

// DontHaveTimeoutManager pings a peer to estimate latency so that it can set
// a reasonable upper bound on when to consider a DONT_HAVE request as timed
// out (when connected to a peer that doesn't support DONT_HAVE messages).
type DontHaveTimeoutManager interface {
	// Start starts the manager. It is idempotent.
	Start()
	// Shutdown stops the manager. Shutdown is final: the manager cannot be
	// restarted afterwards.
	Shutdown()
	// AddPending registers the wants as waiting for a response. If they are
	// not cancelled before the timeout, the OnDontHaveTimeout method will be
	// called.
	AddPending([]cid.Cid)
	// CancelPending removes the wants.
	CancelPending([]cid.Cid)
}

// New creates a new MessageQueue.
func New(ctx context.Context, p peer.ID, network MessageNetwork, onDontHaveTimeout OnDontHaveTimeout) *MessageQueue {
	onTimeout := func(ks []cid.Cid) {
Dirk McCormick's avatar
Dirk McCormick committed
157
		log.Infow("Bitswap: timeout waiting for blocks", "cids", ks, "peer", p)
158 159
		onDontHaveTimeout(p, ks)
	}
160
	dhTimeoutMgr := newDontHaveTimeoutMgr(newPeerConnection(p, network), onTimeout)
161
	return newMessageQueue(ctx, p, network, maxMessageSize, sendErrorBackoff, dhTimeoutMgr)
dirkmc's avatar
dirkmc committed
162 163 164
}

// This constructor is used by the tests
165 166 167
func newMessageQueue(ctx context.Context, p peer.ID, network MessageNetwork,
	maxMsgSize int, sendErrorBackoff time.Duration, dhTimeoutMgr DontHaveTimeoutManager) *MessageQueue {

dirkmc's avatar
dirkmc committed
168
	mq := &MessageQueue{
169 170
		ctx:                 ctx,
		p:                   p,
dirkmc's avatar
dirkmc committed
171
		network:             network,
172
		dhTimeoutMgr:        dhTimeoutMgr,
dirkmc's avatar
dirkmc committed
173 174 175 176
		maxMessageSize:      maxMsgSize,
		bcstWants:           newRecallWantList(),
		peerWants:           newRecallWantList(),
		cancels:             cid.NewSet(),
Steven Allen's avatar
Steven Allen committed
177
		outgoingWork:        make(chan time.Time, 1),
178 179
		done:                make(chan struct{}),
		rebroadcastInterval: defaultRebroadcastInterval,
dirkmc's avatar
dirkmc committed
180 181
		sendErrorBackoff:    sendErrorBackoff,
		priority:            maxPriority,
182 183 184
		// For performance reasons we just clear out the fields of the message
		// after using it, instead of creating a new one every time.
		msg: bsmsg.New(false),
185
	}
dirkmc's avatar
dirkmc committed
186 187

	return mq
188 189
}

dirkmc's avatar
dirkmc committed
190 191 192
// Add want-haves that are part of a broadcast to all connected peers
func (mq *MessageQueue) AddBroadcastWantHaves(wantHaves []cid.Cid) {
	if len(wantHaves) == 0 {
193 194
		return
	}
dirkmc's avatar
dirkmc committed
195 196 197 198 199 200 201 202 203 204 205

	mq.wllock.Lock()
	defer mq.wllock.Unlock()

	for _, c := range wantHaves {
		mq.bcstWants.Add(c, mq.priority, pb.Message_Wantlist_Have)
		mq.priority--

		// We're adding a want-have for the cid, so clear any pending cancel
		// for the cid
		mq.cancels.Remove(c)
206
	}
dirkmc's avatar
dirkmc committed
207 208 209

	// Schedule a message send
	mq.signalWorkReady()
210 211
}

dirkmc's avatar
dirkmc committed
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
// AddWants queues want-haves and want-blocks for the peer of this message
// queue, and schedules a message send. It does nothing when both lists are
// empty.
func (mq *MessageQueue) AddWants(wantBlocks []cid.Cid, wantHaves []cid.Cid) {
	if len(wantBlocks) == 0 && len(wantHaves) == 0 {
		return
	}

	mq.wllock.Lock()
	defer mq.wllock.Unlock()

	// enqueue adds each key as a peer want of the given type, with
	// decreasing priority, and drops any pending cancel for the key.
	enqueue := func(ks []cid.Cid, wtype pb.Message_Wantlist_WantType) {
		for _, k := range ks {
			mq.peerWants.Add(k, mq.priority, wtype)
			mq.priority--
			mq.cancels.Remove(k)
		}
	}

	// Want-haves are queued before want-blocks.
	enqueue(wantHaves, pb.Message_Wantlist_Have)
	enqueue(wantBlocks, pb.Message_Wantlist_Block)

	// Schedule a message send
	mq.signalWorkReady()
}

// Add cancel messages for the given keys.
func (mq *MessageQueue) AddCancels(cancelKs []cid.Cid) {
	if len(cancelKs) == 0 {
		return
	}

248 249 250
	// Cancel any outstanding DONT_HAVE timers
	mq.dhTimeoutMgr.CancelPending(cancelKs)

dirkmc's avatar
dirkmc committed
251 252 253
	mq.wllock.Lock()
	defer mq.wllock.Unlock()

254
	// Remove keys from broadcast and peer wants, and add to cancels
dirkmc's avatar
dirkmc committed
255 256 257 258 259 260 261 262
	for _, c := range cancelKs {
		mq.bcstWants.Remove(c)
		mq.peerWants.Remove(c)
		mq.cancels.Add(c)
	}

	// Schedule a message send
	mq.signalWorkReady()
263 264 265 266 267 268
}

// SetRebroadcastInterval sets a new interval on which to rebroadcast the full wantlist
func (mq *MessageQueue) SetRebroadcastInterval(delay time.Duration) {
	mq.rebroadcastIntervalLk.Lock()
	mq.rebroadcastInterval = delay
269 270 271
	if mq.rebroadcastTimer != nil {
		mq.rebroadcastTimer.Reset(delay)
	}
272
	mq.rebroadcastIntervalLk.Unlock()
273
}
274

dirkmc's avatar
dirkmc committed
275
// Startup starts the processing of messages and rebroadcasting.
276
func (mq *MessageQueue) Startup() {
277 278 279
	mq.rebroadcastIntervalLk.RLock()
	mq.rebroadcastTimer = time.NewTimer(mq.rebroadcastInterval)
	mq.rebroadcastIntervalLk.RUnlock()
280
	go mq.runQueue()
281 282
}

283
// Shutdown stops the processing of messages for a message queue.
284 285 286
func (mq *MessageQueue) Shutdown() {
	close(mq.done)
}
287

288 289 290 291 292
// onShutdown runs when the run loop exits. It shuts down the DONT_HAVE
// timeout manager.
func (mq *MessageQueue) onShutdown() {
	mq.dhTimeoutMgr.Shutdown()
}

293
func (mq *MessageQueue) runQueue() {
294 295
	defer mq.onShutdown()

Steven Allen's avatar
Steven Allen committed
296 297 298
	// Create a timer for debouncing scheduled work.
	scheduleWork := time.NewTimer(0)
	if !scheduleWork.Stop() {
299 300
		// Need to drain the timer if Stop() returns false
		// See: https://golang.org/pkg/time/#Timer.Stop
Steven Allen's avatar
Steven Allen committed
301 302 303 304
		<-scheduleWork.C
	}

	var workScheduled time.Time
305 306
	for {
		select {
307 308
		case <-mq.rebroadcastTimer.C:
			mq.rebroadcastWantlist()
Steven Allen's avatar
Steven Allen committed
309 310 311 312 313 314 315 316
		case when := <-mq.outgoingWork:
			// If we have work scheduled, cancel the timer. If we
			// don't, record when the work was scheduled.
			// We send the time on the channel so we accurately
			// track delay.
			if workScheduled.IsZero() {
				workScheduled = when
			} else if !scheduleWork.Stop() {
317
				// Need to drain the timer if Stop() returns false
Steven Allen's avatar
Steven Allen committed
318 319 320 321 322
				<-scheduleWork.C
			}

			// If we have too many updates and/or we've waited too
			// long, send immediately.
323
			if mq.pendingWorkCount() > sendMessageCutoff ||
Steven Allen's avatar
Steven Allen committed
324 325 326 327 328 329 330 331 332 333 334
				time.Since(workScheduled) >= sendMessageMaxDelay {
				mq.sendIfReady()
				workScheduled = time.Time{}
			} else {
				// Otherwise, extend the timer.
				scheduleWork.Reset(sendMessageDebounce)
			}
		case <-scheduleWork.C:
			// We have work scheduled and haven't seen any updates
			// in sendMessageDebounce. Send immediately.
			workScheduled = time.Time{}
dirkmc's avatar
dirkmc committed
335
			mq.sendIfReady()
336 337 338 339 340
		case <-mq.done:
			if mq.sender != nil {
				mq.sender.Close()
			}
			return
341
		case <-mq.ctx.Done():
342
			if mq.sender != nil {
Steven Allen's avatar
Steven Allen committed
343
				_ = mq.sender.Reset()
344 345 346 347 348 349
			}
			return
		}
	}
}

dirkmc's avatar
dirkmc committed
350
// Periodically resend the list of wants to the peer
351 352 353 354 355
func (mq *MessageQueue) rebroadcastWantlist() {
	mq.rebroadcastIntervalLk.RLock()
	mq.rebroadcastTimer.Reset(mq.rebroadcastInterval)
	mq.rebroadcastIntervalLk.RUnlock()

dirkmc's avatar
dirkmc committed
356 357 358 359 360
	// If some wants were transferred from the rebroadcast list
	if mq.transferRebroadcastWants() {
		// Send them out
		mq.sendMessage()
	}
361 362
}

dirkmc's avatar
dirkmc committed
363 364 365 366
// Transfer wants from the rebroadcast lists into the pending lists.
func (mq *MessageQueue) transferRebroadcastWants() bool {
	mq.wllock.Lock()
	defer mq.wllock.Unlock()
367

dirkmc's avatar
dirkmc committed
368 369 370
	// Check if there are any wants to rebroadcast
	if mq.bcstWants.allWants.Len() == 0 && mq.peerWants.allWants.Len() == 0 {
		return false
371
	}
dirkmc's avatar
dirkmc committed
372 373 374 375 376 377

	// Copy all wants into pending wants lists
	mq.bcstWants.pending.Absorb(mq.bcstWants.allWants)
	mq.peerWants.pending.Absorb(mq.peerWants.allWants)

	return true
378 379
}

Steven Allen's avatar
Steven Allen committed
380
func (mq *MessageQueue) signalWorkReady() {
dirkmc's avatar
dirkmc committed
381
	select {
Steven Allen's avatar
Steven Allen committed
382
	case mq.outgoingWork <- time.Now():
dirkmc's avatar
dirkmc committed
383 384
	default:
	}
385 386
}

dirkmc's avatar
dirkmc committed
387 388 389
func (mq *MessageQueue) sendIfReady() {
	if mq.hasPendingWork() {
		mq.sendMessage()
390
	}
dirkmc's avatar
dirkmc committed
391
}
392

dirkmc's avatar
dirkmc committed
393
func (mq *MessageQueue) sendMessage() {
394
	err := mq.initializeSender()
395 396 397
	if err != nil {
		log.Infof("cant open message sender to peer %s: %s", mq.p, err)
		// TODO: cant connect, what now?
dirkmc's avatar
dirkmc committed
398 399
		// TODO: should we stop using this connection and clear the want list
		// to avoid using up memory?
400
		return
401 402
	}

403
	// Make sure the DONT_HAVE timeout manager has started
404 405
	// Note: Start is idempotent
	mq.dhTimeoutMgr.Start()
406

dirkmc's avatar
dirkmc committed
407
	// Convert want lists to a Bitswap Message
408 409 410 411 412 413
	message := mq.extractOutgoingMessage(mq.sender.SupportsHave())

	// After processing the message, clear out its fields to save memory
	defer mq.msg.Reset(false)

	if message.Empty() {
dirkmc's avatar
dirkmc committed
414 415 416
		return
	}

417 418
	wantlist := message.Wantlist()
	mq.logOutgoingMessage(wantlist)
dirkmc's avatar
dirkmc committed
419 420 421

	// Try to send this message repeatedly
	for i := 0; i < maxRetries; i++ {
422
		if mq.attemptSendAndRecovery(message) {
dirkmc's avatar
dirkmc committed
423
			// We were able to send successfully.
424
			mq.onMessageSent(wantlist)
dirkmc's avatar
dirkmc committed
425

426
			mq.simulateDontHaveWithTimeout(wantlist)
427

dirkmc's avatar
dirkmc committed
428 429 430 431 432 433 434
			// If the message was too big and only a subset of wants could be
			// sent, schedule sending the rest of the wants in the next
			// iteration of the event loop.
			if mq.hasPendingWork() {
				mq.signalWorkReady()
			}

435 436
			return
		}
437 438
	}
}
439

440 441 442 443
// If want-block times out, simulate a DONT_HAVE reponse.
// This is necessary when making requests to peers running an older version of
// Bitswap that doesn't support the DONT_HAVE response, and is also useful to
// mitigate getting blocked by a peer that takes a long time to respond.
444
func (mq *MessageQueue) simulateDontHaveWithTimeout(wantlist []bsmsg.Entry) {
445 446
	// Get the CID of each want-block that expects a DONT_HAVE response
	wants := make([]cid.Cid, 0, len(wantlist))
447 448 449

	mq.wllock.Lock()

450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466
	for _, entry := range wantlist {
		if entry.WantType == pb.Message_Wantlist_Block && entry.SendDontHave {
			// Unlikely, but just in case check that the block hasn't been
			// received in the interim
			c := entry.Cid
			if _, ok := mq.peerWants.allWants.Contains(c); ok {
				wants = append(wants, c)
			}
		}
	}

	mq.wllock.Unlock()

	// Add wants to DONT_HAVE timeout manager
	mq.dhTimeoutMgr.AddPending(wants)
}

467
func (mq *MessageQueue) logOutgoingMessage(wantlist []bsmsg.Entry) {
Dirk McCormick's avatar
Dirk McCormick committed
468 469 470 471 472
	// Save some CPU cycles and allocations if log level is higher than debug
	if ce := sflog.Check(zap.DebugLevel, "Bitswap -> send wants"); ce == nil {
		return
	}

Dirk McCormick's avatar
Dirk McCormick committed
473
	self := mq.network.Self()
474
	for _, e := range wantlist {
Dirk McCormick's avatar
Dirk McCormick committed
475 476 477 478 479 480 481 482 483 484 485 486 487 488 489
		if e.Cancel {
			if e.WantType == pb.Message_Wantlist_Have {
				log.Debugw("Bitswap -> cancel-have", "local", self, "to", mq.p, "cid", e.Cid)
			} else {
				log.Debugw("Bitswap -> cancel-block", "local", self, "to", mq.p, "cid", e.Cid)
			}
		} else {
			if e.WantType == pb.Message_Wantlist_Have {
				log.Debugw("Bitswap -> want-have", "local", self, "to", mq.p, "cid", e.Cid)
			} else {
				log.Debugw("Bitswap -> want-block", "local", self, "to", mq.p, "cid", e.Cid)
			}
		}
	}
}
dirkmc's avatar
dirkmc committed
490 491

func (mq *MessageQueue) hasPendingWork() bool {
Steven Allen's avatar
Steven Allen committed
492 493 494 495
	return mq.pendingWorkCount() > 0
}

func (mq *MessageQueue) pendingWorkCount() int {
dirkmc's avatar
dirkmc committed
496 497 498
	mq.wllock.Lock()
	defer mq.wllock.Unlock()

Steven Allen's avatar
Steven Allen committed
499
	return mq.bcstWants.pending.Len() + mq.peerWants.pending.Len() + mq.cancels.Len()
dirkmc's avatar
dirkmc committed
500 501
}

502
func (mq *MessageQueue) extractOutgoingMessage(supportsHave bool) bsmsg.BitSwapMessage {
503
	msg := mq.msg
dirkmc's avatar
dirkmc committed
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553

	mq.wllock.Lock()
	defer mq.wllock.Unlock()

	// Get broadcast and regular wantlist entries
	bcstEntries := mq.bcstWants.pending.SortedEntries()
	peerEntries := mq.peerWants.pending.SortedEntries()

	// Size of the message so far
	msgSize := 0

	// Add each broadcast want-have to the message
	for i := 0; i < len(bcstEntries) && msgSize < mq.maxMessageSize; i++ {
		// Broadcast wants are sent as want-have
		wantType := pb.Message_Wantlist_Have

		// If the remote peer doesn't support HAVE / DONT_HAVE messages,
		// send a want-block instead
		if !supportsHave {
			wantType = pb.Message_Wantlist_Block
		}

		e := bcstEntries[i]
		msgSize += msg.AddEntry(e.Cid, e.Priority, wantType, false)
	}

	// Add each regular want-have / want-block to the message
	for i := 0; i < len(peerEntries) && msgSize < mq.maxMessageSize; i++ {
		e := peerEntries[i]
		// If the remote peer doesn't support HAVE / DONT_HAVE messages,
		// don't send want-haves (only send want-blocks)
		if !supportsHave && e.WantType == pb.Message_Wantlist_Have {
			mq.peerWants.RemoveType(e.Cid, pb.Message_Wantlist_Have)
		} else {
			msgSize += msg.AddEntry(e.Cid, e.Priority, e.WantType, true)
		}
	}

	// Add each cancel to the message
	cancels := mq.cancels.Keys()
	for i := 0; i < len(cancels) && msgSize < mq.maxMessageSize; i++ {
		c := cancels[i]

		msgSize += msg.Cancel(c)

		// Clear the cancel - we make a best effort to let peers know about
		// cancels but won't save them to resend if there's a failure.
		mq.cancels.Remove(c)
	}

554 555 556 557 558
	return msg
}

// Called when the message has been successfully sent.
func (mq *MessageQueue) onMessageSent(wantlist []bsmsg.Entry) {
dirkmc's avatar
dirkmc committed
559
	// Remove the sent keys from the broadcast and regular wantlists.
560 561
	mq.wllock.Lock()
	defer mq.wllock.Unlock()
dirkmc's avatar
dirkmc committed
562

563 564 565
	for _, e := range wantlist {
		mq.bcstWants.pending.Remove(e.Cid)
		mq.peerWants.pending.RemoveType(e.Cid, e.WantType)
dirkmc's avatar
dirkmc committed
566 567
	}
}
568

569
func (mq *MessageQueue) initializeSender() error {
570 571 572
	if mq.sender != nil {
		return nil
	}
573
	nsender, err := openSender(mq.ctx, mq.network, mq.p)
574 575 576 577 578 579
	if err != nil {
		return err
	}
	mq.sender = nsender
	return nil
}
580

581 582
func (mq *MessageQueue) attemptSendAndRecovery(message bsmsg.BitSwapMessage) bool {
	err := mq.sender.SendMsg(mq.ctx, message)
583 584 585
	if err == nil {
		return true
	}
586

587
	log.Infof("bitswap send error: %s", err)
Steven Allen's avatar
Steven Allen committed
588
	_ = mq.sender.Reset()
589 590 591 592 593
	mq.sender = nil

	select {
	case <-mq.done:
		return true
594
	case <-mq.ctx.Done():
595
		return true
dirkmc's avatar
dirkmc committed
596 597
	case <-time.After(mq.sendErrorBackoff):
		// wait 100ms in case disconnect notifications are still propagating
598
		log.Warn("SendMsg errored but neither 'done' nor context.Done() were set")
599
	}
600

601
	err = mq.initializeSender()
602 603 604
	if err != nil {
		log.Infof("couldnt open sender again after SendMsg(%s) failed: %s", mq.p, err)
		return true
605
	}
606 607 608 609 610 611 612 613 614 615 616 617

	// TODO: Is this the same instance for the remote peer?
	// If its not, we should resend our entire wantlist to them
	/*
		if mq.sender.InstanceID() != mq.lastSeenInstanceID {
			wlm = mq.getFullWantlistMessage()
		}
	*/
	return false
}

func openSender(ctx context.Context, network MessageNetwork, p peer.ID) (bsnet.MessageSender, error) {
618 619 620 621 622
	// allow ten minutes for connections this includes looking them up in the
	// dht dialing them, and handshaking
	conctx, cancel := context.WithTimeout(ctx, time.Minute*10)
	defer cancel()

623
	err := network.ConnectTo(conctx, p)
624
	if err != nil {
625
		return nil, err
626 627
	}

628
	nsender, err := network.NewMessageSender(ctx, p)
629
	if err != nil {
630
		return nil, err
631 632
	}

633
	return nsender, nil
634
}