messagequeue.go 18.7 KB
Newer Older
1 2 3 4
package messagequeue

import (
	"context"
dirkmc's avatar
dirkmc committed
5
	"math"
6
	"sync"
7 8 9
	"time"

	bsmsg "github.com/ipfs/go-bitswap/message"
dirkmc's avatar
dirkmc committed
10
	pb "github.com/ipfs/go-bitswap/message/pb"
11
	bsnet "github.com/ipfs/go-bitswap/network"
12
	"github.com/ipfs/go-bitswap/wantlist"
dirkmc's avatar
dirkmc committed
13 14
	bswl "github.com/ipfs/go-bitswap/wantlist"
	cid "github.com/ipfs/go-cid"
15
	logging "github.com/ipfs/go-log"
Raúl Kripalani's avatar
Raúl Kripalani committed
16
	peer "github.com/libp2p/go-libp2p-core/peer"
17
	"github.com/libp2p/go-libp2p/p2p/protocol/ping"
Dirk McCormick's avatar
Dirk McCormick committed
18
	"go.uber.org/zap"
19 20 21
)

var log = logging.Logger("bitswap")
Dirk McCormick's avatar
Dirk McCormick committed
22
var sflog = log.Desugar()
23

const (
	// defaultRebroadcastInterval is the interval a new MessageQueue uses for
	// rebroadcasting its wantlist until SetRebroadcastInterval is called.
	defaultRebroadcastInterval = 30 * time.Second
	// maxRetries is the number of times to attempt to send a message before
	// giving up
	maxRetries = 10
	// maxMessageSize is the maximum message size in bytes
	maxMessageSize = 1024 * 1024 * 2
	// sendErrorBackoff is the time to wait before retrying to connect after
	// an error when trying to send a message
	sendErrorBackoff = 100 * time.Millisecond
	// maxPriority is the max priority as defined by the bitswap protocol
	maxPriority = math.MaxInt32
	// sendMessageDebounce is the debounce duration when calling sendMessage()
	sendMessageDebounce = time.Millisecond
	// sendMessageCutoff is the number of pending wants/cancels above which
	// the message is sent immediately instead of being debounced further.
	sendMessageCutoff = 256
	// sendMessageMaxDelay is the longest a send may be debounced; once work
	// has been waiting this long the message is sent immediately.
	sendMessageMaxDelay = 20 * time.Millisecond
)

45
// MessageNetwork is any network that can connect peers and generate a message
46
// sender.
47 48 49
type MessageNetwork interface {
	ConnectTo(context.Context, peer.ID) error
	NewMessageSender(context.Context, peer.ID) (bsnet.MessageSender, error)
50 51
	Latency(peer.ID) time.Duration
	Ping(context.Context, peer.ID) ping.Result
52
	Self() peer.ID
53 54
}

55
// MessageQueue implements queue of want messages to send to peers.
56
type MessageQueue struct {
dirkmc's avatar
dirkmc committed
57 58 59
	ctx              context.Context
	p                peer.ID
	network          MessageNetwork
60
	dhTimeoutMgr     DontHaveTimeoutManager
dirkmc's avatar
dirkmc committed
61 62 63
	maxMessageSize   int
	sendErrorBackoff time.Duration

Steven Allen's avatar
Steven Allen committed
64 65
	outgoingWork chan time.Time
	done         chan struct{}
dirkmc's avatar
dirkmc committed
66 67 68 69 70 71

	// Take lock whenever any of these variables are modified
	wllock    sync.Mutex
	bcstWants recallWantlist
	peerWants recallWantlist
	cancels   *cid.Set
72
	priority  int32
dirkmc's avatar
dirkmc committed
73 74

	// Dont touch any of these variables outside of run loop
75 76 77 78
	sender                bsnet.MessageSender
	rebroadcastIntervalLk sync.RWMutex
	rebroadcastInterval   time.Duration
	rebroadcastTimer      *time.Timer
79 80 81
	// For performance reasons we just clear out the fields of the message
	// instead of creating a new one every time.
	msg bsmsg.BitSwapMessage
82 83
}

84
// recallWantlist keeps a list of pending wants and a list of sent wants
dirkmc's avatar
dirkmc committed
85 86 87
type recallWantlist struct {
	// The list of wants that have not yet been sent
	pending *bswl.Wantlist
88 89
	// The list of wants that have been sent
	sent *bswl.Wantlist
dirkmc's avatar
dirkmc committed
90 91 92 93
}

func newRecallWantList() recallWantlist {
	return recallWantlist{
94 95
		pending: bswl.New(),
		sent:    bswl.New(),
dirkmc's avatar
dirkmc committed
96 97 98
	}
}

99
// Add want to the pending list
100
func (r *recallWantlist) Add(c cid.Cid, priority int32, wtype pb.Message_Wantlist_WantType) {
dirkmc's avatar
dirkmc committed
101 102 103
	r.pending.Add(c, priority, wtype)
}

104
// Remove wants from both the pending list and the list of sent wants
dirkmc's avatar
dirkmc committed
105
func (r *recallWantlist) Remove(c cid.Cid) {
106
	r.sent.Remove(c)
dirkmc's avatar
dirkmc committed
107 108 109
	r.pending.Remove(c)
}

110
// Remove wants by type from both the pending list and the list of sent wants
dirkmc's avatar
dirkmc committed
111
func (r *recallWantlist) RemoveType(c cid.Cid, wtype pb.Message_Wantlist_WantType) {
112
	r.sent.RemoveType(c, wtype)
dirkmc's avatar
dirkmc committed
113 114 115
	r.pending.RemoveType(c, wtype)
}

116 117 118 119 120 121
// Sent moves the want from the pending to the sent list
func (r *recallWantlist) Sent(e bsmsg.Entry) {
	r.pending.RemoveType(e.Cid, e.WantType)
	r.sent.Add(e.Cid, e.Priority, e.WantType)
}

122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
type peerConn struct {
	p       peer.ID
	network MessageNetwork
}

// newPeerConnection wraps the given peer and network in a peerConn.
func newPeerConnection(p peer.ID, network MessageNetwork) *peerConn {
	pc := peerConn{
		p:       p,
		network: network,
	}
	return &pc
}

// Ping asks the network to ping this connection's peer and returns the
// result.
func (pc *peerConn) Ping(ctx context.Context) ping.Result {
	result := pc.network.Ping(ctx, pc.p)
	return result
}

// Latency returns the latency the network reports for this connection's peer.
func (pc *peerConn) Latency() time.Duration {
	latency := pc.network.Latency(pc.p)
	return latency
}

// OnDontHaveTimeout is the callback fired when a timeout occurs waiting for a
// response from a peer running an older version of Bitswap that doesn't
// support DONT_HAVE messages. It receives the peer and the CIDs that timed
// out.
type OnDontHaveTimeout func(p peer.ID, ks []cid.Cid)

// DontHaveTimeoutManager pings a peer to estimate latency so it can set a
// reasonable upper bound on when to consider a DONT_HAVE request as timed out
// (when connected to a peer that doesn't support DONT_HAVE messages).
type DontHaveTimeoutManager interface {
	// Start the manager (idempotent)
	Start()
	// Shutdown the manager (Shutdown is final, manager cannot be restarted)
	Shutdown()
	// AddPending adds the wants as pending a response. If they are not
	// cancelled before the timeout, the OnDontHaveTimeout method will be
	// called.
	AddPending(ks []cid.Cid)
	// CancelPending removes the wants
	CancelPending(ks []cid.Cid)
}

// New creates a new MessageQueue.
func New(ctx context.Context, p peer.ID, network MessageNetwork, onDontHaveTimeout OnDontHaveTimeout) *MessageQueue {
	onTimeout := func(ks []cid.Cid) {
Dirk McCormick's avatar
Dirk McCormick committed
161
		log.Infow("Bitswap: timeout waiting for blocks", "cids", ks, "peer", p)
162 163
		onDontHaveTimeout(p, ks)
	}
164
	dhTimeoutMgr := newDontHaveTimeoutMgr(newPeerConnection(p, network), onTimeout)
165
	return newMessageQueue(ctx, p, network, maxMessageSize, sendErrorBackoff, dhTimeoutMgr)
dirkmc's avatar
dirkmc committed
166 167 168
}

// This constructor is used by the tests
169 170 171
func newMessageQueue(ctx context.Context, p peer.ID, network MessageNetwork,
	maxMsgSize int, sendErrorBackoff time.Duration, dhTimeoutMgr DontHaveTimeoutManager) *MessageQueue {

dirkmc's avatar
dirkmc committed
172
	mq := &MessageQueue{
173 174
		ctx:                 ctx,
		p:                   p,
dirkmc's avatar
dirkmc committed
175
		network:             network,
176
		dhTimeoutMgr:        dhTimeoutMgr,
dirkmc's avatar
dirkmc committed
177 178 179 180
		maxMessageSize:      maxMsgSize,
		bcstWants:           newRecallWantList(),
		peerWants:           newRecallWantList(),
		cancels:             cid.NewSet(),
Steven Allen's avatar
Steven Allen committed
181
		outgoingWork:        make(chan time.Time, 1),
182 183
		done:                make(chan struct{}),
		rebroadcastInterval: defaultRebroadcastInterval,
dirkmc's avatar
dirkmc committed
184 185
		sendErrorBackoff:    sendErrorBackoff,
		priority:            maxPriority,
186 187 188
		// For performance reasons we just clear out the fields of the message
		// after using it, instead of creating a new one every time.
		msg: bsmsg.New(false),
189
	}
dirkmc's avatar
dirkmc committed
190 191

	return mq
192 193
}

dirkmc's avatar
dirkmc committed
194 195 196
// Add want-haves that are part of a broadcast to all connected peers
func (mq *MessageQueue) AddBroadcastWantHaves(wantHaves []cid.Cid) {
	if len(wantHaves) == 0 {
197 198
		return
	}
dirkmc's avatar
dirkmc committed
199 200 201 202 203 204 205 206 207 208 209

	mq.wllock.Lock()
	defer mq.wllock.Unlock()

	for _, c := range wantHaves {
		mq.bcstWants.Add(c, mq.priority, pb.Message_Wantlist_Have)
		mq.priority--

		// We're adding a want-have for the cid, so clear any pending cancel
		// for the cid
		mq.cancels.Remove(c)
210
	}
dirkmc's avatar
dirkmc committed
211 212 213

	// Schedule a message send
	mq.signalWorkReady()
214 215
}

dirkmc's avatar
dirkmc committed
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
// Add want-haves and want-blocks for the peer for this message queue.
func (mq *MessageQueue) AddWants(wantBlocks []cid.Cid, wantHaves []cid.Cid) {
	if len(wantBlocks) == 0 && len(wantHaves) == 0 {
		return
	}

	mq.wllock.Lock()
	defer mq.wllock.Unlock()

	for _, c := range wantHaves {
		mq.peerWants.Add(c, mq.priority, pb.Message_Wantlist_Have)
		mq.priority--

		// We're adding a want-have for the cid, so clear any pending cancel
		// for the cid
		mq.cancels.Remove(c)
	}
	for _, c := range wantBlocks {
		mq.peerWants.Add(c, mq.priority, pb.Message_Wantlist_Block)
		mq.priority--

		// We're adding a want-block for the cid, so clear any pending cancel
		// for the cid
		mq.cancels.Remove(c)
	}

	// Schedule a message send
	mq.signalWorkReady()
}

// Add cancel messages for the given keys.
func (mq *MessageQueue) AddCancels(cancelKs []cid.Cid) {
	if len(cancelKs) == 0 {
		return
	}

252 253 254
	// Cancel any outstanding DONT_HAVE timers
	mq.dhTimeoutMgr.CancelPending(cancelKs)

dirkmc's avatar
dirkmc committed
255 256 257
	mq.wllock.Lock()
	defer mq.wllock.Unlock()

258 259
	workReady := false

260
	// Remove keys from broadcast and peer wants, and add to cancels
dirkmc's avatar
dirkmc committed
261
	for _, c := range cancelKs {
262 263 264 265 266
		// Check if a want for the key was sent
		_, wasSentBcst := mq.bcstWants.sent.Contains(c)
		_, wasSentPeer := mq.peerWants.sent.Contains(c)

		// Remove the want from tracking wantlists
dirkmc's avatar
dirkmc committed
267 268
		mq.bcstWants.Remove(c)
		mq.peerWants.Remove(c)
269 270 271 272 273 274

		// Only send a cancel if a want was sent
		if wasSentBcst || wasSentPeer {
			mq.cancels.Add(c)
			workReady = true
		}
dirkmc's avatar
dirkmc committed
275 276 277
	}

	// Schedule a message send
278 279 280
	if workReady {
		mq.signalWorkReady()
	}
281 282 283 284 285 286
}

// SetRebroadcastInterval sets a new interval on which to rebroadcast the full wantlist
func (mq *MessageQueue) SetRebroadcastInterval(delay time.Duration) {
	mq.rebroadcastIntervalLk.Lock()
	mq.rebroadcastInterval = delay
287 288 289
	if mq.rebroadcastTimer != nil {
		mq.rebroadcastTimer.Reset(delay)
	}
290
	mq.rebroadcastIntervalLk.Unlock()
291
}
292

dirkmc's avatar
dirkmc committed
293
// Startup starts the processing of messages and rebroadcasting.
294
func (mq *MessageQueue) Startup() {
295 296 297
	mq.rebroadcastIntervalLk.RLock()
	mq.rebroadcastTimer = time.NewTimer(mq.rebroadcastInterval)
	mq.rebroadcastIntervalLk.RUnlock()
298
	go mq.runQueue()
299 300
}

301
// Shutdown stops the processing of messages for a message queue.
302 303 304
func (mq *MessageQueue) Shutdown() {
	close(mq.done)
}
305

306 307 308 309 310
func (mq *MessageQueue) onShutdown() {
	// Shut down the DONT_HAVE timeout manager
	mq.dhTimeoutMgr.Shutdown()
}

311
func (mq *MessageQueue) runQueue() {
312 313
	defer mq.onShutdown()

Steven Allen's avatar
Steven Allen committed
314 315 316
	// Create a timer for debouncing scheduled work.
	scheduleWork := time.NewTimer(0)
	if !scheduleWork.Stop() {
317 318
		// Need to drain the timer if Stop() returns false
		// See: https://golang.org/pkg/time/#Timer.Stop
Steven Allen's avatar
Steven Allen committed
319 320 321 322
		<-scheduleWork.C
	}

	var workScheduled time.Time
323 324
	for {
		select {
325 326
		case <-mq.rebroadcastTimer.C:
			mq.rebroadcastWantlist()
Steven Allen's avatar
Steven Allen committed
327 328 329 330 331 332 333 334
		case when := <-mq.outgoingWork:
			// If we have work scheduled, cancel the timer. If we
			// don't, record when the work was scheduled.
			// We send the time on the channel so we accurately
			// track delay.
			if workScheduled.IsZero() {
				workScheduled = when
			} else if !scheduleWork.Stop() {
335
				// Need to drain the timer if Stop() returns false
Steven Allen's avatar
Steven Allen committed
336 337 338 339 340
				<-scheduleWork.C
			}

			// If we have too many updates and/or we've waited too
			// long, send immediately.
341
			if mq.pendingWorkCount() > sendMessageCutoff ||
Steven Allen's avatar
Steven Allen committed
342 343 344 345 346 347 348 349 350 351 352
				time.Since(workScheduled) >= sendMessageMaxDelay {
				mq.sendIfReady()
				workScheduled = time.Time{}
			} else {
				// Otherwise, extend the timer.
				scheduleWork.Reset(sendMessageDebounce)
			}
		case <-scheduleWork.C:
			// We have work scheduled and haven't seen any updates
			// in sendMessageDebounce. Send immediately.
			workScheduled = time.Time{}
dirkmc's avatar
dirkmc committed
353
			mq.sendIfReady()
354 355 356 357 358
		case <-mq.done:
			if mq.sender != nil {
				mq.sender.Close()
			}
			return
359
		case <-mq.ctx.Done():
360
			if mq.sender != nil {
Steven Allen's avatar
Steven Allen committed
361
				_ = mq.sender.Reset()
362 363 364 365 366 367
			}
			return
		}
	}
}

dirkmc's avatar
dirkmc committed
368
// Periodically resend the list of wants to the peer
369 370 371 372 373
func (mq *MessageQueue) rebroadcastWantlist() {
	mq.rebroadcastIntervalLk.RLock()
	mq.rebroadcastTimer.Reset(mq.rebroadcastInterval)
	mq.rebroadcastIntervalLk.RUnlock()

dirkmc's avatar
dirkmc committed
374 375 376 377 378
	// If some wants were transferred from the rebroadcast list
	if mq.transferRebroadcastWants() {
		// Send them out
		mq.sendMessage()
	}
379 380
}

dirkmc's avatar
dirkmc committed
381 382 383 384
// Transfer wants from the rebroadcast lists into the pending lists.
func (mq *MessageQueue) transferRebroadcastWants() bool {
	mq.wllock.Lock()
	defer mq.wllock.Unlock()
385

dirkmc's avatar
dirkmc committed
386
	// Check if there are any wants to rebroadcast
387
	if mq.bcstWants.sent.Len() == 0 && mq.peerWants.sent.Len() == 0 {
dirkmc's avatar
dirkmc committed
388
		return false
389
	}
dirkmc's avatar
dirkmc committed
390

391 392 393
	// Copy sent wants into pending wants lists
	mq.bcstWants.pending.Absorb(mq.bcstWants.sent)
	mq.peerWants.pending.Absorb(mq.peerWants.sent)
dirkmc's avatar
dirkmc committed
394 395

	return true
396 397
}

Steven Allen's avatar
Steven Allen committed
398
func (mq *MessageQueue) signalWorkReady() {
dirkmc's avatar
dirkmc committed
399
	select {
Steven Allen's avatar
Steven Allen committed
400
	case mq.outgoingWork <- time.Now():
dirkmc's avatar
dirkmc committed
401 402
	default:
	}
403 404
}

dirkmc's avatar
dirkmc committed
405 406 407
func (mq *MessageQueue) sendIfReady() {
	if mq.hasPendingWork() {
		mq.sendMessage()
408
	}
dirkmc's avatar
dirkmc committed
409
}
410

dirkmc's avatar
dirkmc committed
411
func (mq *MessageQueue) sendMessage() {
412
	err := mq.initializeSender()
413 414 415
	if err != nil {
		log.Infof("cant open message sender to peer %s: %s", mq.p, err)
		// TODO: cant connect, what now?
dirkmc's avatar
dirkmc committed
416 417
		// TODO: should we stop using this connection and clear the want list
		// to avoid using up memory?
418
		return
419 420
	}

421
	// Make sure the DONT_HAVE timeout manager has started
422 423
	// Note: Start is idempotent
	mq.dhTimeoutMgr.Start()
424

dirkmc's avatar
dirkmc committed
425
	// Convert want lists to a Bitswap Message
426
	message, onSent := mq.extractOutgoingMessage(mq.sender.SupportsHave())
427 428 429 430 431

	// After processing the message, clear out its fields to save memory
	defer mq.msg.Reset(false)

	if message.Empty() {
dirkmc's avatar
dirkmc committed
432 433 434
		return
	}

435 436
	wantlist := message.Wantlist()
	mq.logOutgoingMessage(wantlist)
dirkmc's avatar
dirkmc committed
437 438 439

	// Try to send this message repeatedly
	for i := 0; i < maxRetries; i++ {
440
		if mq.attemptSendAndRecovery(message) {
dirkmc's avatar
dirkmc committed
441
			// We were able to send successfully.
442
			onSent(wantlist)
dirkmc's avatar
dirkmc committed
443

444
			mq.simulateDontHaveWithTimeout(wantlist)
445

dirkmc's avatar
dirkmc committed
446 447 448 449 450 451 452
			// If the message was too big and only a subset of wants could be
			// sent, schedule sending the rest of the wants in the next
			// iteration of the event loop.
			if mq.hasPendingWork() {
				mq.signalWorkReady()
			}

453 454
			return
		}
455 456
	}
}
457

458 459 460 461
// If want-block times out, simulate a DONT_HAVE reponse.
// This is necessary when making requests to peers running an older version of
// Bitswap that doesn't support the DONT_HAVE response, and is also useful to
// mitigate getting blocked by a peer that takes a long time to respond.
462
func (mq *MessageQueue) simulateDontHaveWithTimeout(wantlist []bsmsg.Entry) {
463 464
	// Get the CID of each want-block that expects a DONT_HAVE response
	wants := make([]cid.Cid, 0, len(wantlist))
465 466 467

	mq.wllock.Lock()

468 469 470 471 472
	for _, entry := range wantlist {
		if entry.WantType == pb.Message_Wantlist_Block && entry.SendDontHave {
			// Unlikely, but just in case check that the block hasn't been
			// received in the interim
			c := entry.Cid
473
			if _, ok := mq.peerWants.sent.Contains(c); ok {
474 475 476 477 478 479 480 481 482 483 484
				wants = append(wants, c)
			}
		}
	}

	mq.wllock.Unlock()

	// Add wants to DONT_HAVE timeout manager
	mq.dhTimeoutMgr.AddPending(wants)
}

485
func (mq *MessageQueue) logOutgoingMessage(wantlist []bsmsg.Entry) {
Dirk McCormick's avatar
Dirk McCormick committed
486
	// Save some CPU cycles and allocations if log level is higher than debug
Steven Allen's avatar
Steven Allen committed
487
	if ce := sflog.Check(zap.DebugLevel, "sent message"); ce == nil {
Dirk McCormick's avatar
Dirk McCormick committed
488 489 490
		return
	}

Dirk McCormick's avatar
Dirk McCormick committed
491
	self := mq.network.Self()
492
	for _, e := range wantlist {
Dirk McCormick's avatar
Dirk McCormick committed
493 494
		if e.Cancel {
			if e.WantType == pb.Message_Wantlist_Have {
Steven Allen's avatar
Steven Allen committed
495 496 497 498 499 500
				log.Debugw("sent message",
					"type", "CANCEL_WANT_HAVE",
					"cid", e.Cid,
					"local", self,
					"to", mq.p,
				)
Dirk McCormick's avatar
Dirk McCormick committed
501
			} else {
Steven Allen's avatar
Steven Allen committed
502 503 504 505 506 507
				log.Debugw("sent message",
					"type", "CANCEL_WANT_BLOCK",
					"cid", e.Cid,
					"local", self,
					"to", mq.p,
				)
Dirk McCormick's avatar
Dirk McCormick committed
508 509 510
			}
		} else {
			if e.WantType == pb.Message_Wantlist_Have {
Steven Allen's avatar
Steven Allen committed
511 512 513 514 515 516
				log.Debugw("sent message",
					"type", "WANT_HAVE",
					"cid", e.Cid,
					"local", self,
					"to", mq.p,
				)
Dirk McCormick's avatar
Dirk McCormick committed
517
			} else {
Steven Allen's avatar
Steven Allen committed
518 519 520 521 522 523
				log.Debugw("sent message",
					"type", "WANT_BLOCK",
					"cid", e.Cid,
					"local", self,
					"to", mq.p,
				)
Dirk McCormick's avatar
Dirk McCormick committed
524 525 526 527
			}
		}
	}
}
dirkmc's avatar
dirkmc committed
528

529
// Whether there is work to be processed
dirkmc's avatar
dirkmc committed
530
func (mq *MessageQueue) hasPendingWork() bool {
Steven Allen's avatar
Steven Allen committed
531 532 533
	return mq.pendingWorkCount() > 0
}

534
// The amount of work that is waiting to be processed
Steven Allen's avatar
Steven Allen committed
535
func (mq *MessageQueue) pendingWorkCount() int {
dirkmc's avatar
dirkmc committed
536 537 538
	mq.wllock.Lock()
	defer mq.wllock.Unlock()

Steven Allen's avatar
Steven Allen committed
539
	return mq.bcstWants.pending.Len() + mq.peerWants.pending.Len() + mq.cancels.Len()
dirkmc's avatar
dirkmc committed
540 541
}

542
// Convert the lists of wants into a Bitswap message
543
func (mq *MessageQueue) extractOutgoingMessage(supportsHave bool) (bsmsg.BitSwapMessage, func([]bsmsg.Entry)) {
dirkmc's avatar
dirkmc committed
544 545 546 547 548 549 550 551 552 553
	mq.wllock.Lock()
	defer mq.wllock.Unlock()

	// Get broadcast and regular wantlist entries
	bcstEntries := mq.bcstWants.pending.SortedEntries()
	peerEntries := mq.peerWants.pending.SortedEntries()

	// Size of the message so far
	msgSize := 0

554
	// Always prioritize cancels, then targeted, then broadcast.
dirkmc's avatar
dirkmc committed
555

556 557 558 559
	// Add each cancel to the message
	cancels := mq.cancels.Keys()
	for i := 0; i < len(cancels) && msgSize < mq.maxMessageSize; i++ {
		c := cancels[i]
dirkmc's avatar
dirkmc committed
560

561 562 563 564 565
		msgSize += mq.msg.Cancel(c)

		// Clear the cancel - we make a best effort to let peers know about
		// cancels but won't save them to resend if there's a failure.
		mq.cancels.Remove(c)
dirkmc's avatar
dirkmc committed
566 567 568 569 570 571 572 573 574 575
	}

	// Add each regular want-have / want-block to the message
	for i := 0; i < len(peerEntries) && msgSize < mq.maxMessageSize; i++ {
		e := peerEntries[i]
		// If the remote peer doesn't support HAVE / DONT_HAVE messages,
		// don't send want-haves (only send want-blocks)
		if !supportsHave && e.WantType == pb.Message_Wantlist_Have {
			mq.peerWants.RemoveType(e.Cid, pb.Message_Wantlist_Have)
		} else {
576
			msgSize += mq.msg.AddEntry(e.Cid, e.Priority, e.WantType, true)
dirkmc's avatar
dirkmc committed
577 578 579
		}
	}

580 581 582 583
	// Add each broadcast want-have to the message
	for i := 0; i < len(bcstEntries) && msgSize < mq.maxMessageSize; i++ {
		// Broadcast wants are sent as want-have
		wantType := pb.Message_Wantlist_Have
dirkmc's avatar
dirkmc committed
584

585 586 587 588 589
		// If the remote peer doesn't support HAVE / DONT_HAVE messages,
		// send a want-block instead
		if !supportsHave {
			wantType = pb.Message_Wantlist_Block
		}
dirkmc's avatar
dirkmc committed
590

591 592
		e := bcstEntries[i]
		msgSize += mq.msg.AddEntry(e.Cid, e.Priority, wantType, false)
dirkmc's avatar
dirkmc committed
593 594
	}

595 596 597 598
	// Called when the message has been successfully sent.
	onMessageSent := func(wantlist []bsmsg.Entry) {
		bcst := keysToSet(bcstEntries)
		prws := keysToSet(peerEntries)
599

600 601
		mq.wllock.Lock()
		defer mq.wllock.Unlock()
dirkmc's avatar
dirkmc committed
602

603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621
		// Move the keys from pending to sent
		for _, e := range wantlist {
			if _, ok := bcst[e.Cid]; ok {
				mq.bcstWants.Sent(e)
			}
			if _, ok := prws[e.Cid]; ok {
				mq.peerWants.Sent(e)
			}
		}
	}

	return mq.msg, onMessageSent
}

// Convert wantlist entries into a set of cids
func keysToSet(wl []wantlist.Entry) map[cid.Cid]struct{} {
	set := make(map[cid.Cid]struct{}, len(wl))
	for _, e := range wl {
		set[e.Cid] = struct{}{}
dirkmc's avatar
dirkmc committed
622
	}
623
	return set
dirkmc's avatar
dirkmc committed
624
}
625

626
func (mq *MessageQueue) initializeSender() error {
627 628 629
	if mq.sender != nil {
		return nil
	}
630
	nsender, err := openSender(mq.ctx, mq.network, mq.p)
631 632 633 634 635 636
	if err != nil {
		return err
	}
	mq.sender = nsender
	return nil
}
637

638 639
func (mq *MessageQueue) attemptSendAndRecovery(message bsmsg.BitSwapMessage) bool {
	err := mq.sender.SendMsg(mq.ctx, message)
640 641 642
	if err == nil {
		return true
	}
643

644
	log.Infof("bitswap send error: %s", err)
Steven Allen's avatar
Steven Allen committed
645
	_ = mq.sender.Reset()
646 647 648 649 650
	mq.sender = nil

	select {
	case <-mq.done:
		return true
651
	case <-mq.ctx.Done():
652
		return true
dirkmc's avatar
dirkmc committed
653 654
	case <-time.After(mq.sendErrorBackoff):
		// wait 100ms in case disconnect notifications are still propagating
655
		log.Warn("SendMsg errored but neither 'done' nor context.Done() were set")
656
	}
657

658
	err = mq.initializeSender()
659 660 661
	if err != nil {
		log.Infof("couldnt open sender again after SendMsg(%s) failed: %s", mq.p, err)
		return true
662
	}
663 664 665 666 667 668 669 670 671 672 673 674

	// TODO: Is this the same instance for the remote peer?
	// If its not, we should resend our entire wantlist to them
	/*
		if mq.sender.InstanceID() != mq.lastSeenInstanceID {
			wlm = mq.getFullWantlistMessage()
		}
	*/
	return false
}

func openSender(ctx context.Context, network MessageNetwork, p peer.ID) (bsnet.MessageSender, error) {
675 676 677 678 679
	// allow ten minutes for connections this includes looking them up in the
	// dht dialing them, and handshaking
	conctx, cancel := context.WithTimeout(ctx, time.Minute*10)
	defer cancel()

680
	err := network.ConnectTo(conctx, p)
681
	if err != nil {
682
		return nil, err
683 684
	}

685
	nsender, err := network.NewMessageSender(ctx, p)
686
	if err != nil {
687
		return nil, err
688 689
	}

690
	return nsender, nil
691
}