query.go 10.5 KB
Newer Older
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
1 2 3
package dht

import (
Jeromy's avatar
Jeromy committed
4
	"context"
Steven Allen's avatar
Steven Allen committed
5
	"errors"
Adin Schmahmann's avatar
Adin Schmahmann committed
6 7
	"fmt"

8 9
	"github.com/libp2p/go-libp2p-core/network"
	"github.com/libp2p/go-libp2p-core/peer"
10
	pstore "github.com/libp2p/go-libp2p-core/peerstore"
Adin Schmahmann's avatar
Adin Schmahmann committed
11
	"github.com/libp2p/go-libp2p-core/routing"
12

Adin Schmahmann's avatar
Adin Schmahmann committed
13
	"github.com/libp2p/go-libp2p-kad-dht/qpeerset"
14
	kb "github.com/libp2p/go-libp2p-kbucket"
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
15 16
)

Steven Allen's avatar
Steven Allen committed
17 18 19
// ErrNoPeersQueried is returned when we failed to connect to any peers.
// NOTE(review): nothing in this file returns it; presumably it is used by callers elsewhere in the package — confirm.
var ErrNoPeersQueried = errors.New("failed to query any peers")

Adin Schmahmann's avatar
Adin Schmahmann committed
20
// queryFn queries a single peer on behalf of a query. During the lookup the returned peers are fed back in as newly
// seen candidates, and a non-nil error causes the queried peer to be marked unreachable.
type queryFn func(context.Context, peer.ID) ([]*peer.AddrInfo, error)
Adin Schmahmann's avatar
Adin Schmahmann committed
21
// stopFn reports whether the whole query should stop. It is polled on every scheduling round of the lookup and after
// each follow-up query completes.
type stopFn func() bool
22

Aarsh Shah's avatar
Aarsh Shah committed
23
// query represents a single DHT query.
Adin Schmahmann's avatar
Adin Schmahmann committed
24
type query struct {
25 26
	// the query context.
	ctx context.Context
Adin Schmahmann's avatar
Adin Schmahmann committed
27

28
	// the cancellation function for the query context.
Adin Schmahmann's avatar
Adin Schmahmann committed
29
	cancel context.CancelFunc
30

Adin Schmahmann's avatar
Adin Schmahmann committed
31
	dht *IpfsDHT
Jeromy's avatar
Jeromy committed
32

Adin Schmahmann's avatar
Adin Schmahmann committed
33 34 35 36 37 38 39 40 41
	// seedPeers is the set of peers that seed the query
	seedPeers []peer.ID

	// queryPeers is the set of peers known by this query and their respective states.
	queryPeers *qpeerset.QueryPeerset

	// terminated is set when the first worker thread encounters the termination condition.
	// Its role is to make sure that once termination is determined, it is sticky.
	terminated bool
42 43 44 45 46 47

	// the function that will be used to query a single peer.
	queryFn queryFn

	// stopFn is used to determine if we should stop the WHOLE disjoint query.
	stopFn stopFn
48 49
}

Adin Schmahmann's avatar
Adin Schmahmann committed
50
type lookupWithFollowupResult struct {
Aarsh Shah's avatar
Aarsh Shah committed
51 52
	peers []peer.ID            // the top K not unreachable peers at the end of the query
	state []qpeerset.PeerState // the peer states at the end of the query
Adin Schmahmann's avatar
Adin Schmahmann committed
53

Adin Schmahmann's avatar
Adin Schmahmann committed
54 55 56 57 58 59 60 61 62 63 64 65
	// indicates that neither the lookup nor the followup has been prematurely terminated by an external condition such
	// as context cancellation or the stop function being called.
	completed bool
}

// runLookupWithFollowup executes the lookup on the target using the given query function and stopping when either the
// context is cancelled or the stop function returns true. Note: if the stop function is not sticky, i.e. it does not
// return true every time after the first time it returns true, it is not guaranteed to cause a stop to occur just
// because it momentarily returns true.
//
// After the lookup is complete the query function is run (unless stopped) against all of the top K peers from the
// lookup that have not already been successfully queried.
Aarsh Shah's avatar
Aarsh Shah committed
66
func (dht *IpfsDHT) runLookupWithFollowup(ctx context.Context, target string, queryFn queryFn, stopFn stopFn) (*lookupWithFollowupResult, error) {
Adin Schmahmann's avatar
Adin Schmahmann committed
67
	// run the query
Aarsh Shah's avatar
Aarsh Shah committed
68
	lookupRes, err := dht.runQuery(ctx, target, queryFn, stopFn)
Adin Schmahmann's avatar
Adin Schmahmann committed
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
	if err != nil {
		return nil, err
	}

	// query all of the top K peers we've either Heard about or have outstanding queries we're Waiting on.
	// This ensures that all of the top K results have been queried which adds to resiliency against churn for query
	// functions that carry state (e.g. FindProviders and GetValue) as well as establish connections that are needed
	// by stateless query functions (e.g. GetClosestPeers and therefore Provide and PutValue)
	queryPeers := make([]peer.ID, 0, len(lookupRes.peers))
	for i, p := range lookupRes.peers {
		if state := lookupRes.state[i]; state == qpeerset.PeerHeard || state == qpeerset.PeerWaiting {
			queryPeers = append(queryPeers, p)
		}
	}

	if len(queryPeers) == 0 {
		return lookupRes, nil
	}

	// return if the lookup has been externally stopped
	if ctx.Err() != nil || stopFn() {
		lookupRes.completed = false
		return lookupRes, nil
	}

	doneCh := make(chan struct{}, len(queryPeers))
	followUpCtx, cancelFollowUp := context.WithCancel(ctx)
	for _, p := range queryPeers {
		qp := p
		go func() {
			_, _ = queryFn(followUpCtx, qp)
			doneCh <- struct{}{}
		}()
	}

	// wait for all queries to complete before returning, aborting ongoing queries if we've been externally stopped
processFollowUp:
	for i := 0; i < len(queryPeers); i++ {
		select {
		case <-doneCh:
			if stopFn() {
				cancelFollowUp()
				if i < len(queryPeers)-1 {
					lookupRes.completed = false
				}
				break processFollowUp
			}
		case <-ctx.Done():
			lookupRes.completed = false
			break processFollowUp
		}
	}

	return lookupRes, nil
}

Aarsh Shah's avatar
Aarsh Shah committed
125
func (dht *IpfsDHT) runQuery(ctx context.Context, target string, queryFn queryFn, stopFn stopFn) (*lookupWithFollowupResult, error) {
Adin Schmahmann's avatar
Adin Schmahmann committed
126
	queryCtx, cancelQuery := context.WithCancel(ctx)
Adin Schmahmann's avatar
Adin Schmahmann committed
127

128
	// pick the K closest peers to the key in our Routing table and shuffle them.
Adin Schmahmann's avatar
Adin Schmahmann committed
129 130
	targetKadID := kb.ConvertKey(target)
	seedPeers := dht.routingTable.NearestPeers(targetKadID, dht.bucketSize)
131 132 133 134 135 136 137
	if len(seedPeers) == 0 {
		routing.PublishQueryEvent(ctx, &routing.QueryEvent{
			Type:  routing.QueryError,
			Extra: kb.ErrLookupFailure.Error(),
		})
		return nil, kb.ErrLookupFailure
	}
Adin Schmahmann's avatar
Adin Schmahmann committed
138

Aarsh Shah's avatar
Aarsh Shah committed
139 140 141 142 143 144 145 146 147
	q := &query{
		ctx:        queryCtx,
		cancel:     cancelQuery,
		dht:        dht,
		queryPeers: qpeerset.NewQueryPeerset(target),
		seedPeers:  seedPeers,
		terminated: false,
		queryFn:    queryFn,
		stopFn:     stopFn,
148 149
	}

Aarsh Shah's avatar
Aarsh Shah committed
150 151
	// run the query
	q.runWithGreedyParallelism()
152

Aarsh Shah's avatar
Aarsh Shah committed
153
	res := q.constructLookupResult(targetKadID)
Adin Schmahmann's avatar
Adin Schmahmann committed
154
	return res, nil
155 156
}

Adin Schmahmann's avatar
Adin Schmahmann committed
157
// constructLookupResult takes the query information and uses it to construct the lookup result
Aarsh Shah's avatar
Aarsh Shah committed
158 159
func (q *query) constructLookupResult(target kb.ID) *lookupWithFollowupResult {
	// determine if the query terminated early
Adin Schmahmann's avatar
Adin Schmahmann committed
160
	completed := true
Aarsh Shah's avatar
Aarsh Shah committed
161 162 163

	if !(q.isLookupTermination()) {
		completed = false
Adin Schmahmann's avatar
Adin Schmahmann committed
164
	}
165

Aarsh Shah's avatar
Aarsh Shah committed
166
	// extract the top K not unreachable peers
Adin Schmahmann's avatar
Adin Schmahmann committed
167 168
	var peers []peer.ID
	peerState := make(map[peer.ID]qpeerset.PeerState)
Aarsh Shah's avatar
Aarsh Shah committed
169 170 171 172 173
	qp := q.queryPeers.GetClosestNotUnreachable(q.dht.bucketSize)
	for _, p := range qp {
		state := q.queryPeers.GetState(p)
		peerState[p] = state
		peers = append(peers, p)
174 175
	}

Adin Schmahmann's avatar
Adin Schmahmann committed
176 177
	// get the top K overall peers
	sortedPeers := kb.SortClosestPeers(peers, target)
Aarsh Shah's avatar
Aarsh Shah committed
178 179
	if len(sortedPeers) > q.dht.bucketSize {
		sortedPeers = sortedPeers[:q.dht.bucketSize]
180 181
	}

Aarsh Shah's avatar
Aarsh Shah committed
182
	// return the top K not unreachable peers as well as their states at the end of the query
Adin Schmahmann's avatar
Adin Schmahmann committed
183 184 185 186 187 188 189 190 191
	res := &lookupWithFollowupResult{
		peers:     sortedPeers,
		state:     make([]qpeerset.PeerState, len(sortedPeers)),
		completed: completed,
	}

	for i, p := range sortedPeers {
		res.state[i] = peerState[p]
	}
192 193

	return res
Adin Schmahmann's avatar
Adin Schmahmann committed
194
}
195

Adin Schmahmann's avatar
Adin Schmahmann committed
196 197 198 199 200
// queryUpdate is the report a worker sends back to the lookup loop; it is applied to the peer set by updateState.
type queryUpdate struct {
	seen        []peer.ID // peers to try adding to the query's peer set
	queried     []peer.ID // peers to move from the waiting state to the queried state
	unreachable []peer.ID // peers to move from the waiting state to the unreachable state
}
201

Adin Schmahmann's avatar
Adin Schmahmann committed
202 203 204
func (q *query) runWithGreedyParallelism() {
	pathCtx, cancelPath := context.WithCancel(q.ctx)
	defer cancelPath()
205

Adin Schmahmann's avatar
Adin Schmahmann committed
206
	alpha := q.dht.alpha
207

Adin Schmahmann's avatar
Adin Schmahmann committed
208 209
	ch := make(chan *queryUpdate, alpha)
	ch <- &queryUpdate{seen: q.seedPeers}
210

Adin Schmahmann's avatar
Adin Schmahmann committed
211 212 213 214 215 216
	for {
		select {
		case update := <-ch:
			q.updateState(update)
		case <-pathCtx.Done():
			q.terminate()
Adin Schmahmann's avatar
Adin Schmahmann committed
217 218
		}

Adin Schmahmann's avatar
Adin Schmahmann committed
219 220 221 222 223 224
		// termination is triggered on end-of-lookup conditions or starvation of unused peers
		if q.readyToTerminate() {
			q.terminate()

			// exit once all goroutines have been cleaned up
			if q.queryPeers.NumWaiting() == 0 {
Adin Schmahmann's avatar
Adin Schmahmann committed
225 226
				return
			}
Adin Schmahmann's avatar
Adin Schmahmann committed
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
			continue
		}

		// if all "threads" are busy, wait until someone finishes
		if q.queryPeers.NumWaiting() >= alpha {
			continue
		}

		// spawn new queries, up to the parallelism allowance
		for j := 0; j < alpha-q.queryPeers.NumWaiting(); j++ {
			q.spawnQuery(ch)
		}
	}
}

// spawnQuery launches a worker for the first peer in the sorted list of heard peers, marking it as waiting before the
// worker starts. It does nothing when no heard peer is available.
func (q *query) spawnQuery(ch chan<- *queryUpdate) {
	heard := q.queryPeers.GetSortedHeard()
	if len(heard) == 0 {
		return
	}

	next := heard[0]
	q.queryPeers.SetState(next, qpeerset.PeerWaiting)
	go q.queryPeer(ch, next)
}

// readyToTerminate reports whether the lookup should stop scheduling work. The conditions are evaluated in order and
// the first one that holds decides.
func (q *query) readyToTerminate() bool {
	switch {
	case q.terminated:
		// termination is sticky: once decided it holds forever, whatever happens to queryPeers afterwards
		return true
	case q.stopFn():
		// the application logic asked for the query to stop
		return true
	case q.isStarvationTermination():
		return true
	case q.isLookupTermination():
		return true
	default:
		return false
	}
}

// From the set of all nodes that are not unreachable,
// if the closest beta nodes are all queried, the lookup can terminate.
func (q *query) isLookupTermination() bool {
	var peers []peer.ID
	peers = q.queryPeers.GetClosestNotUnreachable(q.dht.beta)
	for _, p := range peers {
		if q.queryPeers.GetState(p) != qpeerset.PeerQueried {
			return false
Adin Schmahmann's avatar
Adin Schmahmann committed
279
		}
280
	}
Adin Schmahmann's avatar
Adin Schmahmann committed
281 282 283 284 285
	return true
}

func (q *query) isStarvationTermination() bool {
	return q.queryPeers.NumHeard() == 0 && q.queryPeers.NumWaiting() == 0
286 287
}

Adin Schmahmann's avatar
Adin Schmahmann committed
288 289
func (q *query) terminate() {
	q.terminated = true
290 291
}

Adin Schmahmann's avatar
Adin Schmahmann committed
292 293 294
// queryPeer queries a single peer and reports its findings on the channel.
// queryPeer does not access the query state in queryPeers!
func (q *query) queryPeer(ch chan<- *queryUpdate, p peer.ID) {
295
	dialCtx, queryCtx := q.ctx, q.ctx
Adin Schmahmann's avatar
Adin Schmahmann committed
296

297
	// dial the peer
Adin Schmahmann's avatar
Adin Schmahmann committed
298
	if err := q.dht.dialPeer(dialCtx, p); err != nil {
Adin Schmahmann's avatar
Adin Schmahmann committed
299 300
		ch <- &queryUpdate{unreachable: []peer.ID{p}}
		return
Adin Schmahmann's avatar
Adin Schmahmann committed
301
	}
302 303

	// send query RPC to the remote peer
Adin Schmahmann's avatar
Adin Schmahmann committed
304 305
	newPeers, err := q.queryFn(queryCtx, p)
	if err != nil {
Adin Schmahmann's avatar
Adin Schmahmann committed
306 307
		ch <- &queryUpdate{unreachable: []peer.ID{p}}
		return
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
308
	}
309

Adin Schmahmann's avatar
Adin Schmahmann committed
310 311
	// process new peers
	saw := []peer.ID{}
Adin Schmahmann's avatar
Adin Schmahmann committed
312 313 314 315 316
	for _, next := range newPeers {
		if next.ID == q.dht.self { // don't add self.
			logger.Debugf("PEERS CLOSER -- worker for: %v found self", p)
			continue
		}
317

Adin Schmahmann's avatar
Adin Schmahmann committed
318 319
		// add their addresses to the dialer's peerstore
		q.dht.peerstore.AddAddrs(next.ID, next.Addrs, pstore.TempAddrTTL)
Adin Schmahmann's avatar
Adin Schmahmann committed
320
		saw = append(saw, next.ID)
321
	}
Adin Schmahmann's avatar
Adin Schmahmann committed
322

Adin Schmahmann's avatar
Adin Schmahmann committed
323 324
	ch <- &queryUpdate{seen: saw, queried: []peer.ID{p}}
}
325

Adin Schmahmann's avatar
Adin Schmahmann committed
326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
// updateState applies a worker's report to the query's peer set: seen peers are offered to the set, while queried and
// unreachable peers are moved out of the waiting state. Our own peer ID is ignored throughout. A queried or
// unreachable peer that is not currently waiting is treated as a broken invariant and panics.
func (q *query) updateState(up *queryUpdate) {
	self := q.dht.self

	for _, p := range up.seen {
		if p != self {
			q.queryPeers.TryAdd(p)
		}
	}

	for _, p := range up.queried {
		if p == self {
			continue
		}
		st := q.queryPeers.GetState(p)
		if st != qpeerset.PeerWaiting {
			panic(fmt.Errorf("kademlia protocol error: tried to transition to the queried state from state %v", st))
		}
		q.queryPeers.SetState(p, qpeerset.PeerQueried)
	}

	for _, p := range up.unreachable {
		if p == self {
			continue
		}
		st := q.queryPeers.GetState(p)
		if st != qpeerset.PeerWaiting {
			panic(fmt.Errorf("kademlia protocol error: tried to transition to the unreachable state from state %v", st))
		}
		q.queryPeers.SetState(p, qpeerset.PeerUnreachable)
	}
}
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
354

Adin Schmahmann's avatar
Adin Schmahmann committed
355
func (dht *IpfsDHT) dialPeer(ctx context.Context, p peer.ID) error {
356
	// short-circuit if we're already connected.
Adin Schmahmann's avatar
Adin Schmahmann committed
357
	if dht.host.Network().Connectedness(p) == network.Connected {
358 359 360
		return nil
	}

Matt Joiner's avatar
Matt Joiner committed
361
	logger.Debug("not connected. dialing.")
Adin Schmahmann's avatar
Adin Schmahmann committed
362
	routing.PublishQueryEvent(ctx, &routing.QueryEvent{
363
		Type: routing.DialingPeer,
364 365 366
		ID:   p,
	})

367
	pi := peer.AddrInfo{ID: p}
Adin Schmahmann's avatar
Adin Schmahmann committed
368
	if err := dht.host.Connect(ctx, pi); err != nil {
Matt Joiner's avatar
Matt Joiner committed
369
		logger.Debugf("error connecting: %s", err)
Adin Schmahmann's avatar
Adin Schmahmann committed
370
		routing.PublishQueryEvent(ctx, &routing.QueryEvent{
371
			Type:  routing.QueryError,
372 373 374 375 376 377
			Extra: err.Error(),
			ID:    p,
		})

		return err
	}
Matt Joiner's avatar
Matt Joiner committed
378
	logger.Debugf("connected. dial success.")
379 380
	return nil
}