package dht

import (
	"context"
	"errors"
	"sync"

	logging "github.com/ipfs/go-log"
	todoctr "github.com/ipfs/go-todocounter"
	process "github.com/jbenet/goprocess"
	ctxproc "github.com/jbenet/goprocess/context"
	kb "github.com/libp2p/go-libp2p-kbucket"
	inet "github.com/libp2p/go-libp2p-net"
	peer "github.com/libp2p/go-libp2p-peer"
	pset "github.com/libp2p/go-libp2p-peer/peerset"
	pstore "github.com/libp2p/go-libp2p-peerstore"
	queue "github.com/libp2p/go-libp2p-peerstore/queue"
	routing "github.com/libp2p/go-libp2p-routing"
	notif "github.com/libp2p/go-libp2p-routing/notifications"
)

// ErrNoPeersQueried is returned when we failed to connect to any peers.
var ErrNoPeersQueried = errors.New("failed to query any peers")

var maxQueryConcurrency = AlphaValue

type dhtQuery struct {
	dht         *IpfsDHT
	key         string    // the key we're querying for
	qfunc       queryFunc // the function to execute per peer
	concurrency int       // the concurrency parameter
}

type dhtQueryResult struct {
	peer        *pstore.PeerInfo   // the peer we were looking for (FindPeer queries)
	closerPeers []*pstore.PeerInfo // peers closer to the key, used to continue the query
	success     bool

	finalSet   *pset.PeerSet
	queriedSet *pset.PeerSet
}

// newQuery constructs a query for key k, executing f against each peer.
func (dht *IpfsDHT) newQuery(k string, f queryFunc) *dhtQuery {
	return &dhtQuery{
		key:         k,
		dht:         dht,
		qfunc:       f,
		concurrency: maxQueryConcurrency,
	}
}

// queryFunc is a function that runs a particular query with a given peer.
// It returns either:
// - the value
// - a list of peers potentially better able to serve the query
// - an error
type queryFunc func(context.Context, peer.ID) (*dhtQueryResult, error)

// Run runs the query at hand. Pass in a list of peers to use first.
func (q *dhtQuery) Run(ctx context.Context, peers []peer.ID) (*dhtQueryResult, error) {
	if len(peers) == 0 {
		logger.Warning("Running query with no peers!")
		return nil, kb.ErrLookupFailure
	}

	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	default:
	}

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	runner := newQueryRunner(q)
	return runner.Run(ctx, peers)
}

type dhtQueryRunner struct {
	query          *dhtQuery        // query to run
	peersSeen      *pset.PeerSet    // all peers added to the query; prevents querying the same peer twice
	peersQueried   *pset.PeerSet    // peers successfully connected to and queried
	peersDialed    *dialQueue       // peers we have dialed
	peersToQuery   *queue.ChanQueue // peers remaining to be queried
	peersRemaining todoctr.Counter  // peersToQuery + currently processing

	result *dhtQueryResult // query result

	rateLimit chan struct{} // processing semaphore
	log       logging.EventLogger

	runCtx context.Context

	proc process.Process
	sync.RWMutex
}

func newQueryRunner(q *dhtQuery) *dhtQueryRunner {
	proc := process.WithParent(process.Background())
	ctx := ctxproc.OnClosingContext(proc)
	peersToQuery := queue.NewChanQueue(ctx, queue.NewXORDistancePQ(q.key))
	r := &dhtQueryRunner{
		query:          q,
		peersRemaining: todoctr.NewSyncCounter(),
		peersSeen:      pset.New(),
		peersQueried:   pset.New(),
		rateLimit:      make(chan struct{}, q.concurrency),
		peersToQuery:   peersToQuery,
		proc:           proc,
	}
	dq, err := newDialQueue(&dqParams{
		ctx:    ctx,
		target: q.key,
		in:     peersToQuery,
		dialFn: r.dialPeer,
		config: dqDefaultConfig(),
	})
	if err != nil {
		// newDialQueue only fails on invalid parameters; this is a programmer error.
		panic(err)
	}
	r.peersDialed = dq
	return r
}
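// Run drives the query: it seeds the queue with the initial peers, starts the
// dial queue, then spawns workers that consume successfully-dialed peers and
// execute the query function against them, until the query succeeds, runs out
// of peers, or its context is cancelled.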
func (r *dhtQueryRunner) Run(ctx context.Context, peers []peer.ID) (*dhtQueryResult, error) {
	r.log = logger
	r.runCtx = ctx

	// setup concurrency rate limiting
	for i := 0; i < r.query.concurrency; i++ {
		r.rateLimit <- struct{}{}
	}

	// add all the peers we got first.
	for _, p := range peers {
		r.addPeerToQuery(p)
	}

	// start the dial queue only after we've added the initial set of peers.
	// this avoids a race in which the peersRemaining todoctr could be marked
	// done too early, if the initial dial fails before other peers make it
	// into the queue.
	r.peersDialed.Start()

	// spawn workers as a child proc to make sure Run exits
	// ONLY AFTER spawnWorkers has exited.
	r.proc.Go(r.spawnWorkers)

	// wait until they're done.
	var err error

	// now, if the context finishes, close the proc.
	// we have to do it here because the logic before is setup, which
	// should run without closing the proc.
	ctxproc.CloseAfterContext(r.proc, ctx)

	select {
	case <-r.peersRemaining.Done():
		r.proc.Close()
		if r.peersQueried.Size() == 0 {
			err = ErrNoPeersQueried
		} else {
			err = routing.ErrNotFound
		}

	case <-r.proc.Closed():
		err = r.runCtx.Err()
	}

	r.RLock()
	defer r.RUnlock()

	if r.result != nil && r.result.success {
		return r.result, nil
	}

	return &dhtQueryResult{
		finalSet:   r.peersSeen,
		queriedSet: r.peersQueried,
	}, err
}

// addPeerToQuery adds a peer to the query, skipping ourselves and any peer
// we have already seen.
func (r *dhtQueryRunner) addPeerToQuery(next peer.ID) {
	// if the new peer is ourselves...
	if next == r.query.dht.self {
		r.log.Debug("addPeerToQuery skip self")
		return
	}

	if !r.peersSeen.TryAdd(next) {
		return
	}

	notif.PublishQueryEvent(r.runCtx, &notif.QueryEvent{
		Type: notif.AddingPeer,
		ID:   next,
	})

	r.peersRemaining.Increment(1)
	select {
	case r.peersToQuery.EnqChan <- next:
	case <-r.proc.Closing():
	}
}

// spawnWorkers consumes dialed peers and queries them, one worker per
// rate-limit token, until the query completes or the proc is closed.
func (r *dhtQueryRunner) spawnWorkers(proc process.Process) {
	for {
		select {
		case <-r.peersRemaining.Done():
			return

		case <-r.proc.Closing():
			return

		case <-r.rateLimit:
			ch := r.peersDialed.Consume()
			select {
			case p, ok := <-ch:
				if !ok {
					// this signals context cancellation.
					return
				}
				// query the peer as a child proc to make sure Run exits
				// ONLY AFTER spawnWorkers has exited.
				proc.Go(func(proc process.Process) {
					r.queryPeer(proc, p)
				})
			case <-r.proc.Closing():
				return
			case <-r.peersRemaining.Done():
				return
			}
		}
	}
}

// dialPeer connects to p if we aren't connected already, publishing query
// events along the way.
func (r *dhtQueryRunner) dialPeer(ctx context.Context, p peer.ID) error {
	// short-circuit if we're already connected.
	if r.query.dht.host.Network().Connectedness(p) == inet.Connected {
		return nil
	}

	logger.Debug("not connected. dialing.")
	notif.PublishQueryEvent(r.runCtx, &notif.QueryEvent{
		Type: notif.DialingPeer,
		ID:   p,
	})

	pi := pstore.PeerInfo{ID: p}
	if err := r.query.dht.host.Connect(ctx, pi); err != nil {
		logger.Debugf("error connecting: %s", err)
		notif.PublishQueryEvent(r.runCtx, &notif.QueryEvent{
			Type:  notif.QueryError,
			Extra: err.Error(),
			ID:    p,
		})

		// This peer is dropping out of the race.
		r.peersRemaining.Decrement(1)
		return err
	}
	logger.Debug("connected. dial success.")
	return nil
}
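// queryPeer executes the query function against a single dialed peer and
// records the outcome: a successful result ends the entire query, while any
// closer peers reported are fed back in via addPeerToQuery.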
func (r *dhtQueryRunner) queryPeer(proc process.Process, p peer.ID) {
	// ok let's do this!

	// create a context from our proc.
	ctx := ctxproc.OnClosingContext(proc)

	// make sure we do this when we exit
	defer func() {
		// signal we're done processing peer p
		r.peersRemaining.Decrement(1)
		r.rateLimit <- struct{}{}
	}()

	// finally, run the query against this peer
	res, err := r.query.qfunc(ctx, p)

	r.peersQueried.Add(p)

	if err != nil {
		logger.Debugf("ERROR worker for: %v %v", p, err)
	} else if res.success {
		logger.Debugf("SUCCESS worker for: %v %s", p, res)
		r.Lock()
		r.result = res
		r.Unlock()
		if res.peer != nil {
			r.query.dht.peerstore.AddAddrs(res.peer.ID, res.peer.Addrs, pstore.TempAddrTTL)
		}
		// signal to everyone that we're done. must be async,
		// as we're one of the children, and Close blocks.
		go r.proc.Close()
	} else if len(res.closerPeers) > 0 {
		logger.Debugf("PEERS CLOSER -- worker for: %v (%d closer peers)", p, len(res.closerPeers))
		for _, next := range res.closerPeers {
			if next.ID == r.query.dht.self { // don't add self.
				logger.Debugf("PEERS CLOSER -- worker for: %v found self", p)
				continue
			}

			// add their addresses to the dialer's peerstore
			r.query.dht.peerstore.AddAddrs(next.ID, next.Addrs, pstore.TempAddrTTL)
			r.addPeerToQuery(next.ID)
			logger.Debugf("PEERS CLOSER -- worker for: %v added %v (%v)", p, next.ID, next.Addrs)
		}
	} else {
		logger.Debugf("QUERY worker for: %v - not found, and no closer peers.", p)
	}
}
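// Usage sketch (illustrative only, not part of the package API; key, ctx and
// seedPeers are hypothetical stand-ins supplied by the caller, e.g. seedPeers
// could be the nearest peers from the routing table). A caller builds a query
// with newQuery and drives it with Run, supplying a queryFunc that contacts
// one peer. The stub below reports no value and no closer peers, so, assuming
// each dial succeeds, the query terminates with routing.ErrNotFound once every
// seed peer has been queried:
//
//	q := dht.newQuery(key, func(ctx context.Context, p peer.ID) (*dhtQueryResult, error) {
//		return &dhtQueryResult{}, nil
//	})
//	res, err := q.Run(ctx, seedPeers)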