Unverified Commit 7269da47 authored by Steven Allen's avatar Steven Allen Committed by GitHub

Merge pull request #115 from reinerRubin/bug/4355-dial-errors

return all dial errors if dial has failed
parents e20fb5e3 3719137c
......@@ -2,7 +2,9 @@ package swarm_test
import (
"context"
"fmt"
"net"
"regexp"
"sync"
"testing"
"time"
......@@ -480,3 +482,49 @@ func TestDialBackoffClears(t *testing.T) {
t.Log("correctly cleared backoff")
}
}
// TestDialPeerFailed checks that when every dial attempt to a peer fails,
// DialPeer returns a single combined error that reports each per-address
// failure instead of only one of them.
//
// The target peer is registered in the dialing swarm's peerstore under
// several addresses that belong to "silent" listeners, so none of the dials
// can complete successfully.
func TestDialPeerFailed(t *testing.T) {
	t.Parallel()
	ctx := context.Background()

	swarms := makeSwarms(ctx, t, 2)
	defer closeSwarms(swarms)
	testedSwarm, targetSwarm := swarms[0], swarms[1]

	expectedErrorsCount := 5
	for i := 0; i < expectedErrorsCount; i++ {
		_, silentPeerAddress, silentPeerListener := newSilentPeer(t)
		// NOTE(review): acceptAndHang presumably accepts connections and never
		// answers, so the dial stalls until it times out — confirm against the helper.
		go acceptAndHang(silentPeerListener)
		// Deliberately deferred inside the loop: every listener has to stay
		// open until the dial below has finished, i.e. until the test returns.
		defer silentPeerListener.Close()

		testedSwarm.Peerstore().AddAddr(
			targetSwarm.LocalPeer(),
			silentPeerAddress,
			pstore.PermanentAddrTTL)
	}

	_, err := testedSwarm.DialPeer(ctx, targetSwarm.LocalPeer())
	if err == nil {
		// err is nil here, so passing it to t.Fatal would only print "<nil>".
		t.Fatal("expected DialPeer to fail, but it returned no error")
	}
	dialErrText := err.Error()

	// Example of the combined error this test expects (one entry per address):
	// dial attempt failed: 5 errors occurred:
	//	* <peer.ID Qm*Wpwtvc> --> <peer.ID Qm*cc2FQR> (/ip4/127.0.0.1/tcp/46485) dial attempt failed: failed to negotiate security protocol: context deadline exceeded
	//	* <peer.ID Qm*Wpwtvc> --> <peer.ID Qm*cc2FQR> (/ip4/127.0.0.1/tcp/34881) dial attempt failed: failed to negotiate security protocol: context deadline exceeded
	//	...

	// The leading \b keeps "5 errors occurred" from matching e.g. "15 errors occurred".
	errorCountRegexpString := fmt.Sprintf(`\b%d errors occurred`, expectedErrorsCount)
	errorCountRegexp := regexp.MustCompile(errorCountRegexpString)
	if !errorCountRegexp.MatchString(dialErrText) {
		t.Fatalf("can't find total err count: `%s' in `%s'", errorCountRegexpString, dialErrText)
	}

	// Every failed address must show up as its own entry in the combined error.
	connectErrorsRegexpString := `\* <peer\.ID .+?> --> <peer\.ID .+?> \(.+?\) dial attempt failed:.+`
	connectErrorsRegexp := regexp.MustCompile(connectErrorsRegexpString)
	connectErrors := connectErrorsRegexp.FindAllString(dialErrText, -1)
	if len(connectErrors) != expectedErrorsCount {
		t.Fatalf("connectErrors must contain %d errors; "+
			"but `%s' was found in `%s' %d times",
			expectedErrorsCount, connectErrorsRegexpString, dialErrText, len(connectErrors))
	}
}
......@@ -149,6 +149,12 @@
"hash": "QmSVaJe1aRjc78cZARTtf4pqvXERYwihyYhZWoVWceHnsK",
"name": "go-libp2p-secio",
"version": "2.0.30"
},
{
"author": "hashicorp",
"hash": "QmfGQp6VVqdPCDyzEM6EGwMY74YPabTSEoQWHUxZuCSWj3",
"name": "go-multierror",
"version": "0.1.0"
}
],
"gxVersion": "0.9.1",
......@@ -158,3 +164,4 @@
"releaseCmd": "git commit -a -m \"gx publish $VERSION\"",
"version": "3.0.35"
}
......@@ -7,6 +7,8 @@ import (
"sync"
"time"
"github.com/hashicorp/go-multierror"
logging "github.com/ipfs/go-log"
addrutil "github.com/libp2p/go-addr-util"
lgbl "github.com/libp2p/go-libp2p-loggables"
......@@ -358,9 +360,7 @@ func (s *Swarm) dialAddrs(ctx context.Context, p peer.ID, remoteAddrs <-chan ma.
// use a single response type instead of errs and conns, reduces complexity *a ton*
respch := make(chan dialResult)
defaultDialFail := inet.ErrNoRemoteAddrs
exitErr := defaultDialFail
var dialErrors *multierror.Error
defer s.limiter.clearAllPeerDials(p)
......@@ -369,16 +369,17 @@ func (s *Swarm) dialAddrs(ctx context.Context, p peer.ID, remoteAddrs <-chan ma.
// Check for context cancellations and/or responses first.
select {
case <-ctx.Done():
if exitErr == defaultDialFail {
exitErr = ctx.Err()
if dialError := dialErrors.ErrorOrNil(); dialError != nil {
return nil, dialError
}
return nil, exitErr
return nil, ctx.Err()
case resp := <-respch:
active--
if resp.Err != nil {
log.Infof("got error on dial to %s: %s", resp.Addr, resp.Err)
// Errors are normal, lots of dials will fail
exitErr = resp.Err
log.Infof("got error on dial: %s", resp.Err)
dialErrors = multierror.Append(dialErrors, resp.Err)
} else if resp.Conn != nil {
return resp.Conn, nil
}
......@@ -399,22 +400,28 @@ func (s *Swarm) dialAddrs(ctx context.Context, p peer.ID, remoteAddrs <-chan ma.
s.limitedDial(ctx, p, addr, respch)
active++
case <-ctx.Done():
if exitErr == defaultDialFail {
exitErr = ctx.Err()
if dialError := dialErrors.ErrorOrNil(); dialError != nil {
return nil, dialError
}
return nil, exitErr
return nil, ctx.Err()
case resp := <-respch:
active--
if resp.Err != nil {
log.Infof("got error on dial to %s: %s", resp.Addr, resp.Err)
// Errors are normal, lots of dials will fail
exitErr = resp.Err
log.Infof("got error on dial: %s", resp.Err)
dialErrors = multierror.Append(dialErrors, resp.Err)
} else if resp.Conn != nil {
return resp.Conn, nil
}
}
}
return nil, exitErr
if dialError := dialErrors.ErrorOrNil(); dialError != nil {
return nil, dialError
}
return nil, inet.ErrNoRemoteAddrs
}
// limitedDial will start a dial to the given peer when
......@@ -443,7 +450,7 @@ func (s *Swarm) dialAddr(ctx context.Context, p peer.ID, addr ma.Multiaddr) (tra
connC, err := tpt.Dial(ctx, addr, p)
if err != nil {
return nil, fmt.Errorf("%s --> %s dial attempt failed: %s", s.local, p, err)
return nil, fmt.Errorf("%s --> %s (%s) dial attempt failed: %s", s.local, p, addr, err)
}
// Trust the transport? Yeah... right.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment