@@ -2,12 +2,12 @@ package libp2p
22
33import (
44 "context"
5- "errors"
65 "fmt"
76 "strings"
87 "sync"
98 "time"
109
10+ "github.com/cenkalti/backoff/v4"
1111 kbucket "github.com/libp2p/go-libp2p-kbucket"
1212 "github.com/libp2p/go-libp2p/core/network"
1313 "github.com/libp2p/go-libp2p/core/peer"
@@ -142,11 +142,14 @@ func (c *Crawler) connect(ctx context.Context, pi peer.AddrInfo) error {
142142 return fmt .Errorf ("skipping node as it has no public IP address" ) // change knownErrs map if changing this msg
143143 }
144144
145- var (
146- retry int = 0
147- maxRetries int = 3
148- firstErr error = nil
149- )
145+ // init an exponential backoff
146+ bo := backoff .NewExponentialBackOff ()
147+ bo .InitialInterval = time .Second
148+ bo .MaxInterval = 10 * time .Second
149+ bo .MaxElapsedTime = time .Minute
150+
151+ // keep track of retries for debug logging
152+ retry := 0
150153
151154 for {
152155 logEntry := log .WithFields (log.Fields {
@@ -166,48 +169,36 @@ func (c *Crawler) connect(ctx context.Context, pi peer.AddrInfo) error {
166169 return nil
167170 }
168171
169- // at this point we know something went wrong. Track the first error
170- // because subsequent connection attempts have a shorter timeout which
171- // means that it's more likely to run into a context.DeadlineExceeded
172- // error. If that's the case, we return the original error for tracking
173- // purposes.
174- if firstErr == nil {
175- firstErr = err
176- }
177-
178172 switch true {
179173 case strings .Contains (err .Error (), db .ErrorStr [models .NetErrorConnectionRefused ]):
180174 // Might be transient because the remote doesn't want us to connect. Try again!
181175 case strings .Contains (err .Error (), db .ErrorStr [models .NetErrorConnectionGated ]):
182- // Hints at a configuration issue but could be transient. Try again!
176+ // Hints at a configuration issue and should not happen, but if it
177+ // does it could be transient. Try again anyway, but at least log a warning.
178+ logEntry .WithError (err ).Warnln ("Connection gated!" )
183179 case strings .Contains (err .Error (), db .ErrorStr [models .NetErrorCantAssignRequestedAddress ]):
184180 // Transient error due to local UDP issues. Try again!
185- maxRetries = 10 // increase the maximum number of retries as the error _should_ go away
186- case strings .Contains (err .Error (), db .ErrorStr ["RESOURCE_LIMIT_EXCEEDED (201)" ]): // thrown by a circuit relay
181+ case strings .Contains (err .Error (), "dial backoff" ):
182+ // should not happen because we disabled backoff checks with our
183+ // go-libp2p fork. Try again anyway, but at least log a warning.
184+ logEntry .WithError (err ).Warnln ("Dial backoff!" )
185+ case strings .Contains (err .Error (), "RESOURCE_LIMIT_EXCEEDED (201)" ): // thrown by a circuit relay
187186 // We already have too many open connections over a relay. Try again!
188- maxRetries = 10 // increase the maximum number of retries as the error _should_ go away
189187 default :
190- if errors .Is (err , context .DeadlineExceeded ) {
191- err = firstErr
192- }
193188 logEntry .WithError (err ).Debugln ("Failed connecting to peer" , pi .ID .ShortString ())
194189 return err
195190 }
196191
197- if retry == maxRetries {
198- if errors .Is (err , context .DeadlineExceeded ) {
199- err = firstErr
200- }
192+ sleepDur := bo .NextBackOff ()
193+ if sleepDur == backoff .Stop {
201194 logEntry .WithError (err ).Debugln ("Exceeded retries connecting to peer" , pi .ID .ShortString ())
202195 return err
203196 }
204197
205- sleep := time .Second * time .Duration (3 * (retry + 1 )) // TODO: parameterize
206-
207198 select {
208199 case <- ctx .Done ():
209200 return ctx .Err ()
210- case <- time .After (sleep ):
201+ case <- time .After (sleepDur ):
211202 retry += 1
212203 continue
213204 }
0 commit comments