11package swarm
22
33import (
4+ "fmt"
45 "sync"
56
67 ma "github.com/multiformats/go-multiaddr"
@@ -10,10 +11,24 @@ import (
1011type blackHoleState int
1112
1213const (
13- blackHoleStateAllowed blackHoleState = iota
14+ blackHoleStateProbing blackHoleState = iota
15+ blackHoleStateAllowed
1416 blackHoleStateBlocked
1517)
1618
19+ func (st blackHoleState ) String () string {
20+ switch st {
21+ case blackHoleStateProbing :
22+ return "Probing"
23+ case blackHoleStateAllowed :
24+ return "Allowed"
25+ case blackHoleStateBlocked :
26+ return "Blocked"
27+ default :
28+ return fmt .Sprintf ("Unknown %d" , st )
29+ }
30+ }
31+
1732type blackHoleResult int
1833
1934const (
@@ -22,32 +37,34 @@ const (
2237 blackHoleResultBlocked
2338)
2439
25- // blackHoleFilter provides black hole filtering logic for dials. On detecting a black holed
26- // network environment, subsequent dials are blocked and only 1 dial every n requests is allowed.
27- // This should be used in conjunction with an UDP or IPv6 address filter to detect UDP or
28- // IPv6 black hole.
29- // Requests are blocked if the success fraction in the last n outcomes is less than
30- // minSuccessFraction. If a request succeeds in Blocked state, the filter state is reset and n
31- // subsequent requests are allowed before reevaluating black hole status. Evaluating over n
32- // outcomes avoids situations where a dial was cancelled because a competing dial succeeded,
33- // the address was unreachable, and other false negatives.
40+ // blackHoleFilter provides black hole filtering for dials. This filter should be used in
41+ // concert with a UDP of IPv6 address filter to detect UDP or IPv6 black hole. In a black
42+ // holed environments dial requests are blocked and only periodic probes to check the
43+ // state of the black hole are allowed.
44+ //
45+ // Requests are blocked if the number of successes in the last n dials is less than
46+ // minSuccesses. If a request succeeds in Blocked state, the filter state is reset and n
47+ // subsequent requests are allowed before reevaluating black hole state. Dials cancelled
48+ // when some other concurrent dial succeeded are counted as failures. A sufficiently large
49+ // n prevents false negatives in such cases.
3450type blackHoleFilter struct {
35- // n is the minimum number of completed dials required before we start blocking.
36- // Every nth request is allowed irrespective of the status of the detector.
51+ // n serves the dual purpose of being the minimum number of requests after which we
52+ // probe the state of the black hole in blocked state and the minimum number of
53+ // completed dials required before evaluating black hole state.
3754 n int
38- // minSuccessFraction is the minimum success fraction required to allow dials.
39- minSuccessFraction float64
55+ // minSuccesses is the minimum number of Success required in the last n dials
56+ // to consider we are not blocked.
57+ minSuccesses int
4058 // name for the detector.
4159 name string
4260
43- // requests counts number of dial requests up to n. Resets to 0 every nth request.
61+ // requests counts number of dial requests to peers. We handle request at a peer
62+ // level and record results at individual address dial level.
4463 requests int
45- // dialResults of the last `n` allowed dials. success is true.
64+ // dialResults of the last `n` dials. A successful dial is true.
4665 dialResults []bool
4766 // successes is the count of successful dials in outcomes
4867 successes int
49- // failures is the count of failed dials in outcomes
50- failures int
5168 // state is the current state of the detector
5269 state blackHoleState
5370
@@ -72,16 +89,12 @@ func (b *blackHoleFilter) RecordResult(success bool) {
7289
7390 if success {
7491 b .successes ++
75- } else {
76- b .failures ++
7792 }
7893 b .dialResults = append (b .dialResults , success )
7994
8095 if len (b .dialResults ) > b .n {
8196 if b .dialResults [0 ] {
8297 b .successes --
83- } else {
84- b .failures --
8598 }
8699 b .dialResults = b .dialResults [1 : b .n + 1 ]
87100 }
@@ -90,8 +103,9 @@ func (b *blackHoleFilter) RecordResult(success bool) {
90103 b .trackMetrics ()
91104}
92105
93- // HandleRequest handles a new dial request for the filter. It returns a
94- func (b * blackHoleFilter ) HandleRequest () blackHoleResult {
106+ // HandleDialPeerRequest returns the result of applying the black hole filter
107+ // for the dial request.
108+ func (b * blackHoleFilter ) HandleDialPeerRequest () blackHoleResult {
95109 b .mu .Lock ()
96110 defer b .mu .Unlock ()
97111
@@ -101,7 +115,7 @@ func (b *blackHoleFilter) HandleRequest() blackHoleResult {
101115
102116 if b .state == blackHoleStateAllowed {
103117 return blackHoleResultAllowed
104- } else if b .requests % b .n == 0 {
118+ } else if b .state == blackHoleStateProbing || b . requests % b .n == 0 {
105119 return blackHoleResultProbing
106120 } else {
107121 return blackHoleResultBlocked
@@ -110,47 +124,42 @@ func (b *blackHoleFilter) HandleRequest() blackHoleResult {
110124
111125func (b * blackHoleFilter ) reset () {
112126 b .successes = 0
113- b .failures = 0
114127 b .dialResults = b .dialResults [:0 ]
115128 b .requests = 0
116129 b .updateState ()
117130}
118131
119132func (b * blackHoleFilter ) updateState () {
120133 st := b .state
121- successFraction := 0.0
134+
122135 if len (b .dialResults ) < b .n {
136+ b .state = blackHoleStateProbing
137+ } else if b .successes >= b .minSuccesses {
123138 b .state = blackHoleStateAllowed
124139 } else {
125- successFraction = float64 (b .successes ) / float64 (b .successes + b .failures )
126- if successFraction >= b .minSuccessFraction {
127- b .state = blackHoleStateAllowed
128- } else {
129- b .state = blackHoleStateBlocked
130- }
140+ b .state = blackHoleStateBlocked
131141 }
142+
132143 if st != b .state {
133- if b .state == blackHoleStateAllowed {
134- log .Debugf ("%s blackHoleDetector state changed to Allowed" , b .name )
135- } else {
136- log .Debugf ("%s blackHoleDetector state changed to Blocked. Success fraction is %0.3f" , b .name , successFraction )
137- }
144+ log .Debugf ("%s blackHoleDetector state changed from %s to %s" , b .name , st , b .state )
138145 }
139146}
140147
141148func (b * blackHoleFilter ) trackMetrics () {
142149 if b .metricsTracer == nil {
143150 return
144151 }
145- successFraction := 0.0
146- if b .successes + b .failures != 0 {
147- successFraction = float64 (b .successes ) / float64 (b .successes + b .failures )
148- }
149152
150153 nextRequestAllowedAfter := 0
151154 if b .state == blackHoleStateBlocked {
152155 nextRequestAllowedAfter = b .n - (b .requests % b .n )
153156 }
157+
158+ successFraction := 0.0
159+ if len (b .dialResults ) > 0 {
160+ successFraction = float64 (b .successes ) / float64 (len (b .dialResults ))
161+ }
162+
154163 b .metricsTracer .UpdatedBlackHoleFilterState (
155164 b .name ,
156165 b .state ,
@@ -160,34 +169,56 @@ func (b *blackHoleFilter) trackMetrics() {
160169}
161170
162171// blackHoleDetector provides UDP and IPv6 black hole detection using a `blackHoleFilter`
163- // for each. For details of the black hole detection logic see `blackHoleFilter`
172+ // for each. For details of the black hole detection logic see `blackHoleFilter`.
173+ //
174+ // black hole filtering is done at a peer dial level to ensure that periodic probes to
175+ // detect change of the black hole state are actually dialed and are not skipped
176+ // because of dial prioritisation logic.
164177type blackHoleDetector struct {
165178 udp , ipv6 * blackHoleFilter
166179}
167180
168- func (d * blackHoleDetector ) HandleRequest (addr ma.Multiaddr ) bool {
169- if ! manet .IsPublicAddr (addr ) {
170- return true
181+ // HandleDialPeerRequest filters the peer's addresses removing black holed addresses
182+ func (d * blackHoleDetector ) HandleDialPeerRequest (addrs []ma.Multiaddr ) []ma.Multiaddr {
183+ hasUDP , hasIPv6 := false , false
184+ for _ , addr := range addrs {
185+ if ! manet .IsPublicAddr (addr ) {
186+ continue
187+ }
188+ if isProtocolAddr (addr , ma .P_UDP ) {
189+ hasUDP = true
190+ }
191+ if isProtocolAddr (addr , ma .P_IP6 ) {
192+ hasIPv6 = true
193+ }
171194 }
172195
173- udpres := blackHoleResultAllowed
174- if d .udp != nil && isProtocolAddr ( addr , ma . P_UDP ) {
175- udpres = d .udp .HandleRequest ()
196+ udpRes := blackHoleResultAllowed
197+ if d .udp != nil && hasUDP {
198+ udpRes = d .udp .HandleDialPeerRequest ()
176199 }
177200
178- ipv6res := blackHoleResultAllowed
179- if d .ipv6 != nil && isProtocolAddr ( addr , ma . P_IP6 ) {
180- ipv6res = d .ipv6 .HandleRequest ()
201+ ipv6Res := blackHoleResultAllowed
202+ if d .ipv6 != nil && hasIPv6 {
203+ ipv6Res = d .ipv6 .HandleDialPeerRequest ()
181204 }
182205
183- // Allow all probes irrespective of the state of the other filter
184- if udpres == blackHoleResultProbing || ipv6res == blackHoleResultProbing {
185- return true
206+ // If we are probing to check black hole status allow all addresses
207+ if udpRes == blackHoleResultProbing || ipv6Res == blackHoleResultProbing {
208+ return addrs
186209 }
187- return udpres != blackHoleResultBlocked && ipv6res != blackHoleResultBlocked
210+
211+ filterudp := udpRes == blackHoleResultBlocked
212+ filteripv6 := ipv6Res == blackHoleResultBlocked
213+
214+ return ma .FilterAddrs (
215+ addrs ,
216+ func (a ma.Multiaddr ) bool { return ! (filterudp && isProtocolAddr (a , ma .P_UDP )) },
217+ func (a ma.Multiaddr ) bool { return ! (filteripv6 && isProtocolAddr (a , ma .P_IP6 )) },
218+ )
188219}
189220
190- // RecordResult updates the state of the relevant `blackHoleFilter` for addr
221+ // RecordResult updates the state of the relevant `blackHoleFilter`s for addr
191222func (d * blackHoleDetector ) RecordResult (addr ma.Multiaddr , success bool ) {
192223 if ! manet .IsPublicAddr (addr ) {
193224 return
@@ -204,13 +235,13 @@ func newBlackHoleDetector(detectUDP, detectIPv6 bool, mt MetricsTracer) *blackHo
204235 d := & blackHoleDetector {}
205236
206237 // A black hole is a binary property. On a network if UDP dials are blocked or there is
207- // no IPv6 connectivity, all dials will fail. So a low min success fraction like 0.01 is
208- // good enough.
238+ // no IPv6 connectivity, all dials will fail. So a low success rate of 3 out 100 dials
239+ // is good enough.
209240 if detectUDP {
210- d .udp = & blackHoleFilter {n : 100 , minSuccessFraction : 0.01 , name : "UDP" , metricsTracer : mt }
241+ d .udp = & blackHoleFilter {n : 100 , minSuccesses : 3 , name : "UDP" , metricsTracer : mt }
211242 }
212243 if detectIPv6 {
213- d .ipv6 = & blackHoleFilter {n : 100 , minSuccessFraction : 0.01 , name : "IPv6" , metricsTracer : mt }
244+ d .ipv6 = & blackHoleFilter {n : 100 , minSuccesses : 3 , name : "IPv6" , metricsTracer : mt }
214245 }
215246 return d
216247}
0 commit comments