Skip to content

Commit cb803ec

Browse files
committed
address review comments
1 parent b872a85 commit cb803ec

File tree

6 files changed

+173
-139
lines changed

6 files changed

+173
-139
lines changed

dashboards/swarm/swarm.json

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3071,13 +3071,18 @@
30713071
{
30723072
"options": {
30733073
"0": {
3074-
"color": "green",
3074+
"color": "blue",
30753075
"index": 0,
3076-
"text": "Allowed"
3076+
"text": "Probing"
30773077
},
30783078
"1": {
3079-
"color": "purple",
3079+
"color": "green",
30803080
"index": 1,
3081+
"text": "Allowed"
3082+
},
3083+
"2": {
3084+
"color": "purple",
3085+
"index": 2,
30813086
"text": "Blocked"
30823087
}
30833088
},
@@ -3145,7 +3150,17 @@
31453150
"fixedColor": "purple",
31463151
"mode": "fixed"
31473152
},
3148-
"mappings": [],
3153+
"mappings": [
3154+
{
3155+
"options": {
3156+
"0": {
3157+
"index": 0,
3158+
"text": "-"
3159+
}
3160+
},
3161+
"type": "value"
3162+
}
3163+
],
31493164
"thresholds": {
31503165
"mode": "absolute",
31513166
"steps": [
@@ -3169,7 +3184,7 @@
31693184
"colorMode": "value",
31703185
"graphMode": "none",
31713186
"justifyMode": "auto",
3172-
"orientation": "auto",
3187+
"orientation": "horizontal",
31733188
"reduceOptions": {
31743189
"calcs": [
31753190
"lastNotNull"
@@ -3302,6 +3317,6 @@
33023317
"timezone": "",
33033318
"title": "libp2p Swarm",
33043319
"uid": "a15PyhO4z",
3305-
"version": 6,
3320+
"version": 7,
33063321
"weekStart": ""
33073322
}
Lines changed: 92 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package swarm
22

33
import (
4+
"fmt"
45
"sync"
56

67
ma "github.com/multiformats/go-multiaddr"
@@ -10,10 +11,24 @@ import (
1011
type blackHoleState int
1112

1213
const (
13-
blackHoleStateAllowed blackHoleState = iota
14+
blackHoleStateProbing blackHoleState = iota
15+
blackHoleStateAllowed
1416
blackHoleStateBlocked
1517
)
1618

19+
func (st blackHoleState) String() string {
20+
switch st {
21+
case blackHoleStateProbing:
22+
return "Probing"
23+
case blackHoleStateAllowed:
24+
return "Allowed"
25+
case blackHoleStateBlocked:
26+
return "Blocked"
27+
default:
28+
return fmt.Sprintf("Unknown %d", st)
29+
}
30+
}
31+
1732
type blackHoleResult int
1833

1934
const (
@@ -22,32 +37,34 @@ const (
2237
blackHoleResultBlocked
2338
)
2439

25-
// blackHoleFilter provides black hole filtering logic for dials. On detecting a black holed
26-
// network environment, subsequent dials are blocked and only 1 dial every n requests is allowed.
27-
// This should be used in conjunction with an UDP or IPv6 address filter to detect UDP or
28-
// IPv6 black hole.
29-
// Requests are blocked if the success fraction in the last n outcomes is less than
30-
// minSuccessFraction. If a request succeeds in Blocked state, the filter state is reset and n
31-
// subsequent requests are allowed before reevaluating black hole status. Evaluating over n
32-
// outcomes avoids situations where a dial was cancelled because a competing dial succeeded,
33-
// the address was unreachable, and other false negatives.
40+
// blackHoleFilter provides black hole filtering for dials. This filter should be used in
41+
// concert with a UDP of IPv6 address filter to detect UDP or IPv6 black hole. In a black
42+
// holed environments dial requests are blocked and only periodic probes to check the
43+
// state of the black hole are allowed.
44+
//
45+
// Requests are blocked if the number of successes in the last n dials is less than
46+
// minSuccesses. If a request succeeds in Blocked state, the filter state is reset and n
47+
// subsequent requests are allowed before reevaluating black hole state. Dials cancelled
48+
// when some other concurrent dial succeeded are counted as failures. A sufficiently large
49+
// n prevents false negatives in such cases.
3450
type blackHoleFilter struct {
35-
// n is the minimum number of completed dials required before we start blocking.
36-
// Every nth request is allowed irrespective of the status of the detector.
51+
// n serves the dual purpose of being the minimum number of requests after which we
52+
// probe the state of the black hole in blocked state and the minimum number of
53+
// completed dials required before evaluating black hole state.
3754
n int
38-
// minSuccessFraction is the minimum success fraction required to allow dials.
39-
minSuccessFraction float64
55+
// minSuccesses is the minimum number of Success required in the last n dials
56+
// to consider we are not blocked.
57+
minSuccesses int
4058
// name for the detector.
4159
name string
4260

43-
// requests counts number of dial requests up to n. Resets to 0 every nth request.
61+
// requests counts number of dial requests to peers. We handle request at a peer
62+
// level and record results at individual address dial level.
4463
requests int
45-
// dialResults of the last `n` allowed dials. success is true.
64+
// dialResults of the last `n` dials. A successful dial is true.
4665
dialResults []bool
4766
// successes is the count of successful dials in outcomes
4867
successes int
49-
// failures is the count of failed dials in outcomes
50-
failures int
5168
// state is the current state of the detector
5269
state blackHoleState
5370

@@ -72,16 +89,12 @@ func (b *blackHoleFilter) RecordResult(success bool) {
7289

7390
if success {
7491
b.successes++
75-
} else {
76-
b.failures++
7792
}
7893
b.dialResults = append(b.dialResults, success)
7994

8095
if len(b.dialResults) > b.n {
8196
if b.dialResults[0] {
8297
b.successes--
83-
} else {
84-
b.failures--
8598
}
8699
b.dialResults = b.dialResults[1 : b.n+1]
87100
}
@@ -90,8 +103,9 @@ func (b *blackHoleFilter) RecordResult(success bool) {
90103
b.trackMetrics()
91104
}
92105

93-
// HandleRequest handles a new dial request for the filter. It returns a
94-
func (b *blackHoleFilter) HandleRequest() blackHoleResult {
106+
// HandleDialPeerRequest returns the result of applying the black hole filter
107+
// for the dial request.
108+
func (b *blackHoleFilter) HandleDialPeerRequest() blackHoleResult {
95109
b.mu.Lock()
96110
defer b.mu.Unlock()
97111

@@ -101,7 +115,7 @@ func (b *blackHoleFilter) HandleRequest() blackHoleResult {
101115

102116
if b.state == blackHoleStateAllowed {
103117
return blackHoleResultAllowed
104-
} else if b.requests%b.n == 0 {
118+
} else if b.state == blackHoleStateProbing || b.requests%b.n == 0 {
105119
return blackHoleResultProbing
106120
} else {
107121
return blackHoleResultBlocked
@@ -110,47 +124,42 @@ func (b *blackHoleFilter) HandleRequest() blackHoleResult {
110124

111125
func (b *blackHoleFilter) reset() {
112126
b.successes = 0
113-
b.failures = 0
114127
b.dialResults = b.dialResults[:0]
115128
b.requests = 0
116129
b.updateState()
117130
}
118131

119132
func (b *blackHoleFilter) updateState() {
120133
st := b.state
121-
successFraction := 0.0
134+
122135
if len(b.dialResults) < b.n {
136+
b.state = blackHoleStateProbing
137+
} else if b.successes >= b.minSuccesses {
123138
b.state = blackHoleStateAllowed
124139
} else {
125-
successFraction = float64(b.successes) / float64(b.successes+b.failures)
126-
if successFraction >= b.minSuccessFraction {
127-
b.state = blackHoleStateAllowed
128-
} else {
129-
b.state = blackHoleStateBlocked
130-
}
140+
b.state = blackHoleStateBlocked
131141
}
142+
132143
if st != b.state {
133-
if b.state == blackHoleStateAllowed {
134-
log.Debugf("%s blackHoleDetector state changed to Allowed", b.name)
135-
} else {
136-
log.Debugf("%s blackHoleDetector state changed to Blocked. Success fraction is %0.3f", b.name, successFraction)
137-
}
144+
log.Debugf("%s blackHoleDetector state changed from %s to %s", b.name, st, b.state)
138145
}
139146
}
140147

141148
func (b *blackHoleFilter) trackMetrics() {
142149
if b.metricsTracer == nil {
143150
return
144151
}
145-
successFraction := 0.0
146-
if b.successes+b.failures != 0 {
147-
successFraction = float64(b.successes) / float64(b.successes+b.failures)
148-
}
149152

150153
nextRequestAllowedAfter := 0
151154
if b.state == blackHoleStateBlocked {
152155
nextRequestAllowedAfter = b.n - (b.requests % b.n)
153156
}
157+
158+
successFraction := 0.0
159+
if len(b.dialResults) > 0 {
160+
successFraction = float64(b.successes) / float64(len(b.dialResults))
161+
}
162+
154163
b.metricsTracer.UpdatedBlackHoleFilterState(
155164
b.name,
156165
b.state,
@@ -160,34 +169,56 @@ func (b *blackHoleFilter) trackMetrics() {
160169
}
161170

162171
// blackHoleDetector provides UDP and IPv6 black hole detection using a `blackHoleFilter`
163-
// for each. For details of the black hole detection logic see `blackHoleFilter`
172+
// for each. For details of the black hole detection logic see `blackHoleFilter`.
173+
//
174+
// black hole filtering is done at a peer dial level to ensure that periodic probes to
175+
// detect change of the black hole state are actually dialed and are not skipped
176+
// because of dial prioritisation logic.
164177
type blackHoleDetector struct {
165178
udp, ipv6 *blackHoleFilter
166179
}
167180

168-
func (d *blackHoleDetector) HandleRequest(addr ma.Multiaddr) bool {
169-
if !manet.IsPublicAddr(addr) {
170-
return true
181+
// HandleDialPeerRequest filters the peer's addresses removing black holed addresses
182+
func (d *blackHoleDetector) HandleDialPeerRequest(addrs []ma.Multiaddr) []ma.Multiaddr {
183+
hasUDP, hasIPv6 := false, false
184+
for _, addr := range addrs {
185+
if !manet.IsPublicAddr(addr) {
186+
continue
187+
}
188+
if isProtocolAddr(addr, ma.P_UDP) {
189+
hasUDP = true
190+
}
191+
if isProtocolAddr(addr, ma.P_IP6) {
192+
hasIPv6 = true
193+
}
171194
}
172195

173-
udpres := blackHoleResultAllowed
174-
if d.udp != nil && isProtocolAddr(addr, ma.P_UDP) {
175-
udpres = d.udp.HandleRequest()
196+
udpRes := blackHoleResultAllowed
197+
if d.udp != nil && hasUDP {
198+
udpRes = d.udp.HandleDialPeerRequest()
176199
}
177200

178-
ipv6res := blackHoleResultAllowed
179-
if d.ipv6 != nil && isProtocolAddr(addr, ma.P_IP6) {
180-
ipv6res = d.ipv6.HandleRequest()
201+
ipv6Res := blackHoleResultAllowed
202+
if d.ipv6 != nil && hasIPv6 {
203+
ipv6Res = d.ipv6.HandleDialPeerRequest()
181204
}
182205

183-
// Allow all probes irrespective of the state of the other filter
184-
if udpres == blackHoleResultProbing || ipv6res == blackHoleResultProbing {
185-
return true
206+
// If we are probing to check black hole status allow all addresses
207+
if udpRes == blackHoleResultProbing || ipv6Res == blackHoleResultProbing {
208+
return addrs
186209
}
187-
return udpres != blackHoleResultBlocked && ipv6res != blackHoleResultBlocked
210+
211+
filterudp := udpRes == blackHoleResultBlocked
212+
filteripv6 := ipv6Res == blackHoleResultBlocked
213+
214+
return ma.FilterAddrs(
215+
addrs,
216+
func(a ma.Multiaddr) bool { return !(filterudp && isProtocolAddr(a, ma.P_UDP)) },
217+
func(a ma.Multiaddr) bool { return !(filteripv6 && isProtocolAddr(a, ma.P_IP6)) },
218+
)
188219
}
189220

190-
// RecordResult updates the state of the relevant `blackHoleFilter` for addr
221+
// RecordResult updates the state of the relevant `blackHoleFilter`s for addr
191222
func (d *blackHoleDetector) RecordResult(addr ma.Multiaddr, success bool) {
192223
if !manet.IsPublicAddr(addr) {
193224
return
@@ -204,13 +235,13 @@ func newBlackHoleDetector(detectUDP, detectIPv6 bool, mt MetricsTracer) *blackHo
204235
d := &blackHoleDetector{}
205236

206237
// A black hole is a binary property. On a network if UDP dials are blocked or there is
207-
// no IPv6 connectivity, all dials will fail. So a low min success fraction like 0.01 is
208-
// good enough.
238+
// no IPv6 connectivity, all dials will fail. So a low success rate of 3 out 100 dials
239+
// is good enough.
209240
if detectUDP {
210-
d.udp = &blackHoleFilter{n: 100, minSuccessFraction: 0.01, name: "UDP", metricsTracer: mt}
241+
d.udp = &blackHoleFilter{n: 100, minSuccesses: 3, name: "UDP", metricsTracer: mt}
211242
}
212243
if detectIPv6 {
213-
d.ipv6 = &blackHoleFilter{n: 100, minSuccessFraction: 0.01, name: "IPv6", metricsTracer: mt}
244+
d.ipv6 = &blackHoleFilter{n: 100, minSuccesses: 3, name: "IPv6", metricsTracer: mt}
214245
}
215246
return d
216247
}

0 commit comments

Comments
 (0)