@@ -2,21 +2,23 @@ package rafttests
22
33import (
44 "context"
5+ "fmt"
56 "math"
67 "testing"
78 "time"
89
9- "github.com/hashicorp/vault/api"
10- "github.com/kr/pretty"
11-
10+ "github.com/hashicorp/go-hclog"
1211 autopilot "github.com/hashicorp/raft-autopilot"
13-
14- "github.com/stretchr/testify/require"
15-
12+ "github.com/hashicorp/vault/api"
1613 "github.com/hashicorp/vault/helper/namespace"
1714 "github.com/hashicorp/vault/helper/testhelpers"
15+ "github.com/hashicorp/vault/helper/testhelpers/teststorage"
1816 "github.com/hashicorp/vault/physical/raft"
17+ "github.com/hashicorp/vault/sdk/helper/strutil"
1918 "github.com/hashicorp/vault/vault"
19+ "github.com/kr/pretty"
20+ testingintf "github.com/mitchellh/go-testing-interface"
21+ "github.com/stretchr/testify/require"
2022)
2123
2224func TestRaft_Autopilot_Disable (t * testing.T ) {
@@ -208,3 +210,133 @@ func TestRaft_Autopilot_Configuration(t *testing.T) {
208210 vault .TestWaitActive (t , leaderCore .Core )
209211 configCheckFunc (config )
210212}
213+
214+ // TestRaft_Autopilot_Stabilization_Delay verifies that if a node takes a long
215+ // time to become ready, it doesn't get promoted to voter until then.
216+ func TestRaft_Autopilot_Stabilization_Delay (t * testing.T ) {
217+ conf , opts := teststorage .ClusterSetup (nil , nil , teststorage .RaftBackendSetup )
218+ conf .DisableAutopilot = false
219+ opts .InmemClusterLayers = true
220+ opts .KeepStandbysSealed = true
221+ opts .SetupFunc = nil
222+ timeToHealthyCore2 := 5 * time .Second
223+ opts .PhysicalFactory = func (t testingintf.T , coreIdx int , logger hclog.Logger , conf map [string ]interface {}) * vault.PhysicalBackendBundle {
224+ config := map [string ]interface {}{
225+ "snapshot_threshold" : "50" ,
226+ "trailing_logs" : "100" ,
227+ "autopilot_reconcile_interval" : "1s" ,
228+ }
229+ if coreIdx == 2 {
230+ config ["snapshot_delay" ] = timeToHealthyCore2 .String ()
231+ }
232+ return teststorage .MakeRaftBackend (t , coreIdx , logger , config )
233+ }
234+
235+ cluster := vault .NewTestCluster (t , conf , opts )
236+ cluster .Start ()
237+ defer cluster .Cleanup ()
238+ testhelpers .WaitForActiveNode (t , cluster )
239+
240+ // Check that autopilot execution state is running
241+ client := cluster .Cores [0 ].Client
242+ state , err := client .Sys ().RaftAutopilotState ()
243+ require .NotNil (t , state )
244+ require .NoError (t , err )
245+ require .Equal (t , true , state .Healthy )
246+ require .Len (t , state .Servers , 1 )
247+ require .Equal (t , "core-0" , state .Servers ["core-0" ].ID )
248+ require .Equal (t , "alive" , state .Servers ["core-0" ].NodeStatus )
249+ require .Equal (t , "leader" , state .Servers ["core-0" ].Status )
250+
251+ _ , err = client .Logical ().Write ("sys/storage/raft/autopilot/configuration" , map [string ]interface {}{
252+ "server_stabilization_time" : "3s" ,
253+ })
254+ require .NoError (t , err )
255+
256+ config , err := client .Sys ().RaftAutopilotConfiguration ()
257+ require .NoError (t , err )
258+
259+ // Wait for 110% of the stabilization time to add nodes
260+ stabilizationKickOffWaitDuration := time .Duration (math .Ceil (1.1 * float64 (config .ServerStabilizationTime )))
261+ time .Sleep (stabilizationKickOffWaitDuration )
262+
263+ cli := cluster .Cores [0 ].Client
264+ // Write more keys than snapshot_threshold
265+ for i := 0 ; i < 250 ; i ++ {
266+ _ , err := cli .Logical ().Write (fmt .Sprintf ("secret/%d" , i ), map [string ]interface {}{
267+ "test" : "data" ,
268+ })
269+ if err != nil {
270+ t .Fatal (err )
271+ }
272+ }
273+
274+ joinFunc := func (core * vault.TestClusterCore ) {
275+ _ , err := core .JoinRaftCluster (namespace .RootContext (context .Background ()), []* raft.LeaderJoinInfo {
276+ {
277+ LeaderAPIAddr : client .Address (),
278+ TLSConfig : cluster .Cores [0 ].TLSConfig ,
279+ Retry : true ,
280+ },
281+ }, false )
282+ require .NoError (t , err )
283+ time .Sleep (1 * time .Second )
284+ cluster .UnsealCore (t , core )
285+ }
286+
287+ checkState := func (nodeID string , numServers int , allHealthy bool , healthy bool , suffrage string ) {
288+ state , err = client .Sys ().RaftAutopilotState ()
289+ require .NoError (t , err )
290+ require .Equal (t , allHealthy , state .Healthy )
291+ require .Len (t , state .Servers , numServers )
292+ require .Equal (t , healthy , state .Servers [nodeID ].Healthy )
293+ require .Equal (t , "alive" , state .Servers [nodeID ].NodeStatus )
294+ require .Equal (t , suffrage , state .Servers [nodeID ].Status )
295+ }
296+
297+ joinFunc (cluster .Cores [1 ])
298+ checkState ("core-1" , 2 , false , false , "non-voter" )
299+
300+ core2shouldBeHealthyAt := time .Now ().Add (timeToHealthyCore2 )
301+ joinFunc (cluster .Cores [2 ])
302+ checkState ("core-2" , 3 , false , false , "non-voter" )
303+
304+ stabilizationWaitDuration := time .Duration (1.25 * float64 (config .ServerStabilizationTime ))
305+ deadline := time .Now ().Add (stabilizationWaitDuration )
306+ var core1healthy , core2healthy bool
307+ for time .Now ().Before (deadline ) {
308+ state , err := client .Sys ().RaftAutopilotState ()
309+ require .NoError (t , err )
310+ core1healthy = state .Servers ["core-1" ].Healthy
311+ core2healthy = state .Servers ["core-2" ].Healthy
312+ time .Sleep (1 * time .Second )
313+ }
314+ if ! core1healthy || core2healthy {
315+ t .Fatalf ("expected health: core1=true and core2=false, got: core=%v, core2=%v" , core1healthy , core2healthy )
316+ }
317+
318+ time .Sleep (2 * time .Second ) // wait for reconciliation
319+ state , err = client .Sys ().RaftAutopilotState ()
320+ require .NoError (t , err )
321+ require .Equal (t , []string {"core-0" , "core-1" }, state .Voters )
322+
323+ for time .Now ().Before (core2shouldBeHealthyAt ) {
324+ state , err := client .Sys ().RaftAutopilotState ()
325+ require .NoError (t , err )
326+ core2healthy = state .Servers ["core-2" ].Healthy
327+ time .Sleep (1 * time .Second )
328+ t .Log (core2healthy )
329+ }
330+
331+ deadline = time .Now ().Add (10 * time .Second )
332+ for time .Now ().Before (deadline ) {
333+ state , err = client .Sys ().RaftAutopilotState ()
334+ if err != nil {
335+ t .Fatal (err )
336+ }
337+ if strutil .EquivalentSlices (state .Voters , []string {"core-0" , "core-1" , "core-2" }) {
338+ break
339+ }
340+ }
341+ require .Equal (t , state .Voters , []string {"core-0" , "core-1" , "core-2" })
342+ }
0 commit comments