Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions internal/controller/nodedeployment/envtest/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,27 @@ func condTrue(snd *seiv1alpha1.SeiNodeDeployment, condType string) bool {
c := apimeta.FindStatusCondition(snd.Status.Conditions, condType)
return c != nil && c.Status == metav1.ConditionTrue
}

// listEventsForSND lists the Events in the SND's namespace and keeps the
// ones whose InvolvedObject UID equals the SND's UID. A non-empty reason
// narrows the result to events carrying exactly that Reason; an empty
// reason keeps every event for the SND.
//
// A failed List is reported through t.Logf and yields nil; it does not
// fail the test by itself.
func listEventsForSND(t *testing.T, snd *seiv1alpha1.SeiNodeDeployment, reason string) []corev1.Event {
	t.Helper()

	var all corev1.EventList
	err := testCli.List(testCtx, &all, client.InNamespace(snd.Namespace))
	if err != nil {
		t.Logf("listEventsForSND list error: %v", err)
		return nil
	}

	matched := make([]corev1.Event, 0, len(all.Items))
	for _, ev := range all.Items {
		belongsToSND := ev.InvolvedObject.UID == snd.UID
		reasonAccepted := reason == "" || ev.Reason == reason
		if belongsToSND && reasonAccepted {
			matched = append(matched, ev)
		}
	}
	return matched
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
//go:build envtest

package envtest_test

import (
"testing"

. "github.com/onsi/gomega"

apimeta "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1"
"github.com/sei-protocol/sei-k8s-controller/internal/controller/nodedeployment/envtest/fixtures"
)

// TestInPlaceRollout_Supersession verifies the supersession branch in
// detectDeploymentNeeded (internal/controller/nodedeployment/nodes.go):
// when spec.template.spec.image changes a second time before the first
// rollout finishes, the in-flight plan must be replaced with one
// targeting the latest hash, and a RolloutSuperseded event must be
// recorded against the SND so an operator can see the transition in
// `kubectl describe`.
//
// Timing control comes from pausing the StatefulSet status faker. While
// paused, the StatefulSet's .Status never advances past the previous
// generation, which stalls the SeiNode-side ObserveImage task and in
// turn stalls the SND-side AwaitSpecUpdate task that polls children's
// Status.CurrentImage. The v2 rollout therefore parks in flight,
// giving us a deterministic window in which to patch v3.
//
// Asserts in order:
//
//  1. Initial v1 deployment reaches steady state (status.rollout == nil)
//  2. Patching to v2 starts a rollout with TargetHash != v1 hash
//  3. With the faker paused, the v2 rollout remains in flight (does NOT
//     complete) until we resume the faker
//  4. Patching to v3 while v2 is in flight retargets Status.Rollout to
//     the v3 hash and (eventually) a RolloutSuperseded event referencing
//     the old (v2) target becomes visible
//  5. After resuming the faker, the v3 rollout completes cleanly:
//     status.plan nil, status.rollout nil, RolloutInProgress
//     False/RolloutComplete, every child at v3
func TestInPlaceRollout_Supersession(t *testing.T) {
	g := NewWithT(t)
	ns := makeNamespace(t)

	const (
		v1       = "ghcr.io/sei-protocol/seid:v1.0.0"
		v2       = "ghcr.io/sei-protocol/seid:v2.0.0"
		v3       = "ghcr.io/sei-protocol/seid:v3.0.0"
		replicas = 2
	)

	snd := fixtures.NewSND(ns, "supersession",
		fixtures.WithReplicas(replicas),
		fixtures.WithImage(v1),
	)
	g.Expect(testCli.Create(testCtx, snd)).To(Succeed())
	key := client.ObjectKeyFromObject(snd)

	// 1. Initial v1 settle. With the faker running, the rollout
	//    collapses to instantaneous; we just want Status.TemplateHash
	//    populated and no in-flight rollout.
	waitForStatus(t, key, func(latest *seiv1alpha1.SeiNodeDeployment) bool {
		if latest.Status.TemplateHash == "" {
			return false
		}
		if latest.Status.Rollout != nil {
			return false
		}
		return !condTrue(latest, seiv1alpha1.ConditionRolloutInProgress)
	}, "initial v1 deployment reached steady state")

	v1Hash := getSND(t, key).Status.TemplateHash
	g.Expect(v1Hash).NotTo(BeEmpty())

	// 2. Pause the faker BEFORE the v2 patch so the v2 rollout stalls
	//    in flight. The Cleanup ensures we re-enable the faker even if
	//    the test fails partway, keeping subsequent tests on a
	//    deterministic harness.
	testFaker.Pause()
	t.Cleanup(testFaker.Resume)

	// 3. Patch v1→v2 and wait for the rollout to register. Do NOT wait
	//    for completion — with the faker paused, it can't complete.
	patchSNDImage(t, getSND(t, key), v2)

	var v2Hash string
	waitForStatus(t, key, func(latest *seiv1alpha1.SeiNodeDeployment) bool {
		if latest.Status.Rollout == nil {
			return false
		}
		if latest.Status.Rollout.TargetHash == "" || latest.Status.Rollout.TargetHash == v1Hash {
			return false
		}
		v2Hash = latest.Status.Rollout.TargetHash
		return condTrue(latest, seiv1alpha1.ConditionRolloutInProgress)
	}, "v2 rollout registered (Status.Rollout populated, RolloutInProgress=True)")
	g.Expect(v2Hash).NotTo(Equal(v1Hash))

	// 4. Patch v2→v3 while the v2 plan is in flight. detectDeploymentNeeded's
	//    supersession branch should fire on the next reconcile: it nils
	//    Status.Plan, records a RolloutSuperseded event with the v2
	//    target hash, then falls through to write a fresh Status.Rollout
	//    with the v3 hash.
	patchSNDImage(t, getSND(t, key), v3)

	var v3Hash string
	waitForStatus(t, key, func(latest *seiv1alpha1.SeiNodeDeployment) bool {
		if latest.Status.Rollout == nil {
			return false
		}
		// Same guard as the v2 wait: an empty TargetHash is not a
		// retarget, so keep polling instead of capturing "" as v3Hash.
		target := latest.Status.Rollout.TargetHash
		if target == "" || target == v1Hash || target == v2Hash {
			return false
		}
		v3Hash = target
		return condTrue(latest, seiv1alpha1.ConditionRolloutInProgress)
	}, "rollout retargeted to v3 (supersession applied)")
	g.Expect(v3Hash).NotTo(Equal(v2Hash))
	g.Expect(v3Hash).NotTo(Equal(v1Hash))

	// 5. The supersession event was recorded against the SND. The
	//    message format from nodes.go:detectDeploymentNeeded includes
	//    the old (v2) target hash verbatim, so we can assert on
	//    presence + content rather than just count.
	//
	//    NOTE(review): event recorders normally hand events to a
	//    background broadcaster, so the Event object may reach the
	//    apiserver after the status update we just observed. Poll
	//    instead of listing once, and match any RolloutSuperseded event
	//    rather than relying on list order.
	sndForEvents := getSND(t, key)
	g.Eventually(func(eg Gomega) {
		events := listEventsForSND(t, sndForEvents, "RolloutSuperseded")
		eg.Expect(events).NotTo(BeEmpty(), "expected at least one RolloutSuperseded event for the v2→v3 transition")

		messages := make([]string, 0, len(events))
		for i := range events {
			messages = append(messages, events[i].Message)
		}
		eg.Expect(messages).To(ContainElement(ContainSubstring(v2Hash)),
			"supersession event should reference the old (v2) target hash")
	}, "10s", "200ms").Should(Succeed())

	// 6. Resume the faker so the v3 rollout can complete.
	testFaker.Resume()

	// 7. The v3 rollout reaches terminal state: plan cleared,
	//    Status.Rollout cleared, RolloutInProgress False with reason
	//    RolloutComplete.
	waitForStatus(t, key, func(latest *seiv1alpha1.SeiNodeDeployment) bool {
		if latest.Status.Plan != nil {
			return false
		}
		if latest.Status.Rollout != nil {
			return false
		}
		cond := apimeta.FindStatusCondition(latest.Status.Conditions, seiv1alpha1.ConditionRolloutInProgress)
		if cond == nil || cond.Status != metav1.ConditionFalse {
			return false
		}
		return cond.Reason == "RolloutComplete"
	}, "v3 rollout reached terminal state after faker resume")

	// 8. Every child landed on v3 (not v2).
	kids := listChildren(t, getSND(t, key))
	g.Expect(kids).To(HaveLen(replicas))
	for i := range kids {
		g.Expect(kids[i].Spec.Image).To(Equal(v3), "child %s spec.image", kids[i].Name)
	}
}
82 changes: 59 additions & 23 deletions internal/controller/nodedeployment/envtest/sts_status_faker.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,31 @@ import (
"context"
"errors"
"fmt"
"sync"
"time"

appsv1 "k8s.io/api/apps/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// StartStatefulSetStatusFaker spawns a goroutine that periodically reconciles
// the .Status of every StatefulSet in the cluster so envtest can drive
// rollouts to completion. envtest's apiserver has no StatefulSet
// controller — Pods never get created, .Status stays empty, and the
// SeiNode rollout plan (ReplacePod + ObserveImage) gates on
// status.observedGeneration / status.updatedReplicas which never advance.
//
// The faker patches every observed StatefulSet to look "fully rolled
// out at the current spec generation":
// StatusFaker is the handle for the background loop that pushes every
// StatefulSet's .Status toward "fully rolled out at the current
// generation". The loop ticks every 50ms and exists because envtest's
// apiserver runs no StatefulSet controller of its own.
//
// Tests that must observe a rollout while it is still in flight call
// Pause(), drive the scenario, then call Resume() — see
// TestInPlaceRollout_Supersession.
type StatusFaker struct {
	// kc is the client used to read and patch StatefulSets.
	kc client.Client
	// ctx bounds the lifetime of the loop; cancel ends it.
	ctx    context.Context
	cancel context.CancelFunc

	// mu guards paused.
	mu     sync.Mutex
	paused bool
}

// StartStatefulSetStatusFaker spawns the faker goroutine and returns a
// handle. The faker patches every observed StatefulSet to look "fully
// rolled out at the current spec generation":
//
// - status.observedGeneration = .Generation
// - status.currentRevision = "stub-rev"
Expand All @@ -29,30 +38,57 @@ import (
// - status.readyReplicas = *spec.replicas
// - status.replicas = *spec.replicas
//
// This is intentionally indistinguishable from "rollout already done".
// The InPlace test does not assert on transient rollout state — it
// asserts on terminal state — so collapsing the rollout to
// instantaneous is fine.
// This is intentionally indistinguishable from "rollout already done."
// envtest tests assert on terminal state, so collapsing the rollout to
// instantaneous is fine for the common case.
//
// The returned cancel func stops the goroutine. Callers should defer it.
// Poll interval is short (50ms) because the InPlace test's poll loop
// runs at 200ms; the faker needs to win the race for every reconcile
// trigger.
func StartStatefulSetStatusFaker(ctx context.Context, kc client.Client) context.CancelFunc {
// Poll interval is 50ms because test poll loops run at 200ms; the faker
// needs to win the race for every reconcile trigger.
func StartStatefulSetStatusFaker(ctx context.Context, kc client.Client) *StatusFaker {
innerCtx, cancel := context.WithCancel(ctx)
go runFaker(innerCtx, kc)
return cancel
f := &StatusFaker{ctx: innerCtx, cancel: cancel, kc: kc}
go f.run()
return f
}

// Stop ends the faker goroutine by invoking the cancel func held on the
// handle.
func (f *StatusFaker) Stop() {
	f.cancel()
}

// Pause marks the faker as paused so its loop skips status writes.
// StatefulSets observed during the pause keep whatever .Status the
// apiserver last persisted, which lets a test hold a rollout mid-flight
// without touching production controller code.
func (f *StatusFaker) Pause() {
	f.mu.Lock()
	f.paused = true
	f.mu.Unlock()
}

func runFaker(ctx context.Context, kc client.Client) {
// Resume clears the paused flag so status writes start again. On the
// following tick the loop reconciles any StatefulSets whose .Status
// drifted from the desired faked state during the pause.
func (f *StatusFaker) Resume() {
	f.mu.Lock()
	f.paused = false
	f.mu.Unlock()
}

// isPaused reports the current value of the paused flag, read under mu.
func (f *StatusFaker) isPaused() bool {
	f.mu.Lock()
	state := f.paused
	f.mu.Unlock()
	return state
}

func (f *StatusFaker) run() {
t := time.NewTicker(50 * time.Millisecond)
defer t.Stop()
for {
select {
case <-ctx.Done():
case <-f.ctx.Done():
return
case <-t.C:
if err := fakeStatuses(ctx, kc); err != nil && !errors.Is(err, context.Canceled) {
if f.isPaused() {
continue
}
if err := fakeStatuses(f.ctx, f.kc); err != nil && !errors.Is(err, context.Canceled) {
// Best-effort; the next tick will retry. Swallow rather
// than spam logs — envtest teardown races are noisy
// enough already.
Expand Down
9 changes: 5 additions & 4 deletions internal/controller/nodedeployment/envtest/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,10 @@ import (
// Package-level handles populated by TestMain and consumed by individual
// _test.go files via the helpers in helpers_test.go.
var (
testCli client.Client
testCtx context.Context
testCncl context.CancelFunc
testCli client.Client
testCtx context.Context
testCncl context.CancelFunc
testFaker *envtestpkg.StatusFaker
)

func TestMain(m *testing.M) {
Expand Down Expand Up @@ -136,7 +137,7 @@ func run(m *testing.M) (int, error) {
// controller's perspective. The test asserts on terminal state, so
// this is indistinguishable from a real (already-completed)
// rollout.
envtestpkg.StartStatefulSetStatusFaker(testCtx, kc)
testFaker = envtestpkg.StartStatefulSetStatusFaker(testCtx, kc)

// SeiNode reconciler — wired with the stub sidecar so init plans
// (genesis mode: ensure-data-pvc, apply-statefulset, apply-service,
Expand Down
Loading