From 8ffc4f20e6eeeabe6d8c965f66cbef05a4e70974 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Thu, 12 Feb 2026 13:41:51 +0000 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=A4=96=20fix:=20back=20off=20provisio?= =?UTF-8?q?ner=20reconciliation=20on=20coderd=20429s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - add explicit jittered exponential requeue for CoderProvisioner rate-limit responses - add optional X-Coder-Bypass-Ratelimit handling with automatic fallback when rejected - extend controller and bootstrap tests for backoff + bypass fallback behavior - include existing flake.nix tooling update present in workspace --- _Generated with `mux` • Model: `openai:gpt-5.3-codex` • Thinking: `xhigh` • Cost: `$1.46`_ --- flake.nix | 2 + internal/coderbootstrap/client.go | 102 +++++++++++---- internal/coderbootstrap/client_test.go | 67 ++++++++++ internal/coderbootstrap/provisionerkeys.go | 18 ++- .../coderbootstrap/provisionerkeys_test.go | 118 ++++++++++++++++++ .../controller/coderprovisioner_controller.go | 111 +++++++++++++++- .../coderprovisioner_controller_test.go | 58 +++++++++ 7 files changed, 441 insertions(+), 35 deletions(-) diff --git a/flake.nix b/flake.nix index c9c94752..e5123383 100644 --- a/flake.nix +++ b/flake.nix @@ -57,6 +57,8 @@ govulncheck docsPython + + yazi ]; }; } diff --git a/internal/coderbootstrap/client.go b/internal/coderbootstrap/client.go index b498c7b5..cde090c8 100644 --- a/internal/coderbootstrap/client.go +++ b/internal/coderbootstrap/client.go @@ -5,7 +5,6 @@ import ( "context" "errors" "net/http" - "net/url" "time" "golang.org/x/xerrors" @@ -17,6 +16,65 @@ const ( coderSDKRequestTimeout = 30 * time.Second ) +type bypassRateLimitContextKey struct{} + +func withRateLimitBypass(ctx context.Context) context.Context { + if ctx == nil { + return nil + } + return context.WithValue(ctx, bypassRateLimitContextKey{}, true) +} + +func shouldBypassRateLimit(ctx context.Context) bool { + if ctx == nil { + return false + } + enabled, _ := ctx.Value(bypassRateLimitContextKey{}).(bool) + return enabled +} + +func isRateLimitBypassRejected(err error) bool { + var apiErr *codersdk.Error + return errors.As(err, &apiErr) && apiErr.StatusCode() == http.StatusPreconditionRequired +} + +// IsRateLimitError reports whether err (or any wrapped cause) is a codersdk +// API error with HTTP 429 Too Many Requests. +func IsRateLimitError(err error) bool { + var apiErr *codersdk.Error + return errors.As(err, &apiErr) && apiErr.StatusCode() == http.StatusTooManyRequests +} + +func withOptionalRateLimitBypass[T any](ctx context.Context, operation func(context.Context) (T, error)) (T, error) { + result, err := operation(withRateLimitBypass(ctx)) + if err == nil { + return result, nil + } + if !isRateLimitBypassRejected(err) { + return result, err + } + return operation(ctx) +} + +type bypassRateLimitRoundTripper struct { + base http.RoundTripper +} + +func (rt bypassRateLimitRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + if req == nil { + return nil, xerrors.New("assertion failed: request must not be nil") + } + if !shouldBypassRateLimit(req.Context()) { + return rt.base.RoundTrip(req) + } + + cloned := req.Clone(req.Context()) + cloned.Header = req.Header.Clone() + cloned.Header.Set(codersdk.BypassRatelimitHeader, "true") + + return rt.base.RoundTrip(cloned) +} + // RegisterWorkspaceProxyRequest describes how to register a workspace proxy in Coder. type RegisterWorkspaceProxyRequest struct { CoderURL string @@ -60,24 +118,10 @@ func (c *SDKClient) EnsureWorkspaceProxy(ctx context.Context, req RegisterWorksp return RegisterWorkspaceProxyResponse{}, xerrors.New("proxy name is required") } - coderURL, err := url.Parse(req.CoderURL) + client, err := newAuthenticatedClient(req.CoderURL, req.SessionToken) if err != nil { - return RegisterWorkspaceProxyResponse{}, xerrors.Errorf("parse coder URL: %w", err) - } - - client := codersdk.New(coderURL) - client.SetSessionToken(req.SessionToken) - if client.HTTPClient == nil { - client.HTTPClient = &http.Client{} - } - defaultTransport, ok := http.DefaultTransport.(*http.Transport) - if !ok { - return RegisterWorkspaceProxyResponse{}, xerrors.New("assertion failed: http.DefaultTransport is not *http.Transport") + return RegisterWorkspaceProxyResponse{}, err } - // Use a dedicated transport to avoid sharing http.DefaultTransport's - // connection pool across parallel test servers. - client.HTTPClient.Transport = defaultTransport.Clone() - client.HTTPClient.Timeout = coderSDKRequestTimeout existing, err := client.WorkspaceProxyByName(ctx, req.ProxyName) if err != nil { @@ -86,10 +130,12 @@ func (c *SDKClient) EnsureWorkspaceProxy(ctx context.Context, req RegisterWorksp return RegisterWorkspaceProxyResponse{}, xerrors.Errorf("query workspace proxy %q: %w", req.ProxyName, err) } - created, createErr := client.CreateWorkspaceProxy(ctx, codersdk.CreateWorkspaceProxyRequest{ - Name: req.ProxyName, - DisplayName: req.DisplayName, - Icon: req.Icon, + created, createErr := withOptionalRateLimitBypass(ctx, func(requestCtx context.Context) (codersdk.UpdateWorkspaceProxyResponse, error) { + return client.CreateWorkspaceProxy(requestCtx, codersdk.CreateWorkspaceProxyRequest{ + Name: req.ProxyName, + DisplayName: req.DisplayName, + Icon: req.Icon, + }) }) if createErr != nil { return RegisterWorkspaceProxyResponse{}, xerrors.Errorf("create workspace proxy %q: %w", req.ProxyName, createErr) @@ -109,12 +155,14 @@ func (c *SDKClient) EnsureWorkspaceProxy(ctx context.Context, req RegisterWorksp icon = existing.IconURL } - updated, err := client.PatchWorkspaceProxy(ctx, codersdk.PatchWorkspaceProxy{ - ID: existing.ID, - Name: existing.Name, - DisplayName: displayName, - Icon: icon, - RegenerateToken: true, + updated, err := withOptionalRateLimitBypass(ctx, func(requestCtx context.Context) (codersdk.UpdateWorkspaceProxyResponse, error) { + return client.PatchWorkspaceProxy(requestCtx, codersdk.PatchWorkspaceProxy{ + ID: existing.ID, + Name: existing.Name, + DisplayName: displayName, + Icon: icon, + RegenerateToken: true, + }) }) if err != nil { return RegisterWorkspaceProxyResponse{}, xerrors.Errorf("update workspace proxy %q: %w", req.ProxyName, err) diff --git a/internal/coderbootstrap/client_test.go b/internal/coderbootstrap/client_test.go index 75ffe02d..1b97f7c2 100644 --- a/internal/coderbootstrap/client_test.go +++ b/internal/coderbootstrap/client_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/coder/coder/v2/codersdk" "github.com/google/uuid" "github.com/stretchr/testify/require" @@ -200,6 +201,72 @@ func TestEnsureWorkspaceProxyUpdatesExistingProxy(t *testing.T) { require.Equal(t, "token-updated", result.ProxyToken) } +func TestEnsureWorkspaceProxyCreateFallsBackWhenRateLimitBypassRejected(t *testing.T) { + t.Parallel() + + const proxyName = "proxy-fallback" + now := time.Now().UTC().Format(time.RFC3339) + proxyID := uuid.NewString() + createCalls := 0 + bypassRejectedCalls := 0 + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodGet && r.URL.Path == "/api/v2/workspaceproxies/"+proxyName: + w.WriteHeader(http.StatusNotFound) + _ = json.NewEncoder(w).Encode(map[string]string{"message": "not found"}) + return + case r.Method == http.MethodPost && r.URL.Path == "/api/v2/workspaceproxies": + createCalls++ + if r.Header.Get(codersdk.BypassRatelimitHeader) == "true" { + bypassRejectedCalls++ + w.WriteHeader(http.StatusPreconditionRequired) + _ = json.NewEncoder(w).Encode(map[string]string{"message": "bypass is not allowed"}) + return + } + + response := proxyResponse{} + response.Proxy.ID = proxyID + response.Proxy.Name = proxyName + response.Proxy.DisplayName = "Proxy Fallback" + response.Proxy.IconURL = "/emojis/1f5fa.png" + response.Proxy.Healthy = true + response.Proxy.PathAppURL = "https://proxy-fallback.example.com" + response.Proxy.WildcardHostname = "*.proxy-fallback.example.com" + response.Proxy.Status.Status = "unregistered" + response.Proxy.Status.CheckedAt = now + response.Proxy.CreatedAt = now + response.Proxy.UpdatedAt = now + response.Proxy.Version = "2.0.0" + response.ProxyToken = "token-created" + + w.WriteHeader(http.StatusCreated) + err := json.NewEncoder(w).Encode(response) + require.NoError(t, err) + return + default: + w.WriteHeader(http.StatusNotFound) + _ = json.NewEncoder(w).Encode(map[string]string{"message": "unexpected route"}) + return + } + })) + defer server.Close() + + client := coderbootstrap.NewSDKClient() + result, err := client.EnsureWorkspaceProxy(context.Background(), coderbootstrap.RegisterWorkspaceProxyRequest{ + CoderURL: server.URL, + SessionToken: "session-token", + ProxyName: proxyName, + DisplayName: "Proxy Fallback", + Icon: "/emojis/1f5fa.png", + }) + require.NoError(t, err) + require.Equal(t, proxyName, result.ProxyName) + require.Equal(t, "token-created", result.ProxyToken) + require.Equal(t, 2, createCalls) + require.Equal(t, 1, bypassRejectedCalls) +} + func TestEnsureWorkspaceProxyValidatesInputs(t *testing.T) { t.Parallel() diff --git a/internal/coderbootstrap/provisionerkeys.go b/internal/coderbootstrap/provisionerkeys.go index e1934ca9..fdc2f387 100644 --- a/internal/coderbootstrap/provisionerkeys.go +++ b/internal/coderbootstrap/provisionerkeys.go @@ -67,9 +67,11 @@ func (c *SDKClient) EnsureProvisionerKey(ctx context.Context, req EnsureProvisio }, nil } - created, err := client.CreateProvisionerKey(ctx, organization.ID, codersdk.CreateProvisionerKeyRequest{ - Name: req.KeyName, - Tags: req.Tags, + created, err := withOptionalRateLimitBypass(ctx, func(requestCtx context.Context) (codersdk.CreateProvisionerKeyResponse, error) { + return client.CreateProvisionerKey(requestCtx, organization.ID, codersdk.CreateProvisionerKeyRequest{ + Name: req.KeyName, + Tags: req.Tags, + }) }) if err != nil { return EnsureProvisionerKeyResponse{}, xerrors.Errorf("create provisioner key %q: %w", req.KeyName, err) @@ -78,7 +80,9 @@ func (c *SDKClient) EnsureProvisionerKey(ctx context.Context, req EnsureProvisio return EnsureProvisionerKeyResponse{}, xerrors.Errorf("assertion failed: created provisioner key %q returned an empty key", req.KeyName) } - createdMetadata, err := findOrganizationProvisionerKey(ctx, client, organization.ID, req.KeyName) + createdMetadata, err := withOptionalRateLimitBypass(ctx, func(requestCtx context.Context) (*codersdk.ProvisionerKey, error) { + return findOrganizationProvisionerKey(requestCtx, client, organization.ID, req.KeyName) + }) if err != nil { return EnsureProvisionerKeyResponse{}, xerrors.Errorf("query created provisioner key %q: %w", req.KeyName, err) } @@ -119,7 +123,9 @@ func (c *SDKClient) DeleteProvisionerKey(ctx context.Context, coderURL, sessionT return err } - err = client.DeleteProvisionerKey(ctx, organization.ID, keyName) + _, err = withOptionalRateLimitBypass(ctx, func(requestCtx context.Context) (struct{}, error) { + return struct{}{}, client.DeleteProvisionerKey(requestCtx, organization.ID, keyName) + }) if err == nil { return nil } @@ -166,7 +172,7 @@ func newAuthenticatedClient(coderURL, sessionToken string) (*codersdk.Client, er } // Use a dedicated transport to avoid sharing http.DefaultTransport's // connection pool across parallel test servers. - client.HTTPClient.Transport = defaultTransport.Clone() + client.HTTPClient.Transport = bypassRateLimitRoundTripper{base: defaultTransport.Clone()} client.HTTPClient.Timeout = coderSDKRequestTimeout return client, nil diff --git a/internal/coderbootstrap/provisionerkeys_test.go b/internal/coderbootstrap/provisionerkeys_test.go index a1032aaf..05aa6427 100644 --- a/internal/coderbootstrap/provisionerkeys_test.go +++ b/internal/coderbootstrap/provisionerkeys_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/coder/coder/v2/codersdk" "github.com/google/uuid" "github.com/stretchr/testify/require" @@ -150,6 +151,123 @@ func TestEnsureProvisionerKey_Exists(t *testing.T) { require.Empty(t, resp.Key) } +func TestEnsureProvisionerKey_FallsBackWhenRateLimitBypassRejected(t *testing.T) { + t.Parallel() + + const keyName = "provisioner-key" + orgID := uuid.New() + keyID := uuid.New() + now := time.Now().UTC().Format(time.RFC3339) + + createCalls := 0 + listCalls := 0 + bypassRejectedCalls := 0 + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get(codersdk.BypassRatelimitHeader) == "true" { + bypassRejectedCalls++ + writeJSONResponse(t, w, http.StatusPreconditionRequired, map[string]any{ + "message": "bypass is not allowed", + }) + return + } + + switch { + case r.Method == http.MethodGet && r.URL.Path == "/api/v2/organizations/default": + writeJSONResponse(t, w, http.StatusOK, map[string]any{ + "id": orgID.String(), + "name": "default", + "created_at": now, + "updated_at": now, + }) + return + case r.Method == http.MethodGet && r.URL.Path == "/api/v2/organizations/"+orgID.String()+"/provisionerkeys": + listCalls++ + if createCalls == 0 { + writeJSONResponse(t, w, http.StatusOK, []any{}) + return + } + writeJSONResponse(t, w, http.StatusOK, []map[string]any{{ + "id": keyID.String(), + "name": keyName, + "organization": orgID.String(), + "created_at": now, + "tags": map[string]string{"cluster": "dev"}, + }}) + return + case r.Method == http.MethodPost && r.URL.Path == "/api/v2/organizations/"+orgID.String()+"/provisionerkeys": + createCalls++ + writeJSONResponse(t, w, http.StatusCreated, map[string]any{ + "key": "plaintext-provisioner-key", + }) + return + default: + writeJSONResponse(t, w, http.StatusNotFound, map[string]any{"message": "unexpected route"}) + return + } + })) + defer server.Close() + + client := coderbootstrap.NewSDKClient() + resp, err := client.EnsureProvisionerKey(context.Background(), coderbootstrap.EnsureProvisionerKeyRequest{ + CoderURL: server.URL, + SessionToken: "session-token", + KeyName: keyName, + Tags: map[string]string{"cluster": "dev"}, + }) + require.NoError(t, err) + require.Equal(t, 1, createCalls) + require.Equal(t, 2, listCalls) + require.Equal(t, 2, bypassRejectedCalls) + require.Equal(t, orgID, resp.OrganizationID) + require.Equal(t, keyID, resp.KeyID) + require.Equal(t, keyName, resp.KeyName) + require.Equal(t, "plaintext-provisioner-key", resp.Key) +} + +func TestDeleteProvisionerKey_FallsBackWhenRateLimitBypassRejected(t *testing.T) { + t.Parallel() + + const keyName = "delete-me" + orgID := uuid.New() + now := time.Now().UTC().Format(time.RFC3339) + deleteCalls := 0 + bypassRejectedCalls := 0 + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get(codersdk.BypassRatelimitHeader) == "true" { + bypassRejectedCalls++ + writeJSONResponse(t, w, http.StatusPreconditionRequired, map[string]any{"message": "bypass is not allowed"}) + return + } + + switch { + case r.Method == http.MethodGet && r.URL.Path == "/api/v2/organizations/default": + writeJSONResponse(t, w, http.StatusOK, map[string]any{ + "id": orgID.String(), + "name": "default", + "created_at": now, + "updated_at": now, + }) + return + case r.Method == http.MethodDelete && r.URL.Path == "/api/v2/organizations/"+orgID.String()+"/provisionerkeys/"+keyName: + deleteCalls++ + w.WriteHeader(http.StatusNoContent) + return + default: + writeJSONResponse(t, w, http.StatusNotFound, map[string]any{"message": "unexpected route"}) + return + } + })) + defer server.Close() + + client := coderbootstrap.NewSDKClient() + err := client.DeleteProvisionerKey(context.Background(), server.URL, "session-token", "", keyName) + require.NoError(t, err) + require.Equal(t, 1, deleteCalls) + require.Equal(t, 1, bypassRejectedCalls) +} + func TestEnsureProvisionerKey_ValidationErrors(t *testing.T) { t.Parallel() diff --git a/internal/controller/coderprovisioner_controller.go b/internal/controller/coderprovisioner_controller.go index 6bdd184c..2dff18d2 100644 --- a/internal/controller/coderprovisioner_controller.go +++ b/internal/controller/coderprovisioner_controller.go @@ -3,10 +3,14 @@ package controller import ( "context" + "crypto/rand" + "encoding/binary" "fmt" "hash/fnv" "maps" "slices" + "sync" + "time" "github.com/google/uuid" appsv1 "k8s.io/api/apps/v1" @@ -33,8 +37,15 @@ const ( provisionerNamePrefix = "provisioner-" provisionerServiceAccountSuffix = "-provisioner" provisionerKeyChecksumAnnotation = "checksum/provisioner-key" + + provisionerRateLimitBackoffBase = 2 * time.Second + provisionerRateLimitBackoffCap = 2 * time.Minute + provisionerRateLimitBackoffFloor = 1 * time.Second + provisionerRateLimitJitterRatio = 0.2 ) +var provisionerRateLimitBackoffAttempts sync.Map // map[string]int + // CoderProvisionerReconciler reconciles a CoderProvisioner object. type CoderProvisionerReconciler struct { client.Client @@ -50,7 +61,7 @@ type CoderProvisionerReconciler struct { // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles;rolebindings,verbs=get;list;watch;create;update;patch;delete // Reconcile converges the desired CoderProvisioner spec into Deployment, RBAC, and Secret resources. -func (r *CoderProvisionerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *CoderProvisionerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, err error) { if r.Client == nil { return ctrl.Result{}, fmt.Errorf("assertion failed: reconciler client must not be nil") } @@ -62,7 +73,42 @@ func (r *CoderProvisionerReconciler) Reconcile(ctx context.Context, req ctrl.Req } provisioner := &coderv1alpha1.CoderProvisioner{} - if err := r.Get(ctx, req.NamespacedName, provisioner); err != nil { + defer func() { + switch { + case err == nil: + if result.RequeueAfter == 0 { + resetProvisionerRateLimitBackoff(req.NamespacedName) + } + return + case !coderbootstrap.IsRateLimitError(err): + resetProvisionerRateLimitBackoff(req.NamespacedName) + return + } + + backoff := nextProvisionerRateLimitBackoff(req.NamespacedName) + message := fmt.Sprintf("Coder API rate limited while reconciling provisioner key; retrying in %s", backoff.Round(time.Second)) + setCondition( + provisioner, + coderv1alpha1.CoderProvisionerConditionProvisionerKeyReady, + metav1.ConditionFalse, + "RateLimited", + message, + ) + if statusErr := r.Status().Update(ctx, provisioner); statusErr != nil && !apierrors.IsNotFound(statusErr) { + ctrl.LoggerFrom(ctx).Error(statusErr, "failed to update coderprovisioner status after rate limit", "name", req.Name, "namespace", req.Namespace) + } + + ctrl.LoggerFrom(ctx).Info( + "coder API rate limited during provisioner reconciliation; requeueing with backoff", + "name", req.Name, + "namespace", req.Namespace, + "requeueAfter", backoff, + ) + result = ctrl.Result{RequeueAfter: backoff} + err = nil + }() + + if err = r.Get(ctx, req.NamespacedName, provisioner); err != nil { if apierrors.IsNotFound(err) { return ctrl.Result{}, nil } @@ -1083,6 +1129,67 @@ func hashProvisionerSecret(secretValue []byte) string { return fmt.Sprintf("%08x", hasher.Sum32()) } +func nextProvisionerRateLimitBackoff(namespacedName types.NamespacedName) time.Duration { + if namespacedName.Namespace == "" || namespacedName.Name == "" { + return provisionerRateLimitBackoffFloor + } + + mapKey := namespacedName.String() + attempt := 1 + if previous, ok := provisionerRateLimitBackoffAttempts.Load(mapKey); ok { + previousAttempt, castOK := previous.(int) + if !castOK || previousAttempt < 0 { + previousAttempt = 0 + } + attempt = previousAttempt + 1 + } + provisionerRateLimitBackoffAttempts.Store(mapKey, attempt) + + backoff := provisionerRateLimitBackoffBase + for i := 1; i < attempt; i++ { + if backoff >= provisionerRateLimitBackoffCap/2 { + backoff = provisionerRateLimitBackoffCap + break + } + backoff *= 2 + } + if backoff > provisionerRateLimitBackoffCap { + backoff = provisionerRateLimitBackoffCap + } + + jittered := time.Duration(float64(backoff) * provisionerRateLimitJitterMultiplier()) + if jittered < provisionerRateLimitBackoffFloor { + jittered = provisionerRateLimitBackoffFloor + } + if jittered > provisionerRateLimitBackoffCap { + jittered = provisionerRateLimitBackoffCap + } + + return jittered +} + +func resetProvisionerRateLimitBackoff(namespacedName types.NamespacedName) { + if namespacedName.Namespace == "" || namespacedName.Name == "" { + return + } + provisionerRateLimitBackoffAttempts.Delete(namespacedName.String()) +} + +func provisionerRateLimitJitterMultiplier() float64 { + if provisionerRateLimitJitterRatio <= 0 { + return 1 + } + + var randomBytes [8]byte + if _, err := rand.Read(randomBytes[:]); err != nil { + return 1 + } + + randomFraction := float64(binary.BigEndian.Uint64(randomBytes[:])) / float64(^uint64(0)) + jitter := (randomFraction*2 - 1) * provisionerRateLimitJitterRatio + return 1 + jitter +} + func setCondition( provisioner *coderv1alpha1.CoderProvisioner, conditionType string, diff --git a/internal/controller/coderprovisioner_controller_test.go b/internal/controller/coderprovisioner_controller_test.go index 8ed3f2f2..de31bd8c 100644 --- a/internal/controller/coderprovisioner_controller_test.go +++ b/internal/controller/coderprovisioner_controller_test.go @@ -4,10 +4,12 @@ import ( "context" "fmt" "hash/fnv" + "net/http" "strings" "testing" "time" + "github.com/coder/coder/v2/codersdk" "github.com/google/uuid" "github.com/stretchr/testify/require" appsv1 "k8s.io/api/apps/v1" @@ -939,6 +941,62 @@ func TestCoderProvisionerReconciler_ConditionsOnFailure(t *testing.T) { }) } +func TestCoderProvisionerReconciler_RateLimitBackoff(t *testing.T) { + t.Parallel() + + ctx := context.Background() + namespace := createTestNamespace(ctx, t, "coderprov-rate-limit") + controlPlane := createTestControlPlane(ctx, t, namespace, "controlplane-rate-limit", "https://coder.example.com") + bootstrapSecret := createBootstrapSecret(ctx, t, namespace, "bootstrap-creds", coderv1alpha1.DefaultTokenSecretKey, "session-token") + + provisioner := &coderv1alpha1.CoderProvisioner{ + ObjectMeta: metav1.ObjectMeta{Name: "provisioner-rate-limit", Namespace: namespace}, + Spec: coderv1alpha1.CoderProvisionerSpec{ + ControlPlaneRef: corev1.LocalObjectReference{Name: controlPlane.Name}, + Bootstrap: coderv1alpha1.CoderProvisionerBootstrapSpec{ + CredentialsSecretRef: coderv1alpha1.SecretKeySelector{Name: bootstrapSecret.Name, Key: coderv1alpha1.DefaultTokenSecretKey}, + }, + Key: coderv1alpha1.CoderProvisionerKeySpec{Name: "rate-limit-key"}, + }, + } + require.NoError(t, k8sClient.Create(ctx, provisioner)) + t.Cleanup(func() { + _ = k8sClient.Delete(context.Background(), provisioner) + }) + + bootstrapClient := &fakeBootstrapClient{ + provisionerKeyErr: codersdk.NewTestError( + http.StatusTooManyRequests, + http.MethodPost, + "https://coder.example.com/api/v2/organizations/default/provisionerkeys", + ), + } + reconciler := &controller.CoderProvisionerReconciler{Client: k8sClient, Scheme: scheme, BootstrapClient: bootstrapClient} + request := types.NamespacedName{Name: provisioner.Name, Namespace: provisioner.Namespace} + + // The first reconcile only adds the cleanup finalizer. + reconcileProvisioner(ctx, t, reconciler, request) + + firstResult, firstErr := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: request}) + require.NoError(t, firstErr) + require.GreaterOrEqual(t, firstResult.RequeueAfter, time.Second) + require.LessOrEqual(t, firstResult.RequeueAfter, 3*time.Second) + + secondResult, secondErr := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: request}) + require.NoError(t, secondErr) + require.Greater(t, secondResult.RequeueAfter, firstResult.RequeueAfter) + require.GreaterOrEqual(t, secondResult.RequeueAfter, 3*time.Second) + require.LessOrEqual(t, secondResult.RequeueAfter, 6*time.Second) + require.Equal(t, 2, bootstrapClient.provisionerKeyCalls) + + reconciled := &coderv1alpha1.CoderProvisioner{} + require.NoError(t, k8sClient.Get(ctx, request, reconciled)) + requireCondition(t, reconciled.Status.Conditions, coderv1alpha1.CoderProvisionerConditionProvisionerKeyReady, metav1.ConditionFalse) + rateLimitedCondition := findCondition(t, reconciled.Status.Conditions, coderv1alpha1.CoderProvisionerConditionProvisionerKeyReady) + require.Equal(t, "RateLimited", rateLimitedCondition.Reason) + require.Contains(t, rateLimitedCondition.Message, "retrying in") +} + func TestCoderProvisionerReconciler_LongNameTruncation(t *testing.T) { t.Parallel() From ba3f91e95c6dc3ce124cc393ae3e7be4ca8a40de Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Thu, 12 Feb 2026 13:55:19 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=A4=96=20fix:=20avoid=20status=20hot-?= =?UTF-8?q?loop=20while=20handling=20coderd=20429s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - avoid unconditional status writes in 429 defer path - only persist condition changes when status actually changed - keep a stable RateLimited condition message so self-updates do not trigger rapid reconciles - update rate-limit test assertion accordingly --- _Generated with `mux` • Model: `openai:gpt-5.3-codex` • Thinking: `xhigh` • Cost: `$1.46`_ --- internal/controller/coderprovisioner_controller.go | 10 ++++++---- .../controller/coderprovisioner_controller_test.go | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/internal/controller/coderprovisioner_controller.go b/internal/controller/coderprovisioner_controller.go index 2dff18d2..65bcc8cd 100644 --- a/internal/controller/coderprovisioner_controller.go +++ b/internal/controller/coderprovisioner_controller.go @@ -86,16 +86,18 @@ func (r *CoderProvisionerReconciler) Reconcile(ctx context.Context, req ctrl.Req } backoff := nextProvisionerRateLimitBackoff(req.NamespacedName) - message := fmt.Sprintf("Coder API rate limited while reconciling provisioner key; retrying in %s", backoff.Round(time.Second)) + statusSnapshot := provisioner.Status.DeepCopy() setCondition( provisioner, coderv1alpha1.CoderProvisionerConditionProvisionerKeyReady, metav1.ConditionFalse, "RateLimited", - message, + "Coder API rate limited while reconciling provisioner key", ) - if statusErr := r.Status().Update(ctx, provisioner); statusErr != nil && !apierrors.IsNotFound(statusErr) { - ctrl.LoggerFrom(ctx).Error(statusErr, "failed to update coderprovisioner status after rate limit", "name", req.Name, "namespace", req.Namespace) + if statusSnapshot != nil && !equality.Semantic.DeepEqual(*statusSnapshot, provisioner.Status) { + if statusErr := r.Status().Update(ctx, provisioner); statusErr != nil && !apierrors.IsNotFound(statusErr) { + ctrl.LoggerFrom(ctx).Error(statusErr, "failed to update coderprovisioner status after rate limit", "name", req.Name, "namespace", req.Namespace) + } } ctrl.LoggerFrom(ctx).Info( diff --git a/internal/controller/coderprovisioner_controller_test.go b/internal/controller/coderprovisioner_controller_test.go index de31bd8c..bdbb5782 100644 --- a/internal/controller/coderprovisioner_controller_test.go +++ b/internal/controller/coderprovisioner_controller_test.go @@ -994,7 +994,7 @@ func TestCoderProvisionerReconciler_RateLimitBackoff(t *testing.T) { requireCondition(t, reconciled.Status.Conditions, coderv1alpha1.CoderProvisionerConditionProvisionerKeyReady, metav1.ConditionFalse) rateLimitedCondition := findCondition(t, reconciled.Status.Conditions, coderv1alpha1.CoderProvisionerConditionProvisionerKeyReady) require.Equal(t, "RateLimited", rateLimitedCondition.Reason) - require.Contains(t, rateLimitedCondition.Message, "retrying in") + require.Equal(t, "Coder API rate limited while reconciling provisioner key", rateLimitedCondition.Message) } func TestCoderProvisionerReconciler_LongNameTruncation(t *testing.T) {