From 2bbf9594a7a1aaf0aecda87f55b056fb90d81743 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Fri, 13 Feb 2026 10:06:14 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20fix:=20stabilize=20CoderProvisio?= =?UTF-8?q?ner=20reconcile=20and=20startup?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes CoderProvisioner bring-up failures and reconcile churn by: - granting manager RBAC verbs required to create delegated provisioner Roles - preventing coderd key metadata-backfill retries from repeatedly rotating keys - launching provisioners with explicit command/args compatible with coder images - avoiding CODER_ORGANIZATION injection for key-auth provisioners - updating tests and generated CRD/docs to match behavior --- _Generated with `mux` • Model: `openai:gpt-5.3-codex` • Thinking: `xhigh` • Cost: `$0.00`_ --- api/v1alpha1/coderprovisioner_types.go | 2 +- .../bases/coder.com_coderprovisioners.yaml | 2 +- config/rbac/role.yaml | 15 ++++---- docs/reference/api/coderprovisioner.md | 2 +- .../controller/coderprovisioner_controller.go | 36 +++++++++++++++---- .../coderprovisioner_controller_test.go | 6 ++-- 6 files changed, 44 insertions(+), 19 deletions(-) diff --git a/api/v1alpha1/coderprovisioner_types.go b/api/v1alpha1/coderprovisioner_types.go index 3bb27001..f80593f4 100644 --- a/api/v1alpha1/coderprovisioner_types.go +++ b/api/v1alpha1/coderprovisioner_types.go @@ -69,7 +69,7 @@ type CoderProvisionerSpec struct { Tags map[string]string `json:"tags,omitempty"` // Image is the container image. Defaults to the control plane image. Image string `json:"image,omitempty"` - // ExtraArgs are appended after "provisionerd start". + // ExtraArgs are appended after "provisioner start". ExtraArgs []string `json:"extraArgs,omitempty"` // ExtraEnv are injected into the provisioner container. ExtraEnv []corev1.EnvVar `json:"extraEnv,omitempty"` diff --git a/config/crd/bases/coder.com_coderprovisioners.yaml b/config/crd/bases/coder.com_coderprovisioners.yaml index 7f857bbe..f8f62166 100644 --- a/config/crd/bases/coder.com_coderprovisioners.yaml +++ b/config/crd/bases/coder.com_coderprovisioners.yaml @@ -87,7 +87,7 @@ spec: type: object x-kubernetes-map-type: atomic extraArgs: - description: ExtraArgs are appended after "provisionerd start". + description: ExtraArgs are appended after "provisioner start". items: type: string type: array diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 9dd3a4dc..94feb70d 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -18,7 +18,6 @@ rules: - "" resources: - namespaces - - pods verbs: - get - list @@ -26,12 +25,8 @@ rules: - apiGroups: - "" resources: - - pods/log - verbs: - - get -- apiGroups: - - "" - resources: + - persistentvolumeclaims + - pods - secrets - serviceaccounts - services @@ -43,6 +38,12 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - pods/log + verbs: + - get - apiGroups: - aggregation.coder.com resources: diff --git a/docs/reference/api/coderprovisioner.md b/docs/reference/api/coderprovisioner.md index 085a41f8..dfd81167 100644 --- a/docs/reference/api/coderprovisioner.md +++ b/docs/reference/api/coderprovisioner.md @@ -20,7 +20,7 @@ | `replicas` | integer | Replicas is the desired number of provisioner pods. | | `tags` | object (keys:string, values:string) | Tags are attached to the provisioner key for job routing. | | `image` | string | Image is the container image. Defaults to the control plane image. | -| `extraArgs` | string array | ExtraArgs are appended after "provisionerd start". | +| `extraArgs` | string array | ExtraArgs are appended after "provisioner start". | | `extraEnv` | [EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#envvar-v1-core) array | ExtraEnv are injected into the provisioner container. | | `resources` | [ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#resourcerequirements-v1-core) | Resources for the provisioner container. | | `imagePullSecrets` | [LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#localobjectreference-v1-core) array | ImagePullSecrets are used by the pod to pull private images. | diff --git a/internal/controller/coderprovisioner_controller.go b/internal/controller/coderprovisioner_controller.go index 96bbe8e8..45f2062d 100644 --- a/internal/controller/coderprovisioner_controller.go +++ b/internal/controller/coderprovisioner_controller.go @@ -68,6 +68,7 @@ type CoderProvisionerReconciler struct { // +kubebuilder:rbac:groups=coder.com,resources=coderprovisioners/status,verbs=get;update;patch // +kubebuilder:rbac:groups=coder.com,resources=coderprovisioners/finalizers,verbs=update // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups="",resources=pods;persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=secrets;serviceaccounts,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles;rolebindings,verbs=get;list;watch;create;update;patch;delete @@ -441,7 +442,7 @@ func (r *CoderProvisionerReconciler) Reconcile(ctx context.Context, req ctrl.Req "ProvisionerKeyReady", "Provisioner key is available in coderd", ) - } else if status.OrganizationName == "" || status.TagsHash == "" || status.ControlPlaneRefName == "" { + } else if status.OrganizationName == "" || status.TagsHash == "" || status.ControlPlaneRefName == "" || status.ControlPlaneURL == "" { // Secret is usable and no drift detected, but status metadata is empty // (e.g. upgrade from older version). Call EnsureProvisionerKey to populate // IDs and key name. If coderd reports an existing key (no plaintext key @@ -550,6 +551,30 @@ func (r *CoderProvisionerReconciler) Reconcile(ctx context.Context, req ctrl.Req "Provisioner key secret is available", ) + // Persist key-related status metadata immediately so retries after later + // reconciliation failures (for example RBAC/deployment errors) do not + // repeatedly re-run metadata backfill/rotation against coderd. + keyStatusSnapshot := provisioner.Status.DeepCopy() + if keyStatusSnapshot == nil { + return ctrl.Result{}, fmt.Errorf("assertion failed: key status snapshot must not be nil") + } + provisioner.Status.OrganizationID = organizationID + provisioner.Status.OrganizationName = appliedOrgName + provisioner.Status.ProvisionerKeyID = provisionerKeyID + provisioner.Status.ProvisionerKeyName = provisionerKeyName + provisioner.Status.TagsHash = appliedTagsHash + provisioner.Status.ControlPlaneRefName = appliedControlPlaneRefName + provisioner.Status.ControlPlaneURL = appliedControlPlaneURL + provisioner.Status.SecretRef = &coderv1alpha1.SecretKeySelector{Name: keySecretName, Key: keySecretKey} + if !equality.Semantic.DeepEqual(*keyStatusSnapshot, provisioner.Status) { + if err := r.Status().Update(ctx, provisioner); err != nil { + return ctrl.Result{}, fmt.Errorf("update coderprovisioner key status: %w", err) + } + statusSnapshot = provisioner.Status.DeepCopy() + if statusSnapshot == nil { + return ctrl.Result{}, fmt.Errorf("assertion failed: status snapshot must not be nil after key status update") + } + } serviceAccountName := provisionerServiceAccountName(provisioner.Name) if _, err := r.reconcileServiceAccount(ctx, provisioner, serviceAccountName); err != nil { return ctrl.Result{}, err @@ -573,7 +598,7 @@ func (r *CoderProvisionerReconciler) Reconcile(ctx context.Context, req ctrl.Req } secretRef := &coderv1alpha1.SecretKeySelector{Name: keySecretName, Key: keySecretKey} - deployment, err := r.reconcileDeployment(ctx, provisioner, image, controlPlane.Status.URL, organizationName, secretRef, serviceAccountName, secretChecksum) + deployment, err := r.reconcileDeployment(ctx, provisioner, image, controlPlane.Status.URL, secretRef, serviceAccountName, secretChecksum) if err != nil { return ctrl.Result{}, err } @@ -946,7 +971,6 @@ func (r *CoderProvisionerReconciler) reconcileDeployment( provisioner *coderv1alpha1.CoderProvisioner, image string, coderURL string, - organizationName string, secretRef *coderv1alpha1.SecretKeySelector, serviceAccountName string, secretChecksum string, @@ -971,7 +995,7 @@ func (r *CoderProvisionerReconciler) reconcileDeployment( terminationGracePeriodSeconds = *provisioner.Spec.TerminationGracePeriodSeconds } - args := []string{"provisionerd", "start"} + args := []string{"provisioner", "start"} args = append(args, provisioner.Spec.ExtraArgs...) env := []corev1.EnvVar{ @@ -984,9 +1008,6 @@ func (r *CoderProvisionerReconciler) reconcileDeployment( }}, }, } - if organizationName != "" && organizationName != defaultProvisionerOrganizationName { - env = append(env, corev1.EnvVar{Name: "CODER_ORGANIZATION", Value: organizationName}) - } env = append(env, provisioner.Spec.ExtraEnv...) deployment.Spec.Replicas = &replicas @@ -1005,6 +1026,7 @@ func (r *CoderProvisionerReconciler) reconcileDeployment( Containers: []corev1.Container{{ Name: "provisioner", Image: image, + Command: []string{"coder"}, Args: args, Env: env, Resources: provisioner.Spec.Resources, diff --git a/internal/controller/coderprovisioner_controller_test.go b/internal/controller/coderprovisioner_controller_test.go index a271d3ed..e5f0f886 100644 --- a/internal/controller/coderprovisioner_controller_test.go +++ b/internal/controller/coderprovisioner_controller_test.go @@ -518,14 +518,16 @@ func TestCoderProvisionerReconciler_BasicCreate(t *testing.T) { container := deployment.Spec.Template.Spec.Containers[0] require.Equal(t, "provisioner", container.Name) require.Equal(t, "provisioner-image:test", container.Image) - require.Equal(t, []string{"provisionerd", "start", "--test-mode=true"}, container.Args) + require.Equal(t, []string{"coder"}, container.Command) + require.Equal(t, []string{"provisioner", "start", "--test-mode=true"}, container.Args) envByName := make(map[string]corev1.EnvVar, len(container.Env)) for _, envVar := range container.Env { envByName[envVar.Name] = envVar } require.Equal(t, "https://coder.example.com", envByName["CODER_URL"].Value) - require.Equal(t, "acme", envByName["CODER_ORGANIZATION"].Value) + _, hasOrganizationEnv := envByName["CODER_ORGANIZATION"] + require.False(t, hasOrganizationEnv) require.Equal(t, "extra-value", envByName["EXTRA_ENV"].Value) keyEnv, ok := envByName["CODER_PROVISIONER_DAEMON_KEY"] require.True(t, ok)