From 21ae9b7fcff6ee0b488331833ae87c6bbc70ed6b Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 28 May 2026 20:59:39 +0200 Subject: [PATCH 01/13] direct: replace WaitAfterCreate/WaitAfterUpdate with Engine callback for intermediate state saves Resources with multi-step deployments (apps, clusters, model serving endpoints, etc.) previously split their logic between DoCreate/DoUpdate and WaitAfterCreate/WaitAfterUpdate at an arbitrary point, making it hard to persist state incrementally. Replace the WaitAfterXxx methods with an *Engine parameter on DoCreate and DoUpdate. Engine.SetID + Engine.SaveState can be called immediately after the initial API call succeeds, before any long-running wait, so the resource is tracked in state even if deployment is interrupted mid-wait (preventing orphaned resources). Simple resources pass _ *Engine and are unaffected. Complex resources (apps, clusters, database instances, model serving endpoints, vector search endpoints and indexes) now inline their wait logic and call engine.SetID/SaveState at the appropriate point. Co-authored-by: Denis Bilenko --- bundle/direct/apply.go | 51 +++------- bundle/direct/dresources/README.md | 4 +- bundle/direct/dresources/adapter.go | 94 +++---------------- bundle/direct/dresources/alert.go | 4 +- bundle/direct/dresources/all_test.go | 19 +--- bundle/direct/dresources/app.go | 33 ++++--- bundle/direct/dresources/app_test.go | 36 +++++-- bundle/direct/dresources/catalog.go | 4 +- bundle/direct/dresources/cluster.go | 76 ++++++++------- bundle/direct/dresources/dashboard.go | 4 +- bundle/direct/dresources/database_catalog.go | 4 +- bundle/direct/dresources/database_instance.go | 36 +++---- bundle/direct/dresources/engine.go | 47 ++++++++++ bundle/direct/dresources/experiment.go | 4 +- bundle/direct/dresources/external_location.go | 4 +- bundle/direct/dresources/grants.go | 6 +- bundle/direct/dresources/job.go | 4 +- bundle/direct/dresources/model.go | 4 +- .../dresources/model_serving_endpoint.go | 25 ++--- bundle/direct/dresources/permissions.go | 6 +- bundle/direct/dresources/pipeline.go | 4 +- bundle/direct/dresources/postgres_branch.go | 4 +- bundle/direct/dresources/postgres_catalog.go | 2 +- bundle/direct/dresources/postgres_endpoint.go | 4 +- bundle/direct/dresources/postgres_project.go | 4 +- .../dresources/postgres_synced_table.go | 2 +- bundle/direct/dresources/quality_monitor.go | 4 +- bundle/direct/dresources/registered_model.go | 4 +- bundle/direct/dresources/schema.go | 4 +- bundle/direct/dresources/schema_test.go | 6 +- bundle/direct/dresources/secret_scope.go | 2 +- bundle/direct/dresources/secret_scope_acls.go | 4 +- bundle/direct/dresources/sql_warehouse.go | 4 +- .../dresources/synced_database_table.go | 4 +- .../dresources/vector_search_endpoint.go | 20 ++-- .../direct/dresources/vector_search_index.go | 44 ++++----- bundle/direct/dresources/volume.go | 4 +- 37 files changed, 274 insertions(+), 311 deletions(-) create mode 100644 bundle/direct/dresources/engine.go diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index a4a61f727f2..b9db11fa6fa 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -51,6 +51,10 @@ func (d *DeploymentUnit) Deploy(ctx context.Context, db *dstate.DeploymentState, } func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, newState any) error { + engine := dresources.NewEngine(d.Adapter.StateType(), func(id string, x any) error { + return db.SaveState(d.ResourceKey, id, x, d.DependsOn) + }) + var newID string var remoteState any _, err := retryWith(ctx, func(err error) bool { @@ -59,7 +63,7 @@ func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, return ok && isTransient(ctx, err) }, func() (struct{}, error) { var e error - newID, remoteState, e = d.Adapter.DoCreate(ctx, newState) + newID, remoteState, e = d.Adapter.DoCreate(ctx, engine, newState) return struct{}{}, e }) err = dresources.UnwrapRetrySafe(err) @@ -80,18 +84,6 @@ func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("saving state after creating id=%s: %w", newID, err) } - waitRemoteState, err := retryOnTransient(ctx, func() (any, error) { - return d.Adapter.WaitAfterCreate(ctx, newID, newState) - }) - if err != nil { - return fmt.Errorf("waiting after creating id=%s: %w", newID, err) - } - - err = d.SetRemoteState(waitRemoteState) - if err != nil { - return err - } - return nil } @@ -134,8 +126,13 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("internal error: DoUpdate not implemented for resource %s", d.ResourceKey) } + engine := dresources.NewEngine(d.Adapter.StateType(), func(_ string, x any) error { + return db.SaveState(d.ResourceKey, id, x, d.DependsOn) + }) + engine.SetID(id) + remoteState, err := retryOnTransient(ctx, func() (any, error) { - return d.Adapter.DoUpdate(ctx, id, newState, planEntry) + return d.Adapter.DoUpdate(ctx, engine, id, newState, planEntry) }) if err != nil { return fmt.Errorf("updating id=%s: %w", id, err) @@ -151,19 +148,6 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("saving state id=%s: %w", id, err) } - waitRemoteState, err := retryOnTransient(ctx, func() (any, error) { - return d.Adapter.WaitAfterUpdate(ctx, id, newState) - }) - if err != nil { - return fmt.Errorf("waiting after updating id=%s: %w", id, err) - } - - // Update remote state with the result from wait operation - err = d.SetRemoteState(waitRemoteState) - if err != nil { - return err - } - return nil } @@ -195,19 +179,6 @@ func (d *DeploymentUnit) UpdateWithID(ctx context.Context, db *dstate.Deployment return fmt.Errorf("saving state id=%s: %w", oldID, err) } - waitRemoteState, err := retryOnTransient(ctx, func() (any, error) { - return d.Adapter.WaitAfterUpdate(ctx, newID, newState) - }) - if err != nil { - return fmt.Errorf("waiting after updating id=%s: %w", newID, err) - } - - // Update remote state with the result from wait operation - err = d.SetRemoteState(waitRemoteState) - if err != nil { - return err - } - return nil } diff --git a/bundle/direct/dresources/README.md b/bundle/direct/dresources/README.md index a20b68c4ebd..b9c6e4754e2 100644 --- a/bundle/direct/dresources/README.md +++ b/bundle/direct/dresources/README.md @@ -34,9 +34,9 @@ Do **not** derive update mask field names from `entry.Changes`. The paths in `en If a resource has fields that must not be sent in updates (deploy-only, lifecycle-only, etc.), document them explicitly with a `var` block and a comment explaining each exclusion. -## Async APIs: WaitAfterCreate / WaitAfterUpdate +## Async APIs -For resources whose create or update is asynchronous (the resource is not immediately ready after the call returns), implement `WaitAfterCreate` and/or `WaitAfterUpdate` instead of polling inline inside DoCreate/DoUpdate. These are the correct extension points in the framework, and polling inline bypasses state persistence timing. +For resources whose create or update is asynchronous, poll inline inside `DoCreate`/`DoUpdate` after the initial API call. To prevent orphaning if deployment is interrupted during a long wait, call `engine.SetID(id)` then `engine.SaveState(config)` immediately after the resource is created and before any waiting. The framework provides a `*Engine` as the second argument to both methods. ## Slice ordering: KeyedSlices diff --git a/bundle/direct/dresources/adapter.go b/bundle/direct/dresources/adapter.go index 6891632b626..a29a094cc78 100644 --- a/bundle/direct/dresources/adapter.go +++ b/bundle/direct/dresources/adapter.go @@ -53,13 +53,14 @@ type IResource interface { // DoCreate creates a new resource from the newState. Returns id of the resource and optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. - // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) - DoCreate(ctx context.Context, newState any) (id string, remoteState any, e error) + // Call engine.SetID then engine.SaveState to persist intermediate state before long-running waits. + // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, _ *Engine, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) + DoCreate(ctx context.Context, engine *Engine, newState any) (id string, remoteState any, e error) // [Optional] DoUpdate updates the resource. ID must not change as a result of this operation. Returns optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. - // Example: func (r *ResourceSchema) DoUpdate(ctx context.Context, id string, newState *catalog.CreateSchema, entry *PlanEntry) (*catalog.SchemaInfo, error) - DoUpdate(ctx context.Context, id string, newState any, entry *PlanEntry) (remoteState any, e error) + // Example: func (r *ResourceSchema) DoUpdate(ctx context.Context, _ *Engine, id string, newState *catalog.CreateSchema, entry *PlanEntry) (*catalog.SchemaInfo, error) + DoUpdate(ctx context.Context, engine *Engine, id string, newState any, entry *PlanEntry) (remoteState any, e error) // [Optional] DoUpdateWithID performs an update that may result in resource having a new ID. Returns new id and optionally remote state. DoUpdateWithID(ctx context.Context, id string, newState any) (newID string, remoteState any, e error) @@ -67,13 +68,6 @@ type IResource interface { // [Optional] DoResize resizes the resource. Only supported by clusters DoResize(ctx context.Context, id string, newState any) error - // [Optional] WaitAfterCreate waits for the resource to become ready after creation. Returns optionally updated remote state. - // TODO: wait status should be persisted in the state. - WaitAfterCreate(ctx context.Context, id string, newState any) (remoteState any, e error) - - // [Optional] WaitAfterUpdate waits for the resource to become ready after update. Returns optionally updated remote state. - WaitAfterUpdate(ctx context.Context, id string, newState any) (remoteState any, e error) - // [Optional] WaitAfterDelete waits for the resource to be fully removed after DoDelete returns. // Useful for backends with asynchronous deletion: a follow-up create on the same name (recreate path) // would otherwise race with the in-progress teardown. State is dropped before this is called, so a @@ -98,8 +92,6 @@ type Adapter struct { // Optional: doUpdate *calladapt.BoundCaller doUpdateWithID *calladapt.BoundCaller - waitAfterCreate *calladapt.BoundCaller - waitAfterUpdate *calladapt.BoundCaller waitAfterDelete *calladapt.BoundCaller overrideChangeDesc *calladapt.BoundCaller doResize *calladapt.BoundCaller @@ -131,8 +123,6 @@ func NewAdapter(typedNil any, resourceType string, client *databricks.WorkspaceC doUpdate: nil, doUpdateWithID: nil, doResize: nil, - waitAfterCreate: nil, - waitAfterUpdate: nil, waitAfterDelete: nil, overrideChangeDesc: nil, resourceConfig: GetResourceConfig(resourceType), @@ -206,16 +196,6 @@ func (a *Adapter) initMethods(resource any) error { return err } - a.waitAfterCreate, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "WaitAfterCreate") - if err != nil { - return err - } - - a.waitAfterUpdate, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "WaitAfterUpdate") - if err != nil { - return err - } - a.waitAfterDelete, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "WaitAfterDelete") if err != nil { return err @@ -278,7 +258,7 @@ func (a *Adapter) validate() error { validations := []any{ "PrepareState return", a.prepareState.OutTypes[0], stateType, - "DoCreate newState", a.doCreate.InTypes[1], stateType, + "DoCreate newState", a.doCreate.InTypes[2], stateType, "DoDelete state", a.doDelete.InTypes[2], stateType, } @@ -301,7 +281,7 @@ func (a *Adapter) validate() error { // Validate DoUpdate: must return (remoteType, error) if implemented if a.doUpdate != nil { - validations = append(validations, "DoUpdate newState", a.doUpdate.InTypes[2], stateType) + validations = append(validations, "DoUpdate newState", a.doUpdate.InTypes[3], stateType) if len(a.doUpdate.OutTypes) != 2 { return fmt.Errorf("DoUpdate must return (remoteType, error), got %d return values", len(a.doUpdate.OutTypes)) } @@ -321,24 +301,6 @@ func (a *Adapter) validate() error { validations = append(validations, "DoUpdateWithID remoteState return", a.doUpdateWithID.OutTypes[1], remoteType) } - if a.waitAfterCreate != nil { - validations = append(validations, "WaitAfterCreate newState", a.waitAfterCreate.InTypes[2], stateType) - // WaitAfterCreate must return (remoteType, error) - if len(a.waitAfterCreate.OutTypes) != 2 { - return fmt.Errorf("WaitAfterCreate must return (remoteType, error), got %d return values", len(a.waitAfterCreate.OutTypes)) - } - validations = append(validations, "WaitAfterCreate remoteState return", a.waitAfterCreate.OutTypes[0], remoteType) - } - - if a.waitAfterUpdate != nil { - validations = append(validations, "WaitAfterUpdate newState", a.waitAfterUpdate.InTypes[2], stateType) - // WaitAfterUpdate must return (remoteType, error) - if len(a.waitAfterUpdate.OutTypes) != 2 { - return fmt.Errorf("WaitAfterUpdate must return (remoteType, error), got %d return values", len(a.waitAfterUpdate.OutTypes)) - } - validations = append(validations, "WaitAfterUpdate remoteState return", a.waitAfterUpdate.OutTypes[0], remoteType) - } - err = validateTypes(validations...) if err != nil { return err @@ -433,8 +395,8 @@ func normalizeNilPointer(v any) any { return v } -func (a *Adapter) DoCreate(ctx context.Context, newState any) (string, any, error) { - outs, err := a.doCreate.Call(ctx, newState) +func (a *Adapter) DoCreate(ctx context.Context, engine *Engine, newState any) (string, any, error) { + outs, err := a.doCreate.Call(ctx, engine, newState) if err != nil { return "", nil, err } @@ -451,12 +413,12 @@ func (a *Adapter) HasDoUpdate() bool { // DoUpdate updates the resource with the plan entry computed during plan. // Returns remote state if available, otherwise nil. -func (a *Adapter) DoUpdate(ctx context.Context, id string, newState any, entry *PlanEntry) (any, error) { +func (a *Adapter) DoUpdate(ctx context.Context, engine *Engine, id string, newState any, entry *PlanEntry) (any, error) { if a.doUpdate == nil { return nil, errors.New("internal error: DoUpdate not found") } - outs, err := a.doUpdate.Call(ctx, id, newState, entry) + outs, err := a.doUpdate.Call(ctx, engine, id, newState, entry) if err != nil { return nil, err } @@ -495,40 +457,6 @@ func (a *Adapter) DoResize(ctx context.Context, id string, newState any) error { return err } -// WaitAfterCreate waits for the resource to become ready after creation. -// If the resource doesn't implement this method, this is a no-op. -// Returns the updated remoteState if available, otherwise returns nil -func (a *Adapter) WaitAfterCreate(ctx context.Context, id string, newState any) (any, error) { - if a.waitAfterCreate == nil { - return nil, nil // no-op if not implemented - } - - outs, err := a.waitAfterCreate.Call(ctx, id, newState) - if err != nil { - return nil, err - } - - remoteState := normalizeNilPointer(outs[0]) - return remoteState, nil -} - -// WaitAfterUpdate waits for the resource to become ready after update. -// If the resource doesn't implement this method, this is a no-op. -// Returns the updated remoteState if available, otherwise returns nil. -func (a *Adapter) WaitAfterUpdate(ctx context.Context, id string, newState any) (any, error) { - if a.waitAfterUpdate == nil { - return nil, nil // no-op if not implemented - } - - outs, err := a.waitAfterUpdate.Call(ctx, id, newState) - if err != nil { - return nil, err - } - - remoteState := normalizeNilPointer(outs[0]) - return remoteState, nil -} - // WaitAfterDelete waits for the resource to be fully removed after DoDelete. // If the resource doesn't implement this method, this is a no-op. func (a *Adapter) WaitAfterDelete(ctx context.Context, id string) error { diff --git a/bundle/direct/dresources/alert.go b/bundle/direct/dresources/alert.go index a18641e810a..af71e378d41 100644 --- a/bundle/direct/dresources/alert.go +++ b/bundle/direct/dresources/alert.go @@ -37,7 +37,7 @@ func (r *ResourceAlert) DoRead(ctx context.Context, id string) (*sql.AlertV2, er } // DoCreate creates the alert and returns its id. -func (r *ResourceAlert) DoCreate(ctx context.Context, config *sql.AlertV2) (string, *sql.AlertV2, error) { +func (r *ResourceAlert) DoCreate(ctx context.Context, _ *Engine, config *sql.AlertV2) (string, *sql.AlertV2, error) { request := sql.CreateAlertV2Request{ Alert: *config, } @@ -49,7 +49,7 @@ func (r *ResourceAlert) DoCreate(ctx context.Context, config *sql.AlertV2) (stri } // DoUpdate updates the alert in place. -func (r *ResourceAlert) DoUpdate(ctx context.Context, id string, config *sql.AlertV2, _ *PlanEntry) (*sql.AlertV2, error) { +func (r *ResourceAlert) DoUpdate(ctx context.Context, _ *Engine, id string, config *sql.AlertV2, _ *PlanEntry) (*sql.AlertV2, error) { request := sql.UpdateAlertV2Request{ Id: id, Alert: *config, diff --git a/bundle/direct/dresources/all_test.go b/bundle/direct/dresources/all_test.go index f18a84d0efc..d1fe61d80c7 100644 --- a/bundle/direct/dresources/all_test.go +++ b/bundle/direct/dresources/all_test.go @@ -827,7 +827,8 @@ func testCRUD(t *testing.T, group string, adapter *Adapter, client *databricks.W require.Error(t, err) // TODO: if errors.Is(err, databricks.ErrResourceDoesNotExist) {... } - createdID, remoteStateFromCreate, err := adapter.DoCreate(ctx, newState) + nopEngine := NewNopEngine(adapter.StateType()) + createdID, remoteStateFromCreate, err := adapter.DoCreate(ctx, nopEngine, newState) require.NoError(t, err, "DoCreate failed state=%v", newState) require.NotEmpty(t, createdID, "ID returned from DoCreate was empty") @@ -849,14 +850,8 @@ func testCRUD(t *testing.T, group string, adapter *Adapter, client *databricks.W "unexpected differences between remappedState and remappedRemoteStateFromCreate") } - remoteStateFromWaitCreate, err := adapter.WaitAfterCreate(ctx, createdID, newState) - require.NoError(t, err) - if remoteStateFromWaitCreate != nil { - require.Equal(t, remote, remoteStateFromWaitCreate) - } - if adapter.HasDoUpdate() { - remoteStateFromUpdate, err := adapter.DoUpdate(ctx, createdID, newState, &deployplan.PlanEntry{}) + remoteStateFromUpdate, err := adapter.DoUpdate(ctx, nopEngine, createdID, newState, &deployplan.PlanEntry{}) require.NoError(t, err, "DoUpdate failed") if remoteStateFromUpdate != nil { remappedStateFromUpdate, err := adapter.RemapState(remoteStateFromUpdate) @@ -865,14 +860,6 @@ func testCRUD(t *testing.T, group string, adapter *Adapter, client *databricks.W "unexpected differences between remappedState and remappedStateFromUpdate") } - remoteStateFromWaitUpdate, err := adapter.WaitAfterUpdate(ctx, createdID, newState) - require.NoError(t, err) - if remoteStateFromWaitUpdate != nil { - remappedStateFromWaitUpdate, err := adapter.RemapState(remoteStateFromWaitUpdate) - require.NoError(t, err) - ignoreFilter.requireEqual(t, remappedState, remappedStateFromWaitUpdate, - "unexpected differences between remappedState and remappedStateFromWaitUpdate") - } } require.NoError(t, structwalk.Walk(newState, func(path *structpath.PathNode, val any, field *reflect.StructField) { diff --git a/bundle/direct/dresources/app.go b/bundle/direct/dresources/app.go index ed6752d834d..e36036d28b0 100644 --- a/bundle/direct/dresources/app.go +++ b/bundle/direct/dresources/app.go @@ -116,7 +116,7 @@ func (r *ResourceApp) DoRead(ctx context.Context, id string) (*AppRemote, error) return remote, nil } -func (r *ResourceApp) DoCreate(ctx context.Context, config *AppState) (string, *AppRemote, error) { +func (r *ResourceApp) DoCreate(ctx context.Context, engine *Engine, config *AppState) (string, *AppRemote, error) { // Start app compute only when lifecycle.started=true is explicit. // For nil (omitted) or false, use no_compute=true (do not start compute). noCompute := config.Lifecycle == nil || config.Lifecycle.Started == nil || !*config.Lifecycle.Started @@ -154,7 +154,22 @@ func (r *ResourceApp) DoCreate(ctx context.Context, config *AppState) (string, * return "", nil, err } - return app.Name, nil, nil + // Save state as soon as the app exists so it is not orphaned if the wait or + // lifecycle management is interrupted. + engine.SetID(app.Name) + if err := engine.SaveState(config); err != nil { + return "", nil, err + } + + remote, err := r.waitForApp(ctx, r.client, config.Name) + if err != nil { + return "", nil, err + } + alreadyStarted := remote.Lifecycle != nil && remote.Lifecycle.Started != nil && *remote.Lifecycle.Started + if err := r.manageLifecycle(ctx, config.Name, config, alreadyStarted); err != nil { + return "", nil, err + } + return app.Name, remote, nil } var UpdateMaskFields = []string{ @@ -172,7 +187,7 @@ var UpdateMaskFields = []string{ var updateMask = strings.Join(UpdateMaskFields, ",") -func (r *ResourceApp) DoUpdate(ctx context.Context, id string, config *AppState, entry *PlanEntry) (*AppRemote, error) { +func (r *ResourceApp) DoUpdate(ctx context.Context, _ *Engine, id string, config *AppState, entry *PlanEntry) (*AppRemote, error) { // Deploy-only fields (source_code_path, config, // git_source, lifecycle) are not part of apps.App and thus excluded from the request body. if hasAppChanges(entry) { @@ -302,18 +317,6 @@ func (r *ResourceApp) DoDelete(ctx context.Context, id string, _ *AppState) erro return err } -func (r *ResourceApp) WaitAfterCreate(ctx context.Context, id string, config *AppState) (*AppRemote, error) { - remote, err := r.waitForApp(ctx, r.client, config.Name) - if err != nil { - return nil, err - } - alreadyStarted := remote.Lifecycle != nil && remote.Lifecycle.Started != nil && *remote.Lifecycle.Started - if err := r.manageLifecycle(ctx, config.Name, config, alreadyStarted); err != nil { - return nil, err - } - return remote, nil -} - // waitForApp waits for the app to reach the target state. The target state is either ACTIVE or STOPPED. // Apps with no_compute set to true will reach the STOPPED state, otherwise they will reach the ACTIVE state. // We can't use the default waiter from SDK because it only waits on ACTIVE state but we need also STOPPED state. diff --git a/bundle/direct/dresources/app_test.go b/bundle/direct/dresources/app_test.go index c7cbfacd1af..7d7ec940567 100644 --- a/bundle/direct/dresources/app_test.go +++ b/bundle/direct/dresources/app_test.go @@ -42,10 +42,18 @@ func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) { server.Handle("GET", "/api/2.0/apps/{name}", func(req testserver.Request) any { getCallCount++ + if getCallCount == 1 { + return apps.App{ + Name: req.Vars["name"], + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateDeleting, + }, + } + } return apps.App{ Name: req.Vars["name"], ComputeStatus: &apps.ComputeStatus{ - State: apps.ComputeStateDeleting, + State: apps.ComputeStateActive, }, } }) @@ -60,12 +68,12 @@ func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) { r := (&ResourceApp{}).New(client) ctx := t.Context() - name, _, err := r.DoCreate(ctx, &AppState{App: apps.App{Name: "test-app"}}) + name, _, err := r.DoCreate(ctx, NewNopEngine(reflect.TypeFor[*AppState]()), &AppState{App: apps.App{Name: "test-app"}}) require.NoError(t, err) assert.Equal(t, "test-app", name) assert.Equal(t, 2, createCallCount, "expected Create to be called twice (1 retry)") - assert.Equal(t, 1, getCallCount, "expected Get to be called once to check app state") + assert.Equal(t, 2, getCallCount, "expected Get to be called twice: once to check app state, once by waitForApp") } // TestAppDoCreate_RetriesWhenGetReturnsNotFound verifies that DoCreate retries @@ -97,11 +105,19 @@ func TestAppDoCreate_RetriesWhenGetReturnsNotFound(t *testing.T) { server.Handle("GET", "/api/2.0/apps/{name}", func(req testserver.Request) any { getCallCount++ - return testserver.Response{ - StatusCode: 404, - Body: map[string]string{ - "error_code": "RESOURCE_DOES_NOT_EXIST", - "message": "App not found.", + if getCallCount == 1 { + return testserver.Response{ + StatusCode: 404, + Body: map[string]string{ + "error_code": "RESOURCE_DOES_NOT_EXIST", + "message": "App not found.", + }, + } + } + return apps.App{ + Name: req.Vars["name"], + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateActive, }, } }) @@ -116,12 +132,12 @@ func TestAppDoCreate_RetriesWhenGetReturnsNotFound(t *testing.T) { r := (&ResourceApp{}).New(client) ctx := t.Context() - name, _, err := r.DoCreate(ctx, &AppState{App: apps.App{Name: "test-app"}}) + name, _, err := r.DoCreate(ctx, NewNopEngine(reflect.TypeFor[*AppState]()), &AppState{App: apps.App{Name: "test-app"}}) require.NoError(t, err) assert.Equal(t, "test-app", name) assert.Equal(t, 2, createCallCount, "expected Create to be called twice") - assert.Equal(t, 1, getCallCount, "expected Get to be called once to check app state") + assert.Equal(t, 2, getCallCount, "expected Get to be called twice: once to check app state, once by waitForApp") } func TestAppDoUpdate_UpdateMaskHasAllFields(t *testing.T) { diff --git a/bundle/direct/dresources/catalog.go b/bundle/direct/dresources/catalog.go index 1ce28b54123..d23cda3538f 100644 --- a/bundle/direct/dresources/catalog.go +++ b/bundle/direct/dresources/catalog.go @@ -40,7 +40,7 @@ func (r *ResourceCatalog) DoRead(ctx context.Context, id string) (*catalog.Catal return r.client.Catalogs.GetByName(ctx, id) } -func (r *ResourceCatalog) DoCreate(ctx context.Context, config *catalog.CreateCatalog) (string, *catalog.CatalogInfo, error) { +func (r *ResourceCatalog) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateCatalog) (string, *catalog.CatalogInfo, error) { response, err := r.client.Catalogs.Create(ctx, *config) if err != nil || response == nil { return "", nil, err @@ -49,7 +49,7 @@ func (r *ResourceCatalog) DoCreate(ctx context.Context, config *catalog.CreateCa } // DoUpdate updates the catalog in place and returns remote state. -func (r *ResourceCatalog) DoUpdate(ctx context.Context, id string, config *catalog.CreateCatalog, _ *PlanEntry) (*catalog.CatalogInfo, error) { +func (r *ResourceCatalog) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateCatalog, _ *PlanEntry) (*catalog.CatalogInfo, error) { updateRequest := catalog.UpdateCatalog{ Comment: config.Comment, EnablePredictiveOptimization: "", // Not supported by DABs diff --git a/bundle/direct/dresources/cluster.go b/bundle/direct/dresources/cluster.go index b41de453017..c16cf341e70 100644 --- a/bundle/direct/dresources/cluster.go +++ b/bundle/direct/dresources/cluster.go @@ -147,12 +147,39 @@ func (r *ResourceCluster) DoRead(ctx context.Context, id string) (*ClusterRemote return remote, nil } -func (r *ResourceCluster) DoCreate(ctx context.Context, config *ClusterState) (string, *ClusterRemote, error) { +func (r *ResourceCluster) DoCreate(ctx context.Context, engine *Engine, config *ClusterState) (string, *ClusterRemote, error) { wait, err := r.client.Clusters.Create(ctx, makeCreateCluster(&config.ClusterSpec)) if err != nil { return "", nil, err } - return wait.ClusterId, nil, nil + id := wait.ClusterId + + // Save state immediately after the cluster is created so it is not orphaned + // if the subsequent wait or terminate is interrupted. + engine.SetID(id) + if err := engine.SaveState(config); err != nil { + return "", nil, err + } + + // Always wait for RUNNING first: clusters start in PENDING state and must be polled. + _, err = r.client.Clusters.WaitGetClusterRunning(ctx, id, 15*time.Minute, nil) + if err != nil { + return "", nil, err + } + + if config.Lifecycle != nil && config.Lifecycle.Started != nil && !*config.Lifecycle.Started { + // started=false: terminate the cluster after it reaches RUNNING. + deleteWaiter, err := r.client.Clusters.Delete(ctx, compute.DeleteCluster{ClusterId: id}) + if err != nil { + return "", nil, err + } + _, err = deleteWaiter.Get() + if err != nil { + return "", nil, err + } + } + + return id, nil, nil } // hasClusterChanges reports whether the plan entry contains any Update changes @@ -161,7 +188,7 @@ func hasClusterChanges(entry *PlanEntry) bool { return entry.Changes.HasChangeExcept("lifecycle", "lifecycle.started") } -func (r *ResourceCluster) DoUpdate(ctx context.Context, id string, config *ClusterState, entry *PlanEntry) (*ClusterRemote, error) { +func (r *ResourceCluster) DoUpdate(ctx context.Context, _ *Engine, id string, config *ClusterState, entry *PlanEntry) (*ClusterRemote, error) { if hasClusterChanges(entry) { // Same retry as in TF provider logic // https://github.com/databricks/terraform-provider-databricks/blob/3eecd0f90cf99d7777e79a3d03c41f9b2aafb004/clusters/resource_cluster.go#L624 @@ -191,50 +218,21 @@ func (r *ResourceCluster) DoUpdate(ctx context.Context, id string, config *Clust desiredStarted := *config.Lifecycle.Started alreadyRunning := remoteClusterIsRunning(entry) if desiredStarted && !alreadyRunning { - // lifecycle.started=true: fire Start; WaitAfterUpdate polls for RUNNING. + // lifecycle.started=true: fire Start and wait for RUNNING. _, err := r.client.Clusters.Start(ctx, compute.StartCluster{ClusterId: id}) + if err != nil { + return nil, err + } + _, err = r.client.Clusters.WaitGetClusterRunning(ctx, id, 15*time.Minute, nil) return nil, err } else if !desiredStarted && alreadyRunning { - // lifecycle.started=false: fire Delete; WaitAfterUpdate polls for TERMINATED. + // lifecycle.started=false: fire Delete and wait for TERMINATED. // Delete does not remove the cluster, it just sets the state to TERMINATED. _, err := r.client.Clusters.Delete(ctx, compute.DeleteCluster{ClusterId: id}) - return nil, err - } - - return nil, nil -} - -// WaitAfterUpdate waits for the cluster to reach the desired lifecycle state after DoUpdate. -func (r *ResourceCluster) WaitAfterUpdate(ctx context.Context, id string, config *ClusterState) (*ClusterRemote, error) { - if config.Lifecycle == nil || config.Lifecycle.Started == nil { - return nil, nil - } - - if *config.Lifecycle.Started { - _, err := r.client.Clusters.WaitGetClusterRunning(ctx, id, 15*time.Minute, nil) - return nil, err - } - - _, err := r.client.Clusters.WaitGetClusterTerminated(ctx, id, 15*time.Minute, nil) - return nil, err -} - -// WaitAfterCreate waits for the cluster to reach RUNNING state (clusters always start on creation). -// When lifecycle.started=false, it then terminates the cluster. -func (r *ResourceCluster) WaitAfterCreate(ctx context.Context, id string, config *ClusterState) (*ClusterRemote, error) { - // Always wait for RUNNING first: clusters start in PENDING state and must be polled. - _, err := r.client.Clusters.WaitGetClusterRunning(ctx, id, 15*time.Minute, nil) - if err != nil { - return nil, err - } - - if config.Lifecycle != nil && config.Lifecycle.Started != nil && !*config.Lifecycle.Started { - // started=false: terminate the cluster after it reaches RUNNING. - deleteWaiter, err := r.client.Clusters.Delete(ctx, compute.DeleteCluster{ClusterId: id}) if err != nil { return nil, err } - _, err = deleteWaiter.Get() + _, err = r.client.Clusters.WaitGetClusterTerminated(ctx, id, 15*time.Minute, nil) return nil, err } diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index dbf492a0bee..71d7b1f9218 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -264,7 +264,7 @@ func responseToState(createOrUpdateResp *dashboards.Dashboard, publishResp *dash } } -func (r *ResourceDashboard) DoCreate(ctx context.Context, config *DashboardState) (string, *DashboardState, error) { +func (r *ResourceDashboard) DoCreate(ctx context.Context, _ *Engine, config *DashboardState) (string, *DashboardState, error) { dashboard, err := prepareDashboardRequest(config) if err != nil { return "", nil, err @@ -321,7 +321,7 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, config *DashboardState return createResp.DashboardId, responseToState(createResp, publishResp, dashboard.SerializedDashboard, config.Published), nil } -func (r *ResourceDashboard) DoUpdate(ctx context.Context, id string, config *DashboardState, _ *PlanEntry) (*DashboardState, error) { +func (r *ResourceDashboard) DoUpdate(ctx context.Context, _ *Engine, id string, config *DashboardState, _ *PlanEntry) (*DashboardState, error) { dashboard, err := prepareDashboardRequest(config) if err != nil { return nil, err diff --git a/bundle/direct/dresources/database_catalog.go b/bundle/direct/dresources/database_catalog.go index 9bffa708d73..f5e18e8da27 100644 --- a/bundle/direct/dresources/database_catalog.go +++ b/bundle/direct/dresources/database_catalog.go @@ -24,7 +24,7 @@ func (r *ResourceDatabaseCatalog) DoRead(ctx context.Context, id string) (*datab return r.client.Database.GetDatabaseCatalogByName(ctx, id) } -func (r *ResourceDatabaseCatalog) DoCreate(ctx context.Context, config *database.DatabaseCatalog) (string, *database.DatabaseCatalog, error) { +func (r *ResourceDatabaseCatalog) DoCreate(ctx context.Context, _ *Engine, config *database.DatabaseCatalog) (string, *database.DatabaseCatalog, error) { result, err := r.client.Database.CreateDatabaseCatalog(ctx, database.CreateDatabaseCatalogRequest{ Catalog: *config, }) @@ -34,7 +34,7 @@ func (r *ResourceDatabaseCatalog) DoCreate(ctx context.Context, config *database return result.Name, nil, nil } -func (r *ResourceDatabaseCatalog) DoUpdate(ctx context.Context, id string, config *database.DatabaseCatalog, _ *PlanEntry) (*database.DatabaseCatalog, error) { +func (r *ResourceDatabaseCatalog) DoUpdate(ctx context.Context, _ *Engine, id string, config *database.DatabaseCatalog, _ *PlanEntry) (*database.DatabaseCatalog, error) { request := database.UpdateDatabaseCatalogRequest{ DatabaseCatalog: *config, Name: id, diff --git a/bundle/direct/dresources/database_instance.go b/bundle/direct/dresources/database_instance.go index 2169a61fc8e..35251eb526c 100644 --- a/bundle/direct/dresources/database_instance.go +++ b/bundle/direct/dresources/database_instance.go @@ -25,38 +25,42 @@ func (d *ResourceDatabaseInstance) DoRead(ctx context.Context, id string) (*data return d.client.Database.GetDatabaseInstanceByName(ctx, id) } -func (d *ResourceDatabaseInstance) DoCreate(ctx context.Context, config *database.DatabaseInstance) (string, *database.DatabaseInstance, error) { +func (d *ResourceDatabaseInstance) DoCreate(ctx context.Context, engine *Engine, config *database.DatabaseInstance) (string, *database.DatabaseInstance, error) { waiter, err := d.client.Database.CreateDatabaseInstance(ctx, database.CreateDatabaseInstanceRequest{ DatabaseInstance: *config, }) if err != nil { return "", nil, err } - return waiter.Response.Name, nil, nil -} + id := waiter.Response.Name -func (d *ResourceDatabaseInstance) DoUpdate(ctx context.Context, id string, config *database.DatabaseInstance, _ *PlanEntry) (*database.DatabaseInstance, error) { - request := database.UpdateDatabaseInstanceRequest{ - DatabaseInstance: *config, - Name: config.Name, - UpdateMask: "*", + // Save state immediately after the instance is created so it is not orphaned + // if the subsequent wait is interrupted. + engine.SetID(id) + if err := engine.SaveState(config); err != nil { + return "", nil, err } - request.DatabaseInstance.Uid = id - _, err := d.client.Database.UpdateDatabaseInstance(ctx, request) - return nil, err -} -func (d *ResourceDatabaseInstance) WaitAfterCreate(ctx context.Context, id string, config *database.DatabaseInstance) (*database.DatabaseInstance, error) { - waiter := &database.WaitGetDatabaseInstanceDatabaseAvailable[database.DatabaseInstance]{ + waiterObj := &database.WaitGetDatabaseInstanceDatabaseAvailable[database.DatabaseInstance]{ Response: config, Name: config.Name, Poll: func(timeout time.Duration, callback func(*database.DatabaseInstance)) (*database.DatabaseInstance, error) { return d.client.Database.WaitGetDatabaseInstanceDatabaseAvailable(ctx, config.Name, timeout, callback) }, } - // _ is remoteState, should we return it here? - _, err := waiter.GetWithTimeout(20 * time.Minute) + _, err = waiterObj.GetWithTimeout(20 * time.Minute) + return id, nil, err +} + +func (d *ResourceDatabaseInstance) DoUpdate(ctx context.Context, _ *Engine, id string, config *database.DatabaseInstance, _ *PlanEntry) (*database.DatabaseInstance, error) { + request := database.UpdateDatabaseInstanceRequest{ + DatabaseInstance: *config, + Name: config.Name, + UpdateMask: "*", + } + request.DatabaseInstance.Uid = id + _, err := d.client.Database.UpdateDatabaseInstance(ctx, request) return nil, err } diff --git a/bundle/direct/dresources/engine.go b/bundle/direct/dresources/engine.go new file mode 100644 index 00000000000..c4c8be8e25f --- /dev/null +++ b/bundle/direct/dresources/engine.go @@ -0,0 +1,47 @@ +package dresources + +import ( + "errors" + "fmt" + "reflect" +) + +// Engine provides state persistence to resource implementations. +// Pass it to DoCreate or DoUpdate to save intermediate state before long-running +// wait operations, so the resource is not orphaned if deployment is interrupted. +type Engine struct { + id string + stateType reflect.Type + saveFunc func(id string, x any) error +} + +// NewEngine creates an Engine with the given state type and save function. +// The framework calls this before invoking DoCreate or DoUpdate. +func NewEngine(stateType reflect.Type, saveFunc func(id string, x any) error) *Engine { + return &Engine{id: "", stateType: stateType, saveFunc: saveFunc} +} + +// NewNopEngine creates an Engine that discards all saves. Use in tests. +func NewNopEngine(stateType reflect.Type) *Engine { + return NewEngine(stateType, func(_ string, _ any) error { return nil }) +} + +// SetID sets the resource id for subsequent SaveState calls. +// Must be called before SaveState during DoCreate; for DoUpdate the Engine is +// pre-configured with the existing id. +func (e *Engine) SetID(id string) { + e.id = id +} + +// SaveState saves the resource state. x must be of the same pointer-to-struct +// type as the resource's state type. Returns an error if SetID was not called. +func (e *Engine) SaveState(x any) error { + if e.id == "" { + return errors.New("SaveState: id not set, call SetID first") + } + xt := reflect.TypeOf(x) + if xt != e.stateType { + return fmt.Errorf("SaveState: type mismatch: expected %v, got %v", e.stateType, xt) + } + return e.saveFunc(e.id, x) +} diff --git a/bundle/direct/dresources/experiment.go b/bundle/direct/dresources/experiment.go index bedabc81365..71f807d4987 100644 --- a/bundle/direct/dresources/experiment.go +++ b/bundle/direct/dresources/experiment.go @@ -50,7 +50,7 @@ func (r *ResourceExperiment) DoRead(ctx context.Context, id string) (*ml.Experim return result.Experiment, nil } -func (r *ResourceExperiment) DoCreate(ctx context.Context, config *ml.CreateExperiment) (string, *ml.Experiment, error) { +func (r *ResourceExperiment) DoCreate(ctx context.Context, _ *Engine, config *ml.CreateExperiment) (string, *ml.Experiment, error) { result, err := r.client.Experiments.CreateExperiment(ctx, *config) if err != nil { return "", nil, err @@ -58,7 +58,7 @@ func (r *ResourceExperiment) DoCreate(ctx context.Context, config *ml.CreateExpe return result.ExperimentId, nil, nil } -func (r *ResourceExperiment) DoUpdate(ctx context.Context, id string, config *ml.CreateExperiment, _ *PlanEntry) (*ml.Experiment, error) { +func (r *ResourceExperiment) DoUpdate(ctx context.Context, _ *Engine, id string, config *ml.CreateExperiment, _ *PlanEntry) (*ml.Experiment, error) { updateReq := ml.UpdateExperiment{ ExperimentId: id, NewName: config.Name, diff --git a/bundle/direct/dresources/external_location.go b/bundle/direct/dresources/external_location.go index 64eace48eb3..be29bed5830 100644 --- a/bundle/direct/dresources/external_location.go +++ b/bundle/direct/dresources/external_location.go @@ -44,7 +44,7 @@ func (r *ResourceExternalLocation) DoRead(ctx context.Context, id string) (*cata return r.client.ExternalLocations.GetByName(ctx, id) } -func (r *ResourceExternalLocation) DoCreate(ctx context.Context, config *catalog.CreateExternalLocation) (string, *catalog.ExternalLocationInfo, error) { +func (r *ResourceExternalLocation) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateExternalLocation) (string, *catalog.ExternalLocationInfo, error) { response, err := r.client.ExternalLocations.Create(ctx, *config) if err != nil || response == nil { return "", nil, err @@ -53,7 +53,7 @@ func (r *ResourceExternalLocation) DoCreate(ctx context.Context, config *catalog } // DoUpdate updates the external location in place and returns remote state. -func (r *ResourceExternalLocation) DoUpdate(ctx context.Context, id string, config *catalog.CreateExternalLocation, _ *PlanEntry) (*catalog.ExternalLocationInfo, error) { +func (r *ResourceExternalLocation) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateExternalLocation, _ *PlanEntry) (*catalog.ExternalLocationInfo, error) { updateRequest := catalog.UpdateExternalLocation{ Comment: config.Comment, CredentialName: config.CredentialName, diff --git a/bundle/direct/dresources/grants.go b/bundle/direct/dresources/grants.go index 596346f1614..f03df836698 100644 --- a/bundle/direct/dresources/grants.go +++ b/bundle/direct/dresources/grants.go @@ -107,8 +107,8 @@ func (r *ResourceGrants) DoRead(ctx context.Context, id string) (*GrantsState, e }, nil } -func (r *ResourceGrants) DoCreate(ctx context.Context, state *GrantsState) (string, *GrantsState, error) { - _, err := r.DoUpdate(ctx, "", state, nil) +func (r *ResourceGrants) DoCreate(ctx context.Context, engine *Engine, state *GrantsState) (string, *GrantsState, error) { + _, err := r.DoUpdate(ctx, engine, "", state, nil) if err != nil { // Grants Update is idempotent (additive PATCH), so retrying on transient errors is safe. return "", nil, retrySafe(err) @@ -117,7 +117,7 @@ func (r *ResourceGrants) DoCreate(ctx context.Context, state *GrantsState) (stri return state.SecurableType + "/" + state.FullName, nil, nil } -func (r *ResourceGrants) DoUpdate(ctx context.Context, _ string, state *GrantsState, entry *PlanEntry) (*GrantsState, error) { +func (r *ResourceGrants) DoUpdate(ctx context.Context, _ *Engine, _ string, state *GrantsState, entry *PlanEntry) (*GrantsState, error) { if state.FullName == "" { return nil, errors.New("internal error: grants full_name must be resolved before deployment") } diff --git a/bundle/direct/dresources/job.go b/bundle/direct/dresources/job.go index 49ca54036c4..efc013904b2 100644 --- a/bundle/direct/dresources/job.go +++ b/bundle/direct/dresources/job.go @@ -122,7 +122,7 @@ func makeJobRemote(job *jobs.Job) *JobRemote { } } -func (r *ResourceJob) DoCreate(ctx context.Context, config *jobs.JobSettings) (string, *JobRemote, error) { +func (r *ResourceJob) DoCreate(ctx context.Context, _ *Engine, config *jobs.JobSettings) (string, *JobRemote, error) { request, err := makeCreateJob(*config) if err != nil { return "", nil, err @@ -134,7 +134,7 @@ func (r *ResourceJob) DoCreate(ctx context.Context, config *jobs.JobSettings) (s return strconv.FormatInt(response.JobId, 10), nil, nil } -func (r *ResourceJob) DoUpdate(ctx context.Context, id string, config *jobs.JobSettings, _ *PlanEntry) (*JobRemote, error) { +func (r *ResourceJob) DoUpdate(ctx context.Context, _ *Engine, id string, config *jobs.JobSettings, _ *PlanEntry) (*JobRemote, error) { request, err := makeResetJob(*config, id) if err != nil { return nil, err diff --git a/bundle/direct/dresources/model.go b/bundle/direct/dresources/model.go index 9d04231456d..8b723f30603 100644 --- a/bundle/direct/dresources/model.go +++ b/bundle/direct/dresources/model.go @@ -52,7 +52,7 @@ func (r *ResourceMlflowModel) DoRead(ctx context.Context, id string) (*MlflowMod }, nil } -func (r *ResourceMlflowModel) DoCreate(ctx context.Context, config *ml.CreateModelRequest) (string, *MlflowModelRemote, error) { +func (r *ResourceMlflowModel) DoCreate(ctx context.Context, _ *Engine, config *ml.CreateModelRequest) (string, *MlflowModelRemote, error) { response, err := r.client.ModelRegistry.CreateModel(ctx, *config) if err != nil { return "", nil, err @@ -62,7 +62,7 @@ func (r *ResourceMlflowModel) DoCreate(ctx context.Context, config *ml.CreateMod return response.RegisteredModel.Name, nil, nil } -func (r *ResourceMlflowModel) DoUpdate(ctx context.Context, id string, config *ml.CreateModelRequest, entry *PlanEntry) (*MlflowModelRemote, error) { +func (r *ResourceMlflowModel) DoUpdate(ctx context.Context, _ *Engine, id string, config *ml.CreateModelRequest, entry *PlanEntry) (*MlflowModelRemote, error) { updateRequest := ml.UpdateModelRequest{ Name: id, Description: config.Description, diff --git a/bundle/direct/dresources/model_serving_endpoint.go b/bundle/direct/dresources/model_serving_endpoint.go index ccab3a13dea..3ba258d1090 100644 --- a/bundle/direct/dresources/model_serving_endpoint.go +++ b/bundle/direct/dresources/model_serving_endpoint.go @@ -128,13 +128,22 @@ func (r *ResourceModelServingEndpoint) DoRead(ctx context.Context, id string) (* }, nil } -func (r *ResourceModelServingEndpoint) DoCreate(ctx context.Context, config *serving.CreateServingEndpoint) (string, *ModelServingEndpointRemote, error) { +func (r *ResourceModelServingEndpoint) DoCreate(ctx context.Context, engine *Engine, config *serving.CreateServingEndpoint) (string, *ModelServingEndpointRemote, error) { waiter, err := r.client.ServingEndpoints.Create(ctx, *config) if err != nil { return "", nil, err } + id := waiter.Response.Name - return waiter.Response.Name, nil, nil + // Save state immediately after the endpoint is created so it is not orphaned + // if the subsequent wait is interrupted. + engine.SetID(id) + if err := engine.SaveState(config); err != nil { + return "", nil, err + } + + remote, err := r.waitForEndpointReady(ctx, config.Name) + return id, remote, err } // waitForEndpointReady waits for the serving endpoint to be ready (not updating) @@ -150,14 +159,6 @@ func (r *ResourceModelServingEndpoint) waitForEndpointReady(ctx context.Context, }, nil } -func (r *ResourceModelServingEndpoint) WaitAfterCreate(ctx context.Context, id string, config *serving.CreateServingEndpoint) (*ModelServingEndpointRemote, error) { - return r.waitForEndpointReady(ctx, config.Name) -} - -func (r *ResourceModelServingEndpoint) WaitAfterUpdate(ctx context.Context, id string, config *serving.CreateServingEndpoint) (*ModelServingEndpointRemote, error) { - return r.waitForEndpointReady(ctx, config.Name) -} - func (r *ResourceModelServingEndpoint) updateAiGateway(ctx context.Context, id string, aiGateway *serving.AiGatewayConfig) error { if aiGateway == nil { req := serving.PutAiGatewayRequest{ @@ -290,7 +291,7 @@ func (r *ResourceModelServingEndpoint) updateTags(ctx context.Context, id string return nil } -func (r *ResourceModelServingEndpoint) DoUpdate(ctx context.Context, id string, config *serving.CreateServingEndpoint, entry *PlanEntry) (*ModelServingEndpointRemote, error) { +func (r *ResourceModelServingEndpoint) DoUpdate(ctx context.Context, _ *Engine, id string, config *serving.CreateServingEndpoint, entry *PlanEntry) (*ModelServingEndpointRemote, error) { var err error // Terraform makes these API calls sequentially. We do the same here. @@ -324,7 +325,7 @@ func (r *ResourceModelServingEndpoint) DoUpdate(ctx context.Context, id string, } } - return nil, nil + return r.waitForEndpointReady(ctx, config.Name) } func (r *ResourceModelServingEndpoint) DoDelete(ctx context.Context, id string, _ *serving.CreateServingEndpoint) error { diff --git a/bundle/direct/dresources/permissions.go b/bundle/direct/dresources/permissions.go index 91ca9000aaf..cca6f3a445a 100644 --- a/bundle/direct/dresources/permissions.go +++ b/bundle/direct/dresources/permissions.go @@ -215,9 +215,9 @@ func (r *ResourcePermissions) DoRead(ctx context.Context, id string) (*Permissio } // DoCreate calls https://docs.databricks.com/api/workspace/jobs/setjobpermissions. -func (r *ResourcePermissions) DoCreate(ctx context.Context, newState *PermissionsState) (string, *PermissionsState, error) { +func (r *ResourcePermissions) DoCreate(ctx context.Context, engine *Engine, newState *PermissionsState) (string, *PermissionsState, error) { // should we remember the default here? - _, err := r.DoUpdate(ctx, newState.ObjectID, newState, nil) + _, err := r.DoUpdate(ctx, engine, newState.ObjectID, newState, nil) if err != nil { // Permissions Set is idempotent (PUT), so retrying on transient errors is safe. return "", nil, retrySafe(err) @@ -227,7 +227,7 @@ func (r *ResourcePermissions) DoCreate(ctx context.Context, newState *Permission } // DoUpdate calls https://docs.databricks.com/api/workspace/jobs/setjobpermissions. -func (r *ResourcePermissions) DoUpdate(ctx context.Context, _ string, newState *PermissionsState, _ *PlanEntry) (*PermissionsState, error) { +func (r *ResourcePermissions) DoUpdate(ctx context.Context, _ *Engine, _ string, newState *PermissionsState, _ *PlanEntry) (*PermissionsState, error) { extractedType, extractedID, err := parsePermissionsID(newState.ObjectID) if err != nil { return nil, err diff --git a/bundle/direct/dresources/pipeline.go b/bundle/direct/dresources/pipeline.go index 0861f0fad2f..e2e6aa12807 100644 --- a/bundle/direct/dresources/pipeline.go +++ b/bundle/direct/dresources/pipeline.go @@ -123,7 +123,7 @@ func makePipelineRemote(p *pipelines.GetPipelineResponse) *PipelineRemote { } } -func (r *ResourcePipeline) DoCreate(ctx context.Context, config *pipelines.CreatePipeline) (string, *PipelineRemote, error) { +func (r *ResourcePipeline) DoCreate(ctx context.Context, _ *Engine, config *pipelines.CreatePipeline) (string, *PipelineRemote, error) { response, err := r.client.Pipelines.Create(ctx, *config) if err != nil { return "", nil, err @@ -131,7 +131,7 @@ func (r *ResourcePipeline) DoCreate(ctx context.Context, config *pipelines.Creat return response.PipelineId, nil, nil } -func (r *ResourcePipeline) DoUpdate(ctx context.Context, id string, config *pipelines.CreatePipeline, _ *PlanEntry) (*PipelineRemote, error) { +func (r *ResourcePipeline) DoUpdate(ctx context.Context, _ *Engine, id string, config *pipelines.CreatePipeline, _ *PlanEntry) (*PipelineRemote, error) { request := pipelines.EditPipeline{ AllowDuplicateNames: config.AllowDuplicateNames, BudgetPolicyId: config.BudgetPolicyId, diff --git a/bundle/direct/dresources/postgres_branch.go b/bundle/direct/dresources/postgres_branch.go index 11c0e7a0fe0..d53ac29dade 100644 --- a/bundle/direct/dresources/postgres_branch.go +++ b/bundle/direct/dresources/postgres_branch.go @@ -102,7 +102,7 @@ func (r *ResourcePostgresBranch) DoRead(ctx context.Context, id string) (*Postgr return makePostgresBranchRemote(branch), nil } -func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, config *PostgresBranchState) (string, *PostgresBranchRemote, error) { +func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, _ *Engine, config *PostgresBranchState) (string, *PostgresBranchRemote, error) { waiter, err := r.client.Postgres.CreateBranch(ctx, postgres.CreateBranchRequest{ BranchId: config.BranchId, Parent: config.Parent, @@ -135,7 +135,7 @@ func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, config *PostgresB return remote.Name, remote, nil } -func (r *ResourcePostgresBranch) DoUpdate(ctx context.Context, id string, config *PostgresBranchState, entry *PlanEntry) (*PostgresBranchRemote, error) { +func (r *ResourcePostgresBranch) DoUpdate(ctx context.Context, _ *Engine, id string, config *PostgresBranchState, entry *PlanEntry) (*PostgresBranchRemote, error) { // Build update mask from fields that have action="update" in the changes map. // This excludes immutable fields and fields that haven't changed. // Prefix with "spec." because the API expects paths relative to the Branch object, diff --git a/bundle/direct/dresources/postgres_catalog.go b/bundle/direct/dresources/postgres_catalog.go index 35a1c0f0cc6..f8d3f51f6d3 100644 --- a/bundle/direct/dresources/postgres_catalog.go +++ b/bundle/direct/dresources/postgres_catalog.go @@ -91,7 +91,7 @@ func (r *ResourcePostgresCatalog) DoRead(ctx context.Context, id string) (*Postg return makePostgresCatalogRemote(catalog), nil } -func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, config *PostgresCatalogState) (string, *PostgresCatalogRemote, error) { +func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, _ *Engine, config *PostgresCatalogState) (string, *PostgresCatalogRemote, error) { waiter, err := r.client.Postgres.CreateCatalog(ctx, postgres.CreateCatalogRequest{ CatalogId: config.CatalogId, Catalog: postgres.Catalog{ diff --git a/bundle/direct/dresources/postgres_endpoint.go b/bundle/direct/dresources/postgres_endpoint.go index 04dd6874f2c..91e43df1028 100644 --- a/bundle/direct/dresources/postgres_endpoint.go +++ b/bundle/direct/dresources/postgres_endpoint.go @@ -141,7 +141,7 @@ func (r *ResourcePostgresEndpoint) waitForReconciliation(ctx context.Context, na } } -func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, config *PostgresEndpointState) (string, *PostgresEndpointRemote, error) { +func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, _ *Engine, config *PostgresEndpointState) (string, *PostgresEndpointRemote, error) { waiter, err := r.client.Postgres.CreateEndpoint(ctx, postgres.CreateEndpointRequest{ EndpointId: config.EndpointId, Parent: config.Parent, @@ -179,7 +179,7 @@ func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, config *Postgre return remote.Name, remote, nil } -func (r *ResourcePostgresEndpoint) DoUpdate(ctx context.Context, id string, config *PostgresEndpointState, entry *PlanEntry) (*PostgresEndpointRemote, error) { +func (r *ResourcePostgresEndpoint) DoUpdate(ctx context.Context, _ *Engine, id string, config *PostgresEndpointState, entry *PlanEntry) (*PostgresEndpointRemote, error) { // Build update mask from fields that have action="update" in the changes map. // This excludes immutable fields and fields that haven't changed. // Prefix with "spec." because the API expects paths relative to the Endpoint object, diff --git a/bundle/direct/dresources/postgres_project.go b/bundle/direct/dresources/postgres_project.go index fc2ef631e38..35f223a534d 100644 --- a/bundle/direct/dresources/postgres_project.go +++ b/bundle/direct/dresources/postgres_project.go @@ -106,7 +106,7 @@ func (r *ResourcePostgresProject) DoRead(ctx context.Context, id string) (*Postg return makePostgresProjectRemote(project), nil } -func (r *ResourcePostgresProject) DoCreate(ctx context.Context, config *PostgresProjectState) (string, *PostgresProjectRemote, error) { +func (r *ResourcePostgresProject) DoCreate(ctx context.Context, _ *Engine, config *PostgresProjectState) (string, *PostgresProjectRemote, error) { waiter, err := r.client.Postgres.CreateProject(ctx, postgres.CreateProjectRequest{ ProjectId: config.ProjectId, Project: postgres.Project{ @@ -138,7 +138,7 @@ func (r *ResourcePostgresProject) DoCreate(ctx context.Context, config *Postgres return remote.Name, remote, nil } -func (r *ResourcePostgresProject) DoUpdate(ctx context.Context, id string, config *PostgresProjectState, entry *PlanEntry) (*PostgresProjectRemote, error) { +func (r *ResourcePostgresProject) DoUpdate(ctx context.Context, _ *Engine, id string, config *PostgresProjectState, entry *PlanEntry) (*PostgresProjectRemote, error) { // Build the mask from the plan's change list and prefix with "spec." (the // API expects paths relative to Project). The API rejects mask entries // that aren't also populated in the request body, and a wildcard "*" diff --git a/bundle/direct/dresources/postgres_synced_table.go b/bundle/direct/dresources/postgres_synced_table.go index 623605faf43..22f3c4d89c1 100644 --- a/bundle/direct/dresources/postgres_synced_table.go +++ b/bundle/direct/dresources/postgres_synced_table.go @@ -91,7 +91,7 @@ func (r *ResourcePostgresSyncedTable) DoRead(ctx context.Context, id string) (*P return makePostgresSyncedTableRemote(syncedTable), nil } -func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, config *PostgresSyncedTableState) (string, *PostgresSyncedTableRemote, error) { +func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, _ *Engine, config *PostgresSyncedTableState) (string, *PostgresSyncedTableRemote, error) { waiter, err := r.client.Postgres.CreateSyncedTable(ctx, postgres.CreateSyncedTableRequest{ SyncedTableId: config.SyncedTableId, SyncedTable: postgres.SyncedTable{ diff --git a/bundle/direct/dresources/quality_monitor.go b/bundle/direct/dresources/quality_monitor.go index c66fed4e0bb..7a0473ab543 100644 --- a/bundle/direct/dresources/quality_monitor.go +++ b/bundle/direct/dresources/quality_monitor.go @@ -72,7 +72,7 @@ func (r *ResourceQualityMonitor) DoRead(ctx context.Context, id string) (*catalo }) } -func (r *ResourceQualityMonitor) DoCreate(ctx context.Context, config *QualityMonitorState) (string, *catalog.MonitorInfo, error) { +func (r *ResourceQualityMonitor) DoCreate(ctx context.Context, _ *Engine, config *QualityMonitorState) (string, *catalog.MonitorInfo, error) { req := config.CreateMonitor req.TableName = config.TableName //nolint:staticcheck // Direct quality_monitor resource still uses legacy monitor endpoints; v1 data-quality migration is separate work. @@ -83,7 +83,7 @@ func (r *ResourceQualityMonitor) DoCreate(ctx context.Context, config *QualityMo return response.TableName, response, nil } -func (r *ResourceQualityMonitor) DoUpdate(ctx context.Context, id string, config *QualityMonitorState, _ *PlanEntry) (*catalog.MonitorInfo, error) { +func (r *ResourceQualityMonitor) DoUpdate(ctx context.Context, _ *Engine, id string, config *QualityMonitorState, _ *PlanEntry) (*catalog.MonitorInfo, error) { updateRequest := catalog.UpdateMonitor{ TableName: id, BaselineTableName: config.BaselineTableName, diff --git a/bundle/direct/dresources/registered_model.go b/bundle/direct/dresources/registered_model.go index 888870a7581..d1438a25bf5 100644 --- a/bundle/direct/dresources/registered_model.go +++ b/bundle/direct/dresources/registered_model.go @@ -55,7 +55,7 @@ func (r *ResourceRegisteredModel) DoRead(ctx context.Context, id string) (*catal }) } -func (r *ResourceRegisteredModel) DoCreate(ctx context.Context, config *catalog.CreateRegisteredModelRequest) (string, *catalog.RegisteredModelInfo, error) { +func (r *ResourceRegisteredModel) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateRegisteredModelRequest) (string, *catalog.RegisteredModelInfo, error) { response, err := r.client.RegisteredModels.Create(ctx, *config) if err != nil { return "", nil, err @@ -64,7 +64,7 @@ func (r *ResourceRegisteredModel) DoCreate(ctx context.Context, config *catalog. return response.FullName, response, nil } -func (r *ResourceRegisteredModel) DoUpdate(ctx context.Context, id string, config *catalog.CreateRegisteredModelRequest, _ *PlanEntry) (*catalog.RegisteredModelInfo, error) { +func (r *ResourceRegisteredModel) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateRegisteredModelRequest, _ *PlanEntry) (*catalog.RegisteredModelInfo, error) { updateRequest := catalog.UpdateRegisteredModelRequest{ FullName: id, Comment: config.Comment, diff --git a/bundle/direct/dresources/schema.go b/bundle/direct/dresources/schema.go index f082ea6c547..5895961fab7 100644 --- a/bundle/direct/dresources/schema.go +++ b/bundle/direct/dresources/schema.go @@ -37,7 +37,7 @@ func (r *ResourceSchema) DoRead(ctx context.Context, id string) (*catalog.Schema return r.client.Schemas.GetByFullName(ctx, id) } -func (r *ResourceSchema) DoCreate(ctx context.Context, config *catalog.CreateSchema) (string, *catalog.SchemaInfo, error) { +func (r *ResourceSchema) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateSchema) (string, *catalog.SchemaInfo, error) { response, err := r.client.Schemas.Create(ctx, *config) if err != nil || response == nil { return "", nil, err @@ -46,7 +46,7 @@ func (r *ResourceSchema) DoCreate(ctx context.Context, config *catalog.CreateSch } // DoUpdate updates the schema in place and returns remote state. -func (r *ResourceSchema) DoUpdate(ctx context.Context, id string, config *catalog.CreateSchema, _ *PlanEntry) (*catalog.SchemaInfo, error) { +func (r *ResourceSchema) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateSchema, _ *PlanEntry) (*catalog.SchemaInfo, error) { updateRequest := catalog.UpdateSchema{ Comment: config.Comment, EnablePredictiveOptimization: "", // Not supported by DABs diff --git a/bundle/direct/dresources/schema_test.go b/bundle/direct/dresources/schema_test.go index d013610e052..e3a66bf4cc6 100644 --- a/bundle/direct/dresources/schema_test.go +++ b/bundle/direct/dresources/schema_test.go @@ -2,6 +2,7 @@ package dresources import ( "encoding/json" + "reflect" "testing" "github.com/databricks/databricks-sdk-go/service/catalog" @@ -24,7 +25,8 @@ func TestResourceSchema_DoUpdate_WithUnsupportedForceSendFields(t *testing.T) { ForceSendFields: nil, } - id, _, err := adapter.DoCreate(ctx, config) + nopEngine := NewNopEngine(reflect.TypeOf(config)) + id, _, err := adapter.DoCreate(ctx, nopEngine, config) require.NoError(t, err) config.Comment = "updated comment" @@ -37,7 +39,7 @@ func TestResourceSchema_DoUpdate_WithUnsupportedForceSendFields(t *testing.T) { "Owner", // Unsupported - should be filtered out } - _, err = adapter.DoUpdate(ctx, id, config, &PlanEntry{}) + _, err = adapter.DoUpdate(ctx, nopEngine, id, config, &PlanEntry{}) require.NoError(t, err) result, err := adapter.DoRead(ctx, id) diff --git a/bundle/direct/dresources/secret_scope.go b/bundle/direct/dresources/secret_scope.go index c811dc84d77..a77fb1ebd3c 100644 --- a/bundle/direct/dresources/secret_scope.go +++ b/bundle/direct/dresources/secret_scope.go @@ -66,7 +66,7 @@ func (r *ResourceSecretScope) DoRead(ctx context.Context, id string) (*workspace return nil, fmt.Errorf("secret scope %q not found", id) } -func (r *ResourceSecretScope) DoCreate(ctx context.Context, state *SecretScopeConfig) (string, *workspace.SecretScope, error) { +func (r *ResourceSecretScope) DoCreate(ctx context.Context, _ *Engine, state *SecretScopeConfig) (string, *workspace.SecretScope, error) { err := r.client.Secrets.CreateScope(ctx, state.CreateScope) if err != nil { return "", nil, err diff --git a/bundle/direct/dresources/secret_scope_acls.go b/bundle/direct/dresources/secret_scope_acls.go index ef04cb7cb6a..14cd05b8f91 100644 --- a/bundle/direct/dresources/secret_scope_acls.go +++ b/bundle/direct/dresources/secret_scope_acls.go @@ -92,7 +92,7 @@ func (r *ResourceSecretScopeAcls) RemapState(remote *SecretScopeAclsState) *Secr return remote } -func (r *ResourceSecretScopeAcls) DoCreate(ctx context.Context, state *SecretScopeAclsState) (string, *SecretScopeAclsState, error) { +func (r *ResourceSecretScopeAcls) DoCreate(ctx context.Context, _ *Engine, state *SecretScopeAclsState) (string, *SecretScopeAclsState, error) { err := r.setACLs(ctx, state.ScopeName, state.Acls) if err != nil { return "", nil, err @@ -109,7 +109,7 @@ func (r *ResourceSecretScopeAcls) DoUpdateWithID(ctx context.Context, id string, return state.ScopeName, nil, nil } -func (r *ResourceSecretScopeAcls) DoUpdate(ctx context.Context, id string, state *SecretScopeAclsState, _ *PlanEntry) (*SecretScopeAclsState, error) { +func (r *ResourceSecretScopeAcls) DoUpdate(ctx context.Context, _ *Engine, id string, state *SecretScopeAclsState, _ *PlanEntry) (*SecretScopeAclsState, error) { _, _, err := r.DoUpdateWithID(ctx, id, state) return nil, err } diff --git a/bundle/direct/dresources/sql_warehouse.go b/bundle/direct/dresources/sql_warehouse.go index 146dee5294d..5dbd9d1d621 100644 --- a/bundle/direct/dresources/sql_warehouse.go +++ b/bundle/direct/dresources/sql_warehouse.go @@ -118,7 +118,7 @@ func (r *ResourceSqlWarehouse) DoRead(ctx context.Context, id string) (*SqlWareh } // DoCreate creates the warehouse and returns its id. -func (r *ResourceSqlWarehouse) DoCreate(ctx context.Context, config *SqlWarehouseState) (string, *SqlWarehouseRemote, error) { +func (r *ResourceSqlWarehouse) DoCreate(ctx context.Context, _ *Engine, config *SqlWarehouseState) (string, *SqlWarehouseRemote, error) { waiter, err := r.client.Warehouses.Create(ctx, config.CreateWarehouseRequest) if err != nil { return "", nil, err @@ -133,7 +133,7 @@ func hasWarehouseChanges(entry *PlanEntry) bool { } // DoUpdate updates the warehouse in place. -func (r *ResourceSqlWarehouse) DoUpdate(ctx context.Context, id string, config *SqlWarehouseState, entry *PlanEntry) (*SqlWarehouseRemote, error) { +func (r *ResourceSqlWarehouse) DoUpdate(ctx context.Context, _ *Engine, id string, config *SqlWarehouseState, entry *PlanEntry) (*SqlWarehouseRemote, error) { if hasWarehouseChanges(entry) { request := sql.EditWarehouseRequest{ AutoStopMins: config.AutoStopMins, diff --git a/bundle/direct/dresources/synced_database_table.go b/bundle/direct/dresources/synced_database_table.go index d45c6fb3fc7..7041329ca7d 100644 --- a/bundle/direct/dresources/synced_database_table.go +++ b/bundle/direct/dresources/synced_database_table.go @@ -24,7 +24,7 @@ func (r *ResourceSyncedDatabaseTable) DoRead(ctx context.Context, name string) ( return r.client.Database.GetSyncedDatabaseTableByName(ctx, name) } -func (r *ResourceSyncedDatabaseTable) DoCreate(ctx context.Context, config *database.SyncedDatabaseTable) (string, *database.SyncedDatabaseTable, error) { +func (r *ResourceSyncedDatabaseTable) DoCreate(ctx context.Context, _ *Engine, config *database.SyncedDatabaseTable) (string, *database.SyncedDatabaseTable, error) { result, err := r.client.Database.CreateSyncedDatabaseTable(ctx, database.CreateSyncedDatabaseTableRequest{ SyncedTable: *config, }) @@ -34,7 +34,7 @@ func (r *ResourceSyncedDatabaseTable) DoCreate(ctx context.Context, config *data return result.Name, nil, nil } -func (r *ResourceSyncedDatabaseTable) DoUpdate(ctx context.Context, id string, config *database.SyncedDatabaseTable, _ *PlanEntry) (*database.SyncedDatabaseTable, error) { +func (r *ResourceSyncedDatabaseTable) DoUpdate(ctx context.Context, _ *Engine, id string, config *database.SyncedDatabaseTable, _ *PlanEntry) (*database.SyncedDatabaseTable, error) { request := database.UpdateSyncedDatabaseTableRequest{ SyncedTable: *config, Name: id, diff --git a/bundle/direct/dresources/vector_search_endpoint.go b/bundle/direct/dresources/vector_search_endpoint.go index 1178def16be..70f06891a4d 100644 --- a/bundle/direct/dresources/vector_search_endpoint.go +++ b/bundle/direct/dresources/vector_search_endpoint.go @@ -65,24 +65,28 @@ func (r *ResourceVectorSearchEndpoint) DoRead(ctx context.Context, id string) (* return newVectorSearchEndpointRemote(info), nil } -func (r *ResourceVectorSearchEndpoint) DoCreate(ctx context.Context, config *vectorsearch.CreateEndpoint) (string, *VectorSearchEndpointRemote, error) { - waiter, err := r.client.VectorSearchEndpoints.CreateEndpoint(ctx, *config) +func (r *ResourceVectorSearchEndpoint) DoCreate(ctx context.Context, engine *Engine, config *vectorsearch.CreateEndpoint) (string, *VectorSearchEndpointRemote, error) { + _, err := r.client.VectorSearchEndpoints.CreateEndpoint(ctx, *config) if err != nil { return "", nil, err } id := config.Name - return id, newVectorSearchEndpointRemote(waiter.Response), nil -} -func (r *ResourceVectorSearchEndpoint) WaitAfterCreate(ctx context.Context, id string, config *vectorsearch.CreateEndpoint) (*VectorSearchEndpointRemote, error) { + // Save state immediately after the endpoint is created so it is not orphaned + // if the subsequent wait is interrupted. + engine.SetID(id) + if err := engine.SaveState(config); err != nil { + return "", nil, err + } + info, err := r.client.VectorSearchEndpoints.WaitGetEndpointVectorSearchEndpointOnline(ctx, config.Name, 60*time.Minute, nil) if err != nil { - return nil, err + return "", nil, err } - return newVectorSearchEndpointRemote(info), nil + return id, newVectorSearchEndpointRemote(info), nil } -func (r *ResourceVectorSearchEndpoint) DoUpdate(ctx context.Context, id string, config *vectorsearch.CreateEndpoint, entry *PlanEntry) (*VectorSearchEndpointRemote, error) { +func (r *ResourceVectorSearchEndpoint) DoUpdate(ctx context.Context, _ *Engine, id string, config *vectorsearch.CreateEndpoint, entry *PlanEntry) (*VectorSearchEndpointRemote, error) { if entry.Changes.HasChange(pathBudgetPolicyId) { _, err := r.client.VectorSearchEndpoints.UpdateEndpointBudgetPolicy(ctx, vectorsearch.PatchEndpointBudgetPolicyRequest{ EndpointName: id, diff --git a/bundle/direct/dresources/vector_search_index.go b/bundle/direct/dresources/vector_search_index.go index 48ee6f0f968..0bba784a90a 100644 --- a/bundle/direct/dresources/vector_search_index.go +++ b/bundle/direct/dresources/vector_search_index.go @@ -121,8 +121,8 @@ func (r *ResourceVectorSearchIndex) DoRead(ctx context.Context, id string) (*Vec }, nil } -func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, config *VectorSearchIndexState) (string, *VectorSearchIndexRemote, error) { - index, err := r.client.VectorSearchIndexes.CreateIndex(ctx, config.CreateVectorIndexRequest) +func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, engine *Engine, config *VectorSearchIndexState) (string, *VectorSearchIndexRemote, error) { + _, err := r.client.VectorSearchIndexes.CreateIndex(ctx, config.CreateVectorIndexRequest) if err != nil { return "", nil, err } @@ -135,26 +135,18 @@ func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, config *Vector return "", nil, err } config.EndpointUuid = endpointUuid - return config.Name, &VectorSearchIndexRemote{VectorIndex: *index, EndpointUuid: endpointUuid}, nil -} -// No DoUpdate: vector search indexes have no update API. All SDK fields are -// declared in resources.yml under recreate_on_changes or ignore_remote_changes. -// If a future SDK bump adds a new field that isn't classified, the framework -// rejects the resulting Update plan at bundle_plan.go (see also the reflection -// test in vector_search_index_test.go which catches it earlier at unit-test time). - -func (r *ResourceVectorSearchIndex) DoDelete(ctx context.Context, id string, _ *VectorSearchIndexState) error { - return r.client.VectorSearchIndexes.DeleteIndexByIndexName(ctx, id) -} + // Save state immediately after the index is created (endpoint UUID now set) so it + // is not orphaned if the subsequent provisioning wait is interrupted. + engine.SetID(config.Name) + if err := engine.SaveState(config); err != nil { + return "", nil, err + } -// WaitAfterCreate polls GetIndex until Status.Ready=true. CreateIndex returns -// immediately with metadata of an index whose embedding pipeline is still -// provisioning; queries against an index that isn't ready fail. Blocking here -// lets dependent resources (and the next plan) see a usable index. -func (r *ResourceVectorSearchIndex) WaitAfterCreate(ctx context.Context, id string, config *VectorSearchIndexState) (*VectorSearchIndexRemote, error) { + // CreateIndex returns immediately; poll until the embedding pipeline is ready so + // dependent resources and the next plan see a usable index. index, err := retries.Poll(ctx, createIndexTimeout, func() (*vectorsearch.VectorIndex, *retries.Err) { - idx, getErr := r.client.VectorSearchIndexes.GetIndexByIndexName(ctx, id) + idx, getErr := r.client.VectorSearchIndexes.GetIndexByIndexName(ctx, config.Name) if getErr != nil { return nil, retries.Halt(getErr) } @@ -168,9 +160,19 @@ func (r *ResourceVectorSearchIndex) WaitAfterCreate(ctx context.Context, id stri return idx, nil }) if err != nil { - return nil, err + return "", nil, err } - return &VectorSearchIndexRemote{VectorIndex: *index, EndpointUuid: config.EndpointUuid}, nil + return config.Name, &VectorSearchIndexRemote{VectorIndex: *index, EndpointUuid: endpointUuid}, nil +} + +// No DoUpdate: vector search indexes have no update API. All SDK fields are +// declared in resources.yml under recreate_on_changes or ignore_remote_changes. +// If a future SDK bump adds a new field that isn't classified, the framework +// rejects the resulting Update plan at bundle_plan.go (see also the reflection +// test in vector_search_index_test.go which catches it earlier at unit-test time). + +func (r *ResourceVectorSearchIndex) DoDelete(ctx context.Context, id string, _ *VectorSearchIndexState) error { + return r.client.VectorSearchIndexes.DeleteIndexByIndexName(ctx, id) } // WaitAfterDelete polls GetIndex until it returns 404. The DELETE call is diff --git a/bundle/direct/dresources/volume.go b/bundle/direct/dresources/volume.go index 73bf7a79b40..3291226b12d 100644 --- a/bundle/direct/dresources/volume.go +++ b/bundle/direct/dresources/volume.go @@ -42,7 +42,7 @@ func (r *ResourceVolume) DoRead(ctx context.Context, id string) (*catalog.Volume return r.client.Volumes.ReadByName(ctx, id) } -func (r *ResourceVolume) DoCreate(ctx context.Context, config *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) { +func (r *ResourceVolume) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) { response, err := r.client.Volumes.Create(ctx, *config) if err != nil { return "", nil, err @@ -50,7 +50,7 @@ func (r *ResourceVolume) DoCreate(ctx context.Context, config *catalog.CreateVol return response.FullName, response, nil } -func (r *ResourceVolume) DoUpdate(ctx context.Context, id string, config *catalog.CreateVolumeRequestContent, _ *PlanEntry) (*catalog.VolumeInfo, error) { +func (r *ResourceVolume) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateVolumeRequestContent, _ *PlanEntry) (*catalog.VolumeInfo, error) { updateRequest := catalog.UpdateVolumeRequestContent{ Comment: config.Comment, Name: id, From 45b86470e36826d40e58ea5d22adf10e0efc1f6c Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 29 May 2026 15:03:06 +0200 Subject: [PATCH 02/13] direct/testserver: improve app test realism MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make testserver app DELETE asynchronous (sets DELETING state) to match real API behaviour. AppsGet auto-removes the app after returning it in DELETING state, so the next request sees it as gone. app_test.go: use real SDK calls (Create + DeleteByName) to put the app in DELETING state before testing DoCreate retry logic, instead of injecting state directly. RetriesWhenGetReturnsNotFound uses a one-shot POST override so GET returns 404 naturally without a custom GET handler. all_test.go: retry DoRead once after DoDelete to let async deletions (DELETING → gone) clear before asserting the resource is absent. Add Server.GetWorkspace to testserver for pre-seeding state in tests. Co-authored-by: Denis Bilenko --- bundle/direct/dresources/all_test.go | 5 ++ bundle/direct/dresources/app_test.go | 99 ++++++---------------------- libs/testserver/apps.go | 36 ++++++++++ libs/testserver/handlers.go | 4 +- libs/testserver/server.go | 6 ++ 5 files changed, 68 insertions(+), 82 deletions(-) diff --git a/bundle/direct/dresources/all_test.go b/bundle/direct/dresources/all_test.go index d1fe61d80c7..fb33d12cb9d 100644 --- a/bundle/direct/dresources/all_test.go +++ b/bundle/direct/dresources/all_test.go @@ -904,6 +904,11 @@ func testCRUD(t *testing.T, group string, adapter *Adapter, client *databricks.W deleteIsNoop := strings.HasSuffix(group, "permissions") || strings.HasSuffix(group, "grants") remoteAfterDelete, err := adapter.DoRead(ctx, createdID) + // Some resources delete asynchronously (e.g. apps transition through a + // DELETING state). Read once more to let that pending state clear. + if err == nil && remoteAfterDelete != nil && !deleteIsNoop { + remoteAfterDelete, err = adapter.DoRead(ctx, createdID) + } if deleteIsNoop { require.NoError(t, err) } else { diff --git a/bundle/direct/dresources/app_test.go b/bundle/direct/dresources/app_test.go index 7d7ec940567..19f2a35a332 100644 --- a/bundle/direct/dresources/app_test.go +++ b/bundle/direct/dresources/app_test.go @@ -17,47 +17,6 @@ import ( // an app already exists but is in DELETING state. func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) { server := testserver.New(t) - - createCallCount := 0 - getCallCount := 0 - - server.Handle("POST", "/api/2.0/apps", func(req testserver.Request) any { - createCallCount++ - if createCallCount == 1 { - return testserver.Response{ - StatusCode: 409, - Body: map[string]string{ - "error_code": "RESOURCE_ALREADY_EXISTS", - "message": "An app with the same name already exists.", - }, - } - } - return apps.App{ - Name: "test-app", - ComputeStatus: &apps.ComputeStatus{ - State: apps.ComputeStateActive, - }, - } - }) - - server.Handle("GET", "/api/2.0/apps/{name}", func(req testserver.Request) any { - getCallCount++ - if getCallCount == 1 { - return apps.App{ - Name: req.Vars["name"], - ComputeStatus: &apps.ComputeStatus{ - State: apps.ComputeStateDeleting, - }, - } - } - return apps.App{ - Name: req.Vars["name"], - ComputeStatus: &apps.ComputeStatus{ - State: apps.ComputeStateActive, - }, - } - }) - testserver.AddDefaultHandlers(server) client, err := databricks.NewWorkspaceClient(&databricks.Config{ @@ -66,14 +25,21 @@ func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) { }) require.NoError(t, err) - r := (&ResourceApp{}).New(client) ctx := t.Context() + + // Create then delete an app to put it in DELETING state. + // The testserver's DELETE is asynchronous: it sets DELETING rather than + // removing immediately, so the retry create will find the app in that state. + _, err = client.Apps.Create(ctx, apps.CreateAppRequest{App: apps.App{Name: "test-app"}}) + require.NoError(t, err) + _, err = client.Apps.DeleteByName(ctx, "test-app") + require.NoError(t, err) + + r := (&ResourceApp{}).New(client) name, _, err := r.DoCreate(ctx, NewNopEngine(reflect.TypeFor[*AppState]()), &AppState{App: apps.App{Name: "test-app"}}) require.NoError(t, err) assert.Equal(t, "test-app", name) - assert.Equal(t, 2, createCallCount, "expected Create to be called twice (1 retry)") - assert.Equal(t, 2, getCallCount, "expected Get to be called twice: once to check app state, once by waitForApp") } // TestAppDoCreate_RetriesWhenGetReturnsNotFound verifies that DoCreate retries @@ -81,45 +47,20 @@ func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) { func TestAppDoCreate_RetriesWhenGetReturnsNotFound(t *testing.T) { server := testserver.New(t) - createCallCount := 0 - getCallCount := 0 - + // Simulate a race: the app existed when Create was called (returns 409) but + // was deleted before the existence check (GET returns 404). The first POST + // returns 409 without storing anything so the standard GET handler returns + // 404 naturally, and the retry POST creates the app normally. + rejectedOnce := false server.Handle("POST", "/api/2.0/apps", func(req testserver.Request) any { - createCallCount++ - if createCallCount == 1 { + if !rejectedOnce { + rejectedOnce = true return testserver.Response{ StatusCode: 409, - Body: map[string]string{ - "error_code": "RESOURCE_ALREADY_EXISTS", - "message": "An app with the same name already exists.", - }, + Body: map[string]string{"error_code": "RESOURCE_ALREADY_EXISTS", "message": "An app with the same name already exists."}, } } - return apps.App{ - Name: "test-app", - ComputeStatus: &apps.ComputeStatus{ - State: apps.ComputeStateActive, - }, - } - }) - - server.Handle("GET", "/api/2.0/apps/{name}", func(req testserver.Request) any { - getCallCount++ - if getCallCount == 1 { - return testserver.Response{ - StatusCode: 404, - Body: map[string]string{ - "error_code": "RESOURCE_DOES_NOT_EXIST", - "message": "App not found.", - }, - } - } - return apps.App{ - Name: req.Vars["name"], - ComputeStatus: &apps.ComputeStatus{ - State: apps.ComputeStateActive, - }, - } + return req.Workspace.AppsUpsert(req, "") }) testserver.AddDefaultHandlers(server) @@ -136,8 +77,6 @@ func TestAppDoCreate_RetriesWhenGetReturnsNotFound(t *testing.T) { require.NoError(t, err) assert.Equal(t, "test-app", name) - assert.Equal(t, 2, createCallCount, "expected Create to be called twice") - assert.Equal(t, 2, getCallCount, "expected Get to be called twice: once to check app state, once by waitForApp") } func TestAppDoUpdate_UpdateMaskHasAllFields(t *testing.T) { diff --git a/libs/testserver/apps.go b/libs/testserver/apps.go index b35632e27ad..33ba5e2b6c2 100644 --- a/libs/testserver/apps.go +++ b/libs/testserver/apps.go @@ -72,6 +72,42 @@ func (s *FakeWorkspace) AppsCreateUpdate(req Request, name string) Response { } } +func (s *FakeWorkspace) AppsGet(_ Request, name string) Response { + defer s.LockUnlock()() + + app, ok := s.Apps[name] + if !ok { + return Response{StatusCode: 404, Body: map[string]string{"message": "App not found: " + name}} + } + + // When an app is in DELETING state, remove it from the store after returning. + // This simulates deletion completing so the next create attempt succeeds. + if app.ComputeStatus != nil && app.ComputeStatus.State == apps.ComputeStateDeleting { + delete(s.Apps, name) + } + + return Response{Body: app} +} + +// AppsDelete marks an app as DELETING, simulating asynchronous deletion. +// The app is removed from the store when it is next retrieved via AppsGet. +func (s *FakeWorkspace) AppsDelete(_ Request, name string) Response { + defer s.LockUnlock()() + + app, ok := s.Apps[name] + if !ok { + return Response{StatusCode: 404} + } + + app.ComputeStatus = &apps.ComputeStatus{ + State: apps.ComputeStateDeleting, + Message: "App compute is being deleted.", + } + s.Apps[name] = app + + return Response{} +} + func (s *FakeWorkspace) AppsGetUpdate(_ Request, name string) Response { defer s.LockUnlock()() diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index a5dea385abe..36db891e8bc 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -404,7 +404,7 @@ func AddDefaultHandlers(server *Server) { }) server.Handle("GET", "/api/2.0/apps/{name}", func(req Request) any { - return MapGet(req.Workspace, req.Workspace.Apps, req.Vars["name"]) + return req.Workspace.AppsGet(req, req.Vars["name"]) }) server.Handle("POST", "/api/2.0/apps", func(req Request) any { @@ -416,7 +416,7 @@ func AddDefaultHandlers(server *Server) { }) server.Handle("DELETE", "/api/2.0/apps/{name}", func(req Request) any { - return MapDelete(req.Workspace, req.Workspace.Apps, req.Vars["name"]) + return req.Workspace.AppsDelete(req, req.Vars["name"]) }) // Schemas: diff --git a/libs/testserver/server.go b/libs/testserver/server.go index eb0268d694f..c8ae59d539b 100644 --- a/libs/testserver/server.go +++ b/libs/testserver/server.go @@ -296,6 +296,12 @@ Response.Body = '' return s } +// GetWorkspace returns (creating if necessary) the FakeWorkspace for the given token. +// Use this in tests to pre-seed state before making requests. +func (s *Server) GetWorkspace(token string) *FakeWorkspace { + return s.getWorkspaceForToken(token) +} + func (s *Server) getWorkspaceForToken(token string) *FakeWorkspace { if token == "" { return nil From b064bf70a80e0f03ef9d8405b103fa2b357a0de0 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Mon, 1 Jun 2026 10:52:36 +0200 Subject: [PATCH 03/13] testserver: remove unused GetWorkspace method Co-authored-by: Denis Bilenko --- libs/testserver/server.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/libs/testserver/server.go b/libs/testserver/server.go index c8ae59d539b..eb0268d694f 100644 --- a/libs/testserver/server.go +++ b/libs/testserver/server.go @@ -296,12 +296,6 @@ Response.Body = '' return s } -// GetWorkspace returns (creating if necessary) the FakeWorkspace for the given token. -// Use this in tests to pre-seed state before making requests. -func (s *Server) GetWorkspace(token string) *FakeWorkspace { - return s.getWorkspaceForToken(token) -} - func (s *Server) getWorkspaceForToken(token string) *FakeWorkspace { if token == "" { return nil From 9ccc42d447f919dfc0b112275a691d54da50cfcf Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Mon, 1 Jun 2026 15:05:38 +0200 Subject: [PATCH 04/13] direct: simplify Engine API: SaveState(id, state) replaces SetID+SaveState Collapse the two-step engine.SetID(id) / engine.SaveState(state) pattern into a single engine.SaveState(id, state) call. The Engine still tracks the id internally and panics if a subsequent call passes a different id (guards against bugs). Also extract the polling loop from vector_search_index.DoCreate into a private waitForIndexReady helper so that the unchanged DoDelete stays between the two functions and does not appear in the diff. Co-authored-by: Denis Bilenko --- bundle/direct/apply.go | 2 - bundle/direct/dresources/adapter.go | 2 +- bundle/direct/dresources/app.go | 4 +- bundle/direct/dresources/cluster.go | 3 +- bundle/direct/dresources/database_instance.go | 3 +- bundle/direct/dresources/engine.go | 19 +++----- .../dresources/model_serving_endpoint.go | 3 +- .../dresources/vector_search_endpoint.go | 3 +- .../direct/dresources/vector_search_index.go | 43 +++++++++++-------- 9 files changed, 40 insertions(+), 42 deletions(-) diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index b9db11fa6fa..3380e5018b6 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -129,8 +129,6 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, engine := dresources.NewEngine(d.Adapter.StateType(), func(_ string, x any) error { return db.SaveState(d.ResourceKey, id, x, d.DependsOn) }) - engine.SetID(id) - remoteState, err := retryOnTransient(ctx, func() (any, error) { return d.Adapter.DoUpdate(ctx, engine, id, newState, planEntry) }) diff --git a/bundle/direct/dresources/adapter.go b/bundle/direct/dresources/adapter.go index a29a094cc78..0e273545157 100644 --- a/bundle/direct/dresources/adapter.go +++ b/bundle/direct/dresources/adapter.go @@ -53,7 +53,7 @@ type IResource interface { // DoCreate creates a new resource from the newState. Returns id of the resource and optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. - // Call engine.SetID then engine.SaveState to persist intermediate state before long-running waits. + // Call engine.SaveState(id, state) to persist intermediate state before long-running waits. // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, _ *Engine, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) DoCreate(ctx context.Context, engine *Engine, newState any) (id string, remoteState any, e error) diff --git a/bundle/direct/dresources/app.go b/bundle/direct/dresources/app.go index e36036d28b0..bceace5284b 100644 --- a/bundle/direct/dresources/app.go +++ b/bundle/direct/dresources/app.go @@ -156,8 +156,8 @@ func (r *ResourceApp) DoCreate(ctx context.Context, engine *Engine, config *AppS // Save state as soon as the app exists so it is not orphaned if the wait or // lifecycle management is interrupted. - engine.SetID(app.Name) - if err := engine.SaveState(config); err != nil { + + if err := engine.SaveState(app.Name, config); err != nil { return "", nil, err } diff --git a/bundle/direct/dresources/cluster.go b/bundle/direct/dresources/cluster.go index c16cf341e70..45aaa16b1bc 100644 --- a/bundle/direct/dresources/cluster.go +++ b/bundle/direct/dresources/cluster.go @@ -156,8 +156,7 @@ func (r *ResourceCluster) DoCreate(ctx context.Context, engine *Engine, config * // Save state immediately after the cluster is created so it is not orphaned // if the subsequent wait or terminate is interrupted. - engine.SetID(id) - if err := engine.SaveState(config); err != nil { + if err := engine.SaveState(id, config); err != nil { return "", nil, err } diff --git a/bundle/direct/dresources/database_instance.go b/bundle/direct/dresources/database_instance.go index 35251eb526c..3f0dc827fab 100644 --- a/bundle/direct/dresources/database_instance.go +++ b/bundle/direct/dresources/database_instance.go @@ -36,8 +36,7 @@ func (d *ResourceDatabaseInstance) DoCreate(ctx context.Context, engine *Engine, // Save state immediately after the instance is created so it is not orphaned // if the subsequent wait is interrupted. - engine.SetID(id) - if err := engine.SaveState(config); err != nil { + if err := engine.SaveState(id, config); err != nil { return "", nil, err } diff --git a/bundle/direct/dresources/engine.go b/bundle/direct/dresources/engine.go index c4c8be8e25f..1e9bdd7b28c 100644 --- a/bundle/direct/dresources/engine.go +++ b/bundle/direct/dresources/engine.go @@ -1,7 +1,6 @@ package dresources import ( - "errors" "fmt" "reflect" ) @@ -26,18 +25,14 @@ func NewNopEngine(stateType reflect.Type) *Engine { return NewEngine(stateType, func(_ string, _ any) error { return nil }) } -// SetID sets the resource id for subsequent SaveState calls. -// Must be called before SaveState during DoCreate; for DoUpdate the Engine is -// pre-configured with the existing id. -func (e *Engine) SetID(id string) { - e.id = id -} - -// SaveState saves the resource state. x must be of the same pointer-to-struct -// type as the resource's state type. Returns an error if SetID was not called. -func (e *Engine) SaveState(x any) error { +// SaveState saves the resource state. id must be the resource's identifier; on +// the first call it is recorded, and subsequent calls panic if a different id is +// passed. x must be a pointer to the same struct type as the resource's state. +func (e *Engine) SaveState(id string, x any) error { if e.id == "" { - return errors.New("SaveState: id not set, call SetID first") + e.id = id + } else if e.id != id { + panic(fmt.Sprintf("SaveState: id mismatch: expected %q, got %q", e.id, id)) } xt := reflect.TypeOf(x) if xt != e.stateType { diff --git a/bundle/direct/dresources/model_serving_endpoint.go b/bundle/direct/dresources/model_serving_endpoint.go index 3ba258d1090..94d7fe8ee6f 100644 --- a/bundle/direct/dresources/model_serving_endpoint.go +++ b/bundle/direct/dresources/model_serving_endpoint.go @@ -137,8 +137,7 @@ func (r *ResourceModelServingEndpoint) DoCreate(ctx context.Context, engine *Eng // Save state immediately after the endpoint is created so it is not orphaned // if the subsequent wait is interrupted. - engine.SetID(id) - if err := engine.SaveState(config); err != nil { + if err := engine.SaveState(id, config); err != nil { return "", nil, err } diff --git a/bundle/direct/dresources/vector_search_endpoint.go b/bundle/direct/dresources/vector_search_endpoint.go index 70f06891a4d..001df65e9c3 100644 --- a/bundle/direct/dresources/vector_search_endpoint.go +++ b/bundle/direct/dresources/vector_search_endpoint.go @@ -74,8 +74,7 @@ func (r *ResourceVectorSearchEndpoint) DoCreate(ctx context.Context, engine *Eng // Save state immediately after the endpoint is created so it is not orphaned // if the subsequent wait is interrupted. - engine.SetID(id) - if err := engine.SaveState(config); err != nil { + if err := engine.SaveState(id, config); err != nil { return "", nil, err } diff --git a/bundle/direct/dresources/vector_search_index.go b/bundle/direct/dresources/vector_search_index.go index 0bba784a90a..68e6a86e85e 100644 --- a/bundle/direct/dresources/vector_search_index.go +++ b/bundle/direct/dresources/vector_search_index.go @@ -138,15 +138,34 @@ func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, engine *Engine // Save state immediately after the index is created (endpoint UUID now set) so it // is not orphaned if the subsequent provisioning wait is interrupted. - engine.SetID(config.Name) - if err := engine.SaveState(config); err != nil { + if err := engine.SaveState(config.Name, config); err != nil { return "", nil, err } - // CreateIndex returns immediately; poll until the embedding pipeline is ready so - // dependent resources and the next plan see a usable index. + remote, err := r.waitForIndexReady(ctx, config.Name, endpointUuid) + if err != nil { + return "", nil, err + } + return config.Name, remote, nil +} + +// No DoUpdate: vector search indexes have no update API. All SDK fields are +// declared in resources.yml under recreate_on_changes or ignore_remote_changes. +// If a future SDK bump adds a new field that isn't classified, the framework +// rejects the resulting Update plan at bundle_plan.go (see also the reflection +// test in vector_search_index_test.go which catches it earlier at unit-test time). + +func (r *ResourceVectorSearchIndex) DoDelete(ctx context.Context, id string, _ *VectorSearchIndexState) error { + return r.client.VectorSearchIndexes.DeleteIndexByIndexName(ctx, id) +} + +// waitForIndexReady polls GetIndex until Status.Ready=true. CreateIndex returns +// immediately with metadata of an index whose embedding pipeline is still +// provisioning; queries against an index that isn't ready fail. Blocking here +// lets dependent resources (and the next plan) see a usable index. +func (r *ResourceVectorSearchIndex) waitForIndexReady(ctx context.Context, id, endpointUuid string) (*VectorSearchIndexRemote, error) { index, err := retries.Poll(ctx, createIndexTimeout, func() (*vectorsearch.VectorIndex, *retries.Err) { - idx, getErr := r.client.VectorSearchIndexes.GetIndexByIndexName(ctx, config.Name) + idx, getErr := r.client.VectorSearchIndexes.GetIndexByIndexName(ctx, id) if getErr != nil { return nil, retries.Halt(getErr) } @@ -160,19 +179,9 @@ func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, engine *Engine return idx, nil }) if err != nil { - return "", nil, err + return nil, err } - return config.Name, &VectorSearchIndexRemote{VectorIndex: *index, EndpointUuid: endpointUuid}, nil -} - -// No DoUpdate: vector search indexes have no update API. All SDK fields are -// declared in resources.yml under recreate_on_changes or ignore_remote_changes. -// If a future SDK bump adds a new field that isn't classified, the framework -// rejects the resulting Update plan at bundle_plan.go (see also the reflection -// test in vector_search_index_test.go which catches it earlier at unit-test time). - -func (r *ResourceVectorSearchIndex) DoDelete(ctx context.Context, id string, _ *VectorSearchIndexState) error { - return r.client.VectorSearchIndexes.DeleteIndexByIndexName(ctx, id) + return &VectorSearchIndexRemote{VectorIndex: *index, EndpointUuid: endpointUuid}, nil } // WaitAfterDelete polls GetIndex until it returns 404. The DELETE call is From 36d75eaecc792011c4763ff683c6fa388f5bb944 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Mon, 1 Jun 2026 15:53:22 +0200 Subject: [PATCH 05/13] direct: Engine.SaveState takes ctx, returns void; logs I/O failures internally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resource implementations cannot recover from a state-persistence failure — the resource already exists on the server and aborting would not undo its creation. Log the error via logdiag instead of propagating it, and drop the error return so call sites are a single statement. Co-authored-by: Denis Bilenko --- bundle/direct/dresources/adapter.go | 2 +- bundle/direct/dresources/app.go | 4 +--- bundle/direct/dresources/cluster.go | 4 +--- bundle/direct/dresources/database_instance.go | 4 +--- bundle/direct/dresources/engine.go | 13 ++++++++++--- bundle/direct/dresources/model_serving_endpoint.go | 4 +--- bundle/direct/dresources/vector_search_endpoint.go | 4 +--- bundle/direct/dresources/vector_search_index.go | 4 +--- 8 files changed, 17 insertions(+), 22 deletions(-) diff --git a/bundle/direct/dresources/adapter.go b/bundle/direct/dresources/adapter.go index 0e273545157..cfcd675f9b2 100644 --- a/bundle/direct/dresources/adapter.go +++ b/bundle/direct/dresources/adapter.go @@ -53,7 +53,7 @@ type IResource interface { // DoCreate creates a new resource from the newState. Returns id of the resource and optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. - // Call engine.SaveState(id, state) to persist intermediate state before long-running waits. + // Call engine.SaveState(ctx, id, state) to persist intermediate state before long-running waits. // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, _ *Engine, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) DoCreate(ctx context.Context, engine *Engine, newState any) (id string, remoteState any, e error) diff --git a/bundle/direct/dresources/app.go b/bundle/direct/dresources/app.go index bceace5284b..f82de0e2363 100644 --- a/bundle/direct/dresources/app.go +++ b/bundle/direct/dresources/app.go @@ -157,9 +157,7 @@ func (r *ResourceApp) DoCreate(ctx context.Context, engine *Engine, config *AppS // Save state as soon as the app exists so it is not orphaned if the wait or // lifecycle management is interrupted. - if err := engine.SaveState(app.Name, config); err != nil { - return "", nil, err - } + engine.SaveState(ctx, app.Name, config) remote, err := r.waitForApp(ctx, r.client, config.Name) if err != nil { diff --git a/bundle/direct/dresources/cluster.go b/bundle/direct/dresources/cluster.go index 45aaa16b1bc..e62a8e99775 100644 --- a/bundle/direct/dresources/cluster.go +++ b/bundle/direct/dresources/cluster.go @@ -156,9 +156,7 @@ func (r *ResourceCluster) DoCreate(ctx context.Context, engine *Engine, config * // Save state immediately after the cluster is created so it is not orphaned // if the subsequent wait or terminate is interrupted. - if err := engine.SaveState(id, config); err != nil { - return "", nil, err - } + engine.SaveState(ctx, id, config) // Always wait for RUNNING first: clusters start in PENDING state and must be polled. _, err = r.client.Clusters.WaitGetClusterRunning(ctx, id, 15*time.Minute, nil) diff --git a/bundle/direct/dresources/database_instance.go b/bundle/direct/dresources/database_instance.go index 3f0dc827fab..c66a0bd092a 100644 --- a/bundle/direct/dresources/database_instance.go +++ b/bundle/direct/dresources/database_instance.go @@ -36,9 +36,7 @@ func (d *ResourceDatabaseInstance) DoCreate(ctx context.Context, engine *Engine, // Save state immediately after the instance is created so it is not orphaned // if the subsequent wait is interrupted. - if err := engine.SaveState(id, config); err != nil { - return "", nil, err - } + engine.SaveState(ctx, id, config) waiterObj := &database.WaitGetDatabaseInstanceDatabaseAvailable[database.DatabaseInstance]{ Response: config, diff --git a/bundle/direct/dresources/engine.go b/bundle/direct/dresources/engine.go index 1e9bdd7b28c..46deca6600d 100644 --- a/bundle/direct/dresources/engine.go +++ b/bundle/direct/dresources/engine.go @@ -1,8 +1,11 @@ package dresources import ( + "context" "fmt" "reflect" + + "github.com/databricks/cli/libs/logdiag" ) // Engine provides state persistence to resource implementations. @@ -28,7 +31,9 @@ func NewNopEngine(stateType reflect.Type) *Engine { // SaveState saves the resource state. id must be the resource's identifier; on // the first call it is recorded, and subsequent calls panic if a different id is // passed. x must be a pointer to the same struct type as the resource's state. -func (e *Engine) SaveState(id string, x any) error { +// Failures to persist state are logged but do not abort the deployment — the +// resource already exists and aborting would not undo its creation. +func (e *Engine) SaveState(ctx context.Context, id string, x any) { if e.id == "" { e.id = id } else if e.id != id { @@ -36,7 +41,9 @@ func (e *Engine) SaveState(id string, x any) error { } xt := reflect.TypeOf(x) if xt != e.stateType { - return fmt.Errorf("SaveState: type mismatch: expected %v, got %v", e.stateType, xt) + panic(fmt.Sprintf("SaveState: type mismatch: expected %v, got %v", e.stateType, xt)) + } + if err := e.saveFunc(e.id, x); err != nil { + logdiag.LogError(ctx, err) } - return e.saveFunc(e.id, x) } diff --git a/bundle/direct/dresources/model_serving_endpoint.go b/bundle/direct/dresources/model_serving_endpoint.go index 94d7fe8ee6f..091effa0631 100644 --- a/bundle/direct/dresources/model_serving_endpoint.go +++ b/bundle/direct/dresources/model_serving_endpoint.go @@ -137,9 +137,7 @@ func (r *ResourceModelServingEndpoint) DoCreate(ctx context.Context, engine *Eng // Save state immediately after the endpoint is created so it is not orphaned // if the subsequent wait is interrupted. - if err := engine.SaveState(id, config); err != nil { - return "", nil, err - } + engine.SaveState(ctx, id, config) remote, err := r.waitForEndpointReady(ctx, config.Name) return id, remote, err diff --git a/bundle/direct/dresources/vector_search_endpoint.go b/bundle/direct/dresources/vector_search_endpoint.go index 001df65e9c3..719901b79c3 100644 --- a/bundle/direct/dresources/vector_search_endpoint.go +++ b/bundle/direct/dresources/vector_search_endpoint.go @@ -74,9 +74,7 @@ func (r *ResourceVectorSearchEndpoint) DoCreate(ctx context.Context, engine *Eng // Save state immediately after the endpoint is created so it is not orphaned // if the subsequent wait is interrupted. - if err := engine.SaveState(id, config); err != nil { - return "", nil, err - } + engine.SaveState(ctx, id, config) info, err := r.client.VectorSearchEndpoints.WaitGetEndpointVectorSearchEndpointOnline(ctx, config.Name, 60*time.Minute, nil) if err != nil { diff --git a/bundle/direct/dresources/vector_search_index.go b/bundle/direct/dresources/vector_search_index.go index 68e6a86e85e..ce796bc9f93 100644 --- a/bundle/direct/dresources/vector_search_index.go +++ b/bundle/direct/dresources/vector_search_index.go @@ -138,9 +138,7 @@ func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, engine *Engine // Save state immediately after the index is created (endpoint UUID now set) so it // is not orphaned if the subsequent provisioning wait is interrupted. - if err := engine.SaveState(config.Name, config); err != nil { - return "", nil, err - } + engine.SaveState(ctx, config.Name, config) remote, err := r.waitForIndexReady(ctx, config.Name, endpointUuid) if err != nil { From 11f5eb09d9bdaaad4f6b9d63942fc7d7028bb2ae Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Mon, 1 Jun 2026 17:13:31 +0200 Subject: [PATCH 06/13] direct: save state before wait in postgres and dashboard resources All six resources with a gap between resource creation and a subsequent long-running wait now call engine.SaveState immediately after the create API returns, using waiter.Name() (available before Wait()) for the postgres resources and createResp.DashboardId for the dashboard. This ensures an interrupted deployment leaves a tracked resource rather than an orphan the next plan must rediscover from remote state. Co-authored-by: Denis Bilenko --- bundle/direct/dresources/dashboard.go | 3 ++- bundle/direct/dresources/postgres_branch.go | 3 ++- bundle/direct/dresources/postgres_catalog.go | 3 ++- bundle/direct/dresources/postgres_endpoint.go | 3 ++- bundle/direct/dresources/postgres_project.go | 3 ++- bundle/direct/dresources/postgres_synced_table.go | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index 71d7b1f9218..e0917c490b1 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -264,7 +264,7 @@ func responseToState(createOrUpdateResp *dashboards.Dashboard, publishResp *dash } } -func (r *ResourceDashboard) DoCreate(ctx context.Context, _ *Engine, config *DashboardState) (string, *DashboardState, error) { +func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *Engine, config *DashboardState) (string, *DashboardState, error) { dashboard, err := prepareDashboardRequest(config) if err != nil { return "", nil, err @@ -299,6 +299,7 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, _ *Engine, config *Das // Persist the etag in state. config.Etag = createResp.Etag + engine.SaveState(ctx, createResp.DashboardId, config) var publishResp *dashboards.PublishedDashboard // Note, today config.Published is always true (we do not have this field in input config). diff --git a/bundle/direct/dresources/postgres_branch.go b/bundle/direct/dresources/postgres_branch.go index d53ac29dade..784daddb7fa 100644 --- a/bundle/direct/dresources/postgres_branch.go +++ b/bundle/direct/dresources/postgres_branch.go @@ -102,7 +102,7 @@ func (r *ResourcePostgresBranch) DoRead(ctx context.Context, id string) (*Postgr return makePostgresBranchRemote(branch), nil } -func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, _ *Engine, config *PostgresBranchState) (string, *PostgresBranchRemote, error) { +func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, engine *Engine, config *PostgresBranchState) (string, *PostgresBranchRemote, error) { waiter, err := r.client.Postgres.CreateBranch(ctx, postgres.CreateBranchRequest{ BranchId: config.BranchId, Parent: config.Parent, @@ -124,6 +124,7 @@ func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, _ *Engine, config if err != nil { return "", nil, err } + engine.SaveState(ctx, waiter.Name(), config) // Wait for the branch to be ready (long-running operation) result, err := waiter.Wait(ctx) diff --git a/bundle/direct/dresources/postgres_catalog.go b/bundle/direct/dresources/postgres_catalog.go index f8d3f51f6d3..b4574307118 100644 --- a/bundle/direct/dresources/postgres_catalog.go +++ b/bundle/direct/dresources/postgres_catalog.go @@ -91,7 +91,7 @@ func (r *ResourcePostgresCatalog) DoRead(ctx context.Context, id string) (*Postg return makePostgresCatalogRemote(catalog), nil } -func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, _ *Engine, config *PostgresCatalogState) (string, *PostgresCatalogRemote, error) { +func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, engine *Engine, config *PostgresCatalogState) (string, *PostgresCatalogRemote, error) { waiter, err := r.client.Postgres.CreateCatalog(ctx, postgres.CreateCatalogRequest{ CatalogId: config.CatalogId, Catalog: postgres.Catalog{ @@ -109,6 +109,7 @@ func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, _ *Engine, confi if err != nil { return "", nil, err } + engine.SaveState(ctx, waiter.Name(), config) result, err := waiter.Wait(ctx) if err != nil { diff --git a/bundle/direct/dresources/postgres_endpoint.go b/bundle/direct/dresources/postgres_endpoint.go index 91e43df1028..62d10cead17 100644 --- a/bundle/direct/dresources/postgres_endpoint.go +++ b/bundle/direct/dresources/postgres_endpoint.go @@ -141,7 +141,7 @@ func (r *ResourcePostgresEndpoint) waitForReconciliation(ctx context.Context, na } } -func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, _ *Engine, config *PostgresEndpointState) (string, *PostgresEndpointRemote, error) { +func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, engine *Engine, config *PostgresEndpointState) (string, *PostgresEndpointRemote, error) { waiter, err := r.client.Postgres.CreateEndpoint(ctx, postgres.CreateEndpointRequest{ EndpointId: config.EndpointId, Parent: config.Parent, @@ -163,6 +163,7 @@ func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, _ *Engine, conf if err != nil { return "", nil, err } + engine.SaveState(ctx, waiter.Name(), config) // Wait for the operation to complete result, err := waiter.Wait(ctx) diff --git a/bundle/direct/dresources/postgres_project.go b/bundle/direct/dresources/postgres_project.go index 35f223a534d..fcc9e24f9c7 100644 --- a/bundle/direct/dresources/postgres_project.go +++ b/bundle/direct/dresources/postgres_project.go @@ -106,7 +106,7 @@ func (r *ResourcePostgresProject) DoRead(ctx context.Context, id string) (*Postg return makePostgresProjectRemote(project), nil } -func (r *ResourcePostgresProject) DoCreate(ctx context.Context, _ *Engine, config *PostgresProjectState) (string, *PostgresProjectRemote, error) { +func (r *ResourcePostgresProject) DoCreate(ctx context.Context, engine *Engine, config *PostgresProjectState) (string, *PostgresProjectRemote, error) { waiter, err := r.client.Postgres.CreateProject(ctx, postgres.CreateProjectRequest{ ProjectId: config.ProjectId, Project: postgres.Project{ @@ -127,6 +127,7 @@ func (r *ResourcePostgresProject) DoCreate(ctx context.Context, _ *Engine, confi if err != nil { return "", nil, err } + engine.SaveState(ctx, waiter.Name(), config) // Wait for the project to be ready (long-running operation) result, err := waiter.Wait(ctx) diff --git a/bundle/direct/dresources/postgres_synced_table.go b/bundle/direct/dresources/postgres_synced_table.go index 22f3c4d89c1..d90f8fc89b6 100644 --- a/bundle/direct/dresources/postgres_synced_table.go +++ b/bundle/direct/dresources/postgres_synced_table.go @@ -91,7 +91,7 @@ func (r *ResourcePostgresSyncedTable) DoRead(ctx context.Context, id string) (*P return makePostgresSyncedTableRemote(syncedTable), nil } -func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, _ *Engine, config *PostgresSyncedTableState) (string, *PostgresSyncedTableRemote, error) { +func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, engine *Engine, config *PostgresSyncedTableState) (string, *PostgresSyncedTableRemote, error) { waiter, err := r.client.Postgres.CreateSyncedTable(ctx, postgres.CreateSyncedTableRequest{ SyncedTableId: config.SyncedTableId, SyncedTable: postgres.SyncedTable{ @@ -108,6 +108,7 @@ func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, _ *Engine, c if err != nil { return "", nil, err } + engine.SaveState(ctx, waiter.Name(), config) result, err := waiter.Wait(ctx) if err != nil { From c6dc54efdfd3bdce412e5b2532111a29496fffe8 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 2 Jun 2026 15:31:20 +0200 Subject: [PATCH 07/13] direct: save state before publishing dashboard; add acceptance tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dashboard.DoCreate now calls engine.SaveState immediately after the dashboard is created (with etag persisted), before publishDashboard. A failed publish leaves the draft tracked in state rather than orphaned; the next deploy finds it via DoRead and re-publishes via DoUpdate without recreating the dashboard. The old trash-on-publish-failure cleanup is removed — it was a fragile workaround for the lack of state persistence and is now unnecessary. Acceptance tests: - publish-failure-cleans-up-dashboard: updated to reflect the new behavior per engine (direct: draft persists with URL; terraform: existing behavior, cleaned up). Output files split to per-engine variants (out.summary.*.txt, out.dashboardrequests.*.txt). - publish-failure-retry (new, direct only): verifies end-to-end that a transient publish failure leaves the draft in state (summary shows URL, plan detects diff), and the subsequent deploy re-publishes without issuing a CREATE call. Co-authored-by: Denis Bilenko --- .../out.dashboardrequests.direct.txt | 39 +++++++++++ ...xt => out.dashboardrequests.terraform.txt} | 0 .../out.deploy.direct.txt | 1 + .../out.summary.direct.txt | 12 ++++ .../out.summary.terraform.txt | 12 ++++ .../output.txt | 12 ---- .../script | 8 +-- .../dashboard.lvdash.json | 1 + .../publish-failure-retry/databricks.yml.tmpl | 9 +++ .../publish-failure-retry/out.test.toml | 4 ++ .../publish-failure-retry/output.txt | 64 +++++++++++++++++++ .../dashboards/publish-failure-retry/script | 30 +++++++++ .../publish-failure-retry/test.toml | 12 ++++ bundle/direct/dresources/dashboard.go | 13 +--- 14 files changed, 191 insertions(+), 26 deletions(-) create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.direct.txt rename acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/{out.dashboardrequests.txt => out.dashboardrequests.terraform.txt} (100%) create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.terraform.txt create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/dashboard.lvdash.json create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/databricks.yml.tmpl create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/out.test.toml create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/script create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.direct.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.direct.txt new file mode 100644 index 00000000000..3e94771bdac --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.direct.txt @@ -0,0 +1,39 @@ +{ + "method": "POST", + "path": "/api/2.0/workspace/mkdirs", + "body": { + "path": "/Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default/artifacts/.internal" + } +} +{ + "method": "POST", + "path": "/api/2.0/workspace/mkdirs", + "body": { + "path": "/Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default/files" + } +} +{ + "method": "POST", + "path": "/api/2.0/workspace/mkdirs", + "body": { + "path": "/Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default/resources" + } +} +{ + "method": "POST", + "path": "/api/2.0/lakeview/dashboards", + "body": { + "display_name": "my dashboard", + "parent_path": "/Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default/resources", + "serialized_dashboard": "{\"pages\":[{\"name\":\"test-page\",\"displayName\":\"Test Dashboard\"}]}\n", + "warehouse_id": "doesnotexist" + } +} +{ + "method": "POST", + "path": "/api/2.0/lakeview/dashboards/[DASHBOARD_ID]/published", + "body": { + "embed_credentials": false, + "warehouse_id": "doesnotexist" + } +} diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.terraform.txt similarity index 100% rename from acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.txt rename to acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.terraform.txt diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.deploy.direct.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.deploy.direct.txt index 705bd09cb32..84918b848bf 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.deploy.direct.txt +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.deploy.direct.txt @@ -9,5 +9,6 @@ HTTP Status: 400 Bad Request API error_code: RESOURCE_DOES_NOT_EXIST API message: Warehouse doesnotexist does not exist +Updating deployment state... Exit code: 1 diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt new file mode 100644 index 00000000000..52c168b5302 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt @@ -0,0 +1,12 @@ + +>>> [CLI] bundle summary +Name: publish-failure-cleans-up-dashboard +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default +Resources: + Dashboards: + dashboard1: + Name: my dashboard + URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?o=[NUMID] diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.terraform.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.terraform.txt new file mode 100644 index 00000000000..37d00329c77 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.terraform.txt @@ -0,0 +1,12 @@ + +>>> [CLI] bundle summary +Name: publish-failure-cleans-up-dashboard +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default +Resources: + Dashboards: + dashboard1: + Name: my dashboard + URL: (not deployed) diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/output.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/output.txt index 37d00329c77..e69de29bb2d 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/output.txt +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/output.txt @@ -1,12 +0,0 @@ - ->>> [CLI] bundle summary -Name: publish-failure-cleans-up-dashboard -Target: default -Workspace: - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default -Resources: - Dashboards: - dashboard1: - Name: my dashboard - URL: (not deployed) diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script index 693a4320211..e8062b99e6e 100755 --- a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script @@ -4,9 +4,9 @@ envsubst < databricks.yml.tmpl > databricks.yml # Deploy the dashboard. The dashboard will be created but publish will fail because the warehouse does not exist. errcode trace $CLI bundle deploy &> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt -trace $CLI bundle summary +# After publish failure the dashboard draft should be in state (direct) or cleaned up (terraform). +trace $CLI bundle summary &>> out.summary.$DATABRICKS_BUNDLE_ENGINE.txt -# API should record a DELETE call to clean up the draft dashboard that was not published. -# Request sequence is identical across terraform and direct modes. +# API request sequence differs between engines (direct: no DELETE; terraform: DELETE to clean up). unset MSYS_NO_PATHCONV -print_requests.py //lakeview/dashboards //workspace/mkdirs > out.dashboardrequests.txt +print_requests.py //lakeview/dashboards //workspace/mkdirs > out.dashboardrequests.$DATABRICKS_BUNDLE_ENGINE.txt diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/dashboard.lvdash.json b/acceptance/bundle/resources/dashboards/publish-failure-retry/dashboard.lvdash.json new file mode 100644 index 00000000000..0bfc5797ff0 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/dashboard.lvdash.json @@ -0,0 +1 @@ +{"pages":[{"name":"test-page","displayName":"Test Dashboard"}]} diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/databricks.yml.tmpl b/acceptance/bundle/resources/dashboards/publish-failure-retry/databricks.yml.tmpl new file mode 100644 index 00000000000..553305fbfde --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/databricks.yml.tmpl @@ -0,0 +1,9 @@ +bundle: + name: publish-failure-retry + +resources: + dashboards: + dashboard1: + display_name: my dashboard + warehouse_id: someid + file_path: ./dashboard.lvdash.json diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/out.test.toml b/acceptance/bundle/resources/dashboards/publish-failure-retry/out.test.toml new file mode 100644 index 00000000000..a29f11b9ab2 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/out.test.toml @@ -0,0 +1,4 @@ +Local = true +Cloud = false +RequiresWarehouse = true +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt new file mode 100644 index 00000000000..05fd57f6c99 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt @@ -0,0 +1,64 @@ + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default/files... +Deploying resources... +Error: cannot create resources.dashboards.dashboard1: Fault injected by test. (400 INJECTED) + +Endpoint: POST [DATABRICKS_URL]/api/2.0/lakeview/dashboards/[DASHBOARD_ID]/published +HTTP Status: 400 Bad Request +API error_code: INJECTED +API message: Fault injected by test. + +Updating deployment state... + +Exit code: 1 + +>>> [CLI] bundle summary +Name: publish-failure-retry +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default +Resources: + Dashboards: + dashboard1: + Name: my dashboard + URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?o=[NUMID] + +>>> [CLI] bundle plan +update dashboards.dashboard1 + +Plan: 0 to add, 1 to change, 0 to delete, 0 unchanged + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! +{ + "method": "PATCH", + "path": "/api/2.0/lakeview/dashboards/[DASHBOARD_ID]", + "body": { + "display_name": "my dashboard", + "parent_path": "/Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default/resources", + "serialized_dashboard": "{\"pages\":[{\"name\":\"test-page\",\"displayName\":\"Test Dashboard\"}]}\n", + "warehouse_id": "someid" + } +} +{ + "method": "POST", + "path": "/api/2.0/lakeview/dashboards/[DASHBOARD_ID]/published", + "body": { + "embed_credentials": false, + "warehouse_id": "someid" + } +} + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.dashboards.dashboard1 + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default + +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/script b/acceptance/bundle/resources/dashboards/publish-failure-retry/script new file mode 100644 index 00000000000..3b430628865 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/script @@ -0,0 +1,30 @@ +envsubst < databricks.yml.tmpl > databricks.yml + +cleanup() { + trace $CLI bundle destroy --auto-approve + rm -f out.requests.txt +} +trap cleanup EXIT + +# Inject a single publish failure so the first deploy creates the dashboard +# draft but fails to publish it. +fault.py "POST /api/2.0/lakeview/dashboards/*" 400 0 1 + +# First deploy: dashboard is created and saved to state, but publish fails. +errcode trace $CLI bundle deploy + +# Dashboard should be in state (tracked) despite the publish failure. +trace $CLI bundle summary + +# Plan should show that publishing is still needed. +trace $CLI bundle plan + +# Discard first-deploy requests so the output only contains second-deploy +# calls, making it easy to confirm no CREATE was issued. +rm out.requests.txt + +# Second deploy: fault is gone; must publish the existing draft, not create a new one. +trace $CLI bundle deploy + +# Confirm: second deploy issued an UPDATE and a PUBLISH call but no CREATE. +print_requests.py //lakeview/dashboards diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml b/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml new file mode 100644 index 00000000000..ac1be44882d --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml @@ -0,0 +1,12 @@ +Cloud = false +Local = true +RecordRequests = true + +# Only run with the direct engine: the test verifies direct engine's SaveState +# behavior (draft persists on publish failure and is re-published on retry). +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = ["direct"] + +[[Repls]] +Old = "[0-9a-f]{32}" +New = "[DASHBOARD_ID]" diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index e0917c490b1..995b5e2f348 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -10,7 +10,6 @@ import ( "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/deployplan" - "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/structs/structpath" "github.com/databricks/cli/libs/utils" "github.com/databricks/databricks-sdk-go" @@ -299,6 +298,9 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *Engine, config // Persist the etag in state. config.Etag = createResp.Etag + // Save state before publishing so an interrupted publish leaves a tracked + // draft rather than an orphan. The next deploy finds the draft in state and + // re-publishes via DoUpdate without recreating the dashboard. engine.SaveState(ctx, createResp.DashboardId, config) var publishResp *dashboards.PublishedDashboard @@ -306,16 +308,7 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *Engine, config if config.Published { publishResp, err = r.publishDashboard(ctx, createResp.DashboardId, config) if err != nil { - // If the publish fails, we should delete the dashboard to avoid leaving it in a bad state. - deleteErr := r.client.Lakeview.Trash(ctx, dashboards.TrashDashboardRequest{ - DashboardId: createResp.DashboardId, - }) - if deleteErr != nil { - log.Warnf(ctx, "failed to delete draft dashboard %s after publish failed: %v", createResp.DashboardId, deleteErr) - return "", nil, deleteErr - } return "", nil, err - // QQQ: instead, we could store partial state with published=false } } From 16a59a4ad34549dd22babe334e3ef74bc13c65b8 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 2 Jun 2026 22:04:04 +0200 Subject: [PATCH 08/13] acceptance/dashboards: normalize ?o=/?w= workspace param in URL replacements The local testserver uses ?o= and cloud uses ?w= for the workspace/org ID in dashboard published URLs. Add a parent-level [[Repls]] rule that maps both to ?[WSPARAM]= so the output files are environment-independent. Co-authored-by: Denis Bilenko --- .../out.summary.direct.txt | 2 +- .../resources/dashboards/publish-failure-retry/output.txt | 2 +- acceptance/bundle/resources/dashboards/test.toml | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt index 52c168b5302..39ec72db595 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt @@ -9,4 +9,4 @@ Resources: Dashboards: dashboard1: Name: my dashboard - URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?o=[NUMID] + URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?[WSPARAM]=[NUMID] diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt index 05fd57f6c99..f5ab0131d7d 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt @@ -23,7 +23,7 @@ Resources: Dashboards: dashboard1: Name: my dashboard - URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?o=[NUMID] + URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?[WSPARAM]=[NUMID] >>> [CLI] bundle plan update dashboards.dashboard1 diff --git a/acceptance/bundle/resources/dashboards/test.toml b/acceptance/bundle/resources/dashboards/test.toml index d932390098f..ff86f677620 100644 --- a/acceptance/bundle/resources/dashboards/test.toml +++ b/acceptance/bundle/resources/dashboards/test.toml @@ -19,3 +19,8 @@ New = "[ETAG]" [[Repls]] Old = "\"[0-9]{8,}\"" New = "[ETAG]" + +# Dashboard published URLs use ?o= (local testserver) or ?w= (cloud) for the workspace/org ID. +[[Repls]] +Old = "\\?(o|w)=" +New = "?[WSPARAM]=" From fd62e917e6723dd2b6fe48125225b3051e43a20a Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 2 Jun 2026 22:20:39 +0200 Subject: [PATCH 09/13] acceptance/dashboards: fix ?o=/?w= workspace param normalization Use per-test [[Repls]] rules (matching raw digits) in the two publish-failure tests so the URL parameter is normalized to ?[WSPARAM]=[NUMID] regardless of whether the testserver or cloud environment is used. Revert the over-broad parent-level rule that broke detect-change's existing ?[ow]=... cleanup. Co-authored-by: Denis Bilenko --- .../dashboards/publish-failure-cleans-up-dashboard/test.toml | 5 +++++ .../resources/dashboards/publish-failure-retry/test.toml | 5 +++++ acceptance/bundle/resources/dashboards/test.toml | 5 ----- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/test.toml b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/test.toml index a8ede7b099c..ed07dda2980 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/test.toml +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/test.toml @@ -10,3 +10,8 @@ Response.Body = '{"error_code": "RESOURCE_DOES_NOT_EXIST", "message": "Warehouse [[Repls]] Old = "[0-9a-f]{32}" New = "[DASHBOARD_ID]" + +# Dashboard published URLs use ?o= (local testserver) or ?w= (cloud) for the workspace/org ID. +[[Repls]] +Old = '\?[ow]=\d+' +New = "?[WSPARAM]=[NUMID]" diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml b/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml index ac1be44882d..3d4dd86612e 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml @@ -10,3 +10,8 @@ DATABRICKS_BUNDLE_ENGINE = ["direct"] [[Repls]] Old = "[0-9a-f]{32}" New = "[DASHBOARD_ID]" + +# Dashboard published URLs use ?o= (local testserver) or ?w= (cloud) for the workspace/org ID. +[[Repls]] +Old = '\?[ow]=\d+' +New = "?[WSPARAM]=[NUMID]" diff --git a/acceptance/bundle/resources/dashboards/test.toml b/acceptance/bundle/resources/dashboards/test.toml index ff86f677620..d932390098f 100644 --- a/acceptance/bundle/resources/dashboards/test.toml +++ b/acceptance/bundle/resources/dashboards/test.toml @@ -19,8 +19,3 @@ New = "[ETAG]" [[Repls]] Old = "\"[0-9]{8,}\"" New = "[ETAG]" - -# Dashboard published URLs use ?o= (local testserver) or ?w= (cloud) for the workspace/org ID. -[[Repls]] -Old = "\\?(o|w)=" -New = "?[WSPARAM]=" From 76a11d93b591500b50ec3b00681192344ed0a3cf Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 2 Jun 2026 22:41:17 +0200 Subject: [PATCH 10/13] acceptance/dashboards: fix &>> redirect for bash 3.2 compatibility macOS ships bash 3.2 which does not support &>> (bash 4+). Replace with >> file 2>&1 which works on all bash versions. Co-authored-by: Denis Bilenko --- .../dashboards/publish-failure-cleans-up-dashboard/script | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script index e8062b99e6e..e28caa3d0de 100755 --- a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script @@ -5,7 +5,7 @@ envsubst < databricks.yml.tmpl > databricks.yml errcode trace $CLI bundle deploy &> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt # After publish failure the dashboard draft should be in state (direct) or cleaned up (terraform). -trace $CLI bundle summary &>> out.summary.$DATABRICKS_BUNDLE_ENGINE.txt +trace $CLI bundle summary >> out.summary.$DATABRICKS_BUNDLE_ENGINE.txt 2>&1 # API request sequence differs between engines (direct: no DELETE; terraform: DELETE to clean up). unset MSYS_NO_PATHCONV From 30058faae3e06ad638e3baf5224eaf3c5b779fe5 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 3 Jun 2026 10:51:38 +0200 Subject: [PATCH 11/13] acceptance/dashboards: unset MSYS_NO_PATHCONV before fault.py on Windows MSYS_NO_PATHCONV=1 (set in the parent test.toml) prevents MSYS2 from converting POSIX paths to Windows paths, causing Python to receive a broken path (/c/a/... instead of C:\a\...) when invoking fault.py. Unset it before the fault.py call, matching the pattern used by other dashboard scripts before their bin helper invocations. Co-authored-by: Denis Bilenko --- .../bundle/resources/dashboards/publish-failure-retry/script | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/script b/acceptance/bundle/resources/dashboards/publish-failure-retry/script index 3b430628865..2096b92ee67 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/script +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/script @@ -6,6 +6,10 @@ cleanup() { } trap cleanup EXIT +# unset MSYS_NO_PATHCONV so MSYS2 converts the script path to a Windows path +# when invoking the Python interpreter (required for fault.py to be found on Windows). +unset MSYS_NO_PATHCONV + # Inject a single publish failure so the first deploy creates the dashboard # draft but fails to publish it. fault.py "POST /api/2.0/lakeview/dashboards/*" 400 0 1 From cdd95c87b7ffccc494e4c37758032c22a42940b2 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 3 Jun 2026 13:33:39 +0200 Subject: [PATCH 12/13] direct: Engine dedup+logging; dashboard saves draft state as Published=false MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Engine.SaveState: - Accepts resourceKey for logging: "SaveState: resources.X id=Y N bytes: {...}" - Skips WAL write if state is unchanged (structdiff.IsEqual), logging a skip message. - Records the last saved value in e.lastSaved for subsequent comparisons. Dashboard DoCreate: - Saves intermediate state with Published=false (the actual draft state) instead of the user's Published=true. This ensures the planner sees a real diff (false→true) on the next deploy if publish is interrupted, rather than treating the resource as up-to-date and silently skipping the publish. publish-failure-retry acceptance test: - Adds `bundle plan -o json` and extracts the published change to verify old=false, new=true in the plan after a failed publish. README.md: update stale SetID+SaveState reference to current SaveState(ctx, id, state) API. Co-authored-by: Denis Bilenko --- .../out.plan_published_change.json | 6 ++++ .../publish-failure-retry/output.txt | 11 +++++-- .../dashboards/publish-failure-retry/script | 6 ++-- bundle/direct/apply.go | 4 +-- bundle/direct/dresources/README.md | 2 +- bundle/direct/dresources/dashboard.go | 10 +++++-- bundle/direct/dresources/engine.go | 30 +++++++++++++++---- 7 files changed, 52 insertions(+), 17 deletions(-) create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/out.plan_published_change.json diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/out.plan_published_change.json b/acceptance/bundle/resources/dashboards/publish-failure-retry/out.plan_published_change.json new file mode 100644 index 00000000000..0a05a1b42d3 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/out.plan_published_change.json @@ -0,0 +1,6 @@ +{ + "action": "update", + "old": false, + "new": true, + "remote": false +} diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt index f5ab0131d7d..b1f7b6bb3b3 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt @@ -25,10 +25,15 @@ Resources: Name: my dashboard URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?[WSPARAM]=[NUMID] ->>> [CLI] bundle plan -update dashboards.dashboard1 +>>> [CLI] bundle plan -o json -Plan: 0 to add, 1 to change, 0 to delete, 0 unchanged +>>> cat out.plan_published_change.json +{ + "action": "update", + "old": false, + "new": true, + "remote": false +} >>> [CLI] bundle deploy Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default/files... diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/script b/acceptance/bundle/resources/dashboards/publish-failure-retry/script index 2096b92ee67..c231427120b 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/script +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/script @@ -20,8 +20,10 @@ errcode trace $CLI bundle deploy # Dashboard should be in state (tracked) despite the publish failure. trace $CLI bundle summary -# Plan should show that publishing is still needed. -trace $CLI bundle plan +# Plan should show published: false (saved state) -> true (desired). +# Capture the Changes entry for 'published' to verify the diff direction. +trace $CLI bundle plan -o json | jq '.plan["resources.dashboards.dashboard1"].changes.published' > out.plan_published_change.json +trace cat out.plan_published_change.json # Discard first-deploy requests so the output only contains second-deploy # calls, making it easy to confirm no CREATE was issued. diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index 3380e5018b6..dd631d5e14a 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -51,7 +51,7 @@ func (d *DeploymentUnit) Deploy(ctx context.Context, db *dstate.DeploymentState, } func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, newState any) error { - engine := dresources.NewEngine(d.Adapter.StateType(), func(id string, x any) error { + engine := dresources.NewEngine(d.ResourceKey, d.Adapter.StateType(), func(id string, x any) error { return db.SaveState(d.ResourceKey, id, x, d.DependsOn) }) @@ -126,7 +126,7 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("internal error: DoUpdate not implemented for resource %s", d.ResourceKey) } - engine := dresources.NewEngine(d.Adapter.StateType(), func(_ string, x any) error { + engine := dresources.NewEngine(d.ResourceKey, d.Adapter.StateType(), func(_ string, x any) error { return db.SaveState(d.ResourceKey, id, x, d.DependsOn) }) remoteState, err := retryOnTransient(ctx, func() (any, error) { diff --git a/bundle/direct/dresources/README.md b/bundle/direct/dresources/README.md index b9c6e4754e2..1c2e44d68f0 100644 --- a/bundle/direct/dresources/README.md +++ b/bundle/direct/dresources/README.md @@ -36,7 +36,7 @@ If a resource has fields that must not be sent in updates (deploy-only, lifecycl ## Async APIs -For resources whose create or update is asynchronous, poll inline inside `DoCreate`/`DoUpdate` after the initial API call. To prevent orphaning if deployment is interrupted during a long wait, call `engine.SetID(id)` then `engine.SaveState(config)` immediately after the resource is created and before any waiting. The framework provides a `*Engine` as the second argument to both methods. +For resources whose create or update is asynchronous, poll inline inside `DoCreate`/`DoUpdate` after the initial API call. To prevent orphaning if deployment is interrupted during a long wait, call `engine.SaveState(ctx, id, config)` immediately after the resource is created and before any waiting. The framework provides a `*Engine` as the second argument to both methods. ## Slice ordering: KeyedSlices diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index 995b5e2f348..36158237194 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -298,10 +298,14 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *Engine, config // Persist the etag in state. config.Etag = createResp.Etag - // Save state before publishing so an interrupted publish leaves a tracked - // draft rather than an orphan. The next deploy finds the draft in state and - // re-publishes via DoUpdate without recreating the dashboard. + // Save state with Published=false: the dashboard exists as a draft; publish + // has not succeeded yet. Using Published=false ensures the planner sees a + // real diff (false→true) if publish is interrupted, triggering a DoUpdate + // on the next deploy instead of silently treating the resource as up-to-date. + savedPublished := config.Published + config.Published = false engine.SaveState(ctx, createResp.DashboardId, config) + config.Published = savedPublished var publishResp *dashboards.PublishedDashboard // Note, today config.Published is always true (we do not have this field in input config). diff --git a/bundle/direct/dresources/engine.go b/bundle/direct/dresources/engine.go index 46deca6600d..0172f287ff1 100644 --- a/bundle/direct/dresources/engine.go +++ b/bundle/direct/dresources/engine.go @@ -2,35 +2,41 @@ package dresources import ( "context" + "encoding/json" "fmt" "reflect" + "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" + "github.com/databricks/cli/libs/structs/structdiff" ) // Engine provides state persistence to resource implementations. // Pass it to DoCreate or DoUpdate to save intermediate state before long-running // wait operations, so the resource is not orphaned if deployment is interrupted. type Engine struct { - id string - stateType reflect.Type - saveFunc func(id string, x any) error + resourceKey string + id string + stateType reflect.Type + saveFunc func(id string, x any) error + lastSaved any } // NewEngine creates an Engine with the given state type and save function. // The framework calls this before invoking DoCreate or DoUpdate. -func NewEngine(stateType reflect.Type, saveFunc func(id string, x any) error) *Engine { - return &Engine{id: "", stateType: stateType, saveFunc: saveFunc} +func NewEngine(resourceKey string, stateType reflect.Type, saveFunc func(id string, x any) error) *Engine { + return &Engine{resourceKey: resourceKey, id: "", stateType: stateType, saveFunc: saveFunc, lastSaved: nil} } // NewNopEngine creates an Engine that discards all saves. Use in tests. func NewNopEngine(stateType reflect.Type) *Engine { - return NewEngine(stateType, func(_ string, _ any) error { return nil }) + return NewEngine("", stateType, func(_ string, _ any) error { return nil }) } // SaveState saves the resource state. id must be the resource's identifier; on // the first call it is recorded, and subsequent calls panic if a different id is // passed. x must be a pointer to the same struct type as the resource's state. +// If the state is identical to what was last saved, the write is skipped. // Failures to persist state are logged but do not abort the deployment — the // resource already exists and aborting would not undo its creation. func (e *Engine) SaveState(ctx context.Context, id string, x any) { @@ -43,7 +49,19 @@ func (e *Engine) SaveState(ctx context.Context, id string, x any) { if xt != e.stateType { panic(fmt.Sprintf("SaveState: type mismatch: expected %v, got %v", e.stateType, xt)) } + if e.lastSaved != nil && structdiff.IsEqual(e.lastSaved, x) { + log.Debugf(ctx, "SaveState: %s id=%s: skipping, state unchanged", e.resourceKey, id) + return + } + b, _ := json.Marshal(x) + preview := string(b) + if len(preview) > 100 { + preview = preview[:100] + } + log.Debugf(ctx, "SaveState: %s id=%s %d bytes: %s", e.resourceKey, id, len(b), preview) if err := e.saveFunc(e.id, x); err != nil { logdiag.LogError(ctx, err) + return } + e.lastSaved = x } From df19a643ff0466649088c775b342f48c873bd7b5 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 3 Jun 2026 17:35:03 +0200 Subject: [PATCH 13/13] direct: inline WaitAfterCreate/WaitAfterUpdate into sql_warehouse DoCreate/DoUpdate Mergiraf reintroduced the standalone WaitAfterCreate and WaitAfterUpdate methods during the rebase against main (sql_warehouse.go had a merge conflict). Inline their wait logic directly into DoCreate and DoUpdate and remove the standalone methods, consistent with the Engine callback pattern used by other resources. Co-authored-by: Denis Bilenko --- bundle/direct/dresources/sql_warehouse.go | 74 ++++++++++------------- 1 file changed, 32 insertions(+), 42 deletions(-) diff --git a/bundle/direct/dresources/sql_warehouse.go b/bundle/direct/dresources/sql_warehouse.go index 5dbd9d1d621..1225420febe 100644 --- a/bundle/direct/dresources/sql_warehouse.go +++ b/bundle/direct/dresources/sql_warehouse.go @@ -123,7 +123,31 @@ func (r *ResourceSqlWarehouse) DoCreate(ctx context.Context, _ *Engine, config * if err != nil { return "", nil, err } - return waiter.Id, nil, nil + id := waiter.Id + + if config.Lifecycle == nil || config.Lifecycle.Started == nil { + return id, nil, nil + } + + // Always wait for RUNNING first: warehouses start asynchronously. + _, err = r.client.Warehouses.WaitGetWarehouseRunning(ctx, id, 20*time.Minute, nil) + if err != nil { + return "", nil, err + } + + if !*config.Lifecycle.Started { + // started=false: stop the warehouse after it reaches RUNNING. + stopWaiter, err := r.client.Warehouses.Stop(ctx, sql.StopRequest{Id: id}) + if err != nil { + return "", nil, err + } + _, err = stopWaiter.Get() + if err != nil { + return "", nil, err + } + } + + return id, nil, nil } // hasWarehouseChanges reports whether the plan entry contains any Update changes @@ -170,59 +194,25 @@ func (r *ResourceSqlWarehouse) DoUpdate(ctx context.Context, _ *Engine, id strin desiredStarted := *config.Lifecycle.Started alreadyRunning := remoteWarehouseIsRunning(entry) if desiredStarted && !alreadyRunning { - // lifecycle.started=true: fire Start; WaitAfterUpdate polls for RUNNING. _, err := r.client.Warehouses.Start(ctx, sql.StartRequest{Id: id}) - return nil, err + if err != nil { + return nil, err + } } else if !desiredStarted && alreadyRunning { - // lifecycle.started=false: fire Stop; WaitAfterUpdate polls for STOPPED. _, err := r.client.Warehouses.Stop(ctx, sql.StopRequest{Id: id}) - return nil, err - } - - return nil, nil -} - -// WaitAfterUpdate waits for the warehouse to reach the desired lifecycle state after DoUpdate. -func (r *ResourceSqlWarehouse) WaitAfterUpdate(ctx context.Context, id string, config *SqlWarehouseState) (*SqlWarehouseRemote, error) { - if config.Lifecycle == nil || config.Lifecycle.Started == nil { - return nil, nil + if err != nil { + return nil, err + } } - if *config.Lifecycle.Started { + if desiredStarted { _, err := r.client.Warehouses.WaitGetWarehouseRunning(ctx, id, 20*time.Minute, nil) return nil, err } - _, err := r.client.Warehouses.WaitGetWarehouseStopped(ctx, id, 20*time.Minute, nil) return nil, err } -// WaitAfterCreate waits for the warehouse to be ready, then stops it if lifecycle.started=false. -// Warehouses are created in a starting state; WaitGetWarehouseRunning waits for them to be RUNNING. -func (r *ResourceSqlWarehouse) WaitAfterCreate(ctx context.Context, id string, config *SqlWarehouseState) (*SqlWarehouseRemote, error) { - if config.Lifecycle == nil || config.Lifecycle.Started == nil { - return nil, nil - } - - // Always wait for RUNNING first: warehouses start asynchronously. - _, err := r.client.Warehouses.WaitGetWarehouseRunning(ctx, id, 20*time.Minute, nil) - if err != nil { - return nil, err - } - - if !*config.Lifecycle.Started { - // started=false: stop the warehouse after it reaches RUNNING. - stopWaiter, err := r.client.Warehouses.Stop(ctx, sql.StopRequest{Id: id}) - if err != nil { - return nil, err - } - _, err = stopWaiter.Get() - return nil, err - } - - return nil, nil -} - func (r *ResourceSqlWarehouse) DoDelete(ctx context.Context, oldID string, _ *SqlWarehouseState) error { return r.client.Warehouses.DeleteById(ctx, oldID) }