From cf6ef38472dda66af88af1d567c32e6de3a3cf42 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 1 Jun 2026 21:55:23 +0200 Subject: [PATCH] bundle/direct: conditionally upgrade state version when opted into DMS Normal deploys keep writing state_version 2. When the bundle opts into experimental.record_deployment_history, the deploy upgrades the local state to dmsStateVersion (3) and stamps a new DmsVersion header field (dms_version), so future DMS breaking changes or minimum-CLI requirements can be gated on what the state was written with. migrateState now reads up to dmsStateVersion but still auto-migrates only up to the baseline currentStateVersion, so a v3 state is neither auto-applied nor downgraded. The upgrade is applied explicitly at deploy time (Deploy in bundle/phases/deploy.go), never as an automatic migration. Co-authored-by: Isaac --- acceptance/acceptance_test.go | 7 ++ .../state-upgrade/databricks.dms.yml | 10 ++ .../state-upgrade/databricks.yml | 7 ++ .../state-upgrade/out.test.toml | 3 + .../state-upgrade/output.txt | 52 ++++++++++ .../state-upgrade/script | 35 +++++++ .../state-upgrade/test.toml | 10 ++ .../terraform-unsupported/databricks.yml | 10 ++ .../terraform-unsupported/out.test.toml | 3 + .../terraform-unsupported/output.txt | 6 ++ .../terraform-unsupported/script | 3 + .../terraform-unsupported/test.toml | 8 ++ .../bundle/state/future_version/output.txt | 2 +- bundle/configsync/diff.go | 2 +- bundle/configsync/variables.go | 2 +- bundle/direct/bind.go | 12 +-- bundle/direct/dstate/migrate.go | 21 ++-- bundle/direct/dstate/migrate_test.go | 95 +++++++++++++++++++ bundle/direct/dstate/state.go | 78 +++++++++++++-- bundle/direct/dstate/state_test.go | 66 +++++++++++-- bundle/phases/deploy.go | 6 ++ cmd/bundle/generate/dashboard.go | 2 +- cmd/bundle/utils/process.go | 12 ++- 23 files changed, 417 insertions(+), 35 deletions(-) create mode 100644 acceptance/bundle/deploy/record-deployment-history/state-upgrade/databricks.dms.yml create mode 100644 
acceptance/bundle/deploy/record-deployment-history/state-upgrade/databricks.yml create mode 100644 acceptance/bundle/deploy/record-deployment-history/state-upgrade/out.test.toml create mode 100644 acceptance/bundle/deploy/record-deployment-history/state-upgrade/output.txt create mode 100644 acceptance/bundle/deploy/record-deployment-history/state-upgrade/script create mode 100644 acceptance/bundle/deploy/record-deployment-history/state-upgrade/test.toml create mode 100644 acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/databricks.yml create mode 100644 acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/out.test.toml create mode 100644 acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/output.txt create mode 100644 acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/script create mode 100644 acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/test.toml create mode 100644 bundle/direct/dstate/migrate_test.go diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index f6ec0805fb2..733b431f46c 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -277,6 +277,13 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int { cli293Path := DownloadCLI(t, buildDir, "0.293.0") t.Setenv("CLI_293", cli293Path) repls.SetPath(cli293Path, "[CLI_293]") + + // v1.0.0 understands state schema versions only up to 2. Used by tests + // asserting that an older CLI rejects newer state (e.g. the v3 state + // written when a bundle opts into the deployment metadata service). 
+ cliV1Path := DownloadCLI(t, buildDir, "1.0.0") + t.Setenv("CLI_V1", cliV1Path) + repls.SetPath(cliV1Path, "[CLI_V1]") } paths := []string{ diff --git a/acceptance/bundle/deploy/record-deployment-history/state-upgrade/databricks.dms.yml b/acceptance/bundle/deploy/record-deployment-history/state-upgrade/databricks.dms.yml new file mode 100644 index 00000000000..a0803723918 --- /dev/null +++ b/acceptance/bundle/deploy/record-deployment-history/state-upgrade/databricks.dms.yml @@ -0,0 +1,10 @@ +bundle: + name: test-rdh-state-upgrade + +experimental: + record_deployment_history: true + +resources: + jobs: + foo: + name: foo-job-renamed diff --git a/acceptance/bundle/deploy/record-deployment-history/state-upgrade/databricks.yml b/acceptance/bundle/deploy/record-deployment-history/state-upgrade/databricks.yml new file mode 100644 index 00000000000..a721f66492b --- /dev/null +++ b/acceptance/bundle/deploy/record-deployment-history/state-upgrade/databricks.yml @@ -0,0 +1,7 @@ +bundle: + name: test-rdh-state-upgrade + +resources: + jobs: + foo: + name: foo-job diff --git a/acceptance/bundle/deploy/record-deployment-history/state-upgrade/out.test.toml b/acceptance/bundle/deploy/record-deployment-history/state-upgrade/out.test.toml new file mode 100644 index 00000000000..e90b6d5d1ba --- /dev/null +++ b/acceptance/bundle/deploy/record-deployment-history/state-upgrade/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/deploy/record-deployment-history/state-upgrade/output.txt b/acceptance/bundle/deploy/record-deployment-history/state-upgrade/output.txt new file mode 100644 index 00000000000..5f4f74cc0a9 --- /dev/null +++ b/acceptance/bundle/deploy/record-deployment-history/state-upgrade/output.txt @@ -0,0 +1,52 @@ + +=== without the flag: state stays at the baseline schema version +>>> [CLI] bundle deploy +Uploading bundle files to 
/Workspace/Users/[USERNAME]/.bundle/test-rdh-state-upgrade/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> print_state.py +{ + "state_version": 2, + "dms_version": null +} + +=== with experimental.record_deployment_history: state upgrades and records the DMS version +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-rdh-state-upgrade/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> print_state.py +{ + "state_version": 3, + "dms_version": 1 +} + +=== an older CLI (v1.0.0, max state version 2) rejects operations on the upgraded state +>>> errcode [CLI_V1] bundle plan +Warning: unknown field: record_deployment_history + at experimental + in databricks.yml:5:3 + +Error: migrating state [TEST_TMP_DIR]/.databricks/bundle/default/resources.json: state version 3 is newer than supported version 2; upgrade the CLI + + +Exit code: 1 + +=== with the flag, a state written by a newer record_deployment_history version is rejected +>>> update_file.py .databricks/bundle/default/resources.json "dms_version": 1 "dms_version": 2 + +>>> errcode [CLI] bundle plan +Error: record_deployment_history state version 2 is newer than supported version 1; upgrade the CLI + + +Exit code: 1 + +=== without the flag, the same state is accepted (the check is gated on opt-in) +>>> update_file.py databricks.yml record_deployment_history: true record_deployment_history: false + +>>> errcode [CLI] bundle plan +Plan: 0 to add, 0 to change, 0 to delete, 1 unchanged diff --git a/acceptance/bundle/deploy/record-deployment-history/state-upgrade/script b/acceptance/bundle/deploy/record-deployment-history/state-upgrade/script new file mode 100644 index 00000000000..0d8099c2df3 --- /dev/null +++ b/acceptance/bundle/deploy/record-deployment-history/state-upgrade/script @@ -0,0 +1,35 @@ +# State-version lifecycle for the DMS preview 
(experimental.record_deployment_history): +# 1. Without the flag, deploys keep writing the baseline state version. +# 2. Opting in upgrades the state version and records the DMS version. +# 3. An older CLI that predates the DMS state version refuses to operate on the +# upgraded state and tells the user to upgrade, so it cannot mishandle it. +# 4. With the flag set, a state stamped with a newer record_deployment_history +# version than this CLI knows is also refused (the state schema version is +# one we support). Without the flag the check is skipped. + +title "without the flag: state stays at the baseline schema version" +trace $CLI bundle deploy +trace print_state.py | jq '{state_version, dms_version}' + +title "with experimental.record_deployment_history: state upgrades and records the DMS version" +# Also renames the job so the deploy writes state (a no-op deploy would not +# rewrite the state file and the upgrade would not be observable yet). +cp databricks.dms.yml databricks.yml +trace $CLI bundle deploy +trace print_state.py | jq '{state_version, dms_version}' + +title "an older CLI (v1.0.0, max state version 2) rejects operations on the upgraded state" +trace errcode $CLI_V1 bundle plan 2>&1 | contains.py "state version 3 is newer than supported version 2; upgrade the CLI" + +title "with the flag, a state written by a newer record_deployment_history version is rejected" +# The state must hold a record_deployment_history version greater than this CLI's +# supported version (dmsVersion) to be rejected, so we set supported+1. +# TODO: when dmsVersion is bumped, change the value written below to the new +# supported+1. The contains.py patterns omit the version numbers and need no +# change, but output.txt records them and must be regenerated. 
+trace update_file.py .databricks/bundle/default/resources.json '"dms_version": 1' '"dms_version": 2' +trace errcode $CLI bundle plan 2>&1 | contains.py "record_deployment_history state version" "is newer than supported version" "upgrade the CLI" + +title "without the flag, the same state is accepted (the check is gated on opt-in)" +trace update_file.py databricks.yml "record_deployment_history: true" "record_deployment_history: false" +trace errcode $CLI bundle plan 2>&1 | contains.py "!is newer than supported version" diff --git a/acceptance/bundle/deploy/record-deployment-history/state-upgrade/test.toml b/acceptance/bundle/deploy/record-deployment-history/state-upgrade/test.toml new file mode 100644 index 00000000000..70992ebb996 --- /dev/null +++ b/acceptance/bundle/deploy/record-deployment-history/state-upgrade/test.toml @@ -0,0 +1,10 @@ +Local = true +Cloud = false + +# databricks.yml is rewritten in-place by the script (flag added in the second step). +Ignore = [".databricks", "databricks.yml"] + +# The state-version upgrade only applies to the direct engine; terraform stores +# state differently and would diverge. 
+[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/databricks.yml b/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/databricks.yml new file mode 100644 index 00000000000..3287d83a22b --- /dev/null +++ b/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: test-rdh-terraform-unsupported + +experimental: + record_deployment_history: true + +resources: + jobs: + foo: + name: foo-job diff --git a/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/out.test.toml b/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/out.test.toml new file mode 100644 index 00000000000..65156e0457c --- /dev/null +++ b/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform"] diff --git a/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/output.txt b/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/output.txt new file mode 100644 index 00000000000..7346d37fae6 --- /dev/null +++ b/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/output.txt @@ -0,0 +1,6 @@ + +>>> errcode [CLI] bundle plan +Error: experimental.record_deployment_history is only supported with the direct deployment engine + + +Exit code: 1 diff --git a/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/script b/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/script new file mode 100644 index 00000000000..9a202595e32 --- /dev/null +++ b/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/script @@ -0,0 +1,3 @@ +# record_deployment_history drives DMS, which only the direct engine supports. 
+# Under the terraform engine the flag must be rejected, not silently ignored. +trace errcode $CLI bundle plan 2>&1 | contains.py "experimental.record_deployment_history is only supported with the direct deployment engine" diff --git a/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/test.toml b/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/test.toml new file mode 100644 index 00000000000..38ca093e146 --- /dev/null +++ b/acceptance/bundle/deploy/record-deployment-history/terraform-unsupported/test.toml @@ -0,0 +1,8 @@ +Local = true +Cloud = false +RecordRequests = false + +Ignore = [".databricks"] + +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = ["terraform"] diff --git a/acceptance/bundle/state/future_version/output.txt b/acceptance/bundle/state/future_version/output.txt index 7cf98129ee9..0a16971f472 100644 --- a/acceptance/bundle/state/future_version/output.txt +++ b/acceptance/bundle/state/future_version/output.txt @@ -1,3 +1,3 @@ -state version 999 is newer than supported version 2; upgrade the CLI +state version 999 is newer than supported version 3; upgrade the CLI Exit code: 1 diff --git a/bundle/configsync/diff.go b/bundle/configsync/diff.go index 4a0b4f01ca3..955e971f93f 100644 --- a/bundle/configsync/diff.go +++ b/bundle/configsync/diff.go @@ -140,7 +140,7 @@ func DetectChanges(ctx context.Context, b *bundle.Bundle, engine engine.EngineTy } else { deployBundle = &direct.DeploymentBundle{} _, statePath := b.StateFilenameConfigSnapshot(ctx) - if err := deployBundle.StateDB.Open(ctx, statePath, dstate.WithRecovery(true), dstate.WithWrite(false)); err != nil { + if err := deployBundle.StateDB.Open(ctx, statePath, dstate.WithRecovery(true), dstate.WithWrite(false), dstate.WithDMS(false)); err != nil { return nil, fmt.Errorf("failed to open state: %w", err) } } diff --git a/bundle/configsync/variables.go b/bundle/configsync/variables.go index 0745bfba43b..4ee7d6d4b1d 100644 --- a/bundle/configsync/variables.go +++ 
b/bundle/configsync/variables.go @@ -144,7 +144,7 @@ func resourceIDLookup(ctx context.Context, b *bundle.Bundle) func(string) string } _, statePath := b.StateFilenameConfigSnapshot(ctx) db := &dstate.DeploymentState{} - if err := db.Open(ctx, statePath, dstate.WithRecovery(false), dstate.WithWrite(false)); err != nil { + if err := db.Open(ctx, statePath, dstate.WithRecovery(false), dstate.WithWrite(false), dstate.WithDMS(false)); err != nil { log.Debugf(ctx, "variable restoration: failed to open state DB at %s: %v", statePath, err) return nil } diff --git a/bundle/direct/bind.go b/bundle/direct/bind.go index f1c534bea9d..6a0faa300de 100644 --- a/bundle/direct/bind.go +++ b/bundle/direct/bind.go @@ -62,7 +62,7 @@ type BindResult struct { func (b *DeploymentBundle) Bind(ctx context.Context, client *databricks.WorkspaceClient, configRoot *config.Root, statePath, resourceKey, resourceID string) (*BindResult, error) { // Check if the resource is already managed (bound to a different ID) var checkStateDB dstate.DeploymentState - if err := checkStateDB.Open(ctx, statePath, dstate.WithRecovery(true), dstate.WithWrite(false)); err == nil { + if err := checkStateDB.Open(ctx, statePath, dstate.WithRecovery(true), dstate.WithWrite(false), dstate.WithDMS(false)); err == nil { existingID := checkStateDB.GetResourceID(resourceKey) if _, err := checkStateDB.Finalize(ctx); err != nil { log.Warnf(ctx, "failed to finalize state: %v", err) @@ -86,7 +86,7 @@ func (b *DeploymentBundle) Bind(ctx context.Context, client *databricks.Workspac } // Open temp state - err := b.StateDB.Open(ctx, tmpStatePath, dstate.WithRecovery(false), dstate.WithWrite(true)) + err := b.StateDB.Open(ctx, tmpStatePath, dstate.WithRecovery(false), dstate.WithWrite(true), dstate.WithDMS(false)) if err != nil { os.Remove(tmpStatePath) return nil, err @@ -109,7 +109,7 @@ func (b *DeploymentBundle) Bind(ctx context.Context, client *databricks.Workspac log.Infof(ctx, "Bound %s to id=%s (in temp state)", resourceKey, 
resourceID) // First plan + update: populate state with resolved config - err = b.StateDB.Open(ctx, tmpStatePath, dstate.WithRecovery(true), dstate.WithWrite(false)) + err = b.StateDB.Open(ctx, tmpStatePath, dstate.WithRecovery(true), dstate.WithWrite(false), dstate.WithDMS(false)) if err != nil { os.Remove(tmpStatePath) return nil, err @@ -144,7 +144,7 @@ func (b *DeploymentBundle) Bind(ctx context.Context, client *databricks.Workspac } } - err = b.StateDB.Open(ctx, tmpStatePath, dstate.WithRecovery(true), dstate.WithWrite(true)) + err = b.StateDB.Open(ctx, tmpStatePath, dstate.WithRecovery(true), dstate.WithWrite(true), dstate.WithDMS(false)) if err != nil { os.Remove(tmpStatePath) return nil, err @@ -164,7 +164,7 @@ func (b *DeploymentBundle) Bind(ctx context.Context, client *databricks.Workspac } // Second plan: this is the plan to present to the user (change between remote resource and config) - err = b.StateDB.Open(ctx, tmpStatePath, dstate.WithRecovery(true), dstate.WithWrite(false)) + err = b.StateDB.Open(ctx, tmpStatePath, dstate.WithRecovery(true), dstate.WithWrite(false), dstate.WithDMS(false)) if err != nil { os.Remove(tmpStatePath) return nil, err @@ -214,7 +214,7 @@ func (result *BindResult) Cancel() { // Unbind removes a resource from direct engine state without deleting // the workspace resource. Also removes associated permissions/grants entries. 
func (b *DeploymentBundle) Unbind(ctx context.Context, statePath, resourceKey string) error { - err := b.StateDB.Open(ctx, statePath, dstate.WithRecovery(true), dstate.WithWrite(true)) + err := b.StateDB.Open(ctx, statePath, dstate.WithRecovery(true), dstate.WithWrite(true), dstate.WithDMS(false)) if err != nil { return err } diff --git a/bundle/direct/dstate/migrate.go b/bundle/direct/dstate/migrate.go index 381d63a12eb..93e1ff6a45c 100644 --- a/bundle/direct/dstate/migrate.go +++ b/bundle/direct/dstate/migrate.go @@ -10,16 +10,25 @@ import ( "github.com/databricks/databricks-sdk-go/service/iam" ) -// migrateState runs all necessary migrations on the database. +// migrateState brings a freshly-loaded state up to a version this CLI can use. // It is called after loading state from disk. +// +// Two versions are "current" and left untouched: the baseline currentStateVersion +// and the opt-in dmsStateVersion (written when a bundle previews DMS). Legacy +// states below the baseline are migrated forward via the migrations map. A state +// newer than dmsStateVersion was written by a newer CLI, so we refuse it rather +// than risk mishandling a format we don't understand. +// +// The DMS protocol version (dms_version) is enforced separately and only when +// the bundle has opted into DMS; see Open's WithDMS option. func migrateState(db *Database) error { - if db.StateVersion == currentStateVersion { - return nil - } - if db.StateVersion > currentStateVersion { - return fmt.Errorf("state version %d is newer than supported version %d; upgrade the CLI", db.StateVersion, currentStateVersion) + if db.StateVersion > dmsStateVersion { + return fmt.Errorf("state version %d is newer than supported version %d; upgrade the CLI", db.StateVersion, dmsStateVersion) } + // Only legacy states (below the baseline) migrate here. 
The DMS upgrade is an + // explicit deploy-time step (see UpgradeToDMS), never an automatic migration, + // so a dmsStateVersion state falls through this loop unchanged. for version := db.StateVersion; version < currentStateVersion; version++ { fn, ok := migrations[version] if !ok { diff --git a/bundle/direct/dstate/migrate_test.go b/bundle/direct/dstate/migrate_test.go new file mode 100644 index 00000000000..7ebb5fe8a28 --- /dev/null +++ b/bundle/direct/dstate/migrate_test.go @@ -0,0 +1,95 @@ +package dstate + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMigrateStateLeavesCurrentUntouched(t *testing.T) { + db := &Database{Header: Header{StateVersion: currentStateVersion}} + require.NoError(t, migrateState(db)) + assert.Equal(t, currentStateVersion, db.StateVersion) +} + +func TestMigrateStateLeavesDMSStateUntouched(t *testing.T) { + // A DMS-upgraded state is already current; it must not be downgraded to the + // baseline version, and the recorded DMS version must be preserved. + db := &Database{Header: Header{StateVersion: dmsStateVersion, DmsVersion: dmsVersion}} + require.NoError(t, migrateState(db)) + assert.Equal(t, dmsStateVersion, db.StateVersion) + assert.Equal(t, dmsVersion, db.DmsVersion) +} + +func TestMigrateStateRejectsNewerThanSupported(t *testing.T) { + db := &Database{Header: Header{StateVersion: dmsStateVersion + 1}} + err := migrateState(db) + require.Error(t, err) + assert.Contains(t, err.Error(), "upgrade the CLI") +} + +// TestStateSchemaVersions pins the state schema version constants. They are part +// of the on-disk format and a contract with older CLIs, so changing them must be +// deliberate. If you are intentionally bumping the baseline schema, this test is +// your checklist: follow the steps below, then update the assertions last. 
+// +// HOW TO BUMP THE BASELINE STATE VERSION (2 -> 4) AND RETIRE THE DMS SPECIAL-CASING +// +// Version 3 only exists because previewing DMS bumped the schema out-of-band via +// UpgradeToDMS, so non-preview bundles weren't forced to upgrade. The next baseline +// bump is when you delete all of that custom code and make 3 an ordinary version +// in the linear migration chain. Go to 4, not 3 (3 is already in the wild): +// +// 1. state.go: set currentStateVersion = 4. Delete the dmsStateVersion constant +// and the UpgradeToDMS method. Version 3 is now reached only by migration, +// like any other version. (Leave dmsVersion and Header.DmsVersion +// alone; they are a separate concern, see step 4.) +// +// 2. migrate.go: change the upper-bound guard from "> dmsStateVersion" back to +// "> currentStateVersion" (there is again only one current version). Add +// stepwise migrations so every old state climbs to 4: +// migrations[2] = v2 (non-DMS baseline) -> v3 +// migrations[3] = v3 (former DMS preview) -> v4 +// A v2 state climbs 2->3->4 and a v3 state climbs 3->4, exactly like any +// other version. Write a real transform for whatever the v4 change is. +// +// 3. deploy.go: delete the `if RecordDeploymentHistory { UpgradeToDMS() }` block. +// The version is no longer bumped conditionally; every deploy writes the +// baseline through the normal path. +// +// 4. dms_version is NOT part of this bump. It tracks the DMS *protocol* version +// (dmsVersion), independent of the state schema version, and is enforced +// by Open's WithDMS option (passed by cmd/bundle/utils/process.go only when the +// bundle has opted into DMS). Leave the field, the constant, and that check in +// place; if UpgradeToDMS was the only place stamping it, move that stamping into +// the normal write path. +// +// 5. Tests: update the assertions below (the dmsStateVersion assertion and the +// DMS-schema cases go away); add 2->3->4 and 3->4 migration tests. 
+// TestMigrationsCoverBaseline fails until migrations[2] and migrations[3] exist. +// +// RELATED COVERAGE +// - acceptance/bundle/state/permission_level_migration is a golden migration +// fixture: it commits a real v1 state and asserts the migrated v2 output, +// catching migration-correctness bugs (not just "did you mean to change this"). +// - acceptance/bundle/deploy/record-deployment-history/state-upgrade drives the +// full lifecycle, including both rejections (older CLI vs newer state, and a +// newer DMS version vs this CLI). +// - bundle/invariant/continue_293 asserts the current CLI reads state written by +// an older released CLI, so we never break reading older state. +func TestStateSchemaVersions(t *testing.T) { + assert.Equal(t, 2, currentStateVersion) + assert.Equal(t, 3, dmsStateVersion) + assert.Equal(t, 1, dmsVersion) +} + +// TestMigrationsCoverBaseline guards a baseline bump: every state version below +// currentStateVersion must have a migration to the next version, so migrateState +// can always climb a legacy state up to the baseline. A bump that forgets a +// migration fails here instead of at a user's deploy. +func TestMigrationsCoverBaseline(t *testing.T) { + for v := range currentStateVersion { + assert.Containsf(t, migrations, v, "missing migration for state version %d", v) + } +} diff --git a/bundle/direct/dstate/state.go b/bundle/direct/dstate/state.go index 5b2a70adbb3..69192839656 100644 --- a/bundle/direct/dstate/state.go +++ b/bundle/direct/dstate/state.go @@ -21,10 +21,33 @@ import ( ) const ( + // currentStateVersion is the schema version written for normal deployments + // and the target that older states are migrated up to on load. + // + // NOTE: the next bump to the baseline schema must go to 4, not 3 (which is + // reserved by dmsStateVersion below), and should delete dmsStateVersion, + // folding DMS state handling back into normal versioning. 
currentStateVersion = 2 - initialBufferSize = 64 * 1024 - maxWalEntrySize = 10 * 1024 * 1024 - walSuffix = ".wal" + + // dmsStateVersion is the schema version written when the bundle opts into the + // deployment metadata service via experimental.record_deployment_history. It + // is also the newest version this CLI understands; newer states are rejected. + // + // It is kept separate from currentStateVersion on purpose: previewing DMS + // must not force a state upgrade on everyone else. Non-DMS deploys stay at + // currentStateVersion while only DMS opt-in bumps the state to this version. + // Remove it once currentStateVersion is bumped (to 4) and the two reconcile. + dmsStateVersion = 3 + + // dmsVersion is the DMS protocol version this CLI understands. It is + // stamped into DMS-upgraded state (see Header.DmsVersion) and enforced when a + // bundle opts into DMS: Open with WithDMS rejects a state stamped with a higher + // version. Bump it when the DMS protocol changes in a way older CLIs must not act on. + dmsVersion = 1 + + initialBufferSize = 64 * 1024 + maxWalEntrySize = 10 * 1024 * 1024 + walSuffix = ".wal" ) // errStaleWAL is returned when the WAL serial is behind the expected serial. @@ -42,10 +65,17 @@ type DeploymentState struct { } type Header struct { - StateVersion int `json:"state_version"` - CLIVersion string `json:"cli_version"` - Lineage string `json:"lineage"` - Serial int `json:"serial"` + StateVersion int `json:"state_version"` + + // DmsVersion records the deployment metadata service (DMS) protocol + // version this state was written with. Set only for states opted into DMS + // (see dmsStateVersion) and omitted otherwise. When a bundle opts into DMS, + // Open with WithDMS rejects a state whose value exceeds dmsVersion. 
+ DmsVersion int `json:"dms_version,omitempty"` + + CLIVersion string `json:"cli_version"` + Lineage string `json:"lineage"` + Serial int `json:"serial"` } type Database struct { @@ -154,9 +184,15 @@ type ( // If true, the state is opened in Write mode, which enables methods such as SaveState // but disables GetResourceEntry (since writes go strictly into WAL and not in memory). WithWrite bool + + // If true, the bundle has opted into the deployment metadata service (DMS) and + // Open rejects a state stamped with a dms_version newer than this CLI supports. + // Pass false when the bundle has not opted in: such a bundle does not act on + // the recorded DMS version, so it must not be blocked by it. + WithDMS bool ) -func (db *DeploymentState) Open(ctx context.Context, path string, withRecovery WithRecovery, withWrite WithWrite) error { +func (db *DeploymentState) Open(ctx context.Context, path string, withRecovery WithRecovery, withWrite WithWrite, withDMS WithDMS) error { db.mu.Lock() defer db.mu.Unlock() @@ -204,6 +240,12 @@ func (db *DeploymentState) Open(ctx context.Context, path string, withRecovery W return fmt.Errorf("migrating state %s: %w", path, err) } + // Enforce the recorded DMS protocol version only when the bundle has opted into + // DMS; a bundle that has not opted in does not act on it (see WithDMS). 
+ if withDMS && db.Data.DmsVersion > dmsVersion { + return fmt.Errorf("record_deployment_history state version %d is newer than supported version %d; upgrade the CLI", db.Data.DmsVersion, dmsVersion) + } + if withWrite { if err := os.MkdirAll(filepath.Dir(walPath), 0o755); err != nil { return fmt.Errorf("failed to create state directory: %w", err) @@ -221,7 +263,8 @@ func (db *DeploymentState) Open(ctx context.Context, path string, withRecovery W walHead := Header{ Lineage: lineage, Serial: db.Data.Serial + 1, - StateVersion: currentStateVersion, + StateVersion: db.Data.StateVersion, + DmsVersion: db.Data.DmsVersion, CLIVersion: build.GetInfo().Version, } return appendJSONLine(db.walFile, walHead) @@ -408,12 +451,27 @@ func (db *DeploymentState) UpgradeToWrite() error { walHead := Header{ Lineage: lineage, Serial: db.Data.Serial + 1, - StateVersion: currentStateVersion, + StateVersion: db.Data.StateVersion, + DmsVersion: db.Data.DmsVersion, CLIVersion: build.GetInfo().Version, } return appendJSONLine(db.walFile, walHead) } +// UpgradeToDMS marks the state as opted into the deployment metadata service +// (DMS): it bumps the schema to dmsStateVersion and stamps the current DMS +// version. It must be called before the WAL is started (UpgradeToWrite) so the +// bumped version is captured in the WAL header; the change is persisted on save. 
+func (db *DeploymentState) UpgradeToDMS() { + db.mu.Lock() + defer db.mu.Unlock() + if db.walFile != nil { + panic("internal error: UpgradeToDMS must be called before the state is opened for write") + } + db.Data.StateVersion = dmsStateVersion + db.Data.DmsVersion = dmsVersion +} + func (db *DeploymentState) AssertOpenedForReadOrWrite() { if db.Path == "" { panic("internal error: DeploymentState must be opened first") diff --git a/bundle/direct/dstate/state_test.go b/bundle/direct/dstate/state_test.go index bbfd2559951..5a697173d5b 100644 --- a/bundle/direct/dstate/state_test.go +++ b/bundle/direct/dstate/state_test.go @@ -1,6 +1,7 @@ package dstate import ( + "encoding/json" "os" "path/filepath" "testing" @@ -19,24 +20,73 @@ func TestOpenSaveFinalizeRoundTrip(t *testing.T) { path := filepath.Join(t.TempDir(), "state.json") var db DeploymentState - require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(true))) + require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(true), WithDMS(false))) require.NoError(t, db.SaveState("jobs.my_job", "123", map[string]string{"key": "val"}, nil)) mustFinalize(t, &db) // Re-open and verify persisted data. var db2 DeploymentState - require.NoError(t, db2.Open(t.Context(), path, WithRecovery(false), WithWrite(false))) + require.NoError(t, db2.Open(t.Context(), path, WithRecovery(false), WithWrite(false), WithDMS(false))) assert.Equal(t, 1, db2.Data.Serial) assert.Equal(t, "123", db2.GetResourceID("jobs.my_job")) mustFinalize(t, &db2) } +func TestUpgradeToDMSPersistsVersions(t *testing.T) { + path := filepath.Join(t.TempDir(), "state.json") + + // UpgradeToDMS must run before the WAL is started (UpgradeToWrite), so the + // bumped version is captured in the WAL header. 
+ var db DeploymentState + require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(false), WithDMS(false))) + db.UpgradeToDMS() + require.NoError(t, db.UpgradeToWrite()) + require.NoError(t, db.SaveState("jobs.my_job", "123", map[string]string{"key": "val"}, nil)) + mustFinalize(t, &db) + + // Re-open and verify the upgraded schema version and DMS version persisted, + // and that loading the upgraded state does not error or downgrade it. + var db2 DeploymentState + require.NoError(t, db2.Open(t.Context(), path, WithRecovery(false), WithWrite(false), WithDMS(false))) + assert.Equal(t, dmsStateVersion, db2.Data.StateVersion) + assert.Equal(t, dmsVersion, db2.Data.DmsVersion) + mustFinalize(t, &db2) +} + +func TestUpgradeToDMSPanicsAfterWALStarted(t *testing.T) { + path := filepath.Join(t.TempDir(), "state.json") + + var db DeploymentState + require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(true), WithDMS(false))) + assert.Panics(t, db.UpgradeToDMS) + mustFinalize(t, &db) +} + +func TestOpenWithDMSRejectsNewerDmsVersion(t *testing.T) { + path := filepath.Join(t.TempDir(), "state.json") + data, err := json.Marshal(Database{Header: Header{StateVersion: dmsStateVersion, DmsVersion: dmsVersion + 1}}) + require.NoError(t, err) + require.NoError(t, os.WriteFile(path, data, 0o600)) + + // WithDMS(true): a state with a newer DMS protocol version is rejected. + var db DeploymentState + err = db.Open(t.Context(), path, WithRecovery(true), WithWrite(false), WithDMS(true)) + require.Error(t, err) + assert.Contains(t, err.Error(), "record_deployment_history state version") + assert.Contains(t, err.Error(), "upgrade the CLI") + + // WithDMS(false): the same state loads fine; the check is gated on opt-in. 
+ var db2 DeploymentState + require.NoError(t, db2.Open(t.Context(), path, WithRecovery(true), WithWrite(false), WithDMS(false))) + mustFinalize(t, &db2) +} + func TestFinalizeWithNoEntriesDoesNotWriteStateFile(t *testing.T) { path := filepath.Join(t.TempDir(), "state.json") var db DeploymentState - require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(true))) + require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(true), WithDMS(false))) mustFinalize(t, &db) _, err := os.Stat(path) @@ -47,10 +97,10 @@ func TestPanicOnDoubleOpen(t *testing.T) { path := filepath.Join(t.TempDir(), "state.json") var db DeploymentState - require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(true))) + require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(true), WithDMS(false))) assert.Panics(t, func() { - _ = db.Open(t.Context(), path, WithRecovery(true), WithWrite(true)) + _ = db.Open(t.Context(), path, WithRecovery(true), WithWrite(true), WithDMS(false)) }) mustFinalize(t, &db) } @@ -59,17 +109,17 @@ func TestDeleteState(t *testing.T) { path := filepath.Join(t.TempDir(), "state.json") var db DeploymentState - require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(true))) + require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(true), WithDMS(false))) require.NoError(t, db.SaveState("jobs.my_job", "123", map[string]string{}, nil)) mustFinalize(t, &db) var db2 DeploymentState - require.NoError(t, db2.Open(t.Context(), path, WithRecovery(true), WithWrite(true))) + require.NoError(t, db2.Open(t.Context(), path, WithRecovery(true), WithWrite(true), WithDMS(false))) require.NoError(t, db2.DeleteState("jobs.my_job")) mustFinalize(t, &db2) var db3 DeploymentState - require.NoError(t, db3.Open(t.Context(), path, WithRecovery(false), WithWrite(false))) + require.NoError(t, db3.Open(t.Context(), path, WithRecovery(false), WithWrite(false), WithDMS(false))) 
assert.Equal(t, 2, db3.Data.Serial) assert.Empty(t, db3.GetResourceID("jobs.my_job")) mustFinalize(t, &db3) diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 15546880b9a..7a6863fc55b 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -166,6 +166,12 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand } if engine.IsDirect() { + // Opting into DMS bumps the state schema before the WAL is started so the + // version is captured in the WAL header (UpgradeToDMS panics if the WAL is + // already open, so it must run before UpgradeToWrite below). + if b.Config.Experimental != nil && b.Config.Experimental.RecordDeploymentHistory { + b.DeploymentBundle.StateDB.UpgradeToDMS() + } // Upgrade from read (opened by process.go) to write mode if err := b.DeploymentBundle.StateDB.UpgradeToWrite(); err != nil { logdiag.LogError(ctx, err) diff --git a/cmd/bundle/generate/dashboard.go b/cmd/bundle/generate/dashboard.go index 7af4e01e92f..17be8ab7ff4 100644 --- a/cmd/bundle/generate/dashboard.go +++ b/cmd/bundle/generate/dashboard.go @@ -394,7 +394,7 @@ func (d *dashboard) runForResource(ctx context.Context, b *bundle.Bundle) { var state statemgmt.ExportedResourcesMap if stateDesc.Engine.IsDirect() { _, localPath := b.StateFilenameDirect(ctx) - if err := b.DeploymentBundle.StateDB.Open(ctx, localPath, dstate.WithRecovery(true), dstate.WithWrite(false)); err != nil { + if err := b.DeploymentBundle.StateDB.Open(ctx, localPath, dstate.WithRecovery(true), dstate.WithWrite(false), dstate.WithDMS(false)); err != nil { logdiag.LogError(ctx, err) return } diff --git a/cmd/bundle/utils/process.go b/cmd/bundle/utils/process.go index 5f43cff6acd..09f2f9a4154 100644 --- a/cmd/bundle/utils/process.go +++ b/cmd/bundle/utils/process.go @@ -185,11 +185,21 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (b *bundle.Bundle } cmd.SetContext(ctx) + // record_deployment_history drives DMS, which only the direct engine + 
// supports. Reject it under terraform rather than silently ignoring it. + recordDeploymentHistory := b.Config.Experimental != nil && b.Config.Experimental.RecordDeploymentHistory + if recordDeploymentHistory && !stateDesc.Engine.IsDirect() { + logdiag.LogError(ctx, errors.New("experimental.record_deployment_history is only supported with the direct deployment engine")) + return b, stateDesc, root.ErrAlreadyPrinted + } + // Open direct engine state once for all subsequent operations (ExportState, CalculatePlan, Apply, etc.) needDirectState := stateDesc.Engine.IsDirect() && (opts.InitIDs || opts.ErrorOnEmptyState || opts.Deploy || opts.ReadPlanPath != "" || opts.PreDeployChecks || opts.PostStateFunc != nil) if needDirectState { _, localPath := b.StateFilenameDirect(ctx) - if err := b.DeploymentBundle.StateDB.Open(ctx, localPath, dstate.WithRecovery(true), dstate.WithWrite(false)); err != nil { + // WithDMS enforces the recorded DMS protocol version, but only when the + // bundle has opted into DMS (a bundle that has not opted in does not act on it). + if err := b.DeploymentBundle.StateDB.Open(ctx, localPath, dstate.WithRecovery(true), dstate.WithWrite(false), dstate.WithDMS(recordDeploymentHistory)); err != nil { logdiag.LogError(ctx, err) return b, stateDesc, root.ErrAlreadyPrinted }