From 2a3b079635706c8f9834fded6796164ad5c05aa0 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 1 Jun 2026 16:10:41 +0000 Subject: [PATCH] Persist since-versions on docgen for stable annotations `x-since-version` was recomputed from git history on every schema generation, keyed by the Go type path. That made versions drift: when a type is renamed or a shared struct is split into per-resource typed structs (e.g. permissions), the field re-keys and gets re-stamped with a newer version. Persist the computed map as an append-only state file and treat stored entries as authoritative so a recorded version never changes, even across refactors. The state lives on the `docgen` branch (next to jsonschema_for_docs.json), not in the main source tree: - since_version.go reads/writes the state at the path in DATABRICKS_SINCE_VERSIONS_FILE. When unset (local `task generate`, regular CI) behavior is unchanged: versions are computed from history and nothing is persisted, so main stays clean and the workflow's "only the docs schema changed" assertion still holds. - When set, computeSinceVersions loads the stored map, refreshes it from git history to discover new fields, merges with stored entries winning, and writes it back. sinceVersionAliases lets a renamed/retyped field inherit its previous key's version. - The update-schema-docs workflow checks out docgen, points the env var at its since_versions.json, regenerates, and commits both the schema and the refreshed state back to docgen. It runs on every release tag, so the state is updated on each release; the first run seeds it from history. The map is written deterministically (sorted, trailing newline) so it stays diff-stable. No existing annotation changes; this only prevents future drift. Co-authored-by: Isaac --- .github/workflows/update-schema-docs.yml | 31 +++-- bundle/internal/schema/main.go | 4 +- bundle/internal/schema/since_version.go | 131 ++++++++++++++++++- bundle/internal/schema/since_version_test.go | 96 ++++++++++++++ 4 files changed, 250 insertions(+), 12 deletions(-) create mode 100644 bundle/internal/schema/since_version_test.go diff --git a/.github/workflows/update-schema-docs.yml b/.github/workflows/update-schema-docs.yml index f47e191e49a..413b6b20dd3 100644 --- a/.github/workflows/update-schema-docs.yml +++ b/.github/workflows/update-schema-docs.yml @@ -7,6 +7,11 @@ name: update-schema-docs # from the list of `v*` git tags that exist when the schema is generated. The # `docgen` branch is therefore stale by one release as soon as the next tag is # pushed; this workflow keeps it current. +# +# The append-only since-versions state (bundle/schema/since_versions.json) also +# lives on `docgen`. The generator reads it from the docgen worktree, refreshes +# it (recorded versions never change), and this workflow commits it back — so +# annotations stay stable across schema refactors without keeping state on main. on: push: @@ -68,7 +73,21 @@ jobs: echo "tag=$tag" >> "$GITHUB_OUTPUT" echo "Publishing for tag $tag" + # Check out docgen first: it holds the append-only since-versions state + # (bundle/schema/since_versions.json) that the generator reads and refreshes. + - name: Check out docgen worktree + run: | + git fetch origin docgen + git worktree add "$RUNNER_TEMP/docgen" origin/docgen + - name: Regenerate jsonschema_for_docs.json + env: + # Point the generator at the since-versions state on docgen. It loads + # this map, refreshes it from git history (append-only — recorded + # versions never change), writes it back, and uses it to annotate the + # schema. Keeping it on docgen means x-since-version stays stable across + # schema refactors without adding an internal state file to main. + DATABRICKS_SINCE_VERSIONS_FILE: ${{ runner.temp }}/docgen/bundle/schema/since_versions.json run: go tool -modfile=tools/task/go.mod task --force generate-schema-docs # Fail loudly if regeneration touches anything other than the docs schema. @@ -94,17 +113,13 @@ jobs: mkdir -p "$RUNNER_TEMP/regen" cp bundle/schema/jsonschema_for_docs.json "$RUNNER_TEMP/regen/jsonschema_for_docs.json" - - name: Check out docgen worktree - run: | - git fetch origin docgen - git worktree add "$RUNNER_TEMP/docgen" origin/docgen - - - name: Stage regenerated file on docgen + - name: Stage regenerated files on docgen working-directory: ${{ runner.temp }}/docgen run: | mkdir -p bundle/schema cp "$RUNNER_TEMP/regen/jsonschema_for_docs.json" bundle/schema/jsonschema_for_docs.json - git add bundle/schema/jsonschema_for_docs.json + # since_versions.json was refreshed in place by the generator above. + git add bundle/schema/jsonschema_for_docs.json bundle/schema/since_versions.json - name: Commit and push to docgen working-directory: ${{ runner.temp }}/docgen @@ -117,5 +132,5 @@ jobs: fi git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" - git commit -m "Update jsonschema_for_docs.json for ${TAG}" + git commit -m "Update jsonschema_for_docs.json and since_versions.json for ${TAG}" git push origin HEAD:docgen diff --git a/bundle/internal/schema/main.go b/bundle/internal/schema/main.go index f8bc399134e..382e185d293 100644 --- a/bundle/internal/schema/main.go +++ b/bundle/internal/schema/main.go @@ -254,7 +254,9 @@ func generateSchema(workdir, outputFile string, docsMode bool) { log.Fatal(err) } - // In docs mode, add sinceVersion annotations by analyzing git history. + // In docs mode, add sinceVersion annotations. When DATABRICKS_SINCE_VERSIONS_FILE + // is set (by the update-schema-docs workflow) these come from the persisted, + // append-only state on docgen; otherwise they are computed from git history. if docsMode { sinceVersions, err := computeSinceVersions() if err != nil { diff --git a/bundle/internal/schema/since_version.go b/bundle/internal/schema/since_version.go index 4f9b7d30b7a..939715afcd9 100644 --- a/bundle/internal/schema/since_version.go +++ b/bundle/internal/schema/since_version.go @@ -2,8 +2,11 @@ package main import ( "encoding/json" + "errors" "fmt" + "io/fs" "maps" + "os" "os/exec" "strconv" "strings" @@ -11,13 +14,135 @@ import ( "github.com/databricks/cli/libs/jsonschema" ) +// sinceVersionsStateEnv names the env var that points at the persisted, +// append-only since-versions state file. It is set by the update-schema-docs +// workflow to a checkout of the `docgen` branch, so the state is stored and +// updated there (never in the main source tree). When unset (local `task +// generate`, regular CI) since versions are computed from git history only and +// nothing is persisted. +const sinceVersionsStateEnv = "DATABRICKS_SINCE_VERSIONS_FILE" + +// sinceVersionAliases maps a current "typePath.fieldName" key to the key it was +// previously known by. When a Go type is renamed, moved, or retyped (e.g. the +// shared Permission struct being split into per-resource typed structs), the new +// key would otherwise look brand new and be stamped with the current release +// version. Listing the rename here lets the new key inherit the original key's +// since version, keeping it stable across refactors. +// +// Example: +// +// "github.com/databricks/cli/bundle/config/resources.AppPermission.user_name": +// "github.com/databricks/cli/bundle/config/resources.Permission.user_name", +var sinceVersionAliases = map[string]string{} + // Version when bundle/schema/jsonschema.json was added to the repo. var embeddedSchemaVersion = [3]int{0, 229, 0} -// computeSinceVersions computes when each field was first introduced by analyzing git history. -// It returns a map from "typePath.fieldName" to the version string (e.g., "v0.229.0"). -// This function always recomputes versions at runtime without storing state. +// computeSinceVersions returns the "typePath.fieldName" -> version map used to +// annotate the schema. +// +// When DATABRICKS_SINCE_VERSIONS_FILE is unset, versions are computed purely +// from git history (the original behavior) and nothing is persisted. +// +// When it is set (by the update-schema-docs workflow, pointing at a docgen +// checkout), the state there is treated as append-only and authoritative: +// 1. Load the stored map (missing file is treated as empty — the first run +// seeds it). +// 2. Recompute first-observed versions from git history to discover newly +// added fields. +// 3. Merge: stored entries win, so a recorded version never changes even if a +// field's Go type is later renamed or retyped; brand-new fields take their +// computed version; renamed fields inherit via sinceVersionAliases. +// 4. Write the merged map back so newly discovered fields become frozen too. +// The workflow then commits it to docgen. func computeSinceVersions() (map[string]string, error) { + computed, err := computeSinceVersionsFromHistory() + + statePath := os.Getenv(sinceVersionsStateEnv) + if statePath == "" { + // No persisted state: legacy behavior (annotate from history, or surface + // the error so the caller skips annotation). + return computed, err + } + + if err != nil { + // Without git history/tags we can still annotate from the stored state. + fmt.Printf("Warning: could not compute since versions from git history: %v\n", err) + computed = map[string]string{} + } + + stored, err := loadStoredSinceVersions(statePath) + if err != nil { + return nil, err + } + + merged := mergeSinceVersions(computed, stored, sinceVersionAliases) + + if err := saveStoredSinceVersions(statePath, merged); err != nil { + return nil, fmt.Errorf("writing %s: %w", statePath, err) + } + return merged, nil +} + +// mergeSinceVersions combines freshly computed versions with the stored map. +// +// Stored entries are authoritative and never overwritten (append-only), which is +// what makes versions stable across refactors. Fields not yet stored take their +// computed (first-observed) version. A renamed field whose new key is not yet +// stored inherits its previous key's version via aliases. +func mergeSinceVersions(computed, stored, aliases map[string]string) map[string]string { + result := make(map[string]string, len(computed)+len(stored)) + maps.Copy(result, computed) + + for newKey, oldKey := range aliases { + if _, frozen := stored[newKey]; frozen { + continue + } + if v, ok := stored[oldKey]; ok { + result[newKey] = v + } else if v, ok := computed[oldKey]; ok { + result[newKey] = v + } + } + + // Stored wins: a recorded version is the canonical "first observed" answer. + maps.Copy(result, stored) + return result +} + +// loadStoredSinceVersions reads the persisted since-version map. A missing file +// is not an error (returns an empty map) so the generator works on a fresh +// checkout that has not recorded versions yet. +func loadStoredSinceVersions(path string) (map[string]string, error) { + data, err := os.ReadFile(path) + if errors.Is(err, fs.ErrNotExist) { + return map[string]string{}, nil + } + if err != nil { + return nil, fmt.Errorf("reading %s: %w", path, err) + } + stored := map[string]string{} + if err := json.Unmarshal(data, &stored); err != nil { + return nil, fmt.Errorf("parsing %s: %w", path, err) + } + return stored, nil +} + +// saveStoredSinceVersions writes the map back deterministically (sorted keys via +// json.MarshalIndent, trailing newline) so the committed file stays diff-stable. +func saveStoredSinceVersions(path string, versions map[string]string) error { + b, err := json.MarshalIndent(versions, "", " ") + if err != nil { + return err + } + b = append(b, '\n') + return os.WriteFile(path, b, 0o644) +} + +// computeSinceVersionsFromHistory computes when each field was first introduced +// by analyzing git history. It returns a map from "typePath.fieldName" to the +// version string (e.g., "v0.229.0"). +func computeSinceVersionsFromHistory() (map[string]string, error) { versions, err := getVersionTags() if err != nil { return nil, fmt.Errorf("getting version tags: %w", err) diff --git a/bundle/internal/schema/since_version_test.go b/bundle/internal/schema/since_version_test.go new file mode 100644 index 00000000000..0356ec05755 --- /dev/null +++ b/bundle/internal/schema/since_version_test.go @@ -0,0 +1,96 @@ +package main + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMergeSinceVersionsStoredWins(t *testing.T) { + // A stored version is authoritative and must not be overwritten by a freshly + // computed (possibly drifted) value. This is what keeps versions stable when + // a type is refactored and history re-keys the field to a newer version. + computed := map[string]string{ + "pkg.Type.field": "v0.300.0", // drifted forward by a refactor + "pkg.Type.new_field": "v0.310.0", // genuinely new, not yet stored + } + stored := map[string]string{ + "pkg.Type.field": "v0.229.0", + } + + merged := mergeSinceVersions(computed, stored, nil) + + assert.Equal(t, "v0.229.0", merged["pkg.Type.field"], "stored version must win") + assert.Equal(t, "v0.310.0", merged["pkg.Type.new_field"], "new field keeps computed version") +} + +func TestMergeSinceVersionsAliasInheritsOldVersion(t *testing.T) { + // A renamed/retyped field whose new key is not yet stored inherits the old + // key's version instead of being treated as brand new. + computed := map[string]string{ + "pkg.AppPermission.user_name": "v0.247.0", // when the typed struct appeared + "pkg.Permission.user_name": "v0.229.0", + } + stored := map[string]string{ + "pkg.Permission.user_name": "v0.229.0", + } + aliases := map[string]string{ + "pkg.AppPermission.user_name": "pkg.Permission.user_name", + } + + merged := mergeSinceVersions(computed, stored, aliases) + + assert.Equal(t, "v0.229.0", merged["pkg.AppPermission.user_name"], + "renamed field must inherit the original key's version") +} + +func TestMergeSinceVersionsAliasSkippedWhenAlreadyFrozen(t *testing.T) { + // Once the new key is stored, the alias is a no-op: the stored value stands. + computed := map[string]string{} + stored := map[string]string{ + "pkg.AppPermission.user_name": "v0.247.0", + "pkg.Permission.user_name": "v0.229.0", + } + aliases := map[string]string{ + "pkg.AppPermission.user_name": "pkg.Permission.user_name", + } + + merged := mergeSinceVersions(computed, stored, aliases) + + assert.Equal(t, "v0.247.0", merged["pkg.AppPermission.user_name"], + "a frozen key must not be rewritten by an alias") +} + +func TestStoredSinceVersionsRoundTrip(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "since_versions.json") + + want := map[string]string{"a.b": "v0.1.0", "c.d": "v0.2.0"} + require.NoError(t, saveStoredSinceVersions(path, want)) + + got, err := loadStoredSinceVersions(path) + require.NoError(t, err) + assert.Equal(t, want, got) +} + +func TestLoadStoredSinceVersionsMissingFile(t *testing.T) { + // A fresh checkout (or docgen branch without the file yet) must not error — + // the first run seeds it. + got, err := loadStoredSinceVersions(filepath.Join(t.TempDir(), "does-not-exist.json")) + require.NoError(t, err) + assert.Empty(t, got) +} + +func TestStoredSinceVersionsWriteIsCanonical(t *testing.T) { + // saveStoredSinceVersions must write sorted keys with a trailing newline so + // the file committed to docgen stays diff-stable across runs. + path := filepath.Join(t.TempDir(), "since_versions.json") + require.NoError(t, saveStoredSinceVersions(path, map[string]string{"b": "v0.2.0", "a": "v0.1.0"})) + + got, err := os.ReadFile(path) + require.NoError(t, err) + assert.Equal(t, "{\n \"a\": \"v0.1.0\",\n \"b\": \"v0.2.0\"\n}\n", string(got)) +}