Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions .github/workflows/update-schema-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ name: update-schema-docs
# from the list of `v*` git tags that exist when the schema is generated. The
# `docgen` branch is therefore stale by one release as soon as the next tag is
# pushed; this workflow keeps it current.
#
# The append-only since-versions state (bundle/schema/since_versions.json) also
# lives on `docgen`. The generator reads it from the docgen worktree, refreshes
# it (recorded versions never change), and this workflow commits it back — so
# annotations stay stable across schema refactors without keeping state on main.

on:
push:
Expand Down Expand Up @@ -68,7 +73,21 @@ jobs:
echo "tag=$tag" >> "$GITHUB_OUTPUT"
echo "Publishing for tag $tag"

# Check out docgen first: it holds the append-only since-versions state
# (bundle/schema/since_versions.json) that the generator reads and refreshes.
- name: Check out docgen worktree
run: |
git fetch origin docgen
git worktree add "$RUNNER_TEMP/docgen" origin/docgen

- name: Regenerate jsonschema_for_docs.json
env:
# Point the generator at the since-versions state on docgen. It loads
# this map, refreshes it from git history (append-only — recorded
# versions never change), writes it back, and uses it to annotate the
# schema. Keeping it on docgen means x-since-version stays stable across
# schema refactors without adding an internal state file to main.
DATABRICKS_SINCE_VERSIONS_FILE: ${{ runner.temp }}/docgen/bundle/schema/since_versions.json
run: go tool -modfile=tools/task/go.mod task --force generate-schema-docs

# Fail loudly if regeneration touches anything other than the docs schema.
Expand All @@ -94,17 +113,13 @@ jobs:
mkdir -p "$RUNNER_TEMP/regen"
cp bundle/schema/jsonschema_for_docs.json "$RUNNER_TEMP/regen/jsonschema_for_docs.json"

- name: Check out docgen worktree
run: |
git fetch origin docgen
git worktree add "$RUNNER_TEMP/docgen" origin/docgen

- name: Stage regenerated file on docgen
- name: Stage regenerated files on docgen
working-directory: ${{ runner.temp }}/docgen
run: |
mkdir -p bundle/schema
cp "$RUNNER_TEMP/regen/jsonschema_for_docs.json" bundle/schema/jsonschema_for_docs.json
git add bundle/schema/jsonschema_for_docs.json
# since_versions.json was refreshed in place by the generator above.
git add bundle/schema/jsonschema_for_docs.json bundle/schema/since_versions.json

- name: Commit and push to docgen
working-directory: ${{ runner.temp }}/docgen
Expand All @@ -117,5 +132,5 @@ jobs:
fi
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
git commit -m "Update jsonschema_for_docs.json for ${TAG}"
git commit -m "Update jsonschema_for_docs.json and since_versions.json for ${TAG}"
git push origin HEAD:docgen
4 changes: 3 additions & 1 deletion bundle/internal/schema/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,9 @@ func generateSchema(workdir, outputFile string, docsMode bool) {
log.Fatal(err)
}

// In docs mode, add sinceVersion annotations by analyzing git history.
// In docs mode, add sinceVersion annotations. When DATABRICKS_SINCE_VERSIONS_FILE
// is set (by the update-schema-docs workflow) these come from the persisted,
// append-only state on docgen; otherwise they are computed from git history.
if docsMode {
sinceVersions, err := computeSinceVersions()
if err != nil {
Expand Down
131 changes: 128 additions & 3 deletions bundle/internal/schema/since_version.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,147 @@

import (
"encoding/json"
"errors"
"fmt"
"io/fs"
"maps"
"os"
"os/exec"
"strconv"
"strings"

"github.com/databricks/cli/libs/jsonschema"
)

// sinceVersionsStateEnv is the name of the environment variable that holds the
// path to the persisted, append-only since-versions state file.
//
// The update-schema-docs workflow sets it to a file inside a checkout of the
// `docgen` branch, so the state is stored and updated there (never in the main
// source tree). When it is unset or empty (local `task generate`, regular CI)
// since versions are computed from git history only and nothing is persisted.
const sinceVersionsStateEnv = "DATABRICKS_SINCE_VERSIONS_FILE"

// sinceVersionAliases maps a current "typePath.fieldName" key to the key it was
// previously known by. When a Go type is renamed, moved, or retyped (e.g. the
// shared Permission struct being split into per-resource typed structs), the new
// key would otherwise look brand new and be stamped with the current release
// version. Listing the rename here lets the new key inherit the original key's
// since version, keeping it stable across refactors.
//
// mergeSinceVersions applies an alias only while the new key is absent from the
// stored state; it looks the old key up in the stored map first and falls back
// to the freshly computed map. The map is currently empty.
//
// Example:
//
// "github.com/databricks/cli/bundle/config/resources.AppPermission.user_name":
// "github.com/databricks/cli/bundle/config/resources.Permission.user_name",
var sinceVersionAliases = map[string]string{}

// embeddedSchemaVersion is the version when bundle/schema/jsonschema.json was
// added to the repo (presumably ordered major, minor, patch — confirm against
// the code that compares it with parsed tags).
var embeddedSchemaVersion = [3]int{0, 229, 0}

// computeSinceVersions computes when each field was first introduced by analyzing git history.
// It returns a map from "typePath.fieldName" to the version string (e.g., "v0.229.0").
// This function always recomputes versions at runtime without storing state.
// computeSinceVersions returns the "typePath.fieldName" -> version map used to
// annotate the schema.
//
// When DATABRICKS_SINCE_VERSIONS_FILE is unset, versions are computed purely
// from git history (the original behavior) and nothing is persisted.
//
// When it is set (by the update-schema-docs workflow, pointing at a docgen
// checkout), the state there is treated as append-only and authoritative:
// 1. Load the stored map (missing file is treated as empty — the first run
// seeds it).
// 2. Recompute first-observed versions from git history to discover newly
// added fields.
// 3. Merge: stored entries win, so a recorded version never changes even if a
// field's Go type is later renamed or retyped; brand-new fields take their
// computed version; renamed fields inherit via sinceVersionAliases.
// 4. Write the merged map back so newly discovered fields become frozen too.
// The workflow then commits it to docgen.
func computeSinceVersions() (map[string]string, error) {
computed, err := computeSinceVersionsFromHistory()

statePath := os.Getenv(sinceVersionsStateEnv)

Check failure on line 61 in bundle/internal/schema/since_version.go

View workflow job for this annotation

GitHub Actions / lint

use of `os.Getenv` forbidden because "Use env.Get(ctx) from the libs/env package instead of os.Getenv." (forbidigo)
if statePath == "" {
// No persisted state: legacy behavior (annotate from history, or surface
// the error so the caller skips annotation).
return computed, err
}

if err != nil {
// Without git history/tags we can still annotate from the stored state.
fmt.Printf("Warning: could not compute since versions from git history: %v\n", err)
computed = map[string]string{}
}

stored, err := loadStoredSinceVersions(statePath)
if err != nil {
return nil, err
}

merged := mergeSinceVersions(computed, stored, sinceVersionAliases)

if err := saveStoredSinceVersions(statePath, merged); err != nil {
return nil, fmt.Errorf("writing %s: %w", statePath, err)
}
return merged, nil
}

// mergeSinceVersions builds the final since-version map from three inputs:
// the versions freshly computed from history, the persisted (stored) versions,
// and the rename aliases (new key -> previous key).
//
// Precedence, lowest to highest:
//   - computed values seed the result;
//   - an alias fills in a new key that is not stored yet, taking the previous
//     key's stored version, or its computed version if it was never stored;
//   - stored values overwrite everything else, so a recorded version is never
//     changed (append-only), which keeps versions stable across refactors.
//
// A fresh map is returned; none of the arguments are modified. Nil maps are
// accepted.
func mergeSinceVersions(computed, stored, aliases map[string]string) map[string]string {
	merged := make(map[string]string, len(computed)+len(stored))
	maps.Copy(merged, computed)

	for renamed, previous := range aliases {
		// An already recorded key is frozen; the alias no longer applies.
		if _, recorded := stored[renamed]; recorded {
			continue
		}

		version, found := stored[previous]
		if !found {
			version, found = computed[previous]
		}
		if found {
			merged[renamed] = version
		}
	}

	// Applied last so that recorded versions take precedence over everything.
	maps.Copy(merged, stored)
	return merged
}

// loadStoredSinceVersions reads the persisted since-version map from the JSON
// file at path.
//
// A file that does not exist yields an empty map and no error, so the generator
// works on a fresh checkout that has not recorded versions yet. Any other read
// failure is returned wrapped as "reading <path>", and invalid JSON is returned
// wrapped as "parsing <path>".
func loadStoredSinceVersions(path string) (map[string]string, error) {
	raw, readErr := os.ReadFile(path)
	switch {
	case errors.Is(readErr, fs.ErrNotExist):
		// Nothing recorded yet: start from an empty state.
		return map[string]string{}, nil
	case readErr != nil:
		return nil, fmt.Errorf("reading %s: %w", path, readErr)
	}

	versions := make(map[string]string)
	if parseErr := json.Unmarshal(raw, &versions); parseErr != nil {
		return nil, fmt.Errorf("parsing %s: %w", path, parseErr)
	}
	return versions, nil
}

// saveStoredSinceVersions serializes versions as indented JSON followed by a
// trailing newline and writes it to path with mode 0o644. json.MarshalIndent
// emits map keys in sorted order, so the output is deterministic and the
// committed file stays diff-stable.
//
// It returns the marshalling error or the error from os.WriteFile, unwrapped.
func saveStoredSinceVersions(path string, versions map[string]string) error {
	encoded, err := json.MarshalIndent(versions, "", " ")
	if err != nil {
		return err
	}
	return os.WriteFile(path, append(encoded, '\n'), 0o644)
}

// computeSinceVersionsFromHistory computes when each field was first introduced
// by analyzing git history. It returns a map from "typePath.fieldName" to the
// version string (e.g., "v0.229.0").
func computeSinceVersionsFromHistory() (map[string]string, error) {
versions, err := getVersionTags()
if err != nil {
return nil, fmt.Errorf("getting version tags: %w", err)
Expand Down
96 changes: 96 additions & 0 deletions bundle/internal/schema/since_version_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package main

import (
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestMergeSinceVersionsStoredWins checks that a recorded version takes
// precedence over a freshly computed one, while a field that was never
// recorded keeps its computed version.
func TestMergeSinceVersionsStoredWins(t *testing.T) {
	// The recorded state is authoritative; history may have re-keyed the field
	// to a newer version after a refactor, and that drift must be ignored.
	frozen := map[string]string{
		"pkg.Type.field": "v0.229.0",
	}
	fromHistory := map[string]string{
		"pkg.Type.field":     "v0.300.0", // drifted forward by a refactor
		"pkg.Type.new_field": "v0.310.0", // genuinely new, not yet stored
	}

	got := mergeSinceVersions(fromHistory, frozen, nil)

	assert.Equal(t, "v0.229.0", got["pkg.Type.field"], "stored version must win")
	assert.Equal(t, "v0.310.0", got["pkg.Type.new_field"], "new field keeps computed version")
}

// TestMergeSinceVersionsAliasInheritsOldVersion checks that an aliased key
// which is not stored yet takes over the version recorded for its previous key
// rather than the version computed for the new key.
func TestMergeSinceVersionsAliasInheritsOldVersion(t *testing.T) {
	const (
		previousKey = "pkg.Permission.user_name"
		renamedKey  = "pkg.AppPermission.user_name"
	)

	fromHistory := map[string]string{
		renamedKey:  "v0.247.0", // when the typed struct appeared
		previousKey: "v0.229.0",
	}
	frozen := map[string]string{previousKey: "v0.229.0"}
	renames := map[string]string{renamedKey: previousKey}

	got := mergeSinceVersions(fromHistory, frozen, renames)

	assert.Equal(t, "v0.229.0", got[renamedKey],
		"renamed field must inherit the original key's version")
}

// TestMergeSinceVersionsAliasSkippedWhenAlreadyFrozen checks that an alias has
// no effect once the new key itself is present in the stored state.
func TestMergeSinceVersionsAliasSkippedWhenAlreadyFrozen(t *testing.T) {
	const (
		previousKey = "pkg.Permission.user_name"
		renamedKey  = "pkg.AppPermission.user_name"
	)

	// Both keys are already recorded; nothing new comes from history.
	frozen := map[string]string{
		renamedKey:  "v0.247.0",
		previousKey: "v0.229.0",
	}
	renames := map[string]string{renamedKey: previousKey}

	got := mergeSinceVersions(map[string]string{}, frozen, renames)

	assert.Equal(t, "v0.247.0", got[renamedKey],
		"a frozen key must not be rewritten by an alias")
}

// TestStoredSinceVersionsRoundTrip checks that a map written by
// saveStoredSinceVersions is read back unchanged by loadStoredSinceVersions.
func TestStoredSinceVersionsRoundTrip(t *testing.T) {
	statePath := filepath.Join(t.TempDir(), "since_versions.json")
	expected := map[string]string{"a.b": "v0.1.0", "c.d": "v0.2.0"}

	require.NoError(t, saveStoredSinceVersions(statePath, expected))

	loaded, err := loadStoredSinceVersions(statePath)
	require.NoError(t, err)
	assert.Equal(t, expected, loaded)
}

// TestLoadStoredSinceVersionsMissingFile checks that loading from a path that
// does not exist succeeds with an empty result, which is what lets the first
// run (fresh checkout, or a docgen branch without the file) seed the state.
func TestLoadStoredSinceVersionsMissingFile(t *testing.T) {
	missing := filepath.Join(t.TempDir(), "does-not-exist.json")

	loaded, err := loadStoredSinceVersions(missing)

	require.NoError(t, err)
	assert.Empty(t, loaded)
}

// TestStoredSinceVersionsWriteIsCanonical pins the exact bytes written by
// saveStoredSinceVersions: keys in sorted order and a trailing newline, so the
// file committed to docgen stays diff-stable across runs.
func TestStoredSinceVersionsWriteIsCanonical(t *testing.T) {
	statePath := filepath.Join(t.TempDir(), "since_versions.json")
	unsorted := map[string]string{"b": "v0.2.0", "a": "v0.1.0"}

	require.NoError(t, saveStoredSinceVersions(statePath, unsorted))

	raw, err := os.ReadFile(statePath)
	require.NoError(t, err)
	assert.Equal(t, "{\n \"a\": \"v0.1.0\",\n \"b\": \"v0.2.0\"\n}\n", string(raw))
}
Loading