From 9b3aed6ba279c0486bb9fe010d19ad41f19bf6ed Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 15:14:37 +0100 Subject: [PATCH 1/7] Pin Alpine image digest and add SHA256 verification for TF provider download Co-authored-by: Isaac --- Dockerfile | 4 ++-- bundle/deploy/terraform/pkg.go | 15 ++++++++++----- bundle/deploy/terraform/pkg_test.go | 11 +++++++++++ bundle/internal/tf/codegen/generator/generator.go | 12 ++++++++---- bundle/internal/tf/codegen/schema/version.go | 9 +++++++++ bundle/internal/tf/codegen/templates/root.go.tmpl | 2 ++ bundle/internal/tf/schema/root.go | 2 ++ docker/setup.sh | 8 ++++++++ 8 files changed, 52 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index f48b90b437..0945680299 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM alpine:3.22 as builder +FROM alpine:3.22@sha256:55ae5d250caebc548793f321534bc6a8ef1d116f334f18f4ada1b2daad3251b2 as builder RUN ["apk", "add", "jq"] RUN ["apk", "add", "bash"] @@ -13,7 +13,7 @@ ARG ARCH RUN /build/docker/setup.sh # Start from a fresh base image, to remove any build artifacts and scripts. -FROM alpine:3.22 +FROM alpine:3.22@sha256:55ae5d250caebc548793f321534bc6a8ef1d116f334f18f4ada1b2daad3251b2 ENV DATABRICKS_TF_EXEC_PATH "/app/bin/terraform" ENV DATABRICKS_TF_CLI_CONFIG_FILE "/app/config/config.tfrc" diff --git a/bundle/deploy/terraform/pkg.go b/bundle/deploy/terraform/pkg.go index 6c8b108337..83ea796024 100644 --- a/bundle/deploy/terraform/pkg.go +++ b/bundle/deploy/terraform/pkg.go @@ -77,11 +77,12 @@ type Checksum struct { } type TerraformMetadata struct { - Version string `json:"version"` - Checksum Checksum `json:"checksum"` - ProviderHost string `json:"providerHost"` - ProviderSource string `json:"providerSource"` - ProviderVersion string `json:"providerVersion"` + Version string `json:"version"` + Checksum Checksum `json:"checksum"` + ProviderHost string `json:"providerHost"` + ProviderSource string `json:"providerSource"` + ProviderVersion string `json:"providerVersion"` + ProviderChecksum Checksum `json:"providerChecksum"` } func NewTerraformMetadata(ctx context.Context) (*TerraformMetadata, error) { @@ -98,6 +99,10 @@ func NewTerraformMetadata(ctx context.Context) (*TerraformMetadata, error) { ProviderHost: schema.ProviderHost, ProviderSource: schema.ProviderSource, ProviderVersion: schema.ProviderVersion, + ProviderChecksum: Checksum{ + LinuxAmd64: schema.ProviderChecksumLinuxAmd64, + LinuxArm64: schema.ProviderChecksumLinuxArm64, + }, }, nil } diff --git a/bundle/deploy/terraform/pkg_test.go b/bundle/deploy/terraform/pkg_test.go index 2b19e99234..f427112271 100644 --- a/bundle/deploy/terraform/pkg_test.go +++ b/bundle/deploy/terraform/pkg_test.go @@ -54,6 +54,17 @@ func TestTerraformArchiveChecksums(t *testing.T) { downloadAndChecksum(t, armUrl, tv.ChecksumLinuxArm64) } +func TestTerraformProviderArchiveChecksums(t *testing.T) { + metadata, err := NewTerraformMetadata(t.Context()) + require.NoError(t, err) + + amdUrl := fmt.Sprintf("https://github.com/databricks/terraform-provider-databricks/releases/download/v%s/terraform-provider-databricks_%s_linux_amd64.zip", metadata.ProviderVersion, metadata.ProviderVersion) + armUrl := fmt.Sprintf("https://github.com/databricks/terraform-provider-databricks/releases/download/v%s/terraform-provider-databricks_%s_linux_arm64.zip", metadata.ProviderVersion, metadata.ProviderVersion) + + downloadAndChecksum(t, amdUrl, metadata.ProviderChecksum.LinuxAmd64) + downloadAndChecksum(t, armUrl, metadata.ProviderChecksum.LinuxArm64) +} + func TestGetTerraformVersionDefault(t *testing.T) { // Verify that the default version is used tv, isDefault, err := GetTerraformVersion(t.Context()) diff --git a/bundle/internal/tf/codegen/generator/generator.go b/bundle/internal/tf/codegen/generator/generator.go index e135d13f9e..f96a9e4bc3 100644 --- a/bundle/internal/tf/codegen/generator/generator.go +++ b/bundle/internal/tf/codegen/generator/generator.go @@ -35,8 +35,10 @@ func (c *collection) Generate(path string) error { } type root struct { - OutputFile string - ProviderVersion string + OutputFile string + ProviderVersion string + ProviderChecksumLinuxAmd64 string + ProviderChecksumLinuxArm64 string } func (r *root) Generate(path string) error { @@ -147,8 +149,10 @@ func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error // Generate root.go { r := &root{ - OutputFile: "root.go", - ProviderVersion: schemapkg.ProviderVersion, + OutputFile: "root.go", + ProviderVersion: schemapkg.ProviderVersion, + ProviderChecksumLinuxAmd64: schemapkg.ProviderChecksumLinuxAmd64, + ProviderChecksumLinuxArm64: schemapkg.ProviderChecksumLinuxArm64, } err := r.Generate(path) if err != nil { diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index 5c02827381..8ca5a2a6c8 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,3 +1,12 @@ package schema const ProviderVersion = "1.111.0" + +// Checksums for the Databricks Terraform provider archive. These are not used +// inside the CLI. They are co-located here to be output in the +// "databricks bundle debug terraform" output. Downstream applications like the +// CLI docker image use these checksums to verify the integrity of the downloaded +// provider archive. Please update these when the provider version is bumped. +// The checksums are obtained from https://github.com/databricks/terraform-provider-databricks/releases. +const ProviderChecksumLinuxAmd64 = "c1b46bbaf5c4a0b253309dad072e05025e24731536719d4408bacd48dc0ccfd9" +const ProviderChecksumLinuxArm64 = "ce379c424009b01ec4762dee4d0db27cfc554d921b55a0af8e4203b3652259e9" diff --git a/bundle/internal/tf/codegen/templates/root.go.tmpl b/bundle/internal/tf/codegen/templates/root.go.tmpl index b5c53c1615..fc356042cd 100644 --- a/bundle/internal/tf/codegen/templates/root.go.tmpl +++ b/bundle/internal/tf/codegen/templates/root.go.tmpl @@ -22,6 +22,8 @@ type Root struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" const ProviderVersion = "{{ .ProviderVersion }}" +const ProviderChecksumLinuxAmd64 = "{{ .ProviderChecksumLinuxAmd64 }}" +const ProviderChecksumLinuxArm64 = "{{ .ProviderChecksumLinuxArm64 }}" func NewRoot() *Root { return &Root{ diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index ed77d10967..4c19c2c4a4 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -22,6 +22,8 @@ type Root struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" const ProviderVersion = "1.111.0" +const ProviderChecksumLinuxAmd64 = "c1b46bbaf5c4a0b253309dad072e05025e24731536719d4408bacd48dc0ccfd9" +const ProviderChecksumLinuxArm64 = "ce379c424009b01ec4762dee4d0db27cfc554d921b55a0af8e4203b3652259e9" func NewRoot() *Root { return &Root{ diff --git a/docker/setup.sh b/docker/setup.sh index 0dc06ce1e2..d6e6e3b4ad 100755 --- a/docker/setup.sh +++ b/docker/setup.sh @@ -30,3 +30,11 @@ mv zip/terraform/terraform /app/bin/terraform TF_PROVIDER_NAME=terraform-provider-databricks_${DATABRICKS_TF_PROVIDER_VERSION}_linux_${ARCH}.zip mkdir -p /app/providers/registry.terraform.io/databricks/databricks wget https://github.com/databricks/terraform-provider-databricks/releases/download/v${DATABRICKS_TF_PROVIDER_VERSION}/${TF_PROVIDER_NAME} -O /app/providers/registry.terraform.io/databricks/databricks/${TF_PROVIDER_NAME} + +# Verify the provider checksum. +EXPECTED_PROVIDER_CHECKSUM="$(/app/databricks bundle debug terraform --output json | jq -r .terraform.providerChecksum.linux_$ARCH)" +COMPUTED_PROVIDER_CHECKSUM=$(sha256sum /app/providers/registry.terraform.io/databricks/databricks/${TF_PROVIDER_NAME} | awk '{ print $1 }') +if [ "$COMPUTED_PROVIDER_CHECKSUM" != "$EXPECTED_PROVIDER_CHECKSUM" ]; then + echo "Checksum mismatch for Terraform provider. Version: $DATABRICKS_TF_PROVIDER_VERSION, Arch: $ARCH, Expected checksum: $EXPECTED_PROVIDER_CHECKSUM, Computed checksum: $COMPUTED_PROVIDER_CHECKSUM." + exit 1 +fi From fc48e0d7e836132e861e4860a3092c330c2c340f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 15:29:08 +0100 Subject: [PATCH 2/7] Auto-fetch provider checksums during codegen instead of hardcoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The codegen tool (`go run .`) now automatically downloads the SHA256SUMS file from the GitHub release and embeds the checksums into the generated root.go. When bumping the provider version, developers only need to update version.go — checksums are resolved automatically. Co-authored-by: Isaac --- bundle/internal/tf/codegen/README.md | 1 + .../tf/codegen/generator/generator.go | 6 +- bundle/internal/tf/codegen/main.go | 12 +++- bundle/internal/tf/codegen/schema/checksum.go | 67 +++++++++++++++++++ bundle/internal/tf/codegen/schema/version.go | 9 --- 5 files changed, 81 insertions(+), 14 deletions(-) create mode 100644 bundle/internal/tf/codegen/schema/checksum.go diff --git a/bundle/internal/tf/codegen/README.md b/bundle/internal/tf/codegen/README.md index b1f8a33a8b..f309785a21 100644 --- a/bundle/internal/tf/codegen/README.md +++ b/bundle/internal/tf/codegen/README.md @@ -7,6 +7,7 @@ The entry point for this tool is `.`. It uses `./tmp` a temporary data directory and `../schema` as output directory. It automatically installs the Terraform binary as well as the Databricks Terraform provider. +It also fetches SHA256 checksums for the provider archive from GitHub releases. Run with: diff --git a/bundle/internal/tf/codegen/generator/generator.go b/bundle/internal/tf/codegen/generator/generator.go index f96a9e4bc3..47af677c00 100644 --- a/bundle/internal/tf/codegen/generator/generator.go +++ b/bundle/internal/tf/codegen/generator/generator.go @@ -53,7 +53,7 @@ func (r *root) Generate(path string) error { return tmpl.Execute(f, r) } -func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error { +func Run(ctx context.Context, schema *tfjson.ProviderSchema, checksums *schemapkg.ProviderChecksums, path string) error { // Generate types for resources var resources []*namedBlock for _, k := range sortKeys(schema.ResourceSchemas) { @@ -151,8 +151,8 @@ func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error r := &root{ OutputFile: "root.go", ProviderVersion: schemapkg.ProviderVersion, - ProviderChecksumLinuxAmd64: schemapkg.ProviderChecksumLinuxAmd64, - ProviderChecksumLinuxArm64: schemapkg.ProviderChecksumLinuxArm64, + ProviderChecksumLinuxAmd64: checksums.LinuxAmd64, + ProviderChecksumLinuxArm64: checksums.LinuxArm64, } err := r.Generate(path) if err != nil { diff --git a/bundle/internal/tf/codegen/main.go b/bundle/internal/tf/codegen/main.go index e4982c2bc8..bc7ce6663a 100644 --- a/bundle/internal/tf/codegen/main.go +++ b/bundle/internal/tf/codegen/main.go @@ -11,12 +11,20 @@ import ( func main() { ctx := context.Background() - schema, err := schema.Load(ctx) + s, err := schema.Load(ctx) if err != nil { log.Fatal(err) } - err = generator.Run(ctx, schema, "../schema") + log.Printf("fetching provider checksums for v%s", schema.ProviderVersion) + checksums, err := schema.FetchProviderChecksums(schema.ProviderVersion) + if err != nil { + log.Fatal(err) + } + log.Printf(" linux_amd64: %s", checksums.LinuxAmd64) + log.Printf(" linux_arm64: %s", checksums.LinuxArm64) + + err = generator.Run(ctx, s, checksums, "../schema") if err != nil { log.Fatal(err) } diff --git a/bundle/internal/tf/codegen/schema/checksum.go b/bundle/internal/tf/codegen/schema/checksum.go new file mode 100644 index 0000000000..ef266740df --- /dev/null +++ b/bundle/internal/tf/codegen/schema/checksum.go @@ -0,0 +1,67 @@ +package schema + +import ( + "bufio" + "fmt" + "net/http" + "strings" +) + +// ProviderChecksums holds the SHA256 checksums for the Databricks Terraform +// provider archive for supported Linux architectures. +type ProviderChecksums struct { + LinuxAmd64 string + LinuxArm64 string +} + +// FetchProviderChecksums downloads the SHA256SUMS file from the GitHub release +// for the given provider version and extracts checksums for the linux_amd64 and +// linux_arm64 archives. +// https://github.com/databricks/terraform-provider-databricks/releases +func FetchProviderChecksums(version string) (*ProviderChecksums, error) { + url := fmt.Sprintf( + "https://github.com/databricks/terraform-provider-databricks/releases/download/v%s/terraform-provider-databricks_%s_SHA256SUMS", + version, version, + ) + + resp, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("downloading SHA256SUMS for provider v%s: %w", version, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("downloading SHA256SUMS for provider v%s: HTTP %s", version, resp.Status) + } + + checksums := &ProviderChecksums{} + amd64Suffix := fmt.Sprintf("terraform-provider-databricks_%s_linux_amd64.zip", version) + arm64Suffix := fmt.Sprintf("terraform-provider-databricks_%s_linux_arm64.zip", version) + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + line := scanner.Text() + parts := strings.Fields(line) + if len(parts) != 2 { + continue + } + switch parts[1] { + case amd64Suffix: + checksums.LinuxAmd64 = parts[0] + case arm64Suffix: + checksums.LinuxArm64 = parts[0] + } + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("reading SHA256SUMS for provider v%s: %w", version, err) + } + + if checksums.LinuxAmd64 == "" { + return nil, fmt.Errorf("checksum not found for %s in SHA256SUMS", amd64Suffix) + } + if checksums.LinuxArm64 == "" { + return nil, fmt.Errorf("checksum not found for %s in SHA256SUMS", arm64Suffix) + } + + return checksums, nil +} diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index 8ca5a2a6c8..5c02827381 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,12 +1,3 @@ package schema const ProviderVersion = "1.111.0" - -// Checksums for the Databricks Terraform provider archive. These are not used -// inside the CLI. They are co-located here to be output in the -// "databricks bundle debug terraform" output. Downstream applications like the -// CLI docker image use these checksums to verify the integrity of the downloaded -// provider archive. Please update these when the provider version is bumped. -// The checksums are obtained from https://github.com/databricks/terraform-provider-databricks/releases. -const ProviderChecksumLinuxAmd64 = "c1b46bbaf5c4a0b253309dad072e05025e24731536719d4408bacd48dc0ccfd9" -const ProviderChecksumLinuxArm64 = "ce379c424009b01ec4762dee4d0db27cfc554d921b55a0af8e4203b3652259e9" From cbbc9a39928868708d44191d810ca4be8e89e8f8 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 15:52:38 +0100 Subject: [PATCH 3/7] Skip checksum download tests in short mode These tests download large archives from the internet. Gate them behind `testing.Short()` so they're skipped during normal CI (`make test`) and only run in nightly/long test sessions. Co-authored-by: Isaac --- bundle/deploy/terraform/pkg_test.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bundle/deploy/terraform/pkg_test.go b/bundle/deploy/terraform/pkg_test.go index f427112271..79f332151f 100644 --- a/bundle/deploy/terraform/pkg_test.go +++ b/bundle/deploy/terraform/pkg_test.go @@ -44,6 +44,10 @@ func downloadAndChecksum(t *testing.T, url, expectedChecksum string) { } func TestTerraformArchiveChecksums(t *testing.T) { + if testing.Short() { + t.Skip("skipping slow test in short mode") + } + tv, isDefault, err := GetTerraformVersion(t.Context()) require.NoError(t, err) assert.True(t, isDefault) @@ -55,6 +59,10 @@ func TestTerraformArchiveChecksums(t *testing.T) { } func TestTerraformProviderArchiveChecksums(t *testing.T) { + if testing.Short() { + t.Skip("skipping slow test in short mode") + } + metadata, err := NewTerraformMetadata(t.Context()) require.NoError(t, err) From cf176528f26a59a66b0804ba22d07e44681275fe Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 15:55:27 +0100 Subject: [PATCH 4/7] Move provider checksum verification from unit test to codegen sanity check Instead of a separate test that downloads large archives on every test run, verify the checksum inline during codegen: FetchProviderChecksums now downloads the linux_amd64 zip and verifies it matches the parsed SHA256SUMS entry. This runs once during `go run .` (provider version bump) rather than on every `make test`. Co-authored-by: Isaac --- bundle/deploy/terraform/pkg_test.go | 14 ------ bundle/internal/tf/codegen/schema/checksum.go | 46 ++++++++++++++++++- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/bundle/deploy/terraform/pkg_test.go b/bundle/deploy/terraform/pkg_test.go index 79f332151f..453ea15e98 100644 --- a/bundle/deploy/terraform/pkg_test.go +++ b/bundle/deploy/terraform/pkg_test.go @@ -58,20 +58,6 @@ func TestTerraformArchiveChecksums(t *testing.T) { downloadAndChecksum(t, armUrl, tv.ChecksumLinuxArm64) } -func TestTerraformProviderArchiveChecksums(t *testing.T) { - if testing.Short() { - t.Skip("skipping slow test in short mode") - } - - metadata, err := NewTerraformMetadata(t.Context()) - require.NoError(t, err) - - amdUrl := fmt.Sprintf("https://github.com/databricks/terraform-provider-databricks/releases/download/v%s/terraform-provider-databricks_%s_linux_amd64.zip", metadata.ProviderVersion, metadata.ProviderVersion) - armUrl := fmt.Sprintf("https://github.com/databricks/terraform-provider-databricks/releases/download/v%s/terraform-provider-databricks_%s_linux_arm64.zip", metadata.ProviderVersion, metadata.ProviderVersion) - - downloadAndChecksum(t, amdUrl, metadata.ProviderChecksum.LinuxAmd64) - downloadAndChecksum(t, armUrl, metadata.ProviderChecksum.LinuxArm64) -} func TestGetTerraformVersionDefault(t *testing.T) { // Verify that the default version is used diff --git a/bundle/internal/tf/codegen/schema/checksum.go b/bundle/internal/tf/codegen/schema/checksum.go index ef266740df..5927319e93 100644 --- a/bundle/internal/tf/codegen/schema/checksum.go +++ b/bundle/internal/tf/codegen/schema/checksum.go @@ -2,7 +2,11 @@ package schema import ( "bufio" + "crypto/sha256" + "encoding/hex" "fmt" + "io" + "log" "net/http" "strings" ) @@ -16,7 +20,8 @@ type ProviderChecksums struct { // FetchProviderChecksums downloads the SHA256SUMS file from the GitHub release // for the given provider version and extracts checksums for the linux_amd64 and -// linux_arm64 archives. +// linux_arm64 archives. It also downloads the linux_amd64 zip to verify that +// the parsed checksum is correct. // https://github.com/databricks/terraform-provider-databricks/releases func FetchProviderChecksums(version string) (*ProviderChecksums, error) { url := fmt.Sprintf( @@ -63,5 +68,44 @@ func FetchProviderChecksums(version string) (*ProviderChecksums, error) { return nil, fmt.Errorf("checksum not found for %s in SHA256SUMS", arm64Suffix) } + // Sanity check: download the linux_amd64 zip and verify the checksum matches. + err = verifyProviderChecksum(version, "linux_amd64", checksums.LinuxAmd64) + if err != nil { + return nil, err + } + return checksums, nil } + +// verifyProviderChecksum downloads the provider zip for the given platform and +// verifies it matches the expected SHA256 checksum. +func verifyProviderChecksum(version, platform, expectedChecksum string) error { + url := fmt.Sprintf( + "https://github.com/databricks/terraform-provider-databricks/releases/download/v%s/terraform-provider-databricks_%s_%s.zip", + version, version, platform, + ) + + log.Printf("verifying checksum for %s provider archive", platform) + resp, err := http.Get(url) + if err != nil { + return fmt.Errorf("downloading provider archive for checksum verification: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("downloading provider archive for checksum verification: HTTP %s", resp.Status) + } + + hash := sha256.New() + if _, err := io.Copy(hash, resp.Body); err != nil { + return fmt.Errorf("computing checksum for provider archive: %w", err) + } + + actualChecksum := hex.EncodeToString(hash.Sum(nil)) + if actualChecksum != expectedChecksum { + return fmt.Errorf("checksum mismatch for %s provider archive: expected %s, got %s", platform, expectedChecksum, actualChecksum) + } + + log.Printf("checksum verified for %s provider archive", platform) + return nil +} From 54b9d8a77402f076dbf0e9e208957341c9ba31da Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 15:56:48 +0100 Subject: [PATCH 5/7] Revert changes to pkg_test.go Co-authored-by: Isaac --- bundle/deploy/terraform/pkg_test.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/bundle/deploy/terraform/pkg_test.go b/bundle/deploy/terraform/pkg_test.go index 453ea15e98..2b19e99234 100644 --- a/bundle/deploy/terraform/pkg_test.go +++ b/bundle/deploy/terraform/pkg_test.go @@ -44,10 +44,6 @@ func downloadAndChecksum(t *testing.T, url, expectedChecksum string) { } func TestTerraformArchiveChecksums(t *testing.T) { - if testing.Short() { - t.Skip("skipping slow test in short mode") - } - tv, isDefault, err := GetTerraformVersion(t.Context()) require.NoError(t, err) assert.True(t, isDefault) @@ -58,7 +54,6 @@ func TestTerraformArchiveChecksums(t *testing.T) { downloadAndChecksum(t, armUrl, tv.ChecksumLinuxArm64) } - func TestGetTerraformVersionDefault(t *testing.T) { // Verify that the default version is used tv, isDefault, err := GetTerraformVersion(t.Context()) From faf09498b5da61b46839afefd1397715da2dba6b Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 15:57:23 +0100 Subject: [PATCH 6/7] Verify both linux_amd64 and linux_arm64 provider checksums during codegen Co-authored-by: Isaac --- bundle/internal/tf/codegen/schema/checksum.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/bundle/internal/tf/codegen/schema/checksum.go b/bundle/internal/tf/codegen/schema/checksum.go index 5927319e93..7cc0678eae 100644 --- a/bundle/internal/tf/codegen/schema/checksum.go +++ b/bundle/internal/tf/codegen/schema/checksum.go @@ -20,8 +20,8 @@ type ProviderChecksums struct { // FetchProviderChecksums downloads the SHA256SUMS file from the GitHub release // for the given provider version and extracts checksums for the linux_amd64 and -// linux_arm64 archives. It also downloads the linux_amd64 zip to verify that -// the parsed checksum is correct. +// linux_arm64 archives. It also downloads both zips to verify that the parsed +// checksums are correct. // https://github.com/databricks/terraform-provider-databricks/releases func FetchProviderChecksums(version string) (*ProviderChecksums, error) { url := fmt.Sprintf( @@ -68,11 +68,15 @@ func FetchProviderChecksums(version string) (*ProviderChecksums, error) { return nil, fmt.Errorf("checksum not found for %s in SHA256SUMS", arm64Suffix) } - // Sanity check: download the linux_amd64 zip and verify the checksum matches. + // Sanity check: download both zips and verify the checksums match. err = verifyProviderChecksum(version, "linux_amd64", checksums.LinuxAmd64) if err != nil { return nil, err } + err = verifyProviderChecksum(version, "linux_arm64", checksums.LinuxArm64) + if err != nil { + return nil, err + } return checksums, nil } From 3c456c1ce80cf7874a87fae0252cc805760831c3 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 17:04:44 +0100 Subject: [PATCH 7/7] Add bundle deployment telemetry for variables, validation gap, and dependency graph Add several new telemetry signals to improve understanding of DABs usage patterns: - Detect nested variable references (var-in-var patterns like ${var.foo_${var.bar}}) - Track variable defaults count, lookup type distribution, override sources (env var, file, CLI flag), and resolution depth - Track validation-pass-deploy-fail gap via "validation_passed" metric with deferred telemetry logging - Track direct engine deployment plan metrics: action types, resource counts, dependency graph depth/width - Remove unused telemetry fields: SetFields, TerraformStateSizeBytes, FromWebTerminal Co-authored-by: Isaac --- .../telemetry/deploy-compute-type/output.txt | 8 + .../telemetry/deploy-experimental/output.txt | 4 + .../deploy-name-prefix/custom/output.txt | 4 + .../mode-development/output.txt | 4 + .../telemetry/deploy-whl-artifacts/output.txt | 8 + .../bundle/telemetry/deploy/out.telemetry.txt | 4 + bundle/bundle.go | 1 + .../mutator/resolve_variable_references.go | 38 ++ .../resolve_variable_references_test.go | 85 ++++ bundle/config/mutator/set_variables.go | 68 +++- bundle/config/mutator/set_variables_test.go | 92 ++++- bundle/deployplan/telemetry.go | 165 ++++++++ bundle/deployplan/telemetry_test.go | 377 ++++++++++++++++++ bundle/phases/deploy.go | 10 + bundle/phases/telemetry.go | 49 +++ cmd/bundle/utils/process.go | 5 + libs/dyn/dynvar/ref.go | 10 + libs/dyn/dynvar/ref_test.go | 20 + libs/telemetry/protos/bundle_deploy.go | 19 +- libs/telemetry/protos/databricks_cli_log.go | 3 - 20 files changed, 937 insertions(+), 37 deletions(-) create mode 100644 bundle/deployplan/telemetry.go create mode 100644 bundle/deployplan/telemetry_test.go diff --git a/acceptance/bundle/telemetry/deploy-compute-type/output.txt b/acceptance/bundle/telemetry/deploy-compute-type/output.txt index a424df8ce1..3b08334fc7 100644 --- a/acceptance/bundle/telemetry/deploy-compute-type/output.txt +++ b/acceptance/bundle/telemetry/deploy-compute-type/output.txt @@ -37,6 +37,10 @@ Deployment complete! "key": "python_wheel_wrapper_is_set", "value": false }, + { + "key": "validation_passed", + "value": true + }, { "key": "skip_artifact_cleanup", "value": false @@ -79,6 +83,10 @@ Deployment complete! "key": "python_wheel_wrapper_is_set", "value": false }, + { + "key": "validation_passed", + "value": true + }, { "key": "skip_artifact_cleanup", "value": false diff --git a/acceptance/bundle/telemetry/deploy-experimental/output.txt b/acceptance/bundle/telemetry/deploy-experimental/output.txt index d96e688b0a..1aff87684d 100644 --- a/acceptance/bundle/telemetry/deploy-experimental/output.txt +++ b/acceptance/bundle/telemetry/deploy-experimental/output.txt @@ -36,6 +36,10 @@ Deployment complete! "key": "python_wheel_wrapper_is_set", "value": false }, + { + "key": "validation_passed", + "value": true + }, { "key": "skip_artifact_cleanup", "value": false diff --git a/acceptance/bundle/telemetry/deploy-name-prefix/custom/output.txt b/acceptance/bundle/telemetry/deploy-name-prefix/custom/output.txt index 31ff8e9cf7..681e1abbdc 100644 --- a/acceptance/bundle/telemetry/deploy-name-prefix/custom/output.txt +++ b/acceptance/bundle/telemetry/deploy-name-prefix/custom/output.txt @@ -32,6 +32,10 @@ Deployment complete! "key": "python_wheel_wrapper_is_set", "value": false }, + { + "key": "validation_passed", + "value": true + }, { "key": "skip_artifact_cleanup", "value": false diff --git a/acceptance/bundle/telemetry/deploy-name-prefix/mode-development/output.txt b/acceptance/bundle/telemetry/deploy-name-prefix/mode-development/output.txt index 39b671bec3..e9e8cf2a50 100644 --- a/acceptance/bundle/telemetry/deploy-name-prefix/mode-development/output.txt +++ b/acceptance/bundle/telemetry/deploy-name-prefix/mode-development/output.txt @@ -32,6 +32,10 @@ Deployment complete! "key": "python_wheel_wrapper_is_set", "value": false }, + { + "key": "validation_passed", + "value": true + }, { "key": "skip_artifact_cleanup", "value": false diff --git a/acceptance/bundle/telemetry/deploy-whl-artifacts/output.txt b/acceptance/bundle/telemetry/deploy-whl-artifacts/output.txt index a9b8ce4ae6..5ff3b768ad 100644 --- a/acceptance/bundle/telemetry/deploy-whl-artifacts/output.txt +++ b/acceptance/bundle/telemetry/deploy-whl-artifacts/output.txt @@ -36,6 +36,10 @@ Deployment complete! "key": "python_wheel_wrapper_is_set", "value": false }, + { + "key": "validation_passed", + "value": true + }, { "key": "skip_artifact_cleanup", "value": false @@ -80,6 +84,10 @@ Deployment complete! "key": "python_wheel_wrapper_is_set", "value": true }, + { + "key": "validation_passed", + "value": true + }, { "key": "skip_artifact_cleanup", "value": true diff --git a/acceptance/bundle/telemetry/deploy/out.telemetry.txt b/acceptance/bundle/telemetry/deploy/out.telemetry.txt index f945233dd1..7bd3c04fda 100644 --- a/acceptance/bundle/telemetry/deploy/out.telemetry.txt +++ b/acceptance/bundle/telemetry/deploy/out.telemetry.txt @@ -66,6 +66,10 @@ "key": "python_wheel_wrapper_is_set", "value": false }, + { + "key": "validation_passed", + "value": true + }, { "key": "skip_artifact_cleanup", "value": false diff --git a/bundle/bundle.go b/bundle/bundle.go index 97824eb839..5145dd3aba 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -56,6 +56,7 @@ type Metrics struct { PythonUpdatedResourcesCount int64 ExecutionTimes []protos.IntMapEntry LocalCacheMeasurementsMs []protos.IntMapEntry // Local cache measurements stored as milliseconds + DeployPlanMetrics []protos.IntMapEntry // Deployment plan and graph metrics (direct engine) } // SetBoolValue sets the value of a boolean metric. diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 9868b4fb95..566e9a4911 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -150,11 +150,18 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) // We rewrite it here to make the resolution logic simpler. varPath := dyn.NewPath(dyn.Key("var")) + // Detect nested variable references like ${var.foo_${var.bar}} and log + // a telemetry event if found. These patterns are not supported by the + // interpolation regex and silently fail to resolve. + m.detectNestedVariableReferences(b) + var diags diag.Diagnostics maxRounds := 1 + m.extraRounds + roundsUsed := 0 for round := range maxRounds { hasUpdates, newDiags := m.resolveOnce(b, prefixes, varPath) + roundsUsed = round + 1 diags = diags.Extend(newDiags) @@ -180,6 +187,13 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) b.Metrics.SetBoolValue("artifacts_reference_used", true) } + if roundsUsed > 1 { + b.Metrics.SetBoolValue("variable_resolution_rounds_gt_1", true) + } + if roundsUsed > 3 { + b.Metrics.SetBoolValue("variable_resolution_rounds_gt_3", true) + } + return diags } @@ -294,6 +308,30 @@ func (m *resolveVariableReferences) selectivelyMutate(b *bundle.Bundle, fn func( }) } +// errNestedVarRefFound is a sentinel error used to short-circuit WalkReadOnly +// once a nested variable reference is detected. +var errNestedVarRefFound = errors.New("nested variable reference found") + +// detectNestedVariableReferences walks the bundle configuration and checks for +// nested variable references like ${var.foo_${var.bar}}. These patterns are not +// supported and are tracked via telemetry to understand how common they are. +func (m *resolveVariableReferences) detectNestedVariableReferences(b *bundle.Bundle) { + err := dyn.WalkReadOnly(b.Config.Value(), func(_ dyn.Path, v dyn.Value) error { + s, ok := v.AsString() + if !ok { + return nil + } + + if dynvar.ContainsNestedVariableReference(s) { + return errNestedVarRefFound + } + return nil + }) + if err == errNestedVarRefFound { + b.Metrics.SetBoolValue("nested_var_reference_used", true) + } +} + func getAllKeys(root dyn.Value) ([]string, error) { var keys []string diff --git a/bundle/config/mutator/resolve_variable_references_test.go b/bundle/config/mutator/resolve_variable_references_test.go index 876980e948..6be86be666 100644 --- a/bundle/config/mutator/resolve_variable_references_test.go +++ b/bundle/config/mutator/resolve_variable_references_test.go @@ -6,10 +6,46 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/config/variable" + "github.com/databricks/cli/libs/telemetry/protos" "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +func TestResolveVariableReferencesDetectsNestedVarRef(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Bundle: config.Bundle{ + Name: "${var.env_${var.region}}", + }, + }, + } + + diags := bundle.Apply(t.Context(), b, ResolveVariableReferencesWithoutResources()) + // The nested reference won't resolve, but we should still detect it. + _ = diags + + assert.Contains(t, b.Metrics.BoolValues, protos.BoolMapEntry{Key: "nested_var_reference_used", Value: true}) +} + +func TestResolveVariableReferencesNoNestedVarRef(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Bundle: config.Bundle{ + Name: "${var.env}", + }, + }, + } + + diags := bundle.Apply(t.Context(), b, ResolveVariableReferencesWithoutResources()) + _ = diags + + for _, entry := range b.Metrics.BoolValues { + assert.NotEqual(t, "nested_var_reference_used", entry.Key) + } +} + func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { testCases := []struct { enabled bool @@ -63,3 +99,52 @@ func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { testCase.assert(t, b) } } + +func TestResolveVariableReferencesRoundsNoReferences(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Bundle: config.Bundle{ + Name: "literal-name", + }, + }, + } + + diags := bundle.Apply(t.Context(), b, ResolveVariableReferencesWithoutResources()) + require.NoError(t, diags.Error()) + + // No references means a single round with no updates, so gt_1 should not be set. + for _, entry := range b.Metrics.BoolValues { + assert.NotEqual(t, "variable_resolution_rounds_gt_1", entry.Key) + assert.NotEqual(t, "variable_resolution_rounds_gt_3", entry.Key) + } +} + +func TestResolveVariableReferencesRoundsGt1MultiRound(t *testing.T) { + // Set up a chain: bundle.name -> var.a -> var.b -> literal. + // This requires 2 rounds to fully resolve. + b := &bundle.Bundle{ + Config: config.Root{ + Bundle: config.Bundle{ + Name: "${var.a}", + }, + Variables: map[string]*variable.Variable{ + "a": { + Value: "${var.b}", + }, + "b": { + Value: "final", + }, + }, + }, + } + + diags := bundle.Apply(t.Context(), b, ResolveVariableReferencesWithoutResources()) + require.NoError(t, diags.Error()) + assert.Equal(t, "final", b.Config.Bundle.Name) + + assert.Contains(t, b.Metrics.BoolValues, protos.BoolMapEntry{Key: "variable_resolution_rounds_gt_1", Value: true}) + // 2 rounds should not trigger gt_3. + for _, entry := range b.Metrics.BoolValues { + assert.NotEqual(t, "variable_resolution_rounds_gt_3", entry.Key) + } +} diff --git a/bundle/config/mutator/set_variables.go b/bundle/config/mutator/set_variables.go index 3038b05aca..a4d913d1ad 100644 --- a/bundle/config/mutator/set_variables.go +++ b/bundle/config/mutator/set_variables.go @@ -30,30 +30,42 @@ func getDefaultVariableFilePath(target string) string { return ".databricks/bundle/" + target + "/variable-overrides.json" } -func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, name string, fileDefault dyn.Value) (dyn.Value, error) { - // case: variable already has value initialized, so skip +// variableOverrideSource tracks which override method resolved a variable's value. +type variableOverrideSource int + +const ( + variableOverrideSourceCLI variableOverrideSource = iota // --var flag (value already set) + variableOverrideSourceEnvVar // BUNDLE_VAR_* environment variable + variableOverrideSourceFile // variable-overrides.json + variableOverrideSourceDefault // default value from config + variableOverrideSourceNone // no value resolved (lookup or error) +) + +func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, name string, fileDefault dyn.Value) (dyn.Value, variableOverrideSource, error) { + // case: variable already has value initialized, so skip. + // This happens when the value is set via the --var CLI flag. if variable.HasValue() { - return v, nil + return v, variableOverrideSourceCLI, nil } // case: read and set variable value from process environment envVarName := bundleVarPrefix + name if val, ok := env.Lookup(ctx, envVarName); ok { if variable.IsComplex() { - return dyn.InvalidValue, fmt.Errorf(`setting via environment variables (%s) is not supported for complex variable %s`, envVarName, name) + return dyn.InvalidValue, variableOverrideSourceNone, fmt.Errorf(`setting via environment variables (%s) is not supported for complex variable %s`, envVarName, name) } v, err := dyn.Set(v, "value", dyn.V(val)) if err != nil { - return dyn.InvalidValue, fmt.Errorf(`failed to assign value "%s" to variable %s from environment variable %s with error: %v`, val, name, envVarName, err) + return dyn.InvalidValue, variableOverrideSourceNone, fmt.Errorf(`failed to assign value "%s" to variable %s from environment variable %s with error: %v`, val, name, envVarName, err) } - return v, nil + return v, variableOverrideSourceEnvVar, nil } // case: Defined a variable for named lookup for a resource // It will be resolved later in ResolveResourceReferences mutator if variable.Lookup != nil { - return v, nil + return v, variableOverrideSourceNone, nil } // case: Set the variable to the default value from the variable file @@ -62,36 +74,36 @@ func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, hasComplexValue := fileDefault.Kind() == dyn.KindMap || fileDefault.Kind() == dyn.KindSequence if hasComplexType && !hasComplexValue { - return dyn.InvalidValue, fmt.Errorf(`variable %s is of type complex, but the value in the variable file is not a complex type`, name) + return dyn.InvalidValue, variableOverrideSourceNone, fmt.Errorf(`variable %s is of type complex, but the value in the variable file is not a complex type`, name) } if !hasComplexType && hasComplexValue { - return dyn.InvalidValue, fmt.Errorf(`variable %s is not of type complex, but the value in the variable file is a complex type`, name) + return dyn.InvalidValue, variableOverrideSourceNone, fmt.Errorf(`variable %s is not of type complex, but the value in the variable file is a complex type`, name) } v, err := dyn.Set(v, "value", fileDefault) if err != nil { - return dyn.InvalidValue, fmt.Errorf(`failed to assign default value from variable file to variable %s with error: %v`, name, err) + return dyn.InvalidValue, variableOverrideSourceNone, fmt.Errorf(`failed to assign default value from variable file to variable %s with error: %v`, name, err) } - return v, nil + return v, variableOverrideSourceFile, nil } // case: Set the variable to its default value if variable.HasDefault() { vDefault, err := dyn.Get(v, "default") if err != nil { - return dyn.InvalidValue, fmt.Errorf(`failed to get default value from config "%s" for variable %s with error: %v`, variable.Default, name, err) + return dyn.InvalidValue, variableOverrideSourceNone, fmt.Errorf(`failed to get default value from config "%s" for variable %s with error: %v`, variable.Default, name, err) } v, err := dyn.Set(v, "value", vDefault) if err != nil { - return dyn.InvalidValue, fmt.Errorf(`failed to assign default value from config "%s" to variable %s with error: %v`, variable.Default, name, err) + return dyn.InvalidValue, variableOverrideSourceNone, fmt.Errorf(`failed to assign default value from config "%s" to variable %s with error: %v`, variable.Default, name, err) } - return v, nil + return v, variableOverrideSourceDefault, nil } // We should have had a value to set for the variable at this point. - return dyn.InvalidValue, fmt.Errorf(`no value assigned to required variable %s. Variables are usually assigned in databricks.yml, and they can be overridden using "--var", the %s environment variable, or %s`, name, bundleVarPrefix+name, getDefaultVariableFilePath("")) + return dyn.InvalidValue, variableOverrideSourceNone, fmt.Errorf(`no value assigned to required variable %s. Variables are usually assigned in databricks.yml, and they can be overridden using "--var", the %s environment variable, or %s`, name, bundleVarPrefix+name, getDefaultVariableFilePath("")) } func readVariablesFromFile(b *bundle.Bundle) (dyn.Value, diag.Diagnostics) { @@ -128,6 +140,11 @@ func (m *setVariables) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos if diags.HasError() { return diags } + + envVarUsed := false + fileUsed := false + cliUsed := false + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { return dyn.Map(v, "variables", dyn.Foreach(func(p dyn.Path, variable dyn.Value) (dyn.Value, error) { name := p[1].Key() @@ -137,9 +154,28 @@ func (m *setVariables) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos } fileDefault, _ := dyn.Get(defaults, name) - return setVariable(ctx, variable, v, name, fileDefault) + result, source, err := setVariable(ctx, variable, v, name, fileDefault) + switch source { + case variableOverrideSourceCLI: + cliUsed = true + case variableOverrideSourceEnvVar: + envVarUsed = true + case variableOverrideSourceFile: + fileUsed = true + } + return result, err })) }) + if envVarUsed { + b.Metrics.SetBoolValue("variable_override_env_var_used", true) + } + if fileUsed { + b.Metrics.SetBoolValue("variable_override_file_used", true) + } + if cliUsed { + b.Metrics.SetBoolValue("variable_override_cli_flag_used", true) + } + return diags.Extend(diag.FromErr(err)) } diff --git a/bundle/config/mutator/set_variables_test.go b/bundle/config/mutator/set_variables_test.go index e69bbf34c3..5465b6e837 100644 --- a/bundle/config/mutator/set_variables_test.go +++ b/bundle/config/mutator/set_variables_test.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/bundle/config/variable" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/telemetry/protos" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -24,8 +25,9 @@ func TestSetVariableFromProcessEnvVar(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(t.Context(), v, &variable, "foo", dyn.NilValue) + v, source, err := setVariable(t.Context(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) + assert.Equal(t, variableOverrideSourceEnvVar, source) err = convert.ToTyped(&variable, v) require.NoError(t, err) @@ -42,8 +44,9 @@ func TestSetVariableUsingDefaultValue(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(t.Context(), v, &variable, "foo", dyn.NilValue) + v, source, err := setVariable(t.Context(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) + assert.Equal(t, variableOverrideSourceDefault, source) err = convert.ToTyped(&variable, v) require.NoError(t, err) @@ -64,8 +67,9 @@ func TestSetVariableWhenAlreadyAValueIsAssigned(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(t.Context(), v, &variable, "foo", dyn.NilValue) + v, source, err := setVariable(t.Context(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) + assert.Equal(t, variableOverrideSourceCLI, source) err = convert.ToTyped(&variable, v) require.NoError(t, err) @@ -89,8 +93,9 @@ func TestSetVariableEnvVarValueDoesNotOverridePresetValue(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(t.Context(), v, &variable, "foo", dyn.NilValue) + v, source, err := setVariable(t.Context(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) + assert.Equal(t, variableOverrideSourceCLI, source) err = convert.ToTyped(&variable, v) require.NoError(t, err) @@ -106,7 +111,7 @@ func TestSetVariablesErrorsIfAValueCouldNotBeResolved(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - _, err = setVariable(t.Context(), v, &variable, "foo", dyn.NilValue) + _, _, err = setVariable(t.Context(), v, &variable, "foo", dyn.NilValue) assert.ErrorContains(t, err, "no value assigned to required variable foo. Variables are usually assigned in databricks.yml, and they can be overridden using \"--var\", the BUNDLE_VAR_foo environment variable, or .databricks/bundle//variable-overrides.json") } @@ -156,6 +161,81 @@ func TestSetComplexVariablesViaEnvVariablesIsNotAllowed(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - _, err = setVariable(t.Context(), v, &variable, "foo", dyn.NilValue) + _, _, err = setVariable(t.Context(), v, &variable, "foo", dyn.NilValue) assert.ErrorContains(t, err, "setting via environment variables (BUNDLE_VAR_foo) is not supported for complex variable foo") } + +func TestSetVariablesMutatorTracksEnvVarOverride(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Variables: map[string]*variable.Variable{ + "a": { + Description: "resolved from env var", + Default: "default-a", + }, + }, + }, + } + + t.Setenv("BUNDLE_VAR_a", "env-val") + + diags := bundle.Apply(t.Context(), b, SetVariables()) + require.NoError(t, diags.Error()) + assert.Contains(t, b.Metrics.BoolValues, protos.BoolMapEntry{Key: "variable_override_env_var_used", Value: true}) +} + +func TestSetVariablesMutatorTracksCliFlagOverride(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Variables: map[string]*variable.Variable{ + "a": { + Description: "already has a value (set via CLI flag)", + Value: "cli-val", + }, + }, + }, + } + + diags := bundle.Apply(t.Context(), b, SetVariables()) + require.NoError(t, diags.Error()) + assert.Contains(t, b.Metrics.BoolValues, protos.BoolMapEntry{Key: "variable_override_cli_flag_used", Value: true}) +} + +func TestSetVariablesMutatorNoOverrideMetricsForDefaults(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Variables: map[string]*variable.Variable{ + "a": { + Description: "resolved to default", + Default: "default-a", + }, + }, + }, + } + + diags := bundle.Apply(t.Context(), b, SetVariables()) + require.NoError(t, diags.Error()) + + for _, entry := range b.Metrics.BoolValues { + assert.NotEqual(t, "variable_override_env_var_used", entry.Key) + assert.NotEqual(t, "variable_override_file_used", entry.Key) + assert.NotEqual(t, "variable_override_cli_flag_used", entry.Key) + } +} + +func TestSetVariableFromFileReturnsFileSource(t *testing.T) { + variable := variable.Variable{ + Description: "a test variable", + } + + v, err := convert.FromTyped(variable, dyn.NilValue) + require.NoError(t, err) + + v, source, err := setVariable(t.Context(), v, &variable, "foo", dyn.V("file-val")) + require.NoError(t, err) + assert.Equal(t, variableOverrideSourceFile, source) + + err = convert.ToTyped(&variable, v) + require.NoError(t, err) + assert.Equal(t, "file-val", variable.Value) +} diff --git a/bundle/deployplan/telemetry.go b/bundle/deployplan/telemetry.go new file mode 100644 index 0000000000..86b8718d4f --- /dev/null +++ b/bundle/deployplan/telemetry.go @@ -0,0 +1,165 @@ +package deployplan + +import ( + "strings" + + "github.com/databricks/cli/libs/telemetry/protos" +) + +// ComputePlanTelemetry computes telemetry metrics from a deployment plan. +// It returns boolean metrics (action type presence, dependency presence) and +// integer metrics (resource counts, graph depth/width, per-type counts). +func ComputePlanTelemetry(plan *Plan) (boolMetrics []protos.BoolMapEntry, intMetrics []protos.IntMapEntry) { + hasCreates := false + hasDeletes := false + hasRecreates := false + hasUpdates := false + hasDependencies := false + nonSkipCount := int64(0) + + // Count resources by type (e.g., "jobs", "pipelines", "schemas"). + typeCounts := map[string]int64{} + + for _, entry := range plan.Plan { + if entry.Action != Skip { + nonSkipCount++ + } + + switch entry.Action { + case Create: + hasCreates = true + case Delete: + hasDeletes = true + case Recreate: + hasRecreates = true + case Update, UpdateWithID, Resize: + hasUpdates = true + } + + if len(entry.DependsOn) > 0 { + hasDependencies = true + } + } + + // Count per resource type from keys in plan. + for key, entry := range plan.Plan { + if entry.Action == Skip { + continue + } + rType := resourceTypeFromKey(key) + if rType != "" { + typeCounts[rType]++ + } + } + + boolMetrics = []protos.BoolMapEntry{ + {Key: "deploy_plan_has_creates", Value: hasCreates}, + {Key: "deploy_plan_has_deletes", Value: hasDeletes}, + {Key: "deploy_plan_has_recreates", Value: hasRecreates}, + {Key: "deploy_plan_has_updates", Value: hasUpdates}, + {Key: "deploy_graph_has_dependencies", Value: hasDependencies}, + } + + intMetrics = []protos.IntMapEntry{ + {Key: "deploy_plan_resource_count", Value: nonSkipCount}, + } + + // Add graph depth and width metrics. + maxDepth, maxWidth := computeGraphDepthWidth(plan) + intMetrics = append(intMetrics, + protos.IntMapEntry{Key: "deploy_graph_max_depth", Value: int64(maxDepth)}, + protos.IntMapEntry{Key: "deploy_graph_max_width", Value: int64(maxWidth)}, + ) + + // Add per-type counts. + for rType, count := range typeCounts { + intMetrics = append(intMetrics, protos.IntMapEntry{ + Key: "deploy_plan_" + rType + "_count", + Value: count, + }) + } + + return boolMetrics, intMetrics +} + +// resourceTypeFromKey extracts the resource type from a key like "resources.jobs.foo" +// or "resources.jobs.foo.permissions". Returns the second component (e.g., "jobs"). +func resourceTypeFromKey(key string) string { + parts := strings.SplitN(key, ".", 4) + if len(parts) >= 2 { + return parts[1] + } + return "" +} + +// computeGraphDepthWidth computes the maximum depth (longest path) and maximum +// width (most nodes at a single depth level) in the DAG defined by the plan's +// dependency edges. Depth is measured as number of edges on the longest path. +func computeGraphDepthWidth(plan *Plan) (maxDepth, maxWidth int) { + if len(plan.Plan) == 0 { + return 0, 0 + } + + // Build adjacency list from DependsOn edges. Direction: dependency -> dependent + // (same as makeGraph for non-delete entries). For telemetry we treat all entries + // uniformly regardless of action type. + adj := make(map[string][]string, len(plan.Plan)) + inDegree := make(map[string]int, len(plan.Plan)) + for key := range plan.Plan { + adj[key] = nil + inDegree[key] = 0 + } + + for key, entry := range plan.Plan { + for _, dep := range entry.DependsOn { + if _, exists := plan.Plan[dep.Node]; !exists { + continue + } + adj[dep.Node] = append(adj[dep.Node], key) + inDegree[key]++ + } + } + + // BFS-based topological traversal to compute depth of each node. + depth := make(map[string]int, len(plan.Plan)) + var queue []string + for key, deg := range inDegree { + if deg == 0 { + queue = append(queue, key) + depth[key] = 0 + } + } + + for len(queue) > 0 { + node := queue[0] + queue = queue[1:] + + for _, child := range adj[node] { + childDepth := depth[node] + 1 + if childDepth > depth[child] { + depth[child] = childDepth + } + inDegree[child]-- + if inDegree[child] == 0 { + queue = append(queue, child) + } + } + } + + // Find max depth and count nodes per depth level for max width. + widthAtDepth := map[int]int{} + for _, d := range depth { + if d > maxDepth { + maxDepth = d + } + widthAtDepth[d]++ + } + + for _, w := range widthAtDepth { + if w > maxWidth { + maxWidth = w + } + } + + return maxDepth, maxWidth +} diff --git a/bundle/deployplan/telemetry_test.go b/bundle/deployplan/telemetry_test.go new file mode 100644 index 0000000000..e317cc71b4 --- /dev/null +++ b/bundle/deployplan/telemetry_test.go @@ -0,0 +1,377 @@ +package deployplan + +import ( + "slices" + "testing" + + "github.com/databricks/cli/libs/telemetry/protos" + "github.com/stretchr/testify/assert" +) + +func findBool(entries []protos.BoolMapEntry, key string) (bool, bool) { + for _, e := range entries { + if e.Key == key { + return e.Value, true + } + } + return false, false +} + +func findInt(entries []protos.IntMapEntry, key string) (int64, bool) { + for _, e := range entries { + if e.Key == key { + return e.Value, true + } + } + return 0, false +} + +func TestComputePlanTelemetryEmptyPlan(t *testing.T) { + plan := &Plan{Plan: map[string]*PlanEntry{}} + + boolMetrics, intMetrics := ComputePlanTelemetry(plan) + + v, ok := findBool(boolMetrics, "deploy_plan_has_creates") + assert.True(t, ok) + assert.False(t, v) + + v, ok = findBool(boolMetrics, "deploy_plan_has_deletes") + assert.True(t, ok) + assert.False(t, v) + + v, ok = findBool(boolMetrics, "deploy_plan_has_recreates") + assert.True(t, ok) + assert.False(t, v) + + v, ok = findBool(boolMetrics, "deploy_plan_has_updates") + assert.True(t, ok) + assert.False(t, v) + + v, ok = findBool(boolMetrics, "deploy_graph_has_dependencies") + assert.True(t, ok) + assert.False(t, v) + + count, ok := findInt(intMetrics, "deploy_plan_resource_count") + assert.True(t, ok) + assert.Equal(t, int64(0), count) + + depth, ok := findInt(intMetrics, "deploy_graph_max_depth") + assert.True(t, ok) + assert.Equal(t, int64(0), depth) + + width, ok := findInt(intMetrics, "deploy_graph_max_width") + assert.True(t, ok) + assert.Equal(t, int64(0), width) +} + +func TestComputePlanTelemetryActionTypes(t *testing.T) { + plan := &Plan{ + Plan: map[string]*PlanEntry{ + "resources.jobs.a": {Action: Create}, + "resources.jobs.b": {Action: Delete}, + "resources.pipelines.c": {Action: Recreate}, + "resources.jobs.d": {Action: Update}, + "resources.jobs.e": {Action: Skip}, + "resources.schemas.f": {Action: UpdateWithID}, + "resources.pipelines.g": {Action: Resize}, + }, + } + + boolMetrics, intMetrics := ComputePlanTelemetry(plan) + + v, _ := findBool(boolMetrics, "deploy_plan_has_creates") + assert.True(t, v) + + v, _ = findBool(boolMetrics, "deploy_plan_has_deletes") + assert.True(t, v) + + v, _ = findBool(boolMetrics, "deploy_plan_has_recreates") + assert.True(t, v) + + v, _ = findBool(boolMetrics, "deploy_plan_has_updates") + assert.True(t, v) + + // 6 non-skip entries + count, _ := findInt(intMetrics, "deploy_plan_resource_count") + assert.Equal(t, int64(6), count) +} + +func TestComputePlanTelemetryNoUpdates(t *testing.T) { + plan := &Plan{ + Plan: map[string]*PlanEntry{ + "resources.jobs.a": {Action: Create}, + "resources.jobs.b": {Action: Skip}, + }, + } + + boolMetrics, _ := ComputePlanTelemetry(plan) + + v, _ := findBool(boolMetrics, "deploy_plan_has_creates") + assert.True(t, v) + + v, _ = findBool(boolMetrics, "deploy_plan_has_updates") + assert.False(t, v) + + v, _ = findBool(boolMetrics, "deploy_plan_has_deletes") + assert.False(t, v) + + v, _ = findBool(boolMetrics, "deploy_plan_has_recreates") + assert.False(t, v) +} + +func TestComputePlanTelemetryResourceTypeCounts(t *testing.T) { + plan := &Plan{ + Plan: map[string]*PlanEntry{ + "resources.jobs.a": {Action: Create}, + "resources.jobs.b": {Action: Update}, + "resources.pipelines.c": {Action: Create}, + "resources.schemas.d": {Action: Delete}, + "resources.jobs.e": {Action: Skip}, + }, + } + + _, intMetrics := ComputePlanTelemetry(plan) + + jobsCount, ok := findInt(intMetrics, "deploy_plan_jobs_count") + assert.True(t, ok) + assert.Equal(t, int64(2), jobsCount) + + pipelinesCount, ok := findInt(intMetrics, "deploy_plan_pipelines_count") + assert.True(t, ok) + assert.Equal(t, int64(1), pipelinesCount) + + schemasCount, ok := findInt(intMetrics, "deploy_plan_schemas_count") + assert.True(t, ok) + assert.Equal(t, int64(1), schemasCount) +} + +func TestComputePlanTelemetryDependencyGraph(t *testing.T) { + // A -> B -> C (linear chain, depth=2, width=1) + plan := &Plan{ + Plan: map[string]*PlanEntry{ + "resources.jobs.a": {Action: Create}, + "resources.jobs.b": { + Action: Create, + DependsOn: []DependsOnEntry{ + {Node: "resources.jobs.a", Label: "${resources.jobs.a.id}"}, + }, + }, + "resources.jobs.c": { + Action: Create, + DependsOn: []DependsOnEntry{ + {Node: "resources.jobs.b", Label: "${resources.jobs.b.id}"}, + }, + }, + }, + } + + boolMetrics, intMetrics := ComputePlanTelemetry(plan) + + v, _ := findBool(boolMetrics, "deploy_graph_has_dependencies") + assert.True(t, v) + + depth, _ := findInt(intMetrics, "deploy_graph_max_depth") + assert.Equal(t, int64(2), depth) + + width, _ := findInt(intMetrics, "deploy_graph_max_width") + assert.Equal(t, int64(1), width) +} + +func TestComputePlanTelemetryWideGraph(t *testing.T) { + // A -> B, A -> C, A -> D (fan-out, depth=1, width=3) + plan := &Plan{ + Plan: map[string]*PlanEntry{ + "resources.jobs.a": {Action: Create}, + "resources.jobs.b": { + Action: Create, + DependsOn: []DependsOnEntry{{Node: "resources.jobs.a"}}, + }, + "resources.jobs.c": { + Action: Create, + DependsOn: []DependsOnEntry{{Node: "resources.jobs.a"}}, + }, + "resources.jobs.d": { + Action: Create, + DependsOn: []DependsOnEntry{{Node: "resources.jobs.a"}}, + }, + }, + } + + _, intMetrics := ComputePlanTelemetry(plan) + + depth, _ := findInt(intMetrics, "deploy_graph_max_depth") + assert.Equal(t, int64(1), depth) + + width, _ := findInt(intMetrics, "deploy_graph_max_width") + assert.Equal(t, int64(3), width) +} + +func TestComputePlanTelemetryNoDependencies(t *testing.T) { + plan := &Plan{ + Plan: map[string]*PlanEntry{ + "resources.jobs.a": {Action: Create}, + "resources.pipelines.b": {Action: Update}, + "resources.jobs.c": {Action: Create}, + }, + } + + boolMetrics, intMetrics := ComputePlanTelemetry(plan) + + v, _ := findBool(boolMetrics, "deploy_graph_has_dependencies") + assert.False(t, v) + + depth, _ := findInt(intMetrics, "deploy_graph_max_depth") + assert.Equal(t, int64(0), depth) + + // All 3 nodes at depth 0 + width, _ := findInt(intMetrics, "deploy_graph_max_width") + assert.Equal(t, int64(3), width) +} + +func TestComputePlanTelemetryDiamondGraph(t *testing.T) { + // A -> B, A -> C, B -> D, C -> D (diamond, depth=2, width=2) + plan := &Plan{ + Plan: map[string]*PlanEntry{ + "resources.jobs.a": {Action: Create}, + "resources.jobs.b": { + Action: Create, + DependsOn: []DependsOnEntry{{Node: "resources.jobs.a"}}, + }, + "resources.jobs.c": { + Action: Create, + DependsOn: []DependsOnEntry{{Node: "resources.jobs.a"}}, + }, + "resources.jobs.d": { + Action: Create, + DependsOn: []DependsOnEntry{ + {Node: "resources.jobs.b"}, + {Node: "resources.jobs.c"}, + }, + }, + }, + } + + _, intMetrics := ComputePlanTelemetry(plan) + + depth, _ := findInt(intMetrics, "deploy_graph_max_depth") + assert.Equal(t, int64(2), depth) + + width, _ := findInt(intMetrics, "deploy_graph_max_width") + assert.Equal(t, int64(2), width) +} + +func TestComputePlanTelemetrySkipsExcludedFromTypeCounts(t *testing.T) { + plan := &Plan{ + Plan: map[string]*PlanEntry{ + "resources.jobs.a": {Action: Skip}, + "resources.jobs.b": {Action: Skip}, + }, + } + + _, intMetrics := ComputePlanTelemetry(plan) + + count, _ := findInt(intMetrics, "deploy_plan_resource_count") + assert.Equal(t, int64(0), count) + + // No per-type counts for skipped resources. + _, ok := findInt(intMetrics, "deploy_plan_jobs_count") + assert.False(t, ok) +} + +func TestComputePlanTelemetryDependencyOnMissingNode(t *testing.T) { + // Dependency on a node not in the plan should be ignored for graph metrics. + plan := &Plan{ + Plan: map[string]*PlanEntry{ + "resources.jobs.a": { + Action: Create, + DependsOn: []DependsOnEntry{ + {Node: "resources.jobs.missing"}, + }, + }, + }, + } + + boolMetrics, intMetrics := ComputePlanTelemetry(plan) + + // DependsOn is set, so has_dependencies should be true. + v, _ := findBool(boolMetrics, "deploy_graph_has_dependencies") + assert.True(t, v) + + // Missing node is skipped in graph construction, so depth is 0. + depth, _ := findInt(intMetrics, "deploy_graph_max_depth") + assert.Equal(t, int64(0), depth) +} + +func TestResourceTypeFromKey(t *testing.T) { + tests := []struct { + key string + expected string + }{ + {"resources.jobs.foo", "jobs"}, + {"resources.pipelines.bar", "pipelines"}, + {"resources.schemas.baz", "schemas"}, + {"resources.jobs.foo.permissions", "jobs"}, + {"single", ""}, + } + + for _, tc := range tests { + assert.Equal(t, tc.expected, resourceTypeFromKey(tc.key)) + } +} + +func TestComputePlanTelemetryAllUpdateVariants(t *testing.T) { + tests := []struct { + action ActionType + }{ + {Update}, + {UpdateWithID}, + {Resize}, + } + + for _, tc := range tests { + plan := &Plan{ + Plan: map[string]*PlanEntry{ + "resources.jobs.a": {Action: tc.action}, + }, + } + + boolMetrics, _ := ComputePlanTelemetry(plan) + v, _ := findBool(boolMetrics, "deploy_plan_has_updates") + assert.True(t, v, "expected has_updates=true for action %s", tc.action) + } +} + +func TestComputePlanTelemetryIntMetricsAreDeterministic(t *testing.T) { + plan := &Plan{ + Plan: map[string]*PlanEntry{ + "resources.jobs.a": {Action: Create}, + "resources.pipelines.b": {Action: Update}, + }, + } + + // Run twice and compare. + _, intMetrics1 := ComputePlanTelemetry(plan) + _, intMetrics2 := ComputePlanTelemetry(plan) + + // Sort both by key for comparison. + slices.SortFunc(intMetrics1, func(a, b protos.IntMapEntry) int { + if a.Key < b.Key { + return -1 + } + if a.Key > b.Key { + return 1 + } + return 0 + }) + slices.SortFunc(intMetrics2, func(a, b protos.IntMapEntry) int { + if a.Key < b.Key { + return -1 + } + if a.Key > b.Key { + return 1 + } + return 0 + }) + + assert.Equal(t, intMetrics1, intMetrics2) +} diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 4613a7a211..f5ef6df18a 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -192,6 +192,15 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand return } + // Collect telemetry about the deployment plan and its dependency graph. + if plan != nil && engine.IsDirect() { + boolMetrics, intMetrics := deployplan.ComputePlanTelemetry(plan) + for _, m := range boolMetrics { + b.Metrics.AddBoolValue(m.Key, m.Value) + } + b.Metrics.DeployPlanMetrics = append(b.Metrics.DeployPlanMetrics, intMetrics...) + } + haveApproval, err := approvalForDeploy(ctx, b, plan) if err != nil { logdiag.LogError(ctx, err) @@ -209,6 +218,7 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand } logDeployTelemetry(ctx, b) + bundle.ApplyContext(ctx, b, scripts.Execute(config.ScriptPostDeploy)) } diff --git a/bundle/phases/telemetry.go b/bundle/phases/telemetry.go index 4584e9fc5e..f826b3afa7 100644 --- a/bundle/phases/telemetry.go +++ b/bundle/phases/telemetry.go @@ -7,6 +7,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/variable" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/log" @@ -112,6 +113,7 @@ func logDeployTelemetry(ctx context.Context, b *bundle.Bundle) { variableCount := len(b.Config.Variables) complexVariableCount := int64(0) lookupVariableCount := int64(0) + variableWithDefaultCount := int64(0) for _, v := range b.Config.Variables { // If the resolved value of the variable is a complex type, we count it as a complex variable. // We can't rely on the "type: complex" annotation because the annotation is optional in some contexts @@ -122,6 +124,11 @@ func logDeployTelemetry(ctx context.Context, b *bundle.Bundle) { if v.Lookup != nil { lookupVariableCount++ + trackLookupType(b, v.Lookup) + } + + if v.HasDefault() { + variableWithDefaultCount++ } } @@ -183,8 +190,50 @@ func logDeployTelemetry(ctx context.Context, b *bundle.Bundle) { VariableCount: int64(variableCount), ComplexVariableCount: complexVariableCount, LookupVariableCount: lookupVariableCount, + VariableWithDefaultCount: variableWithDefaultCount, BundleMutatorExecutionTimeMs: getExecutionTimes(b), + DeployPlanMetrics: b.Metrics.DeployPlanMetrics, }, }, }) } + +// trackLookupType sets a bool metric for the lookup type used by a variable. +func trackLookupType(b *bundle.Bundle, l *variable.Lookup) { + if l.Alert != "" { + b.Metrics.SetBoolValue("lookup_type_alert_used", true) + } + if l.ClusterPolicy != "" { + b.Metrics.SetBoolValue("lookup_type_cluster_policy_used", true) + } + if l.Cluster != "" { + b.Metrics.SetBoolValue("lookup_type_cluster_used", true) + } + if l.Dashboard != "" { + b.Metrics.SetBoolValue("lookup_type_dashboard_used", true) + } + if l.InstancePool != "" { + b.Metrics.SetBoolValue("lookup_type_instance_pool_used", true) + } + if l.Job != "" { + b.Metrics.SetBoolValue("lookup_type_job_used", true) + } + if l.Metastore != "" { + b.Metrics.SetBoolValue("lookup_type_metastore_used", true) + } + if l.NotificationDestination != "" { + b.Metrics.SetBoolValue("lookup_type_notification_destination_used", true) + } + if l.Pipeline != "" { + b.Metrics.SetBoolValue("lookup_type_pipeline_used", true) + } + if l.Query != "" { + b.Metrics.SetBoolValue("lookup_type_query_used", true) + } + if l.ServicePrincipal != "" { + b.Metrics.SetBoolValue("lookup_type_service_principal_used", true) + } + if l.Warehouse != "" { + b.Metrics.SetBoolValue("lookup_type_warehouse_used", true) + } +} diff --git a/cmd/bundle/utils/process.go b/cmd/bundle/utils/process.go index 8390548912..3f386fb690 100644 --- a/cmd/bundle/utils/process.go +++ b/cmd/bundle/utils/process.go @@ -267,6 +267,11 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (*bundle.Bundle, } if opts.Deploy { + // Record that initialization and validation passed before entering the + // deploy phase. Combined with a non-zero exit code this lets us measure + // the "validation pass, deploy fail" gap. + b.Metrics.SetBoolValue("validation_passed", true) + var outputHandler sync.OutputHandler if opts.Verbose { outputHandler = func(ctx context.Context, c <-chan sync.Event) { diff --git a/libs/dyn/dynvar/ref.go b/libs/dyn/dynvar/ref.go index a048c80cb8..26d2641e09 100644 --- a/libs/dyn/dynvar/ref.go +++ b/libs/dyn/dynvar/ref.go @@ -13,6 +13,10 @@ var ( // !!! See python/databricks/bundles/core/_transform.py baseVarDef = `[a-zA-Z]+([-_]*[a-zA-Z0-9]+)*` re = regexp.MustCompile(fmt.Sprintf(`\$\{(%s(\.%s(\[[0-9]+\])*)*(\[[0-9]+\])*)\}`, baseVarDef, baseVarDef)) + + // Matches a nested variable reference pattern like ${var.foo_${var.bar}}. + // This is when a ${var. reference appears inside an outer ${...} block. + reNestedVarRef = regexp.MustCompile(`\$\{[^}]*\$\{var\.`) ) // Ref represents a variable reference. @@ -84,6 +88,12 @@ func ContainsVariableReference(s string) bool { return re.MatchString(s) } +// ContainsNestedVariableReference returns true if the string contains a +// nested variable reference pattern like ${var.foo_${var.bar}}. +func ContainsNestedVariableReference(s string) bool { + return reNestedVarRef.MatchString(s) +} + // If s is a pure variable reference, this function returns the corresponding // dyn.Path. Otherwise, it returns false. func PureReferenceToPath(s string) (dyn.Path, bool) { diff --git a/libs/dyn/dynvar/ref_test.go b/libs/dyn/dynvar/ref_test.go index d59d80cba1..571e5dd50d 100644 --- a/libs/dyn/dynvar/ref_test.go +++ b/libs/dyn/dynvar/ref_test.go @@ -54,6 +54,26 @@ func TestIsPureVariableReference(t *testing.T) { assert.True(t, IsPureVariableReference("${foo.bar}")) } +func TestContainsNestedVariableReference(t *testing.T) { + tests := []struct { + in string + expected bool + }{ + {"${var.foo_${var.bar}}", true}, + {"${var.${var.bar}}", true}, + {"${bundle.name_${var.env}}", true}, + {"prefix ${resources.jobs.${var.name}.id}", true}, + {"${var.foo}", false}, + {"${var.foo} ${var.bar}", false}, + {"${bundle.name}", false}, + {"plain string", false}, + {"", false}, + } + for _, tc := range tests { + assert.Equal(t, tc.expected, ContainsNestedVariableReference(tc.in), "input: %s", tc.in) + } +} + func TestPureReferenceToPath(t *testing.T) { for _, tc := range []struct { in string diff --git a/libs/telemetry/protos/bundle_deploy.go b/libs/telemetry/protos/bundle_deploy.go index ab1b3a46de..ac88495813 100644 --- a/libs/telemetry/protos/bundle_deploy.go +++ b/libs/telemetry/protos/bundle_deploy.go @@ -39,23 +39,15 @@ type BundleDeployExperimental struct { // Number of configuration files in the bundle. ConfigurationFileCount int64 `json:"configuration_file_count"` - // Size in bytes of the Terraform state file - TerraformStateSizeBytes int64 `json:"terraform_state_size_bytes,omitempty"` - // Number of variables in the bundle - VariableCount int64 `json:"variable_count"` - ComplexVariableCount int64 `json:"complex_variable_count"` - LookupVariableCount int64 `json:"lookup_variable_count"` + VariableCount int64 `json:"variable_count"` + ComplexVariableCount int64 `json:"complex_variable_count"` + LookupVariableCount int64 `json:"lookup_variable_count"` + VariableWithDefaultCount int64 `json:"variable_with_default_count"` // Number of targets in the bundle TargetCount int64 `json:"target_count"` - // Whether a field is set or not. If a configuration field is not present in this - // map then it is not tracked by this field. - // Keys are the full path of the field in the configuration tree. - // Examples: "bundle.terraform.exec_path", "bundle.git.branch" etc. - SetFields []BoolMapEntry `json:"set_fields,omitempty"` - // Values for boolean configuration fields like `experimental.python_wheel_wrapper` or just any // boolean values that we want to track. // We don't need to define protos to track boolean values and can simply write those @@ -83,6 +75,9 @@ type BundleDeployExperimental struct { // Local cache measurements in milliseconds (compute duration, potential savings, etc.) LocalCacheMeasurementsMs []IntMapEntry `json:"local_cache_measurements_ms,omitempty"` + + // Metrics about the deployment plan and its dependency graph (direct engine only). + DeployPlanMetrics []IntMapEntry `json:"deploy_plan_metrics,omitempty"` } type BoolMapEntry struct { diff --git a/libs/telemetry/protos/databricks_cli_log.go b/libs/telemetry/protos/databricks_cli_log.go index 64baa6b384..aa60cdb276 100644 --- a/libs/telemetry/protos/databricks_cli_log.go +++ b/libs/telemetry/protos/databricks_cli_log.go @@ -20,9 +20,6 @@ type ExecutionContext struct { // Only set when the CLI is being run from a Databricks cluster. DbrVersion string `json:"dbr_version,omitempty"` - // If true, the CLI is being run from a Databricks notebook / cluster web terminal. - FromWebTerminal bool `json:"from_web_terminal,omitempty"` - // Time taken for the CLI command to execute. // We want to serialize the zero value as well so the omitempty tag is not set. ExecutionTimeMs int64 `json:"execution_time_ms"`