From c7f0547fd433ea76d84daeb2f30e57269cdc5c6c Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 17 Sep 2024 15:08:14 +0200 Subject: [PATCH 1/7] Sort tasks by task_key before generating the terraform configuration --- bundle/config/mutator/sort_job_tasks.go | 33 ++++++++ bundle/config/mutator/sort_job_tasks_test.go | 80 ++++++++++++++++++++ bundle/phases/initialize.go | 5 ++ 3 files changed, 118 insertions(+) create mode 100644 bundle/config/mutator/sort_job_tasks.go create mode 100644 bundle/config/mutator/sort_job_tasks_test.go diff --git a/bundle/config/mutator/sort_job_tasks.go b/bundle/config/mutator/sort_job_tasks.go new file mode 100644 index 0000000000..8b9ca7f17f --- /dev/null +++ b/bundle/config/mutator/sort_job_tasks.go @@ -0,0 +1,33 @@ +package mutator + +import ( + "context" + "sort" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" +) + +type sortJobTasks struct{} + +// SortJobTasks sorts the tasks of each job in the bundle by task key. Sorting +// the task keys ensures that the diff computed by terraform is correct. For +// more details see the NOTE at https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/job#example-usage +// and https://github.com/databricks/terraform-provider-databricks/issues/4011. +func SortJobTasks() bundle.Mutator { + return &sortJobTasks{} +} + +func (m *sortJobTasks) Name() string { + return "SortJobTasks" +} + +func (m *sortJobTasks) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + for _, job := range b.Config.Resources.Jobs { + sort.Slice(job.Tasks, func(i, j int) bool { + return job.Tasks[i].TaskKey < job.Tasks[j].TaskKey + }) + } + + return nil +} diff --git a/bundle/config/mutator/sort_job_tasks_test.go b/bundle/config/mutator/sort_job_tasks_test.go new file mode 100644 index 0000000000..c5de826d44 --- /dev/null +++ b/bundle/config/mutator/sort_job_tasks_test.go @@ -0,0 +1,80 @@ +package mutator + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/assert" +) + +func TestSortJobClusters(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "foo": { + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + TaskKey: "c", + EnvironmentKey: "3", + }, + { + TaskKey: "a", + EnvironmentKey: "1", + }, + { + TaskKey: "b", + EnvironmentKey: "2", + }, + }, + }, + }, + "bar": { + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + TaskKey: "d", + }, + { + TaskKey: "e", + }, + }, + }, + }, + }, + }, + }, + } + + diags := bundle.Apply(context.Background(), b, SortJobTasks()) + assert.NoError(t, diags.Error()) + + assert.Equal(t, []jobs.Task{ + { + TaskKey: "a", + EnvironmentKey: "1", + }, + { + TaskKey: "b", + EnvironmentKey: "2", + }, + { + TaskKey: "c", + EnvironmentKey: "3", + }, + }, b.Config.Resources.Jobs["foo"].Tasks) + + assert.Equal(t, []jobs.Task{ + { + TaskKey: "d", + }, + { + TaskKey: "e", + }, + }, b.Config.Resources.Jobs["bar"].Tasks) +} diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 8039a4f131..4d737c6424 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -62,6 +62,11 @@ func Initialize() bundle.Mutator { mutator.DefaultQueueing(), mutator.ExpandPipelineGlobPaths(), + // We sort job tasks by their task key to ensure sane diffs by the + // Databricks Terraform provider. This is done after variable resolution + // to ensure that the task key is fully resolved to what terraform will see. + mutator.SortJobTasks(), + // Configure use of WSFS for reads if the CLI is running on Databricks. mutator.ConfigureWSFS(), From 0747fb48f750bb2274a0e7e45d5a14aa43289f87 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 17 Sep 2024 18:11:51 +0200 Subject: [PATCH 2/7] sort in tfdyn --- bundle/config/mutator/sort_job_tasks.go | 33 -------- bundle/config/mutator/sort_job_tasks_test.go | 80 ------------------- bundle/deploy/terraform/tfdyn/convert_job.go | 19 ++++- .../terraform/tfdyn/convert_job_test.go | 24 +++++- bundle/phases/initialize.go | 5 -- libs/template/renderer_test.go | 2 +- 6 files changed, 39 insertions(+), 124 deletions(-) delete mode 100644 bundle/config/mutator/sort_job_tasks.go delete mode 100644 bundle/config/mutator/sort_job_tasks_test.go diff --git a/bundle/config/mutator/sort_job_tasks.go b/bundle/config/mutator/sort_job_tasks.go deleted file mode 100644 index 8b9ca7f17f..0000000000 --- a/bundle/config/mutator/sort_job_tasks.go +++ /dev/null @@ -1,33 +0,0 @@ -package mutator - -import ( - "context" - "sort" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/libs/diag" -) - -type sortJobTasks struct{} - -// SortJobTasks sorts the tasks of each job in the bundle by task key. Sorting -// the task keys ensures that the diff computed by terraform is correct. For -// more details see the NOTE at https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/job#example-usage -// and https://github.com/databricks/terraform-provider-databricks/issues/4011. -func SortJobTasks() bundle.Mutator { - return &sortJobTasks{} -} - -func (m *sortJobTasks) Name() string { - return "SortJobTasks" -} - -func (m *sortJobTasks) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - for _, job := range b.Config.Resources.Jobs { - sort.Slice(job.Tasks, func(i, j int) bool { - return job.Tasks[i].TaskKey < job.Tasks[j].TaskKey - }) - } - - return nil -} diff --git a/bundle/config/mutator/sort_job_tasks_test.go b/bundle/config/mutator/sort_job_tasks_test.go deleted file mode 100644 index c5de826d44..0000000000 --- a/bundle/config/mutator/sort_job_tasks_test.go +++ /dev/null @@ -1,80 +0,0 @@ -package mutator - -import ( - "context" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/databricks-sdk-go/service/jobs" - "github.com/stretchr/testify/assert" -) - -func TestSortJobClusters(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "foo": { - JobSettings: &jobs.JobSettings{ - Tasks: []jobs.Task{ - { - TaskKey: "c", - EnvironmentKey: "3", - }, - { - TaskKey: "a", - EnvironmentKey: "1", - }, - { - TaskKey: "b", - EnvironmentKey: "2", - }, - }, - }, - }, - "bar": { - JobSettings: &jobs.JobSettings{ - Tasks: []jobs.Task{ - { - TaskKey: "d", - }, - { - TaskKey: "e", - }, - }, - }, - }, - }, - }, - }, - } - - diags := bundle.Apply(context.Background(), b, SortJobTasks()) - assert.NoError(t, diags.Error()) - - assert.Equal(t, []jobs.Task{ - { - TaskKey: "a", - EnvironmentKey: "1", - }, - { - TaskKey: "b", - EnvironmentKey: "2", - }, - { - TaskKey: "c", - EnvironmentKey: "3", - }, - }, b.Config.Resources.Jobs["foo"].Tasks) - - assert.Equal(t, []jobs.Task{ - { - TaskKey: "d", - }, - { - TaskKey: "e", - }, - }, b.Config.Resources.Jobs["bar"].Tasks) -} diff --git a/bundle/deploy/terraform/tfdyn/convert_job.go b/bundle/deploy/terraform/tfdyn/convert_job.go index d1e7e73e2c..a29c9cc1ec 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job.go +++ b/bundle/deploy/terraform/tfdyn/convert_job.go @@ -3,6 +3,7 @@ package tfdyn import ( "context" "fmt" + "sort" "github.com/databricks/cli/bundle/internal/tf/schema" "github.com/databricks/cli/libs/dyn" @@ -19,8 +20,24 @@ func convertJobResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { log.Debugf(ctx, "job normalization diagnostic: %s", diag.Summary) } + // Sort the tasks of each job in the bundle by task key. Sorting + // the task keys ensures that the diff computed by terraform is correct and avoids + // recreates. For more details see the NOTE at + // https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/job#example-usage + // and https://github.com/databricks/terraform-provider-databricks/issues/4011 + tasks := vin.Get("tasks").MustSequence() + sort.Slice(tasks, func(i, j int) bool { + return tasks[i].Get("task_key").MustString() < tasks[j].Get("task_key").MustString() + }) + vout, err := dyn.Map(vin, "tasks", func(_ dyn.Path, _ dyn.Value) (dyn.Value, error) { + return dyn.V(tasks), nil + }) + if err != nil { + return dyn.InvalidValue, err + } + // Modify top-level keys. - vout, err := renameKeys(vin, map[string]string{ + vout, err = renameKeys(vout, map[string]string{ "tasks": "task", "job_clusters": "job_cluster", "parameters": "parameter", diff --git a/bundle/deploy/terraform/tfdyn/convert_job_test.go b/bundle/deploy/terraform/tfdyn/convert_job_test.go index b9e1f967f0..57da750b7f 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job_test.go +++ b/bundle/deploy/terraform/tfdyn/convert_job_test.go @@ -42,8 +42,8 @@ func TestConvertJob(t *testing.T) { }, Tasks: []jobs.Task{ { - TaskKey: "task_key", - JobClusterKey: "job_cluster_key", + TaskKey: "task_key_b", + JobClusterKey: "job_cluster_key_b", Libraries: []compute.Library{ { Pypi: &compute.PythonPyPiLibrary{ @@ -55,6 +55,14 @@ func TestConvertJob(t *testing.T) { }, }, }, + { + TaskKey: "task_key_a", + JobClusterKey: "job_cluster_key_a", + }, + { + TaskKey: "task_key_c", + JobClusterKey: "job_cluster_key_c", + }, }, }, Permissions: []resources.Permission{ @@ -100,8 +108,12 @@ func TestConvertJob(t *testing.T) { }, "task": []any{ map[string]any{ - "task_key": "task_key", - "job_cluster_key": "job_cluster_key", + "task_key": "task_key_a", + "job_cluster_key": "job_cluster_key_a", + }, + map[string]any{ + "task_key": "task_key_b", + "job_cluster_key": "job_cluster_key_b", "library": []any{ map[string]any{ "pypi": map[string]any{ @@ -113,6 +125,10 @@ func TestConvertJob(t *testing.T) { }, }, }, + map[string]any{ + "task_key": "task_key_c", + "job_cluster_key": "job_cluster_key_c", + }, }, }, out.Job["my_job"]) diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 4d737c6424..8039a4f131 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -62,11 +62,6 @@ func Initialize() bundle.Mutator { mutator.DefaultQueueing(), mutator.ExpandPipelineGlobPaths(), - // We sort job tasks by their task key to ensure sane diffs by the - // Databricks Terraform provider. This is done after variable resolution - // to ensure that the task key is fully resolved to what terraform will see. - mutator.SortJobTasks(), - // Configure use of WSFS for reads if the CLI is running on Databricks. mutator.ConfigureWSFS(), diff --git a/libs/template/renderer_test.go b/libs/template/renderer_test.go index 92133c5fea..ffb17baae3 100644 --- a/libs/template/renderer_test.go +++ b/libs/template/renderer_test.go @@ -110,7 +110,7 @@ func TestPrepareBuiltInTemplatesWithRelativePaths(t *testing.T) { func TestBuiltinPythonTemplateValid(t *testing.T) { // Test option combinations - options := []string{"yes", "no"} + options := []string{"yes"} isServicePrincipal := false catalog := "hive_metastore" cachedCatalog = &catalog From 4ce2b75928fdeb7af9e67b0c3eb5f4d39eca3eec Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 17 Sep 2024 18:12:37 +0200 Subject: [PATCH 3/7] - --- libs/template/renderer_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/template/renderer_test.go b/libs/template/renderer_test.go index ffb17baae3..92133c5fea 100644 --- a/libs/template/renderer_test.go +++ b/libs/template/renderer_test.go @@ -110,7 +110,7 @@ func TestPrepareBuiltInTemplatesWithRelativePaths(t *testing.T) { func TestBuiltinPythonTemplateValid(t *testing.T) { // Test option combinations - options := []string{"yes"} + options := []string{"yes", "no"} isServicePrincipal := false catalog := "hive_metastore" cachedCatalog = &catalog From a448c7f2387cfd1939bd942491b2e49aced80dac Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 17 Sep 2024 18:37:17 +0200 Subject: [PATCH 4/7] - --- bundle/deploy/terraform/convert.go | 5 +++ bundle/deploy/terraform/tfdyn/convert_job.go | 31 +++++++++++++------ .../terraform/tfdyn/convert_job_test.go | 6 ++++ 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/bundle/deploy/terraform/convert.go b/bundle/deploy/terraform/convert.go index f13c241cee..6992a630d6 100644 --- a/bundle/deploy/terraform/convert.go +++ b/bundle/deploy/terraform/convert.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "sort" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" @@ -82,6 +83,10 @@ func BundleToTerraform(config *config.Root) *schema.Root { conv(src, &dst) if src.JobSettings != nil { + sort.Slice(src.JobSettings.Tasks, func(i, j int) bool { + return src.JobSettings.Tasks[i].TaskKey < src.JobSettings.Tasks[j].TaskKey + }) + for _, v := range src.Tasks { var t schema.ResourceJobTask conv(v, &t) diff --git a/bundle/deploy/terraform/tfdyn/convert_job.go b/bundle/deploy/terraform/tfdyn/convert_job.go index a29c9cc1ec..dbf56f84cc 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job.go +++ b/bundle/deploy/terraform/tfdyn/convert_job.go @@ -25,15 +25,28 @@ func convertJobResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { // recreates. For more details see the NOTE at // https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/job#example-usage // and https://github.com/databricks/terraform-provider-databricks/issues/4011 - tasks := vin.Get("tasks").MustSequence() - sort.Slice(tasks, func(i, j int) bool { - return tasks[i].Get("task_key").MustString() < tasks[j].Get("task_key").MustString() - }) - vout, err := dyn.Map(vin, "tasks", func(_ dyn.Path, _ dyn.Value) (dyn.Value, error) { - return dyn.V(tasks), nil - }) - if err != nil { - return dyn.InvalidValue, err + // TODO: Is this safe for nil values of task key? Empty strings? + vout := vin + var err error + tasks, ok := vin.Get("tasks").AsSequence() + if ok { + sort.Slice(tasks, func(i, j int) bool { + tk1, ok := tasks[i].Get("task_key").AsString() + if !ok { + return true + } + tk2, ok := tasks[j].Get("task_key").AsString() + if !ok { + return false + } + return tk1 < tk2 + }) + vout, err = dyn.Map(vin, "tasks", func(_ dyn.Path, _ dyn.Value) (dyn.Value, error) { + return dyn.V(tasks), nil + }) + if err != nil { + return dyn.InvalidValue, err + } } // Modify top-level keys. diff --git a/bundle/deploy/terraform/tfdyn/convert_job_test.go b/bundle/deploy/terraform/tfdyn/convert_job_test.go index 57da750b7f..695b9ba249 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job_test.go +++ b/bundle/deploy/terraform/tfdyn/convert_job_test.go @@ -63,6 +63,9 @@ func TestConvertJob(t *testing.T) { TaskKey: "task_key_c", JobClusterKey: "job_cluster_key_c", }, + { + Description: "missing task key 😱", + }, }, }, Permissions: []resources.Permission{ @@ -107,6 +110,9 @@ func TestConvertJob(t *testing.T) { }, }, "task": []any{ + map[string]any{ + "description": "missing task key 😱", + }, map[string]any{ "task_key": "task_key_a", "job_cluster_key": "job_cluster_key_a", From f4f0e19263d170faa36ec4afc2189c0c002f205f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 17 Sep 2024 18:39:59 +0200 Subject: [PATCH 5/7] - --- bundle/deploy/terraform/tfdyn/convert_job.go | 1 - 1 file changed, 1 deletion(-) diff --git a/bundle/deploy/terraform/tfdyn/convert_job.go b/bundle/deploy/terraform/tfdyn/convert_job.go index dbf56f84cc..d841d0f373 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job.go +++ b/bundle/deploy/terraform/tfdyn/convert_job.go @@ -25,7 +25,6 @@ func convertJobResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { // recreates. For more details see the NOTE at // https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/job#example-usage // and https://github.com/databricks/terraform-provider-databricks/issues/4011 - // TODO: Is this safe for nil values of task key? Empty strings? vout := vin var err error tasks, ok := vin.Get("tasks").AsSequence() From 9d861aa922b5c13ff14381c2fa552d54829c3130 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 18 Sep 2024 13:27:29 +0200 Subject: [PATCH 6/7] feedback --- bundle/deploy/terraform/tfdyn/convert_job.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bundle/deploy/terraform/tfdyn/convert_job.go b/bundle/deploy/terraform/tfdyn/convert_job.go index d841d0f373..444a27c739 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job.go +++ b/bundle/deploy/terraform/tfdyn/convert_job.go @@ -40,9 +40,7 @@ func convertJobResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { } return tk1 < tk2 }) - vout, err = dyn.Map(vin, "tasks", func(_ dyn.Path, _ dyn.Value) (dyn.Value, error) { - return dyn.V(tasks), nil - }) + vout, err = dyn.Set(vin, "tasks", dyn.V(tasks)) if err != nil { return dyn.InvalidValue, err } From 9ffc417ebd9b78e3b1b1a064131824cf4edbff31 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Sep 2024 15:13:59 +0200 Subject: [PATCH 7/7] address comments --- bundle/deploy/terraform/tfdyn/convert_job.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bundle/deploy/terraform/tfdyn/convert_job.go b/bundle/deploy/terraform/tfdyn/convert_job.go index 444a27c739..8948e3bafd 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job.go +++ b/bundle/deploy/terraform/tfdyn/convert_job.go @@ -25,11 +25,15 @@ func convertJobResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { // recreates. For more details see the NOTE at // https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/job#example-usage // and https://github.com/databricks/terraform-provider-databricks/issues/4011 + // and https://github.com/databricks/cli/pull/1776 vout := vin var err error tasks, ok := vin.Get("tasks").AsSequence() if ok { sort.Slice(tasks, func(i, j int) bool { + // We sort the tasks by their task key. Tasks without task keys are ordered + // before tasks with task keys. We do not error for those tasks + // since presence of a task_key is validated for in the Jobs backend. tk1, ok := tasks[i].Get("task_key").AsString() if !ok { return true