From 0f76c516b4a6df04166c8ea7665ca2cbec2e8c78 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 23 Aug 2023 14:17:28 +0200 Subject: [PATCH 1/5] Added run_as section for bundle configuration --- bundle/config/mutator/mutator.go | 14 ++++---- bundle/config/mutator/run_as.go | 49 +++++++++++++++++++++++++++ bundle/config/root.go | 9 +++++ bundle/config/target.go | 4 +++ bundle/tests/loader.go | 13 +++---- bundle/tests/run_as/databricks.yml | 37 ++++++++++++++++++++ bundle/tests/run_as_test.go | 54 ++++++++++++++++++++++++++++++ 7 files changed, 168 insertions(+), 12 deletions(-) create mode 100644 bundle/config/mutator/run_as.go create mode 100644 bundle/tests/run_as/databricks.yml create mode 100644 bundle/tests/run_as_test.go diff --git a/bundle/config/mutator/mutator.go b/bundle/config/mutator/mutator.go index ff1f96f50e..af2a00f6af 100644 --- a/bundle/config/mutator/mutator.go +++ b/bundle/config/mutator/mutator.go @@ -4,14 +4,16 @@ import ( "github.com/databricks/cli/bundle" ) +var defaultMutators []bundle.Mutator = []bundle.Mutator{ + ProcessRootIncludes(), + DefineDefaultTarget(), + LoadGitDetails(), +} + func DefaultMutators() []bundle.Mutator { - return []bundle.Mutator{ - ProcessRootIncludes(), - DefineDefaultTarget(), - LoadGitDetails(), - } + return append(defaultMutators, SetRunAs()) } func DefaultMutatorsForTarget(env string) []bundle.Mutator { - return append(DefaultMutators(), SelectTarget(env)) + return append(defaultMutators, SelectTarget(env), SetRunAs()) } diff --git a/bundle/config/mutator/run_as.go b/bundle/config/mutator/run_as.go new file mode 100644 index 0000000000..42df3071dd --- /dev/null +++ b/bundle/config/mutator/run_as.go @@ -0,0 +1,49 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/jobs" +) + +type setRunAs struct { +} + +func SetRunAs() bundle.Mutator { + return &setRunAs{} +} + +func (m *setRunAs) Name() string { + return "SetRunAs" +} + +func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) error { + runAs := b.Config.RunAs + if runAs == nil { + return nil + } + + for i := range b.Config.Resources.Jobs { + job := b.Config.Resources.Jobs[i] + if job.RunAs != nil { + continue + } + job.RunAs = &jobs.JobRunAs{ + ServicePrincipalName: runAs.ServicePrincipalName, + UserName: runAs.UserName, + } + } + + for i := range b.Config.Resources.Pipelines { + pipeline := b.Config.Resources.Pipelines[i] + pipeline.Permissions = append(pipeline.Permissions, resources.Permission{ + Level: "IS_OWNER", + ServicePrincipalName: runAs.ServicePrincipalName, + UserName: runAs.UserName, + }) + } + + return nil +} diff --git a/bundle/config/root.go b/bundle/config/root.go index e0d20425b2..3b8f2d5272 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/databricks/cli/bundle/config/variable" + "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/ghodss/yaml" "github.com/imdario/mergo" ) @@ -80,6 +81,9 @@ type Root struct { // Sync section specifies options for files synchronization Sync Sync `json:"sync"` + + // RunAs section allows to define an execution identity for jobs and pipelines runs + RunAs *jobs.JobRunAs `json:"run_as,omitempty"` } func Load(path string) (*Root, error) { @@ -237,6 +241,11 @@ func (r *Root) MergeTargetOverrides(target *Target) error { } } + if target.RunAs != nil { + r.RunAs = target.RunAs + fmt.Println("Merging run as") + } + if target.Mode != "" { r.Bundle.Mode = target.Mode } diff --git a/bundle/config/target.go b/bundle/config/target.go index 10775049dc..6a45fdb851 100644 --- a/bundle/config/target.go +++ b/bundle/config/target.go @@ -1,5 +1,7 @@ package config +import "github.com/databricks/databricks-sdk-go/service/jobs" + type Mode string // Target defines overrides for a single target. @@ -31,6 +33,8 @@ type Target struct { Variables map[string]string `json:"variables,omitempty"` Git Git `json:"git,omitempty"` + + RunAs *jobs.JobRunAs `json:"run_as,omitempty"` } const ( diff --git a/bundle/tests/loader.go b/bundle/tests/loader.go index f23b107649..b500e98699 100644 --- a/bundle/tests/loader.go +++ b/bundle/tests/loader.go @@ -9,18 +9,19 @@ import ( "github.com/stretchr/testify/require" ) -func load(t *testing.T, path string) *bundle.Bundle { +func loadBundle(t *testing.T, path string, mutators []bundle.Mutator) *bundle.Bundle { ctx := context.Background() b, err := bundle.Load(ctx, path) require.NoError(t, err) - err = bundle.Apply(ctx, b, bundle.Seq(mutator.DefaultMutators()...)) + err = bundle.Apply(ctx, b, bundle.Seq(mutators...)) require.NoError(t, err) return b } +func load(t *testing.T, path string) *bundle.Bundle { + return loadBundle(t, path, mutator.DefaultMutators()) +} + func loadTarget(t *testing.T, path, env string) *bundle.Bundle { - b := load(t, path) - err := bundle.Apply(context.Background(), b, mutator.SelectTarget(env)) - require.NoError(t, err) - return b + return loadBundle(t, path, mutator.DefaultMutatorsForTarget(env)) } diff --git a/bundle/tests/run_as/databricks.yml b/bundle/tests/run_as/databricks.yml new file mode 100644 index 0000000000..e1d10be9c8 --- /dev/null +++ b/bundle/tests/run_as/databricks.yml @@ -0,0 +1,37 @@ +bundle: + name: "run_as" + +run_as: + service_principal_name: "my_service_principal" + +targets: + development: + mode: development + run_as: + user_name: "my_user_name" + +resources: + pipelines: + nyc_taxi_pipeline: + name: "nyc taxi loader" + libraries: + - notebook: + path: ./dlt/nyc_taxi_loader + jobs: + job_one: + name: Job One + tasks: + - task: + notebook_path: "./test.py" + job_two: + name: Job Two + tasks: + - task: + notebook_path: "./test.py" + job_three: + name: Job Three + run_as: + service_principal_name: "my_service_principal_for_job" + tasks: + - task: + notebook_path: "./test.py" diff --git a/bundle/tests/run_as_test.go b/bundle/tests/run_as_test.go new file mode 100644 index 0000000000..dad4fa2d55 --- /dev/null +++ b/bundle/tests/run_as_test.go @@ -0,0 +1,54 @@ +package config_tests + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestRunAsDefault(t *testing.T) { + b := load(t, "./run_as") + assert.Len(t, b.Config.Resources.Jobs, 3) + jobs := b.Config.Resources.Jobs + + assert.NotNil(t, jobs["job_one"].RunAs) + assert.Equal(t, "my_service_principal", jobs["job_one"].RunAs.ServicePrincipalName) + assert.Equal(t, "", jobs["job_one"].RunAs.UserName) + + assert.NotNil(t, jobs["job_two"].RunAs) + assert.Equal(t, "my_service_principal", jobs["job_two"].RunAs.ServicePrincipalName) + assert.Equal(t, "", jobs["job_two"].RunAs.UserName) + + assert.NotNil(t, jobs["job_three"].RunAs) + assert.Equal(t, "my_service_principal_for_job", jobs["job_three"].RunAs.ServicePrincipalName) + assert.Equal(t, "", jobs["job_three"].RunAs.UserName) + + pipelines := b.Config.Resources.Pipelines + assert.NotNil(t, pipelines["nyc_taxi_pipeline"].Permissions) + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].Level, "IS_OWNER") + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].ServicePrincipalName, "my_service_principal") +} + +func TestRunAsDevelopment(t *testing.T) { + b := loadTarget(t, "./run_as", "development") + assert.Len(t, b.Config.Resources.Jobs, 3) + + jobs := b.Config.Resources.Jobs + + assert.NotNil(t, jobs["job_one"].RunAs) + assert.Equal(t, "", jobs["job_one"].RunAs.ServicePrincipalName) + assert.Equal(t, "my_user_name", jobs["job_one"].RunAs.UserName) + + assert.NotNil(t, jobs["job_two"].RunAs) + assert.Equal(t, "", jobs["job_two"].RunAs.ServicePrincipalName) + assert.Equal(t, "my_user_name", jobs["job_two"].RunAs.UserName) + + assert.NotNil(t, jobs["job_three"].RunAs) + assert.Equal(t, "my_service_principal_for_job", jobs["job_three"].RunAs.ServicePrincipalName) + assert.Equal(t, "", jobs["job_three"].RunAs.UserName) + + pipelines := b.Config.Resources.Pipelines + assert.NotNil(t, pipelines["nyc_taxi_pipeline"].Permissions) + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].Level, "IS_OWNER") + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].UserName, "my_user_name") +} From b5e0f7507ac0985e60744930718d46388bb42905 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 23 Aug 2023 15:05:37 +0200 Subject: [PATCH 2/5] remove existing matched permissions --- bundle/config/mutator/run_as.go | 8 ++++++++ bundle/config/root.go | 1 - bundle/tests/run_as/databricks.yml | 5 +++++ bundle/tests/run_as_test.go | 18 ++++++++++++------ 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/bundle/config/mutator/run_as.go b/bundle/config/mutator/run_as.go index 42df3071dd..48d482d280 100644 --- a/bundle/config/mutator/run_as.go +++ b/bundle/config/mutator/run_as.go @@ -2,6 +2,7 @@ package mutator import ( "context" + "slices" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/resources" @@ -11,6 +12,9 @@ import ( type setRunAs struct { } +// SetRunAs mutator is used to go over defined resources such as Jobs and DLT Pipelines +// And set correct execution identity ("run_as" for a job or "is_owner" permission for DLT) +// if top-level "run-as" section is defined in the configuration. func SetRunAs() bundle.Mutator { return &setRunAs{} } @@ -38,6 +42,10 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) error { for i := range b.Config.Resources.Pipelines { pipeline := b.Config.Resources.Pipelines[i] + pipeline.Permissions = slices.DeleteFunc(pipeline.Permissions, func(p resources.Permission) bool { + return (runAs.ServicePrincipalName != "" && p.ServicePrincipalName == runAs.ServicePrincipalName) || + (runAs.UserName != "" && p.UserName == runAs.UserName) + }) pipeline.Permissions = append(pipeline.Permissions, resources.Permission{ Level: "IS_OWNER", ServicePrincipalName: runAs.ServicePrincipalName, diff --git a/bundle/config/root.go b/bundle/config/root.go index 3b8f2d5272..1275dab481 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -243,7 +243,6 @@ func (r *Root) MergeTargetOverrides(target *Target) error { if target.RunAs != nil { r.RunAs = target.RunAs - fmt.Println("Merging run as") } if target.Mode != "" { diff --git a/bundle/tests/run_as/databricks.yml b/bundle/tests/run_as/databricks.yml index e1d10be9c8..18ea55736d 100644 --- a/bundle/tests/run_as/databricks.yml +++ b/bundle/tests/run_as/databricks.yml @@ -13,6 +13,11 @@ targets: resources: pipelines: nyc_taxi_pipeline: + permissions: + - level: CAN_VIEW + service_principal_name: my_service_principal + - level: CAN_VIEW + user_name: my_user_name name: "nyc taxi loader" libraries: - notebook: diff --git a/bundle/tests/run_as_test.go b/bundle/tests/run_as_test.go index dad4fa2d55..bc0af61edb 100644 --- a/bundle/tests/run_as_test.go +++ b/bundle/tests/run_as_test.go @@ -24,9 +24,12 @@ func TestRunAsDefault(t *testing.T) { assert.Equal(t, "", jobs["job_three"].RunAs.UserName) pipelines := b.Config.Resources.Pipelines - assert.NotNil(t, pipelines["nyc_taxi_pipeline"].Permissions) - assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].Level, "IS_OWNER") - assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].ServicePrincipalName, "my_service_principal") + assert.Len(t, pipelines["nyc_taxi_pipeline"].Permissions, 2) + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].Level, "CAN_VIEW") + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].UserName, "my_user_name") + + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[1].Level, "IS_OWNER") + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[1].ServicePrincipalName, "my_service_principal") } func TestRunAsDevelopment(t *testing.T) { @@ -48,7 +51,10 @@ func TestRunAsDevelopment(t *testing.T) { assert.Equal(t, "", jobs["job_three"].RunAs.UserName) pipelines := b.Config.Resources.Pipelines - assert.NotNil(t, pipelines["nyc_taxi_pipeline"].Permissions) - assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].Level, "IS_OWNER") - assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].UserName, "my_user_name") + assert.Len(t, pipelines["nyc_taxi_pipeline"].Permissions, 2) + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].Level, "CAN_VIEW") + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].ServicePrincipalName, "my_service_principal") + + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[1].Level, "IS_OWNER") + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[1].UserName, "my_user_name") } From d0de033e8d5abd14088e131e460c4caa0eb6760e Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 23 Aug 2023 16:19:31 +0200 Subject: [PATCH 3/5] do not allow current user in run_as section --- bundle/config/mutator/mutator.go | 14 ++++++-------- bundle/config/mutator/run_as.go | 6 ++++++ bundle/phases/initialize.go | 1 + bundle/tests/loader.go | 13 ++++++------- bundle/tests/run_as_test.go | 12 +++++++++++- 5 files changed, 30 insertions(+), 16 deletions(-) diff --git a/bundle/config/mutator/mutator.go b/bundle/config/mutator/mutator.go index af2a00f6af..ff1f96f50e 100644 --- a/bundle/config/mutator/mutator.go +++ b/bundle/config/mutator/mutator.go @@ -4,16 +4,14 @@ import ( "github.com/databricks/cli/bundle" ) -var defaultMutators []bundle.Mutator = []bundle.Mutator{ - ProcessRootIncludes(), - DefineDefaultTarget(), - LoadGitDetails(), -} - func DefaultMutators() []bundle.Mutator { - return append(defaultMutators, SetRunAs()) + return []bundle.Mutator{ + ProcessRootIncludes(), + DefineDefaultTarget(), + LoadGitDetails(), + } } func DefaultMutatorsForTarget(env string) []bundle.Mutator { - return append(defaultMutators, SelectTarget(env), SetRunAs()) + return append(DefaultMutators(), SelectTarget(env)) } diff --git a/bundle/config/mutator/run_as.go b/bundle/config/mutator/run_as.go index 48d482d280..d2c293eec7 100644 --- a/bundle/config/mutator/run_as.go +++ b/bundle/config/mutator/run_as.go @@ -2,6 +2,7 @@ package mutator import ( "context" + "fmt" "slices" "github.com/databricks/cli/bundle" @@ -29,6 +30,11 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) error { return nil } + me := b.Config.Workspace.CurrentUser.UserName + if (runAs.UserName == me || runAs.ServicePrincipalName == me) && len(b.Config.Resources.Pipelines) > 0 { + return fmt.Errorf("it's not allowed to define current user (%s) as identity in 'run_as' section if there are DLT pipelines defined", me) + } + for i := range b.Config.Resources.Jobs { job := b.Config.Resources.Jobs[i] if job.RunAs != nil { diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 219ec26cf9..546a8478bc 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -16,6 +16,7 @@ func Initialize() bundle.Mutator { "initialize", []bundle.Mutator{ mutator.PopulateCurrentUser(), + mutator.SetRunAs(), mutator.DefineDefaultWorkspaceRoot(), mutator.ExpandWorkspaceRoot(), mutator.DefineDefaultWorkspacePaths(), diff --git a/bundle/tests/loader.go b/bundle/tests/loader.go index b500e98699..f23b107649 100644 --- a/bundle/tests/loader.go +++ b/bundle/tests/loader.go @@ -9,19 +9,18 @@ import ( "github.com/stretchr/testify/require" ) -func loadBundle(t *testing.T, path string, mutators []bundle.Mutator) *bundle.Bundle { +func load(t *testing.T, path string) *bundle.Bundle { ctx := context.Background() b, err := bundle.Load(ctx, path) require.NoError(t, err) - err = bundle.Apply(ctx, b, bundle.Seq(mutators...)) + err = bundle.Apply(ctx, b, bundle.Seq(mutator.DefaultMutators()...)) require.NoError(t, err) return b } -func load(t *testing.T, path string) *bundle.Bundle { - return loadBundle(t, path, mutator.DefaultMutators()) -} - func loadTarget(t *testing.T, path, env string) *bundle.Bundle { - return loadBundle(t, path, mutator.DefaultMutatorsForTarget(env)) + b := load(t, path) + err := bundle.Apply(context.Background(), b, mutator.SelectTarget(env)) + require.NoError(t, err) + return b } diff --git a/bundle/tests/run_as_test.go b/bundle/tests/run_as_test.go index bc0af61edb..b5acf89255 100644 --- a/bundle/tests/run_as_test.go +++ b/bundle/tests/run_as_test.go @@ -1,13 +1,20 @@ package config_tests import ( + "context" "testing" + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/mutator" "github.com/stretchr/testify/assert" ) func TestRunAsDefault(t *testing.T) { b := load(t, "./run_as") + ctx := context.Background() + err := bundle.Apply(ctx, b, bundle.Seq(mutator.PopulateCurrentUser(), mutator.SetRunAs())) + assert.NoError(t, err) + assert.Len(t, b.Config.Resources.Jobs, 3) jobs := b.Config.Resources.Jobs @@ -34,8 +41,11 @@ func TestRunAsDefault(t *testing.T) { func TestRunAsDevelopment(t *testing.T) { b := loadTarget(t, "./run_as", "development") - assert.Len(t, b.Config.Resources.Jobs, 3) + ctx := context.Background() + err := bundle.Apply(ctx, b, bundle.Seq(mutator.PopulateCurrentUser(), mutator.SetRunAs())) + assert.NoError(t, err) + assert.Len(t, b.Config.Resources.Jobs, 3) jobs := b.Config.Resources.Jobs assert.NotNil(t, jobs["job_one"].RunAs) From 6bb5f9c7f32dcb54f98f309f2900e4104f403191 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 23 Aug 2023 16:25:28 +0200 Subject: [PATCH 4/5] fixed test --- bundle/tests/run_as_test.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/bundle/tests/run_as_test.go b/bundle/tests/run_as_test.go index b5acf89255..44c068165a 100644 --- a/bundle/tests/run_as_test.go +++ b/bundle/tests/run_as_test.go @@ -5,14 +5,21 @@ import ( "testing" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/assert" ) func TestRunAsDefault(t *testing.T) { b := load(t, "./run_as") + b.Config.Workspace.CurrentUser = &config.User{ + User: &iam.User{ + UserName: "jane@doe.com", + }, + } ctx := context.Background() - err := bundle.Apply(ctx, b, bundle.Seq(mutator.PopulateCurrentUser(), mutator.SetRunAs())) + err := bundle.Apply(ctx, b, mutator.SetRunAs()) assert.NoError(t, err) assert.Len(t, b.Config.Resources.Jobs, 3) @@ -41,8 +48,13 @@ func TestRunAsDefault(t *testing.T) { func TestRunAsDevelopment(t *testing.T) { b := loadTarget(t, "./run_as", "development") + b.Config.Workspace.CurrentUser = &config.User{ + User: &iam.User{ + UserName: "jane@doe.com", + }, + } ctx := context.Background() - err := bundle.Apply(ctx, b, bundle.Seq(mutator.PopulateCurrentUser(), mutator.SetRunAs())) + err := bundle.Apply(ctx, b, mutator.SetRunAs()) assert.NoError(t, err) assert.Len(t, b.Config.Resources.Jobs, 3) From 6884e2c780caf27ffec6fd32b588a61eb974a581 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 23 Aug 2023 17:46:07 +0200 Subject: [PATCH 5/5] do not add current owner as an user --- bundle/config/mutator/run_as.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/bundle/config/mutator/run_as.go b/bundle/config/mutator/run_as.go index d2c293eec7..7d1a491753 100644 --- a/bundle/config/mutator/run_as.go +++ b/bundle/config/mutator/run_as.go @@ -2,7 +2,6 @@ package mutator import ( "context" - "fmt" "slices" "github.com/databricks/cli/bundle" @@ -30,11 +29,6 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) error { return nil } - me := b.Config.Workspace.CurrentUser.UserName - if (runAs.UserName == me || runAs.ServicePrincipalName == me) && len(b.Config.Resources.Pipelines) > 0 { - return fmt.Errorf("it's not allowed to define current user (%s) as identity in 'run_as' section if there are DLT pipelines defined", me) - } - for i := range b.Config.Resources.Jobs { job := b.Config.Resources.Jobs[i] if job.RunAs != nil { @@ -46,6 +40,14 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) error { } } + me := b.Config.Workspace.CurrentUser.UserName + // If user deploying the bundle and the one defined in run_as are the same + // Do not add IS_OWNER permission. Current user is implied to be an owner in this case. + // Otherwise, it will fail due to this bug https://github.com/databricks/terraform-provider-databricks/issues/2407 + if runAs.UserName == me || runAs.ServicePrincipalName == me { + return nil + } + for i := range b.Config.Resources.Pipelines { pipeline := b.Config.Resources.Pipelines[i] pipeline.Permissions = slices.DeleteFunc(pipeline.Permissions, func(p resources.Permission) bool {