From 3ab37b3b1c60a656a2f730544aa253846988574a Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 8 Sep 2025 14:18:51 +0200 Subject: [PATCH 1/7] Added new Lakeflow Pipelines support for bundle generate --- .../generate/lakeflow_pipelines/out.test.toml | 5 ++++ .../out/config/out.pipeline.yml | 10 +++++++ .../out/pipeline/explorations/1.py | 1 + .../out/pipeline/transformations/1.py | 1 + .../out/pipeline/transformations/2.py | 1 + .../generate/lakeflow_pipelines/output.txt | 4 +++ .../generate/lakeflow_pipelines/pipeline.json | 16 ++++++++++ .../bundle/generate/lakeflow_pipelines/script | 14 +++++++++ .../generate/lakeflow_pipelines/test.toml | 30 +++++++++++++++++++ bundle/generate/pipeline.go | 18 +++++++++-- cmd/bundle/generate/pipeline.go | 17 ++++++++++- 11 files changed, 114 insertions(+), 3 deletions(-) create mode 100644 acceptance/bundle/generate/lakeflow_pipelines/out.test.toml create mode 100644 acceptance/bundle/generate/lakeflow_pipelines/out/config/out.pipeline.yml create mode 100644 acceptance/bundle/generate/lakeflow_pipelines/out/pipeline/explorations/1.py create mode 100644 acceptance/bundle/generate/lakeflow_pipelines/out/pipeline/transformations/1.py create mode 100644 acceptance/bundle/generate/lakeflow_pipelines/out/pipeline/transformations/2.py create mode 100644 acceptance/bundle/generate/lakeflow_pipelines/output.txt create mode 100644 acceptance/bundle/generate/lakeflow_pipelines/pipeline.json create mode 100644 acceptance/bundle/generate/lakeflow_pipelines/script create mode 100644 acceptance/bundle/generate/lakeflow_pipelines/test.toml diff --git a/acceptance/bundle/generate/lakeflow_pipelines/out.test.toml b/acceptance/bundle/generate/lakeflow_pipelines/out.test.toml new file mode 100644 index 0000000000..8f3575be7b --- /dev/null +++ b/acceptance/bundle/generate/lakeflow_pipelines/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"] diff --git a/acceptance/bundle/generate/lakeflow_pipelines/out/config/out.pipeline.yml b/acceptance/bundle/generate/lakeflow_pipelines/out/config/out.pipeline.yml new file mode 100644 index 0000000000..479fe57f00 --- /dev/null +++ b/acceptance/bundle/generate/lakeflow_pipelines/out/config/out.pipeline.yml @@ -0,0 +1,10 @@ +resources: + pipelines: + out: + name: lakeflow-pipeline + libraries: + - glob: + include: ../pipeline/transformations/** + - glob: + include: /Workspace/Users/foo@databricks.com/another/** + root_path: ../pipeline diff --git a/acceptance/bundle/generate/lakeflow_pipelines/out/pipeline/explorations/1.py b/acceptance/bundle/generate/lakeflow_pipelines/out/pipeline/explorations/1.py new file mode 100644 index 0000000000..7df869a15e --- /dev/null +++ b/acceptance/bundle/generate/lakeflow_pipelines/out/pipeline/explorations/1.py @@ -0,0 +1 @@ +print("Hello, World!") diff --git a/acceptance/bundle/generate/lakeflow_pipelines/out/pipeline/transformations/1.py b/acceptance/bundle/generate/lakeflow_pipelines/out/pipeline/transformations/1.py new file mode 100644 index 0000000000..7df869a15e --- /dev/null +++ b/acceptance/bundle/generate/lakeflow_pipelines/out/pipeline/transformations/1.py @@ -0,0 +1 @@ +print("Hello, World!") diff --git a/acceptance/bundle/generate/lakeflow_pipelines/out/pipeline/transformations/2.py b/acceptance/bundle/generate/lakeflow_pipelines/out/pipeline/transformations/2.py new file mode 100644 index 0000000000..7df869a15e --- /dev/null +++ b/acceptance/bundle/generate/lakeflow_pipelines/out/pipeline/transformations/2.py @@ -0,0 +1 @@ +print("Hello, World!") diff --git a/acceptance/bundle/generate/lakeflow_pipelines/output.txt b/acceptance/bundle/generate/lakeflow_pipelines/output.txt new file mode 100644 index 0000000000..b4c4e3d60a --- /dev/null +++ b/acceptance/bundle/generate/lakeflow_pipelines/output.txt @@ -0,0 +1,4 @@ +Pipeline configuration successfully saved to out/config/out.pipeline.yml +File successfully saved to out/pipeline/explorations/1.py +File successfully saved to out/pipeline/transformations/1.py +File successfully saved to out/pipeline/transformations/2.py diff --git a/acceptance/bundle/generate/lakeflow_pipelines/pipeline.json b/acceptance/bundle/generate/lakeflow_pipelines/pipeline.json new file mode 100644 index 0000000000..0254d7b549 --- /dev/null +++ b/acceptance/bundle/generate/lakeflow_pipelines/pipeline.json @@ -0,0 +1,16 @@ +{ + "name": "lakeflow-pipeline", + "libraries": [ + { + "glob": { + "include": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/**" + } + }, + { + "glob": { + "include": "/Workspace/Users/foo@databricks.com/another/**" + } + } + ], + "root_path": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline" +} diff --git a/acceptance/bundle/generate/lakeflow_pipelines/script b/acceptance/bundle/generate/lakeflow_pipelines/script new file mode 100644 index 0000000000..f348338110 --- /dev/null +++ b/acceptance/bundle/generate/lakeflow_pipelines/script @@ -0,0 +1,14 @@ +export PIPELINE_ID=$($CLI pipelines create --json @pipeline.json | jq -r .pipeline_id) + +# content is base64 encoded "print('Hello, World!')" +$CLI workspace import /Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/1.py --content "cHJpbnQoJ0hlbGxvLCBXb3JsZCAxIScp" --format AUTO +$CLI workspace import /Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/2.py --content "cHJpbnQoJ0hlbGxvLCBXb3JsZCAxIScp" --format AUTO +$CLI workspace import /Workspace/Users/tester@databricks.com/lakeflow_pipeline/explorations/1.py --content "cHJpbnQoJ0hlbGxvLCBXb3JsZCAxIScp" --format AUTO + +$CLI bundle generate pipeline --existing-pipeline-id ${PIPELINE_ID} --config-dir out/config --key out --source-dir out/pipeline > out.stdout 2> out.stderr + +# Combine stdout and stderr, then sort only the "File successfully saved" lines +cat out.stdout out.stderr > out.txt +grep -v "^File successfully saved" out.txt +grep "^File successfully saved" out.txt | sort +rm out.txt out.stdout out.stderr diff --git a/acceptance/bundle/generate/lakeflow_pipelines/test.toml b/acceptance/bundle/generate/lakeflow_pipelines/test.toml new file mode 100644 index 0000000000..8cfe423a86 --- /dev/null +++ b/acceptance/bundle/generate/lakeflow_pipelines/test.toml @@ -0,0 +1,30 @@ +[[Server]] +Pattern = "GET /api/2.0/workspace/export" +Response.Body = ''' +print("Hello, World!") +''' + +[[Repls]] +Old = '\\' +New = '/' + +[[Server]] +Pattern = "GET /api/2.0/workspace/list" +Response.Body = ''' +{ + "objects": [ + { + "path": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/1.py", + "object_type": "FILE" + }, + { + "path": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/2.py", + "object_type": "FILE" + }, + { + "path": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline/explorations/1.py", + "object_type": "FILE" + } + ] +} +''' diff --git a/bundle/generate/pipeline.go b/bundle/generate/pipeline.go index ba4aedfa02..7409ebf55f 100644 --- a/bundle/generate/pipeline.go +++ b/bundle/generate/pipeline.go @@ -1,6 +1,8 @@ package generate import ( + "strings" + "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/yamlsaver" "github.com/databricks/databricks-sdk-go/service/pipelines" @@ -8,9 +10,21 @@ import ( var pipelineOrder = yamlsaver.NewOrder([]string{"name", "clusters", "configuration", "libraries"}) -func ConvertPipelineToValue(pipeline *pipelines.PipelineSpec) (dyn.Value, error) { - value := make(map[string]dyn.Value) +func ConvertPipelineToValue(pipeline *pipelines.PipelineSpec, rootPath string, remoteRootPath string) (dyn.Value, error) { + if pipeline.RootPath != "" { + pipeline.RootPath = rootPath + } + if pipeline.Libraries != nil { + for i := range pipeline.Libraries { + lib := &pipeline.Libraries[i] + if lib.Glob != nil { + lib.Glob.Include = strings.ReplaceAll(lib.Glob.Include, remoteRootPath, rootPath) + } + } + } + + value := make(map[string]dyn.Value) // We ignore the following fields: // - id: this is a read-only field // - storage: changes to this field are rare because changing the storage recreates pipeline-related resources diff --git a/cmd/bundle/generate/pipeline.go b/cmd/bundle/generate/pipeline.go index 9910efc32f..47030a24ec 100644 --- a/cmd/bundle/generate/pipeline.go +++ b/cmd/bundle/generate/pipeline.go @@ -80,7 +80,22 @@ like catalogs, schemas, and compute configurations per target.`, } } - v, err := generate.ConvertPipelineToValue(pipeline.Spec) + // If the root path is set, we need to download the files from the root path + remoteRootPath := pipeline.Spec.RootPath + if pipeline.Spec.RootPath != "" { + err := downloader.MarkDirectoryForDownload(ctx, &pipeline.Spec.RootPath) + if err != nil { + return err + } + } + + // Making sure the root path is relative to the config directory. + rel, err := filepath.Rel(configDir, sourceDir) + if err != nil { + return err + } + + v, err := generate.ConvertPipelineToValue(pipeline.Spec, filepath.ToSlash(rel), remoteRootPath) if err != nil { return err } From f9d332473d0dbaddef7bc7bb469a2e3a03fb862f Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 8 Sep 2025 15:15:22 +0200 Subject: [PATCH 2/7] fix fmt --- bundle/generate/pipeline.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/generate/pipeline.go b/bundle/generate/pipeline.go index 7409ebf55f..c85e6f88b5 100644 --- a/bundle/generate/pipeline.go +++ b/bundle/generate/pipeline.go @@ -10,7 +10,7 @@ import ( var pipelineOrder = yamlsaver.NewOrder([]string{"name", "clusters", "configuration", "libraries"}) -func ConvertPipelineToValue(pipeline *pipelines.PipelineSpec, rootPath string, remoteRootPath string) (dyn.Value, error) { +func ConvertPipelineToValue(pipeline *pipelines.PipelineSpec, rootPath, remoteRootPath string) (dyn.Value, error) { if pipeline.RootPath != "" { pipeline.RootPath = rootPath } From 4e6d18a212e407ea1da779767f093315ec613572 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 9 Sep 2025 14:09:21 +0200 Subject: [PATCH 3/7] test windows --- libs/testserver/fake_workspace.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/testserver/fake_workspace.go b/libs/testserver/fake_workspace.go index 543f70ad6c..0c3812d1b7 100644 --- a/libs/testserver/fake_workspace.go +++ b/libs/testserver/fake_workspace.go @@ -205,7 +205,7 @@ func (s *FakeWorkspace) WorkspaceGetStatus(path string) Response { } else { return Response{ StatusCode: 404, - Body: map[string]string{"message": "Workspace path not found"}, + Body: map[string]string{"message": "Workspace path not found: " + path}, } } } From 70b7a04d0d6812e59390dc6ac20c5628d86d3b91 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 9 Sep 2025 14:23:05 +0200 Subject: [PATCH 4/7] fixed test --- acceptance/bundle/generate/lakeflow_pipelines/script | 5 ----- acceptance/bundle/generate/lakeflow_pipelines/test.toml | 7 +++++++ libs/testserver/fake_workspace.go | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/acceptance/bundle/generate/lakeflow_pipelines/script b/acceptance/bundle/generate/lakeflow_pipelines/script index f348338110..f4b7257ecf 100644 --- a/acceptance/bundle/generate/lakeflow_pipelines/script +++ b/acceptance/bundle/generate/lakeflow_pipelines/script @@ -1,10 +1,5 @@ export PIPELINE_ID=$($CLI pipelines create --json @pipeline.json | jq -r .pipeline_id) -# content is base64 encoded "print('Hello, World!')" -$CLI workspace import /Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/1.py --content "cHJpbnQoJ0hlbGxvLCBXb3JsZCAxIScp" --format AUTO -$CLI workspace import /Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/2.py --content "cHJpbnQoJ0hlbGxvLCBXb3JsZCAxIScp" --format AUTO -$CLI workspace import /Workspace/Users/tester@databricks.com/lakeflow_pipeline/explorations/1.py --content "cHJpbnQoJ0hlbGxvLCBXb3JsZCAxIScp" --format AUTO - $CLI bundle generate pipeline --existing-pipeline-id ${PIPELINE_ID} --config-dir out/config --key out --source-dir out/pipeline > out.stdout 2> out.stderr # Combine stdout and stderr, then sort only the "File successfully saved" lines diff --git a/acceptance/bundle/generate/lakeflow_pipelines/test.toml b/acceptance/bundle/generate/lakeflow_pipelines/test.toml index 8cfe423a86..0b39960b4b 100644 --- a/acceptance/bundle/generate/lakeflow_pipelines/test.toml +++ b/acceptance/bundle/generate/lakeflow_pipelines/test.toml @@ -28,3 +28,10 @@ Response.Body = ''' ] } ''' + +[[Server]] +Pattern = "GET /api/2.0/workspace/get-status" +Response.Body = ''' +{ +} +''' diff --git a/libs/testserver/fake_workspace.go b/libs/testserver/fake_workspace.go index 0c3812d1b7..543f70ad6c 100644 --- a/libs/testserver/fake_workspace.go +++ b/libs/testserver/fake_workspace.go @@ -205,7 +205,7 @@ func (s *FakeWorkspace) WorkspaceGetStatus(path string) Response { } else { return Response{ StatusCode: 404, - Body: map[string]string{"message": "Workspace path not found: " + path}, + Body: map[string]string{"message": "Workspace path not found"}, } } } From 956c09e5f8a0f9ae87897c5bbb80daaf584c6ecb Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 17 Sep 2025 13:25:17 +0200 Subject: [PATCH 5/7] change to DATABRICKS_BUNDLE_ENGINE --- acceptance/bundle/generate/lakeflow_pipelines/out.test.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acceptance/bundle/generate/lakeflow_pipelines/out.test.toml b/acceptance/bundle/generate/lakeflow_pipelines/out.test.toml index 8f3575be7b..e092fd5ed6 100644 --- a/acceptance/bundle/generate/lakeflow_pipelines/out.test.toml +++ b/acceptance/bundle/generate/lakeflow_pipelines/out.test.toml @@ -2,4 +2,4 @@ Local = true Cloud = false [EnvMatrix] - DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct-exp"] From 9ddba93e7732aa2c53432b61dcae51da806b4ba9 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 17 Sep 2025 16:05:51 +0200 Subject: [PATCH 6/7] add changelog --- NEXT_CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 867919c355..f699149a21 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -10,5 +10,6 @@ ### Bundles * Fix checkForPreventDestroy to check all resources if some does not have prevent_destroy set ([#3615](https://github.com/databricks/cli/pull/3615)) +* Add new Lakeflow Pipelines support for bundle generate ([#3568](https://github.com/databricks/cli/pull/3568)) ### API Changes From be9219d1918a8bae2a9461fba16e412aa6416aea Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 17 Sep 2025 16:08:23 +0200 Subject: [PATCH 7/7] Update NEXT_CHANGELOG.md with recent changes --- NEXT_CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index c2be2357d9..c1f7fd6bc9 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -9,7 +9,6 @@ ### Dependency updates ### Bundles -* Fix checkForPreventDestroy to check all resources if some does not have prevent_destroy set ([#3615](https://github.com/databricks/cli/pull/3615)) * Add new Lakeflow Pipelines support for bundle generate ([#3568](https://github.com/databricks/cli/pull/3568)) ### API Changes