Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@
### Dependency updates

### Bundles
* Add new Lakeflow Pipelines support for bundle generate ([#3568](https://github.com/databricks/cli/pull/3568))

### API Changes
5 changes: 5 additions & 0 deletions acceptance/bundle/generate/lakeflow_pipelines/out.test.toml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
resources:
pipelines:
out:
name: lakeflow-pipeline
libraries:
- glob:
include: ../pipeline/transformations/**
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be something for a follow-up, but pipeline source code should be in the same folder as the pipeline definition for Lakeflow. So this should always say transformations/**. See also #3558.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is configured by use of --config-dir and --source-dir flags in bundle generate command and for this specific test I set them up in such a way that it also tests that all complex relative paths are resolved correctly.

This is not an example of what the configuration should look like, though.

- glob:
include: /Workspace/Users/foo@databricks.com/another/**
root_path: ../pipeline
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to the above, the .yml file should be included in the root_path, so this should actually say `.` — or maybe `"."` for clarity.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
print("Hello, World!")
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
print("Hello, World!")
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
print("Hello, World!")
4 changes: 4 additions & 0 deletions acceptance/bundle/generate/lakeflow_pipelines/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Pipeline configuration successfully saved to out/config/out.pipeline.yml
File successfully saved to out/pipeline/explorations/1.py
File successfully saved to out/pipeline/transformations/1.py
File successfully saved to out/pipeline/transformations/2.py
16 changes: 16 additions & 0 deletions acceptance/bundle/generate/lakeflow_pipelines/pipeline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"name": "lakeflow-pipeline",
"libraries": [
{
"glob": {
"include": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/**"
}
},
{
"glob": {
"include": "/Workspace/Users/foo@databricks.com/another/**"
}
}
],
"root_path": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline"
}
9 changes: 9 additions & 0 deletions acceptance/bundle/generate/lakeflow_pipelines/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Create a pipeline from the JSON spec against the stubbed server and capture its ID.
export PIPELINE_ID=$($CLI pipelines create --json @pipeline.json | jq -r .pipeline_id)

# Generate bundle config for the existing pipeline. --config-dir and --source-dir are
# deliberately set to different directories so that relative-path resolution between
# the generated config and the downloaded source files is exercised.
$CLI bundle generate pipeline --existing-pipeline-id ${PIPELINE_ID} --config-dir out/config --key out --source-dir out/pipeline > out.stdout 2> out.stderr

# Combine stdout and stderr, then sort only the "File successfully saved" lines
# (the order those lines are printed in is presumably nondeterministic — sorting
# keeps the golden output stable).
cat out.stdout out.stderr > out.txt
grep -v "^File successfully saved" out.txt
grep "^File successfully saved" out.txt | sort
rm out.txt out.stdout out.stderr
37 changes: 37 additions & 0 deletions acceptance/bundle/generate/lakeflow_pipelines/test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Stub the workspace export endpoint: every exported file gets the same
# one-line Python body.
[[Server]]
Pattern = "GET /api/2.0/workspace/export"
Response.Body = '''
print("Hello, World!")
'''

# Replace backslashes with forward slashes in test output — presumably to
# normalize Windows path separators so the golden output matches on all
# platforms (TODO confirm against the test harness).
[[Repls]]
Old = '\\'
New = '/'

# Stub the workspace list endpoint: report the pipeline's remote source tree
# (two transformation files and one exploration file) under the root path
# declared in pipeline.json.
[[Server]]
Pattern = "GET /api/2.0/workspace/list"
Response.Body = '''
{
  "objects": [
    {
      "path": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/1.py",
      "object_type": "FILE"
    },
    {
      "path": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/2.py",
      "object_type": "FILE"
    },
    {
      "path": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline/explorations/1.py",
      "object_type": "FILE"
    }
  ]
}
'''

# Stub the get-status endpoint with an empty object response.
[[Server]]
Pattern = "GET /api/2.0/workspace/get-status"
Response.Body = '''
{
}
'''
18 changes: 16 additions & 2 deletions bundle/generate/pipeline.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,30 @@
package generate

import (
"strings"

"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/dyn/yamlsaver"
"github.com/databricks/databricks-sdk-go/service/pipelines"
)

var pipelineOrder = yamlsaver.NewOrder([]string{"name", "clusters", "configuration", "libraries"})

func ConvertPipelineToValue(pipeline *pipelines.PipelineSpec) (dyn.Value, error) {
value := make(map[string]dyn.Value)
func ConvertPipelineToValue(pipeline *pipelines.PipelineSpec, rootPath, remoteRootPath string) (dyn.Value, error) {
if pipeline.RootPath != "" {
pipeline.RootPath = rootPath
}

if pipeline.Libraries != nil {
for i := range pipeline.Libraries {
lib := &pipeline.Libraries[i]
if lib.Glob != nil {
lib.Glob.Include = strings.ReplaceAll(lib.Glob.Include, remoteRootPath, rootPath)
}
}
}

value := make(map[string]dyn.Value)
// We ignore the following fields:
// - id: this is a read-only field
// - storage: changes to this field are rare because changing the storage recreates pipeline-related resources
Expand Down
17 changes: 16 additions & 1 deletion cmd/bundle/generate/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,22 @@ like catalogs, schemas, and compute configurations per target.`,
}
}

v, err := generate.ConvertPipelineToValue(pipeline.Spec)
// If the root path is set, we need to download the files from the root path
remoteRootPath := pipeline.Spec.RootPath
if pipeline.Spec.RootPath != "" {
err := downloader.MarkDirectoryForDownload(ctx, &pipeline.Spec.RootPath)
if err != nil {
return err
}
}

// Making sure the root path is relative to the config directory.
rel, err := filepath.Rel(configDir, sourceDir)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add a test case for this error as well?

if err != nil {
return err
}

v, err := generate.ConvertPipelineToValue(pipeline.Spec, filepath.ToSlash(rel), remoteRootPath)
if err != nil {
return err
}
Expand Down
Loading