-
Notifications
You must be signed in to change notification settings - Fork 154
Added new Lakeflow Pipelines support for bundle generate
#3568
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3ab37b3
f9d3324
4e6d18a
70b7a04
c2a58e3
956c09e
9ddba93
01ad638
be9219d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| resources: | ||
| pipelines: | ||
| out: | ||
| name: lakeflow-pipeline | ||
| libraries: | ||
| - glob: | ||
| include: ../pipeline/transformations/** | ||
| - glob: | ||
| include: /Workspace/Users/foo@databricks.com/another/** | ||
| root_path: ../pipeline | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar to the above, the .yml file should be included in the root_path, so this should actually say |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| print("Hello, World!") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| print("Hello, World!") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| print("Hello, World!") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| Pipeline configuration successfully saved to out/config/out.pipeline.yml | ||
| File successfully saved to out/pipeline/explorations/1.py | ||
| File successfully saved to out/pipeline/transformations/1.py | ||
| File successfully saved to out/pipeline/transformations/2.py |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| { | ||
| "name": "lakeflow-pipeline", | ||
| "libraries": [ | ||
| { | ||
| "glob": { | ||
| "include": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/**" | ||
| } | ||
| }, | ||
| { | ||
| "glob": { | ||
| "include": "/Workspace/Users/foo@databricks.com/another/**" | ||
| } | ||
| } | ||
| ], | ||
| "root_path": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline" | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| export PIPELINE_ID=$($CLI pipelines create --json @pipeline.json | jq -r .pipeline_id) | ||
|
|
||
| $CLI bundle generate pipeline --existing-pipeline-id ${PIPELINE_ID} --config-dir out/config --key out --source-dir out/pipeline > out.stdout 2> out.stderr | ||
|
|
||
| # Combine stdout and stderr, then sort only the "File successfully saved" lines | ||
| cat out.stdout out.stderr > out.txt | ||
| grep -v "^File successfully saved" out.txt | ||
| grep "^File successfully saved" out.txt | sort | ||
| rm out.txt out.stdout out.stderr |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| [[Server]] | ||
| Pattern = "GET /api/2.0/workspace/export" | ||
| Response.Body = ''' | ||
| print("Hello, World!") | ||
| ''' | ||
|
|
||
| [[Repls]] | ||
| Old = '\\' | ||
| New = '/' | ||
|
|
||
| [[Server]] | ||
| Pattern = "GET /api/2.0/workspace/list" | ||
| Response.Body = ''' | ||
| { | ||
| "objects": [ | ||
| { | ||
| "path": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/1.py", | ||
| "object_type": "FILE" | ||
| }, | ||
| { | ||
| "path": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline/transformations/2.py", | ||
| "object_type": "FILE" | ||
| }, | ||
| { | ||
| "path": "/Workspace/Users/tester@databricks.com/lakeflow_pipeline/explorations/1.py", | ||
| "object_type": "FILE" | ||
| } | ||
| ] | ||
| } | ||
| ''' | ||
|
|
||
| [[Server]] | ||
| Pattern = "GET /api/2.0/workspace/get-status" | ||
| Response.Body = ''' | ||
| { | ||
| } | ||
| ''' |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -80,7 +80,22 @@ like catalogs, schemas, and compute configurations per target.`, | |
| } | ||
| } | ||
|
|
||
| v, err := generate.ConvertPipelineToValue(pipeline.Spec) | ||
| // If the root path is set, we need to download the files from the root path | ||
| remoteRootPath := pipeline.Spec.RootPath | ||
| if pipeline.Spec.RootPath != "" { | ||
| err := downloader.MarkDirectoryForDownload(ctx, &pipeline.Spec.RootPath) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| } | ||
|
|
||
| // Making sure the root path is relative to the config directory. | ||
| rel, err := filepath.Rel(configDir, sourceDir) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we add a test case for this error as well? |
||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| v, err := generate.ConvertPipelineToValue(pipeline.Spec, filepath.ToSlash(rel), remoteRootPath) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This might be something for a followup, but pipeline source code should be in the same folder as the pipeline definition for Lakeflow. So this should always say
transformations/**. See also #3558. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is configured by use of --config-dir and --source-dir flags in bundle generate command and for this specific test I set them up in such a way that it also tests that all complex relative paths are resolved correctly.
This is not an example of what configuration should look like, though