@@ -2,5 +2,6 @@
   "project_name": "my_default_python",
   "include_notebook": "yes",
   "include_dlt": "yes",
-  "include_python": "yes"
+  "include_python": "yes",
+  "serverless": "no"
 }
@@ -0,0 +1,54 @@
--- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.job.yml
+++ output/my_default_python/resources/my_default_python.job.yml
@@ -17,4 +17,5 @@
       tasks:
         - task_key: notebook_task
+          job_cluster_key: job_cluster
           notebook_task:
             notebook_path: ../src/notebook.ipynb
@@ -29,17 +30,21 @@
           depends_on:
             - task_key: refresh_pipeline
-          environment_key: default
+          job_cluster_key: job_cluster
           python_wheel_task:
             package_name: my_default_python
             entry_point: main
+          libraries:
+            # By default we just include the .whl file generated for the my_default_python package.
+            # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+            # for more information on how to add other libraries.
+            - whl: ../dist/*.whl
 
-      # A list of task execution environment specifications that can be referenced by tasks of this job.
-      environments:
-        - environment_key: default
-
-          # Full documentation of this spec can be found at:
-          # https://docs.databricks.com/api/workspace/jobs/create#environments-spec
-          spec:
-            client: "1"
-            dependencies:
-              - ../dist/*.whl
+      job_clusters:
+        - job_cluster_key: job_cluster
+          new_cluster:
+            spark_version: 15.4.x-scala2.12
+            node_type_id: i3.xlarge
+            data_security_mode: SINGLE_USER
+            autoscale:
+              min_workers: 1
+              max_workers: 4
--- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml
+++ output/my_default_python/resources/my_default_python.pipeline.yml
@@ -4,8 +4,7 @@
     my_default_python_pipeline:
       name: my_default_python_pipeline
-      ## Catalog is required for serverless compute
-      catalog: main
+      ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
+      # catalog: catalog_name
       target: my_default_python_${bundle.target}
-      serverless: true
       libraries:
         - notebook:
acceptance/bundle/templates/default-python/classic/script
@@ -0,0 +1,15 @@
trace $CLI bundle init default-python --config-file ./input.json --output-dir output

cd output/my_default_python
trace $CLI bundle validate -t dev
trace $CLI bundle validate -t prod

# Do not affect this repository's git behaviour #2318
mv .gitignore out.gitignore

cd ../../

# Calculate the difference from the serverless template
diff.py $TESTDIR/../serverless/output output/ > out.compare-vs-serverless.diff

rm -fr output
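Note: the diff.py output captured above is the out.compare-vs-serverless.diff expected-output file whose contents appear at the start of this changeset.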
@@ -0,0 +1,22 @@

>>> [CLI] bundle init default-python --config-file [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/input.json --output-dir output

Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL]

✨ Your new project has been created in the 'my_default_python' directory!

Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.

>>> diff.py [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output output/
--- [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml
+++ output/my_default_python/resources/my_default_python.pipeline.yml
@@ -4,6 +4,5 @@
     my_default_python_pipeline:
       name: my_default_python_pipeline
-      ## Catalog is required for serverless compute
-      catalog: main
+      catalog: customcatalog
       target: my_default_python_${bundle.target}
       serverless: true
@@ -0,0 +1,4 @@
trace $CLI bundle init default-python --config-file $TESTDIR/../serverless/input.json --output-dir output
mv output/my_default_python/.gitignore output/my_default_python/out.gitignore
trace diff.py $TESTDIR/../serverless/output output/
rm -fr output
@@ -0,0 +1,8 @@
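# Make the test server report 'customcatalog' as the workspace default catalog
# (the stock handler in acceptance/server_test.go below returns 'hive_metastore').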
[[Server]]
Pattern = "GET /api/2.1/unity-catalog/current-metastore-assignment"
Response.Body = '{"default_catalog_name": "customcatalog"}'

[[Repls]]
# Windows fix: normalize backslash path separators to forward slashes
Old = '\\'
New = '/'
@@ -0,0 +1,7 @@
{
"project_name": "my_default_python",
"include_notebook": "yes",
"include_dlt": "yes",
"include_python": "yes",
"serverless": "yes"
}
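Note: this is the configuration the serverless test feeds to bundle init. Outside the test harness, the equivalent invocation (a sketch, with the real CLI binary standing in for the tests' $CLI variable) is:

    databricks bundle init default-python --config-file ./input.json --output-dir output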
@@ -0,0 +1,30 @@

>>> [CLI] bundle init default-python --config-file ./input.json --output-dir output

Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL]

✨ Your new project has been created in the 'my_default_python' directory!

Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.

>>> [CLI] bundle validate -t dev
Name: my_default_python
Target: dev
Workspace:
Host: [DATABRICKS_URL]
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev

Validation OK!

>>> [CLI] bundle validate -t prod
Name: my_default_python
Target: prod
Workspace:
Host: [DATABRICKS_URL]
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/prod

Validation OK!
@@ -16,7 +16,6 @@ resources:
 
       tasks:
         - task_key: notebook_task
-          job_cluster_key: job_cluster
           notebook_task:
             notebook_path: ../src/notebook.ipynb
 
@@ -29,22 +28,18 @@ resources:
         - task_key: main_task
           depends_on:
             - task_key: refresh_pipeline
-          job_cluster_key: job_cluster
+          environment_key: default
           python_wheel_task:
             package_name: my_default_python
             entry_point: main
-          libraries:
-            # By default we just include the .whl file generated for the my_default_python package.
-            # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
-            # for more information on how to add other libraries.
-            - whl: ../dist/*.whl
 
-      job_clusters:
-        - job_cluster_key: job_cluster
-          new_cluster:
-            spark_version: 15.4.x-scala2.12
-            node_type_id: i3.xlarge
-            data_security_mode: SINGLE_USER
-            autoscale:
-              min_workers: 1
-              max_workers: 4
+      # A list of task execution environment specifications that can be referenced by tasks of this job.
+      environments:
+        - environment_key: default
+
+          # Full documentation of this spec can be found at:
+          # https://docs.databricks.com/api/workspace/jobs/create#environments-spec
+          spec:
+            client: "1"
+            dependencies:
+              - ../dist/*.whl
@@ -3,8 +3,10 @@ resources:
   pipelines:
     my_default_python_pipeline:
       name: my_default_python_pipeline
+      ## Catalog is required for serverless compute
       catalog: main
       target: my_default_python_${bundle.target}
+      serverless: true
       libraries:
         - notebook:
             path: ../src/dlt_pipeline.ipynb
acceptance/server_test.go
@@ -20,6 +20,12 @@ var testUser = iam.User{
 	UserName: "tester@databricks.com",
 }
 
+var testMetastore = catalog.MetastoreAssignment{
+	DefaultCatalogName: "hive_metastore",
+	MetastoreId:        "120efa64-9b68-46ba-be38-f319458430d2",
+	WorkspaceId:        470123456789500,
+}
+
 func AddHandlers(server *testserver.Server) {
 	server.Handle("GET", "/api/2.0/policies/clusters/list", func(req testserver.Request) any {
 		return compute.ListPoliciesResponse{
@@ -106,9 +112,7 @@ func AddHandlers(server *testserver.Server) {
 	})
 
 	server.Handle("GET", "/api/2.1/unity-catalog/current-metastore-assignment", func(req testserver.Request) any {
-		return catalog.MetastoreAssignment{
-			DefaultCatalogName: "main",
-		}
+		return testMetastore
 	})
 
 	server.Handle("GET", "/api/2.0/permissions/directories/{objectId}", func(req testserver.Request) any {
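Note: switching the stub's default catalog from main to hive_metastore means the generated pipeline takes the template's "specify a catalog" branch (see the pipeline template below) instead of rendering catalog: main from default_catalog, which is why the expected outputs above changed in tandem.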
libs/template/renderer_test.go
@@ -116,14 +116,17 @@ func TestBuiltinPythonTemplateValid(t *testing.T) {
 		for _, includeDlt := range options {
 			for _, includePython := range options {
 				for _, isServicePrincipal := range []bool{true, false} {
-					config := map[string]any{
-						"project_name":     "my_project",
-						"include_notebook": includeNotebook,
-						"include_dlt":      includeDlt,
-						"include_python":   includePython,
+					for _, serverless := range options {
+						config := map[string]any{
+							"project_name":     "my_project",
+							"include_notebook": includeNotebook,
+							"include_dlt":      includeDlt,
+							"include_python":   includePython,
+							"serverless":       serverless,
+						}
+						tempDir := t.TempDir()
+						assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir)
 					}
-					tempDir := t.TempDir()
-					assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir)
 				}
 			}
 		}
@@ -135,6 +138,7 @@ func TestBuiltinPythonTemplateValid(t *testing.T) {
 		"include_notebook": "yes",
 		"include_dlt":      "yes",
 		"include_python":   "yes",
+		"serverless":       "yes",
 	}
 	isServicePrincipal = false
 	build = true
@@ -29,6 +29,13 @@
       "enum": ["yes", "no"],
       "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
       "order": 4
+    },
+    "serverless": {
+      "type": "string",
+      "default": "no",
+      "enum": ["yes", "no"],
+      "description": "Use serverless compute",
+      "order": 5
     }
   },
   "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
A review thread on the '"default": "no"' line:

Contributor: Why do we use "no" as the default, by the way?

Author: This is the status quo. We can flip the switch any time, but the goal of this PR is to add this feature as an option.

Contributor: I think this should be "yes", since that's what we are promoting as a company. cc: @lennartkats-db @pietern

Author: I don't disagree, but that can be a separate PR, or we can decide to do it in a later version. I'm also working on integration tests for the standard templates (with deploy & summary & run); those would give more confidence in the new one.
"enum": ["yes", "no"],
"description": "Use serverless compute",
"order": 5
}
},
"success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
Expand Down
@@ -4,6 +4,7 @@
 {{if and (eq .include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}}
 # This job runs {{.project_name}}_pipeline on a schedule.
 {{end -}}
+{{$with_serverless := (eq .serverless "yes") -}}
 
 resources:
   jobs:
@@ -29,7 +30,8 @@ resources:
       tasks:
         {{- if eq .include_notebook "yes" }}
         - task_key: notebook_task
-          job_cluster_key: job_cluster
+          {{- if not $with_serverless}}
+          job_cluster_key: job_cluster{{end}}
           notebook_task:
             notebook_path: ../src/notebook.ipynb
         {{end -}}
@@ -52,18 +54,34 @@ resources:
           depends_on:
             - task_key: notebook_task
           {{end}}
-          job_cluster_key: job_cluster
+          {{- if $with_serverless }}
+          environment_key: default
+          {{- else }}
+          job_cluster_key: job_cluster{{end}}
           python_wheel_task:
             package_name: {{.project_name}}
             entry_point: main
+          {{- if not $with_serverless }}
           libraries:
             # By default we just include the .whl file generated for the {{.project_name}} package.
             # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
             # for more information on how to add other libraries.
             - whl: ../dist/*.whl
+          {{- end -}}
+        {{else}}
+        {{- end}}
+      {{if $with_serverless}}
+      # A list of task execution environment specifications that can be referenced by tasks of this job.
+      environments:
+        - environment_key: default
 
-        {{else}}
-        {{end -}}
+          # Full documentation of this spec can be found at:
+          # https://docs.databricks.com/api/workspace/jobs/create#environments-spec
+          spec:
+            client: "1"
+            dependencies:
+              - ../dist/*.whl
+      {{ else }}
       job_clusters:
         - job_cluster_key: job_cluster
           new_cluster:
@@ -73,3 +91,4 @@ resources:
             autoscale:
               min_workers: 1
               max_workers: 4
+      {{end -}}
@@ -1,15 +1,22 @@
+{{$with_serverless := (eq .serverless "yes") -}}
 # The main pipeline for {{.project_name}}
 resources:
   pipelines:
     {{.project_name}}_pipeline:
       name: {{.project_name}}_pipeline
       {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}}
+      {{- if $with_serverless }}
+      ## Catalog is required for serverless compute
+      catalog: main{{else}}
       ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
-      # catalog: catalog_name
+      # catalog: catalog_name{{end}}
       {{- else}}
       catalog: {{default_catalog}}
       {{- end}}
       target: {{.project_name}}_${bundle.target}
+      {{- if $with_serverless }}
+      serverless: true
+      {{- end}}
       libraries:
         - notebook:
             path: ../src/dlt_pipeline.ipynb
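Likewise, with serverless set to yes and a default catalog of hive_metastore (or none), this template renders to the pipeline definition seen in the serverless expected output above:

    resources:
      pipelines:
        my_default_python_pipeline:
          name: my_default_python_pipeline
          ## Catalog is required for serverless compute
          catalog: main
          target: my_default_python_${bundle.target}
          serverless: true
          libraries:
            - notebook:
                path: ../src/dlt_pipeline.ipynb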