From 5a0de74d69002f03d4652b52dd2bd4c6ee9fce41 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 4 Feb 2025 13:12:06 +0100 Subject: [PATCH 01/11] Support serverless in default-python with explicit prompt --- .../bundle/templates/default-python/README.md | 6 ++ .../default-python/{ => classic}/input.json | 3 +- .../classic/out.compare-vs-serverless.diff | 48 ++++++++++ .../default-python/{ => classic}/output.txt | 0 .../.vscode/__builtins__.pyi | 0 .../my_default_python/.vscode/extensions.json | 0 .../my_default_python/.vscode/settings.json | 0 .../output/my_default_python/README.md | 0 .../output/my_default_python/databricks.yml | 0 .../my_default_python/fixtures/.gitkeep | 0 .../output/my_default_python/out.gitignore | 0 .../output/my_default_python/pytest.ini | 0 .../my_default_python/requirements-dev.txt | 0 .../resources/my_default_python.job.yml | 0 .../resources/my_default_python.pipeline.yml | 0 .../my_default_python/scratch/README.md | 0 .../scratch/exploration.ipynb | 0 .../output/my_default_python/setup.py | 0 .../my_default_python/src/dlt_pipeline.ipynb | 0 .../src/my_default_python/__init__.py | 0 .../src/my_default_python/main.py | 0 .../my_default_python/src/notebook.ipynb | 0 .../my_default_python/tests/main_test.py | 0 .../templates/default-python/classic/script | 13 +++ .../serverless-auto-disabled/input.json | 7 ++ .../serverless-auto-disabled/output.txt | 11 +++ .../serverless-auto-disabled/script | 7 ++ .../serverless-auto-disabled/test.toml | 3 + .../serverless-auto-enabled/input.json | 7 ++ .../serverless-auto-enabled/output.txt | 10 +++ .../serverless-auto-enabled/script | 7 ++ .../serverless-auto-enabled/test.toml | 3 + .../serverless-auto-error/input.json | 7 ++ .../serverless-auto-error/output.txt | 11 +++ .../serverless-auto-error/script | 6 ++ .../serverless-auto-error/test.toml | 3 + .../default-python/serverless/input.json | 7 ++ .../default-python/serverless/output.txt | 30 +++++++ .../.vscode/__builtins__.pyi | 3 + 
.../my_default_python/.vscode/extensions.json | 7 ++ .../my_default_python/.vscode/settings.json | 16 ++++ .../output/my_default_python/README.md | 49 ++++++++++ .../output/my_default_python/databricks.yml | 29 ++++++ .../my_default_python/fixtures/.gitkeep | 22 +++++ .../output/my_default_python/out.gitignore | 8 ++ .../output/my_default_python/pytest.ini | 3 + .../my_default_python/requirements-dev.txt | 29 ++++++ .../resources/my_default_python.job.yml | 45 ++++++++++ .../resources/my_default_python.pipeline.yml | 14 +++ .../my_default_python/scratch/README.md | 4 + .../scratch/exploration.ipynb | 61 +++++++++++++ .../output/my_default_python/setup.py | 41 +++++++++ .../my_default_python/src/dlt_pipeline.ipynb | 90 +++++++++++++++++++ .../src/my_default_python/__init__.py | 1 + .../src/my_default_python/main.py | 25 ++++++ .../my_default_python/src/notebook.ipynb | 75 ++++++++++++++++ .../my_default_python/tests/main_test.py | 6 ++ .../default-python/{ => serverless}/script | 0 acceptance/bundle/templates/test.toml | 4 + libs/template/helpers.go | 66 ++++++++++++++ libs/template/renderer_test.go | 18 ++-- .../databricks_template_schema.json | 8 ++ .../resources/{{.project_name}}.job.yml.tmpl | 30 ++++++- .../{{.project_name}}.pipeline.yml.tmpl | 7 ++ 64 files changed, 838 insertions(+), 12 deletions(-) create mode 100644 acceptance/bundle/templates/default-python/README.md rename acceptance/bundle/templates/default-python/{ => classic}/input.json (66%) create mode 100644 acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff rename acceptance/bundle/templates/default-python/{ => classic}/output.txt (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/.vscode/__builtins__.pyi (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/.vscode/extensions.json (100%) rename acceptance/bundle/templates/default-python/{ => 
classic}/output/my_default_python/.vscode/settings.json (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/README.md (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/databricks.yml (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/fixtures/.gitkeep (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/out.gitignore (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/pytest.ini (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/requirements-dev.txt (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/resources/my_default_python.job.yml (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/resources/my_default_python.pipeline.yml (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/scratch/README.md (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/scratch/exploration.ipynb (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/setup.py (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/src/dlt_pipeline.ipynb (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/src/my_default_python/__init__.py (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/src/my_default_python/main.py (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/src/notebook.ipynb (100%) rename acceptance/bundle/templates/default-python/{ => classic}/output/my_default_python/tests/main_test.py (100%) create mode 100644 
acceptance/bundle/templates/default-python/classic/script create mode 100644 acceptance/bundle/templates/default-python/serverless-auto-disabled/input.json create mode 100644 acceptance/bundle/templates/default-python/serverless-auto-disabled/output.txt create mode 100644 acceptance/bundle/templates/default-python/serverless-auto-disabled/script create mode 100644 acceptance/bundle/templates/default-python/serverless-auto-disabled/test.toml create mode 100644 acceptance/bundle/templates/default-python/serverless-auto-enabled/input.json create mode 100644 acceptance/bundle/templates/default-python/serverless-auto-enabled/output.txt create mode 100644 acceptance/bundle/templates/default-python/serverless-auto-enabled/script create mode 100644 acceptance/bundle/templates/default-python/serverless-auto-enabled/test.toml create mode 100644 acceptance/bundle/templates/default-python/serverless-auto-error/input.json create mode 100644 acceptance/bundle/templates/default-python/serverless-auto-error/output.txt create mode 100644 acceptance/bundle/templates/default-python/serverless-auto-error/script create mode 100644 acceptance/bundle/templates/default-python/serverless-auto-error/test.toml create mode 100644 acceptance/bundle/templates/default-python/serverless/input.json create mode 100644 acceptance/bundle/templates/default-python/serverless/output.txt create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml create mode 100644 
acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/pytest.ini create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/requirements-dev.txt create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/README.md create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/setup.py create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/dlt_pipeline.ipynb create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/__init__.py create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py rename acceptance/bundle/templates/default-python/{ => serverless}/script (100%) diff --git a/acceptance/bundle/templates/default-python/README.md b/acceptance/bundle/templates/default-python/README.md new file mode 100644 index 0000000000..e4f4fd816b --- /dev/null +++ b/acceptance/bundle/templates/default-python/README.md @@ 
-0,0 +1,6 @@ +The 'serverless' and 'classic' directories contain full tests: they +have the full output of the materialized template, perform "bundle validate" +and in the future will perform deploy/summary/run. + +Other directories (serverless-auto-\*) contain short tests: they only do +"bundle init" and then check that the output matches 'serverless' or 'classic' exactly. diff --git a/acceptance/bundle/templates/default-python/input.json b/acceptance/bundle/templates/default-python/classic/input.json similarity index 66% rename from acceptance/bundle/templates/default-python/input.json rename to acceptance/bundle/templates/default-python/classic/input.json index 3e1d79c68a..2c4416c00c 100644 --- a/acceptance/bundle/templates/default-python/input.json +++ b/acceptance/bundle/templates/default-python/classic/input.json @@ -2,5 +2,6 @@ "project_name": "my_default_python", "include_notebook": "yes", "include_dlt": "yes", - "include_python": "yes" + "include_python": "yes", + "serverless": "no" } diff --git a/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff b/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff new file mode 100644 index 0000000000..6f5b0a7f40 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff @@ -0,0 +1,48 @@ +--- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.job.yml ++++ output/my_default_python/resources/my_default_python.job.yml +@@ -17,4 +17,5 @@ + tasks: + - task_key: notebook_task ++ job_cluster_key: job_cluster + notebook_task: + notebook_path: ../src/notebook.ipynb +@@ -29,17 +30,20 @@ + depends_on: + - task_key: refresh_pipeline +- environment_key: default ++ job_cluster_key: job_cluster + python_wheel_task: + package_name: my_default_python + entry_point: main ++ libraries: ++ # By default we just include the .whl file generated for the my_default_python package.
++ # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html ++ # for more information on how to add other libraries. ++ - whl: ../dist/*.whl + +- # A list of task execution environment specifications that can be referenced by tasks of this job. +- environments: +- - environment_key: default +- +- # Full documentation of this spec can be found at: +- # https://docs.databricks.com/api/workspace/jobs/create#environments-spec +- spec: +- client: "1" +- dependencies: +- - ../dist/*.whl ++ job_clusters: ++ - job_cluster_key: job_cluster ++ new_cluster: ++ spark_version: 15.4.x-scala2.12 ++ node_type_id: i3.xlarge ++ autoscale: ++ min_workers: 1 ++ max_workers: 4 +--- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml ++++ output/my_default_python/resources/my_default_python.pipeline.yml +@@ -6,5 +6,4 @@ + catalog: main + target: my_default_python_${bundle.target} +- serverless: true + libraries: + - notebook: diff --git a/acceptance/bundle/templates/default-python/output.txt b/acceptance/bundle/templates/default-python/classic/output.txt similarity index 100% rename from acceptance/bundle/templates/default-python/output.txt rename to acceptance/bundle/templates/default-python/classic/output.txt diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json similarity index 100% rename from 
acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/README.md rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/out.gitignore 
b/acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/out.gitignore rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini b/acceptance/bundle/templates/default-python/classic/output/my_default_python/pytest.ini similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/pytest.ini diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt b/acceptance/bundle/templates/default-python/classic/output/my_default_python/requirements-dev.txt similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/requirements-dev.txt diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml similarity index 100% rename from 
acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/setup.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/setup.py similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/setup.py rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/setup.py diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb diff --git 
a/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/__init__.py similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/__init__.py diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py similarity index 100% rename from acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py diff --git a/acceptance/bundle/templates/default-python/classic/script b/acceptance/bundle/templates/default-python/classic/script new file mode 100644 index 0000000000..7e5524065a --- /dev/null +++ 
b/acceptance/bundle/templates/default-python/classic/script @@ -0,0 +1,13 @@ +trace $CLI bundle init default-python --config-file ./input.json --output-dir output + +cd output/my_default_python +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod + +# Do not affect this repository's git behaviour #2318 +mv .gitignore out.gitignore + +cd ../../ + +# Calculate the difference from the serverless template +diff.py $TESTDIR/../serverless/output output/ > out.compare-vs-serverless.diff diff --git a/acceptance/bundle/templates/default-python/serverless-auto-disabled/input.json b/acceptance/bundle/templates/default-python/serverless-auto-disabled/input.json new file mode 100644 index 0000000000..7ccdf8faab --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-auto-disabled/input.json @@ -0,0 +1,7 @@ +{ + "project_name": "my_default_python", + "include_notebook": "yes", + "include_dlt": "yes", + "include_python": "yes", + "serverless": "auto" +} diff --git a/acceptance/bundle/templates/default-python/serverless-auto-disabled/output.txt b/acceptance/bundle/templates/default-python/serverless-auto-disabled/output.txt new file mode 100644 index 0000000000..060963ac85 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-auto-disabled/output.txt @@ -0,0 +1,11 @@ + +>>> [CLI] bundle init default-python --config-file ./input.json --output-dir output + +Welcome to the default Python template for Databricks Asset Bundles! +Warn: Auto-detected that serverless is not enabled for your workspace 900800700600 +Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. 
diff --git a/acceptance/bundle/templates/default-python/serverless-auto-disabled/script b/acceptance/bundle/templates/default-python/serverless-auto-disabled/script new file mode 100644 index 0000000000..ede33c2c9a --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-auto-disabled/script @@ -0,0 +1,7 @@ +trace $CLI bundle init default-python --config-file ./input.json --output-dir output +mv output/my_default_python/.gitignore output/my_default_python/out.gitignore + +# serverless is disabled, same output as for classic +diff.py $TESTDIR/../classic/output output/ + +rm -fr output diff --git a/acceptance/bundle/templates/default-python/serverless-auto-disabled/test.toml b/acceptance/bundle/templates/default-python/serverless-auto-disabled/test.toml new file mode 100644 index 0000000000..47d9637881 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-auto-disabled/test.toml @@ -0,0 +1,3 @@ +[[Server]] +Pattern = "GET /api/2.0/settings-api/workspace/{workspaceId}/serverless_job_nb" +Response.Body = '{}' diff --git a/acceptance/bundle/templates/default-python/serverless-auto-enabled/input.json b/acceptance/bundle/templates/default-python/serverless-auto-enabled/input.json new file mode 100644 index 0000000000..7ccdf8faab --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-auto-enabled/input.json @@ -0,0 +1,7 @@ +{ + "project_name": "my_default_python", + "include_notebook": "yes", + "include_dlt": "yes", + "include_python": "yes", + "serverless": "auto" +} diff --git a/acceptance/bundle/templates/default-python/serverless-auto-enabled/output.txt b/acceptance/bundle/templates/default-python/serverless-auto-enabled/output.txt new file mode 100644 index 0000000000..6abf52cf09 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-auto-enabled/output.txt @@ -0,0 +1,10 @@ + +>>> [CLI] bundle init default-python --config-file ./input.json --output-dir output + +Welcome to the default Python 
template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-python/serverless-auto-enabled/script b/acceptance/bundle/templates/default-python/serverless-auto-enabled/script new file mode 100644 index 0000000000..a8c748002a --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-auto-enabled/script @@ -0,0 +1,7 @@ +trace $CLI bundle init default-python --config-file ./input.json --output-dir output +mv output/my_default_python/.gitignore output/my_default_python/out.gitignore + +# serverless is enabled, same output as serverless enabled explicitly: +diff.py $TESTDIR/../serverless/output output/ + +rm -fr output diff --git a/acceptance/bundle/templates/default-python/serverless-auto-enabled/test.toml b/acceptance/bundle/templates/default-python/serverless-auto-enabled/test.toml new file mode 100644 index 0000000000..ffb51da950 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-auto-enabled/test.toml @@ -0,0 +1,3 @@ +[[Server]] +Pattern = "GET /api/2.0/settings-api/workspace/{workspaceId}/serverless_job_nb" +Response.Body = '''{"setting": {"value": {"preview_enablement_val": {"enabled": true}}}}''' diff --git a/acceptance/bundle/templates/default-python/serverless-auto-error/input.json b/acceptance/bundle/templates/default-python/serverless-auto-error/input.json new file mode 100644 index 0000000000..7ccdf8faab --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-auto-error/input.json @@ -0,0 +1,7 @@ +{ + "project_name": "my_default_python", + "include_notebook": "yes", + "include_dlt": "yes", + "include_python": "yes", + "serverless": "auto" +} 
diff --git a/acceptance/bundle/templates/default-python/serverless-auto-error/output.txt b/acceptance/bundle/templates/default-python/serverless-auto-error/output.txt new file mode 100644 index 0000000000..08991d1995 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-auto-error/output.txt @@ -0,0 +1,11 @@ + +>>> [CLI] bundle init default-python --config-file ./input.json --output-dir output + +Welcome to the default Python template for Databricks Asset Bundles! +Warn: Failed to detect if serverless is supported: /api/2.0/settings-api/workspace/900800700600/serverless_job_nb failed: Internal Server Error +Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-python/serverless-auto-error/script b/acceptance/bundle/templates/default-python/serverless-auto-error/script new file mode 100644 index 0000000000..3354360495 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-auto-error/script @@ -0,0 +1,6 @@ +trace $CLI bundle init default-python --config-file ./input.json --output-dir output +mv output/my_default_python/.gitignore output/my_default_python/out.gitignore + +# If serverless is "auto" and there was an error reaching settings-api, we should get the same output as 'classic' +diff.py $TESTDIR/../classic/output output/ +rm -fr output diff --git a/acceptance/bundle/templates/default-python/serverless-auto-error/test.toml b/acceptance/bundle/templates/default-python/serverless-auto-error/test.toml new file mode 100644 index 0000000000..f28682b5bf --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-auto-error/test.toml @@ -0,0 +1,3 @@ +[[Server]] +Pattern = "GET 
/api/2.0/settings-api/workspace/{workspaceId}/serverless_job_nb" +Response.StatusCode = 500 diff --git a/acceptance/bundle/templates/default-python/serverless/input.json b/acceptance/bundle/templates/default-python/serverless/input.json new file mode 100644 index 0000000000..b1ae9a2ba4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/input.json @@ -0,0 +1,7 @@ +{ + "project_name": "my_default_python", + "include_notebook": "yes", + "include_dlt": "yes", + "include_python": "yes", + "serverless": "yes" +} diff --git a/acceptance/bundle/templates/default-python/serverless/output.txt b/acceptance/bundle/templates/default-python/serverless/output.txt new file mode 100644 index 0000000000..930e756de7 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output.txt @@ -0,0 +1,30 @@ + +>>> [CLI] bundle init default-python --config-file ./input.json --output-dir output + +Welcome to the default Python template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> [CLI] bundle validate -t dev +Name: my_default_python +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev + +Validation OK! + +>>> [CLI] bundle validate -t prod +Name: my_default_python +Target: prod +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/prod + +Validation OK! 
diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi new file mode 100644 index 0000000000..0edd5181bc --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json new file mode 100644 index 0000000000..5d15eba363 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json new file mode 100644 index 0000000000..8ee87c30d4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json @@ -0,0 +1,16 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." 
+ ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md new file mode 100644 index 0000000000..10f570bf46 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md @@ -0,0 +1,49 @@ +# my_default_python + +The 'my_default_python' project was generated by using the default-python template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_default_python_job` to your workspace. + You can find that job by opening your workpace and clicking on **Workflows**. + +4. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/my_default_python.job.yml). The schedule + is paused when deploying in development mode (see + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + +5. To run a job or pipeline, use the "run" command: + ``` + $ databricks bundle run + ``` +6. 
Optionally, install the Databricks extension for Visual Studio code for local development from + https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your + virtual environment and setup Databricks Connect for running unit tests locally. + When not using these tools, consult your development environment's documentation + and/or the documentation for Databricks Connect for manually setting up your environment + (https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html). + +7. For documentation on the Databricks asset bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml new file mode 100644 index 0000000000..6080a368f9 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml @@ -0,0 +1,29 @@ +# This is a Databricks asset bundle definition for my_default_python. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_python + uuid: [UUID] + +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: [DATABRICKS_URL] + + prod: + mode: production + workspace: + host: [DATABRICKS_URL] + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. 
+ root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: [USERNAME] + level: CAN_MANAGE diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep new file mode 100644 index 0000000000..fa25d2745e --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep @@ -0,0 +1,22 @@ +# Fixtures + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore new file mode 100644 index 0000000000..0dab7f4995 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pytest.ini b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pytest.ini new file mode 100644 index 0000000000..80432c2203 --- /dev/null +++ 
b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +pythonpath = src diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/requirements-dev.txt b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/requirements-dev.txt new file mode 100644 index 0000000000..0ffbf6aed0 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/requirements-dev.txt @@ -0,0 +1,29 @@ +## requirements-dev.txt: dependencies for local development. +## +## For defining dependencies used by jobs in Databricks Workflows, see +## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + +## Add code completion support for DLT +databricks-dlt + +## pytest is the default package used for testing +pytest + +## Dependencies for building wheel files +setuptools +wheel + +## databricks-connect can be used to run parts of this project locally. +## See https://docs.databricks.com/dev-tools/databricks-connect.html. +## +## databricks-connect is automatically installed if you're using Databricks +## extension for Visual Studio Code +## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html). +## +## To manually install databricks-connect, either follow the instructions +## at https://docs.databricks.com/dev-tools/databricks-connect.html +## to install the package system-wide. Or uncomment the line below to install a +## version of db-connect that corresponds to the Databricks Runtime version used +## for this project. 
+# +# databricks-connect>=15.4,<15.5 diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml new file mode 100644 index 0000000000..cc5aeb71c1 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml @@ -0,0 +1,45 @@ +# The main job for my_default_python. +resources: + jobs: + my_default_python_job: + name: my_default_python_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - [USERNAME] + + tasks: + - task_key: notebook_task + notebook_task: + notebook_path: ../src/notebook.ipynb + + - task_key: refresh_pipeline + depends_on: + - task_key: notebook_task + pipeline_task: + pipeline_id: ${resources.pipelines.my_default_python_pipeline.id} + + - task_key: main_task + depends_on: + - task_key: refresh_pipeline + environment_key: default + python_wheel_task: + package_name: my_default_python + entry_point: main + + # A list of task execution environment specifications that can be referenced by tasks of this job. 
+ environments: + - environment_key: default + + # Full documentation of this spec can be found at: + # https://docs.databricks.com/api/workspace/jobs/create#environments-spec + spec: + client: "1" + dependencies: + - ../dist/*.whl diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml new file mode 100644 index 0000000000..4f880ba485 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml @@ -0,0 +1,14 @@ +# The main pipeline for my_default_python +resources: + pipelines: + my_default_python_pipeline: + name: my_default_python_pipeline + catalog: main + target: my_default_python_${bundle.target} + serverless: true + libraries: + - notebook: + path: ../src/dlt_pipeline.ipynb + + configuration: + bundle.sourcePath: ${workspace.file_path}/src diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/README.md b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/README.md new file mode 100644 index 0000000000..e6cfb81b46 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb new file mode 100644 index 0000000000..a12773d4e8 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path.append(\"../src\")\n", + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "ipynb-notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/setup.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/setup.py new file mode 100644 index 0000000000..548f1035eb --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/setup.py @@ -0,0 +1,41 @@ +""" +setup.py configuration script describing how to build and package this project. 
+ +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_default_python project. +""" + +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import datetime +import my_default_python + +local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S") + +setup( + name="my_default_python", + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + version=my_default_python.__version__ + "+" + local_version, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. 
+ # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/dlt_pipeline.ipynb new file mode 100644 index 0000000000..8a02183e75 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/dlt_pipeline.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# DLT pipeline\n", + "\n", + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Import DLT and src/my_default_python\n", + "import dlt\n", + "import sys\n", + "\n", + "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", + "from pyspark.sql.functions import expr\n", + "from my_default_python import main" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "@dlt.view\n", + "def taxi_raw():\n", + " return main.get_taxis(spark)\n", + "\n", + "\n", + "@dlt.table\n", + "def filtered_taxis():\n", + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + ] + } + ], + "metadata": { + 
"application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "dlt_pipeline", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/__init__.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/__init__.py new file mode 100644 index 0000000000..f102a9cadf --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py new file mode 100644 index 0000000000..5ae344c7e2 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. 
+def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb new file mode 100644 index 0000000000..472ccb2190 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py new file mode 100644 index 0000000000..dc449154a6 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py @@ -0,0 +1,6 @@ +from my_default_python.main import get_taxis, get_spark + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/default-python/script b/acceptance/bundle/templates/default-python/serverless/script similarity index 100% rename from acceptance/bundle/templates/default-python/script rename to acceptance/bundle/templates/default-python/serverless/script diff --git a/acceptance/bundle/templates/test.toml b/acceptance/bundle/templates/test.toml index 90539263de..af66647f58 100644 --- a/acceptance/bundle/templates/test.toml +++ 
b/acceptance/bundle/templates/test.toml @@ -1,2 +1,6 @@ # At the moment, there are many differences across different envs w.r.t to catalog use, node type and so on. LocalOnly = true + +[[Server]] +Pattern = "GET /api/2.0/settings-api/workspace/{workspaceId}/serverless_job_nb" +Response.Body = '''{"setting": {"value": {"preview_enablement_val": {"enabled": true}}}}''' diff --git a/libs/template/helpers.go b/libs/template/helpers.go index 4550e5fa2b..13bd6ceccb 100644 --- a/libs/template/helpers.go +++ b/libs/template/helpers.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "math/rand" + "net/http" "net/url" "os" "regexp" @@ -12,7 +13,10 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/iamutil" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/client" "github.com/databricks/databricks-sdk-go/service/iam" "github.com/google/uuid" @@ -35,8 +39,11 @@ var ( cachedUser *iam.User cachedIsServicePrincipal *bool cachedCatalog *string + cachedIsServerless *bool ) +const defaultServerlessSupported = false + // UUID that is stable for the duration of the template execution. This can be used // to populate the `bundle.uuid` field in databricks.yml by template authors. // @@ -44,6 +51,16 @@ var ( // is run and can be used to attribute DBU revenue to bundle templates. 
var bundleUuid = uuid.New().String() +type APISettingResponse struct { + Setting struct { + Value struct { + PreviewEnablementVal struct { + Enabled bool `json:"enabled"` + } `json:"preview_enablement_val"` + } `json:"value"` + } `json:"setting"` +} + func loadHelpers(ctx context.Context) template.FuncMap { w := root.WorkspaceClient(ctx) return template.FuncMap{ @@ -167,5 +184,54 @@ func loadHelpers(ctx context.Context) template.FuncMap { cachedIsServicePrincipal = &result return result, nil }, + "is_serverless_supported": func() bool { + if cachedIsServerless == nil { + result := isServerlessSupported(ctx, w) + cachedIsServerless = &result + } + return *cachedIsServerless + }, } } + +func isServerlessSupported(ctx context.Context, w *databricks.WorkspaceClient) bool { + apiClient, err := client.New(w.Config) + if err != nil { + log.Warnf(ctx, "Failed to detect if serverless is supported: cannot create client: %s", err) + return defaultServerlessSupported + } + + workspaceId, err := w.CurrentWorkspaceID(ctx) + if err != nil { + log.Warnf(ctx, "Failed to detect if serverless is supported: CurrentWorkspaceID() failed: %s", err) + return defaultServerlessSupported + } + + apiEndpoint := fmt.Sprintf("/api/2.0/settings-api/workspace/%d/serverless_job_nb", workspaceId) + var response APISettingResponse + err = apiClient.Do( + ctx, + http.MethodGet, + apiEndpoint, + nil, + nil, + nil, + &response, + ) + if err != nil { + log.Warnf(ctx, "Failed to detect if serverless is supported: %s failed: %s", apiEndpoint, err) + return defaultServerlessSupported + } + + log.Debugf(ctx, "Called %s: %#v", apiEndpoint, response) + + isSupported := response.Setting.Value.PreviewEnablementVal.Enabled + + if isSupported { + log.Infof(ctx, "Auto-detected that serverless is enabled for your workspace %d", workspaceId) + } else { + log.Warnf(ctx, "Auto-detected that serverless is not enabled for your workspace %d", workspaceId) + } + + return isSupported +} diff --git 
a/libs/template/renderer_test.go b/libs/template/renderer_test.go index b2ec388bdd..f9588edd18 100644 --- a/libs/template/renderer_test.go +++ b/libs/template/renderer_test.go @@ -116,14 +116,17 @@ func TestBuiltinPythonTemplateValid(t *testing.T) { for _, includeDlt := range options { for _, includePython := range options { for _, isServicePrincipal := range []bool{true, false} { - config := map[string]any{ - "project_name": "my_project", - "include_notebook": includeNotebook, - "include_dlt": includeDlt, - "include_python": includePython, + for _, serverless := range options { + config := map[string]any{ + "project_name": "my_project", + "include_notebook": includeNotebook, + "include_dlt": includeDlt, + "include_python": includePython, + "serverless": serverless, + } + tempDir := t.TempDir() + assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir) } - tempDir := t.TempDir() - assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir) } } } @@ -135,6 +138,7 @@ func TestBuiltinPythonTemplateValid(t *testing.T) { "include_notebook": "yes", "include_dlt": "yes", "include_python": "yes", + "serverless": "yes", } isServicePrincipal = false build = true diff --git a/libs/template/templates/default-python/databricks_template_schema.json b/libs/template/templates/default-python/databricks_template_schema.json index d53bad91ab..520b23deac 100644 --- a/libs/template/templates/default-python/databricks_template_schema.json +++ b/libs/template/templates/default-python/databricks_template_schema.json @@ -29,6 +29,14 @@ "enum": ["yes", "no"], "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'", "order": 4 + }, + "serverless": { + "type": "string", + "default": "no", + "enum": ["yes", "no", "auto"], + "description": "Use serverless compute. 
If auto, it will be enabled unless it is disabled on workspace level.", + "order": 5, + "skip_prompt_if": {} } }, "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl index 735a232be2..b2300cee7d 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl @@ -4,6 +4,10 @@ {{if and (eq .include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} # This job runs {{.project_name}}_pipeline on a schedule. {{end -}} +{{$with_serverless := (eq .serverless "yes") -}} +{{if (eq .serverless "auto") -}} + {{$with_serverless = is_serverless_supported -}} +{{end -}} resources: jobs: @@ -29,7 +33,8 @@ resources: tasks: {{- if eq .include_notebook "yes" }} - task_key: notebook_task - job_cluster_key: job_cluster + {{- if not $with_serverless}} + job_cluster_key: job_cluster{{end}} notebook_task: notebook_path: ../src/notebook.ipynb {{end -}} @@ -52,18 +57,34 @@ resources: depends_on: - task_key: notebook_task {{end}} - job_cluster_key: job_cluster + {{- if $with_serverless }} + environment_key: default + {{- else }} + job_cluster_key: job_cluster{{end}} python_wheel_task: package_name: {{.project_name}} entry_point: main + {{- if not $with_serverless }} libraries: # By default we just include the .whl file generated for the {{.project_name}} package. 
# See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html # for more information on how to add other libraries. - whl: ../dist/*.whl +{{- end -}} +{{else}} +{{- end}} +{{if $with_serverless}} + # A list of task execution environment specifications that can be referenced by tasks of this job. + environments: + - environment_key: default - {{else}} - {{end -}} + # Full documentation of this spec can be found at: + # https://docs.databricks.com/api/workspace/jobs/create#environments-spec + spec: + client: "1" + dependencies: + - ../dist/*.whl +{{ else }} job_clusters: - job_cluster_key: job_cluster new_cluster: @@ -73,3 +94,4 @@ resources: autoscale: min_workers: 1 max_workers: 4 +{{end -}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl index 50f11fe2cc..67eff5b3e4 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl @@ -1,3 +1,7 @@ +{{$with_serverless := (eq .serverless "yes") -}} +{{if (eq .serverless "auto") -}} + {{$with_serverless = is_serverless_supported -}} +{{end -}} # The main pipeline for {{.project_name}} resources: pipelines: @@ -10,6 +14,9 @@ resources: catalog: {{default_catalog}} {{- end}} target: {{.project_name}}_${bundle.target} + {{- if $with_serverless }} + serverless: true + {{- end}} libraries: - notebook: path: ../src/dlt_pipeline.ipynb From 427e6c95ec3a8db85f717d2f0c81685b1301cd6d Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 25 Feb 2025 14:53:36 +0100 Subject: [PATCH 02/11] clean up serverless-auto tests --- .../serverless-auto-disabled/input.json | 7 ------- .../serverless-auto-disabled/output.txt | 11 ----------- 
.../default-python/serverless-auto-disabled/script | 7 ------- .../default-python/serverless-auto-disabled/test.toml | 3 --- .../default-python/serverless-auto-enabled/input.json | 7 ------- .../default-python/serverless-auto-enabled/output.txt | 10 ---------- .../default-python/serverless-auto-enabled/script | 7 ------- .../default-python/serverless-auto-enabled/test.toml | 3 --- .../default-python/serverless-auto-error/input.json | 7 ------- .../default-python/serverless-auto-error/output.txt | 11 ----------- .../default-python/serverless-auto-error/script | 6 ------ .../default-python/serverless-auto-error/test.toml | 3 --- 12 files changed, 82 deletions(-) delete mode 100644 acceptance/bundle/templates/default-python/serverless-auto-disabled/input.json delete mode 100644 acceptance/bundle/templates/default-python/serverless-auto-disabled/output.txt delete mode 100644 acceptance/bundle/templates/default-python/serverless-auto-disabled/script delete mode 100644 acceptance/bundle/templates/default-python/serverless-auto-disabled/test.toml delete mode 100644 acceptance/bundle/templates/default-python/serverless-auto-enabled/input.json delete mode 100644 acceptance/bundle/templates/default-python/serverless-auto-enabled/output.txt delete mode 100644 acceptance/bundle/templates/default-python/serverless-auto-enabled/script delete mode 100644 acceptance/bundle/templates/default-python/serverless-auto-enabled/test.toml delete mode 100644 acceptance/bundle/templates/default-python/serverless-auto-error/input.json delete mode 100644 acceptance/bundle/templates/default-python/serverless-auto-error/output.txt delete mode 100644 acceptance/bundle/templates/default-python/serverless-auto-error/script delete mode 100644 acceptance/bundle/templates/default-python/serverless-auto-error/test.toml diff --git a/acceptance/bundle/templates/default-python/serverless-auto-disabled/input.json b/acceptance/bundle/templates/default-python/serverless-auto-disabled/input.json deleted file 
mode 100644 index 7ccdf8faab..0000000000 --- a/acceptance/bundle/templates/default-python/serverless-auto-disabled/input.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "project_name": "my_default_python", - "include_notebook": "yes", - "include_dlt": "yes", - "include_python": "yes", - "serverless": "auto" -} diff --git a/acceptance/bundle/templates/default-python/serverless-auto-disabled/output.txt b/acceptance/bundle/templates/default-python/serverless-auto-disabled/output.txt deleted file mode 100644 index 060963ac85..0000000000 --- a/acceptance/bundle/templates/default-python/serverless-auto-disabled/output.txt +++ /dev/null @@ -1,11 +0,0 @@ - ->>> [CLI] bundle init default-python --config-file ./input.json --output-dir output - -Welcome to the default Python template for Databricks Asset Bundles! -Warn: Auto-detected that serverless is not enabled for your workspace 900800700600 -Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] - -✨ Your new project has been created in the 'my_default_python' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. 
diff --git a/acceptance/bundle/templates/default-python/serverless-auto-disabled/script b/acceptance/bundle/templates/default-python/serverless-auto-disabled/script deleted file mode 100644 index ede33c2c9a..0000000000 --- a/acceptance/bundle/templates/default-python/serverless-auto-disabled/script +++ /dev/null @@ -1,7 +0,0 @@ -trace $CLI bundle init default-python --config-file ./input.json --output-dir output -mv output/my_default_python/.gitignore output/my_default_python/out.gitignore - -# serverless is disabled, same output as for classic -diff.py $TESTDIR/../classic/output output/ - -rm -fr output diff --git a/acceptance/bundle/templates/default-python/serverless-auto-disabled/test.toml b/acceptance/bundle/templates/default-python/serverless-auto-disabled/test.toml deleted file mode 100644 index 47d9637881..0000000000 --- a/acceptance/bundle/templates/default-python/serverless-auto-disabled/test.toml +++ /dev/null @@ -1,3 +0,0 @@ -[[Server]] -Pattern = "GET /api/2.0/settings-api/workspace/{workspaceId}/serverless_job_nb" -Response.Body = '{}' diff --git a/acceptance/bundle/templates/default-python/serverless-auto-enabled/input.json b/acceptance/bundle/templates/default-python/serverless-auto-enabled/input.json deleted file mode 100644 index 7ccdf8faab..0000000000 --- a/acceptance/bundle/templates/default-python/serverless-auto-enabled/input.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "project_name": "my_default_python", - "include_notebook": "yes", - "include_dlt": "yes", - "include_python": "yes", - "serverless": "auto" -} diff --git a/acceptance/bundle/templates/default-python/serverless-auto-enabled/output.txt b/acceptance/bundle/templates/default-python/serverless-auto-enabled/output.txt deleted file mode 100644 index 6abf52cf09..0000000000 --- a/acceptance/bundle/templates/default-python/serverless-auto-enabled/output.txt +++ /dev/null @@ -1,10 +0,0 @@ - ->>> [CLI] bundle init default-python --config-file ./input.json --output-dir output - -Welcome to the 
default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] - -✨ Your new project has been created in the 'my_default_python' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-python/serverless-auto-enabled/script b/acceptance/bundle/templates/default-python/serverless-auto-enabled/script deleted file mode 100644 index a8c748002a..0000000000 --- a/acceptance/bundle/templates/default-python/serverless-auto-enabled/script +++ /dev/null @@ -1,7 +0,0 @@ -trace $CLI bundle init default-python --config-file ./input.json --output-dir output -mv output/my_default_python/.gitignore output/my_default_python/out.gitignore - -# serverless is enabled, same output as serverless enabled explicitly: -diff.py $TESTDIR/../serverless/output output/ - -rm -fr output diff --git a/acceptance/bundle/templates/default-python/serverless-auto-enabled/test.toml b/acceptance/bundle/templates/default-python/serverless-auto-enabled/test.toml deleted file mode 100644 index ffb51da950..0000000000 --- a/acceptance/bundle/templates/default-python/serverless-auto-enabled/test.toml +++ /dev/null @@ -1,3 +0,0 @@ -[[Server]] -Pattern = "GET /api/2.0/settings-api/workspace/{workspaceId}/serverless_job_nb" -Response.Body = '''{"setting": {"value": {"preview_enablement_val": {"enabled": true}}}}''' diff --git a/acceptance/bundle/templates/default-python/serverless-auto-error/input.json b/acceptance/bundle/templates/default-python/serverless-auto-error/input.json deleted file mode 100644 index 7ccdf8faab..0000000000 --- a/acceptance/bundle/templates/default-python/serverless-auto-error/input.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "project_name": "my_default_python", - "include_notebook": "yes", - "include_dlt": "yes", - "include_python": "yes", - 
"serverless": "auto" -} diff --git a/acceptance/bundle/templates/default-python/serverless-auto-error/output.txt b/acceptance/bundle/templates/default-python/serverless-auto-error/output.txt deleted file mode 100644 index 08991d1995..0000000000 --- a/acceptance/bundle/templates/default-python/serverless-auto-error/output.txt +++ /dev/null @@ -1,11 +0,0 @@ - ->>> [CLI] bundle init default-python --config-file ./input.json --output-dir output - -Welcome to the default Python template for Databricks Asset Bundles! -Warn: Failed to detect if serverless is supported: /api/2.0/settings-api/workspace/900800700600/serverless_job_nb failed: Internal Server Error -Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] - -✨ Your new project has been created in the 'my_default_python' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-python/serverless-auto-error/script b/acceptance/bundle/templates/default-python/serverless-auto-error/script deleted file mode 100644 index 3354360495..0000000000 --- a/acceptance/bundle/templates/default-python/serverless-auto-error/script +++ /dev/null @@ -1,6 +0,0 @@ -trace $CLI bundle init default-python --config-file ./input.json --output-dir output -mv output/my_default_python/.gitignore output/my_default_python/out.gitignore - -# If serverless is "auto" and there was an error reaching settings-api, we should get the same output as 'classic' -diff.py $TESTDIR/../classic/output output/ -rm -fr output diff --git a/acceptance/bundle/templates/default-python/serverless-auto-error/test.toml b/acceptance/bundle/templates/default-python/serverless-auto-error/test.toml deleted file mode 100644 index f28682b5bf..0000000000 --- a/acceptance/bundle/templates/default-python/serverless-auto-error/test.toml +++ /dev/null @@ -1,3 +0,0 @@ 
-[[Server]] -Pattern = "GET /api/2.0/settings-api/workspace/{workspaceId}/serverless_job_nb" -Response.StatusCode = 500 From 79f2f3894658aebf5ca4b1a5c500043bdb667509 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 25 Feb 2025 14:54:41 +0100 Subject: [PATCH 03/11] clean up serverless 'auto' feature --- libs/template/helpers.go | 56 ------------------- .../databricks_template_schema.json | 7 +-- .../resources/{{.project_name}}.job.yml.tmpl | 3 - .../{{.project_name}}.pipeline.yml.tmpl | 3 - 4 files changed, 3 insertions(+), 66 deletions(-) diff --git a/libs/template/helpers.go b/libs/template/helpers.go index 13bd6ceccb..fff7de5802 100644 --- a/libs/template/helpers.go +++ b/libs/template/helpers.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "math/rand" - "net/http" "net/url" "os" "regexp" @@ -13,10 +12,7 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/iamutil" - "github.com/databricks/cli/libs/log" - "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/apierr" - "github.com/databricks/databricks-sdk-go/client" "github.com/databricks/databricks-sdk-go/service/iam" "github.com/google/uuid" @@ -39,11 +35,8 @@ var ( cachedUser *iam.User cachedIsServicePrincipal *bool cachedCatalog *string - cachedIsServerless *bool ) -const defaultServerlessSupported = false - // UUID that is stable for the duration of the template execution. This can be used // to populate the `bundle.uuid` field in databricks.yml by template authors. 
// @@ -184,54 +177,5 @@ func loadHelpers(ctx context.Context) template.FuncMap { cachedIsServicePrincipal = &result return result, nil }, - "is_serverless_supported": func() bool { - if cachedIsServerless == nil { - result := isServerlessSupported(ctx, w) - cachedIsServerless = &result - } - return *cachedIsServerless - }, - } -} - -func isServerlessSupported(ctx context.Context, w *databricks.WorkspaceClient) bool { - apiClient, err := client.New(w.Config) - if err != nil { - log.Warnf(ctx, "Failed to detect if serverless is supported: cannot create client: %s", err) - return defaultServerlessSupported - } - - workspaceId, err := w.CurrentWorkspaceID(ctx) - if err != nil { - log.Warnf(ctx, "Failed to detect if serverless is supported: CurrentWorkspaceID() failed: %s", err) - return defaultServerlessSupported } - - apiEndpoint := fmt.Sprintf("/api/2.0/settings-api/workspace/%d/serverless_job_nb", workspaceId) - var response APISettingResponse - err = apiClient.Do( - ctx, - http.MethodGet, - apiEndpoint, - nil, - nil, - nil, - &response, - ) - if err != nil { - log.Warnf(ctx, "Failed to detect if serverless is supported: %s failed: %s", apiEndpoint, err) - return defaultServerlessSupported - } - - log.Debugf(ctx, "Called %s: %#v", apiEndpoint, response) - - isSupported := response.Setting.Value.PreviewEnablementVal.Enabled - - if isSupported { - log.Infof(ctx, "Auto-detected that serverless is enabled for your workspace %d", workspaceId) - } else { - log.Warnf(ctx, "Auto-detected that serverless is not enabled for your workspace %d", workspaceId) - } - - return isSupported } diff --git a/libs/template/templates/default-python/databricks_template_schema.json b/libs/template/templates/default-python/databricks_template_schema.json index 520b23deac..7a77ad3b1a 100644 --- a/libs/template/templates/default-python/databricks_template_schema.json +++ b/libs/template/templates/default-python/databricks_template_schema.json @@ -33,10 +33,9 @@ "serverless": { "type": 
"string", "default": "no", - "enum": ["yes", "no", "auto"], - "description": "Use serverless compute. If auto, it will be enabled unless it is disabled on workspace level.", - "order": 5, - "skip_prompt_if": {} + "enum": ["yes", "no"], + "description": "Use serverless compute", + "order": 5 } }, "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl index b2300cee7d..22434aa64c 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl @@ -5,9 +5,6 @@ # This job runs {{.project_name}}_pipeline on a schedule. 
{{end -}} {{$with_serverless := (eq .serverless "yes") -}} -{{if (eq .serverless "auto") -}} - {{$with_serverless = is_serverless_supported -}} -{{end -}} resources: jobs: diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl index 67eff5b3e4..f740a848b3 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl @@ -1,7 +1,4 @@ {{$with_serverless := (eq .serverless "yes") -}} -{{if (eq .serverless "auto") -}} - {{$with_serverless = is_serverless_supported -}} -{{end -}} # The main pipeline for {{.project_name}} resources: pipelines: From 38f7e2ca0abeaa3599eea5d698c816ae018dd1b7 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 25 Feb 2025 14:57:41 +0100 Subject: [PATCH 04/11] clean up readme --- acceptance/bundle/templates/default-python/README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/acceptance/bundle/templates/default-python/README.md b/acceptance/bundle/templates/default-python/README.md index e4f4fd816b..093cbda2dc 100644 --- a/acceptance/bundle/templates/default-python/README.md +++ b/acceptance/bundle/templates/default-python/README.md @@ -1,6 +1,3 @@ The 'serverless' and 'classic' directories contain full tests: they have full output of materialized template, perform "bundle validate" and in the future will perform deploy/summary/run. - -Other directories (serverless-auto-\*) contain short tests: they only do -"bundle init" and then check that the output matches 'serverless' or 'classic' exactly. 
From 179a247ef07e06066d1b20c95858166882a5e76c Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 25 Feb 2025 15:07:09 +0100 Subject: [PATCH 05/11] clean up --- acceptance/bundle/templates/default-python/README.md | 3 --- libs/template/helpers.go | 10 ---------- 2 files changed, 13 deletions(-) delete mode 100644 acceptance/bundle/templates/default-python/README.md diff --git a/acceptance/bundle/templates/default-python/README.md b/acceptance/bundle/templates/default-python/README.md deleted file mode 100644 index 093cbda2dc..0000000000 --- a/acceptance/bundle/templates/default-python/README.md +++ /dev/null @@ -1,3 +0,0 @@ -The 'serverless' and 'classic' directories contain full tests: they -have full output of materialized template, perform "bundle validate" -and in the future will perform deploy/summary/run. diff --git a/libs/template/helpers.go b/libs/template/helpers.go index fff7de5802..4550e5fa2b 100644 --- a/libs/template/helpers.go +++ b/libs/template/helpers.go @@ -44,16 +44,6 @@ var ( // is run and can be used to attribute DBU revenue to bundle templates. 
var bundleUuid = uuid.New().String() -type APISettingResponse struct { - Setting struct { - Value struct { - PreviewEnablementVal struct { - Enabled bool `json:"enabled"` - } `json:"preview_enablement_val"` - } `json:"value"` - } `json:"setting"` -} - func loadHelpers(ctx context.Context) template.FuncMap { w := root.WorkspaceClient(ctx) return template.FuncMap{ From ee43de51495a931233f9df4fbb4c60ab5da2f250 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 26 Feb 2025 09:51:48 +0100 Subject: [PATCH 06/11] Remove full output in classic case, only keep the diff --- .../.vscode/__builtins__.pyi | 3 - .../my_default_python/.vscode/extensions.json | 7 -- .../my_default_python/.vscode/settings.json | 16 ---- .../output/my_default_python/README.md | 49 ---------- .../output/my_default_python/databricks.yml | 29 ------ .../my_default_python/fixtures/.gitkeep | 22 ----- .../output/my_default_python/out.gitignore | 8 -- .../output/my_default_python/pytest.ini | 3 - .../my_default_python/requirements-dev.txt | 29 ------ .../resources/my_default_python.job.yml | 50 ----------- .../resources/my_default_python.pipeline.yml | 13 --- .../my_default_python/scratch/README.md | 4 - .../scratch/exploration.ipynb | 61 ------------- .../classic/output/my_default_python/setup.py | 41 --------- .../my_default_python/src/dlt_pipeline.ipynb | 90 ------------------- .../src/my_default_python/__init__.py | 1 - .../src/my_default_python/main.py | 25 ------ .../my_default_python/src/notebook.ipynb | 75 ---------------- .../my_default_python/tests/main_test.py | 6 -- .../templates/default-python/classic/script | 2 + 20 files changed, 2 insertions(+), 532 deletions(-) delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json delete mode 100644 
acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/pytest.ini delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/requirements-dev.txt delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/setup.py delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/__init__.py delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py diff --git 
a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi deleted file mode 100644 index 0edd5181bc..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi +++ /dev/null @@ -1,3 +0,0 @@ -# Typings for Pylance in Visual Studio Code -# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md -from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json deleted file mode 100644 index 5d15eba363..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "recommendations": [ - "databricks.databricks", - "ms-python.vscode-pylance", - "redhat.vscode-yaml" - ] -} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json deleted file mode 100644 index 8ee87c30d4..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "python.analysis.stubPath": ".vscode", - "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", - "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", - "python.testing.pytestArgs": [ - "." 
- ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - "python.analysis.extraPaths": ["src"], - "files.exclude": { - "**/*.egg-info": true, - "**/__pycache__": true, - ".pytest_cache": true, - }, -} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md deleted file mode 100644 index 10f570bf46..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# my_default_python - -The 'my_default_python' project was generated by using the default-python template. - -## Getting started - -1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html - -2. Authenticate to your Databricks workspace, if you have not done so already: - ``` - $ databricks configure - ``` - -3. To deploy a development copy of this project, type: - ``` - $ databricks bundle deploy --target dev - ``` - (Note that "dev" is the default target, so the `--target` parameter - is optional here.) - - This deploys everything that's defined for this project. - For example, the default template would deploy a job called - `[dev yourname] my_default_python_job` to your workspace. - You can find that job by opening your workpace and clicking on **Workflows**. - -4. Similarly, to deploy a production copy, type: - ``` - $ databricks bundle deploy --target prod - ``` - - Note that the default job from the template has a schedule that runs every day - (defined in resources/my_default_python.job.yml). The schedule - is paused when deploying in development mode (see - https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). - -5. To run a job or pipeline, use the "run" command: - ``` - $ databricks bundle run - ``` -6. 
Optionally, install the Databricks extension for Visual Studio code for local development from - https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your - virtual environment and setup Databricks Connect for running unit tests locally. - When not using these tools, consult your development environment's documentation - and/or the documentation for Databricks Connect for manually setting up your environment - (https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html). - -7. For documentation on the Databricks asset bundles format used - for this project, and for CI/CD configuration, see - https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml deleted file mode 100644 index 6080a368f9..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml +++ /dev/null @@ -1,29 +0,0 @@ -# This is a Databricks asset bundle definition for my_default_python. -# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. -bundle: - name: my_default_python - uuid: [UUID] - -include: - - resources/*.yml - -targets: - dev: - # The default target uses 'mode: development' to create a development copy. - # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default. - # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. - mode: development - default: true - workspace: - host: [DATABRICKS_URL] - - prod: - mode: production - workspace: - host: [DATABRICKS_URL] - # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. 
- root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} - permissions: - - user_name: [USERNAME] - level: CAN_MANAGE diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep deleted file mode 100644 index fa25d2745e..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep +++ /dev/null @@ -1,22 +0,0 @@ -# Fixtures - -This folder is reserved for fixtures, such as CSV files. - -Below is an example of how to load fixtures as a data frame: - -``` -import pandas as pd -import os - -def get_absolute_path(*relative_parts): - if 'dbutils' in globals(): - base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore - path = os.path.normpath(os.path.join(base_dir, *relative_parts)) - return path if path.startswith("/Workspace") else "/Workspace" + path - else: - return os.path.join(*relative_parts) - -csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") -df = pd.read_csv(csv_file) -display(df) -``` diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore b/acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore deleted file mode 100644 index 0dab7f4995..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -.databricks/ -build/ -dist/ -__pycache__/ -*.egg-info -.venv/ -scratch/** -!scratch/README.md diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/pytest.ini b/acceptance/bundle/templates/default-python/classic/output/my_default_python/pytest.ini deleted file mode 100644 index 80432c2203..0000000000 --- 
a/acceptance/bundle/templates/default-python/classic/output/my_default_python/pytest.ini +++ /dev/null @@ -1,3 +0,0 @@ -[pytest] -testpaths = tests -pythonpath = src diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/requirements-dev.txt b/acceptance/bundle/templates/default-python/classic/output/my_default_python/requirements-dev.txt deleted file mode 100644 index 0ffbf6aed0..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/requirements-dev.txt +++ /dev/null @@ -1,29 +0,0 @@ -## requirements-dev.txt: dependencies for local development. -## -## For defining dependencies used by jobs in Databricks Workflows, see -## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html - -## Add code completion support for DLT -databricks-dlt - -## pytest is the default package used for testing -pytest - -## Dependencies for building wheel files -setuptools -wheel - -## databricks-connect can be used to run parts of this project locally. -## See https://docs.databricks.com/dev-tools/databricks-connect.html. -## -## databricks-connect is automatically installed if you're using Databricks -## extension for Visual Studio Code -## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html). -## -## To manually install databricks-connect, either follow the instructions -## at https://docs.databricks.com/dev-tools/databricks-connect.html -## to install the package system-wide. Or uncomment the line below to install a -## version of db-connect that corresponds to the Databricks Runtime version used -## for this project. 
-# -# databricks-connect>=15.4,<15.5 diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml deleted file mode 100644 index 7c11e143f4..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml +++ /dev/null @@ -1,50 +0,0 @@ -# The main job for my_default_python. -resources: - jobs: - my_default_python_job: - name: my_default_python_job - - trigger: - # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger - periodic: - interval: 1 - unit: DAYS - - email_notifications: - on_failure: - - [USERNAME] - - tasks: - - task_key: notebook_task - job_cluster_key: job_cluster - notebook_task: - notebook_path: ../src/notebook.ipynb - - - task_key: refresh_pipeline - depends_on: - - task_key: notebook_task - pipeline_task: - pipeline_id: ${resources.pipelines.my_default_python_pipeline.id} - - - task_key: main_task - depends_on: - - task_key: refresh_pipeline - job_cluster_key: job_cluster - python_wheel_task: - package_name: my_default_python - entry_point: main - libraries: - # By default we just include the .whl file generated for the my_default_python package. - # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html - # for more information on how to add other libraries. 
- - whl: ../dist/*.whl - - job_clusters: - - job_cluster_key: job_cluster - new_cluster: - spark_version: 15.4.x-scala2.12 - node_type_id: i3.xlarge - data_security_mode: SINGLE_USER - autoscale: - min_workers: 1 - max_workers: 4 diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml deleted file mode 100644 index f9e083f4fb..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml +++ /dev/null @@ -1,13 +0,0 @@ -# The main pipeline for my_default_python -resources: - pipelines: - my_default_python_pipeline: - name: my_default_python_pipeline - catalog: main - target: my_default_python_${bundle.target} - libraries: - - notebook: - path: ../src/dlt_pipeline.ipynb - - configuration: - bundle.sourcePath: ${workspace.file_path}/src diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md deleted file mode 100644 index e6cfb81b46..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# scratch - -This folder is reserved for personal, exploratory notebooks. -By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb deleted file mode 100644 index a12773d4e8..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb +++ /dev/null @@ -1,61 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "sys.path.append(\"../src\")\n", - "from my_default_python import main\n", - "\n", - "main.get_taxis(spark).show(10)" - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "ipynb-notebook", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/setup.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/setup.py deleted file mode 100644 index 548f1035eb..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/setup.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -setup.py configuration script describing how to build and package this project. 
- -This file is primarily used by the setuptools library and typically should not -be executed directly. See README.md for how to deploy, test, and run -the my_default_python project. -""" - -from setuptools import setup, find_packages - -import sys - -sys.path.append("./src") - -import datetime -import my_default_python - -local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S") - -setup( - name="my_default_python", - # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) - # to ensure that changes to wheel package are picked up when used on all-purpose clusters - version=my_default_python.__version__ + "+" + local_version, - url="https://databricks.com", - author="[USERNAME]", - description="wheel file based on my_default_python/src", - packages=find_packages(where="./src"), - package_dir={"": "src"}, - entry_points={ - "packages": [ - "main=my_default_python.main:main", - ], - }, - install_requires=[ - # Dependencies in case the output wheel file is used as a library dependency. 
- # For defining dependencies, when this package is used in Databricks, see: - # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html - "setuptools" - ], -) diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb deleted file mode 100644 index 8a02183e75..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb +++ /dev/null @@ -1,90 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "source": [ - "# DLT pipeline\n", - "\n", - "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "# Import DLT and src/my_default_python\n", - "import dlt\n", - "import sys\n", - "\n", - "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", - "from pyspark.sql.functions import expr\n", - "from my_default_python import main" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "@dlt.view\n", - "def taxi_raw():\n", - " return main.get_taxis(spark)\n", - "\n", - "\n", - "@dlt.table\n", - "def filtered_taxis():\n", - " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" - ] - } - ], - "metadata": { - 
"application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "dlt_pipeline", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/__init__.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/__init__.py deleted file mode 100644 index f102a9cadf..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "0.0.1" diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py deleted file mode 100644 index 5ae344c7e2..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py +++ /dev/null @@ -1,25 +0,0 @@ -from pyspark.sql import SparkSession, DataFrame - - -def get_taxis(spark: SparkSession) -> DataFrame: - return spark.read.table("samples.nyctaxi.trips") - - -# Create a new Databricks Connect session. If this fails, -# check that you have configured Databricks Connect correctly. -# See https://docs.databricks.com/dev-tools/databricks-connect.html. 
-def get_spark() -> SparkSession: - try: - from databricks.connect import DatabricksSession - - return DatabricksSession.builder.getOrCreate() - except ImportError: - return SparkSession.builder.getOrCreate() - - -def main(): - get_taxis(get_spark()).show(5) - - -if __name__ == "__main__": - main() diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb deleted file mode 100644 index 472ccb2190..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb +++ /dev/null @@ -1,75 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "source": [ - "# Default notebook\n", - "\n", - "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "from my_default_python import main\n", - "\n", - "main.get_taxis(spark).show(10)" - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "notebook", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py deleted file mode 100644 index dc449154a6..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py +++ /dev/null @@ -1,6 +0,0 @@ -from my_default_python.main import get_taxis, get_spark - - -def test_main(): - taxis = get_taxis(get_spark()) - assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/default-python/classic/script b/acceptance/bundle/templates/default-python/classic/script index 7e5524065a..074f262f1a 100644 --- a/acceptance/bundle/templates/default-python/classic/script +++ b/acceptance/bundle/templates/default-python/classic/script @@ -11,3 +11,5 @@ cd ../../ # Calculate the difference from the serverless template diff.py $TESTDIR/../serverless/output output/ > out.compare-vs-serverless.diff + +rm -fr output From 
0b90f97b7f92d9a1520f0b53f29d25227e46b711 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 26 Feb 2025 10:27:42 +0100 Subject: [PATCH 07/11] clean up unnecessary stub --- acceptance/bundle/templates/test.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/acceptance/bundle/templates/test.toml b/acceptance/bundle/templates/test.toml index af66647f58..90539263de 100644 --- a/acceptance/bundle/templates/test.toml +++ b/acceptance/bundle/templates/test.toml @@ -1,6 +1,2 @@ # At the moment, there are many differences across different envs w.r.t to catalog use, node type and so on. LocalOnly = true - -[[Server]] -Pattern = "GET /api/2.0/settings-api/workspace/{workspaceId}/serverless_job_nb" -Response.Body = '''{"setting": {"value": {"preview_enablement_val": {"enabled": true}}}}''' From 54781f97394382dcc3980a66b0f0b72504927b8b Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 26 Feb 2025 12:15:36 +0100 Subject: [PATCH 08/11] Make serverless use 'main' catalog if it's not provided by API --- .../serverless-customcatalog/output.txt | 21 +++++++++++++++++++ .../serverless-customcatalog/script | 4 ++++ .../serverless-customcatalog/test.toml | 3 +++ acceptance/server_test.go | 10 ++++++--- .../{{.project_name}}.pipeline.yml.tmpl | 6 +++++- 5 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt create mode 100644 acceptance/bundle/templates/default-python/serverless-customcatalog/script create mode 100644 acceptance/bundle/templates/default-python/serverless-customcatalog/test.toml diff --git a/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt b/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt new file mode 100644 index 0000000000..e5b3d905a0 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt @@ -0,0 +1,21 @@ + +>>> [CLI] bundle init default-python --config-file 
[TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/input.json --output-dir output + +Welcome to the default Python template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> diff.py [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output output/ +--- [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml ++++ output/my_default_python/resources/my_default_python.pipeline.yml +@@ -4,5 +4,5 @@ + my_default_python_pipeline: + name: my_default_python_pipeline +- catalog: main ++ catalog: customcatalog + target: my_default_python_${bundle.target} + serverless: true diff --git a/acceptance/bundle/templates/default-python/serverless-customcatalog/script b/acceptance/bundle/templates/default-python/serverless-customcatalog/script new file mode 100644 index 0000000000..2d1597c81f --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-customcatalog/script @@ -0,0 +1,4 @@ +trace $CLI bundle init default-python --config-file $TESTDIR/../serverless/input.json --output-dir output +mv output/my_default_python/.gitignore output/my_default_python/out.gitignore +trace diff.py $TESTDIR/../serverless/output output/ +rm -fr output diff --git a/acceptance/bundle/templates/default-python/serverless-customcatalog/test.toml b/acceptance/bundle/templates/default-python/serverless-customcatalog/test.toml new file mode 100644 index 0000000000..7d4ab30653 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-customcatalog/test.toml @@ -0,0 +1,3 @@ +[[Server]] +Pattern = "GET 
/api/2.1/unity-catalog/current-metastore-assignment" +Response.Body = '{"default_catalog_name": "customcatalog"}' diff --git a/acceptance/server_test.go b/acceptance/server_test.go index 4fc3108d20..402e3ca5f0 100644 --- a/acceptance/server_test.go +++ b/acceptance/server_test.go @@ -20,6 +20,12 @@ var testUser = iam.User{ UserName: "tester@databricks.com", } +var testMetastore = catalog.MetastoreAssignment{ + DefaultCatalogName: "hive_metastore", + MetastoreId: "120efa64-9b68-46ba-be38-f319458430d2", + WorkspaceId: 470123456789500, +} + func AddHandlers(server *testserver.Server) { server.Handle("GET", "/api/2.0/policies/clusters/list", func(req testserver.Request) any { return compute.ListPoliciesResponse{ @@ -106,9 +112,7 @@ func AddHandlers(server *testserver.Server) { }) server.Handle("GET", "/api/2.1/unity-catalog/current-metastore-assignment", func(req testserver.Request) any { - return catalog.MetastoreAssignment{ - DefaultCatalogName: "main", - } + return testMetastore }) server.Handle("GET", "/api/2.0/permissions/directories/{objectId}", func(req testserver.Request) any { diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl index f740a848b3..c8dd4c0a23 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl @@ -5,9 +5,13 @@ resources: {{.project_name}}_pipeline: name: {{.project_name}}_pipeline {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} + {{- if $with_serverless }} + ## Catalog is required for serverless compute + catalog: main + {{else}} ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: # catalog: catalog_name - {{- 
else}} + {{end}}{{- else}} catalog: {{default_catalog}} {{- end}} target: {{.project_name}}_${bundle.target} From 6f5dc05d3aef4b4e54ea9e6945fc82c40419f4bf Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 26 Feb 2025 12:20:53 +0100 Subject: [PATCH 09/11] fix whitespace & update tests --- .../classic/out.compare-vs-serverless.diff | 9 +++++++-- .../default-python/serverless-customcatalog/output.txt | 3 ++- .../resources/my_default_python.pipeline.yml | 1 + .../resources/{{.project_name}}.pipeline.yml.tmpl | 7 +++---- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff b/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff index 6f5b0a7f40..4c4eac05d3 100644 --- a/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff +++ b/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff @@ -40,8 +40,13 @@ + max_workers: 4 --- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml +++ output/my_default_python/resources/my_default_python.pipeline.yml -@@ -6,5 +6,4 @@ - catalog: main +@@ -4,8 +4,7 @@ + my_default_python_pipeline: + name: my_default_python_pipeline +- ## Catalog is required for serverless compute +- catalog: main ++ ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: ++ # catalog: catalog_name target: my_default_python_${bundle.target} - serverless: true libraries: diff --git a/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt b/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt index e5b3d905a0..30726013bf 100644 --- a/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt +++ b/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt @@ -12,9 +12,10 @@ See also the 
documentation at https://docs.databricks.com/dev-tools/bundles/inde >>> diff.py [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output output/ --- [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml +++ output/my_default_python/resources/my_default_python.pipeline.yml -@@ -4,5 +4,5 @@ +@@ -4,6 +4,5 @@ my_default_python_pipeline: name: my_default_python_pipeline +- ## Catalog is required for serverless compute - catalog: main + catalog: customcatalog target: my_default_python_${bundle.target} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml index 4f880ba485..6dac62ded7 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml @@ -3,6 +3,7 @@ resources: pipelines: my_default_python_pipeline: name: my_default_python_pipeline + ## Catalog is required for serverless compute catalog: main target: my_default_python_${bundle.target} serverless: true diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl index c8dd4c0a23..024c1ab15b 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl @@ -7,11 +7,10 @@ resources: {{- if or (eq default_catalog "") (eq default_catalog 
"hive_metastore")}} {{- if $with_serverless }} ## Catalog is required for serverless compute - catalog: main - {{else}} + catalog: main{{else}} ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: - # catalog: catalog_name - {{end}}{{- else}} + # catalog: catalog_name{{end}} + {{- else}} catalog: {{default_catalog}} {{- end}} target: {{.project_name}}_${bundle.target} From f8a1b6f1000200256eeeffa0927a3b92df6ab5ee Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 26 Feb 2025 12:30:39 +0100 Subject: [PATCH 10/11] fix windows --- .../default-python/serverless-customcatalog/test.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/acceptance/bundle/templates/default-python/serverless-customcatalog/test.toml b/acceptance/bundle/templates/default-python/serverless-customcatalog/test.toml index 7d4ab30653..4029057bea 100644 --- a/acceptance/bundle/templates/default-python/serverless-customcatalog/test.toml +++ b/acceptance/bundle/templates/default-python/serverless-customcatalog/test.toml @@ -1,3 +1,8 @@ [[Server]] Pattern = "GET /api/2.1/unity-catalog/current-metastore-assignment" Response.Body = '{"default_catalog_name": "customcatalog"}' + +[[Repls]] +# windows fix +Old = '\\' +New = '/' From 48ebd08fab7d86783a425af58a52be1f83cf59c9 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 26 Feb 2025 13:35:58 +0100 Subject: [PATCH 11/11] rebase and update tests --- .../default-python/classic/out.compare-vs-serverless.diff | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff b/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff index 4c4eac05d3..6890badf0b 100644 --- a/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff +++ b/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff @@ -6,7 +6,7 @@ + job_cluster_key: job_cluster notebook_task: 
notebook_path: ../src/notebook.ipynb -@@ -29,17 +30,20 @@ +@@ -29,17 +30,21 @@ depends_on: - task_key: refresh_pipeline - environment_key: default @@ -35,6 +35,7 @@ + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge ++ data_security_mode: SINGLE_USER + autoscale: + min_workers: 1 + max_workers: 4