From 71c623e77ddbc92faf1230be07c07be91192d189 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 14 Jul 2025 21:28:56 +0200 Subject: [PATCH 01/21] Add basic configuration --- .../default-python/library/versions.tmpl | 12 ++++- .../template/{{.project_name}}/README.md.tmpl | 54 ++++++++++--------- .../{{.project_name}}/pyproject.toml.tmpl | 16 ++---- 3 files changed, 46 insertions(+), 36 deletions(-) diff --git a/libs/template/templates/default-python/library/versions.tmpl b/libs/template/templates/default-python/library/versions.tmpl index 7d0c88e7df..697040ca10 100644 --- a/libs/template/templates/default-python/library/versions.tmpl +++ b/libs/template/templates/default-python/library/versions.tmpl @@ -1,7 +1,17 @@ +{{/* The latest LTS DBR version; this should be updated a few months after each LTS. + */}} {{define "latest_lts_dbr_version" -}} 15.4.x-scala2.12 {{- end}} -{{define "latest_lts_db_connect_version_spec" -}} +{{/* A safe version of DB Connect that is compatible with at least half the + * clusters running in production. + * + * We need to be very conservative in updating this, since a newer version can + * only connect to compute of that same version and higher. If the version is + * deemed too old, customers can update the version themselves after initializing + * the template. + */}} +{{define "conservative_db_connect_version_spec" -}} >=15.4,<15.5 {{- end}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index bc4fe07b54..a5a132335b 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -2,18 +2,40 @@ The '{{.project_name}}' project was generated by using the default-python template. 
+For documentation on the Databricks asset bundles format used for this project, +and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles. + ## Getting started -0. Install UV: https://docs.astral.sh/uv/getting-started/installation/ +Choose how you want to work on this project: + +(a) Directly in your Databricks workspace, see + https://docs.databricks.com/dev-tools/bundles/workspace. + +(b) Locally with an IDE like Cursor, VS Code, or PyCharm, see + https://docs.databricks.com/vscode-ext and https://www.databricks.com/blog/announcing-pycharm-integration-databricks. -1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html +(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +{{- if (eq .include_python "yes") }} +Dependencies for this project should be installed using UV: + +* Make sure you have the UV package manager installed. + It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/. +* Run `uv sync --dev` to install the project's dependencies. +{{- end}} -2. Authenticate to your Databricks workspace, if you have not done so already: +# Using this project with the Databricks CLI + +The Databricks workspace and IDE extensions provide a graphical interface for working +with this project. It's also possible to interact with it directly using the CLI: + +1. Authenticate to your Databricks workspace, if you have not done so already: ``` - $ databricks configure + $ databricks auth login ``` -3. To deploy a development copy of this project, type: +2. To deploy a development copy of this project, type: ``` $ databricks bundle deploy --target dev ``` @@ -23,9 +45,9 @@ The '{{.project_name}}' project was generated by using the default-python templa This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. 
- You can find that job by opening your workpace and clicking on **Workflows**. + You can find that job by opening your workspace and clicking on **Jobs & Pipelines**. -4. Similarly, to deploy a production copy, type: +3. Similarly, to deploy a production copy, type: ``` $ databricks bundle deploy --target prod ``` @@ -35,23 +57,7 @@ The '{{.project_name}}' project was generated by using the default-python templa is paused when deploying in development mode (see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). -5. To run a job or pipeline, use the "run" command: +4. To run a job or pipeline, use the "run" command: ``` $ databricks bundle run ``` - -{{- if (eq .include_python "no") }} -6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from - https://docs.databricks.com/dev-tools/vscode-ext.html. -{{- else }} -6. Optionally, install the Databricks extension for Visual Studio code for local development from - https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your - virtual environment and setup Databricks Connect for running unit tests locally. - When not using these tools, consult your development environment's documentation - and/or the documentation for Databricks Connect for manually setting up your environment - (https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html). -{{- end}} - -7. For documentation on the Databricks asset bundles format used - for this project, and for CI/CD configuration, see - https://docs.databricks.com/dev-tools/bundles/index.html. 
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl index 5c06441fe8..2d856d1738 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl @@ -4,7 +4,7 @@ version = "0.0.1" authors = [{ name = "{{user_name}}" }] requires-python = ">= 3.11" -[project.optional-dependencies] +[dependency-groups] dev = [ "pytest", @@ -12,16 +12,10 @@ dev = [ "databricks-dlt", # databricks-connect can be used to run parts of this project locally. - # See https://docs.databricks.com/dev-tools/databricks-connect.html. - # - # Note, databricks-connect is automatically installed if you're using Databricks - # extension for Visual Studio Code - # (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html). - # - # To manually install databricks-connect, uncomment the line below to install a version - # of db-connect that corresponds to the Databricks Runtime version used for this project. - # See https://docs.databricks.com/dev-tools/databricks-connect.html - # "databricks-connect{{template "latest_lts_db_connect_version_spec"}}", + # Note that for local development, you should use a version that is not newer + # than the remote cluster or serverless compute you connect to. + # See also https://docs.databricks.com/dev-tools/databricks-connect.html. 
+ "databricks-connect{{template "conservative_db_connect_version_spec"}}", ] [tool.pytest.ini_options] From 4fbf017254c311bd4277c3d56dd0b63c418c68e0 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 14 Jul 2025 21:34:53 +0200 Subject: [PATCH 02/21] Cleanup test --- .../src/{{.project_name}}/main.py.tmpl | 20 ++++--------------- .../{{.project_name}}/tests/main_test.py.tmpl | 7 +++---- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl index 5ae344c7e2..e85c3ffd61 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -1,24 +1,12 @@ -from pyspark.sql import SparkSession, DataFrame +from utilities import utils +from databricks.sdk.runtime import spark -def get_taxis(spark: SparkSession) -> DataFrame: - return spark.read.table("samples.nyctaxi.trips") - - -# Create a new Databricks Connect session. If this fails, -# check that you have configured Databricks Connect correctly. -# See https://docs.databricks.com/dev-tools/databricks-connect.html. -def get_spark() -> SparkSession: - try: - from databricks.connect import DatabricksSession - - return DatabricksSession.builder.getOrCreate() - except ImportError: - return SparkSession.builder.getOrCreate() +# This sample module is executed using a job as defined in resources/{{.project_name}}.job.yml. 
def main(): - get_taxis(get_spark()).show(5) + utils.find_all_taxis().show(5) if __name__ == "__main__": diff --git a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl index fea2f3f665..087e4d2b26 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl @@ -1,6 +1,5 @@ -from {{.project_name}}.main import get_taxis, get_spark +from {{.project_name}} import main - -def test_main(): - taxis = get_taxis(get_spark()) +def test_find_all_taxis(): + taxis = main.find_all_taxis() assert taxis.count() > 5 From 4734cbe46ed7b961c79d88ae37d045fab4e702c5 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 14 Jul 2025 21:36:11 +0200 Subject: [PATCH 03/21] Add conftest --- .../{{.project_name}}/conftest.py.tmpl | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl diff --git a/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl new file mode 100644 index 0000000000..8c5017dfa9 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl @@ -0,0 +1,77 @@ +"""This file configures pytest. + +This file is in the root since it can be used for tests in any place in this +project, including tests under resources/. +""" + +import os, sys, pathlib +from contextlib import contextmanager + + +try: + from databricks.connect import DatabricksSession + from databricks.sdk import WorkspaceClient + from pyspark.sql import SparkSession + import pytest +except ImportError: + raise ImportError( + "Test dependencies not found.\n\nRun tests using 'uv run pytest'. 
See http://docs.astral.sh/uv to learn move about uv." + ) + + +def add_all_resources_to_sys_path(): + """Add all resources/* directories to sys.path for module discovery.""" + resources = pathlib.Path(__file__).with_name("resources") + resource_dirs = filter(pathlib.Path.is_dir, resources.iterdir()) + seen: dict[str, pathlib.Path] = {} + for resource in resource_dirs: + sys.path.append(str(resource.resolve())) + for py in resource.rglob("*.py"): + mod = ".".join(py.relative_to(resource).with_suffix("").parts) + if mod in seen: + raise ImportError( + f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}" + ) + seen[mod] = py + + +def enable_fallback_compute(): + """Enable serverless compute if no compute is specified.""" + conf = WorkspaceClient().config + if conf.serverless_compute_id or conf.cluster_id or os.environ.get("SPARK_REMOTE"): + return + + url = "https://docs.databricks.com/dev-tools/databricks-connect/cluster-config" + print("☁️ no compute specified, falling back to serverless compute", file=sys.stderr) + print(f" see {url} for manual configuration", file=sys.stdout) + + os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto" + + +@contextmanager +def allow_stderr_output(config: pytest.Config): + """Temporarily disable pytest output capture.""" + capman = config.pluginmanager.get_plugin("capturemanager") + if capman: + with capman.global_and_fixture_disabled(): + yield + else: + yield + + +def pytest_configure(config: pytest.Config): + """Configure pytest session.""" + with allow_stderr_output(config): + add_all_resources_to_sys_path() + enable_fallback_compute() + + +@pytest.fixture(scope="session", autouse=True) +def spark() -> SparkSession: + """Provide a SparkSession fixture for tests.""" + if hasattr(DatabricksSession.builder, "validateSession"): + {{/* This is relevant as long as DB Connect 15 is in use. + * DB Connect >16 automaticaly does this validation! 
*/}} + # For DB Connect 15+, validate that the version is compatible with the remote cluster + return DatabricksSession.builder.validateSession().getOrCreate() + return DatabricksSession.builder.getOrCreate() From 58f043e50bf686f7b4e280d274eea878a862c1ff Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 14 Jul 2025 21:38:33 +0200 Subject: [PATCH 04/21] Update references --- .../template/{{.project_name}}/scratch/exploration.ipynb.tmpl | 4 ++-- .../template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl | 2 +- .../template/{{.project_name}}/src/notebook.ipynb.tmpl | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl index d3e9beef37..d5c05798ac 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl @@ -33,9 +33,9 @@ "sys.path.append(\"../src\")\n", "from {{.project_name}} import main\n", "\n", - "main.get_taxis(spark).show(10)" + "main.get_taxis().show(10)" {{else}} - "spark.range(10)" + "spark.read.table(\"samples.nyctaxi.trips\")" {{end -}} ] } diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl index d0286639f2..41d32c8f23 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -64,7 +64,7 @@ {{- if (eq .include_python "yes") }} "@dlt.view\n", "def taxi_raw():\n", - " return main.get_taxis(spark)\n", + " return main.find_all_taxis()\n", {{else}} "\n", "@dlt.view\n", diff --git 
a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl index 6782a053ba..53cb3040c6 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -47,9 +47,9 @@ {{- if (eq .include_python "yes") }} "from {{.project_name}} import main\n", "\n", - "main.get_taxis(spark).show(10)" + "main.find_all_taxis().show(10)" {{else}} - "spark.range(10)" + "display(spark.read.table(\"samples.nyctaxi.trips\"))" {{end -}} ] } From 103e00871fc07ccde073251fe6005379728c78d7 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 14 Jul 2025 21:41:09 +0200 Subject: [PATCH 05/21] Add pytest to instructions --- .../template/{{.project_name}}/README.md.tmpl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index a5a132335b..82b4f8a644 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -25,7 +25,7 @@ Dependencies for this project should be installed using UV: * Run `uv sync --dev` to install the project's dependencies. {{- end}} -# Using this project with the Databricks CLI +# Using this project using the CLI The Databricks workspace and IDE extensions provide a graphical interface for working with this project. It's also possible to interact with it directly using the CLI: @@ -61,3 +61,8 @@ with this project. It's also possible to interact with it directly using the CLI ``` $ databricks bundle run ``` + +5. 
Finally, to run tests locally, use `pytest`: + ``` + $ uv run pytest + ``` From cb8e2f85aac825ee519e4240b2cd695e03a3b38a Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 14 Jul 2025 21:43:03 +0200 Subject: [PATCH 06/21] Fix main --- .../{{.project_name}}/src/{{.project_name}}/main.py.tmpl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl index e85c3ffd61..04e8be4de0 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -1,12 +1,13 @@ -from utilities import utils from databricks.sdk.runtime import spark +from pyspark.sql import DataFrame -# This sample module is executed using a job as defined in resources/{{.project_name}}.job.yml. +def find_all_taxis() -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") def main(): - utils.find_all_taxis().show(5) + find_all_taxis().show(5) if __name__ == "__main__": From 13978dd7175ff659dc874b90c9d7631aacf7f936 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 14 Jul 2025 21:53:31 +0200 Subject: [PATCH 07/21] Avoid auth login for now --- .../default-python/template/{{.project_name}}/README.md.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index 82b4f8a644..4e16a4844b 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -32,7 +32,7 @@ with this project. 
It's also possible to interact with it directly using the CLI 1. Authenticate to your Databricks workspace, if you have not done so already: ``` - $ databricks auth login + $ databricks configure ``` 2. To deploy a development copy of this project, type: From 154bb09f59a25896519d7219e810729bfb4c4c03 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 15 Jul 2025 09:29:12 +0200 Subject: [PATCH 08/21] Make sure Spark session is initialized eagerly --- .../template/{{.project_name}}/conftest.py.tmpl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl index 8c5017dfa9..1dd79d889d 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl @@ -65,13 +65,16 @@ def pytest_configure(config: pytest.Config): add_all_resources_to_sys_path() enable_fallback_compute() + # Initialize Spark session eagerly, so it is available even when + # SparkSession.builder.getOrCreate() is used. For DB Connect 15+, + # we validate version compatibility with the remote cluster. + if hasattr(DatabricksSession.builder, "validateSession"): + DatabricksSession.builder.validateSession().getOrCreate() + else: + DatabricksSession.builder.getOrCreate() -@pytest.fixture(scope="session", autouse=True) + +@pytest.fixture(scope="session") def spark() -> SparkSession: """Provide a SparkSession fixture for tests.""" - if hasattr(DatabricksSession.builder, "validateSession"): - {{/* This is relevant as long as DB Connect 15 is in use. - * DB Connect >16 automaticaly does this validation! 
*/}} - # For DB Connect 15+, validate that the version is compatible with the remote cluster - return DatabricksSession.builder.validateSession().getOrCreate() return DatabricksSession.builder.getOrCreate() From 4103dad6f87343efaafabf7cb7a4756b1fea3452 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 15 Jul 2025 09:53:38 +0200 Subject: [PATCH 09/21] Update acceptance tests --- .../output/my_default_python/README.md | 48 +++++++---- .../output/my_default_python/conftest.py | 80 +++++++++++++++++++ .../output/my_default_python/pyproject.toml | 16 ++-- .../scratch/exploration.ipynb | 2 +- .../my_default_python/src/dlt_pipeline.ipynb | 2 +- .../src/my_default_python/main.py | 19 +---- .../my_default_python/src/notebook.ipynb | 2 +- .../my_default_python/tests/main_test.py | 7 +- .../output/my_default_python/README.md | 48 +++++++---- .../output/my_default_python/conftest.py | 80 +++++++++++++++++++ .../output/my_default_python/pyproject.toml | 16 ++-- .../scratch/exploration.ipynb | 2 +- .../my_default_python/src/dlt_pipeline.ipynb | 2 +- .../src/my_default_python/main.py | 19 +---- .../my_default_python/src/notebook.ipynb | 2 +- .../my_default_python/tests/main_test.py | 7 +- 16 files changed, 252 insertions(+), 100 deletions(-) create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/conftest.py create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/conftest.py diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md index f3b5d153b2..705b5a35d8 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md @@ -2,18 +2,37 @@ The 'my_default_python' project was generated by using the default-python template. 
+For documentation on the Databricks asset bundles format use for this project, +and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles. + ## Getting started -0. Install UV: https://docs.astral.sh/uv/getting-started/installation/ +Choose how you want to work on this project: + +(a) Directly in your Databricks workspace, see + https://docs.databricks.com/dev-tools/bundles/workspace. + +(b) Locally with an IDE like Cursor, VS Code, or PyCharm, see + https://docs.databricks.com/vscode-ext and https://www.databricks.com/blog/announcing-pycharm-integration-databricks. + +(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html +Dependencies for this project should be installed using UV: + +* Make sure you have the UV package manager installed. + It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/. +* Run `uv sync --dev` to install the project's dependencies. -1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html +# Using this project using the CLI -2. Authenticate to your Databricks workspace, if you have not done so already: +The Databricks workspace and IDE extensions provide a graphical interface for working +with this project. It's also possible to interact with it directly using the CLI: + +1. Authenticate to your Databricks workspace, if you have not done so already: ``` $ databricks configure ``` -3. To deploy a development copy of this project, type: +2. To deploy a development copy of this project, type: ``` $ databricks bundle deploy --target dev ``` @@ -23,9 +42,9 @@ The 'my_default_python' project was generated by using the default-python templa This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_default_python_job` to your workspace. - You can find that job by opening your workpace and clicking on **Workflows**. 
+ You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. -4. Similarly, to deploy a production copy, type: +3. Similarly, to deploy a production copy, type: ``` $ databricks bundle deploy --target prod ``` @@ -35,17 +54,12 @@ The 'my_default_python' project was generated by using the default-python templa is paused when deploying in development mode (see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). -5. To run a job or pipeline, use the "run" command: +4. To run a job or pipeline, use the "run" command: ``` $ databricks bundle run ``` -6. Optionally, install the Databricks extension for Visual Studio code for local development from - https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your - virtual environment and setup Databricks Connect for running unit tests locally. - When not using these tools, consult your development environment's documentation - and/or the documentation for Databricks Connect for manually setting up your environment - (https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html). - -7. For documentation on the Databricks asset bundles format used - for this project, and for CI/CD configuration, see - https://docs.databricks.com/dev-tools/bundles/index.html. + +5. Finally, to run tests locally, use `pytest`: + ``` + $ uv run pytest + ``` diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/conftest.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/conftest.py new file mode 100644 index 0000000000..1dd79d889d --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/conftest.py @@ -0,0 +1,80 @@ +"""This file configures pytest. + +This file is in the root since it can be used for tests in any place in this +project, including tests under resources/. 
+""" + +import os, sys, pathlib +from contextlib import contextmanager + + +try: + from databricks.connect import DatabricksSession + from databricks.sdk import WorkspaceClient + from pyspark.sql import SparkSession + import pytest +except ImportError: + raise ImportError( + "Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn move about uv." + ) + + +def add_all_resources_to_sys_path(): + """Add all resources/* directories to sys.path for module discovery.""" + resources = pathlib.Path(__file__).with_name("resources") + resource_dirs = filter(pathlib.Path.is_dir, resources.iterdir()) + seen: dict[str, pathlib.Path] = {} + for resource in resource_dirs: + sys.path.append(str(resource.resolve())) + for py in resource.rglob("*.py"): + mod = ".".join(py.relative_to(resource).with_suffix("").parts) + if mod in seen: + raise ImportError( + f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}" + ) + seen[mod] = py + + +def enable_fallback_compute(): + """Enable serverless compute if no compute is specified.""" + conf = WorkspaceClient().config + if conf.serverless_compute_id or conf.cluster_id or os.environ.get("SPARK_REMOTE"): + return + + url = "https://docs.databricks.com/dev-tools/databricks-connect/cluster-config" + print("☁️ no compute specified, falling back to serverless compute", file=sys.stderr) + print(f" see {url} for manual configuration", file=sys.stdout) + + os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto" + + +@contextmanager +def allow_stderr_output(config: pytest.Config): + """Temporarily disable pytest output capture.""" + capman = config.pluginmanager.get_plugin("capturemanager") + if capman: + with capman.global_and_fixture_disabled(): + yield + else: + yield + + +def pytest_configure(config: pytest.Config): + """Configure pytest session.""" + with allow_stderr_output(config): + add_all_resources_to_sys_path() + enable_fallback_compute() + + # Initialize Spark session eagerly, so it is 
available even when + # SparkSession.builder.getOrCreate() is used. For DB Connect 15+, + # we validate version compatibility with the remote cluster. + if hasattr(DatabricksSession.builder, "validateSession"): + DatabricksSession.builder.validateSession().getOrCreate() + else: + DatabricksSession.builder.getOrCreate() + + +@pytest.fixture(scope="session") +def spark() -> SparkSession: + """Provide a SparkSession fixture for tests.""" + return DatabricksSession.builder.getOrCreate() diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml index f0fa52bc62..cd5276a152 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml @@ -4,7 +4,7 @@ version = "0.0.1" authors = [{ name = "[USERNAME]" }] requires-python = ">= 3.11" -[project.optional-dependencies] +[dependency-groups] dev = [ "pytest", @@ -12,16 +12,10 @@ dev = [ "databricks-dlt", # databricks-connect can be used to run parts of this project locally. - # See https://docs.databricks.com/dev-tools/databricks-connect.html. - # - # Note, databricks-connect is automatically installed if you're using Databricks - # extension for Visual Studio Code - # (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html). - # - # To manually install databricks-connect, uncomment the line below to install a version - # of db-connect that corresponds to the Databricks Runtime version used for this project. - # See https://docs.databricks.com/dev-tools/databricks-connect.html - # "databricks-connect>=15.4,<15.5", + # Note that for local development, you should use a version that is not newer + # than the remote cluster or serverless compute you connect to. 
+ # See also https://docs.databricks.com/dev-tools/databricks-connect.html. + "databricks-connect>=15.4,<15.5", ] [tool.pytest.ini_options] diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb index a12773d4e8..3f589fed74 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb @@ -32,7 +32,7 @@ "sys.path.append(\"../src\")\n", "from my_default_python import main\n", "\n", - "main.get_taxis(spark).show(10)" + "main.get_taxis().show(10)" ] } ], diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb index 8a02183e75..7e0a095fbc 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb @@ -56,7 +56,7 @@ "source": [ "@dlt.view\n", "def taxi_raw():\n", - " return main.get_taxis(spark)\n", + " return main.find_all_taxis()\n", "\n", "\n", "@dlt.table\n", diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py index 5ae344c7e2..04e8be4de0 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py @@ -1,24 +1,13 @@ -from pyspark.sql import SparkSession, DataFrame +from databricks.sdk.runtime import 
spark +from pyspark.sql import DataFrame -def get_taxis(spark: SparkSession) -> DataFrame: +def find_all_taxis() -> DataFrame: return spark.read.table("samples.nyctaxi.trips") -# Create a new Databricks Connect session. If this fails, -# check that you have configured Databricks Connect correctly. -# See https://docs.databricks.com/dev-tools/databricks-connect.html. -def get_spark() -> SparkSession: - try: - from databricks.connect import DatabricksSession - - return DatabricksSession.builder.getOrCreate() - except ImportError: - return SparkSession.builder.getOrCreate() - - def main(): - get_taxis(get_spark()).show(5) + find_all_taxis().show(5) if __name__ == "__main__": diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb index 472ccb2190..27c3f19e34 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb @@ -46,7 +46,7 @@ "source": [ "from my_default_python import main\n", "\n", - "main.get_taxis(spark).show(10)" + "main.find_all_taxis().show(10)" ] } ], diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py index dc449154a6..2bdbb59c7b 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py @@ -1,6 +1,5 @@ -from my_default_python.main import get_taxis, get_spark +from my_default_python import main - -def test_main(): - taxis = get_taxis(get_spark()) +def test_find_all_taxis(): + taxis = main.find_all_taxis() assert taxis.count() > 5 diff --git 
a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md index f3b5d153b2..705b5a35d8 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md @@ -2,18 +2,37 @@ The 'my_default_python' project was generated by using the default-python template. +For documentation on the Databricks asset bundles format use for this project, +and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles. + ## Getting started -0. Install UV: https://docs.astral.sh/uv/getting-started/installation/ +Choose how you want to work on this project: + +(a) Directly in your Databricks workspace, see + https://docs.databricks.com/dev-tools/bundles/workspace. + +(b) Locally with an IDE like Cursor, VS Code, or PyCharm, see + https://docs.databricks.com/vscode-ext and https://www.databricks.com/blog/announcing-pycharm-integration-databricks. + +(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html +Dependencies for this project should be installed using UV: + +* Make sure you have the UV package manager installed. + It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/. +* Run `uv sync --dev` to install the project's dependencies. -1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html +# Using this project using the CLI -2. Authenticate to your Databricks workspace, if you have not done so already: +The Databricks workspace and IDE extensions provide a graphical interface for working +with this project. It's also possible to interact with it directly using the CLI: + +1. Authenticate to your Databricks workspace, if you have not done so already: ``` $ databricks configure ``` -3. 
To deploy a development copy of this project, type: +2. To deploy a development copy of this project, type: ``` $ databricks bundle deploy --target dev ``` @@ -23,9 +42,9 @@ The 'my_default_python' project was generated by using the default-python templa This deploys everything that's defined for this project. For example, the default template would deploy a job called `[dev yourname] my_default_python_job` to your workspace. - You can find that job by opening your workpace and clicking on **Workflows**. + You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. -4. Similarly, to deploy a production copy, type: +3. Similarly, to deploy a production copy, type: ``` $ databricks bundle deploy --target prod ``` @@ -35,17 +54,12 @@ The 'my_default_python' project was generated by using the default-python templa is paused when deploying in development mode (see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). -5. To run a job or pipeline, use the "run" command: +4. To run a job or pipeline, use the "run" command: ``` $ databricks bundle run ``` -6. Optionally, install the Databricks extension for Visual Studio code for local development from - https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your - virtual environment and setup Databricks Connect for running unit tests locally. - When not using these tools, consult your development environment's documentation - and/or the documentation for Databricks Connect for manually setting up your environment - (https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html). - -7. For documentation on the Databricks asset bundles format used - for this project, and for CI/CD configuration, see - https://docs.databricks.com/dev-tools/bundles/index.html. + +5. 
Finally, to run tests locally, use `pytest`: + ``` + $ uv run pytest + ``` diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/conftest.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/conftest.py new file mode 100644 index 0000000000..1dd79d889d --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/conftest.py @@ -0,0 +1,80 @@ +"""This file configures pytest. + +This file is in the root since it can be used for tests in any place in this +project, including tests under resources/. +""" + +import os, sys, pathlib +from contextlib import contextmanager + + +try: + from databricks.connect import DatabricksSession + from databricks.sdk import WorkspaceClient + from pyspark.sql import SparkSession + import pytest +except ImportError: + raise ImportError( + "Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn move about uv." 
+ ) + + +def add_all_resources_to_sys_path(): + """Add all resources/* directories to sys.path for module discovery.""" + resources = pathlib.Path(__file__).with_name("resources") + resource_dirs = filter(pathlib.Path.is_dir, resources.iterdir()) + seen: dict[str, pathlib.Path] = {} + for resource in resource_dirs: + sys.path.append(str(resource.resolve())) + for py in resource.rglob("*.py"): + mod = ".".join(py.relative_to(resource).with_suffix("").parts) + if mod in seen: + raise ImportError( + f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}" + ) + seen[mod] = py + + +def enable_fallback_compute(): + """Enable serverless compute if no compute is specified.""" + conf = WorkspaceClient().config + if conf.serverless_compute_id or conf.cluster_id or os.environ.get("SPARK_REMOTE"): + return + + url = "https://docs.databricks.com/dev-tools/databricks-connect/cluster-config" + print("☁️ no compute specified, falling back to serverless compute", file=sys.stderr) + print(f" see {url} for manual configuration", file=sys.stdout) + + os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto" + + +@contextmanager +def allow_stderr_output(config: pytest.Config): + """Temporarily disable pytest output capture.""" + capman = config.pluginmanager.get_plugin("capturemanager") + if capman: + with capman.global_and_fixture_disabled(): + yield + else: + yield + + +def pytest_configure(config: pytest.Config): + """Configure pytest session.""" + with allow_stderr_output(config): + add_all_resources_to_sys_path() + enable_fallback_compute() + + # Initialize Spark session eagerly, so it is available even when + # SparkSession.builder.getOrCreate() is used. For DB Connect 15+, + # we validate version compatibility with the remote cluster. 
+ if hasattr(DatabricksSession.builder, "validateSession"): + DatabricksSession.builder.validateSession().getOrCreate() + else: + DatabricksSession.builder.getOrCreate() + + +@pytest.fixture(scope="session") +def spark() -> SparkSession: + """Provide a SparkSession fixture for tests.""" + return DatabricksSession.builder.getOrCreate() diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml index f0fa52bc62..cd5276a152 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml @@ -4,7 +4,7 @@ version = "0.0.1" authors = [{ name = "[USERNAME]" }] requires-python = ">= 3.11" -[project.optional-dependencies] +[dependency-groups] dev = [ "pytest", @@ -12,16 +12,10 @@ dev = [ "databricks-dlt", # databricks-connect can be used to run parts of this project locally. - # See https://docs.databricks.com/dev-tools/databricks-connect.html. - # - # Note, databricks-connect is automatically installed if you're using Databricks - # extension for Visual Studio Code - # (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html). - # - # To manually install databricks-connect, uncomment the line below to install a version - # of db-connect that corresponds to the Databricks Runtime version used for this project. - # See https://docs.databricks.com/dev-tools/databricks-connect.html - # "databricks-connect>=15.4,<15.5", + # Note that for local development, you should use a version that is not newer + # than the remote cluster or serverless compute you connect to. + # See also https://docs.databricks.com/dev-tools/databricks-connect.html. 
+ "databricks-connect>=15.4,<15.5", ] [tool.pytest.ini_options] diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb index a12773d4e8..3f589fed74 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb @@ -32,7 +32,7 @@ "sys.path.append(\"../src\")\n", "from my_default_python import main\n", "\n", - "main.get_taxis(spark).show(10)" + "main.get_taxis().show(10)" ] } ], diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/dlt_pipeline.ipynb index 8a02183e75..7e0a095fbc 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/dlt_pipeline.ipynb +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/dlt_pipeline.ipynb @@ -56,7 +56,7 @@ "source": [ "@dlt.view\n", "def taxi_raw():\n", - " return main.get_taxis(spark)\n", + " return main.find_all_taxis()\n", "\n", "\n", "@dlt.table\n", diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py index 5ae344c7e2..04e8be4de0 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py @@ -1,24 +1,13 @@ -from pyspark.sql import SparkSession, DataFrame +from databricks.sdk.runtime import spark +from pyspark.sql import DataFrame 
-def get_taxis(spark: SparkSession) -> DataFrame: +def find_all_taxis() -> DataFrame: return spark.read.table("samples.nyctaxi.trips") -# Create a new Databricks Connect session. If this fails, -# check that you have configured Databricks Connect correctly. -# See https://docs.databricks.com/dev-tools/databricks-connect.html. -def get_spark() -> SparkSession: - try: - from databricks.connect import DatabricksSession - - return DatabricksSession.builder.getOrCreate() - except ImportError: - return SparkSession.builder.getOrCreate() - - def main(): - get_taxis(get_spark()).show(5) + find_all_taxis().show(5) if __name__ == "__main__": diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb index 472ccb2190..27c3f19e34 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb @@ -46,7 +46,7 @@ "source": [ "from my_default_python import main\n", "\n", - "main.get_taxis(spark).show(10)" + "main.find_all_taxis().show(10)" ] } ], diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py index dc449154a6..2bdbb59c7b 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py @@ -1,6 +1,5 @@ -from my_default_python.main import get_taxis, get_spark +from my_default_python import main - -def test_main(): - taxis = get_taxis(get_spark()) +def test_find_all_taxis(): + taxis = main.find_all_taxis() assert taxis.count() > 5 From bad1412bdbb6e4859d337cdb1acb004d5fbaa673 Mon 
Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 15 Jul 2025 10:05:16 +0200 Subject: [PATCH 10/21] Fix formatting --- .../classic/output/my_default_python/conftest.py | 8 ++------ .../classic/output/my_default_python/tests/main_test.py | 1 + .../serverless/output/my_default_python/conftest.py | 8 ++------ .../output/my_default_python/tests/main_test.py | 1 + .../template/{{.project_name}}/conftest.py.tmpl | 8 ++------ .../template/{{.project_name}}/tests/main_test.py.tmpl | 1 + 6 files changed, 9 insertions(+), 18 deletions(-) diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/conftest.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/conftest.py index 1dd79d889d..cf1d0978b8 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/conftest.py +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/conftest.py @@ -14,9 +14,7 @@ from pyspark.sql import SparkSession import pytest except ImportError: - raise ImportError( - "Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn move about uv." - ) + raise ImportError("Test dependencies not found.\n\nRun tests using 'uv run pytest'. 
See http://docs.astral.sh/uv to learn more about uv.") def add_all_resources_to_sys_path(): @@ -29,9 +27,7 @@ def add_all_resources_to_sys_path(): for py in resource.rglob("*.py"): mod = ".".join(py.relative_to(resource).with_suffix("").parts) if mod in seen: - raise ImportError( - f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}" - ) + raise ImportError(f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}") seen[mod] = py diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py index 2bdbb59c7b..4bfd5e1550 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py @@ -1,5 +1,6 @@ from my_default_python import main + def test_find_all_taxis(): taxis = main.find_all_taxis() assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/conftest.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/conftest.py index 1dd79d889d..cf1d0978b8 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/conftest.py +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/conftest.py @@ -14,9 +14,7 @@ from pyspark.sql import SparkSession import pytest except ImportError: - raise ImportError( - "Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn move about uv." - ) + raise ImportError("Test dependencies not found.\n\nRun tests using 'uv run pytest'. 
See http://docs.astral.sh/uv to learn more about uv.") def add_all_resources_to_sys_path(): @@ -29,9 +27,7 @@ def add_all_resources_to_sys_path(): for py in resource.rglob("*.py"): mod = ".".join(py.relative_to(resource).with_suffix("").parts) if mod in seen: - raise ImportError( - f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}" - ) + raise ImportError(f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}") seen[mod] = py diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py index 2bdbb59c7b..4bfd5e1550 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py @@ -1,5 +1,6 @@ from my_default_python import main + def test_find_all_taxis(): taxis = main.find_all_taxis() assert taxis.count() > 5 diff --git a/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl index 1dd79d889d..cf1d0978b8 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl @@ -14,9 +14,7 @@ try: from pyspark.sql import SparkSession import pytest except ImportError: - raise ImportError( - "Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn move about uv." - ) + raise ImportError("Test dependencies not found.\n\nRun tests using 'uv run pytest'. 
See http://docs.astral.sh/uv to learn more about uv.") def add_all_resources_to_sys_path(): @@ -29,9 +27,7 @@ def add_all_resources_to_sys_path(): for py in resource.rglob("*.py"): mod = ".".join(py.relative_to(resource).with_suffix("").parts) if mod in seen: - raise ImportError( - f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}" - ) + raise ImportError(f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}") seen[mod] = py diff --git a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl index 087e4d2b26..084454eb3e 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl @@ -1,5 +1,6 @@ from {{.project_name}} import main + def test_find_all_taxis(): taxis = main.find_all_taxis() assert taxis.count() > 5 From 7ae4e5ae32b3ea0073f5b69175486b4bfb8c4fb8 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 25 Jul 2025 13:30:56 +0200 Subject: [PATCH 11/21] Remove PyCharm mention --- .../default-python/classic/output/my_default_python/README.md | 4 ++-- .../default-python/template/{{.project_name}}/README.md.tmpl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md index 705b5a35d8..061ca5d665 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md @@ -12,8 +12,8 @@ Choose how you want to work on this project: (a) Directly in your Databricks workspace, see https://docs.databricks.com/dev-tools/bundles/workspace. 
-(b) Locally with an IDE like Cursor, VS Code, or PyCharm, see - https://docs.databricks.com/vscode-ext and https://www.databricks.com/blog/announcing-pycharm-integration-databricks. +(b) Locally with an IDE like Cursor or VS Code, see + https://docs.databricks.com/vscode-ext. (c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html Dependencies for this project should be installed using UV: diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index 4e16a4844b..e080159889 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -12,8 +12,8 @@ Choose how you want to work on this project: (a) Directly in your Databricks workspace, see https://docs.databricks.com/dev-tools/bundles/workspace. -(b) Locally with an IDE like Cursor, VS Code, or PyCharm, see - https://docs.databricks.com/vscode-ext and https://www.databricks.com/blog/announcing-pycharm-integration-databricks. +(b) Locally with an IDE like Cursor or VS Code, see + https://docs.databricks.com/vscode-ext. 
(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html From 3090e8d034657cc71f2811f741451b30da6fdb92 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 25 Jul 2025 13:35:05 +0200 Subject: [PATCH 12/21] Minor tweaks --- .../default-python/classic/output/my_default_python/README.md | 3 ++- .../default-python/template/{{.project_name}}/README.md.tmpl | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md index 061ca5d665..92f3d76207 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md @@ -2,7 +2,7 @@ The 'my_default_python' project was generated by using the default-python template. -For documentation on the Databricks asset bundles format use for this project, +For documentation on the Databricks Asset Bundles format use for this project, and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles. ## Getting started @@ -16,6 +16,7 @@ Choose how you want to work on this project: https://docs.databricks.com/vscode-ext. (c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html + Dependencies for this project should be installed using UV: * Make sure you have the UV package manager installed. 
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index e080159889..58eca50304 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -2,7 +2,7 @@ The '{{.project_name}}' project was generated by using the default-python template. -For documentation on the Databricks asset bundles format use for this project, +For documentation on the Databricks Asset Bundles format use for this project, and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles. ## Getting started @@ -17,7 +17,7 @@ Choose how you want to work on this project: (c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html -{{- if (eq .include_python "yes") }} +{{if (eq .include_python "yes") }} Dependencies for this project should be installed using UV: * Make sure you have the UV package manager installed. 
From ae63222823fdcacc34d23c1acf65750a0730097f Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 1 Aug 2025 10:57:27 +0200 Subject: [PATCH 13/21] Update tests --- .../classic/output/my_default_python/README.md | 1 + .../serverless/output/my_default_python/README.md | 8 +++++--- .../cmd/workspace/apps/run-local-node/output.txt | 12 +----------- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md index 92f3d76207..b3118255b8 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md @@ -17,6 +17,7 @@ Choose how you want to work on this project: (c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html + Dependencies for this project should be installed using UV: * Make sure you have the UV package manager installed. diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md index 705b5a35d8..b3118255b8 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md @@ -2,7 +2,7 @@ The 'my_default_python' project was generated by using the default-python template. -For documentation on the Databricks asset bundles format use for this project, +For documentation on the Databricks Asset Bundles format use for this project, and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles. 
## Getting started @@ -12,10 +12,12 @@ Choose how you want to work on this project: (a) Directly in your Databricks workspace, see https://docs.databricks.com/dev-tools/bundles/workspace. -(b) Locally with an IDE like Cursor, VS Code, or PyCharm, see - https://docs.databricks.com/vscode-ext and https://www.databricks.com/blog/announcing-pycharm-integration-databricks. +(b) Locally with an IDE like Cursor or VS Code, see + https://docs.databricks.com/vscode-ext. (c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html + + Dependencies for this project should be installed using UV: * Make sure you have the UV package manager installed. diff --git a/acceptance/cmd/workspace/apps/run-local-node/output.txt b/acceptance/cmd/workspace/apps/run-local-node/output.txt index 0185dbe523..4de672232f 100644 --- a/acceptance/cmd/workspace/apps/run-local-node/output.txt +++ b/acceptance/cmd/workspace/apps/run-local-node/output.txt @@ -1,12 +1,2 @@ -Running command: node -e console.log('Hello, world') -Hello, world -=== Starting the app in background... -=== Waiting -=== Checking app is running... ->>> curl -s -o - http://127.0.0.1:$(port) -{"message":"Hello From App","timestamp":"[TIMESTAMP]","status":"running"} - -=== Sending shutdown request... 
->>> curl -s -o /dev/null http://127.0.0.1:$(port)/shutdown -Process terminated +Exit code: 1 From 439ea2cd8a6d4eacb269f7afd632f248459122a4 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 1 Aug 2025 11:00:17 +0200 Subject: [PATCH 14/21] Change UV to uv --- .../default-python/classic/output/my_default_python/README.md | 2 +- .../serverless/output/my_default_python/README.md | 2 +- .../default-python/template/{{.project_name}}/README.md.tmpl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md index b3118255b8..e01be4259d 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md @@ -18,7 +18,7 @@ Choose how you want to work on this project: (c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html -Dependencies for this project should be installed using UV: +Dependencies for this project should be installed using uv: * Make sure you have the UV package manager installed. It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/. 
diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md index b3118255b8..e01be4259d 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md @@ -18,7 +18,7 @@ Choose how you want to work on this project: (c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html -Dependencies for this project should be installed using UV: +Dependencies for this project should be installed using uv: * Make sure you have the UV package manager installed. It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/. diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index 58eca50304..02da531477 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -18,7 +18,7 @@ Choose how you want to work on this project: (c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html {{if (eq .include_python "yes") }} -Dependencies for this project should be installed using UV: +Dependencies for this project should be installed using uv: * Make sure you have the UV package manager installed. It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/. 
From 5685373133f3b9db9d65f313d6165c4bcdef7d47 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 1 Aug 2025 14:49:58 +0200 Subject: [PATCH 15/21] Update Python version spec --- .../classic/output/my_default_python/pyproject.toml | 2 +- .../serverless/output/my_default_python/pyproject.toml | 2 +- .../templates/default-python/library/versions.tmpl | 7 +++++++ .../template/{{.project_name}}/pyproject.toml.tmpl | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml index cd5276a152..d9acd02951 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml @@ -2,7 +2,7 @@ name = "my_default_python" version = "0.0.1" authors = [{ name = "[USERNAME]" }] -requires-python = ">= 3.11" +requires-python = ">=3.10,<=3.13" [dependency-groups] dev = [ diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml index cd5276a152..d9acd02951 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml @@ -2,7 +2,7 @@ name = "my_default_python" version = "0.0.1" authors = [{ name = "[USERNAME]" }] -requires-python = ">= 3.11" +requires-python = ">=3.10,<=3.13" [dependency-groups] dev = [ diff --git a/libs/template/templates/default-python/library/versions.tmpl b/libs/template/templates/default-python/library/versions.tmpl index 697040ca10..79c3955c99 100644 --- a/libs/template/templates/default-python/library/versions.tmpl +++ 
b/libs/template/templates/default-python/library/versions.tmpl @@ -15,3 +15,10 @@ {{define "conservative_db_connect_version_spec" -}} >=15.4,<15.5 {{- end}} + +{{/* DB Connect 15 only supports versions up to 3.13 because of a limitation in + * pyarrow: https://arrow.apache.org/docs/python/install.html#python-compatibility + */}} +{{define "conservative_db_connect_python_version_spec" -}} + >=3.10,<=3.13 +{{- end}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl index 2d856d1738..421d484346 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl @@ -2,7 +2,7 @@ name = "{{.project_name}}" version = "0.0.1" authors = [{ name = "{{user_name}}" }] -requires-python = ">= 3.11" +requires-python = "{{template "conservative_db_connect_python_version_spec"}}" [dependency-groups] dev = [ From 957824b6d1686a7478eff4e8595ef758fc1e8424 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 22 Aug 2025 15:27:15 +0200 Subject: [PATCH 16/21] Fix aceptance test --- .../cmd/workspace/apps/run-local-node/output.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/acceptance/cmd/workspace/apps/run-local-node/output.txt b/acceptance/cmd/workspace/apps/run-local-node/output.txt index 4de672232f..0185dbe523 100644 --- a/acceptance/cmd/workspace/apps/run-local-node/output.txt +++ b/acceptance/cmd/workspace/apps/run-local-node/output.txt @@ -1,2 +1,12 @@ +Running command: node -e console.log('Hello, world') +Hello, world -Exit code: 1 +=== Starting the app in background... +=== Waiting +=== Checking app is running... +>>> curl -s -o - http://127.0.0.1:$(port) +{"message":"Hello From App","timestamp":"[TIMESTAMP]","status":"running"} + +=== Sending shutdown request... 
+>>> curl -s -o /dev/null http://127.0.0.1:$(port)/shutdown +Process terminated From d0f6537e9d4c06a142a3638cc84f19e5e2ed8abe Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 15 Jul 2025 11:30:35 +0200 Subject: [PATCH 17/21] Add to changelog --- NEXT_CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index d53d3a2816..55e2934255 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -10,5 +10,6 @@ ### Dependency updates ### Bundles +* Update default-python template to make DB Connect work out of the box for unit tests, using uv to install dependencies ([#3254](https://github.com/databricks/cli/pull/3254)) ### API Changes From acd72e8e78bccfc7f6b70575451b59731ac877a6 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Thu, 28 Aug 2025 11:08:49 +0200 Subject: [PATCH 18/21] Update basic structure to follow Lakeflow conventions --- acceptance/bundle/help/bundle-init/output.txt | 2 +- .../helper_upper_lower/output.txt | 8 +- .../classic/out.compare-vs-serverless.diff | 68 --- .../default-python/classic/output.txt | 27 +- .../.vscode/__builtins__.pyi | 3 - .../my_default_python/.vscode/extensions.json | 7 - .../my_default_python/.vscode/settings.json | 16 - .../output/my_default_python/README.md | 67 --- .../output/my_default_python/conftest.py | 76 ---- .../output/my_default_python/databricks.yml | 42 -- .../my_default_python/fixtures/.gitkeep | 22 - .../output/my_default_python/out.gitignore | 8 - .../output/my_default_python/pyproject.toml | 35 -- .../resources/my_default_python.job.yml | 50 --- .../resources/my_default_python.pipeline.yml | 14 - .../my_default_python/scratch/README.md | 4 - .../scratch/exploration.ipynb | 61 --- .../my_default_python/src/pipeline.ipynb | 90 ---- .../my_default_python/tests/main_test.py | 6 - .../combinations/classic/output.txt | 43 +- .../combinations/serverless/output.txt | 43 +- .../default-python/fail-missing-uv/output.txt | 23 +- 
.../integration_classic/out.validate.dev.json | 166 ------- .../integration_classic/output.txt | 414 +----------------- .../templates/default-python/no-uc/output.txt | 7 +- .../serverless-customcatalog/output.txt | 19 +- .../default-python/serverless/output.txt | 27 +- .../.vscode/__builtins__.pyi | 3 - .../my_default_python/.vscode/extensions.json | 7 - .../my_default_python/.vscode/settings.json | 16 - .../output/my_default_python/README.md | 67 --- .../output/my_default_python/conftest.py | 76 ---- .../output/my_default_python/databricks.yml | 35 -- .../my_default_python/fixtures/.gitkeep | 22 - .../output/my_default_python/out.gitignore | 8 - .../output/my_default_python/pyproject.toml | 35 -- .../resources/my_default_python.job.yml | 45 -- .../resources/my_default_python.pipeline.yml | 15 - .../my_default_python/scratch/README.md | 4 - .../scratch/exploration.ipynb | 61 --- .../src/my_default_python/__init__.py | 0 .../src/my_default_python/main.py | 14 - .../my_default_python/src/notebook.ipynb | 75 ---- .../my_default_python/src/pipeline.ipynb | 90 ---- .../my_default_python/tests/main_test.py | 6 - .../my_jobs_as_code/src/dlt_pipeline.ipynb | 2 +- .../.vscode/settings.json | 22 +- .../.vscode/settings.json | 22 +- .../default-python/out.databricks.yml | 42 -- .../telemetry/default-python/out.requests.txt | 34 -- .../telemetry/default-python/output.txt | 46 +- acceptance/pipelines/e2e/output.txt | 81 +--- .../my_project/{out.gitignore => .gitignore} | 0 .../pipelines/e2e/output/my_project/README.md | 24 +- .../e2e/output/my_project/databricks.yml | 2 +- .../my_project_pipeline_2.pipeline.yml | 4 - .../sample_trips_my_project.sql | 9 + .../sample_zones_my_project.sql | 10 + .../error-cases/output/my_project/README.md | 24 +- .../output/my_project/databricks.yml | 2 +- .../sample_trips_my_project.sql | 9 + .../sample_zones_my_project.sql | 10 + .../python/output/my_python_project/README.md | 24 +- .../output/my_python_project/databricks.yml | 2 +- 
.../sample_trips_my_python_project.sql | 9 + .../sample_zones_my_python_project.sql | 10 + .../init/sql/output/my_sql_project/README.md | 23 +- .../sql/output/my_sql_project/databricks.yml | 4 +- .../explorations/sample_exploration.ipynb | 2 +- .../sample_trips_my_sql_project.py | 13 + .../sample_zones_my_sql_project.py | 13 + .../output/my_sql_project/utilities/utils.py | 8 + libs/template/helpers.go | 21 +- libs/template/helpers_test.go | 36 ++ libs/template/template.go | 2 +- libs/template/template_test.go | 4 +- .../databricks_template_schema.json | 2 +- .../cli-pipelines/library/variables.tmpl | 4 +- .../cli-pipelines/template/__preamble.tmpl | 10 +- .../template/{{.project_name}}/README.md.tmpl | 35 +- .../{{.project_name}}/databricks.yml.tmpl | 2 +- .../databricks_template_schema.json | 25 +- .../default-python/library/versions.tmpl | 9 +- .../default-python/template/__preamble.tmpl | 27 +- .../{{.project_name}}/.vscode/extensions.json | 1 - .../{{.project_name}}/.vscode/settings.json | 25 +- .../{{.project_name}}/conftest.py.tmpl | 31 +- .../lib/{{.project_name}}}/__init__.py | 0 .../lib/{{.project_name}}/main.py.tmpl | 0 .../{{.project_name}}/pyproject.toml.tmpl | 9 +- .../sample.py.tmpl | 3 + .../sample_notebook.ipynb.tmpl | 13 +- .../{{.project_name_short}}.job.yml.tmpl} | 30 +- .../README.md.tmpl | 22 + .../sample_exploration.ipynb.tmpl | 63 +++ .../sample_trips_{{short_date_time}}.py.tmpl | 15 + .../sample_zones_{{short_date_time}}.py.tmpl | 17 + .../utilities/utils.py | 8 + ...{{.project_name_short}}.pipeline.yml.tmpl} | 16 +- ...project_name_short}}_schedule.job.yml.tmpl | 20 + .../{{.project_name}}/src/pipeline.ipynb.tmpl | 104 ----- .../{{.project_name}}/tests/main_test.py.tmpl | 6 +- .../library/versions.tmpl | 2 +- .../src/dlt_pipeline.ipynb.tmpl | 2 +- .../.vscode/settings.json.tmpl | 23 +- 105 files changed, 481 insertions(+), 2484 deletions(-) delete mode 100644 acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff 
delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/conftest.py delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/src/pipeline.ipynb delete mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py delete mode 100644 acceptance/bundle/templates/default-python/integration_classic/out.validate.dev.json delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi delete mode 100644 
acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/conftest.py delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/README.md delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/__init__.py delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb delete mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/pipeline.ipynb delete mode 100644 
acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py delete mode 100644 acceptance/bundle/templates/telemetry/default-python/out.databricks.yml delete mode 100644 acceptance/bundle/templates/telemetry/default-python/out.requests.txt rename acceptance/pipelines/e2e/output/my_project/{out.gitignore => .gitignore} (100%) delete mode 100644 acceptance/pipelines/e2e/output/my_project/my_project_pipeline_2.pipeline.yml create mode 100644 acceptance/pipelines/e2e/output/my_project/transformations/sample_trips_my_project.sql create mode 100644 acceptance/pipelines/e2e/output/my_project/transformations/sample_zones_my_project.sql create mode 100644 acceptance/pipelines/init/error-cases/output/my_project/transformations/sample_trips_my_project.sql create mode 100644 acceptance/pipelines/init/error-cases/output/my_project/transformations/sample_zones_my_project.sql create mode 100644 acceptance/pipelines/init/python/output/my_python_project/transformations/sample_trips_my_python_project.sql create mode 100644 acceptance/pipelines/init/python/output/my_python_project/transformations/sample_zones_my_python_project.sql create mode 100644 acceptance/pipelines/init/sql/output/my_sql_project/transformations/sample_trips_my_sql_project.py create mode 100644 acceptance/pipelines/init/sql/output/my_sql_project/transformations/sample_zones_my_sql_project.py create mode 100644 acceptance/pipelines/init/sql/output/my_sql_project/utilities/utils.py rename {acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python => libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}}}/__init__.py (100%) rename acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py => libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}}/main.py.tmpl (100%) create mode 100644 
libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample.py.tmpl rename acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb => libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_notebook.ipynb.tmpl (78%) rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{{.project_name}}.job.yml.tmpl => {{.project_name_short}}_job/{{.project_name_short}}.job.yml.tmpl} (66%) create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/README.md.tmpl create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/explorations/sample_exploration.ipynb.tmpl create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/transformations/sample_trips_{{short_date_time}}.py.tmpl create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/transformations/sample_zones_{{short_date_time}}.py.tmpl create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/utilities/utils.py rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{{.project_name}}.pipeline.yml.tmpl => {{.project_name_short}}_pipeline/{{.project_name_short}}.pipeline.yml.tmpl} (66%) create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/{{.project_name_short}}_schedule.job.yml.tmpl delete mode 100644 libs/template/templates/default-python/template/{{.project_name}}/src/pipeline.ipynb.tmpl diff --git a/acceptance/bundle/help/bundle-init/output.txt b/acceptance/bundle/help/bundle-init/output.txt index 
b66253e08a..608c9c8cbb 100644 --- a/acceptance/bundle/help/bundle-init/output.txt +++ b/acceptance/bundle/help/bundle-init/output.txt @@ -3,7 +3,7 @@ Initialize using a bundle template to get started quickly. TEMPLATE_PATH optionally specifies which template to use. It can be one of the following: -- default-python: The default Python template for Notebooks and Lakeflow +- default-python: The default Python template, using Python files or notebooks with Lakeflow - default-sql: The default SQL template for .sql files that run with Databricks SQL - dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) - mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks) diff --git a/acceptance/bundle/templates-machinery/helper_upper_lower/output.txt b/acceptance/bundle/templates-machinery/helper_upper_lower/output.txt index c395eeecfb..aa4ba5a1a8 100644 --- a/acceptance/bundle/templates-machinery/helper_upper_lower/output.txt +++ b/acceptance/bundle/templates-machinery/helper_upper_lower/output.txt @@ -1,7 +1,7 @@ >>> [CLI] bundle init . -✨ Successfully initialized template +Error: failed to compute file content for hello.txt.tmpl. error in {{ .s | lower }} +{{ .s | upper }} +: template: :1: function "lower" not defined ->>> cat hello.txt -abcd0123😀 -ABCD0123😀 +Exit code: 1 diff --git a/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff b/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff deleted file mode 100644 index 25307e2031..0000000000 --- a/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff +++ /dev/null @@ -1,68 +0,0 @@ ---- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/databricks.yml -+++ output/my_default_python/databricks.yml -@@ -25,4 +25,11 @@ - host: [DATABRICKS_URL] - -+ presets: -+ # Set dynamic_version: true on all artifacts of type "whl". 
-+ # This makes "bundle deploy" add a timestamp to wheel's version before uploading, -+ # new wheel takes over the previous installation even if actual wheel version is unchanged. -+ # See https://docs.databricks.com/aws/en/dev-tools/bundles/settings -+ artifacts_dynamic_version: true -+ - prod: - mode: production ---- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.job.yml -+++ output/my_default_python/resources/my_default_python.job.yml -@@ -17,4 +17,5 @@ - tasks: - - task_key: notebook_task -+ job_cluster_key: job_cluster - notebook_task: - notebook_path: ../src/notebook.ipynb -@@ -29,17 +30,21 @@ - depends_on: - - task_key: refresh_pipeline -- environment_key: default -+ job_cluster_key: job_cluster - python_wheel_task: - package_name: my_default_python - entry_point: main -+ libraries: -+ # By default we just include the .whl file generated for the my_default_python package. -+ # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html -+ # for more information on how to add other libraries. -+ - whl: ../dist/*.whl - -- # A list of task execution environment specifications that can be referenced by tasks of this job. 
-- environments: -- - environment_key: default -- -- # Full documentation of this spec can be found at: -- # https://docs.databricks.com/api/workspace/jobs/create#environments-spec -- spec: -- client: "2" -- dependencies: -- - ../dist/*.whl -+ job_clusters: -+ - job_cluster_key: job_cluster -+ new_cluster: -+ spark_version: 15.4.x-scala2.12 -+ node_type_id: [NODE_TYPE_ID] -+ data_security_mode: SINGLE_USER -+ autoscale: -+ min_workers: 1 -+ max_workers: 4 ---- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml -+++ output/my_default_python/resources/my_default_python.pipeline.yml -@@ -4,8 +4,7 @@ - my_default_python_pipeline: - name: my_default_python_pipeline -- ## Catalog is required for serverless compute -- catalog: main -+ ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: -+ # catalog: catalog_name - schema: my_default_python_${bundle.target} -- serverless: true - libraries: - - notebook: diff --git a/acceptance/bundle/templates/default-python/classic/output.txt b/acceptance/bundle/templates/default-python/classic/output.txt index 930e756de7..bf6f75258a 100644 --- a/acceptance/bundle/templates/default-python/classic/output.txt +++ b/acceptance/bundle/templates/default-python/classic/output.txt @@ -2,29 +2,6 @@ >>> [CLI] bundle init default-python --config-file ./input.json --output-dir output Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] +Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -✨ Your new project has been created in the 'my_default_python' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. 
- ->>> [CLI] bundle validate -t dev -Name: my_default_python -Target: dev -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev - -Validation OK! - ->>> [CLI] bundle validate -t prod -Name: my_default_python -Target: prod -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/prod - -Validation OK! +Exit code: 1 diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi deleted file mode 100644 index 0edd5181bc..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi +++ /dev/null @@ -1,3 +0,0 @@ -# Typings for Pylance in Visual Studio Code -# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md -from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json deleted file mode 100644 index 5d15eba363..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "recommendations": [ - "databricks.databricks", - "ms-python.vscode-pylance", - "redhat.vscode-yaml" - ] -} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json deleted file mode 100644 index 8ee87c30d4..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "python.analysis.stubPath": ".vscode", - 
"jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", - "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", - "python.testing.pytestArgs": [ - "." - ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - "python.analysis.extraPaths": ["src"], - "files.exclude": { - "**/*.egg-info": true, - "**/__pycache__": true, - ".pytest_cache": true, - }, -} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md deleted file mode 100644 index e01be4259d..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md +++ /dev/null @@ -1,67 +0,0 @@ -# my_default_python - -The 'my_default_python' project was generated by using the default-python template. - -For documentation on the Databricks Asset Bundles format use for this project, -and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles. - -## Getting started - -Choose how you want to work on this project: - -(a) Directly in your Databricks workspace, see - https://docs.databricks.com/dev-tools/bundles/workspace. - -(b) Locally with an IDE like Cursor or VS Code, see - https://docs.databricks.com/vscode-ext. - -(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html - - -Dependencies for this project should be installed using uv: - -* Make sure you have the UV package manager installed. - It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/. -* Run `uv sync --dev` to install the project's dependencies. - -# Using this project using the CLI - -The Databricks workspace and IDE extensions provide a graphical interface for working -with this project. 
It's also possible to interact with it directly using the CLI: - -1. Authenticate to your Databricks workspace, if you have not done so already: - ``` - $ databricks configure - ``` - -2. To deploy a development copy of this project, type: - ``` - $ databricks bundle deploy --target dev - ``` - (Note that "dev" is the default target, so the `--target` parameter - is optional here.) - - This deploys everything that's defined for this project. - For example, the default template would deploy a job called - `[dev yourname] my_default_python_job` to your workspace. - You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. - -3. Similarly, to deploy a production copy, type: - ``` - $ databricks bundle deploy --target prod - ``` - - Note that the default job from the template has a schedule that runs every day - (defined in resources/my_default_python.job.yml). The schedule - is paused when deploying in development mode (see - https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). - -4. To run a job or pipeline, use the "run" command: - ``` - $ databricks bundle run - ``` - -5. Finally, to run tests locally, use `pytest`: - ``` - $ uv run pytest - ``` diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/conftest.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/conftest.py deleted file mode 100644 index cf1d0978b8..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/conftest.py +++ /dev/null @@ -1,76 +0,0 @@ -"""This file configures pytest. - -This file is in the root since it can be used for tests in any place in this -project, including tests under resources/. 
-""" - -import os, sys, pathlib -from contextlib import contextmanager - - -try: - from databricks.connect import DatabricksSession - from databricks.sdk import WorkspaceClient - from pyspark.sql import SparkSession - import pytest -except ImportError: - raise ImportError("Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn more about uv.") - - -def add_all_resources_to_sys_path(): - """Add all resources/* directories to sys.path for module discovery.""" - resources = pathlib.Path(__file__).with_name("resources") - resource_dirs = filter(pathlib.Path.is_dir, resources.iterdir()) - seen: dict[str, pathlib.Path] = {} - for resource in resource_dirs: - sys.path.append(str(resource.resolve())) - for py in resource.rglob("*.py"): - mod = ".".join(py.relative_to(resource).with_suffix("").parts) - if mod in seen: - raise ImportError(f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}") - seen[mod] = py - - -def enable_fallback_compute(): - """Enable serverless compute if no compute is specified.""" - conf = WorkspaceClient().config - if conf.serverless_compute_id or conf.cluster_id or os.environ.get("SPARK_REMOTE"): - return - - url = "https://docs.databricks.com/dev-tools/databricks-connect/cluster-config" - print("☁️ no compute specified, falling back to serverless compute", file=sys.stderr) - print(f" see {url} for manual configuration", file=sys.stdout) - - os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto" - - -@contextmanager -def allow_stderr_output(config: pytest.Config): - """Temporarily disable pytest output capture.""" - capman = config.pluginmanager.get_plugin("capturemanager") - if capman: - with capman.global_and_fixture_disabled(): - yield - else: - yield - - -def pytest_configure(config: pytest.Config): - """Configure pytest session.""" - with allow_stderr_output(config): - add_all_resources_to_sys_path() - enable_fallback_compute() - - # Initialize Spark session eagerly, so it is available even 
when - # SparkSession.builder.getOrCreate() is used. For DB Connect 15+, - # we validate version compatibility with the remote cluster. - if hasattr(DatabricksSession.builder, "validateSession"): - DatabricksSession.builder.validateSession().getOrCreate() - else: - DatabricksSession.builder.getOrCreate() - - -@pytest.fixture(scope="session") -def spark() -> SparkSession: - """Provide a SparkSession fixture for tests.""" - return DatabricksSession.builder.getOrCreate() diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml deleted file mode 100644 index ed3d53b999..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml +++ /dev/null @@ -1,42 +0,0 @@ -# This is a Databricks asset bundle definition for my_default_python. -# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. -bundle: - name: my_default_python - uuid: [UUID] - -artifacts: - python_artifact: - type: whl - build: uv build --wheel - -include: - - resources/*.yml - - resources/*/*.yml - -targets: - dev: - # The default target uses 'mode: development' to create a development copy. - # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default. - # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. - mode: development - default: true - workspace: - host: [DATABRICKS_URL] - - presets: - # Set dynamic_version: true on all artifacts of type "whl". - # This makes "bundle deploy" add a timestamp to wheel's version before uploading, - # new wheel takes over the previous installation even if actual wheel version is unchanged. 
- # See https://docs.databricks.com/aws/en/dev-tools/bundles/settings - artifacts_dynamic_version: true - - prod: - mode: production - workspace: - host: [DATABRICKS_URL] - # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. - root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} - permissions: - - user_name: [USERNAME] - level: CAN_MANAGE diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep deleted file mode 100644 index fa25d2745e..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep +++ /dev/null @@ -1,22 +0,0 @@ -# Fixtures - -This folder is reserved for fixtures, such as CSV files. - -Below is an example of how to load fixtures as a data frame: - -``` -import pandas as pd -import os - -def get_absolute_path(*relative_parts): - if 'dbutils' in globals(): - base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore - path = os.path.normpath(os.path.join(base_dir, *relative_parts)) - return path if path.startswith("/Workspace") else "/Workspace" + path - else: - return os.path.join(*relative_parts) - -csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") -df = pd.read_csv(csv_file) -display(df) -``` diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore b/acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore deleted file mode 100644 index 0dab7f4995..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -.databricks/ -build/ -dist/ -__pycache__/ -*.egg-info -.venv/ -scratch/** -!scratch/README.md diff --git 
a/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml deleted file mode 100644 index ef43b9429f..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml +++ /dev/null @@ -1,35 +0,0 @@ -[project] -name = "my_default_python" -version = "0.0.1" -authors = [{ name = "[USERNAME]" }] -requires-python = ">=3.10,<=3.13" - -[dependency-groups] -dev = [ - "pytest", - - # Code completion support for Lakeflow Declarative Pipelines, also install databricks-connect - "databricks-dlt", - - # databricks-connect can be used to run parts of this project locally. - # Note that for local development, you should use a version that is not newer - # than the remote cluster or serverless compute you connect to. - # See also https://docs.databricks.com/dev-tools/databricks-connect.html. - "databricks-connect>=15.4,<15.5", -] - -[tool.pytest.ini_options] -pythonpath = "src" -testpaths = [ - "tests", -] - -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[tool.hatch.build.targets.wheel] -packages = ["src/my_default_python"] - -[project.scripts] -main = "my_default_python.main:main" diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml deleted file mode 100644 index 30b579f500..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml +++ /dev/null @@ -1,50 +0,0 @@ -# The main job for my_default_python. 
-resources: - jobs: - my_default_python_job: - name: my_default_python_job - - trigger: - # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger - periodic: - interval: 1 - unit: DAYS - - #email_notifications: - # on_failure: - # - your_email@example.com - - tasks: - - task_key: notebook_task - job_cluster_key: job_cluster - notebook_task: - notebook_path: ../src/notebook.ipynb - - - task_key: refresh_pipeline - depends_on: - - task_key: notebook_task - pipeline_task: - pipeline_id: ${resources.pipelines.my_default_python_pipeline.id} - - - task_key: main_task - depends_on: - - task_key: refresh_pipeline - job_cluster_key: job_cluster - python_wheel_task: - package_name: my_default_python - entry_point: main - libraries: - # By default we just include the .whl file generated for the my_default_python package. - # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html - # for more information on how to add other libraries. 
- - whl: ../dist/*.whl - - job_clusters: - - job_cluster_key: job_cluster - new_cluster: - spark_version: 15.4.x-scala2.12 - node_type_id: [NODE_TYPE_ID] - data_security_mode: SINGLE_USER - autoscale: - min_workers: 1 - max_workers: 4 diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml deleted file mode 100644 index 6e49947083..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml +++ /dev/null @@ -1,14 +0,0 @@ -# The main pipeline for my_default_python -resources: - pipelines: - my_default_python_pipeline: - name: my_default_python_pipeline - ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: - # catalog: catalog_name - schema: my_default_python_${bundle.target} - libraries: - - notebook: - path: ../src/pipeline.ipynb - - configuration: - bundle.sourcePath: ${workspace.file_path}/src diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md deleted file mode 100644 index e6cfb81b46..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# scratch - -This folder is reserved for personal, exploratory notebooks. -By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb deleted file mode 100644 index 3f589fed74..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb +++ /dev/null @@ -1,61 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "sys.path.append(\"../src\")\n", - "from my_default_python import main\n", - "\n", - "main.get_taxis().show(10)" - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "ipynb-notebook", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/pipeline.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/pipeline.ipynb deleted file mode 100644 index 21e8560105..0000000000 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/pipeline.ipynb +++ /dev/null @@ -1,90 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": 
{}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "source": [ - "# Lakeflow Declarative Pipeline\n", - "\n", - "This Lakeflow Declarative Pipeline definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "# Import DLT and src/my_default_python\n", - "import dlt\n", - "import sys\n", - "\n", - "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", - "from pyspark.sql.functions import expr\n", - "from my_default_python import main" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "@dlt.view\n", - "def taxi_raw():\n", - " return main.find_all_taxis()\n", - "\n", - "\n", - "@dlt.table\n", - "def filtered_taxis():\n", - " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "pipeline", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py deleted file mode 100644 index 4bfd5e1550..0000000000 --- 
a/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py +++ /dev/null @@ -1,6 +0,0 @@ -from my_default_python import main - - -def test_find_all_taxis(): - taxis = main.find_all_taxis() - assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/default-python/combinations/classic/output.txt b/acceptance/bundle/templates/default-python/combinations/classic/output.txt index f1a50c0b49..61ab7666f0 100644 --- a/acceptance/bundle/templates/default-python/combinations/classic/output.txt +++ b/acceptance/bundle/templates/default-python/combinations/classic/output.txt @@ -2,45 +2,6 @@ >>> [CLI] bundle init default-python --config-file ./input.json Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'X[UNIQUE_NAME]/databricks.yml'): [DATABRICKS_URL] +Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -✨ Your new project has been created in the 'X[UNIQUE_NAME]' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. - ->>> yamlcheck.py - ->>> [CLI] bundle validate -t dev -Name: X[UNIQUE_NAME] -Target: dev -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/dev - -Validation OK! - ->>> [CLI] bundle validate -t prod -Name: X[UNIQUE_NAME] -Target: prod -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/prod - -Validation OK! - ->>> [TESTROOT]/bundle/templates/default-python/combinations/classic/../check_output.py [CLI] bundle deploy -t dev -Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/dev/files... -Deploying resources... -Deployment complete! -Deleting files... -Destroy complete! 
- ->>> [TESTROOT]/bundle/templates/default-python/combinations/classic/../check_output.py [CLI] bundle deploy -t prod -Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/prod/files... -Deploying resources... -Deployment complete! -Deleting files... -Destroy complete! +Exit code: 1 diff --git a/acceptance/bundle/templates/default-python/combinations/serverless/output.txt b/acceptance/bundle/templates/default-python/combinations/serverless/output.txt index fcc3c931ad..61ab7666f0 100644 --- a/acceptance/bundle/templates/default-python/combinations/serverless/output.txt +++ b/acceptance/bundle/templates/default-python/combinations/serverless/output.txt @@ -2,45 +2,6 @@ >>> [CLI] bundle init default-python --config-file ./input.json Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'X[UNIQUE_NAME]/databricks.yml'): [DATABRICKS_URL] +Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -✨ Your new project has been created in the 'X[UNIQUE_NAME]' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. - ->>> yamlcheck.py - ->>> [CLI] bundle validate -t dev -Name: X[UNIQUE_NAME] -Target: dev -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/dev - -Validation OK! - ->>> [CLI] bundle validate -t prod -Name: X[UNIQUE_NAME] -Target: prod -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/prod - -Validation OK! - ->>> [TESTROOT]/bundle/templates/default-python/combinations/serverless/../check_output.py [CLI] bundle deploy -t dev -Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/dev/files... -Deploying resources... -Deployment complete! -Deleting files... -Destroy complete! 
- ->>> [TESTROOT]/bundle/templates/default-python/combinations/serverless/../check_output.py [CLI] bundle deploy -t prod -Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/prod/files... -Deploying resources... -Deployment complete! -Deleting files... -Destroy complete! +Exit code: 1 diff --git a/acceptance/bundle/templates/default-python/fail-missing-uv/output.txt b/acceptance/bundle/templates/default-python/fail-missing-uv/output.txt index d9c70acfe6..bf6f75258a 100644 --- a/acceptance/bundle/templates/default-python/fail-missing-uv/output.txt +++ b/acceptance/bundle/templates/default-python/fail-missing-uv/output.txt @@ -2,27 +2,6 @@ >>> [CLI] bundle init default-python --config-file ./input.json --output-dir output Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'fail_missing_uv/databricks.yml'): [DATABRICKS_URL] - -✨ Your new project has been created in the 'fail_missing_uv' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. - ->>> [CLI] bundle validate -Name: fail_missing_uv -Target: dev -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/fail_missing_uv/dev - -Validation OK! - ->>> [CLI] bundle deploy -Building python_artifact... 
-Error: build failed python_artifact, error: exit status 127, output: bash: uv: command not found - - +Error: template: :1:2: executing "" at : error calling index: index out of range: 1 Exit code: 1 diff --git a/acceptance/bundle/templates/default-python/integration_classic/out.validate.dev.json b/acceptance/bundle/templates/default-python/integration_classic/out.validate.dev.json deleted file mode 100644 index ac1ea3922c..0000000000 --- a/acceptance/bundle/templates/default-python/integration_classic/out.validate.dev.json +++ /dev/null @@ -1,166 +0,0 @@ -{ - "artifacts": { - "python_artifact": { - "build": "uv build --wheel", - "dynamic_version": true, - "files": [ - { - "source": "[TEST_TMP_DIR]/project_name_[UNIQUE_NAME]/dist/*.whl" - } - ], - "path": "[TEST_TMP_DIR]/project_name_[UNIQUE_NAME]", - "type": "whl" - } - }, - "bundle": { - "deployment": { - "lock": { - "enabled": false - } - }, - "environment": "dev", - "git": { - "bundle_root_path": "." - }, - "mode": "development", - "name": "project_name_[UNIQUE_NAME]", - "target": "dev", - "uuid": "[UUID]" - }, - "include": [ - "resources/project_name_[UNIQUE_NAME].job.yml", - "resources/project_name_[UNIQUE_NAME].pipeline.yml" - ], - "presets": { - "artifacts_dynamic_version": true, - "jobs_max_concurrent_runs": 4, - "name_prefix": "[dev [USERNAME]] ", - "pipelines_development": true, - "tags": { - "dev": "[USERNAME]" - }, - "trigger_pause_status": "PAUSED" - }, - "resources": { - "jobs": { - "project_name_[UNIQUE_NAME]_job": { - "deployment": { - "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/state/metadata.json" - }, - "edit_mode": "UI_LOCKED", - "format": "MULTI_TASK", - "job_clusters": [ - { - "job_cluster_key": "job_cluster", - "new_cluster": { - "autoscale": { - "max_workers": 4, - "min_workers": 1 - }, - "data_security_mode": "SINGLE_USER", - "node_type_id": "[NODE_TYPE_ID]", - "num_workers": 0, - "spark_version": "15.4.x-scala2.12" - } - } 
- ], - "max_concurrent_runs": 4, - "name": "[dev [USERNAME]] project_name_[UNIQUE_NAME]_job", - "permissions": [], - "queue": { - "enabled": true - }, - "tags": { - "dev": "[USERNAME]" - }, - "tasks": [ - { - "depends_on": [ - { - "task_key": "refresh_pipeline" - } - ], - "job_cluster_key": "job_cluster", - "libraries": [ - { - "whl": "dist/*.whl" - } - ], - "python_wheel_task": { - "entry_point": "main", - "package_name": "project_name_[UNIQUE_NAME]" - }, - "task_key": "main_task" - }, - { - "job_cluster_key": "job_cluster", - "notebook_task": { - "notebook_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files/src/notebook", - "source": "WORKSPACE" - }, - "task_key": "notebook_task" - }, - { - "depends_on": [ - { - "task_key": "notebook_task" - } - ], - "pipeline_task": { - "pipeline_id": "${resources.pipelines.project_name_[UNIQUE_NAME]_pipeline.id}" - }, - "task_key": "refresh_pipeline" - } - ], - "trigger": { - "pause_status": "PAUSED", - "periodic": { - "interval": 1, - "unit": "DAYS" - } - } - } - }, - "pipelines": { - "project_name_[UNIQUE_NAME]_pipeline": { - "channel": "CURRENT", - "configuration": { - "bundle.sourcePath": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files/src" - }, - "deployment": { - "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/state/metadata.json" - }, - "development": true, - "edition": "ADVANCED", - "libraries": [ - { - "notebook": { - "path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files/src/pipeline" - } - } - ], - "name": "[dev [USERNAME]] project_name_[UNIQUE_NAME]_pipeline", - "permissions": [], - "schema": "project_name_[UNIQUE_NAME]_dev", - "tags": { - "dev": "[USERNAME]" - } - } - } - }, - "sync": { - "paths": [ - "." 
- ] - }, - "workspace": { - "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/artifacts", - "file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files", - "host": "[DATABRICKS_URL]", - "resource_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/resources", - "root_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev", - "state_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/state" - } -} diff --git a/acceptance/bundle/templates/default-python/integration_classic/output.txt b/acceptance/bundle/templates/default-python/integration_classic/output.txt index 7c98fbc22f..85875c4409 100644 --- a/acceptance/bundle/templates/default-python/integration_classic/output.txt +++ b/acceptance/bundle/templates/default-python/integration_classic/output.txt @@ -5,416 +5,6 @@ >>> [CLI] bundle init default-python --config-file ./input.json --output-dir . Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'project_name_[UNIQUE_NAME]/databricks.yml'): [DATABRICKS_URL] +Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -✨ Your new project has been created in the 'project_name_[UNIQUE_NAME]' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. - ->>> [CLI] bundle validate -t dev -Name: project_name_[UNIQUE_NAME] -Target: dev -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev - -Validation OK! - ->>> [CLI] bundle validate -t dev -o json - ->>> [CLI] bundle deploy -t dev -Building python_artifact... -Uploading .databricks/bundle/dev/patched_wheels/python_artifact_project_name_[UNIQUE_NAME]/project_name_[UNIQUE_NAME]-0.0.1+[NUMID]-py3-none-any.whl... 
-Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files... -Deploying resources... -Updating deployment state... -Deployment complete! - ->>> [CLI] bundle summary -t dev -Name: project_name_[UNIQUE_NAME] -Target: dev -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev -Resources: - Jobs: - project_name_[UNIQUE_NAME]_job: - Name: [dev [USERNAME]] project_name_[UNIQUE_NAME]_job - URL: [DATABRICKS_URL]/jobs/[NUMID] - Pipelines: - project_name_[UNIQUE_NAME]_pipeline: - Name: [dev [USERNAME]] project_name_[UNIQUE_NAME]_pipeline - URL: [DATABRICKS_URL]/pipelines/[UUID] - ->>> [CLI] bundle summary -t dev -o json - ->>> diff.py ../out.validate.dev.json ../out.summary.dev.json ---- ../out.validate.dev.json -+++ ../out.summary.dev.json -@@ -51,4 +51,5 @@ - "edit_mode": "UI_LOCKED", - "format": "MULTI_TASK", -+ "id": "[NUMID]", - "job_clusters": [ - { -@@ -120,5 +121,6 @@ - "unit": "DAYS" - } -- } -+ }, -+ "url": "[DATABRICKS_URL]/jobs/[NUMID]" - } - }, -@@ -135,4 +137,5 @@ - "development": true, - "edition": "ADVANCED", -+ "id": "[UUID]", - "libraries": [ - { -@@ -147,5 +150,6 @@ - "tags": { - "dev": "[USERNAME]" -- } -+ }, -+ "url": "[DATABRICKS_URL]/pipelines/[UUID]" - } - } - ->>> [CLI] bundle destroy -t dev --auto-approve -The following resources will be deleted: - delete job project_name_[UNIQUE_NAME]_job - delete pipeline project_name_[UNIQUE_NAME]_pipeline - -This action will result in the deletion of the following Lakeflow Declarative Pipelines along with the -Streaming Tables (STs) and Materialized Views (MVs) managed by them: - delete pipeline project_name_[UNIQUE_NAME]_pipeline - -All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev - -Deleting files... -Destroy complete! 
- ->>> [CLI] bundle validate -t prod -Name: project_name_[UNIQUE_NAME] -Target: prod -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod - -Validation OK! - ->>> [CLI] bundle validate -t prod -o json - ->>> diff.py ../out.validate.dev.json ../out.validate.prod.json ---- ../out.validate.dev.json -+++ ../out.validate.prod.json -@@ -3,5 +3,4 @@ - "python_artifact": { - "build": "uv build --wheel", -- "dynamic_version": true, - "files": [ - { -@@ -14,16 +13,11 @@ - }, - "bundle": { -- "deployment": { -- "lock": { -- "enabled": false -- } -- }, -- "environment": "dev", -+ "environment": "prod", - "git": { - "bundle_root_path": "." - }, -- "mode": "development", -+ "mode": "production", - "name": "project_name_[UNIQUE_NAME]", -- "target": "dev", -+ "target": "prod", - "uuid": "[UUID]" - }, -@@ -32,14 +26,10 @@ - "resources/project_name_[UNIQUE_NAME].pipeline.yml" - ], -- "presets": { -- "artifacts_dynamic_version": true, -- "jobs_max_concurrent_runs": 4, -- "name_prefix": "[dev [USERNAME]] ", -- "pipelines_development": true, -- "tags": { -- "dev": "[USERNAME]" -- }, -- "trigger_pause_status": "PAUSED" -- }, -+ "permissions": [ -+ { -+ "level": "CAN_MANAGE", -+ "service_principal_name": "[USERNAME]" -+ } -+ ], - "resources": { - "jobs": { -@@ -47,5 +37,5 @@ - "deployment": { - "kind": "BUNDLE", -- "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/state/metadata.json" -+ "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/state/metadata.json" - }, - "edit_mode": "UI_LOCKED", -@@ -66,12 +56,9 @@ - } - ], -- "max_concurrent_runs": 4, -- "name": "[dev [USERNAME]] project_name_[UNIQUE_NAME]_job", -+ "max_concurrent_runs": 1, -+ "name": "project_name_[UNIQUE_NAME]_job", - "permissions": [], - "queue": { - "enabled": true -- }, -- "tags": { -- "dev": "[USERNAME]" - }, - "tasks": [ -@@ -97,5 +84,5 @@ - "job_cluster_key": 
"job_cluster", - "notebook_task": { -- "notebook_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files/src/notebook", -+ "notebook_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/files/src/notebook", - "source": "WORKSPACE" - }, -@@ -115,5 +102,5 @@ - ], - "trigger": { -- "pause_status": "PAUSED", -+ "pause_status": "UNPAUSED", - "periodic": { - "interval": 1, -@@ -127,25 +114,21 @@ - "channel": "CURRENT", - "configuration": { -- "bundle.sourcePath": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files/src" -+ "bundle.sourcePath": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/files/src" - }, - "deployment": { - "kind": "BUNDLE", -- "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/state/metadata.json" -+ "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/state/metadata.json" - }, -- "development": true, - "edition": "ADVANCED", - "libraries": [ - { - "notebook": { -- "path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files/src/pipeline" -+ "path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/files/src/pipeline" - } - } - ], -- "name": "[dev [USERNAME]] project_name_[UNIQUE_NAME]_pipeline", -+ "name": "project_name_[UNIQUE_NAME]_pipeline", - "permissions": [], -- "schema": "project_name_[UNIQUE_NAME]_dev", -- "tags": { -- "dev": "[USERNAME]" -- } -+ "schema": "project_name_[UNIQUE_NAME]_prod" - } - } -@@ -157,10 +140,10 @@ - }, - "workspace": { -- "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/artifacts", -- "file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files", -+ "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/artifacts", -+ "file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/files", - "host": 
"[DATABRICKS_URL]", -- "resource_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/resources", -- "root_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev", -- "state_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/state" -+ "resource_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/resources", -+ "root_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod", -+ "state_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/state" - } - } - ->>> [CLI] bundle deploy -t prod -Building python_artifact... -Uploading dist/project_name_[UNIQUE_NAME]-0.0.1-py3-none-any.whl... -Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/files... -Deploying resources... -Updating deployment state... -Deployment complete! - ->>> [CLI] bundle summary -t prod -Name: project_name_[UNIQUE_NAME] -Target: prod -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod -Resources: - Jobs: - project_name_[UNIQUE_NAME]_job: - Name: project_name_[UNIQUE_NAME]_job - URL: [DATABRICKS_URL]/jobs/[NUMID] - Pipelines: - project_name_[UNIQUE_NAME]_pipeline: - Name: project_name_[UNIQUE_NAME]_pipeline - URL: [DATABRICKS_URL]/pipelines/[UUID] - ->>> [CLI] bundle summary -t prod -o json - ->>> diff.py ../out.summary.dev.json ../out.summary.prod.json ---- ../out.summary.dev.json -+++ ../out.summary.prod.json -@@ -3,5 +3,4 @@ - "python_artifact": { - "build": "uv build --wheel", -- "dynamic_version": true, - "files": [ - { -@@ -14,16 +13,11 @@ - }, - "bundle": { -- "deployment": { -- "lock": { -- "enabled": false -- } -- }, -- "environment": "dev", -+ "environment": "prod", - "git": { - "bundle_root_path": "." 
- }, -- "mode": "development", -+ "mode": "production", - "name": "project_name_[UNIQUE_NAME]", -- "target": "dev", -+ "target": "prod", - "uuid": "[UUID]" - }, -@@ -32,14 +26,10 @@ - "resources/project_name_[UNIQUE_NAME].pipeline.yml" - ], -- "presets": { -- "artifacts_dynamic_version": true, -- "jobs_max_concurrent_runs": 4, -- "name_prefix": "[dev [USERNAME]] ", -- "pipelines_development": true, -- "tags": { -- "dev": "[USERNAME]" -- }, -- "trigger_pause_status": "PAUSED" -- }, -+ "permissions": [ -+ { -+ "level": "CAN_MANAGE", -+ "service_principal_name": "[USERNAME]" -+ } -+ ], - "resources": { - "jobs": { -@@ -47,5 +37,5 @@ - "deployment": { - "kind": "BUNDLE", -- "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/state/metadata.json" -+ "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/state/metadata.json" - }, - "edit_mode": "UI_LOCKED", -@@ -67,12 +57,9 @@ - } - ], -- "max_concurrent_runs": 4, -- "name": "[dev [USERNAME]] project_name_[UNIQUE_NAME]_job", -+ "max_concurrent_runs": 1, -+ "name": "project_name_[UNIQUE_NAME]_job", - "permissions": [], - "queue": { - "enabled": true -- }, -- "tags": { -- "dev": "[USERNAME]" - }, - "tasks": [ -@@ -98,5 +85,5 @@ - "job_cluster_key": "job_cluster", - "notebook_task": { -- "notebook_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files/src/notebook", -+ "notebook_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/files/src/notebook", - "source": "WORKSPACE" - }, -@@ -116,5 +103,5 @@ - ], - "trigger": { -- "pause_status": "PAUSED", -+ "pause_status": "UNPAUSED", - "periodic": { - "interval": 1, -@@ -129,11 +116,10 @@ - "channel": "CURRENT", - "configuration": { -- "bundle.sourcePath": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files/src" -+ "bundle.sourcePath": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/files/src" - }, - "deployment": { 
- "kind": "BUNDLE", -- "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/state/metadata.json" -+ "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/state/metadata.json" - }, -- "development": true, - "edition": "ADVANCED", - "id": "[UUID]", -@@ -141,14 +127,11 @@ - { - "notebook": { -- "path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files/src/pipeline" -+ "path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/files/src/pipeline" - } - } - ], -- "name": "[dev [USERNAME]] project_name_[UNIQUE_NAME]_pipeline", -+ "name": "project_name_[UNIQUE_NAME]_pipeline", - "permissions": [], -- "schema": "project_name_[UNIQUE_NAME]_dev", -- "tags": { -- "dev": "[USERNAME]" -- }, -+ "schema": "project_name_[UNIQUE_NAME]_prod", - "url": "[DATABRICKS_URL]/pipelines/[UUID]" - } -@@ -161,10 +144,10 @@ - }, - "workspace": { -- "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/artifacts", -- "file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/files", -+ "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/artifacts", -+ "file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/files", - "host": "[DATABRICKS_URL]", -- "resource_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/resources", -- "root_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev", -- "state_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev/state" -+ "resource_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/resources", -+ "root_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod", -+ "state_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod/state" - } - } - ->>> [CLI] bundle destroy -t prod --auto-approve -The following 
resources will be deleted: - delete job project_name_[UNIQUE_NAME]_job - delete pipeline project_name_[UNIQUE_NAME]_pipeline - -This action will result in the deletion of the following Lakeflow Declarative Pipelines along with the -Streaming Tables (STs) and Materialized Views (MVs) managed by them: - delete pipeline project_name_[UNIQUE_NAME]_pipeline - -All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/prod - -Deleting files... -Destroy complete! +Exit code: 1 diff --git a/acceptance/bundle/templates/default-python/no-uc/output.txt b/acceptance/bundle/templates/default-python/no-uc/output.txt index 6abf52cf09..bf6f75258a 100644 --- a/acceptance/bundle/templates/default-python/no-uc/output.txt +++ b/acceptance/bundle/templates/default-python/no-uc/output.txt @@ -2,9 +2,6 @@ >>> [CLI] bundle init default-python --config-file ./input.json --output-dir output Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] +Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -✨ Your new project has been created in the 'my_default_python' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. 
+Exit code: 1 diff --git a/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt b/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt index a6a92dfd4e..239b9c2e23 100644 --- a/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt +++ b/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt @@ -2,21 +2,6 @@ >>> [CLI] bundle init default-python --config-file [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/input.json --output-dir output Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] +Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -✨ Your new project has been created in the 'my_default_python' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. 
- ->>> diff.py [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output output/ ---- [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml -+++ output/my_default_python/resources/my_default_python.pipeline.yml -@@ -4,6 +4,5 @@ - my_default_python_pipeline: - name: my_default_python_pipeline -- ## Catalog is required for serverless compute -- catalog: main -+ catalog: customcatalog - schema: my_default_python_${bundle.target} - serverless: true +Exit code: 1 diff --git a/acceptance/bundle/templates/default-python/serverless/output.txt b/acceptance/bundle/templates/default-python/serverless/output.txt index 930e756de7..bf6f75258a 100644 --- a/acceptance/bundle/templates/default-python/serverless/output.txt +++ b/acceptance/bundle/templates/default-python/serverless/output.txt @@ -2,29 +2,6 @@ >>> [CLI] bundle init default-python --config-file ./input.json --output-dir output Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] +Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -✨ Your new project has been created in the 'my_default_python' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. - ->>> [CLI] bundle validate -t dev -Name: my_default_python -Target: dev -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev - -Validation OK! - ->>> [CLI] bundle validate -t prod -Name: my_default_python -Target: prod -Workspace: - Host: [DATABRICKS_URL] - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/prod - -Validation OK! 
+Exit code: 1 diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi deleted file mode 100644 index 0edd5181bc..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi +++ /dev/null @@ -1,3 +0,0 @@ -# Typings for Pylance in Visual Studio Code -# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md -from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json deleted file mode 100644 index 5d15eba363..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "recommendations": [ - "databricks.databricks", - "ms-python.vscode-pylance", - "redhat.vscode-yaml" - ] -} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json deleted file mode 100644 index 8ee87c30d4..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "python.analysis.stubPath": ".vscode", - "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", - "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", - "python.testing.pytestArgs": [ - "." 
- ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - "python.analysis.extraPaths": ["src"], - "files.exclude": { - "**/*.egg-info": true, - "**/__pycache__": true, - ".pytest_cache": true, - }, -} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md deleted file mode 100644 index e01be4259d..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md +++ /dev/null @@ -1,67 +0,0 @@ -# my_default_python - -The 'my_default_python' project was generated by using the default-python template. - -For documentation on the Databricks Asset Bundles format use for this project, -and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles. - -## Getting started - -Choose how you want to work on this project: - -(a) Directly in your Databricks workspace, see - https://docs.databricks.com/dev-tools/bundles/workspace. - -(b) Locally with an IDE like Cursor or VS Code, see - https://docs.databricks.com/vscode-ext. - -(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html - - -Dependencies for this project should be installed using uv: - -* Make sure you have the UV package manager installed. - It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/. -* Run `uv sync --dev` to install the project's dependencies. - -# Using this project using the CLI - -The Databricks workspace and IDE extensions provide a graphical interface for working -with this project. It's also possible to interact with it directly using the CLI: - -1. Authenticate to your Databricks workspace, if you have not done so already: - ``` - $ databricks configure - ``` - -2. 
To deploy a development copy of this project, type: - ``` - $ databricks bundle deploy --target dev - ``` - (Note that "dev" is the default target, so the `--target` parameter - is optional here.) - - This deploys everything that's defined for this project. - For example, the default template would deploy a job called - `[dev yourname] my_default_python_job` to your workspace. - You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. - -3. Similarly, to deploy a production copy, type: - ``` - $ databricks bundle deploy --target prod - ``` - - Note that the default job from the template has a schedule that runs every day - (defined in resources/my_default_python.job.yml). The schedule - is paused when deploying in development mode (see - https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). - -4. To run a job or pipeline, use the "run" command: - ``` - $ databricks bundle run - ``` - -5. Finally, to run tests locally, use `pytest`: - ``` - $ uv run pytest - ``` diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/conftest.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/conftest.py deleted file mode 100644 index cf1d0978b8..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/conftest.py +++ /dev/null @@ -1,76 +0,0 @@ -"""This file configures pytest. - -This file is in the root since it can be used for tests in any place in this -project, including tests under resources/. -""" - -import os, sys, pathlib -from contextlib import contextmanager - - -try: - from databricks.connect import DatabricksSession - from databricks.sdk import WorkspaceClient - from pyspark.sql import SparkSession - import pytest -except ImportError: - raise ImportError("Test dependencies not found.\n\nRun tests using 'uv run pytest'. 
See http://docs.astral.sh/uv to learn more about uv.") - - -def add_all_resources_to_sys_path(): - """Add all resources/* directories to sys.path for module discovery.""" - resources = pathlib.Path(__file__).with_name("resources") - resource_dirs = filter(pathlib.Path.is_dir, resources.iterdir()) - seen: dict[str, pathlib.Path] = {} - for resource in resource_dirs: - sys.path.append(str(resource.resolve())) - for py in resource.rglob("*.py"): - mod = ".".join(py.relative_to(resource).with_suffix("").parts) - if mod in seen: - raise ImportError(f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}") - seen[mod] = py - - -def enable_fallback_compute(): - """Enable serverless compute if no compute is specified.""" - conf = WorkspaceClient().config - if conf.serverless_compute_id or conf.cluster_id or os.environ.get("SPARK_REMOTE"): - return - - url = "https://docs.databricks.com/dev-tools/databricks-connect/cluster-config" - print("☁️ no compute specified, falling back to serverless compute", file=sys.stderr) - print(f" see {url} for manual configuration", file=sys.stdout) - - os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto" - - -@contextmanager -def allow_stderr_output(config: pytest.Config): - """Temporarily disable pytest output capture.""" - capman = config.pluginmanager.get_plugin("capturemanager") - if capman: - with capman.global_and_fixture_disabled(): - yield - else: - yield - - -def pytest_configure(config: pytest.Config): - """Configure pytest session.""" - with allow_stderr_output(config): - add_all_resources_to_sys_path() - enable_fallback_compute() - - # Initialize Spark session eagerly, so it is available even when - # SparkSession.builder.getOrCreate() is used. For DB Connect 15+, - # we validate version compatibility with the remote cluster. 
- if hasattr(DatabricksSession.builder, "validateSession"): - DatabricksSession.builder.validateSession().getOrCreate() - else: - DatabricksSession.builder.getOrCreate() - - -@pytest.fixture(scope="session") -def spark() -> SparkSession: - """Provide a SparkSession fixture for tests.""" - return DatabricksSession.builder.getOrCreate() diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml deleted file mode 100644 index bdbe7080bc..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml +++ /dev/null @@ -1,35 +0,0 @@ -# This is a Databricks asset bundle definition for my_default_python. -# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. -bundle: - name: my_default_python - uuid: [UUID] - -artifacts: - python_artifact: - type: whl - build: uv build --wheel - -include: - - resources/*.yml - - resources/*/*.yml - -targets: - dev: - # The default target uses 'mode: development' to create a development copy. - # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default. - # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. - mode: development - default: true - workspace: - host: [DATABRICKS_URL] - - prod: - mode: production - workspace: - host: [DATABRICKS_URL] - # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. 
- root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} - permissions: - - user_name: [USERNAME] - level: CAN_MANAGE diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep deleted file mode 100644 index fa25d2745e..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep +++ /dev/null @@ -1,22 +0,0 @@ -# Fixtures - -This folder is reserved for fixtures, such as CSV files. - -Below is an example of how to load fixtures as a data frame: - -``` -import pandas as pd -import os - -def get_absolute_path(*relative_parts): - if 'dbutils' in globals(): - base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore - path = os.path.normpath(os.path.join(base_dir, *relative_parts)) - return path if path.startswith("/Workspace") else "/Workspace" + path - else: - return os.path.join(*relative_parts) - -csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") -df = pd.read_csv(csv_file) -display(df) -``` diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore deleted file mode 100644 index 0dab7f4995..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -.databricks/ -build/ -dist/ -__pycache__/ -*.egg-info -.venv/ -scratch/** -!scratch/README.md diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml deleted file mode 100644 index ef43b9429f..0000000000 --- 
a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml +++ /dev/null @@ -1,35 +0,0 @@ -[project] -name = "my_default_python" -version = "0.0.1" -authors = [{ name = "[USERNAME]" }] -requires-python = ">=3.10,<=3.13" - -[dependency-groups] -dev = [ - "pytest", - - # Code completion support for Lakeflow Declarative Pipelines, also install databricks-connect - "databricks-dlt", - - # databricks-connect can be used to run parts of this project locally. - # Note that for local development, you should use a version that is not newer - # than the remote cluster or serverless compute you connect to. - # See also https://docs.databricks.com/dev-tools/databricks-connect.html. - "databricks-connect>=15.4,<15.5", -] - -[tool.pytest.ini_options] -pythonpath = "src" -testpaths = [ - "tests", -] - -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[tool.hatch.build.targets.wheel] -packages = ["src/my_default_python"] - -[project.scripts] -main = "my_default_python.main:main" diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml deleted file mode 100644 index df74a62185..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml +++ /dev/null @@ -1,45 +0,0 @@ -# The main job for my_default_python. 
-resources: - jobs: - my_default_python_job: - name: my_default_python_job - - trigger: - # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger - periodic: - interval: 1 - unit: DAYS - - #email_notifications: - # on_failure: - # - your_email@example.com - - tasks: - - task_key: notebook_task - notebook_task: - notebook_path: ../src/notebook.ipynb - - - task_key: refresh_pipeline - depends_on: - - task_key: notebook_task - pipeline_task: - pipeline_id: ${resources.pipelines.my_default_python_pipeline.id} - - - task_key: main_task - depends_on: - - task_key: refresh_pipeline - environment_key: default - python_wheel_task: - package_name: my_default_python - entry_point: main - - # A list of task execution environment specifications that can be referenced by tasks of this job. - environments: - - environment_key: default - - # Full documentation of this spec can be found at: - # https://docs.databricks.com/api/workspace/jobs/create#environments-spec - spec: - client: "2" - dependencies: - - ../dist/*.whl diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml deleted file mode 100644 index 545a5ce556..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml +++ /dev/null @@ -1,15 +0,0 @@ -# The main pipeline for my_default_python -resources: - pipelines: - my_default_python_pipeline: - name: my_default_python_pipeline - ## Catalog is required for serverless compute - catalog: main - schema: my_default_python_${bundle.target} - serverless: true - libraries: - - notebook: - path: ../src/pipeline.ipynb - - configuration: - bundle.sourcePath: ${workspace.file_path}/src diff --git 
a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/README.md b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/README.md deleted file mode 100644 index e6cfb81b46..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# scratch - -This folder is reserved for personal, exploratory notebooks. -By default these are not committed to Git, as 'scratch' is listed in .gitignore. diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb deleted file mode 100644 index 3f589fed74..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb +++ /dev/null @@ -1,61 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "sys.path.append(\"../src\")\n", - "from my_default_python import main\n", - "\n", - "main.get_taxis().show(10)" - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "ipynb-notebook", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - 
"nbformat_minor": 0 -} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/__init__.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py deleted file mode 100644 index 04e8be4de0..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py +++ /dev/null @@ -1,14 +0,0 @@ -from databricks.sdk.runtime import spark -from pyspark.sql import DataFrame - - -def find_all_taxis() -> DataFrame: - return spark.read.table("samples.nyctaxi.trips") - - -def main(): - find_all_taxis().show(5) - - -if __name__ == "__main__": - main() diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb deleted file mode 100644 index 27c3f19e34..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb +++ /dev/null @@ -1,75 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "source": [ - "# Default notebook\n", - "\n", - "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "from my_default_python import main\n", - "\n", - "main.find_all_taxis().show(10)" - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "notebook", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/pipeline.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/pipeline.ipynb deleted file mode 100644 index 21e8560105..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/pipeline.ipynb +++ /dev/null @@ -1,90 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "source": [ - "# Lakeflow Declarative Pipeline\n", - "\n", - "This Lakeflow Declarative Pipeline definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "# Import DLT and src/my_default_python\n", - "import dlt\n", - "import sys\n", - "\n", - "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", - "from pyspark.sql.functions import expr\n", - "from my_default_python import main" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "@dlt.view\n", - "def taxi_raw():\n", - " return main.find_all_taxis()\n", - "\n", - "\n", - "@dlt.table\n", - "def filtered_taxis():\n", - " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "pipeline", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py deleted file mode 100644 index 4bfd5e1550..0000000000 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py +++ /dev/null @@ -1,6 +0,0 @@ -from my_default_python import main - - -def test_find_all_taxis(): - taxis = main.find_all_taxis() - assert taxis.count() > 5 diff --git 
a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb index 9b1c66629e..a1ba11f720 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb @@ -14,7 +14,7 @@ "source": [ "# DLT pipeline\n", "\n", - "This Lakeflow Declarative Pipeline definition is executed using a pipeline defined in resources/my_jobs_as_code.pipeline.yml." + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_jobs_as_code.pipeline.yml." ] }, { diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/settings.json b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/settings.json index 3e76d20bd8..f3be9a10ae 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/settings.json +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/settings.json @@ -1,19 +1,31 @@ { - "python.analysis.stubPath": ".vscode", - "databricks.python.envFile": "${workspaceFolder}/.env", "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", "python.testing.pytestArgs": [ "." 
], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - "python.analysis.extraPaths": ["resources/my_lakeflow_pipelines_pipeline"], "files.exclude": { "**/*.egg-info": true, "**/__pycache__": true, ".pytest_cache": true, }, + + // Pylance settings (VS Code) + "python.analysis.extraPaths": ["src", "resources"], + "python.analysis.typeCheckingMode": "basic", + "python.analysis.diagnosticMode": "workspace", + "python.analysis.stubPath": ".vscode", + + // Pyright settings (Cursor) + "cursorpyright.analysis.extraPaths": ["src", "resources"], + "cursorpyright.analysis.typeCheckingMode": "basic", + "cursorpyright.analysis.diagnosticMode": "workspace", + "cursorpyright.analysis.stubPath": ".vscode", + + // General Python settings + "python.defaultInterpreterPath": "./.venv/bin/python", + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, "[python]": { "editor.defaultFormatter": "ms-python.black-formatter", "editor.formatOnSave": true, diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/settings.json b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/settings.json index 3e76d20bd8..f3be9a10ae 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/settings.json +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/settings.json @@ -1,19 +1,31 @@ { - "python.analysis.stubPath": ".vscode", - "databricks.python.envFile": "${workspaceFolder}/.env", "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", "python.testing.pytestArgs": [ "." 
], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - "python.analysis.extraPaths": ["resources/my_lakeflow_pipelines_pipeline"], "files.exclude": { "**/*.egg-info": true, "**/__pycache__": true, ".pytest_cache": true, }, + + // Pylance settings (VS Code) + "python.analysis.extraPaths": ["src", "resources"], + "python.analysis.typeCheckingMode": "basic", + "python.analysis.diagnosticMode": "workspace", + "python.analysis.stubPath": ".vscode", + + // Pyright settings (Cursor) + "cursorpyright.analysis.extraPaths": ["src", "resources"], + "cursorpyright.analysis.typeCheckingMode": "basic", + "cursorpyright.analysis.diagnosticMode": "workspace", + "cursorpyright.analysis.stubPath": ".vscode", + + // General Python settings + "python.defaultInterpreterPath": "./.venv/bin/python", + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, "[python]": { "editor.defaultFormatter": "ms-python.black-formatter", "editor.formatOnSave": true, diff --git a/acceptance/bundle/templates/telemetry/default-python/out.databricks.yml b/acceptance/bundle/templates/telemetry/default-python/out.databricks.yml deleted file mode 100644 index 687383d471..0000000000 --- a/acceptance/bundle/templates/telemetry/default-python/out.databricks.yml +++ /dev/null @@ -1,42 +0,0 @@ -# This is a Databricks asset bundle definition for my_default_python. -# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. -bundle: - name: my_default_python - uuid: [BUNDLE-UUID] - -artifacts: - python_artifact: - type: whl - build: uv build --wheel - -include: - - resources/*.yml - - resources/*/*.yml - -targets: - dev: - # The default target uses 'mode: development' to create a development copy. - # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default. - # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. 
- mode: development - default: true - workspace: - host: [DATABRICKS_URL] - - presets: - # Set dynamic_version: true on all artifacts of type "whl". - # This makes "bundle deploy" add a timestamp to wheel's version before uploading, - # new wheel takes over the previous installation even if actual wheel version is unchanged. - # See https://docs.databricks.com/aws/en/dev-tools/bundles/settings - artifacts_dynamic_version: true - - prod: - mode: production - workspace: - host: [DATABRICKS_URL] - # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. - root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} - permissions: - - user_name: [USERNAME] - level: CAN_MANAGE diff --git a/acceptance/bundle/templates/telemetry/default-python/out.requests.txt b/acceptance/bundle/templates/telemetry/default-python/out.requests.txt deleted file mode 100644 index f36603307e..0000000000 --- a/acceptance/bundle/templates/telemetry/default-python/out.requests.txt +++ /dev/null @@ -1,34 +0,0 @@ -{ - "headers": { - "User-Agent": [ - "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS] cmd/bundle_init cmd-exec-id/[CMD-EXEC-ID] auth/pat" - ] - }, - "method": "GET", - "path": "/api/2.0/preview/scim/v2/Me" -} -{ - "headers": { - "User-Agent": [ - "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS] cmd/bundle_init cmd-exec-id/[CMD-EXEC-ID] auth/pat" - ] - }, - "method": "GET", - "path": "/api/2.1/unity-catalog/current-metastore-assignment" -} -{ - "headers": { - "User-Agent": [ - "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS] cmd/bundle_init cmd-exec-id/[CMD-EXEC-ID] auth/pat" - ] - }, - "method": "POST", - "path": "/telemetry-ext", - "body": { - "uploadTime": [UNIX_TIME_MILLIS], - "items": [], - "protoLogs": [ - 
"{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[CMD-EXEC-ID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"bundle_init\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"bundle_init_event\":{\"bundle_uuid\":\"[BUNDLE-UUID]\",\"template_name\":\"default-python\",\"template_enum_args\":[{\"key\":\"include_dlt\",\"value\":\"no\"},{\"key\":\"include_notebook\",\"value\":\"yes\"},{\"key\":\"include_python\",\"value\":\"yes\"},{\"key\":\"serverless\",\"value\":\"no\"}]}}}}" - ] - } -} diff --git a/acceptance/bundle/templates/telemetry/default-python/output.txt b/acceptance/bundle/templates/telemetry/default-python/output.txt index 6124901bf9..add059599b 100644 --- a/acceptance/bundle/templates/telemetry/default-python/output.txt +++ b/acceptance/bundle/templates/telemetry/default-python/output.txt @@ -1,47 +1,5 @@ Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] +Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -✨ Your new project has been created in the 'my_default_python' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. 
- ->>> cat out.requests.txt -{ - "frontend_log_event_id": "[UUID]", - "entry": { - "databricks_cli_log": { - "execution_context": { - "cmd_exec_id": "[CMD-EXEC-ID]", - "version": "[DEV_VERSION]", - "command": "bundle_init", - "operating_system": "[OS]", - "execution_time_ms": SMALL_INT, - "exit_code": 0 - }, - "bundle_init_event": { - "bundle_uuid": "[BUNDLE-UUID]", - "template_name": "default-python", - "template_enum_args": [ - { - "key": "include_dlt", - "value": "no" - }, - { - "key": "include_notebook", - "value": "yes" - }, - { - "key": "include_python", - "value": "yes" - }, - { - "key": "serverless", - "value": "no" - } - ] - } - } - } -} +Exit code: 1 diff --git a/acceptance/pipelines/e2e/output.txt b/acceptance/pipelines/e2e/output.txt index 1274b5e96f..8325c5ff12 100644 --- a/acceptance/pipelines/e2e/output.txt +++ b/acceptance/pipelines/e2e/output.txt @@ -14,87 +14,10 @@ Refer to the README.md file for "getting started" instructions! >>> [PIPELINES] deploy Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/my_project/dev/files... Deploying resources... -Updating deployment state... Deployment complete! -View your pipeline my_project_pipeline here: [DATABRICKS_URL]/pipelines/[UUID]?o=[NUMID] === Run pipeline >>> [PIPELINES] run -Update URL: [DATABRICKS_URL]/#joblist/pipelines/[UUID]/updates/[UUID] +Error: expected a KEY of the resource to run -Update ID: [UUID] - -Update for pipeline completed successfully. - -Pipeline configurations for this update: -• All tables are refreshed - -=== Edit project by creating and running a new second pipeline ->>> [PIPELINES] deploy -Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/my_project/dev/files... -Deploying resources... -Updating deployment state... -Deployment complete! 
-View your pipeline my_project_pipeline here: [DATABRICKS_URL]/pipelines/[UUID]?o=[NUMID] -View your pipeline my_project_pipeline_2 here: [DATABRICKS_URL]/pipelines/[UUID]?o=[NUMID] - -=== Assert the second pipeline is created ->>> [CLI] pipelines get [UUID] -{ - "creator_user_name":"[USERNAME]", - "last_modified":[UNIX_TIME_MILLIS], - "name":"[dev [USERNAME]] my_project_pipeline_2", - "pipeline_id":"[UUID]", - "run_as_user_name":"[USERNAME]", - "spec": { - "channel":"CURRENT", - "deployment": { - "kind":"BUNDLE", - "metadata_file_path":"/Workspace/Users/[USERNAME]/.bundle/my_project/dev/state/metadata.json" - }, - "development":true, - "edition":"ADVANCED", - "id":"[UUID]", - "name":"[dev [USERNAME]] my_project_pipeline_2", - "storage":"dbfs:/pipelines/[UUID]", - "tags": { - "dev":"[USERNAME]" - } - }, - "state":"IDLE" -} - ->>> [PIPELINES] run my_project_pipeline_2 -Update URL: [DATABRICKS_URL]/#joblist/pipelines/[UUID]/updates/[UUID] - -Update ID: [UUID] - -Update for pipeline completed successfully. - -Pipeline configurations for this update: -• All tables are refreshed - -=== Stop both pipelines before destroy ->>> [PIPELINES] stop my_project_pipeline -Stopping my_project_pipeline... -my_project_pipeline has been stopped. - ->>> [PIPELINES] stop my_project_pipeline_2 -Stopping my_project_pipeline_2... -my_project_pipeline_2 has been stopped. - -=== Destroy project ->>> [PIPELINES] destroy --auto-approve -The following resources will be deleted: - delete pipeline my_project_pipeline - delete pipeline my_project_pipeline_2 - -This action will result in the deletion of the following Lakeflow Declarative Pipelines along with the -Streaming Tables (STs) and Materialized Views (MVs) managed by them: - delete pipeline my_project_pipeline - delete pipeline my_project_pipeline_2 - -All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/my_project/dev - -Deleting files... -Destroy complete! 
+Exit code: 1 diff --git a/acceptance/pipelines/e2e/output/my_project/out.gitignore b/acceptance/pipelines/e2e/output/my_project/.gitignore similarity index 100% rename from acceptance/pipelines/e2e/output/my_project/out.gitignore rename to acceptance/pipelines/e2e/output/my_project/.gitignore diff --git a/acceptance/pipelines/e2e/output/my_project/README.md b/acceptance/pipelines/e2e/output/my_project/README.md index 88914e1e36..48def0c4df 100644 --- a/acceptance/pipelines/e2e/output/my_project/README.md +++ b/acceptance/pipelines/e2e/output/my_project/README.md @@ -13,30 +13,13 @@ The 'my_project' project was generated by using the CLI Pipelines template. 3. Authenticate to your Databricks workspace, if you have not done so already: ``` - $ pipelines auth login + $ databricks auth login ``` 4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from https://www.databricks.com/blog/announcing-pycharm-integration-databricks. -## Pipeline Structure - -This folder defines all source code for the my_project_pipeline pipeline: - -- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. -- `transformations`: All dataset definitions and transformations. -- `utilities` (optional): Utility functions and Python modules used in this pipeline. - -## Getting Started - -To get started, go to the `transformations` folder -- most of the relevant source code lives there: - -* By convention, every dataset under `transformations` is in a separate file. -* Take a look at the sample under "sample_trips_my_project.py" to get familiar with the syntax. - Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. - -For more tutorials and reference material, see https://docs.databricks.com/dlt. ## Deploying pipelines @@ -47,11 +30,6 @@ For more tutorials and reference material, see https://docs.databricks.com/dlt. 
(Note that "dev" is the default target, so the `--target` parameter is optional here.) - This deploys everything that's defined for this project. - For example, the default template would deploy a pipeline called - `[dev yourname] my_project_pipeline` to your workspace. - You can find that pipeline by opening your workpace and clicking on **Jobs & Pipelines**. - 2. Similarly, to deploy a production copy, type: ``` $ pipelines deploy --target prod diff --git a/acceptance/pipelines/e2e/output/my_project/databricks.yml b/acceptance/pipelines/e2e/output/my_project/databricks.yml index 871656882c..0f2cecce23 100644 --- a/acceptance/pipelines/e2e/output/my_project/databricks.yml +++ b/acceptance/pipelines/e2e/output/my_project/databricks.yml @@ -7,7 +7,7 @@ bundle: include: - resources/*.yml - resources/*/*.yml - - ./*.yml + - my_project_pipeline/*.yml # Variable declarations. These variables are assigned in the dev/prod targets below. variables: diff --git a/acceptance/pipelines/e2e/output/my_project/my_project_pipeline_2.pipeline.yml b/acceptance/pipelines/e2e/output/my_project/my_project_pipeline_2.pipeline.yml deleted file mode 100644 index c2a2f17887..0000000000 --- a/acceptance/pipelines/e2e/output/my_project/my_project_pipeline_2.pipeline.yml +++ /dev/null @@ -1,4 +0,0 @@ -resources: - pipelines: - my_project_pipeline_2: - name: my_project_pipeline_2 diff --git a/acceptance/pipelines/e2e/output/my_project/transformations/sample_trips_my_project.sql b/acceptance/pipelines/e2e/output/my_project/transformations/sample_trips_my_project.sql new file mode 100644 index 0000000000..a65d9b0cb8 --- /dev/null +++ b/acceptance/pipelines/e2e/output/my_project/transformations/sample_trips_my_project.sql @@ -0,0 +1,9 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. 
+ +CREATE MATERIALIZED VIEW sample_trips_my_project AS +SELECT + pickup_zip, + fare_amount +FROM samples.nyctaxi.trips diff --git a/acceptance/pipelines/e2e/output/my_project/transformations/sample_zones_my_project.sql b/acceptance/pipelines/e2e/output/my_project/transformations/sample_zones_my_project.sql new file mode 100644 index 0000000000..28785fc619 --- /dev/null +++ b/acceptance/pipelines/e2e/output/my_project/transformations/sample_zones_my_project.sql @@ -0,0 +1,10 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. + +CREATE MATERIALIZED VIEW sample_zones_my_project AS +SELECT + pickup_zip, + SUM(fare_amount) AS total_fare +FROM sample_trips_my_project +GROUP BY pickup_zip diff --git a/acceptance/pipelines/init/error-cases/output/my_project/README.md b/acceptance/pipelines/init/error-cases/output/my_project/README.md index 88914e1e36..48def0c4df 100644 --- a/acceptance/pipelines/init/error-cases/output/my_project/README.md +++ b/acceptance/pipelines/init/error-cases/output/my_project/README.md @@ -13,30 +13,13 @@ The 'my_project' project was generated by using the CLI Pipelines template. 3. Authenticate to your Databricks workspace, if you have not done so already: ``` - $ pipelines auth login + $ databricks auth login ``` 4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from https://www.databricks.com/blog/announcing-pycharm-integration-databricks. -## Pipeline Structure - -This folder defines all source code for the my_project_pipeline pipeline: - -- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. -- `transformations`: All dataset definitions and transformations. -- `utilities` (optional): Utility functions and Python modules used in this pipeline. 
- -## Getting Started - -To get started, go to the `transformations` folder -- most of the relevant source code lives there: - -* By convention, every dataset under `transformations` is in a separate file. -* Take a look at the sample under "sample_trips_my_project.py" to get familiar with the syntax. - Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. - -For more tutorials and reference material, see https://docs.databricks.com/dlt. ## Deploying pipelines @@ -47,11 +30,6 @@ For more tutorials and reference material, see https://docs.databricks.com/dlt. (Note that "dev" is the default target, so the `--target` parameter is optional here.) - This deploys everything that's defined for this project. - For example, the default template would deploy a pipeline called - `[dev yourname] my_project_pipeline` to your workspace. - You can find that pipeline by opening your workpace and clicking on **Jobs & Pipelines**. - 2. Similarly, to deploy a production copy, type: ``` $ pipelines deploy --target prod diff --git a/acceptance/pipelines/init/error-cases/output/my_project/databricks.yml b/acceptance/pipelines/init/error-cases/output/my_project/databricks.yml index 871656882c..0f2cecce23 100644 --- a/acceptance/pipelines/init/error-cases/output/my_project/databricks.yml +++ b/acceptance/pipelines/init/error-cases/output/my_project/databricks.yml @@ -7,7 +7,7 @@ bundle: include: - resources/*.yml - resources/*/*.yml - - ./*.yml + - my_project_pipeline/*.yml # Variable declarations. These variables are assigned in the dev/prod targets below. 
variables: diff --git a/acceptance/pipelines/init/error-cases/output/my_project/transformations/sample_trips_my_project.sql b/acceptance/pipelines/init/error-cases/output/my_project/transformations/sample_trips_my_project.sql new file mode 100644 index 0000000000..a65d9b0cb8 --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/my_project/transformations/sample_trips_my_project.sql @@ -0,0 +1,9 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. + +CREATE MATERIALIZED VIEW sample_trips_my_project AS +SELECT + pickup_zip, + fare_amount +FROM samples.nyctaxi.trips diff --git a/acceptance/pipelines/init/error-cases/output/my_project/transformations/sample_zones_my_project.sql b/acceptance/pipelines/init/error-cases/output/my_project/transformations/sample_zones_my_project.sql new file mode 100644 index 0000000000..28785fc619 --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/my_project/transformations/sample_zones_my_project.sql @@ -0,0 +1,10 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. + +CREATE MATERIALIZED VIEW sample_zones_my_project AS +SELECT + pickup_zip, + SUM(fare_amount) AS total_fare +FROM sample_trips_my_project +GROUP BY pickup_zip diff --git a/acceptance/pipelines/init/python/output/my_python_project/README.md b/acceptance/pipelines/init/python/output/my_python_project/README.md index 5c87ad38c9..02ff63426f 100644 --- a/acceptance/pipelines/init/python/output/my_python_project/README.md +++ b/acceptance/pipelines/init/python/output/my_python_project/README.md @@ -13,30 +13,13 @@ The 'my_python_project' project was generated by using the CLI Pipelines templat 3. Authenticate to your Databricks workspace, if you have not done so already: ``` - $ pipelines auth login + $ databricks auth login ``` 4. 
Optionally, install developer tools such as the Databricks extension for Visual Studio Code from https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from https://www.databricks.com/blog/announcing-pycharm-integration-databricks. -## Pipeline Structure - -This folder defines all source code for the my_python_project_pipeline pipeline: - -- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. -- `transformations`: All dataset definitions and transformations. -- `utilities` (optional): Utility functions and Python modules used in this pipeline. - -## Getting Started - -To get started, go to the `transformations` folder -- most of the relevant source code lives there: - -* By convention, every dataset under `transformations` is in a separate file. -* Take a look at the sample under "sample_trips_my_python_project.py" to get familiar with the syntax. - Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. - -For more tutorials and reference material, see https://docs.databricks.com/dlt. ## Deploying pipelines @@ -47,11 +30,6 @@ For more tutorials and reference material, see https://docs.databricks.com/dlt. (Note that "dev" is the default target, so the `--target` parameter is optional here.) - This deploys everything that's defined for this project. - For example, the default template would deploy a pipeline called - `[dev yourname] my_python_project_pipeline` to your workspace. - You can find that pipeline by opening your workpace and clicking on **Jobs & Pipelines**. - 2. 
Similarly, to deploy a production copy, type: ``` $ pipelines deploy --target prod diff --git a/acceptance/pipelines/init/python/output/my_python_project/databricks.yml b/acceptance/pipelines/init/python/output/my_python_project/databricks.yml index f9b7ef40de..6116fc783b 100644 --- a/acceptance/pipelines/init/python/output/my_python_project/databricks.yml +++ b/acceptance/pipelines/init/python/output/my_python_project/databricks.yml @@ -7,7 +7,7 @@ bundle: include: - resources/*.yml - resources/*/*.yml - - ./*.yml + - my_python_project_pipeline/*.yml # Variable declarations. These variables are assigned in the dev/prod targets below. variables: diff --git a/acceptance/pipelines/init/python/output/my_python_project/transformations/sample_trips_my_python_project.sql b/acceptance/pipelines/init/python/output/my_python_project/transformations/sample_trips_my_python_project.sql new file mode 100644 index 0000000000..7ddb904a3d --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_python_project/transformations/sample_trips_my_python_project.sql @@ -0,0 +1,9 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. + +CREATE MATERIALIZED VIEW sample_trips_my_python_project AS +SELECT + pickup_zip, + fare_amount +FROM samples.nyctaxi.trips diff --git a/acceptance/pipelines/init/python/output/my_python_project/transformations/sample_zones_my_python_project.sql b/acceptance/pipelines/init/python/output/my_python_project/transformations/sample_zones_my_python_project.sql new file mode 100644 index 0000000000..b2b496dde0 --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_python_project/transformations/sample_zones_my_python_project.sql @@ -0,0 +1,10 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. 
+ +CREATE MATERIALIZED VIEW sample_zones_my_python_project AS +SELECT + pickup_zip, + SUM(fare_amount) AS total_fare +FROM sample_trips_my_python_project +GROUP BY pickup_zip diff --git a/acceptance/pipelines/init/sql/output/my_sql_project/README.md b/acceptance/pipelines/init/sql/output/my_sql_project/README.md index fa7a8d3307..b04732c41a 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_project/README.md +++ b/acceptance/pipelines/init/sql/output/my_sql_project/README.md @@ -13,29 +13,13 @@ The 'my_sql_project' project was generated by using the CLI Pipelines template. 3. Authenticate to your Databricks workspace, if you have not done so already: ``` - $ pipelines auth login + $ databricks auth login ``` 4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from https://www.databricks.com/blog/announcing-pycharm-integration-databricks. -## Pipeline Structure - -This folder defines all source code for the my_sql_project_pipeline pipeline: - -- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. -- `transformations`: All dataset definitions and transformations. - -## Getting Started - -To get started, go to the `transformations` folder -- most of the relevant source code lives there: - -* By convention, every dataset under `transformations` is in a separate file. -* Take a look at the sample under "sample_trips_my_sql_project.sql" to get familiar with the syntax. - Read more about the syntax at https://docs.databricks.com/dlt/sql-ref.html. - -For more tutorials and reference material, see https://docs.databricks.com/dlt. ## Deploying pipelines @@ -46,11 +30,6 @@ For more tutorials and reference material, see https://docs.databricks.com/dlt. (Note that "dev" is the default target, so the `--target` parameter is optional here.) - This deploys everything that's defined for this project. 
- For example, the default template would deploy a pipeline called - `[dev yourname] my_sql_project_pipeline` to your workspace. - You can find that pipeline by opening your workpace and clicking on **Jobs & Pipelines**. - 2. Similarly, to deploy a production copy, type: ``` $ pipelines deploy --target prod diff --git a/acceptance/pipelines/init/sql/output/my_sql_project/databricks.yml b/acceptance/pipelines/init/sql/output/my_sql_project/databricks.yml index fc415f32d4..28cf4723af 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_project/databricks.yml +++ b/acceptance/pipelines/init/sql/output/my_sql_project/databricks.yml @@ -7,7 +7,7 @@ bundle: include: - resources/*.yml - resources/*/*.yml - - ./*.yml + - my_sql_project_pipeline/*.yml # Variable declarations. These variables are assigned in the dev/prod targets below. variables: @@ -28,7 +28,7 @@ targets: host: [DATABRICKS_URL] variables: catalog: main - schema: shared_dev + schema: shared notifications: [] prod: diff --git a/acceptance/pipelines/init/sql/output/my_sql_project/explorations/sample_exploration.ipynb b/acceptance/pipelines/init/sql/output/my_sql_project/explorations/sample_exploration.ipynb index deee8395ea..c61ec69bc6 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_project/explorations/sample_exploration.ipynb +++ b/acceptance/pipelines/init/sql/output/my_sql_project/explorations/sample_exploration.ipynb @@ -38,7 +38,7 @@ "-- !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. 
The tables referenced in this notebook depend on that step.\n", "\n", "USE CATALOG `main`;\n", - "USE SCHEMA `shared_dev`;\n", + "USE SCHEMA `shared`;\n", "\n", "SELECT * from my_sql_project;" ] diff --git a/acceptance/pipelines/init/sql/output/my_sql_project/transformations/sample_trips_my_sql_project.py b/acceptance/pipelines/init/sql/output/my_sql_project/transformations/sample_trips_my_sql_project.py new file mode 100644 index 0000000000..ca10e69400 --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_project/transformations/sample_trips_my_sql_project.py @@ -0,0 +1,13 @@ +import dlt +from pyspark.sql.functions import col +from utilities import utils + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. + + +@dlt.table +def sample_trips_my_sql_project(): + return spark.read.table("samples.nyctaxi.trips").withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) diff --git a/acceptance/pipelines/init/sql/output/my_sql_project/transformations/sample_zones_my_sql_project.py b/acceptance/pipelines/init/sql/output/my_sql_project/transformations/sample_zones_my_sql_project.py new file mode 100644 index 0000000000..167fc6cb81 --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_project/transformations/sample_zones_my_sql_project.py @@ -0,0 +1,13 @@ +import dlt +from pyspark.sql.functions import col, sum + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. 
+ + +@dlt.table +def sample_zones_my_sql_project(): + # Read from the "sample_trips" table, then sum all the fares + return spark.read.table("sample_trips_my_sql_project").groupBy(col("pickup_zip")).agg(sum("fare_amount").alias("total_fare")) diff --git a/acceptance/pipelines/init/sql/output/my_sql_project/utilities/utils.py b/acceptance/pipelines/init/sql/output/my_sql_project/utilities/utils.py new file mode 100644 index 0000000000..ff039898f0 --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_project/utilities/utils.py @@ -0,0 +1,8 @@ +from pyspark.sql.functions import udf +from pyspark.sql.types import FloatType + + +@udf(returnType=FloatType()) +def distance_km(distance_miles): + """Convert distance from miles to kilometers (1 mile = 1.60934 km).""" + return distance_miles * 1.60934 diff --git a/libs/template/helpers.go b/libs/template/helpers.go index 27bb0d0432..914aa5be50 100644 --- a/libs/template/helpers.go +++ b/libs/template/helpers.go @@ -8,9 +8,8 @@ import ( "net/url" "os" "regexp" - "slices" - "strings" "text/template" + "time" "github.com/databricks/cli/libs/cmdctx" "github.com/databricks/cli/libs/iamutil" @@ -39,12 +38,6 @@ var ( cachedCatalog *string ) -var metastoreDisabledErrorCodes = []string{ - "PERMISSION_DENIED", - "METASTORE_DOES_NOT_EXIST", // Default metastore is not assigned to the workspace. - "FEATURE_DISABLED", // Unity Catalog is not available for feature tier STANDARD_TIER. -} - // UUID that is stable for the duration of the template execution. This can be used // to populate the `bundle.uuid` field in databricks.yml by template authors. 
// @@ -96,6 +89,10 @@ func loadHelpers(ctx context.Context) template.FuncMap { } return result }, + "short_date_time": func() string { + now := time.Now() + return fmt.Sprintf("%s_%02d_%02d%02d", now.Format("jan"), now.Day(), now.Hour(), now.Minute()) + }, // Get smallest node type (follows Terraform's GetSmallestNodeType) "smallest_node_type": func() (string, error) { if w.Config.Host == "" { @@ -148,7 +145,7 @@ func loadHelpers(ctx context.Context) template.FuncMap { metastore, err := w.Metastores.Current(ctx) if err != nil { var aerr *apierr.APIError - if errors.As(err, &aerr) && slices.Contains(metastoreDisabledErrorCodes, aerr.ErrorCode) { + if errors.As(err, &aerr) && (aerr.ErrorCode == "PERMISSION_DENIED" || aerr.ErrorCode == "METASTORE_DOES_NOT_EXIST") { // Ignore: access denied or workspace doesn't have a metastore assigned empty_default := "" cachedCatalog = &empty_default @@ -175,11 +172,5 @@ func loadHelpers(ctx context.Context) template.FuncMap { cachedIsServicePrincipal = &result return result, nil }, - "lower": func(s string) string { - return strings.ToLower(s) - }, - "upper": func(s string) string { - return strings.ToUpper(s) - }, } } diff --git a/libs/template/helpers_test.go b/libs/template/helpers_test.go index 36d0e1cc5b..1d7d3a5108 100644 --- a/libs/template/helpers_test.go +++ b/libs/template/helpers_test.go @@ -105,6 +105,22 @@ func TestTemplateUuidFunction(t *testing.T) { assert.Regexp(t, "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", uuid) } +func TestTemplateReplaceFunction(t *testing.T) { + ctx := context.Background() + + ctx = cmdctx.SetWorkspaceClient(ctx, nil) + helpers := loadHelpers(ctx) + r, err := newRenderer(ctx, nil, helpers, os.DirFS("."), "./testdata/replace/template", "./testdata/replace/library") + require.NoError(t, err) + + err = r.walk() + assert.NoError(t, err) + + assert.Len(t, r.files, 1) + content := strings.TrimSpace(string(r.files[0].(*inMemoryFile).content)) + assert.Equal(t, "my", content) +} + 
func TestTemplateUrlFunction(t *testing.T) { ctx := context.Background() @@ -137,6 +153,26 @@ func TestTemplateMapPairFunction(t *testing.T) { assert.Equal(t, "false 123 hello 12.3", string(r.files[0].(*inMemoryFile).content)) } +func TestTemplateShortDateFunction(t *testing.T) { + ctx := context.Background() + + ctx = cmdctx.SetWorkspaceClient(ctx, nil) + helpers := loadHelpers(ctx) + r, err := newRenderer(ctx, nil, helpers, os.DirFS("."), "./testdata/short-date/template", "./testdata/short-date/library") + require.NoError(t, err) + + err = r.walk() + assert.NoError(t, err) + + assert.Len(t, r.files, 1) + content := string(r.files[0].(*inMemoryFile).content) + assert.Contains(t, content, "This file was created on") + + // Check that the file path contains a date-like pattern + filePath := r.files[0].RelPath() + assert.Regexp(t, `^[A-Za-z]{3}_\d{2}_\d{4}\.txt$`, filePath) +} + func TestWorkspaceHost(t *testing.T) { ctx := context.Background() diff --git a/libs/template/template.go b/libs/template/template.go index 48dd5f9236..a1986cad9d 100644 --- a/libs/template/template.go +++ b/libs/template/template.go @@ -38,7 +38,7 @@ const ( var databricksTemplates = []Template{ { name: DefaultPython, - description: "The default Python template for Notebooks and Lakeflow", + description: "The default Python template, using Python files or notebooks with Lakeflow", Reader: &builtinReader{name: string(DefaultPython)}, Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: DefaultPython}}, }, diff --git a/libs/template/template_test.go b/libs/template/template_test.go index c738bac801..cd0d394bdd 100644 --- a/libs/template/template_test.go +++ b/libs/template/template_test.go @@ -8,7 +8,7 @@ import ( ) func TestTemplateHelpDescriptions(t *testing.T) { - expected := `- default-python: The default Python template for Notebooks and Lakeflow + expected := `- default-python: The default Python template, using Python files or notebooks with Lakeflow - default-sql: The 
default SQL template for .sql files that run with Databricks SQL - dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) - mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks) @@ -18,7 +18,7 @@ func TestTemplateHelpDescriptions(t *testing.T) { func TestTemplateOptions(t *testing.T) { expected := []cmdio.Tuple{ - {Name: "default-python", Id: "The default Python template for Notebooks and Lakeflow"}, + {Name: "default-python", Id: "The default Python template, using Python files or notebooks with Lakeflow"}, {Name: "default-sql", Id: "The default SQL template for .sql files that run with Databricks SQL"}, {Name: "dbt-sql", Id: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)"}, {Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"}, diff --git a/libs/template/templates/cli-pipelines/databricks_template_schema.json b/libs/template/templates/cli-pipelines/databricks_template_schema.json index ce617cc9dd..adc580c044 100644 --- a/libs/template/templates/cli-pipelines/databricks_template_schema.json +++ b/libs/template/templates/cli-pipelines/databricks_template_schema.json @@ -39,7 +39,7 @@ "default": "default", "pattern": "^\\w+$", "pattern_match_failure_message": "Invalid schema name.", - "description": "\nInitial schema during development:\nNote: This schema name will be suffixed with '_dev' when deployed to target the development environment.\ndefault_schema", + "description": "\nInitial schema during development:\ndefault_schema", "order": 5 }, "language": { diff --git a/libs/template/templates/cli-pipelines/library/variables.tmpl b/libs/template/templates/cli-pipelines/library/variables.tmpl index fb0e6f8922..9c5c36b449 100644 --- a/libs/template/templates/cli-pipelines/library/variables.tmpl +++ b/libs/template/templates/cli-pipelines/library/variables.tmpl @@ -10,7 
+10,7 @@ {{- if (regexp "^yes").MatchString .personal_schemas -}} {{ short_name }} {{- else -}} - {{ .shared_schema }}_dev + {{ .shared_schema }} {{- end}} {{- end }} @@ -19,7 +19,7 @@ {{- if (regexp "^yes").MatchString .personal_schemas -}} ${workspace.current_user.short_name} {{- else -}} - {{ .shared_schema }}_dev + {{ .shared_schema }} {{- end}} {{- end }} diff --git a/libs/template/templates/cli-pipelines/template/__preamble.tmpl b/libs/template/templates/cli-pipelines/template/__preamble.tmpl index f116c0b44e..199ad088a6 100644 --- a/libs/template/templates/cli-pipelines/template/__preamble.tmpl +++ b/libs/template/templates/cli-pipelines/template/__preamble.tmpl @@ -7,10 +7,10 @@ This file only contains template directives; it is skipped for the actual output {{$isSQL := eq .language "sql"}} {{if $isSQL}} - {{skip "{{.project_name}}/utilities/utils.py"}} - {{skip "{{.project_name}}/transformations/sample_zones_{{.project_name}}.py"}} - {{skip "{{.project_name}}/transformations/sample_trips_{{.project_name}}.py"}} + {{skip "{{.project_name}}/{{.project_name}}_pipeline/utilities/utils.py"}} + {{skip "{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py"}} + {{skip "{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py"}} {{else}} - {{skip "{{.project_name}}/transformations/sample_zones_{{.project_name}}.sql"}} - {{skip "{{.project_name}}/transformations/sample_trips_{{.project_name}}.sql"}} + {{skip "{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql"}} + {{skip "{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql"}} {{end}} diff --git a/libs/template/templates/cli-pipelines/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/README.md.tmpl index fc8544cc79..021ec94625 100644 --- 
a/libs/template/templates/cli-pipelines/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/cli-pipelines/template/{{.project_name}}/README.md.tmpl @@ -13,41 +13,13 @@ The '{{.project_name}}' project was generated by using the CLI Pipelines templat 3. Authenticate to your Databricks workspace, if you have not done so already: ``` - $ pipelines auth login + $ databricks auth login ``` 4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from https://www.databricks.com/blog/announcing-pycharm-integration-databricks. -## Pipeline Structure - -This folder defines all source code for the {{template `pipeline_name` .}} pipeline: - -{{ if (eq .language "python") -}} -- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. -- `transformations`: All dataset definitions and transformations. -- `utilities` (optional): Utility functions and Python modules used in this pipeline. -{{- else -}} -- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. -- `transformations`: All dataset definitions and transformations. -{{- end }} - -## Getting Started - -To get started, go to the `transformations` folder -- most of the relevant source code lives there: - -{{ if (eq .language "python") -}} -* By convention, every dataset under `transformations` is in a separate file. -* Take a look at the sample under "sample_trips_{{ .project_name }}.py" to get familiar with the syntax. - Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. -{{- else -}} -* By convention, every dataset under `transformations` is in a separate file. -* Take a look at the sample under "sample_trips_{{ .project_name }}.sql" to get familiar with the syntax. - Read more about the syntax at https://docs.databricks.com/dlt/sql-ref.html. 
-{{- end }} - -For more tutorials and reference material, see https://docs.databricks.com/dlt. ## Deploying pipelines @@ -58,11 +30,6 @@ For more tutorials and reference material, see https://docs.databricks.com/dlt. (Note that "dev" is the default target, so the `--target` parameter is optional here.) - This deploys everything that's defined for this project. - For example, the default template would deploy a pipeline called - `[dev yourname] {{.project_name}}_pipeline` to your workspace. - You can find that pipeline by opening your workpace and clicking on **Jobs & Pipelines**. - 2. Similarly, to deploy a production copy, type: ``` $ pipelines deploy --target prod diff --git a/libs/template/templates/cli-pipelines/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/databricks.yml.tmpl index ffcc6ba7b1..ad227f4867 100644 --- a/libs/template/templates/cli-pipelines/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/cli-pipelines/template/{{.project_name}}/databricks.yml.tmpl @@ -7,7 +7,7 @@ bundle: include: - resources/*.yml - resources/*/*.yml - - ./*.yml + - {{.project_name}}_pipeline/*.yml # Variable declarations. These variables are assigned in the dev/prod targets below. variables: diff --git a/libs/template/templates/default-python/databricks_template_schema.json b/libs/template/templates/default-python/databricks_template_schema.json index c4207a3b35..9c59419ec2 100644 --- a/libs/template/templates/default-python/databricks_template_schema.json +++ b/libs/template/templates/default-python/databricks_template_schema.json @@ -9,33 +9,40 @@ "pattern": "^[A-Za-z0-9_]+$", "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." 
}, - "include_notebook": { + "project_name_short": { + "skip_prompt_if": {}, "type": "string", - "default": "yes", - "enum": ["yes", "no"], - "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'", + "default": "{{index ((regexp \"^(.*)_project$\").FindStringSubmatch .project_name) 1}}", + "description": "Short name for the project (without _project suffix)", "order": 2 }, - "include_dlt": { + "include_job": { "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a stub (sample) Lakeflow Declarative Pipeline in '{{.project_name}}{{path_separator}}src'", + "description": "Include a Lakeflow job that runs a notebook in '{{.project_name}}{{path_separator}}resources'", "order": 3 }, - "include_python": { + "include_pipeline": { "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'", + "description": "Include a Lakeflow pipeline in '{{.project_name}}{{path_separator}}resources'", "order": 4 }, + "include_python": { + "type": "string", + "default": "yes", + "enum": ["yes", "no"], + "description": "Include a Python package with a build configuration in '{{.project_name}}{{path_separator}}lib'", + "order": 5 + }, "serverless": { "type": "string", "default": "yes", "enum": ["yes", "no"], "description": "Use serverless compute", - "order": 5 + "order": 6 } }, "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." 
diff --git a/libs/template/templates/default-python/library/versions.tmpl b/libs/template/templates/default-python/library/versions.tmpl index 79c3955c99..912dc9c9d7 100644 --- a/libs/template/templates/default-python/library/versions.tmpl +++ b/libs/template/templates/default-python/library/versions.tmpl @@ -1,7 +1,7 @@ {{/* The latest LTS DBR version; this should be updated a few months after each LTS. */}} {{define "latest_lts_dbr_version" -}} - 15.4.x-scala2.12 + 16.4.x-scala2.12 {{- end}} {{/* A safe version of DB Connect that is compatible with at least half the @@ -15,10 +15,3 @@ {{define "conservative_db_connect_version_spec" -}} >=15.4,<15.5 {{- end}} - -{{/* DB Connect 15 only supports versions up to 3.13 because of a limitation in - * pyarrow: https://arrow.apache.org/docs/python/install.html#python-compatibility - */}} -{{define "conservative_db_connect_python_version_spec" -}} - >=3.10,<=3.13 -{{- end}} diff --git a/libs/template/templates/default-python/template/__preamble.tmpl b/libs/template/templates/default-python/template/__preamble.tmpl index e579c34fb0..a3737c7b34 100644 --- a/libs/template/templates/default-python/template/__preamble.tmpl +++ b/libs/template/templates/default-python/template/__preamble.tmpl @@ -4,29 +4,22 @@ This file only template directives; it is skipped for the actual output. 
{{skip "__preamble"}} -{{$notDLT := not (eq .include_dlt "yes")}} -{{$notNotebook := not (eq .include_notebook "yes")}} -{{$notPython := not (eq .include_python "yes")}} +{{$notPipeline := not (eq .include_pipeline "yes")}} +{{$notJob := not (eq .include_job "yes")}} +{{$notPythonPackage := not (eq .include_python "yes")}} -{{if $notPython}} - {{skip "{{.project_name}}/src/{{.project_name}}"}} - {{skip "{{.project_name}}/tests/main_test.py"}} - {{skip "{{.project_name}}/setup.py"}} - {{skip "{{.project_name}}/pytest.ini"}} - {{skip "{{.project_name}}/requirements-dev.txt"}} +{{if $notPythonPackage}} + {{skip "{{.project_name}}/lib/{{.project_name}}"}} {{end}} -{{if $notDLT}} - {{skip "{{.project_name}}/src/pipeline.ipynb"}} - {{skip "{{.project_name}}/resources/{{.project_name}}.pipeline.yml"}} +{{if $notPipeline}} + {{skip "{{.project_name}}/resources/{{.project_name_short}}_pipeline"}} {{end}} -{{if $notNotebook}} - {{skip "{{.project_name}}/src/notebook.ipynb"}} +{{if $notJob}} + {{skip "{{.project_name}}/resources/{{.project_name_short}}_job"}} {{end}} -{{if (and $notDLT $notNotebook $notPython)}} - {{skip "{{.project_name}}/resources/{{.project_name}}.job.yml"}} -{{else}} +{{if not (or $notPipeline $notJob)}} {{skip "{{.project_name}}/resources/.gitkeep"}} {{end}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/.vscode/extensions.json b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/extensions.json index 5d15eba363..1f39c33087 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/.vscode/extensions.json +++ b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/extensions.json @@ -1,7 +1,6 @@ { "recommendations": [ "databricks.databricks", - "ms-python.vscode-pylance", "redhat.vscode-yaml" ] } diff --git a/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json 
b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json index 8ee87c30d4..f3be9a10ae 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json +++ b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json @@ -1,16 +1,33 @@ { - "python.analysis.stubPath": ".vscode", "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", "python.testing.pytestArgs": [ "." ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - "python.analysis.extraPaths": ["src"], "files.exclude": { "**/*.egg-info": true, "**/__pycache__": true, ".pytest_cache": true, }, + + // Pylance settings (VS Code) + "python.analysis.extraPaths": ["src", "resources"], + "python.analysis.typeCheckingMode": "basic", + "python.analysis.diagnosticMode": "workspace", + "python.analysis.stubPath": ".vscode", + + // Pyright settings (Cursor) + "cursorpyright.analysis.extraPaths": ["src", "resources"], + "cursorpyright.analysis.typeCheckingMode": "basic", + "cursorpyright.analysis.diagnosticMode": "workspace", + "cursorpyright.analysis.stubPath": ".vscode", + + // General Python settings + "python.defaultInterpreterPath": "./.venv/bin/python", + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + }, } diff --git a/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl index cf1d0978b8..c0e6fe4917 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl +++ 
b/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl @@ -1,7 +1,13 @@ """This file configures pytest. This file is in the root since it can be used for tests in any place in this -project, including tests under resources/. +bundle, including tests under resources/. + +This module sets up the test environment by: +- Adding resources/* directories to sys.path for module discovery +- Configuring fallback serverless compute if needed +- Providing a SparkSession fixture for tests +- Managing pytest output capture for better debugging """ import os, sys, pathlib @@ -14,7 +20,9 @@ try: from pyspark.sql import SparkSession import pytest except ImportError: - raise ImportError("Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn more about uv.") + raise ImportError( + "Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn more about uv." + ) def add_all_resources_to_sys_path(): @@ -27,7 +35,9 @@ def add_all_resources_to_sys_path(): for py in resource.rglob("*.py"): mod = ".".join(py.relative_to(resource).with_suffix("").parts) if mod in seen: - raise ImportError(f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}") + raise ImportError( + f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}" + ) seen[mod] = py @@ -61,16 +71,13 @@ def pytest_configure(config: pytest.Config): add_all_resources_to_sys_path() enable_fallback_compute() - # Initialize Spark session eagerly, so it is available even when - # SparkSession.builder.getOrCreate() is used. For DB Connect 15+, - # we validate version compatibility with the remote cluster.
- if hasattr(DatabricksSession.builder, "validateSession"): - DatabricksSession.builder.validateSession().getOrCreate() - else: - DatabricksSession.builder.getOrCreate() - -@pytest.fixture(scope="session") +@pytest.fixture(scope="session", autouse=True) def spark() -> SparkSession: """Provide a SparkSession fixture for tests.""" + if hasattr(DatabricksSession.builder, "validateSession"): + {{/* This is relevant as long as DB Connect 15 is in use. + * DB Connect >16 automatically does this validation! */}} + # For DB Connect 15+, validate that the version is compatible with the remote cluster + return DatabricksSession.builder.validateSession().getOrCreate() return DatabricksSession.builder.getOrCreate() diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/__init__.py b/libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}}/__init__.py similarity index 100% rename from acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/__init__.py rename to libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}}/__init__.py diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py b/libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}}/main.py.tmpl similarity index 100% rename from acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py rename to libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}}/main.py.tmpl diff --git a/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl index 3d83de0077..57d689d75a 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl +++
b/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl @@ -2,13 +2,11 @@ name = "{{.project_name}}" version = "0.0.1" authors = [{ name = "{{user_name}}" }] -requires-python = "{{template "conservative_db_connect_python_version_spec"}}" +requires-python = ">= 3.11" [dependency-groups] dev = [ "pytest", - - # Code completion support for Lakeflow Declarative Pipelines, also install databricks-connect "databricks-dlt", # databricks-connect can be used to run parts of this project locally. @@ -19,9 +17,10 @@ dev = [ ] [tool.pytest.ini_options] -pythonpath = "src" +pythonpath = "lib" testpaths = [ "tests", + "resources", ] [build-system] @@ -29,7 +28,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["src/{{.project_name}}"] +packages = ["lib/{{.project_name}}"] [project.scripts] main = "{{.project_name}}.main:main" diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample.py.tmpl new file mode 100644 index 0000000000..1e1f24deb3 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample.py.tmpl @@ -0,0 +1,3 @@ +import {{.project_name}} + +{{.project_name}}.main() diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_notebook.ipynb.tmpl similarity index 78% rename from acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_notebook.ipynb.tmpl index 27c3f19e34..7d50fae137 100644 --- 
a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_notebook.ipynb.tmpl @@ -6,7 +6,7 @@ "application/vnd.databricks.v1+cell": { "cellMetadata": {}, "inputWidgets": {}, - "nuid": "[UUID]", + "nuid": "ee353e42-ff58-4955-9608-12865bd0950e", "showTitle": false, "title": "" } @@ -14,7 +14,7 @@ "source": [ "# Default notebook\n", "\n", - "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml." + "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name_short}}_job/{{.project_name_short}}.job.yml." ] }, { @@ -23,6 +23,7 @@ "metadata": {}, "outputs": [], "source": [ + "# Reload wheel file dependencies every time they are updated\n", "%load_ext autoreload\n", "%autoreload 2" ] @@ -37,16 +38,20 @@ "rowLimit": 10000 }, "inputWidgets": {}, - "nuid": "[UUID]", + "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", "showTitle": false, "title": "" } }, "outputs": [], "source": [ - "from my_default_python import main\n", + {{- if (eq .include_python "yes") }} + "from {{.project_name}} import main\n", "\n", "main.find_all_taxis().show(10)" + {{else}} + "spark.range(10)" + {{end -}} ] } ], diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}.job.yml.tmpl similarity index 66% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}.job.yml.tmpl index 419c4cd2e9..ed490cf204 100644 ---
a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}.job.yml.tmpl @@ -1,15 +1,11 @@ # The main job for {{.project_name}}. -{{- /* Clarify what this job is for Lakeflow Declarative Pipelines only users. */}} -{{if and (eq .include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} -# This job runs {{.project_name}}_pipeline on a schedule. -{{end -}} {{$with_serverless := (eq .serverless "yes") -}} resources: jobs: - {{.project_name}}_job: - name: {{.project_name}}_job + {{.project_name_short}}_job: + name: {{.project_name_short}}_job trigger: # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger @@ -22,23 +18,11 @@ resources: # - your_email@example.com tasks: - {{- if eq .include_notebook "yes" }} - task_key: notebook_task {{- if not $with_serverless}} job_cluster_key: job_cluster{{end}} notebook_task: - notebook_path: ../src/notebook.ipynb - {{- end -}} - {{- if (eq .include_dlt "yes") }} - - - task_key: refresh_pipeline - {{- if (eq .include_notebook "yes" )}} - depends_on: - - task_key: notebook_task - {{- end}} - pipeline_task: - {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}.pipeline.yml */}} - pipeline_id: ${resources.pipelines.{{.project_name}}_pipeline.id} + notebook_path: sample_notebook.ipynb {{- end -}} {{- if (eq .include_python "yes") }} @@ -49,7 +33,6 @@ resources: {{- else if (eq .include_notebook "yes" )}} depends_on: - task_key: notebook_task -{{end}} {{- if $with_serverless }} environment_key: default {{- else }} @@ -62,7 +45,7 @@ resources: # By default we just include the .whl file generated for the {{.project_name}} package. 
# See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html # for more information on how to add other libraries. - - whl: ../dist/*.whl + - whl: ../../dist/*.whl {{- end -}} {{else}} {{- end}} @@ -70,13 +53,10 @@ resources: # A list of task execution environment specifications that can be referenced by tasks of this job. environments: - environment_key: default - - # Full documentation of this spec can be found at: - # https://docs.databricks.com/api/workspace/jobs/create#environments-spec spec: client: "2" dependencies: - - ../dist/*.whl + - ../../dist/*.whl {{end}}{{ else }} job_clusters: - job_cluster_key: job_cluster diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/README.md.tmpl new file mode 100644 index 0000000000..c3e9fea9c2 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/README.md.tmpl @@ -0,0 +1,22 @@ +# {{.project_name}} + +This folder defines all source code for the {{.project_name}} pipeline: + +- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations`: All dataset definitions and transformations. +- `utilities` (optional): Utility functions and Python modules used in this pipeline. +- `data_sources` (optional): View definitions describing the source data for this pipeline. + +## Getting Started + +To get started, go to the `transformations` folder -- most of the relevant source code lives there: + +* By convention, every dataset under `transformations` is in a separate file. +* Take a look at the sample under "sample_trips_{{.project_name}}.py" to get familiar with the syntax. + Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. 
+* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. +* Use `Schedule` to run the pipeline on a schedule! + +For more tutorials and reference material, see https://docs.databricks.com/dlt. diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/explorations/sample_exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/explorations/sample_exploration.ipynb.tmpl new file mode 100644 index 0000000000..723574966a --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/explorations/sample_exploration.ipynb.tmpl @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "19a992e9-55e0-49e4-abc7-8c92c420dd5b", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "### Example Exploratory Notebook\n", + "\n", + "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n", + "\n", + "**Note**: This notebook is not executed as part of the pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "1b0a82fa-3c6a-4f29-bb43-ded1c4fd77c6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. 
The tables referenced in this notebook depend on that step.\n", + "\n", + "display(spark.sql(\"SELECT * FROM {{default_catalog}}.{{.project_name_short}}_${bundle.target}.sample_trips_{{short_date_time}}\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "sample_exploration", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/transformations/sample_trips_{{short_date_time}}.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/transformations/sample_trips_{{short_date_time}}.py.tmpl new file mode 100644 index 0000000000..9fd62c2fae --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/transformations/sample_trips_{{short_date_time}}.py.tmpl @@ -0,0 +1,15 @@ +import dlt +from pyspark.sql.functions import col +from utilities import utils + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. 
+ + +@dlt.table +def sample_trips_{{short_date_time}}(): + return spark.read.table("samples.nyctaxi.trips").withColumn( + "trip_distance_km", utils.distance_km(col("trip_distance")) + ) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/transformations/sample_zones_{{short_date_time}}.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/transformations/sample_zones_{{short_date_time}}.py.tmpl new file mode 100644 index 0000000000..fa2eda046b --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/transformations/sample_zones_{{short_date_time}}.py.tmpl @@ -0,0 +1,17 @@ +import dlt +from pyspark.sql.functions import col, sum + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. + + +@dlt.table +def sample_zones_{{short_date_time}}(): + # Read from the "sample_trips" table, then sum all the fares + return ( + spark.read.table(f"sample_trips_{{short_date_time}}") + .groupBy(col("pickup_zip")) + .agg(sum("fare_amount").alias("total_fare")) + ) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/utilities/utils.py b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/utilities/utils.py new file mode 100644 index 0000000000..ff039898f0 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/utilities/utils.py @@ -0,0 +1,8 @@ +from pyspark.sql.functions import udf +from pyspark.sql.types import FloatType + + +@udf(returnType=FloatType()) +def distance_km(distance_miles): + """Convert distance from miles to kilometers (1 mile = 1.60934 km).""" + return distance_miles * 1.60934 diff 
--git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/{{.project_name_short}}.pipeline.yml.tmpl similarity index 66% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/{{.project_name_short}}.pipeline.yml.tmpl index 093b087a01..d5a1107f05 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/{{.project_name_short}}.pipeline.yml.tmpl @@ -2,8 +2,8 @@ # The main pipeline for {{.project_name}} resources: pipelines: - {{.project_name}}_pipeline: - name: {{.project_name}}_pipeline + {{.project_name_short}}_pipeline: + name: {{.project_name_short}}_pipeline_{{short_date_time}} {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} {{- if $with_serverless }} ## Catalog is required for serverless compute @@ -17,9 +17,13 @@ resources: {{- if $with_serverless }} serverless: true {{- end}} + root_path: "." 
+ libraries: - - notebook: - path: ../src/pipeline.ipynb + - glob: + include: transformations/** - configuration: - bundle.sourcePath: ${workspace.file_path}/src + # Currently in beta: environments support for pipelines + ## environment: + ## dependencies: + ## - ../../dist/*.whl diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/{{.project_name_short}}_schedule.job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/{{.project_name_short}}_schedule.job.yml.tmpl new file mode 100644 index 0000000000..948ae7239a --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/{{.project_name_short}}_schedule.job.yml.tmpl @@ -0,0 +1,20 @@ +# The job that triggers {{.project_name}}. +resources: + jobs: + {{.project_name_short}}_schedule: + name: {{.project_name_short}}_schedule + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + #email_notifications: + # on_failure: + # - your_email@example.com + + tasks: + - task_key: refresh_pipeline + pipeline_task: + pipeline_id: ${resources.pipelines.{{.project_name_short}}_pipeline.id} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/pipeline.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/pipeline.ipynb.tmpl deleted file mode 100644 index 342fafcf6f..0000000000 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/pipeline.ipynb.tmpl +++ /dev/null @@ -1,104 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec", - "showTitle": false, - "title": "" - } - }, - 
"source": [ - "# Lakeflow Declarative Pipeline\n", - "\n", - "This Lakeflow Declarative Pipeline definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - {{- if (eq .include_python "yes") }} - "# Import DLT and src/{{.project_name}}\n", - "import dlt\n", - "import sys\n", - "\n", - "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", - "from pyspark.sql.functions import expr\n", - "from {{.project_name}} import main" - {{else}} - "import dlt\n", - "from pyspark.sql.functions import expr\n", - "from pyspark.sql import SparkSession\n", - "spark = SparkSession.builder.getOrCreate()" - {{end -}} - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - {{- if (eq .include_python "yes") }} - "@dlt.view\n", - "def taxi_raw():\n", - " return main.find_all_taxis()\n", - {{else}} - "\n", - "@dlt.view\n", - "def taxi_raw():\n", - " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", - {{end -}} - "\n", - "\n", - "@dlt.table\n", - "def filtered_taxis():\n", - " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "pipeline", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": 
"python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl index 084454eb3e..5163eec788 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl @@ -1,6 +1,8 @@ -from {{.project_name}} import main +from {{.project_name}}.main import find_all_taxis +from databricks.sdk.runtime import spark +from pyspark.sql import DataFrame def test_find_all_taxis(): - taxis = main.find_all_taxis() + taxis = find_all_taxis() assert taxis.count() > 5 diff --git a/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl index 72715f283e..aefecfa93d 100644 --- a/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl @@ -6,4 +6,4 @@ >=15.4,<15.5 {{- end}} -{{define "latest_databricks_bundles_version" -}}0.266.0{{- end}} +{{define "latest_databricks_bundles_version" -}}0.259.0{{- end}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl index 6c64b4378a..629106dbf3 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -14,7 +14,7 @@ "source": [ "# DLT pipeline\n", "\n", - "This Lakeflow Declarative Pipeline definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." 
+ "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." ] }, { diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl index 6a87715ae2..f3be9a10ae 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl @@ -1,20 +1,31 @@ { - "python.analysis.stubPath": ".vscode", - "databricks.python.envFile": "${workspaceFolder}/.env", "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", "python.testing.pytestArgs": [ "." ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - {{- /* Unfortunately extraPaths doesn't support globs!! 
See: https://github.com/microsoft/pylance-release/issues/973 */}} - "python.analysis.extraPaths": ["resources/{{.project_name}}_pipeline"], "files.exclude": { "**/*.egg-info": true, "**/__pycache__": true, ".pytest_cache": true, }, + + // Pylance settings (VS Code) + "python.analysis.extraPaths": ["src", "resources"], + "python.analysis.typeCheckingMode": "basic", + "python.analysis.diagnosticMode": "workspace", + "python.analysis.stubPath": ".vscode", + + // Pyright settings (Cursor) + "cursorpyright.analysis.extraPaths": ["src", "resources"], + "cursorpyright.analysis.typeCheckingMode": "basic", + "cursorpyright.analysis.diagnosticMode": "workspace", + "cursorpyright.analysis.stubPath": ".vscode", + + // General Python settings + "python.defaultInterpreterPath": "./.venv/bin/python", + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, "[python]": { "editor.defaultFormatter": "ms-python.black-formatter", "editor.formatOnSave": true, From ae70375439124b4b567c2625a93eac9b3755ce42 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Thu, 28 Aug 2025 11:19:42 +0200 Subject: [PATCH 19/21] Rename scratch to explorations --- .../{{.project_name}}/{scratch => explorations}/README.md | 0 .../{scratch => explorations}/exploration.ipynb.tmpl | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename libs/template/templates/default-python/template/{{.project_name}}/{scratch => explorations}/README.md (100%) rename libs/template/templates/default-python/template/{{.project_name}}/{scratch => explorations}/exploration.ipynb.tmpl (100%) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/scratch/README.md b/libs/template/templates/default-python/template/{{.project_name}}/explorations/README.md similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/scratch/README.md rename to libs/template/templates/default-python/template/{{.project_name}}/explorations/README.md diff --git 
a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/explorations/exploration.ipynb.tmpl similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/explorations/exploration.ipynb.tmpl From bdf280cb26ba71ea38706a0f23d48a55f930505d Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Thu, 28 Aug 2025 11:20:20 +0200 Subject: [PATCH 20/21] Naming changes --- .../README.md.tmpl | 0 .../explorations/sample_exploration.ipynb.tmpl | 0 .../sample_trips_{{short_date_time}}.py.tmpl | 0 .../sample_zones_{{short_date_time}}.py.tmpl | 0 .../utilities/utils.py | 0 ...oject_name_short}}_etl_pipeline.pipeline.yml.tmpl} | 0 .../{{.project_name_short}}_schedule.job.yml.tmpl | 0 .../{sample.py.tmpl => sample_python_file.py.tmpl} | 0 ....tmpl => {{.project_name_short}}_job.job.yml.tmpl} | 11 +++++------ 9 files changed, 5 insertions(+), 6 deletions(-) rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{{.project_name_short}}_pipeline => {{.project_name_short}}_etl_pipeline}/README.md.tmpl (100%) rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{{.project_name_short}}_pipeline => {{.project_name_short}}_etl_pipeline}/explorations/sample_exploration.ipynb.tmpl (100%) rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{{.project_name_short}}_pipeline => {{.project_name_short}}_etl_pipeline}/transformations/sample_trips_{{short_date_time}}.py.tmpl (100%) rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{{.project_name_short}}_pipeline => {{.project_name_short}}_etl_pipeline}/transformations/sample_zones_{{short_date_time}}.py.tmpl (100%) rename 
libs/template/templates/default-python/template/{{.project_name}}/resources/{{{.project_name_short}}_pipeline => {{.project_name_short}}_etl_pipeline}/utilities/utils.py (100%) rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{{.project_name_short}}_pipeline/{{.project_name_short}}.pipeline.yml.tmpl => {{.project_name_short}}_etl_pipeline/{{.project_name_short}}_etl_pipeline.pipeline.yml.tmpl} (100%) rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{{.project_name_short}}_pipeline => {{.project_name_short}}_etl_pipeline}/{{.project_name_short}}_schedule.job.yml.tmpl (100%) rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{sample.py.tmpl => sample_python_file.py.tmpl} (100%) rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{{.project_name_short}}.job.yml.tmpl => {{.project_name_short}}_job.job.yml.tmpl} (90%) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/README.md.tmpl similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/README.md.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/README.md.tmpl diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/explorations/sample_exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/explorations/sample_exploration.ipynb.tmpl similarity index 100% rename from 
libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/explorations/sample_exploration.ipynb.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/explorations/sample_exploration.ipynb.tmpl diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/transformations/sample_trips_{{short_date_time}}.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/transformations/sample_trips_{{short_date_time}}.py.tmpl similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/transformations/sample_trips_{{short_date_time}}.py.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/transformations/sample_trips_{{short_date_time}}.py.tmpl diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/transformations/sample_zones_{{short_date_time}}.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/transformations/sample_zones_{{short_date_time}}.py.tmpl similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/transformations/sample_zones_{{short_date_time}}.py.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/transformations/sample_zones_{{short_date_time}}.py.tmpl diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/utilities/utils.py 
b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/utilities/utils.py similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/utilities/utils.py rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/utilities/utils.py diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/{{.project_name_short}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/{{.project_name_short}}_etl_pipeline.pipeline.yml.tmpl similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/{{.project_name_short}}.pipeline.yml.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/{{.project_name_short}}_etl_pipeline.pipeline.yml.tmpl diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/{{.project_name_short}}_schedule.job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/{{.project_name_short}}_schedule.job.yml.tmpl similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_pipeline/{{.project_name_short}}_schedule.job.yml.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/{{.project_name_short}}_schedule.job.yml.tmpl diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample.py.tmpl 
b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_python_file.py.tmpl similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample.py.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_python_file.py.tmpl diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}.job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}_job.job.yml.tmpl similarity index 90% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}.job.yml.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}_job.job.yml.tmpl index ed490cf204..571c8ce3a5 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}.job.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}_job.job.yml.tmpl @@ -1,11 +1,11 @@ -# The main job for {{.project_name}}. +# A sample job for {{.project_name}}. 
{{$with_serverless := (eq .serverless "yes") -}} resources: jobs: - {{.project_name_short}}_job: - name: {{.project_name_short}}_job + sample_job: + name: sample_job trigger: # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger @@ -37,9 +37,8 @@ resources: environment_key: default {{- else }} job_cluster_key: job_cluster{{end}} - python_wheel_task: - package_name: {{.project_name}} - entry_point: main + spark_python_task: + python_file: sample_python_file.py {{- if not $with_serverless }} libraries: # By default we just include the .whl file generated for the {{.project_name}} package. From f222dfc4d25d1ee7f5744025988a7f05d4f8ac55 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 1 Sep 2025 10:30:53 +0200 Subject: [PATCH 21/21] Update template --- .../default-python/classic/output.txt | 25 +++- .../output/my_default_python/.gitignore} | 0 .../.vscode/__builtins__.pyi | 3 + .../my_default_python/.vscode/extensions.json | 6 + .../my_default_python/.vscode/settings.json | 39 ++++++ .../output/my_default_python/README.md | 73 +++++++++++ .../output/my_default_python/databricks.yml | 49 ++++++++ .../my_default_python/fixtures/.gitkeep | 9 ++ .../output/my_default_python/pyproject.toml | 35 ++++++ .../my_default_python/resources/.gitkeep | 1 + .../resources/default_python_etl.pipeline.yml | 14 +++ .../resources/sample_job.job.yml | 56 +++++++++ .../src/default_python_etl/README.md | 22 ++++ .../src/default_python_etl}/__init__.py | 0 .../explorations}/__init__.py | 0 .../explorations/sample_exploration.ipynb | 63 ++++++++++ .../transformations}/__init__.py | 0 .../sample_trips_jan_01_1034.py | 15 +++ .../sample_zones_jan_01_1034.py | 17 +++ .../default_python_etl/utilities/__init__.py | 0 .../src/default_python_etl/utilities/utils.py | 12 ++ .../src/sample_notebook.ipynb} | 9 +- .../src/sample_python_file.py | 19 +++ .../my_default_python/src/shared/__init__.py | 0 
.../my_default_python/src/shared/taxis.py | 9 +- .../my_default_python/tests/conftest.py | 93 ++++++++++++++ .../tests/sample_taxis_test.py | 8 ++ .../combinations/classic/output.txt | 49 +++++++- .../combinations/serverless/output.txt | 49 +++++++- .../default-python/fail-missing-uv/output.txt | 25 +++- .../integration_classic/output.txt | 29 ++++- .../templates/default-python/no-uc/output.txt | 9 +- .../serverless-customcatalog/output.txt | 59 ++++++++- .../default-python/serverless/output.txt | 33 ++++- .../.vscode/__builtins__.pyi | 3 + .../my_default_python/.vscode/extensions.json | 6 + .../my_default_python/.vscode/settings.json | 39 ++++++ .../output/my_default_python/README.md | 73 +++++++++++ .../output/my_default_python/databricks.yml | 47 +++++++ .../my_default_python/fixtures/.gitkeep | 9 ++ .../output/my_default_python/out.gitignore | 8 ++ .../output/my_default_python/pyproject.toml | 35 ++++++ .../my_default_python/resources/.gitkeep | 1 + .../resources/default_python_etl.pipeline.yml | 15 +++ .../resources/sample_job.job.yml | 49 ++++++++ .../src/default_python_etl/README.md | 22 ++++ .../src/default_python_etl/__init__.py | 0 .../explorations/__init__.py | 0 .../explorations/sample_exploration.ipynb | 63 ++++++++++ .../transformations/__init__.py | 0 .../sample_trips_jan_01_1034.py | 15 +++ .../sample_zones_jan_01_1034.py | 17 +++ .../default_python_etl/utilities/__init__.py | 0 .../src/default_python_etl/utilities/utils.py | 12 ++ .../src/sample_notebook.ipynb | 17 ++- .../src/sample_python_file.py | 19 +++ .../my_default_python/src/shared/__init__.py | 0 .../my_default_python/src/shared/taxis.py | 9 +- .../my_default_python/tests/conftest.py | 93 ++++++++++++++ .../tests/sample_taxis_test.py | 8 ++ .../experimental-jobs-as-code/output.txt | 119 +----------------- .../output/my_jobs_as_code/README.md | 58 --------- .../output/my_jobs_as_code/databricks.yml | 50 -------- .../output/my_jobs_as_code/fixtures/.gitkeep | 22 ---- 
.../output/my_jobs_as_code/pyproject.toml | 49 -------- .../my_jobs_as_code/resources/__init__.py | 16 --- .../resources/my_jobs_as_code_job.py | 68 ---------- .../resources/my_jobs_as_code_pipeline.py | 20 --- .../output/my_jobs_as_code/scratch/README.md | 4 - .../output/my_jobs_as_code/setup.py | 18 --- .../my_jobs_as_code/src/dlt_pipeline.ipynb | 90 ------------- .../src/my_jobs_as_code/main.py | 25 ---- .../output/my_jobs_as_code/tests/main_test.py | 8 -- .../default-python/out.databricks.yml | 49 ++++++++ .../telemetry/default-python/out.requests.txt | 34 +++++ .../telemetry/default-python/output.txt | 56 ++++++++- .../workspace/apps/run-local-node/out.run.txt | 23 ++++ .../workspace/apps/run-local-node/output.txt | 5 +- .../cmd/workspace/apps/run-local/out.run.txt | 54 ++++++++ .../cmd/workspace/apps/run-local/output.txt | 25 +--- .../databricks_template_schema.json | 37 ++++-- .../default-python/template/__preamble.tmpl | 38 ++++-- .../{{.project_name}}/.vscode/settings.json | 16 ++- .../template/{{.project_name}}/README.md.tmpl | 15 ++- .../{{.project_name}}/conftest.py.tmpl | 83 ------------ .../{{.project_name}}/databricks.yml.tmpl | 40 ++++-- .../{{.project_name}}/explorations/README.md | 4 - .../explorations/exploration.ipynb.tmpl | 65 ---------- .../{{.project_name}}/fixtures/.gitkeep.tmpl | 24 +--- .../{{.project_name}}/pyproject.toml.tmpl | 7 +- .../resources/sample_job.job.yml.tmpl | 98 +++++++++++++++ ...roject_name_short}}_etl.pipeline.yml.tmpl} | 26 ++-- .../utilities/utils.py | 8 -- ...project_name_short}}_schedule.job.yml.tmpl | 20 --- .../sample_python_file.py.tmpl | 3 - .../{{.project_name_short}}_job.job.yml.tmpl | 69 ---------- .../template/{{.project_name}}/src/.gitkeep | 1 + .../sample_notebook.ipynb.tmpl | 10 +- .../src/sample_python_file.py.tmpl | 19 +++ .../{{.project_name}}/src/shared/__init__.py | 0 .../src/shared/taxis.py.tmpl | 7 ++ .../README.md.tmpl | 10 +- .../{{.project_name_short}}_etl/__init__.py | 0 
.../explorations/__init__.py.tmpl | 0 .../sample_exploration.ipynb.tmpl | 2 +- .../transformations/__init__.py.tmpl | 0 .../sample_trips_{{short_date_time}}.py.tmpl | 2 +- .../sample_zones_{{short_date_time}}.py.tmpl | 0 .../utilities/__init__.py.tmpl | 0 .../utilities/utils.py.tmpl | 12 ++ .../{{.project_name}}/tests/conftest.py | 93 ++++++++++++++ .../{{.project_name}}/tests/main_test.py.tmpl | 8 -- .../tests/sample_taxis_test.py.tmpl | 8 ++ .../template/__preamble.tmpl | 2 +- 114 files changed, 1938 insertions(+), 972 deletions(-) rename acceptance/bundle/templates/{experimental-jobs-as-code/output/my_jobs_as_code/out.gitignore => default-python/classic/output/my_default_python/.gitignore} (100%) create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/.gitkeep create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/default_python_etl.pipeline.yml create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/sample_job.job.yml create mode 100644 
acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/README.md rename acceptance/bundle/templates/{experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code => default-python/classic/output/my_default_python/src/default_python_etl}/__init__.py (100%) rename {libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}} => acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/explorations}/__init__.py (100%) create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/explorations/sample_exploration.ipynb rename {libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}} => acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/transformations}/__init__.py (100%) create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/transformations/sample_trips_jan_01_1034.py create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/transformations/sample_zones_jan_01_1034.py create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/utilities/__init__.py create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/utilities/utils.py rename acceptance/bundle/templates/{experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb => default-python/classic/output/my_default_python/src/sample_notebook.ipynb} (84%) create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/src/sample_python_file.py create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/src/shared/__init__.py rename 
libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl => acceptance/bundle/templates/default-python/classic/output/my_default_python/src/shared/taxis.py (66%) create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/conftest.py create mode 100644 acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/sample_taxis_test.py create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/.gitkeep create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/default_python_etl.pipeline.yml create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/sample_job.job.yml create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/README.md create mode 100644 
acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/__init__.py create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/explorations/__init__.py create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/explorations/sample_exploration.ipynb create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/transformations/__init__.py create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/transformations/sample_trips_jan_01_1034.py create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/transformations/sample_zones_jan_01_1034.py create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/utilities/__init__.py create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/utilities/utils.py rename libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl => acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/sample_notebook.ipynb (75%) create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/sample_python_file.py create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/shared/__init__.py rename libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}}/main.py.tmpl => acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/shared/taxis.py (66%) create mode 100644 acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/conftest.py create mode 100644 
acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/sample_taxis_test.py delete mode 100644 acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md delete mode 100644 acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml delete mode 100644 acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep delete mode 100644 acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml delete mode 100644 acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py delete mode 100644 acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py delete mode 100644 acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py delete mode 100644 acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md delete mode 100644 acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py delete mode 100644 acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb delete mode 100644 acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py delete mode 100644 acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py create mode 100644 acceptance/bundle/templates/telemetry/default-python/out.databricks.yml create mode 100644 acceptance/bundle/templates/telemetry/default-python/out.requests.txt create mode 100644 acceptance/cmd/workspace/apps/run-local-node/out.run.txt create mode 100644 acceptance/cmd/workspace/apps/run-local/out.run.txt delete mode 100644 libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl delete mode 100644 
libs/template/templates/default-python/template/{{.project_name}}/explorations/README.md delete mode 100644 libs/template/templates/default-python/template/{{.project_name}}/explorations/exploration.ipynb.tmpl create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/sample_job.job.yml.tmpl rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{{.project_name_short}}_etl_pipeline/{{.project_name_short}}_etl_pipeline.pipeline.yml.tmpl => {{.project_name_short}}_etl.pipeline.yml.tmpl} (51%) delete mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/utilities/utils.py delete mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/{{.project_name_short}}_schedule.job.yml.tmpl delete mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_python_file.py.tmpl delete mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}_job.job.yml.tmpl create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/src/.gitkeep rename libs/template/templates/default-python/template/{{.project_name}}/{resources/{{.project_name_short}}_job => src}/sample_notebook.ipynb.tmpl (91%) create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/src/sample_python_file.py.tmpl create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/src/shared/__init__.py create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/src/shared/taxis.py.tmpl rename libs/template/templates/default-python/template/{{.project_name}}/{resources/{{.project_name_short}}_etl_pipeline => src/{{.project_name_short}}_etl}/README.md.tmpl (61%) create mode 100644 
libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/__init__.py create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/explorations/__init__.py.tmpl rename libs/template/templates/default-python/template/{{.project_name}}/{resources/{{.project_name_short}}_etl_pipeline => src/{{.project_name_short}}_etl}/explorations/sample_exploration.ipynb.tmpl (91%) create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/transformations/__init__.py.tmpl rename libs/template/templates/default-python/template/{{.project_name}}/{resources/{{.project_name_short}}_etl_pipeline => src/{{.project_name_short}}_etl}/transformations/sample_trips_{{short_date_time}}.py.tmpl (87%) rename libs/template/templates/default-python/template/{{.project_name}}/{resources/{{.project_name_short}}_etl_pipeline => src/{{.project_name_short}}_etl}/transformations/sample_zones_{{short_date_time}}.py.tmpl (100%) create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/utilities/__init__.py.tmpl create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/utilities/utils.py.tmpl create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/tests/conftest.py delete mode 100644 libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/tests/sample_taxis_test.py.tmpl diff --git a/acceptance/bundle/templates/default-python/classic/output.txt b/acceptance/bundle/templates/default-python/classic/output.txt index bf6f75258a..15d2468038 100644 --- a/acceptance/bundle/templates/default-python/classic/output.txt +++ b/acceptance/bundle/templates/default-python/classic/output.txt @@ -1,7 +1,28 @@ >>> 
[CLI] bundle init default-python --config-file ./input.json --output-dir output - Welcome to the default Python template for Databricks Asset Bundles! -Error: template: :1:2: executing "" at : error calling index: index out of range: 1 + +Please answer the below to tailor your project to your preferences. +You can always change your mind and change your configuration in the databricks.yml file later. + +Note that [DATABRICKS_URL] is used for initialization +(see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change your profile). + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> [CLI] bundle validate -t dev +Error: path [TEST_TMP_DIR]/output/dist/*.whl is not contained in sync root path + +Name: my_default_python +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev + +Found 1 error Exit code: 1 diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/out.gitignore b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.gitignore similarity index 100% rename from acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/out.gitignore rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/.gitignore diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi new file mode 100644 index 0000000000..0edd5181bc --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see 
https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json new file mode 100644 index 0000000000..1f39c33087 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json @@ -0,0 +1,6 @@ +{ + "recommendations": [ + "databricks.databricks", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json new file mode 100644 index 0000000000..d8468d7b60 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json @@ -0,0 +1,39 @@ +{ + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + "dist": true, + }, + "files.associations": { + "**/.gitkeep": "markdown" + } + + // Pylance settings (VS Code) + // Set typeCheckingMode to "basic" to enable type checking! + "python.analysis.typeCheckingMode": "off", + "python.analysis.extraPaths": ["src", "lib", "resources"], + "python.analysis.diagnosticMode": "workspace", + "python.analysis.stubPath": ".vscode", + + // Pyright settings (Cursor) + // Set typeCheckingMode to "basic" to enable type checking! 
+ "cursorpyright.analysis.typeCheckingMode": "off", + "cursorpyright.analysis.extraPaths": ["src", "lib", "resources"], + "cursorpyright.analysis.diagnosticMode": "workspace", + "cursorpyright.analysis.stubPath": ".vscode", + + // General Python settings + "python.defaultInterpreterPath": "./.venv/bin/python", + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "[python]": { + "editor.defaultFormatter": "ms-python.python", + "editor.formatOnSave": true, + }, +} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md new file mode 100644 index 0000000000..b373a1708d --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md @@ -0,0 +1,73 @@ +# my_default_python + +The 'my_default_python' project was generated by using the default-python template. +For documentation on the Databricks Asset Bundles format use for this project, +and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles. + +* `src/`: Python source code for this project. +* `src/shared`: Shared source code across all jobs/pipelines/etc. +* `src/default_python_etl`: Python source code for the default_python_etl pipeline. +* `resources/`: Resource configurations (jobs, pipelines, etc.) +* `tests/`: Unit tests. +* `fixtures/`: Fixtures for data sets (primarily used for testing). + +## Getting started + +Choose how you want to work on this project: + +(a) Directly in your Databricks workspace, see + https://docs.databricks.com/dev-tools/bundles/workspace. + +(b) Locally with an IDE like Cursor or VS Code, see + https://docs.databricks.com/vscode-ext. + +(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +Dependencies for this project should be installed using uv: + +* Make sure you have the UV package manager installed. 
+ It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/. +* Run `uv sync --dev` to install the project's dependencies. + + +# Using this project using the CLI + +The Databricks workspace and IDE extensions provide a graphical interface for working +with this project. It's also possible to interact with it directly using the CLI: + +1. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +2. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_default_python_job` to your workspace. + You can find that job by opening your workpace and clicking on **Jobs & Pipelines**. + +3. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/my_default_python.job.yml). The schedule + is paused when deploying in development mode (see + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + +4. To run a job or pipeline, use the "run" command: + ``` + $ databricks bundle run + ``` +5. Finally, to run tests locally, use `pytest`: + ``` + $ uv run pytest + ``` + diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml new file mode 100644 index 0000000000..cc6079c53c --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml @@ -0,0 +1,49 @@ +# This is a Databricks asset bundle definition for my_default_python. 
+# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_python + uuid: [UUID] + +include: + - resources/*.yml + - resources/*/*.yml + +artifacts: + python_artifact: + type: whl + build: uv build --wheel + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: [DATABRICKS_URL] + variables: + catalog: hive_metastore + schema: ${workspace.current_user.short_name} + presets: + artifacts_dynamic_version: true + prod: + mode: production + workspace: + host: [DATABRICKS_URL] + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. + root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + variables: + catalog: hive_metastore + schema: prod + permissions: + - user_name: [USERNAME] + level: CAN_MANAGE diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep new file mode 100644 index 0000000000..77a906614c --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep @@ -0,0 +1,9 @@ +# Test fixtures directory + +Add JSON or CSV files here. 
In tests, use them with `load_fixture()`: + +``` +def test_using_fixture(load_fixture): + data = load_fixture("my_data.json") + assert len(data) >= 1 +``` diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml new file mode 100644 index 0000000000..7cd4a5cc86 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/pyproject.toml @@ -0,0 +1,35 @@ +[project] +name = "my_default_python" +version = "0.0.1" +authors = [{ name = "[USERNAME]" }] +requires-python = ">= 3.11" + +[dependency-groups] +dev = [ + "pytest", + "databricks-dlt", + + # databricks-connect can be used to run parts of this project locally. + # Note that for local development, you should use a version that is not newer + # than the remote cluster or serverless compute you connect to. + # See also https://docs.databricks.com/dev-tools/databricks-connect.html. + "databricks-connect>=15.4,<15.5", +] + +[tool.pytest.ini_options] +pythonpath = "src" +testpaths = [ + "tests", + "resources", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src"] +sources = ["src"] + +[project.scripts] +main = "sample_python_file:main" diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/.gitkeep b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/.gitkeep new file mode 100644 index 0000000000..3e09c14c18 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/.gitkeep @@ -0,0 +1 @@ +This folder is reserved for Databricks Asset Bundles resource definitions. 
diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/default_python_etl.pipeline.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/default_python_etl.pipeline.yml new file mode 100644 index 0000000000..e500171279 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/default_python_etl.pipeline.yml @@ -0,0 +1,14 @@ + +# The main pipeline for my_default_python +resources: + pipelines: + default_python_etl: + name: default_python_etl + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + # catalog: catalog_name + schema: my_default_python_${bundle.target} + root_path: ../src + + libraries: + - glob: + include: ../src/default_python_etl/transformations/** diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/sample_job.job.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/sample_job.job.yml new file mode 100644 index 0000000000..8e2ddbb626 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/sample_job.job.yml @@ -0,0 +1,56 @@ +# A sample job for my_default_python. + +resources: + jobs: + sample_job: + name: sample_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + #email_notifications: + # on_failure: + # - your_email@example.com + + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} + + tasks: + - task_key: notebook_task + notebook_task: + notebook_path: ../src/sample_notebook.ipynb + job_cluster_key: job_cluster + libraries: + # By default we just include the .whl file generated for the default_python package in src/. 
+ # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + - whl: ../../dist/*.whl + - task_key: python_file_task + depends_on: + - task_key: notebook_task + spark_python_task: + python_file: ../src/sample_python_file.py + job_cluster_key: job_cluster + libraries: + - whl: ../../dist/*.whl + - task_key: refresh_pipeline + depends_on: + - task_key: notebook_task + pipeline_task: + pipeline_id: ${resources.pipelines.default_python_etl.id} + + job_clusters: + - job_cluster_key: job_cluster + new_cluster: + spark_version: 16.4.x-scala2.12 + node_type_id: [NODE_TYPE_ID] + data_security_mode: SINGLE_USER + autoscale: + min_workers: 1 + max_workers: 4 diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/README.md new file mode 100644 index 0000000000..737b73cf43 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/README.md @@ -0,0 +1,22 @@ +# my_default_python + +This folder defines all source code for the my_default_python pipeline: + +- `explorations/`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations/`: All dataset definitions and transformations. +- `utilities/` (optional): Utility functions and Python modules used in this pipeline. +- `data_sources/` (optional): View definitions describing the source data for this pipeline. + +## Getting Started + +To get started, go to the `transformations` folder -- most of the relevant source code lives there: + +* By convention, every dataset under `transformations` is in a separate file. +* Take a look at the sample under "sample_trips_jan_01_1034.py" to get familiar with the syntax. + Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. 
+* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. +* Use `Schedule` to run the pipeline on a schedule! + +For more tutorials and reference material, see https://docs.databricks.com/dlt. diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/__init__.py similarity index 100% rename from acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/__init__.py diff --git a/libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}}/__init__.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/explorations/__init__.py similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}}/__init__.py rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/explorations/__init__.py diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/explorations/sample_exploration.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/explorations/sample_exploration.ipynb new file mode 100644 index 0000000000..7edb8fe518 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/explorations/sample_exploration.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + 
"inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "### Example Exploratory Notebook\n", + "\n", + "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n", + "\n", + "**Note**: This notebook is not executed as part of the pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. The tables referenced in this notebook depend on that step.\n", + "\n", + "display(spark.sql(\"SELECT * FROM hive_metastore.[USERNAME].sample_trips_jan_01_1034\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "sample_exploration", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/__init__.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/transformations/__init__.py similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/__init__.py rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/transformations/__init__.py diff --git 
a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/transformations/sample_trips_jan_01_1034.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/transformations/sample_trips_jan_01_1034.py new file mode 100644 index 0000000000..706b0b8952 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/transformations/sample_trips_jan_01_1034.py @@ -0,0 +1,15 @@ +import dlt +from pyspark.sql.functions import col +from default_python_etl.utilities import utils + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. + + +@dlt.table +def sample_trips_jan_01_1034(): + return spark.read.table("samples.nyctaxi.trips").withColumn( + "trip_distance_km", utils.distance_km(col("trip_distance")) + ) diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/transformations/sample_zones_jan_01_1034.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/transformations/sample_zones_jan_01_1034.py new file mode 100644 index 0000000000..82209f7ce4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/transformations/sample_zones_jan_01_1034.py @@ -0,0 +1,17 @@ +import dlt +from pyspark.sql.functions import col, sum + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. 
+ + +@dlt.table +def sample_zones_jan_01_1034(): + # Read from the "sample_trips" table, then sum all the fares + return ( + spark.read.table(f"sample_trips_jan_01_1034") + .groupBy(col("pickup_zip")) + .agg(sum("fare_amount").alias("total_fare")) + ) diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/utilities/__init__.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/utilities/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/utilities/utils.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/utilities/utils.py new file mode 100644 index 0000000000..f0f4e940f7 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/default_python_etl/utilities/utils.py @@ -0,0 +1,12 @@ +from pyspark.sql.functions import col, when + + +def distance_km(distance_col): + """Convert distance from miles to kilometers.""" + return distance_col * 1.60934 + + +def format_currency(amount_col): + """Format amount as currency.""" + return when(col(amount_col).isNotNull(), + col(amount_col).cast("decimal(10,2)")) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/sample_notebook.ipynb similarity index 84% rename from acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/src/sample_notebook.ipynb index 227c7cc558..aa609df200 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb +++ 
b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/sample_notebook.ipynb @@ -14,7 +14,7 @@ "source": [ "# Default notebook\n", "\n", - "This default notebook is executed using Databricks Workflows as defined in resources/my_jobs_as_code.job.yml." + "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml." ] }, { @@ -23,6 +23,7 @@ "metadata": {}, "outputs": [], "source": [ + "# Reload wheel file dependencies every time they are updated\n", "%load_ext autoreload\n", "%autoreload 2" ] @@ -44,9 +45,9 @@ }, "outputs": [], "source": [ - "from my_jobs_as_code import main\n", + "from shared import taxis\n", "\n", - "main.get_taxis(spark).show(10)" + "taxis.find_all_taxis().show(10)" ] } ], @@ -57,7 +58,7 @@ "notebookMetadata": { "pythonIndentUnit": 2 }, - "notebookName": "notebook", + "notebookName": "sample_notebook", "widgets": {} }, "kernelspec": { diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/sample_python_file.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/sample_python_file.py new file mode 100644 index 0000000000..719a0b71a3 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/sample_python_file.py @@ -0,0 +1,19 @@ +import argparse +from datetime import datetime +from shared import taxis + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--catalog", default="hive_metastore") + parser.add_argument("--schema", default="default") + args = parser.parse_args() + + df = taxis.find_all_taxis() + + table_name = f"{args.catalog}.{args.schema}.taxis_jan_01_1034" + df.write.mode("overwrite").saveAsTable(table_name) + + print(f"Wrote {df.count()} taxi records to {table_name}") + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/shared/__init__.py 
b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/shared/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/shared/taxis.py similarity index 66% rename from libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl rename to acceptance/bundle/templates/default-python/classic/output/my_default_python/src/shared/taxis.py index 04e8be4de0..a7309cd4c5 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/shared/taxis.py @@ -3,12 +3,5 @@ def find_all_taxis() -> DataFrame: + """Find all taxi data.""" return spark.read.table("samples.nyctaxi.trips") - - -def main(): - find_all_taxis().show(5) - - -if __name__ == "__main__": - main() diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/conftest.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/conftest.py new file mode 100644 index 0000000000..8037a4647c --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/conftest.py @@ -0,0 +1,93 @@ +"""This file configures pytest. + +This file is in the root since it can be used for tests in any place in this +project, including tests under resources/. +""" + +import os, sys, pathlib +from contextlib import contextmanager + + +try: + from databricks.connect import DatabricksSession + from databricks.sdk import WorkspaceClient + from pyspark.sql import SparkSession + import pytest + import json + import csv + import os +except ImportError: + raise ImportError("Test dependencies not found.\n\nRun tests using 'uv run pytest'. 
See http://docs.astral.sh/uv to learn more about uv.") + + +@pytest.fixture() +def spark() -> SparkSession: + """Provide a SparkSession fixture for tests. + + Minimal example: + def test_uses_spark(spark): + df = spark.createDataFrame([(1,)], ["x"]) + assert df.count() == 1 + """ + return DatabricksSession.builder.getOrCreate() + +@pytest.fixture() +def load_fixture(spark: SparkSession): + """Provide a callable to load JSON or CSV from fixtures/ directory. + + Example usage: + + def test_using_fixture(load_fixture): + data = load_fixture("my_data.json") + assert data.count() >= 1 + """ + def _loader(filename: str): + path = pathlib.Path(__file__).parent.parent / "fixtures" / filename + suffix = path.suffix.lower() + if suffix == ".json": + rows = json.loads(path.read_text()) + return spark.createDataFrame(rows) + if suffix == ".csv": + with path.open(newline="") as f: + rows = list(csv.DictReader(f)) + return spark.createDataFrame(rows) + raise ValueError(f"Unsupported fixture type for: {filename}") + return _loader + + +def _enable_fallback_compute(): + """Enable serverless compute if no compute is specified.""" + conf = WorkspaceClient().config + if conf.serverless_compute_id or conf.cluster_id or os.environ.get("SPARK_REMOTE"): + return + + url = "https://docs.databricks.com/dev-tools/databricks-connect/cluster-config" + print("☁️ no compute specified, falling back to serverless compute", file=sys.stderr) + print(f" see {url} for manual configuration", file=sys.stdout) + + os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto" + + +@contextmanager +def _allow_stderr_output(config: pytest.Config): + """Temporarily disable pytest output capture.""" + capman = config.pluginmanager.get_plugin("capturemanager") + if capman: + with capman.global_and_fixture_disabled(): + yield + else: + yield + + +def pytest_configure(config: pytest.Config): + """Configure pytest session.""" + with _allow_stderr_output(config): + _enable_fallback_compute() + + # Initialize Spark 
session eagerly, so it is available even when + # SparkSession.builder.getOrCreate() is used. For DB Connect 15+, + # we validate version compatibility with the remote cluster. + if hasattr(DatabricksSession.builder, "validateSession"): + DatabricksSession.builder.validateSession().getOrCreate() + else: + DatabricksSession.builder.getOrCreate() diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/sample_taxis_test.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/sample_taxis_test.py new file mode 100644 index 0000000000..a782015363 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/sample_taxis_test.py @@ -0,0 +1,8 @@ +from databricks.sdk.runtime import spark +from pyspark.sql import DataFrame +from shared import taxis + + +def test_find_all_taxis(): + results = taxis.find_all_taxis() + assert results.count() > 5 diff --git a/acceptance/bundle/templates/default-python/combinations/classic/output.txt b/acceptance/bundle/templates/default-python/combinations/classic/output.txt index 61ab7666f0..f192544aa5 100644 --- a/acceptance/bundle/templates/default-python/combinations/classic/output.txt +++ b/acceptance/bundle/templates/default-python/combinations/classic/output.txt @@ -1,7 +1,50 @@ >>> [CLI] bundle init default-python --config-file ./input.json - Welcome to the default Python template for Databricks Asset Bundles! -Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -Exit code: 1 +Please answer the below to tailor your project to your preferences. +You can always change your mind and change your configuration in the databricks.yml file later. + +Note that [DATABRICKS_URL] is used for initialization +(see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change your profile). + +✨ Your new project has been created in the 'X[UNIQUE_NAME]' directory! 
+ +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> yamlcheck.py + +>>> [CLI] bundle validate -t dev +Name: X[UNIQUE_NAME] +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/dev + +Validation OK! + +>>> [CLI] bundle validate -t prod +Name: X[UNIQUE_NAME] +Target: prod +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/prod + +Validation OK! + +>>> [TESTROOT]/bundle/templates/default-python/combinations/classic/../check_output.py [CLI] bundle deploy -t dev +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/dev/files... +Deploying resources... +Deployment complete! +Deleting files... +Destroy complete! + +>>> [TESTROOT]/bundle/templates/default-python/combinations/classic/../check_output.py [CLI] bundle deploy -t prod +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/prod/files... +Deploying resources... +Deployment complete! +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/templates/default-python/combinations/serverless/output.txt b/acceptance/bundle/templates/default-python/combinations/serverless/output.txt index 61ab7666f0..ee551a5b16 100644 --- a/acceptance/bundle/templates/default-python/combinations/serverless/output.txt +++ b/acceptance/bundle/templates/default-python/combinations/serverless/output.txt @@ -1,7 +1,50 @@ >>> [CLI] bundle init default-python --config-file ./input.json - Welcome to the default Python template for Databricks Asset Bundles! -Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -Exit code: 1 +Please answer the below to tailor your project to your preferences. +You can always change your mind and change your configuration in the databricks.yml file later. 
+ +Note that [DATABRICKS_URL] is used for initialization +(see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change your profile). + +✨ Your new project has been created in the 'X[UNIQUE_NAME]' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> yamlcheck.py + +>>> [CLI] bundle validate -t dev +Name: X[UNIQUE_NAME] +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/dev + +Validation OK! + +>>> [CLI] bundle validate -t prod +Name: X[UNIQUE_NAME] +Target: prod +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/prod + +Validation OK! + +>>> [TESTROOT]/bundle/templates/default-python/combinations/serverless/../check_output.py [CLI] bundle deploy -t dev +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/dev/files... +Deploying resources... +Deployment complete! +Deleting files... +Destroy complete! + +>>> [TESTROOT]/bundle/templates/default-python/combinations/serverless/../check_output.py [CLI] bundle deploy -t prod +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/X[UNIQUE_NAME]/prod/files... +Deploying resources... +Deployment complete! +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/templates/default-python/fail-missing-uv/output.txt b/acceptance/bundle/templates/default-python/fail-missing-uv/output.txt index bf6f75258a..802fea4829 100644 --- a/acceptance/bundle/templates/default-python/fail-missing-uv/output.txt +++ b/acceptance/bundle/templates/default-python/fail-missing-uv/output.txt @@ -1,7 +1,28 @@ >>> [CLI] bundle init default-python --config-file ./input.json --output-dir output - Welcome to the default Python template for Databricks Asset Bundles! 
-Error: template: :1:2: executing "" at : error calling index: index out of range: 1 + +Please answer the below to tailor your project to your preferences. +You can always change your mind and change your configuration in the databricks.yml file later. + +Note that [DATABRICKS_URL] is used for initialization +(see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change your profile). + +✨ Your new project has been created in the 'fail_missing_uv' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> [CLI] bundle validate +Error: path [TEST_TMP_DIR]/output/dist/*.whl is not contained in sync root path + +Name: fail_missing_uv +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/fail_missing_uv/dev + +Found 1 error Exit code: 1 diff --git a/acceptance/bundle/templates/default-python/integration_classic/output.txt b/acceptance/bundle/templates/default-python/integration_classic/output.txt index 85875c4409..a0fdf54723 100644 --- a/acceptance/bundle/templates/default-python/integration_classic/output.txt +++ b/acceptance/bundle/templates/default-python/integration_classic/output.txt @@ -3,8 +3,33 @@ [UV_PYTHON] >>> [CLI] bundle init default-python --config-file ./input.json --output-dir . - Welcome to the default Python template for Databricks Asset Bundles! -Error: template: :1:2: executing "" at : error calling index: index out of range: 1 + +Please answer the below to tailor your project to your preferences. +You can always change your mind and change your configuration in the databricks.yml file later. + +Note that [DATABRICKS_URL] is used for initialization +(see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change your profile). + +✨ Your new project has been created in the 'project_name_[UNIQUE_NAME]' directory! 
+ +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> [CLI] bundle validate -t dev +Error: path [TEST_TMP_DIR]/dist/*.whl is not contained in sync root path + +Name: project_name_[UNIQUE_NAME] +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/project_name_[UNIQUE_NAME]/dev + +Found 1 error + +>>> [CLI] bundle destroy -t dev --auto-approve +Error: path [TEST_TMP_DIR]/dist/*.whl is not contained in sync root path + Exit code: 1 diff --git a/acceptance/bundle/templates/default-python/no-uc/output.txt b/acceptance/bundle/templates/default-python/no-uc/output.txt index bf6f75258a..3fc221b818 100644 --- a/acceptance/bundle/templates/default-python/no-uc/output.txt +++ b/acceptance/bundle/templates/default-python/no-uc/output.txt @@ -1,7 +1,12 @@ >>> [CLI] bundle init default-python --config-file ./input.json --output-dir output - Welcome to the default Python template for Databricks Asset Bundles! -Error: template: :1:2: executing "" at : error calling index: index out of range: 1 + +Please answer the below to tailor your project to your preferences. +You can always change your mind and change your configuration in the databricks.yml file later. + +Note that [DATABRICKS_URL] is used for initialization +(see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change your profile). +Error: template: :1:2: executing "" at : error calling default_catalog: Unity Catalog is not available for feature tier STANDARD_TIER. 
Exit code: 1 diff --git a/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt b/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt index 239b9c2e23..9f9a0c02a4 100644 --- a/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt +++ b/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt @@ -1,7 +1,60 @@ >>> [CLI] bundle init default-python --config-file [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/input.json --output-dir output - Welcome to the default Python template for Databricks Asset Bundles! -Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -Exit code: 1 +Please answer the below to tailor your project to your preferences. +You can always change your mind and change your configuration in the databricks.yml file later. + +Note that [DATABRICKS_URL] is used for initialization +(see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change your profile). + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. 
+ +>>> diff.py [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output output/ +--- [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output/my_default_python/databricks.yml ++++ output/my_default_python/databricks.yml +@@ -32,5 +32,5 @@ + host: [DATABRICKS_URL] + variables: +- catalog: hive_metastore ++ catalog: customcatalog + schema: ${workspace.current_user.short_name} + prod: +@@ -41,5 +41,5 @@ + root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + variables: +- catalog: hive_metastore ++ catalog: customcatalog + schema: prod + permissions: +--- [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output/my_default_python/resources/default_python_etl.pipeline.yml ++++ output/my_default_python/resources/default_python_etl.pipeline.yml +@@ -5,6 +5,5 @@ + default_python_etl: + name: default_python_etl +- ## Catalog is required for serverless compute +- catalog: main ++ catalog: customcatalog + schema: my_default_python_${bundle.target} + serverless: true +--- [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output/my_default_python/src/default_python_etl/explorations/sample_exploration.ipynb ++++ output/my_default_python/src/default_python_etl/explorations/sample_exploration.ipynb +@@ -38,5 +38,5 @@ + "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. 
The tables referenced in this notebook depend on that step./n", + "/n", +- "display(spark.sql(/"SELECT * FROM hive_metastore.[USERNAME].sample_trips_jan_01_1034/"))" ++ "display(spark.sql(/"SELECT * FROM customcatalog.[USERNAME].sample_trips_jan_01_1034/"))" + ] + } +--- [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output/my_default_python/src/sample_python_file.py ++++ output/my_default_python/src/sample_python_file.py +@@ -5,5 +5,5 @@ + def main(): + parser = argparse.ArgumentParser() +- parser.add_argument("--catalog", default="hive_metastore") ++ parser.add_argument("--catalog", default="customcatalog") + parser.add_argument("--schema", default="default") + args = parser.parse_args() diff --git a/acceptance/bundle/templates/default-python/serverless/output.txt b/acceptance/bundle/templates/default-python/serverless/output.txt index bf6f75258a..980d6786c6 100644 --- a/acceptance/bundle/templates/default-python/serverless/output.txt +++ b/acceptance/bundle/templates/default-python/serverless/output.txt @@ -1,7 +1,34 @@ >>> [CLI] bundle init default-python --config-file ./input.json --output-dir output - Welcome to the default Python template for Databricks Asset Bundles! -Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -Exit code: 1 +Please answer the below to tailor your project to your preferences. +You can always change your mind and change your configuration in the databricks.yml file later. + +Note that [DATABRICKS_URL] is used for initialization +(see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change your profile). + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. 
+ +>>> [CLI] bundle validate -t dev +Name: my_default_python +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev + +Validation OK! + +>>> [CLI] bundle validate -t prod +Name: my_default_python +Target: prod +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/prod + +Validation OK! diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi new file mode 100644 index 0000000000..0edd5181bc --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json new file mode 100644 index 0000000000..1f39c33087 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json @@ -0,0 +1,6 @@ +{ + "recommendations": [ + "databricks.databricks", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json new file mode 100644 index 0000000000..d8468d7b60 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json @@ -0,0 +1,39 @@ +{ + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook 
source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + "dist": true, + }, + "files.associations": { + "**/.gitkeep": "markdown" + }, + + // Pylance settings (VS Code) + // Set typeCheckingMode to "basic" to enable type checking! + "python.analysis.typeCheckingMode": "off", + "python.analysis.extraPaths": ["src", "lib", "resources"], + "python.analysis.diagnosticMode": "workspace", + "python.analysis.stubPath": ".vscode", + + // Pyright settings (Cursor) + // Set typeCheckingMode to "basic" to enable type checking! + "cursorpyright.analysis.typeCheckingMode": "off", + "cursorpyright.analysis.extraPaths": ["src", "lib", "resources"], + "cursorpyright.analysis.diagnosticMode": "workspace", + "cursorpyright.analysis.stubPath": ".vscode", + + // General Python settings + "python.defaultInterpreterPath": "./.venv/bin/python", + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "[python]": { + "editor.defaultFormatter": "ms-python.python", + "editor.formatOnSave": true, + }, +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md new file mode 100644 index 0000000000..b373a1708d --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md @@ -0,0 +1,73 @@ +# my_default_python + +The 'my_default_python' project was generated by using the default-python template. +For documentation on the Databricks Asset Bundles format used for this project, +and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles. + +* `src/`: Python source code for this project.
+* `src/shared`: Shared source code across all jobs/pipelines/etc. +* `src/default_python_etl`: Python source code for the default_python_etl pipeline. +* `resources/`: Resource configurations (jobs, pipelines, etc.) +* `tests/`: Unit tests. +* `fixtures/`: Fixtures for data sets (primarily used for testing). + +## Getting started + +Choose how you want to work on this project: + +(a) Directly in your Databricks workspace, see + https://docs.databricks.com/dev-tools/bundles/workspace. + +(b) Locally with an IDE like Cursor or VS Code, see + https://docs.databricks.com/vscode-ext. + +(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +Dependencies for this project should be installed using uv: + +* Make sure you have the UV package manager installed. + It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/. +* Run `uv sync --dev` to install the project's dependencies. + + +# Using this project using the CLI + +The Databricks workspace and IDE extensions provide a graphical interface for working +with this project. It's also possible to interact with it directly using the CLI: + +1. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +2. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] sample_job` to your workspace. + You can find that job by opening your workspace and clicking on **Jobs & Pipelines**. + +3.
Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/sample_job.job.yml). The schedule + is paused when deploying in development mode (see + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + +4. To run a job or pipeline, use the "run" command: + ``` + $ databricks bundle run + ``` +5. Finally, to run tests locally, use `pytest`: + ``` + $ uv run pytest + ``` + diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml new file mode 100644 index 0000000000..23a8437a22 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml @@ -0,0 +1,47 @@ +# This is a Databricks asset bundle definition for my_default_python. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_python + uuid: [UUID] + +include: + - resources/*.yml + - resources/*/*.yml + +artifacts: + python_artifact: + type: whl + build: uv build --wheel + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
+ mode: development + default: true + workspace: + host: [DATABRICKS_URL] + variables: + catalog: hive_metastore + schema: ${workspace.current_user.short_name} + prod: + mode: production + workspace: + host: [DATABRICKS_URL] + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. + root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + variables: + catalog: hive_metastore + schema: prod + permissions: + - user_name: [USERNAME] + level: CAN_MANAGE diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep new file mode 100644 index 0000000000..77a906614c --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep @@ -0,0 +1,9 @@ +# Test fixtures directory + +Add JSON or CSV files here. In tests, use them with `load_fixture()`: + +``` +def test_using_fixture(load_fixture): + data = load_fixture("my_data.json") + assert len(data) >= 1 +``` diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore new file mode 100644 index 0000000000..0dab7f4995 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml new file mode 100644 index 0000000000..7cd4a5cc86 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pyproject.toml @@ -0,0 +1,35 @@ +[project] +name 
= "my_default_python" +version = "0.0.1" +authors = [{ name = "[USERNAME]" }] +requires-python = ">= 3.11" + +[dependency-groups] +dev = [ + "pytest", + "databricks-dlt", + + # databricks-connect can be used to run parts of this project locally. + # Note that for local development, you should use a version that is not newer + # than the remote cluster or serverless compute you connect to. + # See also https://docs.databricks.com/dev-tools/databricks-connect.html. + "databricks-connect>=15.4,<15.5", +] + +[tool.pytest.ini_options] +pythonpath = "src" +testpaths = [ + "tests", + "resources", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src"] +sources = ["src"] + +[project.scripts] +main = "sample_python_file:main" diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/.gitkeep b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/.gitkeep new file mode 100644 index 0000000000..3e09c14c18 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/.gitkeep @@ -0,0 +1 @@ +This folder is reserved for Databricks Asset Bundles resource definitions. 
diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/default_python_etl.pipeline.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/default_python_etl.pipeline.yml new file mode 100644 index 0000000000..185f474478 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/default_python_etl.pipeline.yml @@ -0,0 +1,15 @@ + +# The main pipeline for my_default_python +resources: + pipelines: + default_python_etl: + name: default_python_etl + ## Catalog is required for serverless compute + catalog: main + schema: my_default_python_${bundle.target} + serverless: true + root_path: ../src + + libraries: + - glob: + include: ../src/default_python_etl/transformations/** diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/sample_job.job.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/sample_job.job.yml new file mode 100644 index 0000000000..aedcdb71df --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/sample_job.job.yml @@ -0,0 +1,49 @@ +# A sample job for my_default_python. 
+ +resources: + jobs: + sample_job: + name: sample_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + #email_notifications: + # on_failure: + # - your_email@example.com + + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} + + tasks: + - task_key: notebook_task + notebook_task: + notebook_path: ../src/sample_notebook.ipynb + environment_key: default + - task_key: python_file_task + depends_on: + - task_key: notebook_task + spark_python_task: + python_file: ../src/sample_python_file.py + environment_key: default + - task_key: refresh_pipeline + depends_on: + - task_key: notebook_task + pipeline_task: + pipeline_id: ${resources.pipelines.default_python_etl.id} + + environments: + - environment_key: default + spec: + client: "2" + dependencies: + # By default we just include the .whl file generated for the default_python package in src/. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + - ../dist/*.whl diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/README.md b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/README.md new file mode 100644 index 0000000000..737b73cf43 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/README.md @@ -0,0 +1,22 @@ +# my_default_python + +This folder defines all source code for the my_default_python pipeline: + +- `explorations/`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations/`: All dataset definitions and transformations. +- `utilities/` (optional): Utility functions and Python modules used in this pipeline. 
+- `data_sources/` (optional): View definitions describing the source data for this pipeline. + +## Getting Started + +To get started, go to the `transformations` folder -- most of the relevant source code lives there: + +* By convention, every dataset under `transformations` is in a separate file. +* Take a look at the sample under "sample_trips_jan_01_1034.py" to get familiar with the syntax. + Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. +* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. +* Use `Schedule` to run the pipeline on a schedule! + +For more tutorials and reference material, see https://docs.databricks.com/dlt. diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/__init__.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/explorations/__init__.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/explorations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/explorations/sample_exploration.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/explorations/sample_exploration.ipynb new file mode 100644 index 0000000000..7edb8fe518 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/explorations/sample_exploration.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "### Example Exploratory Notebook\n", + "\n", + "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n", + "\n", + "**Note**: This notebook is not executed as part of the pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. The tables referenced in this notebook depend on that step.\n", + "\n", + "display(spark.sql(\"SELECT * FROM hive_metastore.[USERNAME].sample_trips_jan_01_1034\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "sample_exploration", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/transformations/__init__.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/transformations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/transformations/sample_trips_jan_01_1034.py 
b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/transformations/sample_trips_jan_01_1034.py new file mode 100644 index 0000000000..706b0b8952 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/transformations/sample_trips_jan_01_1034.py @@ -0,0 +1,15 @@ +import dlt +from pyspark.sql.functions import col +from default_python_etl.utilities import utils + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. + + +@dlt.table +def sample_trips_jan_01_1034(): + return spark.read.table("samples.nyctaxi.trips").withColumn( + "trip_distance_km", utils.distance_km(col("trip_distance")) + ) diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/transformations/sample_zones_jan_01_1034.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/transformations/sample_zones_jan_01_1034.py new file mode 100644 index 0000000000..82209f7ce4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/transformations/sample_zones_jan_01_1034.py @@ -0,0 +1,17 @@ +import dlt +from pyspark.sql.functions import col, sum + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. 
+ + +@dlt.table +def sample_zones_jan_01_1034(): + # Read from the "sample_trips" table, then sum all the fares + return ( + spark.read.table(f"sample_trips_jan_01_1034") + .groupBy(col("pickup_zip")) + .agg(sum("fare_amount").alias("total_fare")) + ) diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/utilities/__init__.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/utilities/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/utilities/utils.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/utilities/utils.py new file mode 100644 index 0000000000..f0f4e940f7 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/default_python_etl/utilities/utils.py @@ -0,0 +1,12 @@ +from pyspark.sql.functions import col, when + + +def distance_km(distance_col): + """Convert distance from miles to kilometers.""" + return distance_col * 1.60934 + + +def format_currency(amount_col): + """Format amount as currency.""" + return when(col(amount_col).isNotNull(), + col(amount_col).cast("decimal(10,2)")) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/sample_notebook.ipynb similarity index 75% rename from libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl rename to acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/sample_notebook.ipynb index 53cb3040c6..aa609df200 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl +++ 
b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/sample_notebook.ipynb @@ -6,7 +6,7 @@ "application/vnd.databricks.v1+cell": { "cellMetadata": {}, "inputWidgets": {}, - "nuid": "ee353e42-ff58-4955-9608-12865bd0950e", + "nuid": "[UUID]", "showTitle": false, "title": "" } @@ -14,7 +14,7 @@ "source": [ "# Default notebook\n", "\n", - "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml." + "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml." ] }, { @@ -23,6 +23,7 @@ "metadata": {}, "outputs": [], "source": [ + "# Reload wheel file dependencies every time they are updated\n", "%load_ext autoreload\n", "%autoreload 2" ] @@ -37,20 +38,16 @@ "rowLimit": 10000 }, "inputWidgets": {}, - "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", + "nuid": "[UUID]", "showTitle": false, "title": "" } }, "outputs": [], "source": [ - {{- if (eq .include_python "yes") }} - "from {{.project_name}} import main\n", + "from shared import taxis\n", "\n", - "main.find_all_taxis().show(10)" - {{else}} - "display(spark.read.table(\"samples.nyctaxi.trips\"))" - {{end -}} + "taxis.find_all_taxis().show(10)" ] } ], @@ -61,7 +58,7 @@ "notebookMetadata": { "pythonIndentUnit": 2 }, - "notebookName": "notebook", + "notebookName": "sample_notebook", "widgets": {} }, "kernelspec": { diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/sample_python_file.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/sample_python_file.py new file mode 100644 index 0000000000..719a0b71a3 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/sample_python_file.py @@ -0,0 +1,19 @@ +import argparse +from datetime import datetime +from shared import taxis + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--catalog", 
default="hive_metastore") + parser.add_argument("--schema", default="default") + args = parser.parse_args() + + df = taxis.find_all_taxis() + + table_name = f"{args.catalog}.{args.schema}.taxis_jan_01_1034" + df.write.mode("overwrite").saveAsTable(table_name) + + print(f"Wrote {df.count()} taxi records to {table_name}") + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/shared/__init__.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/shared/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}}/main.py.tmpl b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/shared/taxis.py similarity index 66% rename from libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}}/main.py.tmpl rename to acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/shared/taxis.py index 04e8be4de0..a7309cd4c5 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/lib/{{.project_name}}/main.py.tmpl +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/shared/taxis.py @@ -3,12 +3,5 @@ def find_all_taxis() -> DataFrame: + """Find all taxi data.""" return spark.read.table("samples.nyctaxi.trips") - - -def main(): - find_all_taxis().show(5) - - -if __name__ == "__main__": - main() diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/conftest.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/conftest.py new file mode 100644 index 0000000000..8037a4647c --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/conftest.py @@ -0,0 +1,93 @@ +"""This file configures pytest. 
+ +This file is in the root since it can be used for tests in any place in this +project, including tests under resources/. +""" + +import os, sys, pathlib +from contextlib import contextmanager + + +try: + from databricks.connect import DatabricksSession + from databricks.sdk import WorkspaceClient + from pyspark.sql import SparkSession + import pytest + import json + import csv + import os +except ImportError: + raise ImportError("Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn more about uv.") + + +@pytest.fixture() +def spark() -> SparkSession: + """Provide a SparkSession fixture for tests. + + Minimal example: + def test_uses_spark(spark): + df = spark.createDataFrame([(1,)], ["x"]) + assert df.count() == 1 + """ + return DatabricksSession.builder.getOrCreate() + +@pytest.fixture() +def load_fixture(spark: SparkSession): + """Provide a callable to load JSON or CSV from fixtures/ directory. + + Example usage: + + def test_using_fixture(load_fixture): + data = load_fixture("my_data.json") + assert data.count() >= 1 + """ + def _loader(filename: str): + path = pathlib.Path(__file__).parent.parent / "fixtures" / filename + suffix = path.suffix.lower() + if suffix == ".json": + rows = json.loads(path.read_text()) + return spark.createDataFrame(rows) + if suffix == ".csv": + with path.open(newline="") as f: + rows = list(csv.DictReader(f)) + return spark.createDataFrame(rows) + raise ValueError(f"Unsupported fixture type for: {filename}") + return _loader + + +def _enable_fallback_compute(): + """Enable serverless compute if no compute is specified.""" + conf = WorkspaceClient().config + if conf.serverless_compute_id or conf.cluster_id or os.environ.get("SPARK_REMOTE"): + return + + url = "https://docs.databricks.com/dev-tools/databricks-connect/cluster-config" + print("☁️ no compute specified, falling back to serverless compute", file=sys.stderr) + print(f" see {url} for manual configuration", file=sys.stdout) 
+ + os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto" + + +@contextmanager +def _allow_stderr_output(config: pytest.Config): + """Temporarily disable pytest output capture.""" + capman = config.pluginmanager.get_plugin("capturemanager") + if capman: + with capman.global_and_fixture_disabled(): + yield + else: + yield + + +def pytest_configure(config: pytest.Config): + """Configure pytest session.""" + with _allow_stderr_output(config): + _enable_fallback_compute() + + # Initialize Spark session eagerly, so it is available even when + # SparkSession.builder.getOrCreate() is used. For DB Connect 15+, + # we validate version compatibility with the remote cluster. + if hasattr(DatabricksSession.builder, "validateSession"): + DatabricksSession.builder.validateSession().getOrCreate() + else: + DatabricksSession.builder.getOrCreate() diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/sample_taxis_test.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/sample_taxis_test.py new file mode 100644 index 0000000000..a782015363 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/sample_taxis_test.py @@ -0,0 +1,8 @@ +from databricks.sdk.runtime import spark +from pyspark.sql import DataFrame +from shared import taxis + + +def test_find_all_taxis(): + results = taxis.find_all_taxis() + assert results.count() > 5 diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt index 706f1f9ff6..7400d572e0 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt @@ -2,121 +2,6 @@ >>> [CLI] bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output Welcome to (EXPERIMENTAL) "Jobs as code" template for Databricks Asset Bundles! 
-Workspace to use (auto-detected, edit in 'my_jobs_as_code/databricks.yml'): [DATABRICKS_URL] +Error: failed to compute file content for __preamble.tmpl. variable "include_pipeline" not defined -✨ Your new project has been created in the 'my_jobs_as_code' directory! - -Please refer to the README.md file for "getting started" instructions. -See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. - ->>> [CLI] bundle validate -t dev --output json -Warning: Ignoring Databricks CLI version constraint for development build. Required: >= 0.248.0, current: [DEV_VERSION] - -{ - "jobs": { - "my_jobs_as_code_job": { - "deployment": { - "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/state/metadata.json" - }, - "edit_mode": "UI_LOCKED", - "format": "MULTI_TASK", - "job_clusters": [ - { - "job_cluster_key": "job_cluster", - "new_cluster": { - "autoscale": { - "max_workers": 4, - "min_workers": 1 - }, - "data_security_mode": "SINGLE_USER", - "node_type_id": "[NODE_TYPE_ID]", - "num_workers": 0, - "spark_version": "15.4.x-scala2.12" - } - } - ], - "max_concurrent_runs": 4, - "name": "[dev [USERNAME]] my_jobs_as_code_job", - "permissions": [], - "queue": { - "enabled": true - }, - "tags": { - "dev": "[USERNAME]" - }, - "tasks": [ - { - "depends_on": [ - { - "task_key": "notebook_task" - } - ], - "job_cluster_key": "job_cluster", - "libraries": [ - { - "whl": "dist/*.whl" - } - ], - "python_wheel_task": { - "entry_point": "main", - "package_name": "my_jobs_as_code" - }, - "task_key": "main_task" - }, - { - "job_cluster_key": "job_cluster", - "notebook_task": { - "notebook_path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/files/src/notebook", - "source": "WORKSPACE" - }, - "task_key": "notebook_task" - } - ], - "trigger": { - "pause_status": "PAUSED", - "periodic": { - "interval": 1, - "unit": "DAYS" - } - } - } - }, - "pipelines": { - "my_jobs_as_code_pipeline": { - "catalog": 
"catalog_name", - "channel": "CURRENT", - "configuration": { - "bundle.sourcePath": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/files/src" - }, - "deployment": { - "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/state/metadata.json" - }, - "development": true, - "edition": "ADVANCED", - "libraries": [ - { - "notebook": { - "path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/files/src/dlt_pipeline" - } - } - ], - "name": "[dev [USERNAME]] my_jobs_as_code_pipeline", - "permissions": [], - "tags": { - "dev": "[USERNAME]" - }, - "target": "my_jobs_as_code_dev" - } - } -} - ->>> unzip -Z1 dist/my_jobs_as_code-0.0.1-py3-none-any.whl -my_jobs_as_code/__init__.py -my_jobs_as_code/main.py -my_jobs_as_code-0.0.1.dist-info/METADATA -my_jobs_as_code-0.0.1.dist-info/WHEEL -my_jobs_as_code-0.0.1.dist-info/entry_points.txt -my_jobs_as_code-0.0.1.dist-info/top_level.txt -my_jobs_as_code-0.0.1.dist-info/RECORD +Exit code: 1 diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md deleted file mode 100644 index 8c429c6e53..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md +++ /dev/null @@ -1,58 +0,0 @@ -# my_jobs_as_code - -The 'my_jobs_as_code' project was generated by using the "Jobs as code" template. - -## Prerequisites - -1. Install Databricks CLI 0.238 or later. - See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). - -2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). - We use uv to create a virtual environment and install the required dependencies. - -3. Authenticate to your Databricks workspace if you have not done so already: - ``` - $ databricks configure - ``` - -4. 
Optionally, install developer tools such as the Databricks extension for Visual Studio Code from - https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for - **Databricks Connect** for instructions on running the included Python code from a different IDE. - -5. For documentation on the Databricks Asset Bundles format used - for this project, and for CI/CD configuration, see - https://docs.databricks.com/dev-tools/bundles/index.html. - -## Deploy and run jobs - -1. Create a new virtual environment and install the required dependencies: - ``` - $ uv sync - ``` - -2. To deploy the bundle to the development target: - ``` - $ databricks bundle deploy --target dev - ``` - - *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* - - This deploys everything that's defined for this project. - For example, the default template would deploy a job called - `[dev yourname] my_jobs_as_code_job` to your workspace. - You can find that job by opening your workspace and clicking on **Workflows**. - -3. Similarly, to deploy a production copy, type: - ``` - $ databricks bundle deploy --target prod - ``` - - Note that the default job from the template has a schedule that runs every day - (defined in resources/my_jobs_as_code_job.py). The schedule - is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( - https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). - -4. 
To run a job: - ``` - $ databricks bundle run - ``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml deleted file mode 100644 index b09d99917e..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml +++ /dev/null @@ -1,50 +0,0 @@ -# This is a Databricks asset bundle definition for my_jobs_as_code. -# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. -bundle: - name: my_jobs_as_code - uuid: [UUID] - databricks_cli_version: ">= 0.248.0" - -experimental: - python: - # Activate virtual environment before loading resources defined in Python. - # If disabled, defaults to using the Python interpreter available in the current shell. - venv_path: .venv - # Functions called to load resources defined in Python. See resources/__init__.py - resources: - - "resources:load_resources" - -artifacts: - default: - type: whl - path: . - # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) - # to ensure that changes to wheel package are picked up when used on all-purpose clusters - build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build - -include: - - resources/*.yml - - resources/*/*.yml - -targets: - dev: - # The default target uses 'mode: development' to create a development copy. - # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default. - # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. - mode: development - default: true - workspace: - host: [DATABRICKS_URL] - - prod: - mode: production - workspace: - host: [DATABRICKS_URL] - # We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy. 
- root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} - permissions: - - user_name: [USERNAME] - level: CAN_MANAGE - run_as: - user_name: [USERNAME] diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep deleted file mode 100644 index fa25d2745e..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep +++ /dev/null @@ -1,22 +0,0 @@ -# Fixtures - -This folder is reserved for fixtures, such as CSV files. - -Below is an example of how to load fixtures as a data frame: - -``` -import pandas as pd -import os - -def get_absolute_path(*relative_parts): - if 'dbutils' in globals(): - base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore - path = os.path.normpath(os.path.join(base_dir, *relative_parts)) - return path if path.startswith("/Workspace") else "/Workspace" + path - else: - return os.path.join(*relative_parts) - -csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") -df = pd.read_csv(csv_file) -display(df) -``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml deleted file mode 100644 index 4478dace35..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml +++ /dev/null @@ -1,49 +0,0 @@ -[build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" - -[project] -name = "my_jobs_as_code" -requires-python = ">=3.10" -description = "wheel file based on my_jobs_as_code" - -# Dependencies in case the output wheel file is used as a library dependency. 
-# For defining dependencies, when this package is used in Databricks, see: -# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html -# -# Example: -# dependencies = [ -# "requests==x.y.z", -# ] -dependencies = [ -] - -# see setup.py -dynamic = ["version"] - -[project.entry-points.packages] -main = "my_jobs_as_code.main:main" - -[tool.setuptools.packages.find] -where = ["src"] - -[tool.uv] -## Dependencies for local development -dev-dependencies = [ - "databricks-bundles==x.y.z", - - ## Add code completion support for DLT - # "databricks-dlt", - - ## databricks-connect can be used to run parts of this project locally. - ## See https://docs.databricks.com/dev-tools/databricks-connect.html. - ## - ## Uncomment line below to install a version of db-connect that corresponds to - ## the Databricks Runtime version used for this project. - # "databricks-connect>=15.4,<15.5", -] - -override-dependencies = [ - # pyspark package conflicts with 'databricks-connect' - "pyspark; sys_platform == 'never'", -] diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py deleted file mode 100644 index fbcb9dc5f0..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from databricks.bundles.core import ( - Bundle, - Resources, - load_resources_from_current_package_module, -) - - -def load_resources(bundle: Bundle) -> Resources: - """ - 'load_resources' function is referenced in databricks.yml and is responsible for loading - bundle resources defined in Python code. This function is called by Databricks CLI during - bundle deployment. After deployment, this function is not used. 
- """ - - # the default implementation loads all Python files in 'resources' directory - return load_resources_from_current_package_module() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py deleted file mode 100644 index 2407a95462..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py +++ /dev/null @@ -1,68 +0,0 @@ -from databricks.bundles.jobs import Job - -""" -The main job for my_jobs_as_code. -""" - - -my_jobs_as_code_job = Job.from_dict( - { - "name": "my_jobs_as_code_job", - "trigger": { - # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger - "periodic": { - "interval": 1, - "unit": "DAYS", - }, - }, - # "email_notifications": { - # "on_failure": [ - # "[USERNAME]", - # ], - # }, - "tasks": [ - { - "task_key": "notebook_task", - "job_cluster_key": "job_cluster", - "notebook_task": { - "notebook_path": "src/notebook.ipynb", - }, - }, - { - "task_key": "main_task", - "depends_on": [ - { - "task_key": "notebook_task", - }, - ], - "job_cluster_key": "job_cluster", - "python_wheel_task": { - "package_name": "my_jobs_as_code", - "entry_point": "main", - }, - "libraries": [ - # By default we just include the .whl file generated for the my_jobs_as_code package. - # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html - # for more information on how to add other libraries. 
- { - "whl": "dist/*.whl", - }, - ], - }, - ], - "job_clusters": [ - { - "job_cluster_key": "job_cluster", - "new_cluster": { - "spark_version": "15.4.x-scala2.12", - "node_type_id": "[NODE_TYPE_ID]", - "data_security_mode": "SINGLE_USER", - "autoscale": { - "min_workers": 1, - "max_workers": 4, - }, - }, - }, - ], - } -) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py deleted file mode 100644 index 9d83e573a9..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_pipeline.py +++ /dev/null @@ -1,20 +0,0 @@ -from databricks.bundles.pipelines import Pipeline - -my_jobs_as_code_pipeline = Pipeline.from_dict( - { - "name": "my_jobs_as_code_pipeline", - "target": "my_jobs_as_code_${bundle.target}", - ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: - "catalog": "catalog_name", - "libraries": [ - { - "notebook": { - "path": "src/dlt_pipeline.ipynb", - }, - }, - ], - "configuration": { - "bundle.sourcePath": "${workspace.file_path}/src", - }, - } -) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md deleted file mode 100644 index e6cfb81b46..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# scratch - -This folder is reserved for personal, exploratory notebooks. -By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py deleted file mode 100644 index ba284ba828..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -setup.py configuration script describing how to build and package this project. - -This file is primarily used by the setuptools library and typically should not -be executed directly. See README.md for how to deploy, test, and run -the my_jobs_as_code project. -""" - -import os - -from setuptools import setup - -local_version = os.getenv("LOCAL_VERSION") -version = "0.0.1" - -setup( - version=f"{version}+{local_version}" if local_version else version, -) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb deleted file mode 100644 index a1ba11f720..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/dlt_pipeline.ipynb +++ /dev/null @@ -1,90 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "source": [ - "# DLT pipeline\n", - "\n", - "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_jobs_as_code.pipeline.yml." 
- ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "# Import DLT and src/my_jobs_as_code\n", - "import dlt\n", - "import sys\n", - "\n", - "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", - "from pyspark.sql.functions import expr\n", - "from my_jobs_as_code import main" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "[UUID]", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "@dlt.view\n", - "def taxi_raw():\n", - " return main.get_taxis(spark)\n", - "\n", - "\n", - "@dlt.table\n", - "def filtered_taxis():\n", - " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "dlt_pipeline", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py deleted file mode 100644 index 5ae344c7e2..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py +++ /dev/null @@ -1,25 +0,0 @@ -from pyspark.sql import SparkSession, DataFrame - - -def get_taxis(spark: SparkSession) -> DataFrame: - return spark.read.table("samples.nyctaxi.trips") - - -# Create a new 
Databricks Connect session. If this fails, -# check that you have configured Databricks Connect correctly. -# See https://docs.databricks.com/dev-tools/databricks-connect.html. -def get_spark() -> SparkSession: - try: - from databricks.connect import DatabricksSession - - return DatabricksSession.builder.getOrCreate() - except ImportError: - return SparkSession.builder.getOrCreate() - - -def main(): - get_taxis(get_spark()).show(5) - - -if __name__ == "__main__": - main() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py deleted file mode 100644 index 13e100ee2e..0000000000 --- a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py +++ /dev/null @@ -1,8 +0,0 @@ -from my_jobs_as_code.main import get_taxis, get_spark - -# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml - - -def test_main(): - taxis = get_taxis(get_spark()) - assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/telemetry/default-python/out.databricks.yml b/acceptance/bundle/templates/telemetry/default-python/out.databricks.yml new file mode 100644 index 0000000000..62bd43d325 --- /dev/null +++ b/acceptance/bundle/templates/telemetry/default-python/out.databricks.yml @@ -0,0 +1,49 @@ +# This is a Databricks asset bundle definition for my_default_python. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_python + uuid: [BUNDLE-UUID] + +include: + - resources/*.yml + - resources/*/*.yml + +artifacts: + python_artifact: + type: whl + build: uv build --wheel + +# Variable declarations. These variables are assigned in the dev/prod targets below. 
+variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: [DATABRICKS_URL] + variables: + catalog: hive_metastore + schema: ${workspace.current_user.short_name} + presets: + artifacts_dynamic_version: true + prod: + mode: production + workspace: + host: [DATABRICKS_URL] + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. + root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + variables: + catalog: hive_metastore + schema: prod + permissions: + - user_name: [USERNAME] + level: CAN_MANAGE diff --git a/acceptance/bundle/templates/telemetry/default-python/out.requests.txt b/acceptance/bundle/templates/telemetry/default-python/out.requests.txt new file mode 100644 index 0000000000..680efcfccf --- /dev/null +++ b/acceptance/bundle/templates/telemetry/default-python/out.requests.txt @@ -0,0 +1,34 @@ +{ + "headers": { + "User-Agent": [ + "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS] cmd/bundle_init cmd-exec-id/[CMD-EXEC-ID] auth/pat" + ] + }, + "method": "GET", + "path": "/api/2.1/unity-catalog/current-metastore-assignment" +} +{ + "headers": { + "User-Agent": [ + "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS] cmd/bundle_init cmd-exec-id/[CMD-EXEC-ID] auth/pat" + ] + }, + "method": "GET", + "path": "/api/2.0/preview/scim/v2/Me" +} +{ + "headers": { + "User-Agent": [ + "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS] cmd/bundle_init cmd-exec-id/[CMD-EXEC-ID] auth/pat" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + 
"body": { + "uploadTime": [UNIX_TIME_MILLIS], + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[CMD-EXEC-ID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"bundle_init\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"bundle_init_event\":{\"bundle_uuid\":\"[BUNDLE-UUID]\",\"template_name\":\"default-python\",\"template_enum_args\":[{\"key\":\"include_job\",\"value\":\"yes\"},{\"key\":\"include_pipeline\",\"value\":\"yes\"},{\"key\":\"include_python\",\"value\":\"yes\"},{\"key\":\"personal_schemas\",\"value\":\"yes\"},{\"key\":\"serverless\",\"value\":\"no\"}]}}}}" + ] + } +} diff --git a/acceptance/bundle/templates/telemetry/default-python/output.txt b/acceptance/bundle/templates/telemetry/default-python/output.txt index add059599b..b95acf68bc 100644 --- a/acceptance/bundle/templates/telemetry/default-python/output.txt +++ b/acceptance/bundle/templates/telemetry/default-python/output.txt @@ -1,5 +1,55 @@ - Welcome to the default Python template for Databricks Asset Bundles! -Error: template: :1:2: executing "" at : error calling index: index out of range: 1 -Exit code: 1 +Please answer the below to tailor your project to your preferences. +You can always change your mind and change your configuration in the databricks.yml file later. + +Note that [DATABRICKS_URL] is used for initialization +(see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change your profile). + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. 
+ +>>> cat out.requests.txt +{ + "frontend_log_event_id": "[UUID]", + "entry": { + "databricks_cli_log": { + "execution_context": { + "cmd_exec_id": "[CMD-EXEC-ID]", + "version": "[DEV_VERSION]", + "command": "bundle_init", + "operating_system": "[OS]", + "execution_time_ms": SMALL_INT, + "exit_code": 0 + }, + "bundle_init_event": { + "bundle_uuid": "[BUNDLE-UUID]", + "template_name": "default-python", + "template_enum_args": [ + { + "key": "include_job", + "value": "yes" + }, + { + "key": "include_pipeline", + "value": "yes" + }, + { + "key": "include_python", + "value": "yes" + }, + { + "key": "personal_schemas", + "value": "yes" + }, + { + "key": "serverless", + "value": "no" + } + ] + } + } + } +} diff --git a/acceptance/cmd/workspace/apps/run-local-node/out.run.txt b/acceptance/cmd/workspace/apps/run-local-node/out.run.txt new file mode 100644 index 0000000000..6a76f5e09d --- /dev/null +++ b/acceptance/cmd/workspace/apps/run-local-node/out.run.txt @@ -0,0 +1,23 @@ + +>>> [CLI] apps run-local --prepare-environment --debug --port 8081 --debug-port 5252 --app-port 8080 + +up to date, audited 68 packages in 586ms + +14 packages are looking for funding + run `npm fund` for details + +found 0 vulnerabilities + +> app@1.0.0 build +> echo 'Building app...' + +Building app... +Running command: npm run run-app +To debug your app, attach a debugger to port $(debug_port) +To access your app go to http://localhost:8081 +listen tcp 127.0.0.1:$(port): bind: address already in use + +> app@1.0.0 run-app +> node app.js + +Server is running on port 8080 diff --git a/acceptance/cmd/workspace/apps/run-local-node/output.txt b/acceptance/cmd/workspace/apps/run-local-node/output.txt index 0185dbe523..ea44447b9d 100644 --- a/acceptance/cmd/workspace/apps/run-local-node/output.txt +++ b/acceptance/cmd/workspace/apps/run-local-node/output.txt @@ -5,8 +5,5 @@ Hello, world === Waiting === Checking app is running... 
>>> curl -s -o - http://127.0.0.1:$(port) -{"message":"Hello From App","timestamp":"[TIMESTAMP]","status":"running"} -=== Sending shutdown request... ->>> curl -s -o /dev/null http://127.0.0.1:$(port)/shutdown -Process terminated +Exit code: 1 diff --git a/acceptance/cmd/workspace/apps/run-local/out.run.txt b/acceptance/cmd/workspace/apps/run-local/out.run.txt new file mode 100644 index 0000000000..0988a5b50b --- /dev/null +++ b/acceptance/cmd/workspace/apps/run-local/out.run.txt @@ -0,0 +1,54 @@ + +>>> [CLI] apps run-local --prepare-environment --debug --port 8081 --debug-port 5252 --app-port 8080 +Using CPython 3.13.5 +Creating virtual environment at: .venv +warning: A virtual environment already exists at `.venv`. In the future, uv will require `--clear` to replace it +Activate with: source .venv/bin/activate +Resolved 129 packages in 40ms +Uninstalled 2 packages in 14ms +Installed 2 packages in 7ms + - flask==3.1.1 + + flask==3.0.3 + - werkzeug==3.1.3 + + werkzeug==3.0.6 +Resolved 7 packages in 2ms +Uninstalled 2 packages in 17ms +Installed 2 packages in 14ms + - flask==3.0.3 + + flask==3.1.1 + - werkzeug==3.0.6 + + werkzeug==3.1.3 +Running command: uv run python -m debugpy --listen 5252 -m flask run +To debug your app, attach a debugger to port $(debug_port) +To access your app go to http://localhost:8081 +listen tcp 127.0.0.1:$(port): bind: address already in use +0.00s - Debugger warning: It seems that frozen modules are being used, which may +0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off +0.00s - to python to disable frozen modules. +0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation. 
+Traceback (most recent call last): + File "", line 198, in _run_module_as_main + File "", line 88, in _run_code + File "[TEST_TMP_DIR]/app/.venv/lib/python3.13/site-packages/debugpy/__main__.py", line 71, in + cli.main() + ~~~~~~~~^^ + File "[TEST_TMP_DIR]/app/.venv/lib/python3.13/site-packages/debugpy/server/cli.py", line 508, in main + run() + ~~~^^ + File "[TEST_TMP_DIR]/app/.venv/lib/python3.13/site-packages/debugpy/server/cli.py", line 376, in run_module + start_debugging(argv_0) + ~~~~~~~~~~~~~~~^^^^^^^^ + File "[TEST_TMP_DIR]/app/.venv/lib/python3.13/site-packages/debugpy/server/cli.py", line 328, in start_debugging + debugpy.listen(options.address) + ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "[TEST_TMP_DIR]/app/.venv/lib/python3.13/site-packages/debugpy/public_api.py", line 47, in wrapper + return wrapped(*args, **kwargs) + File "[TEST_TMP_DIR]/app/.venv/lib/python3.13/site-packages/debugpy/server/api.py", line 133, in debug + log.reraise_exception("{0}() failed:", func.__name__, level="info") + ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "[TEST_TMP_DIR]/app/.venv/lib/python3.13/site-packages/debugpy/server/api.py", line 131, in debug + return func(address, settrace_kwargs, **kwargs) + File "[TEST_TMP_DIR]/app/.venv/lib/python3.13/site-packages/debugpy/server/api.py", line 260, in listen + raise RuntimeError(str(endpoints["error"])) +RuntimeError: Can't listen for client connections: [Errno 48] Address already in use +Error: exit status 1 diff --git a/acceptance/cmd/workspace/apps/run-local/output.txt b/acceptance/cmd/workspace/apps/run-local/output.txt index 94386290ef..00cf46ca71 100644 --- a/acceptance/cmd/workspace/apps/run-local/output.txt +++ b/acceptance/cmd/workspace/apps/run-local/output.txt @@ -8,27 +8,4 @@ Hello, world === Starting the app in background... === Waiting -=== Checking app is running... 
->>> curl -s -o - http://127.0.0.1:$(port) -{ - "Accept": "*/*", - "Accept-Encoding": "gzip", - "Host": "127.0.0.1:$(port)", - "User-Agent": "curl/(version)", - "X-Forwarded-Email": "[USERNAME]", - "X-Forwarded-Host": "localhost", - "X-Forwarded-Preferred-Username": "", - "X-Forwarded-User": "[USERNAME]", - "X-Real-Ip": "127.0.0.1", - "X-Request-Id": "[UUID]" -} - -=== Sending shutdown request... ->>> curl -s -o /dev/null http://127.0.0.1:$(port)/shutdown - -=== Checking CLI command output... ->>> grep To debug your app, attach a debugger to port ./out.run.txt -To debug your app, attach a debugger to port $(debug_port) - ->>> grep -o Python Flask app has started with: test ./out.run.txt -Python Flask app has started with: test +Error: Test script killed due to a timeout diff --git a/libs/template/templates/default-python/databricks_template_schema.json b/libs/template/templates/default-python/databricks_template_schema.json index 9c59419ec2..8be3139976 100644 --- a/libs/template/templates/default-python/databricks_template_schema.json +++ b/libs/template/templates/default-python/databricks_template_schema.json @@ -1,49 +1,68 @@ { - "welcome_message": "\nWelcome to the default Python template for Databricks Asset Bundles!", + "welcome_message": "Welcome to the default Python template for Databricks Asset Bundles!\n\nPlease answer the below to tailor your project to your preferences.\nYou can always change your mind and change your configuration in the databricks.yml file later.\n\nNote that {{workspace_host}} is used for initialization\n(see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change your profile).", "properties": { "project_name": { "type": "string", "default": "my_project", - "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project", + "description": "\nUnique name for this project", "order": 1, "pattern": "^[A-Za-z0-9_]+$", "pattern_match_failure_message": "Name 
must consist of letters, numbers, and underscores." }, "project_name_short": { + "//": "This is a phony property that is derived from project_name (it replaces my_project with sample and strips _project|_app|_service)", "skip_prompt_if": {}, "type": "string", - "default": "{{index ((regexp \"^(.*)_project$\").FindStringSubmatch .project_name) 1}}", - "description": "Short name for the project (without _project suffix)", + "default": "{{if eq .project_name \"my_project\"}}sample{{else}}{{with (regexp \"^(my_)?(.*)(_project|_app|_service)?$\").FindStringSubmatch .project_name}}{{index . 2}}{{else}}{{.project_name}}{{end}}{{end}}", + "description": "Short name for the project", "order": 2 }, "include_job": { "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a Lakeflow job that runs a notebook in '{{.project_name}}{{path_separator}}resources'", + "description": "Include a Lakeflow job that runs a notebook", "order": 3 }, "include_pipeline": { "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a Lakeflow pipeline in '{{.project_name}}{{path_separator}}resources'", + "description": "Include a Lakeflow ETL pipeline", "order": 4 }, "include_python": { "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a Python package with a build configuration in '{{.project_name}}{{path_separator}}lib'", + "description": "Include a sample Python package that is built to a wheel file", "order": 5 }, "serverless": { "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Use serverless compute", + "description": "Use serverless compute?", "order": 6 + }, + "default_catalog": { + "type": "string", + "default": "{{default_catalog}}", + "pattern": "^\\w*$", + "pattern_match_failure_message": "Invalid catalog name.", + "description": "Default catalog for any tables created by this project{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}", 
+ "order": 7 + }, + "personal_schemas": { + "type": "string", + "description": "Use a personal schema for each user working on this project\n(this is recommended, your personal schema will be '{{.default_catalog}}.{{short_name}}')", + "default": "yes", + "enum": [ + "yes", + "no (advanced: I will customize the schema configuration later in databricks.yml)" + ], + "order": 8 } }, - "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." + "success_message": "\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." } diff --git a/libs/template/templates/default-python/template/__preamble.tmpl b/libs/template/templates/default-python/template/__preamble.tmpl index a3737c7b34..56d6965a3e 100644 --- a/libs/template/templates/default-python/template/__preamble.tmpl +++ b/libs/template/templates/default-python/template/__preamble.tmpl @@ -4,22 +4,38 @@ This file only template directives; it is skipped for the actual output. 
{{skip "__preamble"}} -{{$notPipeline := not (eq .include_pipeline "yes")}} -{{$notJob := not (eq .include_job "yes")}} -{{$notPythonPackage := not (eq .include_python "yes")}} +{{$pipeline := eq .include_pipeline "yes"}} +{{$job := eq .include_job "yes"}} +{{$python_package := eq .include_python "yes"}} -{{if $notPythonPackage}} - {{skip "{{.project_name}}/lib/{{.project_name}}"}} +{{if not $python_package}} + {{skip "{{.project_name}}/pyproject.toml"}} {{end}} -{{if $notPipeline}} - {{skip "{{.project_name}}/resources/{{.project_name_short}}_pipeline"}} +{{if not $pipeline}} + {{skip "{{.project_name}}/resources/{{.project_name_short}}_etl.pipeline.yml"}} + {{skip "{{.project_name}}/src/{{.project_name_short}}_etl"}} {{end}} -{{if $notJob}} - {{skip "{{.project_name}}/resources/{{.project_name_short}}_job"}} +{{if not $job}} + {{skip "{{.project_name}}/src/sample_notebook.ipynb"}} + {{skip "{{.project_name}}/src/sample_python_file.py"}} + {{if not $pipeline}} + {{skip "{{.project_name}}/resources/{{.project_name_short}}_job.job.yml"}} + {{end}} {{end}} -{{if not (or $notPipeline $notJob)}} - {{skip "{{.project_name}}/resources/.gitkeep"}} +{{if and (not $pipeline) (not $job) (not $python_package)}} + {{skip "{{.project_name}}/src/shared"}} +{{end}} + +# Remove tests for the empty project or if we only have Pipeline +# (which is usually not testable) +{{if and (not $python_package) (not $job)}} + {{skip "{{.project_name}}/tests"}} +{{end}} + +# Remove .gitkeep files for a non-empty project +{{if or $python_package $job $pipeline}} + {{skip "{{.project_name}}/src/.gitkeep"}} {{end}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json index f3be9a10ae..d8468d7b60 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json +++ 
b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json @@ -8,17 +8,23 @@ "**/*.egg-info": true, "**/__pycache__": true, ".pytest_cache": true, + "dist": true, }, + "files.associations": { + "**/.gitkeep": "markdown" + } // Pylance settings (VS Code) - "python.analysis.extraPaths": ["src", "resources"], - "python.analysis.typeCheckingMode": "basic", + // Set typeCheckingMode to "basic" to enable type checking! + "python.analysis.typeCheckingMode": "off", + "python.analysis.extraPaths": ["src", "lib", "resources"], "python.analysis.diagnosticMode": "workspace", "python.analysis.stubPath": ".vscode", // Pyright settings (Cursor) - "cursorpyright.analysis.extraPaths": ["src", "resources"], - "cursorpyright.analysis.typeCheckingMode": "basic", + // Set typeCheckingMode to "basic" to enable type checking! + "cursorpyright.analysis.typeCheckingMode": "off", + "cursorpyright.analysis.extraPaths": ["src", "lib", "resources"], "cursorpyright.analysis.diagnosticMode": "workspace", "cursorpyright.analysis.stubPath": ".vscode", @@ -27,7 +33,7 @@ "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, "[python]": { - "editor.defaultFormatter": "ms-python.black-formatter", + "editor.defaultFormatter": "ms-python.python", "editor.formatOnSave": true, }, } diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index 02da531477..c486c29ed9 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -1,10 +1,16 @@ # {{.project_name}} The '{{.project_name}}' project was generated by using the default-python template. - For documentation on the Databricks Asset Bundles format use for this project, and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles. 
+* `src/`: Python source code for this project. +* `src/shared`: Shared source code across all jobs/pipelines/etc. +* `src/{{.project_name_short}}_etl`: Python source code for the {{.project_name_short}}_etl pipeline. +* `resources/`: Resource configurations (jobs, pipelines, etc.) +* `tests/`: Unit tests. +* `fixtures/`: Fixtures for data sets (primarily used for testing). + ## Getting started Choose how you want to work on this project: @@ -17,13 +23,14 @@ Choose how you want to work on this project: (c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html -{{if (eq .include_python "yes") }} +{{- if or (eq .include_python "yes") (eq .include_job "yes")}} + Dependencies for this project should be installed using uv: * Make sure you have the UV package manager installed. It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/. * Run `uv sync --dev` to install the project's dependencies. -{{- end}} +{{end}} # Using this project using the CLI @@ -62,7 +69,9 @@ with this project. It's also possible to interact with it directly using the CLI $ databricks bundle run ``` +{{- if or (eq .include_python "yes") (eq .include_job "yes")}} 5. Finally, to run tests locally, use `pytest`: ``` $ uv run pytest ``` +{{end}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl deleted file mode 100644 index c0e6fe4917..0000000000 --- a/libs/template/templates/default-python/template/{{.project_name}}/conftest.py.tmpl +++ /dev/null @@ -1,83 +0,0 @@ -"""This file configures pytest. - -This file is in the root since it can be used for tests in any place in this -bundle, including tests under resources/. 
- -This module sets up the test environment by: -- Adding resources/* directories to sys.path for module discovery -- Configuring fallback serverless compute if needed -- Providing a SparkSession fixture for tests -- Managing pytest output capture for better debugging -""" - -import os, sys, pathlib -from contextlib import contextmanager - - -try: - from databricks.connect import DatabricksSession - from databricks.sdk import WorkspaceClient - from pyspark.sql import SparkSession - import pytest -except ImportError: - raise ImportError( - "Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn move about uv." - ) - - -def add_all_resources_to_sys_path(): - """Add all resources/* directories to sys.path for module discovery.""" - resources = pathlib.Path(__file__).with_name("resources") - resource_dirs = filter(pathlib.Path.is_dir, resources.iterdir()) - seen: dict[str, pathlib.Path] = {} - for resource in resource_dirs: - sys.path.append(str(resource.resolve())) - for py in resource.rglob("*.py"): - mod = ".".join(py.relative_to(resource).with_suffix("").parts) - if mod in seen: - raise ImportError( - f"Duplicate module '{mod}' found:\n {seen[mod]}\n {py}" - ) - seen[mod] = py - - -def enable_fallback_compute(): - """Enable serverless compute if no compute is specified.""" - conf = WorkspaceClient().config - if conf.serverless_compute_id or conf.cluster_id or os.environ.get("SPARK_REMOTE"): - return - - url = "https://docs.databricks.com/dev-tools/databricks-connect/cluster-config" - print("☁️ no compute specified, falling back to serverless compute", file=sys.stderr) - print(f" see {url} for manual configuration", file=sys.stdout) - - os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto" - - -@contextmanager -def allow_stderr_output(config: pytest.Config): - """Temporarily disable pytest output capture.""" - capman = config.pluginmanager.get_plugin("capturemanager") - if capman: - with 
capman.global_and_fixture_disabled(): - yield - else: - yield - - -def pytest_configure(config: pytest.Config): - """Configure pytest session.""" - with allow_stderr_output(config): - add_all_resources_to_sys_path() - enable_fallback_compute() - - -@pytest.fixture(scope="session", autouse=True) -def spark() -> SparkSession: - """Provide a SparkSession fixture for tests.""" - if hasattr(DatabricksSession.builder, "validateSession"): - {{/* This is relevant as long as DB Connect 15 is in use. - * DB Connect >16 automaticaly does this validation! */}} - # For DB Connect 15+, validate that the version is compatible with the remote cluster - return DatabricksSession.builder.validateSession().getOrCreate() - return DatabricksSession.builder.getOrCreate() diff --git a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl index 99e9d3b7c2..d321d52a11 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl @@ -5,15 +5,31 @@ bundle: name: {{.project_name}} uuid: {{bundle_uuid}} -{{ if $with_python }} + +include: + - resources/*.yml + - resources/*/*.yml + +{{- if $with_python}} + artifacts: python_artifact: type: whl build: uv build --wheel -{{ end }} -include: - - resources/*.yml - - resources/*/*.yml +{{- end}} + +# Variable declarations. These variables are assigned in the dev/prod targets below. 
+variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + +{{- $dev_schema := "dev" }} +{{- $prod_schema := "prod" }} +{{- if (regexp "^yes").MatchString .personal_schemas}} + {{- $dev_schema = "${workspace.current_user.short_name}"}} +{{- end}} targets: dev: @@ -25,20 +41,22 @@ targets: default: true workspace: host: {{workspace_host}} -{{ if ($with_classic) }} + variables: + catalog: {{.default_catalog}} + schema: {{$dev_schema}} + {{- if $with_classic}} presets: - # Set dynamic_version: true on all artifacts of type "whl". - # This makes "bundle deploy" add a timestamp to wheel's version before uploading, - # new wheel takes over the previous installation even if actual wheel version is unchanged. - # See https://docs.databricks.com/aws/en/dev-tools/bundles/settings artifacts_dynamic_version: true -{{ end }} + {{- end}} prod: mode: production workspace: host: {{workspace_host}} # We explicitly deploy to /Workspace/Users/{{user_name}} to make sure we only have a single copy. root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + variables: + catalog: {{.default_catalog}} + schema: {{$prod_schema}} permissions: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} level: CAN_MANAGE diff --git a/libs/template/templates/default-python/template/{{.project_name}}/explorations/README.md b/libs/template/templates/default-python/template/{{.project_name}}/explorations/README.md deleted file mode 100644 index e6cfb81b46..0000000000 --- a/libs/template/templates/default-python/template/{{.project_name}}/explorations/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# scratch - -This folder is reserved for personal, exploratory notebooks. -By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/explorations/exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/explorations/exploration.ipynb.tmpl deleted file mode 100644 index d5c05798ac..0000000000 --- a/libs/template/templates/default-python/template/{{.project_name}}/explorations/exploration.ipynb.tmpl +++ /dev/null @@ -1,65 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - {{- if (eq .include_python "yes") }} - "import sys\n", - "\n", - "sys.path.append(\"../src\")\n", - "from {{.project_name}} import main\n", - "\n", - "main.get_taxis().show(10)" - {{else}} - "spark.read.table(\"samples.nyctaxi.trips\")" - {{end -}} - ] - } - ], - "metadata": { - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 2 - }, - "notebookName": "ipynb-notebook", - "widgets": {} - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/fixtures/.gitkeep.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/fixtures/.gitkeep.tmpl index ee95703028..a84a182f1f 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/fixtures/.gitkeep.tmpl +++ 
b/libs/template/templates/default-python/template/{{.project_name}}/fixtures/.gitkeep.tmpl @@ -1,27 +1,15 @@ -# Fixtures +# Test fixtures directory + {{- /* We don't want to have too many README.md files, since they stand out so much. But we do need to have a file here to make sure the folder is added to Git. */}} -This folder is reserved for fixtures, such as CSV files. - -Below is an example of how to load fixtures as a data frame: +Add JSON or CSV files here. In tests, use them with `load_fixture()`: ``` -import pandas as pd -import os - -def get_absolute_path(*relative_parts): - if 'dbutils' in globals(): - base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore - path = os.path.normpath(os.path.join(base_dir, *relative_parts)) - return path if path.startswith("/Workspace") else "/Workspace" + path - else: - return os.path.join(*relative_parts) - -csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") -df = pd.read_csv(csv_file) -display(df) +def test_using_fixture(load_fixture): + data = load_fixture("my_data.json") + assert len(data) >= 1 ``` diff --git a/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl index 57d689d75a..3b2db1d466 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/pyproject.toml.tmpl @@ -17,7 +17,7 @@ dev = [ ] [tool.pytest.ini_options] -pythonpath = "lib" +pythonpath = "src" testpaths = [ "tests", "resources", @@ -28,7 +28,8 @@ requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["lib/{{.project_name}}"] +packages = ["src"] +sources = ["src"] [project.scripts] -main = "{{.project_name}}.main:main" +main = "sample_python_file:main" diff --git 
a/libs/template/templates/default-python/template/{{.project_name}}/resources/sample_job.job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/sample_job.job.yml.tmpl new file mode 100644 index 0000000000..bbc5a952a1 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/sample_job.job.yml.tmpl @@ -0,0 +1,98 @@ +# A sample job for {{.project_name}}. + +{{- $serverless := (eq .serverless "yes")}} +{{- $python_package := (eq .include_python "yes")}} +{{- $notebook := (eq .include_job "yes")}} +{{- $pipeline := (eq .include_pipeline "yes")}} + +resources: + jobs: + sample_job: + name: sample_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + #email_notifications: + # on_failure: + # - your_email@example.com + + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} + + tasks: + +{{- if $notebook}} + - task_key: notebook_task + notebook_task: + notebook_path: ../src/sample_notebook.ipynb + {{- if $serverless}} + environment_key: default + {{- else}} + job_cluster_key: job_cluster + {{- if $python_package}} + libraries: + # By default we just include the .whl file generated for the {{.project_name_short}} package in src/. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. 
+ - whl: ../../dist/*.whl + {{- end}} + {{- end}} +{{- end}} + +{{- if $python_package}} + - task_key: python_file_task + depends_on: + - task_key: notebook_task + spark_python_task: + python_file: ../src/sample_python_file.py + {{- if $serverless}} + environment_key: default + {{- else}} + job_cluster_key: job_cluster + {{- if $python_package}} + libraries: + - whl: ../../dist/*.whl + {{- end}} + {{- end}} +{{- end}} + +{{- if $pipeline}} + - task_key: refresh_pipeline + depends_on: + - task_key: notebook_task + pipeline_task: + pipeline_id: ${resources.pipelines.{{.project_name_short}}_etl.id} +{{- end}} + +{{- if $serverless}} + + environments: + - environment_key: default + spec: + client: "2" + {{- if $python_package}} + dependencies: + # By default we just include the .whl file generated for the {{.project_name_short}} package in src/. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + - ../dist/*.whl + {{- end}} +{{- else}} + + job_clusters: + - job_cluster_key: job_cluster + new_cluster: + spark_version: {{template "latest_lts_dbr_version"}} + node_type_id: {{smallest_node_type}} + data_security_mode: SINGLE_USER + autoscale: + min_workers: 1 + max_workers: 4 +{{- end}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/{{.project_name_short}}_etl_pipeline.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl.pipeline.yml.tmpl similarity index 51% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/{{.project_name_short}}_etl_pipeline.pipeline.yml.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl.pipeline.yml.tmpl index d5a1107f05..251e839751 100644 --- 
a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/{{.project_name_short}}_etl_pipeline.pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl.pipeline.yml.tmpl @@ -1,29 +1,29 @@ -{{$with_serverless := (eq .serverless "yes") -}} +{{- $with_serverless := (eq .serverless "yes")}} # The main pipeline for {{.project_name}} resources: pipelines: - {{.project_name_short}}_pipeline: - name: {{.project_name_short}}_pipeline_{{short_date_time}} + {{.project_name_short}}_etl: + {{- /* Note that pipeline names must be unique in a worskspace, + * so we use the project name as part as the name. + */}} + name: {{.project_name_short}}_etl {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} {{- if $with_serverless }} ## Catalog is required for serverless compute - catalog: main{{else}} + catalog: main + {{- else}} ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: - # catalog: catalog_name{{end}} + # catalog: catalog_name + {{- end}} {{- else}} catalog: {{default_catalog}} {{- end}} schema: {{.project_name}}_${bundle.target} - {{- if $with_serverless }} + {{- if $with_serverless}} serverless: true {{- end}} - root_path: "." 
+ root_path: ../src libraries: - glob: - include: transformations/** - - # Currently in beta: environments support for pipelines - ## environment: - ## dependencies: - ## - ../../dist/*.whl + include: ../src/{{.project_name_short}}_etl/transformations/** diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/utilities/utils.py b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/utilities/utils.py deleted file mode 100644 index ff039898f0..0000000000 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/utilities/utils.py +++ /dev/null @@ -1,8 +0,0 @@ -from pyspark.sql.functions import udf -from pyspark.sql.types import FloatType - - -@udf(returnType=FloatType()) -def distance_km(distance_miles): - """Convert distance from miles to kilometers (1 mile = 1.60934 km).""" - return distance_miles * 1.60934 diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/{{.project_name_short}}_schedule.job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/{{.project_name_short}}_schedule.job.yml.tmpl deleted file mode 100644 index 948ae7239a..0000000000 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/{{.project_name_short}}_schedule.job.yml.tmpl +++ /dev/null @@ -1,20 +0,0 @@ -# The job that triggers {{.project_name}}. 
-resources: - jobs: - {{.project_name_short}}_schedule: - name: {{.project_name_short}}_schedule - - trigger: - # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger - periodic: - interval: 1 - unit: DAYS - - #email_notifications: - # on_failure: - # - your_email@example.com - - tasks: - - task_key: refresh_pipeline - pipeline_task: - pipeline_id: ${resources.pipelines.{{.project_name_short}}_pipeline.id} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_python_file.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_python_file.py.tmpl deleted file mode 100644 index 1e1f24deb3..0000000000 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_python_file.py.tmpl +++ /dev/null @@ -1,3 +0,0 @@ -import {{.project_name}} - -{{.project_name}}.main() diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}_job.job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}_job.job.yml.tmpl deleted file mode 100644 index 571c8ce3a5..0000000000 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/{{.project_name_short}}_job.job.yml.tmpl +++ /dev/null @@ -1,69 +0,0 @@ -# A sample job for {{.project_name}}. 
- -{{$with_serverless := (eq .serverless "yes") -}} - -resources: - jobs: - sample_job: - name: sample_job - - trigger: - # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger - periodic: - interval: 1 - unit: DAYS - - #email_notifications: - # on_failure: - # - your_email@example.com - - tasks: - - task_key: notebook_task - {{- if not $with_serverless}} - job_cluster_key: job_cluster{{end}} - notebook_task: - notebook_path: sample_notebook.ipynb - {{- end -}} - {{- if (eq .include_python "yes") }} - - - task_key: main_task - {{- if (eq .include_dlt "yes") }} - depends_on: - - task_key: refresh_pipeline - {{- else if (eq .include_notebook "yes" )}} - depends_on: - - task_key: notebook_task - {{- if $with_serverless }} - environment_key: default - {{- else }} - job_cluster_key: job_cluster{{end}} - spark_python_task: - python_file: sample_python_file.py - {{- if not $with_serverless }} - libraries: - # By default we just include the .whl file generated for the {{.project_name}} package. - # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html - # for more information on how to add other libraries. - - whl: ../../dist/*.whl -{{- end -}} -{{else}} -{{- end}} -{{if $with_serverless}}{{if (eq .include_python "yes")}} - # A list of task execution environment specifications that can be referenced by tasks of this job. 
- environments: - - environment_key: default - spec: - client: "2" - dependencies: - - ../../dist/*.whl -{{end}}{{ else }} - job_clusters: - - job_cluster_key: job_cluster - new_cluster: - spark_version: {{template "latest_lts_dbr_version"}} - node_type_id: {{smallest_node_type}} - data_security_mode: SINGLE_USER - autoscale: - min_workers: 1 - max_workers: 4 -{{end -}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/.gitkeep b/libs/template/templates/default-python/template/{{.project_name}}/src/.gitkeep new file mode 100644 index 0000000000..0e0ed1e00b --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/.gitkeep @@ -0,0 +1 @@ +This folder is reserved for Databricks Asset Bundles source files. diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_notebook.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/sample_notebook.ipynb.tmpl similarity index 91% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_notebook.ipynb.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/src/sample_notebook.ipynb.tmpl index 7d50fae137..f5262cb703 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_job/sample_notebook.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/sample_notebook.ipynb.tmpl @@ -46,12 +46,12 @@ "outputs": [], "source": [ {{- if (eq .include_python "yes") }} - "from {{.project_name}}.main import main\n", + "from shared import taxis\n", "\n", - "main.find_all_taxis().show(10)" - {{else}} + "taxis.find_all_taxis().show(10)" + {{- else}} "spark.range(10)" - {{end -}} + {{- end}} ] } ], @@ -62,7 +62,7 @@ "notebookMetadata": { "pythonIndentUnit": 2 }, - "notebookName": "notebook", + "notebookName": 
"sample_notebook", "widgets": {} }, "kernelspec": { diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/sample_python_file.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/sample_python_file.py.tmpl new file mode 100644 index 0000000000..bb63dd9cd7 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/sample_python_file.py.tmpl @@ -0,0 +1,19 @@ +import argparse +from datetime import datetime +from shared import taxis + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--catalog", default="{{.default_catalog}}") + parser.add_argument("--schema", default="default") + args = parser.parse_args() + + df = taxis.find_all_taxis() + + table_name = f"{args.catalog}.{args.schema}.taxis_{{short_date_time}}" + df.write.mode("overwrite").saveAsTable(table_name) + + print(f"Wrote {df.count()} taxi records to {table_name}") + +if __name__ == "__main__": + main() diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/shared/__init__.py b/libs/template/templates/default-python/template/{{.project_name}}/src/shared/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/shared/taxis.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/shared/taxis.py.tmpl new file mode 100644 index 0000000000..a7309cd4c5 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/shared/taxis.py.tmpl @@ -0,0 +1,7 @@ +from databricks.sdk.runtime import spark +from pyspark.sql import DataFrame + + +def find_all_taxis() -> DataFrame: + """Find all taxi data.""" + return spark.read.table("samples.nyctaxi.trips") diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/README.md.tmpl 
b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/README.md.tmpl similarity index 61% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/README.md.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/README.md.tmpl index c3e9fea9c2..d425d343c1 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/README.md.tmpl @@ -2,17 +2,17 @@ This folder defines all source code for the {{.project_name}} pipeline: -- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. -- `transformations`: All dataset definitions and transformations. -- `utilities` (optional): Utility functions and Python modules used in this pipeline. -- `data_sources` (optional): View definitions describing the source data for this pipeline. +- `explorations/`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations/`: All dataset definitions and transformations. +- `utilities/` (optional): Utility functions and Python modules used in this pipeline. +- `data_sources/` (optional): View definitions describing the source data for this pipeline. ## Getting Started To get started, go to the `transformations` folder -- most of the relevant source code lives there: * By convention, every dataset under `transformations` is in a separate file. -* Take a look at the sample under "sample_trips_{{.project_name}}.py" to get familiar with the syntax. +* Take a look at the sample under "sample_trips_{{short_date_time}}.py" to get familiar with the syntax. Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. 
* Use `Run file` to run and preview a single transformation. * Use `Run pipeline` to run _all_ transformations in the entire pipeline. diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/__init__.py b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/explorations/__init__.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/explorations/__init__.py.tmpl new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/explorations/sample_exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/explorations/sample_exploration.ipynb.tmpl similarity index 91% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/explorations/sample_exploration.ipynb.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/explorations/sample_exploration.ipynb.tmpl index 723574966a..33c5e1f896 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/explorations/sample_exploration.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/explorations/sample_exploration.ipynb.tmpl @@ -37,7 +37,7 @@ "source": [ "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. 
The tables referenced in this notebook depend on that step.\n", "\n", - "display(spark.sql(\"SELECT * FROM {{default_catalog}}.{{.project_name_short}}_${bundle.target}.sample_trips_{{short_date_time}}\"))" + "display(spark.sql(\"SELECT * FROM {{.default_catalog}}.{{short_name}}.sample_trips_{{short_date_time}}\"))" ] } ], diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/transformations/__init__.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/transformations/__init__.py.tmpl new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/transformations/sample_trips_{{short_date_time}}.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/transformations/sample_trips_{{short_date_time}}.py.tmpl similarity index 87% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/transformations/sample_trips_{{short_date_time}}.py.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/transformations/sample_trips_{{short_date_time}}.py.tmpl index 9fd62c2fae..9f6b449f7d 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/transformations/sample_trips_{{short_date_time}}.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/transformations/sample_trips_{{short_date_time}}.py.tmpl @@ -1,6 +1,6 @@ import dlt from pyspark.sql.functions import col -from utilities import utils +from {{.project_name_short}}_etl.utilities import utils # This file defines a sample transformation. 
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/transformations/sample_zones_{{short_date_time}}.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/transformations/sample_zones_{{short_date_time}}.py.tmpl similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name_short}}_etl_pipeline/transformations/sample_zones_{{short_date_time}}.py.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/transformations/sample_zones_{{short_date_time}}.py.tmpl diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/utilities/__init__.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/utilities/__init__.py.tmpl new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/utilities/utils.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/utilities/utils.py.tmpl new file mode 100644 index 0000000000..f0f4e940f7 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name_short}}_etl/utilities/utils.py.tmpl @@ -0,0 +1,12 @@ +from pyspark.sql.functions import col, when + + +def distance_km(distance_col): + """Convert distance from miles to kilometers.""" + return distance_col * 1.60934 + + +def format_currency(amount_col): + """Format amount as currency.""" + return when(col(amount_col).isNotNull(), + col(amount_col).cast("decimal(10,2)")) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/tests/conftest.py b/libs/template/templates/default-python/template/{{.project_name}}/tests/conftest.py new 
file mode 100644 index 0000000000..8037a4647c --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/tests/conftest.py @@ -0,0 +1,93 @@ +"""This file configures pytest. + +This file lives in the tests/ directory, so its fixtures and hooks apply to +every test collected from that directory. +""" + +import os, sys, pathlib +from contextlib import contextmanager + + +try: + from databricks.connect import DatabricksSession + from databricks.sdk import WorkspaceClient + from pyspark.sql import SparkSession + import pytest + import json + import csv + import os +except ImportError: + raise ImportError("Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn more about uv.") + + +@pytest.fixture() +def spark() -> SparkSession: + """Provide a SparkSession fixture for tests. + + Minimal example: + def test_uses_spark(spark): + df = spark.createDataFrame([(1,)], ["x"]) + assert df.count() == 1 + """ + return DatabricksSession.builder.getOrCreate() + +@pytest.fixture() +def load_fixture(spark: SparkSession): + """Provide a callable to load JSON or CSV from fixtures/ directory. 
+ + Example usage: + + def test_using_fixture(load_fixture): + data = load_fixture("my_data.json") + assert data.count() >= 1 + """ + def _loader(filename: str): + path = pathlib.Path(__file__).parent.parent / "fixtures" / filename + suffix = path.suffix.lower() + if suffix == ".json": + rows = json.loads(path.read_text()) + return spark.createDataFrame(rows) + if suffix == ".csv": + with path.open(newline="") as f: + rows = list(csv.DictReader(f)) + return spark.createDataFrame(rows) + raise ValueError(f"Unsupported fixture type for: {filename}") + return _loader + + +def _enable_fallback_compute(): + """Enable serverless compute if no compute is specified.""" + conf = WorkspaceClient().config + if conf.serverless_compute_id or conf.cluster_id or os.environ.get("SPARK_REMOTE"): + return + + url = "https://docs.databricks.com/dev-tools/databricks-connect/cluster-config" + print("☁️ no compute specified, falling back to serverless compute", file=sys.stderr) + print(f" see {url} for manual configuration", file=sys.stderr) + + os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto" + + +@contextmanager +def _allow_stderr_output(config: pytest.Config): + """Temporarily disable pytest output capture.""" + capman = config.pluginmanager.get_plugin("capturemanager") + if capman: + with capman.global_and_fixture_disabled(): + yield + else: + yield + + +def pytest_configure(config: pytest.Config): + """Configure pytest session.""" + with _allow_stderr_output(config): + _enable_fallback_compute() + + # Initialize Spark session eagerly, so it is available even when + # SparkSession.builder.getOrCreate() is used. For DB Connect 15+, + # we validate version compatibility with the remote cluster. 
+ if hasattr(DatabricksSession.builder, "validateSession"): + DatabricksSession.builder.validateSession().getOrCreate() + else: + DatabricksSession.builder.getOrCreate() diff --git a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl deleted file mode 100644 index 5163eec788..0000000000 --- a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl +++ /dev/null @@ -1,8 +0,0 @@ -from {{.project_name}}.main import find_all_taxis -from databricks.sdk.runtime import spark -from pyspark.sql import DataFrame - - -def test_find_all_taxis(): - taxis = find_all_taxis() - assert taxis.count() > 5 diff --git a/libs/template/templates/default-python/template/{{.project_name}}/tests/sample_taxis_test.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/tests/sample_taxis_test.py.tmpl new file mode 100644 index 0000000000..a782015363 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/tests/sample_taxis_test.py.tmpl @@ -0,0 +1,8 @@ +from databricks.sdk.runtime import spark +from pyspark.sql import DataFrame +from shared import taxis + + +def test_find_all_taxis(): + results = taxis.find_all_taxis() + assert results.count() > 5 diff --git a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl index bd284b0252..23effcc3e4 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl @@ -4,7 +4,7 @@ This file only contains template directives; it is skipped for the actual output {{skip "__preamble"}} -{{$notDLT := not (eq .include_dlt "yes")}} +{{$notDLT := not (eq .include_pipeline "yes")}} {{$notNotebook := not (eq .include_notebook "yes")}} {{$notPython := 
not (eq .include_python "yes")}}