From 442f13c8ad8d06b043cc28b0addc1676f4c88a87 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Fri, 30 May 2025 11:12:26 +0200 Subject: [PATCH 01/11] Add Lakeflow template --- libs/template/template.go | 8 ++ .../template/__preamble.tmpl | 2 +- libs/template/templates/lakeflow/README.md | 4 + .../lakeflow/databricks_template_schema.json | 66 +++++++++ .../templates/lakeflow/library/variables.tmpl | 37 +++++ .../lakeflow/template/__preamble.tmpl | 21 +++ .../{{.project_name}}/.gitignore.tmpl | 8 ++ .../.vscode/__builtins__.pyi | 3 + .../{{.project_name}}/.vscode/extensions.json | 7 + .../.vscode/settings.json.tmpl | 22 +++ .../template/{{.project_name}}/README.md.tmpl | 41 ++++++ .../{{.project_name}}/databricks.yml.tmpl | 49 +++++++ .../README.md.tmpl | 46 +++++++ .../sample_exploration.ipynb.tmpl | 130 ++++++++++++++++++ ...s_{{template \"table_suffix\" .}}.py.tmpl" | 16 +++ ..._{{template \"table_suffix\" .}}.sql.tmpl" | 9 ++ ...s_{{template \"table_suffix\" .}}.py.tmpl" | 19 +++ ..._{{template \"table_suffix\" .}}.sql.tmpl" | 10 ++ .../utilities/utils.py | 8 ++ ...{template `pipeline_name` .}}.job.yml.tmpl | 19 +++ ...late `pipeline_name` .}}.pipeline.yml.tmpl | 14 ++ 21 files changed, 538 insertions(+), 1 deletion(-) create mode 100644 libs/template/templates/lakeflow/README.md create mode 100644 libs/template/templates/lakeflow/databricks_template_schema.json create mode 100644 libs/template/templates/lakeflow/library/variables.tmpl create mode 100644 libs/template/templates/lakeflow/template/__preamble.tmpl create mode 100644 libs/template/templates/lakeflow/template/{{.project_name}}/.gitignore.tmpl create mode 100644 libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/__builtins__.pyi create mode 100644 libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/extensions.json create mode 100644 libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/settings.json.tmpl create mode 100644 
libs/template/templates/lakeflow/template/{{.project_name}}/README.md.tmpl create mode 100644 libs/template/templates/lakeflow/template/{{.project_name}}/databricks.yml.tmpl create mode 100644 libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/README.md.tmpl create mode 100644 libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/explorations/sample_exploration.ipynb.tmpl create mode 100644 "libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.py.tmpl" create mode 100644 "libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.sql.tmpl" create mode 100644 "libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.py.tmpl" create mode 100644 "libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.sql.tmpl" create mode 100644 libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/utilities/utils.py create mode 100644 libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.job.yml.tmpl create mode 100644 libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.pipeline.yml.tmpl diff --git a/libs/template/template.go b/libs/template/template.go index 423770a54e..a6febe5749 100644 --- a/libs/template/template.go +++ b/libs/template/template.go @@ -26,6 +26,7 @@ type TemplateName string const ( DefaultPython TemplateName = "default-python" DefaultSql TemplateName = 
"default-sql" + Lakeflow TemplateName = "lakeflow" DbtSql TemplateName = "dbt-sql" MlopsStacks TemplateName = "mlops-stacks" DefaultPydabs TemplateName = "default-pydabs" @@ -46,6 +47,13 @@ var databricksTemplates = []Template{ Reader: &builtinReader{name: string(DefaultSql)}, Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: DefaultSql}}, }, + { + name: Lakeflow, + hidden: true, + description: "The Lakeflow template for Databricks Asset Bundles", + Reader: &builtinReader{name: string(Lakeflow)}, + Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: Lakeflow}}, + }, { name: DbtSql, description: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", diff --git a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl index 708a299385..bd284b0252 100644 --- a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl +++ b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl @@ -1,6 +1,6 @@ # Preamble -This file only template directives; it is skipped for the actual output. +This file only contains template directives; it is skipped for the actual output. {{skip "__preamble"}} diff --git a/libs/template/templates/lakeflow/README.md b/libs/template/templates/lakeflow/README.md new file mode 100644 index 0000000000..b5dcd0d01a --- /dev/null +++ b/libs/template/templates/lakeflow/README.md @@ -0,0 +1,4 @@ +# Lakeflow template + +This template introduces a new structure for organizing data-engineering +assets in DABs. 
diff --git a/libs/template/templates/lakeflow/databricks_template_schema.json b/libs/template/templates/lakeflow/databricks_template_schema.json new file mode 100644 index 0000000000..ab6d6d21e9 --- /dev/null +++ b/libs/template/templates/lakeflow/databricks_template_schema.json @@ -0,0 +1,66 @@ +{ + "welcome_message": "\nWelcome to the Lakeflow template for Databricks Asset Bundles!", + "properties": { + "project_name": { + "type": "string", + "default": "my_project", + "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project\nproject_name", + "order": 1, + "pattern": "^[a-z0-9_]+$", + "pattern_match_failure_message": "Name must consist of lower case letters, numbers, and underscores." + }, + "default_catalog": { + "type": "string", + "default": "{{default_catalog}}", + "pattern": "^\\w*$", + "pattern_match_failure_message": "Invalid catalog name.", + "description": "\nPlease provide an initial catalog.\ndefault_catalog", + "order": 3 + }, + "personal_schemas": { + "type": "string", + "description": "\nWould you like to use a personal schema for each user working on this project? 
(e.g., 'catalog.{{short_name}}')\npersonal_schemas", + "enum": [ + "yes, use a schema based on the current user name during development", + "no, use a shared schema during development" + ], + "order": 4 + }, + "shared_schema": { + "skip_prompt_if": { + "properties": { + "personal_schemas": { + "const": "yes, use a schema based on the current user name during development" + } + } + }, + "type": "string", + "default": "default", + "pattern": "^\\w+$", + "pattern_match_failure_message": "Invalid schema name.", + "description": "\nPlease provide an initial schema during development.\ndefault_schema", + "order": 5 + }, + "language": { + "type": "string", + "default": "python", + "description": "Please select the language for this project.\nlanguage", + "enum": [ + "python", + "sql" + ], + "order": 6 + }, + "include_job": { + "type": "string", + "description": "\nWould you like to include a job that automatically triggers this pipeline?\nThis trigger will only be enabled for production deployments.\ninclude_job", + "order": 7, + "enum": [ + "yes", + "no" + ], + "default": "no" + } + }, + "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!\n\nRefer to the README.md file for \"getting started\" instructions!" 
+} diff --git a/libs/template/templates/lakeflow/library/variables.tmpl b/libs/template/templates/lakeflow/library/variables.tmpl new file mode 100644 index 0000000000..dd92f2efba --- /dev/null +++ b/libs/template/templates/lakeflow/library/variables.tmpl @@ -0,0 +1,37 @@ +{{- define `table_suffix` -}} + {{ (regexp `^_+|_+$`).ReplaceAllString ((regexp `_+`).ReplaceAllString .project_name `_`) `` }} +{{- end }} + +{{- define `pipeline_name` -}} + {{template `table_suffix` .}}_pipeline +{{- end }} + +{{- define `job_name` -}} + {{template `table_suffix` .}}_job +{{- end }} + +{{- define `static_dev_schema` -}} + {{- if (regexp "^yes").MatchString .personal_schemas -}} + {{ short_name }} + {{- else -}} + {{ .shared_schema }} + {{- end}} +{{- end }} + + +{{- define `dev_schema` -}} + {{- if (regexp "^yes").MatchString .personal_schemas -}} + ${workspace.current_user.short_name} + {{- else -}} + {{ .shared_schema }} + {{- end}} +{{- end }} + + +{{- define `prod_schema` -}} + {{- if (regexp "^yes").MatchString .personal_schemas -}} + default + {{- else -}} + {{ .shared_schema }} + {{- end}} +{{- end }} diff --git a/libs/template/templates/lakeflow/template/__preamble.tmpl b/libs/template/templates/lakeflow/template/__preamble.tmpl new file mode 100644 index 0000000000..12cc3b2be0 --- /dev/null +++ b/libs/template/templates/lakeflow/template/__preamble.tmpl @@ -0,0 +1,21 @@ +# Preamble + +This file only contains template directives; it is skipped for the actual output. 
+ +{{skip "__preamble"}} + +{{$isSQL := eq .language "sql"}} +{{$skipJob := eq .include_job "no"}} + +{{if $isSQL}} + {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/utilities/utils.py"}} + {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template `table_suffix` .}}.py"}} + {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template `table_suffix` .}}.py"}} +{{else}} + {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template `table_suffix` .}}.sql"}} + {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template `table_suffix` .}}.sql"}} +{{end}} + +{{if $skipJob}} + {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.job.yml"}} +{{end}} diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/.gitignore.tmpl b/libs/template/templates/lakeflow/template/{{.project_name}}/.gitignore.tmpl new file mode 100644 index 0000000000..f6a3b5ff93 --- /dev/null +++ b/libs/template/templates/lakeflow/template/{{.project_name}}/.gitignore.tmpl @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +**/explorations/** +!**/explorations/README.md diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/__builtins__.pyi b/libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/__builtins__.pyi new file mode 100644 index 0000000000..0edd5181bc --- /dev/null +++ b/libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/extensions.json 
b/libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/extensions.json new file mode 100644 index 0000000000..5d15eba363 --- /dev/null +++ b/libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/settings.json.tmpl b/libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/settings.json.tmpl new file mode 100644 index 0000000000..2f753e89e8 --- /dev/null +++ b/libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/settings.json.tmpl @@ -0,0 +1,22 @@ +{ + "python.analysis.stubPath": ".vscode", + "databricks.python.envFile": "${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + {{- /* Unfortunately extraPaths doesn't support globs!! 
See: https://github.com/microsoft/pylance-release/issues/973 */}} + "python.analysis.extraPaths": ["assets/etl_pipeline"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + }, +} diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/lakeflow/template/{{.project_name}}/README.md.tmpl new file mode 100644 index 0000000000..5754577d50 --- /dev/null +++ b/libs/template/templates/lakeflow/template/{{.project_name}}/README.md.tmpl @@ -0,0 +1,41 @@ +# {{.project_name}} + +The '{{.project_name}}' project was generated by using the Lakeflow template. + +## Setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks auth login + ``` + +3. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from + https://www.databricks.com/blog/announcing-pycharm-integration-databricks. + + +## Deploying resources + +1. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + +2. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + +3. Use the "summary" comand to review everything that was deployed: + ``` + $ databricks bundle summary + ``` + +4. 
To run a job or pipeline, use the "run" command: + ``` + $ databricks bundle run + ``` diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/lakeflow/template/{{.project_name}}/databricks.yml.tmpl new file mode 100644 index 0000000000..bcb5b765af --- /dev/null +++ b/libs/template/templates/lakeflow/template/{{.project_name}}/databricks.yml.tmpl @@ -0,0 +1,49 @@ +# This is a Databricks asset bundle definition for {{.project_name}}. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: {{.project_name}} + uuid: {{bundle_uuid}} + +include: + - resources/*.yml + - resources/*/*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + notifications: + description: The email addresses to use for failure notifications + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: {{workspace_host}} + variables: + catalog: {{.default_catalog}} + schema: {{template `dev_schema` .}} + notifications: [] + + prod: + mode: production + workspace: + host: {{workspace_host}} + # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. 
+ root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE + run_as: + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + variables: + catalog: {{.default_catalog}} + schema: {{template `prod_schema` .}} + notifications: [{{user_name}}] diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/README.md.tmpl b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/README.md.tmpl new file mode 100644 index 0000000000..8c1e37aba3 --- /dev/null +++ b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/README.md.tmpl @@ -0,0 +1,46 @@ +{{- if (eq .language "python") -}} +# {{template `pipeline_name` .}} + +This folder defines all source code for the {{template `pipeline_name` .}} pipeline: + +- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations`: All dataset definitions and transformations. +- `utilities`: Utility functions and Python modules used in this pipeline. +- `data_sources` (optional): View definitions describing the source data for this pipeline. + +## Getting Started + +To get started, go to the `transformations` folder -- most of the relevant source code lives there: + +* By convention, every dataset under `transformations` is in a separate file. +* Take a look at the sample under "sample_trips_{{template `table_suffix` .}}.py" to get familiar with the syntax. + Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. +* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. 
+* Use `Schedule` to run the pipeline on a schedule! + +For more tutorials and reference material, see https://docs.databricks.com/dlt. +{{- else -}} +# {{template `pipeline_name` .}} + +This folder defines all source code for the '{{template `pipeline_name` .}}' pipeline: + +- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations`: All dataset definitions and transformations. +- `data_sources` (optional): View definitions describing the source data for this pipeline. + +## Getting Started + +To get started, go to the `transformations` folder -- most of the relevant source code lives there: + +* By convention, every dataset under `transformations` is in a separate file. +* Take a look at the sample under "sample_trips_{{template `table_suffix` .}}.sql" to get familiar with the syntax. + Read more about the syntax at https://docs.databricks.com/dlt/sql-ref.html. +* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. +* Use `Schedule` to run the pipeline on a schedule! + +For more tutorials and reference material, see https://docs.databricks.com/dlt. 
+{{- end -}} diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/explorations/sample_exploration.ipynb.tmpl b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/explorations/sample_exploration.ipynb.tmpl new file mode 100644 index 0000000000..4f2a7dd623 --- /dev/null +++ b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/explorations/sample_exploration.ipynb.tmpl @@ -0,0 +1,130 @@ +{{- if (eq .language "python") -}} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "19a992e9-55e0-49e4-abc7-8c92c420dd5b", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "### Example Exploratory Notebook\n", + "\n", + "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n", + "\n", + "**Note**: This notebook is not executed as part of the pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "1b0a82fa-3c6a-4f29-bb43-ded1c4fd77c6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. 
The tables referenced in this notebook depend on that step.\n", + "\n", + "display(spark.sql(\"SELECT * FROM {{ .default_catalog}}.{{template `static_dev_schema` .}}.sample_trips_{{template `table_suffix` .}}\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "sample_exploration", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} +{{- else -}} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "3bd3cbb1-1518-4d0a-a8d1-f08da3f8840b", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "### Example Exploratory Notebook\n", + "\n", + "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n", + "\n", + "**Note**: This notebook is not executed as part of the pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "d30a8e05-bf7a-47e1-982e-b37e64cd6d43", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "-- !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. 
The tables referenced in this notebook depend on that step.\n", + "\n", + "USE CATALOG `{{.default_catalog}}`;\n", + "USE SCHEMA `{{template `static_dev_schema` .}}`;\n", + "\n", + "SELECT * from sample_trips_{{template `table_suffix` .}};" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "sql", + "notebookMetadata": {}, + "notebookName": "sample_exploration", + "widgets": {} + }, + "language_info": { + "name": "sql" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} +{{- end -}} diff --git "a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.py.tmpl" "b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.py.tmpl" new file mode 100644 index 0000000000..ee0499fcdf --- /dev/null +++ "b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.py.tmpl" @@ -0,0 +1,16 @@ +import dlt +from pyspark.sql.functions import col +from utilities import utils + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. 
+ + +@dlt.table +def sample_trips_{{template `table_suffix` .}}(): + return ( + spark.read.table("samples.nyctaxi.trips") + .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) + ) diff --git "a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.sql.tmpl" "b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.sql.tmpl" new file mode 100644 index 0000000000..65d3da532a --- /dev/null +++ "b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.sql.tmpl" @@ -0,0 +1,9 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. + +CREATE MATERIALIZED VIEW sample_trips_{{template `table_suffix` .}} AS +SELECT + pickup_zip, + fare_amount +FROM samples.nyctaxi.trips diff --git "a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.py.tmpl" "b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.py.tmpl" new file mode 100644 index 0000000000..3c1ecfe84b --- /dev/null +++ "b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.py.tmpl" @@ -0,0 +1,19 @@ +import dlt +from pyspark.sql.functions import col, sum + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. 
+ + +@dlt.table +def sample_zones_{{template `table_suffix` .}}(): + # Read from the "sample_trips" table, then sum all the fares + return ( + spark.read.table("sample_trips_{{template `table_suffix` .}}") + .groupBy(col("pickup_zip")) + .agg( + sum("fare_amount").alias("total_fare") + ) + ) diff --git "a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.sql.tmpl" "b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.sql.tmpl" new file mode 100644 index 0000000000..9aababca55 --- /dev/null +++ "b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.sql.tmpl" @@ -0,0 +1,10 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. 
+ +CREATE MATERIALIZED VIEW sample_zones_{{template `table_suffix` .}} AS +SELECT + pickup_zip, + SUM(fare_amount) AS total_fare +FROM sample_trips_{{template `table_suffix` .}} +GROUP BY pickup_zip diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/utilities/utils.py b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/utilities/utils.py new file mode 100644 index 0000000000..ff039898f0 --- /dev/null +++ b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/utilities/utils.py @@ -0,0 +1,8 @@ +from pyspark.sql.functions import udf +from pyspark.sql.types import FloatType + + +@udf(returnType=FloatType()) +def distance_km(distance_miles): + """Convert distance from miles to kilometers (1 mile = 1.60934 km).""" + return distance_miles * 1.60934 diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.job.yml.tmpl b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.job.yml.tmpl new file mode 100644 index 0000000000..1e7a7ca780 --- /dev/null +++ b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.job.yml.tmpl @@ -0,0 +1,19 @@ +# The job that triggers {{template `pipeline_name` .}}. 
+resources: + jobs: + {{template `job_name` .}}: + name: {{template `job_name` .}} + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: ${var.notifications} + + tasks: + - task_key: refresh_pipeline + pipeline_task: + pipeline_id: ${resources.pipelines.{{template `pipeline_name` .}}.id} diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.pipeline.yml.tmpl b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.pipeline.yml.tmpl new file mode 100644 index 0000000000..12ee9cc125 --- /dev/null +++ b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.pipeline.yml.tmpl @@ -0,0 +1,14 @@ +resources: + pipelines: + {{template `pipeline_name` .}}: + name: {{template `pipeline_name` .}} + serverless: true + continuous: false + channel: "PREVIEW" + photon: true + catalog: ${var.catalog} + schema: ${var.schema} + root_path: "." 
+ libraries: + - glob: + include: transformations/** From 6cb3fc0f0c8054e8a0f329c52cd28a07af402677 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Fri, 30 May 2025 14:13:58 +0200 Subject: [PATCH 02/11] Can't use quotes in file names when embedding --- .../lakeflow/databricks_template_schema.json | 17 +++++++++-------- .../README.md.tmpl | 0 .../explorations/sample_exploration.ipynb.tmpl | 0 .../sample_trips_{{.project_name}}.py.tmpl | 0 .../sample_trips_{{.project_name}}.sql.tmpl | 0 .../sample_zones_{{.project_name}}.py.tmpl | 0 .../sample_zones_{{.project_name}}.sql.tmpl | 0 .../utilities/utils.py | 0 .../{{.project_name}}.job.yml.tmpl} | 0 .../{{.project_name}}.pipeline.yml.tmpl} | 0 10 files changed, 9 insertions(+), 8 deletions(-) rename libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{{template `pipeline_name` .}} => {{.project_name}}_pipeline}/README.md.tmpl (100%) rename libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{{template `pipeline_name` .}} => {{.project_name}}_pipeline}/explorations/sample_exploration.ipynb.tmpl (100%) rename "libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.py.tmpl" => libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl (100%) rename "libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.sql.tmpl" => libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl (100%) rename "libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.py.tmpl" => 
libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl (100%) rename "libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.sql.tmpl" => libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl (100%) rename libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{{template `pipeline_name` .}} => {{.project_name}}_pipeline}/utilities/utils.py (100%) rename libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.job.yml.tmpl => {{.project_name}}_pipeline/{{.project_name}}.job.yml.tmpl} (100%) rename libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.pipeline.yml.tmpl => {{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl} (100%) diff --git a/libs/template/templates/lakeflow/databricks_template_schema.json b/libs/template/templates/lakeflow/databricks_template_schema.json index ab6d6d21e9..0a98395dcd 100644 --- a/libs/template/templates/lakeflow/databricks_template_schema.json +++ b/libs/template/templates/lakeflow/databricks_template_schema.json @@ -14,15 +14,16 @@ "default": "{{default_catalog}}", "pattern": "^\\w*$", "pattern_match_failure_message": "Invalid catalog name.", - "description": "\nPlease provide an initial catalog.\ndefault_catalog", + "description": "\nInitial catalog.\ndefault_catalog", "order": 3 }, "personal_schemas": { "type": "string", - "description": "\nWould you like to use a personal schema for each user working on this project? (e.g., 'catalog.{{short_name}}')\npersonal_schemas", + "description": "\nUse a personal schema for each user working on this project? 
(e.g., 'catalog.{{short_name}}')\npersonal_schemas", + "default": "yes", "enum": [ - "yes, use a schema based on the current user name during development", - "no, use a shared schema during development" + "yes", + "no" ], "order": 4 }, @@ -30,7 +31,7 @@ "skip_prompt_if": { "properties": { "personal_schemas": { - "const": "yes, use a schema based on the current user name during development" + "const": "yes" } } }, @@ -38,13 +39,13 @@ "default": "default", "pattern": "^\\w+$", "pattern_match_failure_message": "Invalid schema name.", - "description": "\nPlease provide an initial schema during development.\ndefault_schema", + "description": "\nInitial schema during development:\ndefault_schema", "order": 5 }, "language": { "type": "string", "default": "python", - "description": "Please select the language for this project.\nlanguage", + "description": "\nLanguage for this project:\nlanguage", "enum": [ "python", "sql" @@ -53,7 +54,7 @@ }, "include_job": { "type": "string", - "description": "\nWould you like to include a job that automatically triggers this pipeline?\nThis trigger will only be enabled for production deployments.\ninclude_job", + "description": "\nInclude a job that automatically triggers this pipeline?\nThis trigger will only be enabled for production deployments.\ninclude_job", "order": 7, "enum": [ "yes", diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/README.md.tmpl b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/README.md.tmpl rename to libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` 
.}}/explorations/sample_exploration.ipynb.tmpl b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/explorations/sample_exploration.ipynb.tmpl rename to libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl diff --git "a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.py.tmpl" b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl similarity index 100% rename from "libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.py.tmpl" rename to libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl diff --git "a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.sql.tmpl" b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl similarity index 100% rename from "libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template \"table_suffix\" .}}.sql.tmpl" rename to libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl diff --git 
"a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.py.tmpl" b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl similarity index 100% rename from "libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.py.tmpl" rename to libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl diff --git "a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.sql.tmpl" b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl similarity index 100% rename from "libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template \"table_suffix\" .}}.sql.tmpl" rename to libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/utilities/utils.py b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/utilities/utils.py rename to libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py diff --git 
a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.job.yml.tmpl b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.job.yml.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.job.yml.tmpl rename to libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.job.yml.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.pipeline.yml.tmpl b/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.pipeline.yml.tmpl rename to libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl From b036f5334a277c118cf41d53047b2a9e64ab8044 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Fri, 30 May 2025 15:00:33 +0200 Subject: [PATCH 03/11] address PR feedback --- libs/template/template.go | 10 +++++----- libs/template/templates/lakeflow-pipelines/README.md | 3 +++ .../databricks_template_schema.json | 2 +- .../library/variables.tmpl | 0 .../template/__preamble.tmpl | 0 .../template/{{.project_name}}/.gitignore.tmpl | 0 .../{{.project_name}}/.vscode/__builtins__.pyi | 0 .../template/{{.project_name}}/.vscode/extensions.json | 0 .../{{.project_name}}/.vscode/settings.json.tmpl | 0 .../template/{{.project_name}}/README.md.tmpl | 0 .../template/{{.project_name}}/databricks.yml.tmpl | 0 .../{{.project_name}}_pipeline/README.md.tmpl | 0 
.../explorations/sample_exploration.ipynb.tmpl | 0 .../sample_trips_{{.project_name}}.py.tmpl | 0 .../sample_trips_{{.project_name}}.sql.tmpl | 0 .../sample_zones_{{.project_name}}.py.tmpl | 0 .../sample_zones_{{.project_name}}.sql.tmpl | 0 .../{{.project_name}}_pipeline/utilities/utils.py | 0 .../{{.project_name}}.job.yml.tmpl | 0 .../{{.project_name}}.pipeline.yml.tmpl | 0 libs/template/templates/lakeflow/README.md | 4 ---- 21 files changed, 9 insertions(+), 10 deletions(-) create mode 100644 libs/template/templates/lakeflow-pipelines/README.md rename libs/template/templates/{lakeflow => lakeflow-pipelines}/databricks_template_schema.json (96%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/library/variables.tmpl (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/__preamble.tmpl (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/.gitignore.tmpl (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/.vscode/__builtins__.pyi (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/.vscode/extensions.json (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/.vscode/settings.json.tmpl (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/README.md.tmpl (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/databricks.yml.tmpl (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl (100%) rename libs/template/templates/{lakeflow => 
lakeflow-pipelines}/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.job.yml.tmpl (100%) rename libs/template/templates/{lakeflow => lakeflow-pipelines}/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl (100%) delete mode 100644 libs/template/templates/lakeflow/README.md diff --git a/libs/template/template.go b/libs/template/template.go index a6febe5749..43569050e2 100644 --- a/libs/template/template.go +++ b/libs/template/template.go @@ -26,7 +26,7 @@ type TemplateName string const ( DefaultPython TemplateName = "default-python" DefaultSql TemplateName = "default-sql" - Lakeflow TemplateName = "lakeflow" + LakeflowPipelines TemplateName = "lakeflow-pipelines" DbtSql TemplateName = "dbt-sql" MlopsStacks TemplateName = "mlops-stacks" DefaultPydabs TemplateName = "default-pydabs" @@ -48,11 +48,11 @@ var databricksTemplates = []Template{ Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: DefaultSql}}, }, { - name: Lakeflow, + name: LakeflowPipelines, hidden: true, - 
description: "The Lakeflow template for Databricks Asset Bundles", - Reader: &builtinReader{name: string(Lakeflow)}, - Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: Lakeflow}}, + description: "The default template for Lakeflow Declarative Pipelines", + Reader: &builtinReader{name: string(LakeflowPipelines)}, + Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: LakeflowPipelines}}, }, { name: DbtSql, diff --git a/libs/template/templates/lakeflow-pipelines/README.md b/libs/template/templates/lakeflow-pipelines/README.md new file mode 100644 index 0000000000..008a23aecd --- /dev/null +++ b/libs/template/templates/lakeflow-pipelines/README.md @@ -0,0 +1,3 @@ +# Lakeflow Pipelines + +Default template for Lakeflow Declarative Pipelines diff --git a/libs/template/templates/lakeflow/databricks_template_schema.json b/libs/template/templates/lakeflow-pipelines/databricks_template_schema.json similarity index 96% rename from libs/template/templates/lakeflow/databricks_template_schema.json rename to libs/template/templates/lakeflow-pipelines/databricks_template_schema.json index 0a98395dcd..07ba3c040d 100644 --- a/libs/template/templates/lakeflow/databricks_template_schema.json +++ b/libs/template/templates/lakeflow-pipelines/databricks_template_schema.json @@ -1,5 +1,5 @@ { - "welcome_message": "\nWelcome to the Lakeflow template for Databricks Asset Bundles!", + "welcome_message": "\nWelcome to the template for Lakeflow Declarative Pipelines!", "properties": { "project_name": { "type": "string", diff --git a/libs/template/templates/lakeflow/library/variables.tmpl b/libs/template/templates/lakeflow-pipelines/library/variables.tmpl similarity index 100% rename from libs/template/templates/lakeflow/library/variables.tmpl rename to libs/template/templates/lakeflow-pipelines/library/variables.tmpl diff --git a/libs/template/templates/lakeflow/template/__preamble.tmpl b/libs/template/templates/lakeflow-pipelines/template/__preamble.tmpl 
similarity index 100% rename from libs/template/templates/lakeflow/template/__preamble.tmpl rename to libs/template/templates/lakeflow-pipelines/template/__preamble.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/.gitignore.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.gitignore.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/.gitignore.tmpl rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.gitignore.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/__builtins__.pyi b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/__builtins__.pyi similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/__builtins__.pyi rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/__builtins__.pyi diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/extensions.json b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/extensions.json similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/extensions.json rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/extensions.json diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/settings.json.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/.vscode/settings.json.tmpl rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/README.md.tmpl similarity 
index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/README.md.tmpl rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/README.md.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/databricks.yml.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/databricks.yml.tmpl rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/databricks.yml.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl 
b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl diff --git 
a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.job.yml.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.job.yml.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.job.yml.tmpl rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.job.yml.tmpl diff --git a/libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl 
b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl similarity index 100% rename from libs/template/templates/lakeflow/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl rename to libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl diff --git a/libs/template/templates/lakeflow/README.md b/libs/template/templates/lakeflow/README.md deleted file mode 100644 index b5dcd0d01a..0000000000 --- a/libs/template/templates/lakeflow/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Lakeflow template - -This template introduces a new structure for organizing data-engineering -assets in DABs. From 2bcd81606a9ab704ba46e32fdb8b1b5294fd433b Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Fri, 30 May 2025 15:02:29 +0200 Subject: [PATCH 04/11] always generate jobs --- .../lakeflow-pipelines/databricks_template_schema.json | 10 ---------- .../lakeflow-pipelines/template/__preamble.tmpl | 5 ----- 2 files changed, 15 deletions(-) diff --git a/libs/template/templates/lakeflow-pipelines/databricks_template_schema.json b/libs/template/templates/lakeflow-pipelines/databricks_template_schema.json index 07ba3c040d..a184908409 100644 --- a/libs/template/templates/lakeflow-pipelines/databricks_template_schema.json +++ b/libs/template/templates/lakeflow-pipelines/databricks_template_schema.json @@ -51,16 +51,6 @@ "sql" ], "order": 6 - }, - "include_job": { - "type": "string", - "description": "\nInclude a job that automatically triggers this pipeline?\nThis trigger will only be enabled for production deployments.\ninclude_job", - "order": 7, - "enum": [ - "yes", - "no" - ], - "default": "no" } }, "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!\n\nRefer to the README.md file for \"getting started\" instructions!" 
diff --git a/libs/template/templates/lakeflow-pipelines/template/__preamble.tmpl b/libs/template/templates/lakeflow-pipelines/template/__preamble.tmpl index 12cc3b2be0..98f4c999df 100644 --- a/libs/template/templates/lakeflow-pipelines/template/__preamble.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/__preamble.tmpl @@ -5,7 +5,6 @@ This file only contains template directives; it is skipped for the actual output {{skip "__preamble"}} {{$isSQL := eq .language "sql"}} -{{$skipJob := eq .include_job "no"}} {{if $isSQL}} {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/utilities/utils.py"}} @@ -15,7 +14,3 @@ This file only contains template directives; it is skipped for the actual output {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template `table_suffix` .}}.sql"}} {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template `table_suffix` .}}.sql"}} {{end}} - -{{if $skipJob}} - {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/{{template `pipeline_name` .}}.job.yml.tmpl"}} -{{end}} From 1ee4ede965edd94cd88b5129537197824efac7c4 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Fri, 30 May 2025 17:54:59 +0200 Subject: [PATCH 05/11] Add acceptance tests --- .../.vscode/__builtins__.pyi | 3 + .../.vscode/extensions.json | 7 ++ .../.vscode/settings.json | 21 ++++++ .../output/my_lakeflow_pipelines/README.md | 41 ++++++++++++ .../my_lakeflow_pipelines/databricks.yml | 49 ++++++++++++++ .../my_lakeflow_pipelines/out.gitignore | 8 +++ .../my_lakeflow_pipelines_pipeline/README.md | 22 +++++++ .../explorations/sample_exploration.ipynb | 63 ++++++++++++++++++ .../my_lakeflow_pipelines.job.yml | 19 ++++++ .../my_lakeflow_pipelines.pipeline.yml | 14 ++++ .../sample_trips_my_lakeflow_pipelines.py | 16 +++++ .../sample_zones_my_lakeflow_pipelines.py | 19 ++++++ .../utilities/utils.py | 8 +++ .../lakeflow-pipelines/sql/input.json | 6 
++ .../lakeflow-pipelines/sql/output.txt | 29 +++++++++ .../.vscode/__builtins__.pyi | 3 + .../.vscode/extensions.json | 7 ++ .../.vscode/settings.json | 21 ++++++ .../output/my_lakeflow_pipelines/README.md | 41 ++++++++++++ .../my_lakeflow_pipelines/databricks.yml | 49 ++++++++++++++ .../my_lakeflow_pipelines/out.gitignore | 8 +++ .../my_lakeflow_pipelines_pipeline/README.md | 21 ++++++ .../explorations/sample_exploration.ipynb | 64 +++++++++++++++++++ .../my_lakeflow_pipelines.job.yml | 19 ++++++ .../my_lakeflow_pipelines.pipeline.yml | 14 ++++ .../sample_trips_my_lakeflow_pipelines.sql | 9 +++ .../sample_zones_my_lakeflow_pipelines.sql | 10 +++ .../templates/lakeflow-pipelines/sql/script | 9 +++ 28 files changed, 600 insertions(+) create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/__builtins__.pyi create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/extensions.json create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/settings.json create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/README.md create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/databricks.yml create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/out.gitignore create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.job.yml create mode 100644 
acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.py create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.py create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/utilities/utils.py create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/input.json create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/output.txt create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/__builtins__.pyi create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/extensions.json create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/settings.json create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/README.md create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/databricks.yml create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/out.gitignore create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb create mode 100644 
acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.job.yml create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.sql create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.sql create mode 100755 acceptance/bundle/templates/lakeflow-pipelines/sql/script diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/__builtins__.pyi b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/__builtins__.pyi new file mode 100644 index 0000000000..0edd5181bc --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/extensions.json b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/extensions.json new file mode 100644 index 0000000000..5d15eba363 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git 
a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/settings.json b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/settings.json new file mode 100644 index 0000000000..edff7df998 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/settings.json @@ -0,0 +1,21 @@ +{ + "python.analysis.stubPath": ".vscode", + "databricks.python.envFile": "${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["assets/etl_pipeline"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + }, +} diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/README.md b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/README.md new file mode 100644 index 0000000000..efce1cd997 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/README.md @@ -0,0 +1,41 @@ +# my_lakeflow_pipelines + +The 'my_lakeflow_pipelines' project was generated by using the Lakeflow template. + +## Setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks auth login + ``` + +3. 
Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from + https://www.databricks.com/blog/announcing-pycharm-integration-databricks. + + +## Deploying resources + +1. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + +2. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + +3. Use the "summary" command to review everything that was deployed: + ``` + $ databricks bundle summary + ``` + +4. To run a job or pipeline, use the "run" command: + ``` + $ databricks bundle run + ``` diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/databricks.yml b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/databricks.yml new file mode 100644 index 0000000000..a0ec04d24a --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/databricks.yml @@ -0,0 +1,49 @@ +# This is a Databricks asset bundle definition for my_lakeflow_pipelines. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_lakeflow_pipelines + uuid: [UUID] + +include: + - resources/*.yml + - resources/*/*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + notifications: + description: The email addresses to use for failure notifications + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. 
+ # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: [DATABRICKS_URL] + variables: + catalog: main + schema: ${workspace.current_user.short_name} + notifications: [] + + prod: + mode: production + workspace: + host: [DATABRICKS_URL] + # We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy. + root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: [USERNAME] + level: CAN_MANAGE + run_as: + user_name: [USERNAME] + variables: + catalog: main + schema: default + notifications: [[USERNAME]] diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/out.gitignore b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/out.gitignore new file mode 100644 index 0000000000..f6a3b5ff93 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/out.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +**/explorations/** +**/!explorations/README.md diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md new file mode 100644 index 0000000000..8fc187a5af --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md @@ -0,0 +1,22 @@ +# my_lakeflow_pipelines_pipeline + +This folder defines all source code for the my_lakeflow_pipelines_pipeline pipeline: + +- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations`: All dataset definitions and transformations. 
+- `utilities`: Utility functions and Python modules used in this pipeline. +- `data_sources` (optional): View definitions describing the source data for this pipeline. + +## Getting Started + +To get started, go to the `transformations` folder -- most of the relevant source code lives there: + +* By convention, every dataset under `transformations` is in a separate file. +* Take a look at the sample under "sample_trips_my_lakeflow_pipelines.py" to get familiar with the syntax. + Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. +* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. +* Use `Schedule` to run the pipeline on a schedule! + +For more tutorials and reference material, see https://docs.databricks.com/dlt. \ No newline at end of file diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb new file mode 100644 index 0000000000..91bbbeae5a --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "### Example Exploratory Notebook\n", + "\n", + "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n", + "\n", + "**Note**: This notebook is not executed as part 
of the pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. The tables referenced in this notebook depend on that step.\n", + "\n", + "display(spark.sql(\"SELECT * FROM main.[USERNAME].my_lakeflow_pipelines\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "sample_exploration", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.job.yml b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.job.yml new file mode 100644 index 0000000000..f07a973780 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.job.yml @@ -0,0 +1,19 @@ +# The job that triggers my_lakeflow_pipelines_pipeline. 
+resources: + jobs: + my_lakeflow_pipelines_job: + name: my_lakeflow_pipelines_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: ${var.notifications} + + tasks: + - task_key: refresh_pipeline + pipeline_task: + pipeline_id: ${resources.pipelines.my_lakeflow_pipelines_pipeline.id} diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml new file mode 100644 index 0000000000..678a787540 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml @@ -0,0 +1,14 @@ +resources: + pipelines: + my_lakeflow_pipelines_pipeline: + name: my_lakeflow_pipelines_pipeline + serverless: true + continuous: false + channel: "PREVIEW" + photon: true + catalog: ${var.catalog} + schema: ${var.schema} + root_path: "." 
+ libraries: + - glob: + include: transformations/** diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.py b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.py new file mode 100644 index 0000000000..804ac6ea25 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.py @@ -0,0 +1,16 @@ +import dlt +from pyspark.sql.functions import col +from utilities import utils + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. + + +@dlt.table +def sample_trips_my_lakeflow_pipelines(): + return ( + spark.read.table("samples.nyctaxi.trips") + .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) + ) diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.py b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.py new file mode 100644 index 0000000000..4c2798d1b1 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.py @@ -0,0 +1,19 @@ +import dlt +from pyspark.sql.functions import col, sum + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. 
+ + +@dlt.table +def sample_zones_my_lakeflow_pipelines(): + # Read from the "sample_trips" table, then sum all the fares + return ( + spark.read.table("sample_trips_my_lakeflow_pipelines") + .groupBy(col("pickup_zip")) + .agg( + sum("fare_amount").alias("total_fare") + ) + ) diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/utilities/utils.py b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/utilities/utils.py new file mode 100644 index 0000000000..ff039898f0 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/utilities/utils.py @@ -0,0 +1,8 @@ +from pyspark.sql.functions import udf +from pyspark.sql.types import FloatType + + +@udf(returnType=FloatType()) +def distance_km(distance_miles): + """Convert distance from miles to kilometers (1 mile = 1.60934 km).""" + return distance_miles * 1.60934 diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/input.json b/acceptance/bundle/templates/lakeflow-pipelines/sql/input.json new file mode 100644 index 0000000000..36c5cb6995 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_lakeflow_pipelines", + "default_catalog": "main", + "personal_schemas": "yes", + "language": "sql" +} diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output.txt b/acceptance/bundle/templates/lakeflow-pipelines/sql/output.txt new file mode 100644 index 0000000000..954a2a8409 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output.txt @@ -0,0 +1,29 @@ + +>>> [CLI] bundle init lakeflow-pipelines --config-file ./input.json --output-dir output + +Welcome to the template for Lakeflow Declarative Pipelines! + + +Your new project has been created in the 'my_lakeflow_pipelines' directory! 
+ +Refer to the README.md file for "getting started" instructions! + +>>> [CLI] bundle validate -t dev +Name: my_lakeflow_pipelines +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_lakeflow_pipelines/dev + +Validation OK! + +>>> [CLI] bundle validate -t prod +Name: my_lakeflow_pipelines +Target: prod +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_lakeflow_pipelines/prod + +Validation OK! diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/__builtins__.pyi b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/__builtins__.pyi new file mode 100644 index 0000000000..0edd5181bc --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/extensions.json b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/extensions.json new file mode 100644 index 0000000000..5d15eba363 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/settings.json b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/settings.json new file mode 100644 index 0000000000..edff7df998 --- /dev/null +++ 
b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/settings.json @@ -0,0 +1,21 @@ +{ + "python.analysis.stubPath": ".vscode", + "databricks.python.envFile": "${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["assets/etl_pipeline"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + }, +} diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/README.md b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/README.md new file mode 100644 index 0000000000..efce1cd997 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/README.md @@ -0,0 +1,41 @@ +# my_lakeflow_pipelines + +The 'my_lakeflow_pipelines' project was generated by using the Lakeflow template. + +## Setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks auth login + ``` + +3. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from + https://www.databricks.com/blog/announcing-pycharm-integration-databricks. + + +## Deploying resources + +1. 
To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + +2. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + +3. Use the "summary" command to review everything that was deployed: + ``` + $ databricks bundle summary + ``` + +4. To run a job or pipeline, use the "run" command: + ``` + $ databricks bundle run + ``` diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/databricks.yml b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/databricks.yml new file mode 100644 index 0000000000..a0ec04d24a --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/databricks.yml @@ -0,0 +1,49 @@ +# This is a Databricks asset bundle definition for my_lakeflow_pipelines. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_lakeflow_pipelines + uuid: [UUID] + +include: + - resources/*.yml + - resources/*/*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + notifications: + description: The email addresses to use for failure notifications + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. 
+ mode: development + default: true + workspace: + host: [DATABRICKS_URL] + variables: + catalog: main + schema: ${workspace.current_user.short_name} + notifications: [] + + prod: + mode: production + workspace: + host: [DATABRICKS_URL] + # We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy. + root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: [USERNAME] + level: CAN_MANAGE + run_as: + user_name: [USERNAME] + variables: + catalog: main + schema: default + notifications: [[USERNAME]] diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/out.gitignore b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/out.gitignore new file mode 100644 index 0000000000..f6a3b5ff93 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/out.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +**/explorations/** +**/!explorations/README.md diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md new file mode 100644 index 0000000000..f67be11560 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md @@ -0,0 +1,21 @@ +# my_lakeflow_pipelines_pipeline + +This folder defines all source code for the 'my_lakeflow_pipelines_pipeline' pipeline: + +- \`explorations\`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- \`transformations\`: All dataset definitions and transformations. +- \`data_sources\` (optional): View definitions describing the source data for this pipeline. 
+ +## Getting Started + +To get started, go to the \`transformations\` folder -- most of the relevant source code lives there: + +* By convention, every dataset under \`transformations\` is in a separate file. +* Take a look at the sample under "sample_trips_my_lakeflow_pipelines.sql" to get familiar with the syntax. + Read more about the syntax at https://docs.databricks.com/dlt/sql-ref.html. +* Use \`Run file\` to run and preview a single transformation. +* Use \`Run pipeline\` to run _all_ transformations in the entire pipeline. +* Use \`+ Add\` in the file browser to add a new data set definition. +* Use \`Schedule\` to run the pipeline on a schedule! + +For more tutorials and reference material, see https://docs.databricks.com/dlt. \ No newline at end of file diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb new file mode 100644 index 0000000000..b2f2c588d2 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "### Example Exploratory Notebook\n", + "\n", + "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n", + "\n", + "**Note**: This notebook is not executed as part of the pipeline." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "-- !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. The tables referenced in this notebook depend on that step.\n", + "\n", + "USE CATALOG `main`;\n", + "USE SCHEMA `[USERNAME]`;\n", + "\n", + "SELECT * from my_lakeflow_pipelines;" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "sql", + "notebookMetadata": {}, + "notebookName": "sample_exploration", + "widgets": {} + }, + "language_info": { + "name": "sql" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.job.yml b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.job.yml new file mode 100644 index 0000000000..f07a973780 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.job.yml @@ -0,0 +1,19 @@ +# The job that triggers my_lakeflow_pipelines_pipeline. 
+resources: + jobs: + my_lakeflow_pipelines_job: + name: my_lakeflow_pipelines_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: ${var.notifications} + + tasks: + - task_key: refresh_pipeline + pipeline_task: + pipeline_id: ${resources.pipelines.my_lakeflow_pipelines_pipeline.id} diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml new file mode 100644 index 0000000000..678a787540 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml @@ -0,0 +1,14 @@ +resources: + pipelines: + my_lakeflow_pipelines_pipeline: + name: my_lakeflow_pipelines_pipeline + serverless: true + continuous: false + channel: "PREVIEW" + photon: true + catalog: ${var.catalog} + schema: ${var.schema} + root_path: "." + libraries: + - glob: + include: transformations/** diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.sql b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.sql new file mode 100644 index 0000000000..116bb5184b --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.sql @@ -0,0 +1,9 @@ +-- This file defines a sample transformation. 
+-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. + +CREATE MATERIALIZED VIEW sample_trips_my_lakeflow_pipelines AS +SELECT + pickup_zip, + fare_amount +FROM samples.nyctaxi.trips diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.sql b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.sql new file mode 100644 index 0000000000..79cfe04ae5 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.sql @@ -0,0 +1,10 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. + +CREATE MATERIALIZED VIEW sample_zones_my_lakeflow_pipelines AS +SELECT + pickup_zip, + SUM(fare_amount) AS total_fare +FROM sample_trips_my_lakeflow_pipelines +GROUP BY pickup_zip diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/script b/acceptance/bundle/templates/lakeflow-pipelines/sql/script new file mode 100755 index 0000000000..90735aebef --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/script @@ -0,0 +1,9 @@ +trace $CLI bundle init lakeflow-pipelines --config-file ./input.json --output-dir output + +cd output/my_lakeflow_pipelines +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod + +# Do not affect this repository's git behaviour #2318 +mv .gitignore out.gitignore +rm .databricks/.gitignore From 2959c84be5302cc3c94bd177c584f2ee08617375 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Fri, 30 May 2025 18:36:09 +0200 Subject: [PATCH 06/11] update acceptance tests --- .../output/my_lakeflow_pipelines/.vscode/settings.json | 2 +- 
.../sql/output/my_lakeflow_pipelines/README.md | 2 +- .../sql/output/my_lakeflow_pipelines/databricks.yml | 4 +--- .../lakeflow-pipelines/databricks_template_schema.json | 4 ++-- .../lakeflow-pipelines/library/variables.tmpl | 2 +- .../lakeflow-pipelines/template/__preamble.tmpl | 10 +++++----- .../{{.project_name}}/.vscode/settings.json.tmpl | 2 +- .../template/{{.project_name}}/README.md.tmpl | 2 +- .../template/{{.project_name}}/databricks.yml.tmpl | 4 +--- .../{{.project_name}}_pipeline/README.md.tmpl | 10 +++++++--- 10 files changed, 21 insertions(+), 21 deletions(-) diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/settings.json b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/settings.json index edff7df998..3e76d20bd8 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/settings.json +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/.vscode/settings.json @@ -8,7 +8,7 @@ ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, - "python.analysis.extraPaths": ["assets/etl_pipeline"], + "python.analysis.extraPaths": ["resources/my_lakeflow_pipelines_pipeline"], "files.exclude": { "**/*.egg-info": true, "**/__pycache__": true, diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/README.md b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/README.md index efce1cd997..49d493b854 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/README.md +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/README.md @@ -1,6 +1,6 @@ # my_lakeflow_pipelines -The 'my_lakeflow_pipelines' project was generated by using the Lakeflow template. +The 'my_lakeflow_pipelines' project was generated by using the Lakeflow Pipelines template. 
## Setup diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/databricks.yml b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/databricks.yml index a0ec04d24a..ded4a8470d 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/databricks.yml +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/databricks.yml @@ -36,13 +36,11 @@ targets: mode: production workspace: host: [DATABRICKS_URL] - # We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy. + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} permissions: - user_name: [USERNAME] level: CAN_MANAGE - run_as: - user_name: [USERNAME] variables: catalog: main schema: default diff --git a/libs/template/templates/lakeflow-pipelines/databricks_template_schema.json b/libs/template/templates/lakeflow-pipelines/databricks_template_schema.json index a184908409..53841d36f4 100644 --- a/libs/template/templates/lakeflow-pipelines/databricks_template_schema.json +++ b/libs/template/templates/lakeflow-pipelines/databricks_template_schema.json @@ -3,7 +3,7 @@ "properties": { "project_name": { "type": "string", - "default": "my_project", + "default": "my_lakeflow_project", "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project\nproject_name", "order": 1, "pattern": "^[a-z0-9_]+$", @@ -45,7 +45,7 @@ "language": { "type": "string", "default": "python", - "description": "\nLanguage for this project:\nlanguage", + "description": "\nInitial language for this project:\nlanguage", "enum": [ "python", "sql" diff --git a/libs/template/templates/lakeflow-pipelines/library/variables.tmpl b/libs/template/templates/lakeflow-pipelines/library/variables.tmpl index 
dd92f2efba..51ead84c1a 100644 --- a/libs/template/templates/lakeflow-pipelines/library/variables.tmpl +++ b/libs/template/templates/lakeflow-pipelines/library/variables.tmpl @@ -1,5 +1,5 @@ {{- define `table_suffix` -}} - {{ (regexp `^_+|_+$`).ReplaceAllString ((regexp `_+`).ReplaceAllString .project_name `_`) `` }} + {{ .project_name }} {{- end }} {{- define `pipeline_name` -}} diff --git a/libs/template/templates/lakeflow-pipelines/template/__preamble.tmpl b/libs/template/templates/lakeflow-pipelines/template/__preamble.tmpl index 98f4c999df..c6c0c2321f 100644 --- a/libs/template/templates/lakeflow-pipelines/template/__preamble.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/__preamble.tmpl @@ -7,10 +7,10 @@ This file only contains template directives; it is skipped for the actual output {{$isSQL := eq .language "sql"}} {{if $isSQL}} - {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/utilities/utils.py"}} - {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template `table_suffix` .}}.py"}} - {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template `table_suffix` .}}.py"}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py"}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py"}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py"}} {{else}} - {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_zones_{{template `table_suffix` .}}.sql"}} - {{skip "{{.project_name}}/resources/{{template `pipeline_name` .}}/transformations/sample_trips_{{template `table_suffix` .}}.sql"}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql"}} + {{skip 
"{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql"}} {{end}} diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl index 2f753e89e8..6a87715ae2 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl @@ -9,7 +9,7 @@ "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, {{- /* Unfortunately extraPaths doesn't support globs!! See: https://github.com/microsoft/pylance-release/issues/973 */}} - "python.analysis.extraPaths": ["assets/etl_pipeline"], + "python.analysis.extraPaths": ["resources/{{.project_name}}_pipeline"], "files.exclude": { "**/*.egg-info": true, "**/__pycache__": true, diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/README.md.tmpl index 5754577d50..837213a189 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/README.md.tmpl @@ -1,6 +1,6 @@ # {{.project_name}} -The '{{.project_name}}' project was generated by using the Lakeflow template. +The '{{.project_name}}' project was generated by using the Lakeflow Pipelines template. 
## Setup diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/databricks.yml.tmpl index bcb5b765af..1108b20128 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/databricks.yml.tmpl @@ -36,13 +36,11 @@ targets: mode: production workspace: host: {{workspace_host}} - # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. + # We explicitly deploy to /Workspace/Users/{{user_name}} to make sure we only have a single copy. root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} permissions: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} level: CAN_MANAGE - run_as: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} variables: catalog: {{.default_catalog}} schema: {{template `prod_schema` .}} diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl index 8c1e37aba3..34a7b2d0fa 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl @@ -1,11 +1,12 @@ {{- if (eq .language "python") -}} + # {{template `pipeline_name` .}} This folder defines all source code for the {{template `pipeline_name` .}} pipeline: - `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. - `transformations`: All dataset definitions and transformations. 
-- `utilities`: Utility functions and Python modules used in this pipeline. +- `utilities` (optional): Utility functions and Python modules used in this pipeline. - `data_sources` (optional): View definitions describing the source data for this pipeline. ## Getting Started @@ -13,7 +14,7 @@ This folder defines all source code for the {{template `pipeline_name` .}} pipel To get started, go to the `transformations` folder -- most of the relevant source code lives there: * By convention, every dataset under `transformations` is in a separate file. -* Take a look at the sample under "sample_trips_{{template `table_suffix` .}}.py" to get familiar with the syntax. +* Take a look at the sample under "sample_trips_{{ .project_name }}.py" to get familiar with the syntax. Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. * Use `Run file` to run and preview a single transformation. * Use `Run pipeline` to run _all_ transformations in the entire pipeline. @@ -21,7 +22,9 @@ To get started, go to the `transformations` folder -- most of the relevant sourc * Use `Schedule` to run the pipeline on a schedule! For more tutorials and reference material, see https://docs.databricks.com/dlt. + {{- else -}} + # {{template `pipeline_name` .}} This folder defines all source code for the '{{template `pipeline_name` .}}' pipeline: @@ -35,7 +38,7 @@ This folder defines all source code for the '{{template `pipeline_name` .}}' pip To get started, go to the \`transformations\` folder -- most of the relevant source code lives there: * By convention, every dataset under \`transformations\` is in a separate file. -* Take a look at the sample under "sample_trips_{{template `table_suffix` .}}.sql" to get familiar with the syntax. +* Take a look at the sample under "sample_trips_{{ .project_name }}.sql" to get familiar with the syntax. Read more about the syntax at https://docs.databricks.com/dlt/sql-ref.html. * Use \`Run file\` to run and preview a single transformation. 
* Use \`Run pipeline\` to run _all_ transformations in the entire pipeline. @@ -43,4 +46,5 @@ To get started, go to the \`transformations\` folder -- most of the relevant sou * Use \`Schedule\` to run the pipeline on a schedule! For more tutorials and reference material, see https://docs.databricks.com/dlt. + {{- end -}} From 501e864f9fc939cf01c87bd301a69016404ab0ea Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Mon, 2 Jun 2025 10:32:46 +0200 Subject: [PATCH 07/11] format --- .../resources/my_lakeflow_pipelines_pipeline/README.md | 2 +- .../explorations/sample_exploration.ipynb | 2 +- .../transformations/sample_trips_my_lakeflow_pipelines.py | 5 +---- .../transformations/sample_zones_my_lakeflow_pipelines.py | 8 +------- 4 files changed, 4 insertions(+), 13 deletions(-) diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md index 8fc187a5af..4e84e76ea9 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md @@ -19,4 +19,4 @@ To get started, go to the `transformations` folder -- most of the relevant sourc * Use `+ Add` in the file browser to add a new data set definition. * Use `Schedule` to run the pipeline on a schedule! -For more tutorials and reference material, see https://docs.databricks.com/dlt. \ No newline at end of file +For more tutorials and reference material, see https://docs.databricks.com/dlt. 
diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb index 91bbbeae5a..0187c0c95f 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb @@ -60,4 +60,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.py b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.py index 804ac6ea25..f8355f62ae 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.py +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.py @@ -10,7 +10,4 @@ @dlt.table def sample_trips_my_lakeflow_pipelines(): - return ( - spark.read.table("samples.nyctaxi.trips") - .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) - ) + return spark.read.table("samples.nyctaxi.trips").withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) diff --git 
a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.py b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.py index 4c2798d1b1..c9bee817b6 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.py +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.py @@ -10,10 +10,4 @@ @dlt.table def sample_zones_my_lakeflow_pipelines(): # Read from the "sample_trips" table, then sum all the fares - return ( - spark.read.table("sample_trips_my_lakeflow_pipelines") - .groupBy(col("pickup_zip")) - .agg( - sum("fare_amount").alias("total_fare") - ) - ) + return spark.read.table("sample_trips_my_lakeflow_pipelines").groupBy(col("pickup_zip")).agg(sum("fare_amount").alias("total_fare")) From 9ba1f489155911417aa331fae09be19df3651c82 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Mon, 2 Jun 2025 10:46:17 +0200 Subject: [PATCH 08/11] PR feedback --- .../resources/my_lakeflow_pipelines_pipeline/README.md | 2 +- .../explorations/sample_exploration.ipynb | 2 +- .../my_lakeflow_pipelines.pipeline.yml | 2 -- .../resources/{{.project_name}}_pipeline/README.md.tmpl | 3 +-- .../explorations/sample_exploration.ipynb.tmpl | 2 +- .../{{.project_name}}.pipeline.yml.tmpl | 2 -- 6 files changed, 4 insertions(+), 9 deletions(-) diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md 
b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md index f67be11560..3736bbd95b 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md @@ -18,4 +18,4 @@ To get started, go to the \`transformations\` folder -- most of the relevant sou * Use \`+ Add\` in the file browser to add a new data set definition. * Use \`Schedule\` to run the pipeline on a schedule! -For more tutorials and reference material, see https://docs.databricks.com/dlt. \ No newline at end of file +For more tutorials and reference material, see https://docs.databricks.com/dlt. diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb index b2f2c588d2..a3db8fdf08 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb @@ -61,4 +61,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml index 
678a787540..499ddad0ca 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml @@ -3,9 +3,7 @@ resources: my_lakeflow_pipelines_pipeline: name: my_lakeflow_pipelines_pipeline serverless: true - continuous: false channel: "PREVIEW" - photon: true catalog: ${var.catalog} schema: ${var.schema} root_path: "." diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl index 34a7b2d0fa..40628458b1 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl @@ -46,5 +46,4 @@ To get started, go to the \`transformations\` folder -- most of the relevant sou * Use \`Schedule\` to run the pipeline on a schedule! For more tutorials and reference material, see https://docs.databricks.com/dlt. 
- -{{- end -}} +{{ end -}} diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl index 4f2a7dd623..9afc20b0a2 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl @@ -127,4 +127,4 @@ "nbformat": 4, "nbformat_minor": 0 } -{{- end -}} +{{ end -}} diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl index 12ee9cc125..23df081f00 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl @@ -3,9 +3,7 @@ resources: {{template `pipeline_name` .}}: name: {{template `pipeline_name` .}} serverless: true - continuous: false channel: "PREVIEW" - photon: true catalog: ${var.catalog} schema: ${var.schema} root_path: "." 
From 8cfd3601b3b1d69bc07c34cdc3d05de9e7f9f7d6 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Mon, 2 Jun 2025 14:00:37 +0200 Subject: [PATCH 09/11] Fix python acceptance test --- .../lakeflow-pipelines/python/input.json | 6 ++++ .../lakeflow-pipelines/python/output.txt | 29 +++++++++++++++++++ .../.vscode/settings.json | 2 +- .../output/my_lakeflow_pipelines/README.md | 2 +- .../my_lakeflow_pipelines/databricks.yml | 4 +-- .../my_lakeflow_pipelines_pipeline/README.md | 4 +-- .../explorations/sample_exploration.ipynb | 2 +- .../my_lakeflow_pipelines.pipeline.yml | 2 -- .../sample_trips_my_lakeflow_pipelines.py | 5 +++- .../sample_zones_my_lakeflow_pipelines.py | 8 ++++- .../lakeflow-pipelines/python/script | 9 ++++++ .../my_lakeflow_pipelines_pipeline/README.md | 18 ++++++------ .../{{.project_name}}_pipeline/README.md.tmpl | 18 ++++++------ 13 files changed, 79 insertions(+), 30 deletions(-) create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/input.json create mode 100644 acceptance/bundle/templates/lakeflow-pipelines/python/output.txt create mode 100755 acceptance/bundle/templates/lakeflow-pipelines/python/script diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/input.json b/acceptance/bundle/templates/lakeflow-pipelines/python/input.json new file mode 100644 index 0000000000..e3f799e1c7 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_lakeflow_pipelines", + "default_catalog": "main", + "personal_schemas": "yes", + "language": "python" +} diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output.txt b/acceptance/bundle/templates/lakeflow-pipelines/python/output.txt new file mode 100644 index 0000000000..954a2a8409 --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output.txt @@ -0,0 +1,29 @@ + +>>> [CLI] bundle init lakeflow-pipelines --config-file ./input.json --output-dir output + +Welcome to the template 
for Lakeflow Declarative Pipelines! + + +Your new project has been created in the 'my_lakeflow_pipelines' directory! + +Refer to the README.md file for "getting started" instructions! + +>>> [CLI] bundle validate -t dev +Name: my_lakeflow_pipelines +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_lakeflow_pipelines/dev + +Validation OK! + +>>> [CLI] bundle validate -t prod +Name: my_lakeflow_pipelines +Target: prod +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_lakeflow_pipelines/prod + +Validation OK! diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/settings.json b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/settings.json index edff7df998..3e76d20bd8 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/settings.json +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/.vscode/settings.json @@ -8,7 +8,7 @@ ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, - "python.analysis.extraPaths": ["assets/etl_pipeline"], + "python.analysis.extraPaths": ["resources/my_lakeflow_pipelines_pipeline"], "files.exclude": { "**/*.egg-info": true, "**/__pycache__": true, diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/README.md b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/README.md index efce1cd997..49d493b854 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/README.md +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/README.md @@ -1,6 +1,6 @@ # my_lakeflow_pipelines -The 'my_lakeflow_pipelines' project was generated by using the Lakeflow template. 
+The 'my_lakeflow_pipelines' project was generated by using the Lakeflow Pipelines template. ## Setup diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/databricks.yml b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/databricks.yml index a0ec04d24a..ded4a8470d 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/databricks.yml +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/databricks.yml @@ -36,13 +36,11 @@ targets: mode: production workspace: host: [DATABRICKS_URL] - # We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy. + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} permissions: - user_name: [USERNAME] level: CAN_MANAGE - run_as: - user_name: [USERNAME] variables: catalog: main schema: default diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md index 4e84e76ea9..7b2cc02ea9 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md @@ -4,7 +4,7 @@ This folder defines all source code for the my_lakeflow_pipelines_pipeline pipel - `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. - `transformations`: All dataset definitions and transformations. -- `utilities`: Utility functions and Python modules used in this pipeline. 
+- `utilities` (optional): Utility functions and Python modules used in this pipeline. - `data_sources` (optional): View definitions describing the source data for this pipeline. ## Getting Started @@ -19,4 +19,4 @@ To get started, go to the `transformations` folder -- most of the relevant sourc * Use `+ Add` in the file browser to add a new data set definition. * Use `Schedule` to run the pipeline on a schedule! -For more tutorials and reference material, see https://docs.databricks.com/dlt. +For more tutorials and reference material, see https://docs.databricks.com/dlt. \ No newline at end of file diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb index 0187c0c95f..91bbbeae5a 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb @@ -60,4 +60,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml index 678a787540..499ddad0ca 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml +++ 
b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/my_lakeflow_pipelines.pipeline.yml @@ -3,9 +3,7 @@ resources: my_lakeflow_pipelines_pipeline: name: my_lakeflow_pipelines_pipeline serverless: true - continuous: false channel: "PREVIEW" - photon: true catalog: ${var.catalog} schema: ${var.schema} root_path: "." diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.py b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.py index f8355f62ae..804ac6ea25 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.py +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_trips_my_lakeflow_pipelines.py @@ -10,4 +10,7 @@ @dlt.table def sample_trips_my_lakeflow_pipelines(): - return spark.read.table("samples.nyctaxi.trips").withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) + return ( + spark.read.table("samples.nyctaxi.trips") + .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) + ) diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.py b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.py index c9bee817b6..4c2798d1b1 100644 --- 
a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.py +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/transformations/sample_zones_my_lakeflow_pipelines.py @@ -10,4 +10,10 @@ @dlt.table def sample_zones_my_lakeflow_pipelines(): # Read from the "sample_trips" table, then sum all the fares - return spark.read.table("sample_trips_my_lakeflow_pipelines").groupBy(col("pickup_zip")).agg(sum("fare_amount").alias("total_fare")) + return ( + spark.read.table("sample_trips_my_lakeflow_pipelines") + .groupBy(col("pickup_zip")) + .agg( + sum("fare_amount").alias("total_fare") + ) + ) diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/script b/acceptance/bundle/templates/lakeflow-pipelines/python/script new file mode 100755 index 0000000000..90735aebef --- /dev/null +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/script @@ -0,0 +1,9 @@ +trace $CLI bundle init lakeflow-pipelines --config-file ./input.json --output-dir output + +cd output/my_lakeflow_pipelines +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod + +# Do not affect this repository's git behaviour #2318 +mv .gitignore out.gitignore +rm .databricks/.gitignore diff --git a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md index 3736bbd95b..d77802d23e 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md +++ b/acceptance/bundle/templates/lakeflow-pipelines/sql/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md @@ -2,20 +2,20 @@ This folder 
defines all source code for the 'my_lakeflow_pipelines_pipeline' pipeline: -- \`explorations\`: Ad-hoc notebooks used to explore the data processed by this pipeline. -- \`transformations\`: All dataset definitions and transformations. -- \`data_sources\` (optional): View definitions describing the source data for this pipeline. +- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations`: All dataset definitions and transformations. +- `data_sources` (optional): View definitions describing the source data for this pipeline. ## Getting Started -To get started, go to the \`transformations\` folder -- most of the relevant source code lives there: +To get started, go to the `transformations` folder -- most of the relevant source code lives there: -* By convention, every dataset under \`transformations\` is in a separate file. +* By convention, every dataset under `transformations` is in a separate file. * Take a look at the sample under "sample_trips_my_lakeflow_pipelines.sql" to get familiar with the syntax. Read more about the syntax at https://docs.databricks.com/dlt/sql-ref.html. -* Use \`Run file\` to run and preview a single transformation. -* Use \`Run pipeline\` to run _all_ transformations in the entire pipeline. -* Use \`+ Add\` in the file browser to add a new data set definition. -* Use \`Schedule\` to run the pipeline on a schedule! +* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. +* Use `Schedule` to run the pipeline on a schedule! For more tutorials and reference material, see https://docs.databricks.com/dlt. 
diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl index 40628458b1..d27b530da2 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl @@ -29,21 +29,21 @@ For more tutorials and reference material, see https://docs.databricks.com/dlt. This folder defines all source code for the '{{template `pipeline_name` .}}' pipeline: -- \`explorations\`: Ad-hoc notebooks used to explore the data processed by this pipeline. -- \`transformations\`: All dataset definitions and transformations. -- \`data_sources\` (optional): View definitions describing the source data for this pipeline. +- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations`: All dataset definitions and transformations. +- `data_sources` (optional): View definitions describing the source data for this pipeline. ## Getting Started -To get started, go to the \`transformations\` folder -- most of the relevant source code lives there: +To get started, go to the `transformations` folder -- most of the relevant source code lives there: -* By convention, every dataset under \`transformations\` is in a separate file. +* By convention, every dataset under `transformations` is in a separate file. * Take a look at the sample under "sample_trips_{{ .project_name }}.sql" to get familiar with the syntax. Read more about the syntax at https://docs.databricks.com/dlt/sql-ref.html. -* Use \`Run file\` to run and preview a single transformation. -* Use \`Run pipeline\` to run _all_ transformations in the entire pipeline. 
-* Use \`+ Add\` in the file browser to add a new data set definition. -* Use \`Schedule\` to run the pipeline on a schedule! +* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. +* Use `Schedule` to run the pipeline on a schedule! For more tutorials and reference material, see https://docs.databricks.com/dlt. {{ end -}} From 07454050dbabe76f8ed9c7703328402f9f530f46 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Mon, 2 Jun 2025 14:33:16 +0200 Subject: [PATCH 10/11] fix formatting --- .../resources/my_lakeflow_pipelines_pipeline/README.md | 2 +- .../explorations/sample_exploration.ipynb | 2 +- .../resources/{{.project_name}}_pipeline/README.md.tmpl | 3 +-- .../explorations/sample_exploration.ipynb.tmpl | 2 +- ruff.toml | 7 +++++-- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md index 7b2cc02ea9..6caf95d48a 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/README.md @@ -19,4 +19,4 @@ To get started, go to the `transformations` folder -- most of the relevant sourc * Use `+ Add` in the file browser to add a new data set definition. * Use `Schedule` to run the pipeline on a schedule! -For more tutorials and reference material, see https://docs.databricks.com/dlt. \ No newline at end of file +For more tutorials and reference material, see https://docs.databricks.com/dlt. 
diff --git a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb index 91bbbeae5a..0187c0c95f 100644 --- a/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb +++ b/acceptance/bundle/templates/lakeflow-pipelines/python/output/my_lakeflow_pipelines/resources/my_lakeflow_pipelines_pipeline/explorations/sample_exploration.ipynb @@ -60,4 +60,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl index d27b530da2..b085a301a6 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl @@ -22,8 +22,7 @@ To get started, go to the `transformations` folder -- most of the relevant sourc * Use `Schedule` to run the pipeline on a schedule! For more tutorials and reference material, see https://docs.databricks.com/dlt. 
- -{{- else -}} +{{ else -}} # {{template `pipeline_name` .}} diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl index 9afc20b0a2..acc7a9b50e 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl @@ -62,7 +62,7 @@ "nbformat": 4, "nbformat_minor": 0 } -{{- else -}} +{{ else -}} { "cells": [ { diff --git a/ruff.toml b/ruff.toml index b439cc6c00..5838db95ee 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,4 +1,7 @@ line-length = 150 -# tagging.py is synced from universe in the `openapi/tagging` directory and follows different format rules. -exclude = ["tagging.py"] + +exclude = [ + "tagging.py", # tagging.py is synced from universe in the `openapi/tagging` directory and follows different format rules. 
+ "acceptance/bundle/templates/lakeflow-pipelines/**/*.py" # files are manually formatted +] From ad7897b88276826c6bdb0e4be3718f1f195c9d83 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Mon, 2 Jun 2025 15:48:14 +0200 Subject: [PATCH 11/11] replace table_suffix with project_name --- .../templates/lakeflow-pipelines/library/variables.tmpl | 8 ++------ .../explorations/sample_exploration.ipynb.tmpl | 4 ++-- .../sample_trips_{{.project_name}}.py.tmpl | 2 +- .../sample_trips_{{.project_name}}.sql.tmpl | 2 +- .../sample_zones_{{.project_name}}.py.tmpl | 4 ++-- .../sample_zones_{{.project_name}}.sql.tmpl | 4 ++-- 6 files changed, 10 insertions(+), 14 deletions(-) diff --git a/libs/template/templates/lakeflow-pipelines/library/variables.tmpl b/libs/template/templates/lakeflow-pipelines/library/variables.tmpl index 51ead84c1a..9c5c36b449 100644 --- a/libs/template/templates/lakeflow-pipelines/library/variables.tmpl +++ b/libs/template/templates/lakeflow-pipelines/library/variables.tmpl @@ -1,13 +1,9 @@ -{{- define `table_suffix` -}} - {{ .project_name }} -{{- end }} - {{- define `pipeline_name` -}} - {{template `table_suffix` .}}_pipeline + {{ .project_name }}_pipeline {{- end }} {{- define `job_name` -}} - {{template `table_suffix` .}}_job + {{ .project_name }}_job {{- end }} {{- define `static_dev_schema` -}} diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl index acc7a9b50e..967e663fae 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl @@ -38,7 
+38,7 @@ "source": [ "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. The tables referenced in this notebook depend on that step.\n", "\n", - "display(spark.sql(\"SELECT * FROM {{ .default_catalog}}.{{template `static_dev_schema` .}}.{{template `table_suffix` .}}\"))" + "display(spark.sql(\"SELECT * FROM {{ .default_catalog}}.{{template `static_dev_schema` .}}.{{ .project_name }}\"))" ] } ], @@ -105,7 +105,7 @@ "USE CATALOG `{{.default_catalog}}`;\n", "USE SCHEMA `{{template `static_dev_schema` .}}`;\n", "\n", - "SELECT * from {{template `table_suffix` .}};" + "SELECT * from {{ .project_name }};" ] } ], diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl index ee0499fcdf..a191f88b9f 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl @@ -9,7 +9,7 @@ from utilities import utils @dlt.table -def sample_trips_{{template `table_suffix` .}}(): +def sample_trips_{{ .project_name }}(): return ( spark.read.table("samples.nyctaxi.trips") .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl 
index 65d3da532a..b95a95da4d 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl @@ -2,7 +2,7 @@ -- Edit the sample below or add new transformations -- using "+ Add" in the file browser. -CREATE MATERIALIZED VIEW sample_trips_{{template `table_suffix` .}} AS +CREATE MATERIALIZED VIEW sample_trips_{{ .project_name }} AS SELECT pickup_zip, fare_amount diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl index 3c1ecfe84b..64e40036d0 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl @@ -8,10 +8,10 @@ from pyspark.sql.functions import col, sum @dlt.table -def sample_zones_{{template `table_suffix` .}}(): +def sample_zones_{{ .project_name }}(): # Read from the "sample_trips" table, then sum all the fares return ( - spark.read.table("sample_trips_{{template `table_suffix` .}}") + spark.read.table("sample_trips_{{ .project_name }}") .groupBy(col("pickup_zip")) .agg( sum("fare_amount").alias("total_fare") diff --git a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl 
b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl index 9aababca55..ab84f4066a 100644 --- a/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl +++ b/libs/template/templates/lakeflow-pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl @@ -2,9 +2,9 @@ -- Edit the sample below or add new transformations -- using "+ Add" in the file browser. -CREATE MATERIALIZED VIEW sample_zones_{{template `table_suffix` .}} AS +CREATE MATERIALIZED VIEW sample_zones_{{ .project_name }} AS SELECT pickup_zip, SUM(fare_amount) AS total_fare -FROM sample_trips_{{template `table_suffix` .}} +FROM sample_trips_{{ .project_name }} GROUP BY pickup_zip